支持多线程获取网页
Array Module_HttpClient_Driver_Fsock::request_urls( Array/string $urls [, Int $timeout = 10 ] )
参数列表
参数 | 类型 | 描述 | 默认值
$urls | Array/string | $urls | (无)
$timeout | Int | $timeout | integer 10
返回值类型
Array
/**
 * Fetch one or more URLs through handles returned by $this->_create() and
 * collect the response bodies.
 *
 * At most $this->multi_exec_num handles are created up-front (no limit when it
 * is 0 or less); the remaining URLs wait in a queue and one is started each
 * time a response has been fully processed. Responses are read one handle at
 * a time. A response carrying a Location header is followed, up to 10 hops
 * per original URL. Parsed details (code, data, header, time) of every
 * finished URL are stored in $this->http_data[$url].
 *
 * @param array|string $urls    a single URL or a list of URLs
 * @param int          $timeout handed unchanged to $this->_create()
 * @return array map of URL => body string when the status code is 200,
 *               false for any other outcome
 */
protected function request_urls($urls, $timeout = 10)
{
    # Accept a single URL as well as a list
    if (!is_array($urls))
    {
        $urls = ($urls === null || $urls === '') ? array() : array($urls);
    }

    # Remove duplicates
    $urls = array_unique($urls);
    if (!$urls) return array();

    # Handles currently being processed: list of array(url, handle)
    $listener_list = array();

    # Return value
    $result = array();

    # Number of requests started so far
    $list_num = 0;

    # Redirect targets seen per original URL
    $redirect_list = array();

    # URLs waiting for a free slot
    $multi_list = array();

    foreach ($urls as $url)
    {
        if ($this->multi_exec_num > 0 && $list_num >= $this->multi_exec_num)
        {
            # Slot limit reached: queue the URL
            $multi_list[] = $url;
        }
        else
        {
            $listener_list[] = array($url, $this->_create($url, $timeout));
            $list_num++;
        }

        $result[$url] = null;
        $this->http_data[$url] = null;
    }

    # Number of finished requests
    $done_num = 0;

    while ($listener_list)
    {
        list($done_url, $f) = array_shift($listener_list);

        $time = microtime(true);
        $str = '';

        # A failed _create() must not be passed to feof(): on a non-resource
        # the read loop would never terminate. Treat it as an empty response.
        if (is_resource($f))
        {
            while (!feof($f))
            {
                $line = fgets($f);
                if ($line === false)
                {
                    # Timeout or read error: feof() may stay false forever
                    break;
                }
                $str .= $line;
            }
            fclose($f);
        }

        $time = microtime(true) - $time;

        # Split head and body; tolerate responses without a blank line
        $parts = explode("\r\n\r\n", $str, 2);
        $header = $parts[0];
        $body = isset($parts[1]) ? $parts[1] : '';

        $header_arr = explode("\r\n", $header);
        $first_line = array_shift($header_arr);

        # Any HTTP version, reason phrase optional
        if (preg_match('#^HTTP/[0-9]+(?:\.[0-9]+)?[ ]+([0-9]+)#', $first_line, $m))
        {
            $code = $m[1];
        }
        else
        {
            $code = 0;
        }

        if (preg_match('#^Transfer-Encoding[ ]*:[^\r\n]*chunked#im', $header))
        {
            # Decode "<hex size>[;ext]\r\n<data>\r\n" records until the
            # zero-size chunk; payload bytes (including CRLF) are kept intact.
            $decoded = '';
            $offset = 0;
            $length = strlen($body);

            while ($offset < $length)
            {
                $eol = strpos($body, "\r\n", $offset);
                if ($eol === false)
                {
                    # Truncated stream: keep what was decoded so far
                    break;
                }

                $size_line = substr($body, $offset, $eol - $offset);
                $semi = strpos($size_line, ';');
                if ($semi !== false)
                {
                    $size_line = substr($size_line, 0, $semi);
                }
                $size_line = trim($size_line);

                if ($size_line === '' || !ctype_xdigit($size_line))
                {
                    # Not chunk framing after all: leave the raw body untouched
                    $decoded = null;
                    break;
                }

                $size = (int)hexdec($size_line);
                if ($size === 0)
                {
                    break;
                }

                $decoded .= substr($body, $eol + 2, $size);
                # Skip size line, payload and the CRLF that terminates the chunk
                $offset = $eol + 2 + $size + 2;
            }

            if ($decoded !== null)
            {
                $body = $decoded;
            }
        }

        # Anchored to line start so "Content-Location" is not mistaken for a
        # redirect; no trailing CRLF required so the last header line matches.
        if (preg_match('#^Location[ ]*:([^\r\n]+)#im', $header, $m))
        {
            if (isset($redirect_list[$done_url]) && count($redirect_list[$done_url]) >= 10)
            {
                # Too many redirects: give up on this URL
                $body = $header = '';
                $code = 0;
            }
            else
            {
                # Follow the redirect
                $new_url = trim($m[1]);
                $redirect_list[$done_url][] = $new_url;

                # Temporarily forward the cookie set by this response
                $cookie_swapped = false;
                $old_cookie = null;
                if (preg_match('#^Set-Cookie[ ]*:([^\r\n]+)#im', $header, $m2))
                {
                    $cookie_swapped = true;
                    $old_cookie = $this->cookies;
                    $this->cookies = trim($m2[1]);
                }

                # Put the follow-up request at the front, still keyed by the original URL
                array_unshift($listener_list, array($done_url, $this->_create($new_url, $timeout)));

                if ($cookie_swapped)
                {
                    $this->cookies = $old_cookie;
                }

                continue;
            }
        }

        $rs = array
        (
            'code'   => $code,
            'data'   => $body,
            'header' => $header_arr,
            'time'   => $time,
        );

        $this->http_data[$done_url] = $rs;

        if ($rs['code'] != 200)
        {
            Core::debug()->error('URL:'.$done_url.' ERROR,TIME:' . $this->http_data[$done_url]['time'] . ',CODE:' . $this->http_data[$done_url]['code'] );
            $result[$done_url] = false;
        }
        else
        {
            Core::debug()->info('URL:'.$done_url.' OK.TIME:' . $this->http_data[$done_url]['time'] );
            $result[$done_url] = $rs['data'];
        }

        $done_num++;

        if ($multi_list)
        {
            # Start the next queued URL in the slot that just became free
            $current_url = array_shift($multi_list);
            $listener_list[] = array($current_url, $this->_create($current_url, $timeout));
            $list_num++;
        }

        if ($done_num >= $list_num) break;
    }

    return $result;
}