选择语言 :

 Module_HttpClient_Driver_Fsock::request_urls

支持多线程获取网页

Array Module_HttpClient_Driver_Fsock::request_urls( Array/string $urls [, Int $timeout = 10 ] )
see
http://cn.php.net/manual/en/function.curl-multi-exec.php#88453

参数列表

参数 类型 描述 默认值
$urls Array/string 要请求的URL（数组或单个字符串）
$timeout Int 超时时间 10
返回值
  • Array
File: ./modules/httpclient/driver/fsock.class.php
/**
 * Fetch one or more URLs through socket connections opened by _create().
 *
 * At most $this->multi_exec_num connections are opened up front (0 or less
 * means no limit); the remaining URLs wait in a queue and are opened one at
 * a time as earlier responses complete. Responses are read one after
 * another, in queue order.
 *
 * 3xx responses carrying a Location header are followed, up to 10 hops per
 * original URL. The first Set-Cookie header of a redirect response is
 * forwarded to the follow-up request only.
 *
 * For every URL the parsed response (code, data, header, time) is also
 * stored in $this->http_data[$url].
 *
 * @param array|string $urls    One URL or a list of URLs.
 * @param int          $timeout Passed through unchanged to _create().
 * @return array Map of URL => response body (string) when the final status
 *               code is 200, or false otherwise.
 */
protected function request_urls($urls, $timeout = 10)
{
    # Accept a single URL string as well as a list
    if (!is_array($urls))
    {
        $urls = ($urls === null || $urls === '') ? array() : array($urls);
    }

    # De-duplicate
    $urls = array_unique($urls);

    if (!$urls)return array();

    # Connections currently being processed: list of array(url, stream)
    $listener_list = array();

    # Return value
    $result = array();

    # Total number of connections opened so far
    $list_num = 0;

    # Redirect history, keyed by the originally requested URL
    $redirect_list = array();

    # URLs waiting for a free slot
    $multi_list = array();
    foreach ( $urls as $url )
    {
        if ( $this->multi_exec_num>0 && $list_num>=$this->multi_exec_num )
        {
            # Concurrency limit reached: queue it
            $multi_list[] = $url;
        }
        else
        {
            # Open the connection right away
            $listener_list[] = array($url,$this->_create($url, $timeout));
            $list_num++;
        }

        $result[$url] = null;
        $this->http_data[$url] = null;
    }

    # Number of finished requests
    $done_num = 0;

    while($listener_list)
    {
        list($done_url, $f) = array_shift($listener_list);

        $time = microtime(1);
        $str = '';

        # A failed _create() may not hand back a stream; treat that as an
        # empty response instead of calling feof() on a non-resource.
        if (is_resource($f))
        {
            while (!feof($f))
            {
                $line = fgets($f);

                # fgets() returns false on a read error or timeout while
                # feof() may still be false; stop instead of spinning forever.
                if ($line === false)break;

                $str .= $line;
            }

            fclose($f);
        }
        $time = microtime(1)-$time;

        # Split header and body; a truncated response may have no body part
        $parts  = explode("\r\n\r\n", $str, 2);
        $header = $parts[0];
        $body   = isset($parts[1]) ? $parts[1] : '';

        $header_arr = explode("\r\n", $header);
        $first_line = array_shift($header_arr);

        # Accept any HTTP version in the status line, not only HTTP/1.1
        if ( preg_match('#^HTTP/[0-9]+(?:\.[0-9]+)? ([0-9]+)#', $first_line, $m) )
        {
            $code = $m[1];
        }
        else
        {
            $code = 0;
        }

        # Header names and the "chunked" token are case-insensitive
        if ( preg_match('#^Transfer-Encoding[ \t]*:.*\bchunked\b#mi', $header) )
        {
            # Decode every chunk: "<hex size>[;ext]\r\n<data>\r\n", ended by a
            # zero-size chunk.
            $decoded  = '';
            $chunk_ok = false;
            $offset   = 0;
            $length   = strlen($body);

            while ($offset < $length)
            {
                $eol = strpos($body, "\r\n", $offset);
                if ($eol === false)break;

                $size_parts = explode(';', substr($body, $offset, $eol - $offset), 2);
                $size_hex   = trim($size_parts[0]);
                if (!preg_match('#^[0-9a-fA-F]+$#', $size_hex))break;

                $chunk_ok = true;
                $size     = (int)hexdec($size_hex);
                if ($size === 0)break;

                $decoded .= substr($body, $eol + 2, $size);

                # Skip the chunk data and the CRLF that terminates it
                $offset = $eol + 2 + $size + 2;
            }

            # Leave the body untouched when it does not look chunked at all
            if ($chunk_ok)
            {
                $body = $decoded;
            }
        }

        # Follow redirects only. The pattern is anchored to the start of a
        # header line so "Content-Location" is not mistaken for "Location",
        # and it does not require a trailing CRLF (the last header line has
        # none after the header/body split).
        if ( $code >= 300 && $code < 400 && preg_match('#^Location[ ]*:[ \t]*([^\r\n]+)#mi', $header , $m) )
        {
            if (isset($redirect_list[$done_url]) && count($redirect_list[$done_url])>=10)
            {
                # Too many redirects: give up on this URL
                $body = $header = '';
                $code = 0;
            }
            else
            {
                # Follow the redirect
                $new_url = trim($m[1]);
                $redirect_list[$done_url][] = $new_url;

                # Forward the cookie set by the redirect response, for the
                # follow-up request only.
                $cookie_swapped = false;
                if (preg_match('#^Set-Cookie[ ]*:[ \t]*([^\r\n]+)#mi', $header , $m2))
                {
                    $old_cookie     = $this->cookies;
                    $this->cookies  = trim($m2[1]);
                    $cookie_swapped = true;
                }

                # Put the follow-up at the head of the list, still keyed by
                # the originally requested URL.
                array_unshift($listener_list, array($done_url , $this->_create($new_url, $timeout)));

                if ($cookie_swapped)
                {
                    $this->cookies = $old_cookie;
                    unset($old_cookie);
                }
                continue;
            }
        }

        $rs = array
        (
            'code'   => $code,
            'data'   => $body,
            'header' => $header_arr,
            'time'   => $time,
        );

        $this->http_data[$done_url] = $rs;

        if ($rs['code']!=200)
        {
            Core::debug()->error('URL:'.$done_url.' ERROR,TIME:' . $this->http_data[$done_url]['time'] . ',CODE:' . $this->http_data[$done_url]['code'] );
            $result[$done_url] = false;
        }
        else
        {
            Core::debug()->info('URL:'.$done_url.' OK.TIME:' . $this->http_data[$done_url]['time'] );
            $result[$done_url] = $rs['data'];
        }

        $done_num++;

        if ( $multi_list )
        {
            # Take the next queued URL
            $current_url = array_shift($multi_list);

            # Open it and append it to the processing list
            $listener_list[] = array($current_url, $this->_create($current_url, $timeout));

            # One more connection opened
            $list_num++;
        }

        if ($done_num>=$list_num)break;
    }

    return $result;
}