file_get_contents 设置代理抓取页面

普通页面获取

// Fetch a page directly (no proxy) and print every <h1> element found in it.
$url = "http://www.epooll.com/archives/806/";

// file_get_contents() returns false when the request fails, so check the
// result before handing it to the regex engine.
$contents = file_get_contents($url);
if ($contents === false) {
    echo "Failed to fetch {$url}\n";
} else {
    // Use '#' as the pattern delimiter: the closing tag </h1> contains a '/',
    // which would otherwise terminate a '/'-delimited pattern prematurely.
    // Flags: i = case-insensitive, s = '.' also matches newlines.
    preg_match_all('#<h1>(.*?)</h1>#is', $contents, $matchs);

    // $matchs[0] holds the full matches, including the surrounding <h1> tags.
    print_r($matchs[0]);
}

设置代理IP去采集数据

// Fetch a page through an HTTP proxy by attaching a stream context
// to file_get_contents().
$proxyOptions = [
    'http' => [
        // Address and port of the proxy server to use.
        'proxy' => 'tcp://192.168.0.2:3128',
        // Ask PHP to put the entire URI in the request line.
        'request_fulluri' => true,
    ],
];

$context = stream_context_create($proxyOptions);
$html = file_get_contents("http://www.epooll.com/archives/806/", false, $context);
echo $html;

设置需要验证的代理IP去采集数据

// Fetch a page through an HTTP proxy that requires Basic authentication.

// Proxy credentials in "LOGIN:PASSWORD" form, base64-encoded for the
// Proxy-Authorization header.
$auth = base64_encode('USER:PASS');

$proxyOptions = [
    'http' => [
        // Address and port of the proxy server to use.
        'proxy' => 'tcp://192.168.0.2:3128',
        // Ask PHP to put the entire URI in the request line.
        'request_fulluri' => true,
        // Credentials presented to the proxy.
        'header' => "Proxy-Authorization: Basic $auth",
    ],
];

$context = stream_context_create($proxyOptions);
$html = file_get_contents("http://www.epooll.com/archives/806/", false, $context);
echo $html;

results matching ""

    No results matching ""