PHP利用正则表达式抓取页面数据
(记录一下,00后程序员第一天写博客,/4/1)
(抓取招投标网站中的数据)
<?php
// Downloads the page at $url, normalises it to UTF-8 and prints every
// <a href='...'>text</a> link found in it, one "href text" pair per line.

// Content type and cache policy are two separate HTTP headers; packing
// Cache-Control into the Content-Type value does not set a cache policy.
header('Content-Type: text/html; charset=utf-8');
header('Cache-Control: private');

// The mbstring.internal_encoding ini setting is deprecated (PHP 5.6+);
// mb_internal_encoding() is the supported way to set it.
mb_internal_encoding('UTF-8');
date_default_timezone_set('Asia/Shanghai');

// Page to scrape.
// NOTE(review): the host part of this URL looks truncated ("gpcgd.") - confirm the real domain.
$url = "http://gpcgd./bsfw/cgxx/cgxxgg/index.html";

$contents = file_get_contents($url);
if ($contents === false) {
    // Without this guard the rest of the script would run on `false`.
    exit('Failed to fetch ' . $url);
}

// Convert the page to UTF-8 when it was detected as something else.
// Detection can return false; in that case the bytes are left untouched
// instead of being handed to a converter with an invalid source encoding.
$encode = mb_detect_encoding($contents, ['ASCII', 'UTF-8', 'GB2312', 'GBK', 'BIG5']);
if ($encode !== false && $encode !== 'UTF-8') {
    // mb_convert_encoding understands the encoding names mb_detect_encoding returns.
    $contents = mb_convert_encoding($contents, 'UTF-8', $encode);
}

// Matches single-quoted anchors only: group 1 is the href, group 2 the link text.
$preg = '/<a href=\'(.*?)\'>(.*?)<\/a>/';
if (preg_match_all($preg, $contents, $array) === false) {
    exit('Link extraction failed (PCRE error ' . preg_last_error() . ')');
}

// $array[1] holds the links, $array[2] the titles (same indexes).
foreach ($array[1] as $key => $val) {
    // The page is remote, untrusted content: escape it before echoing it into
    // our own HTML. double_encode=false keeps entities already present in the
    // source (e.g. &amp;) from being escaped a second time.
    $link  = htmlspecialchars($val, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    $title = htmlspecialchars($array[2][$key], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    echo $link . ' ' . $title . '<br>';
}

die();