house365 的房源信息采集
这是自己以前写的一个采集程序,比较简单。
<?php
/**
 * Scrapes one page of an agent's sale listings from house365 and stores each
 * listing that is not already present in the `hou_mrent` table.
 *
 * Query-string parameters (each defaults to '' when absent):
 *   company  - copied into the `author` field of every stored listing
 *   memberid - stored with each listing and used in the duplicate check
 *   zj_num   - value sent as the `page` parameter of the index request
 *   zj_code  - value sent as the `agentcode` parameter of the index request
 *
 * Output: a script snippet that writes a summary into the parent frame's
 * `note` element and re-enables its `btn_sc` and `btn_zq` buttons.
 *
 * NOTE(review): this listing reached review with three spans missing (each is
 * marked "TRUNCATED IN SOURCE" below). The regexes and assignments that lived
 * in those spans are NOT reconstructed here; the variables they produced are
 * stubbed with empty values so the script parses and stores nothing until the
 * original code is restored from the author's copy.
 */

// The `db` class is not defined in this file - presumably global.php provides it.
include('global.php');

$conn = new db();

$company  = isset($_GET['company'])  ? $_GET['company']  : '';
$memberid = isset($_GET['memberid']) ? $_GET['memberid'] : '';
$zj_num   = isset($_GET['zj_num'])   ? $_GET['zj_num']   : '';
$zj_code  = isset($_GET['zj_code'])  ? $_GET['zj_code']  : '';

// Request parameters are URL-encoded so that stray characters cannot alter
// the rest of the query string.
$url = 'http://zsb.house365.com/main.php?infotype=0&price=0&buildarea=0&district=0&keyword=&order_=1&page='
    . urlencode($zj_num)
    . '&agentcode=' . urlencode($zj_code)
    . '&pkind=selllist&roomtype=&topic=&order=';

$text = @file_get_contents($url);
if ($text === false) {
    // Index page could not be fetched: treat it as a page without listings.
    $text = '';
}

// Capture group 1 is the detail-page URL of each listing on the index page.
preg_match_all('/<td align="center" valign="middle"><a href=\'(.*?)\' target=\'_blank\' title="(.*?)"><img src="http:\/\/sell.house365.com\/images\/sellesflist_12.gif" width="77" height="18" border="0" \/><\/a><\/td>/i', $text, $row);
$detailUrls = isset($row[1]) ? $row[1] : array();

// Category id => property-type label.
$cat_arr = array(
    // NOTE(review): the key of this first entry is truncated in the source
    // listing; "30" is inferred from the 31-33 sequence - confirm.
    '30' => '住宅',
    '31' => '写字楼',
    '32' => '商铺',
    '33' => '别墅',
);

// Zone id => district name.
$district = array(
    // NOTE(review): the key of this first entry is truncated in the source
    // listing; "46" is inferred as the only id missing from 44-56 - confirm.
    '46' => '玄武区',
    '45' => '鼓楼区',
    '48' => '白下区',
    '49' => '建邺区',
    '47' => '秦淮区',
    '44' => '下关区',
    '51' => '雨花台区',
    '50' => '栖霞区',
    '52' => '江宁区',
    '53' => '浦口区',
    '54' => '六合区',
    '55' => '溧水县',
    '56' => '高淳县',
    '60' => '其它',
);

// Facility label => listing flag set to 1 when that label is present.
$facilityFlags = array(
    // NOTE(review): the label that set 'facnet' is truncated in the source
    // listing - restore it here.
    '管道煤气' => 'facgas',
    '有线电视' => 'factvnet',
    '电话'     => 'facphone',
    '冰箱'     => 'facfridge',
    '电视机'   => 'factv',
    '洗衣机'   => 'facwasher',
    '热水器'   => 'facwheater',
    '空调'     => 'facaircon',
    '家具'     => 'facfurniture',
);

$num_all = 0;

foreach ($detailUrls as $url1) {
    $text1 = @file_get_contents($url1);
    if ($text1 === false) {
        // Detail page unavailable: nothing to extract for this listing.
        continue;
    }

    $mrent = array();
    $mrent['coltype']    = 'second';
    $mrent['menuid']     = '10';
    $mrent['memberid']   = $memberid;
    $mrent['memberprop'] = '2';
    $mrent['shangquan']  = '0';
    $mrent['infotype']   = 'sale';
    $mrent['jz']         = '1';
    $mrent['author']     = $company;

    // ---- TRUNCATED IN SOURCE (1 of 3) -----------------------------------
    // Original comment here: "listing name". Surviving fragment:
    //     preg_match_all('/<td colspan="2"
    // $housetype is read just below and $mrent['loupan'] is read by the
    // duplicate check, but neither assignment survives in the listing;
    // presumably they were in the lost spans. Both are stubbed empty.
    // TODO: restore the original extraction code.
    $mrent['loupan'] = '';
    $housetype = '';

    // Map the property-type label to its category id, 0 when unknown.
    $catid = array_search($housetype, $cat_arr);
    if ($catid === false) {
        $catid = 0;
    }
    $mrent['catid'] = $catid;

    // ---- TRUNCATED IN SOURCE (2 of 3) -----------------------------------
    // Original comment here: "update time". Surviving fragment:
    //     preg_match_all('/<td
    // $cat (the district name looked up below) was assigned in the lost
    // span; stubbed empty. TODO: restore the original extraction code.
    $cat = '';

    // array_search() yields false when the district is not in the map; the
    // original stored that result unchanged, and so does this.
    $mrent['zoneid'] = array_search($cat, $district);

    // Original comment here: "sector". The pattern below captures the
    // contact's phone markup (group 1) and the contact name (group 2).
    preg_match_all('/<td width="150" align="center"><strong><span>(.*?)<\/span><\/strong><\/td>\s*<td width="185">联系人:<span>(.*?)<\/span><\/td>/i', $text1, $lxr_arr);
    $lxr_tel  = isset($lxr_arr[1][0]) ? trim($lxr_arr[1][0]) : '';
    $lxr_name = isset($lxr_arr[2][0]) ? trim($lxr_arr[2][0]) : '';
    $mrent['lxr'] = iconv('GBK', 'UTF-8', $lxr_name);

    // Strip the styling span around the number, then split it on "-".
    $lxr_tel = str_replace(array('<span style="font-size:14px">', '</span>'), '', $lxr_tel);
    $tel_arr = explode('-', $lxr_tel);
    // The original looped over the segments and assigned `lxdh` in both
    // branches of an 11-character length test, so the last segment always
    // won; that result is kept.
    // NOTE(review): a commented-out line suggests 11-character segments were
    // once meant for a separate `lxrshouji` field - confirm intent.
    $mrent['lxdh'] = end($tel_arr);

    // ---- TRUNCATED IN SOURCE (3 of 3) -----------------------------------
    // Original comment here: "details". Surviving fragment:
    //     preg_match_all('/<div
    // The lost span extracted the facility labels of the listing and opened
    // the loop/switch whose tail survives. $facilities stands in for that
    // list; stubbed empty. TODO: restore the original extraction code.
    $facilities = array();

    // NOTE(review): the initial flag values are not visible in the source
    // listing; 0 is assumed - confirm.
    $mrent['facnet'] = 0;
    foreach ($facilityFlags as $flag) {
        $mrent[$flag] = 0;
    }
    // Each label sets only its own flag. The surviving switch had no `break`
    // statements, so a single match also set every flag listed after it.
    foreach ($facilities as $label) {
        if (isset($facilityFlags[$label])) {
            $mrent[$facilityFlags[$label]] = 1;
        }
    }

    if ($mrent['loupan'] != '') {
        // memberid is interpolated unquoted, so it is forced to an integer.
        // NOTE(review): `loupan` is scraped text placed inside a quoted SQL
        // literal. Escape it with whatever escaping the db wrapper offers
        // (its API is not visible here) or switch to a parameterised query.
        $sql = 'SELECT id FROM `hou_mrent` WHERE memberid = ' . (int) $memberid
            . " AND loupan = '" . $mrent['loupan'] . "'";
        $chk = $conn->Query2SingleRowArray($sql);

        // Store the listing only when no row with the same owner and name exists.
        if (!isset($chk['id']) || $chk['id'] == '') {
            $conn->insert('hou_mrent', $mrent, true);
            if ($conn->GetQueryAffectedRows() > 0) {
                $num_all++;
            }
        }
    }
}

$note = "抓取完成,本页一共抓取" . $num_all . "条房源";
// NOTE(review): this conversion assumes the script file itself is saved as GBK - confirm.
$note = iconv("GBK", "UTF-8", $note);
?>
<script type="text/javascript">parent.document.all('note').innerHTML="<?=$note?>";parent.document.all('btn_sc').disabled="";parent.document.all('btn_zq').disabled="";</script>
?