<?php
// Scraper for joke pages on iq888.com
// (sample page: http://www.iq888.com/joke/45/43275.htm).
//
// For every {$i}/{$j} pair in the hand-edited ranges below, the page
// http://www.iq888.com/joke/{$i}/{$j}.htm is fetched with Snoopy, the title
// and body paragraphs are extracted, and one INSERT statement for the
// `boblog_blogs` table is appended to a timestamped .sql dump file.
// The last successfully saved "<i>_<j>" pair is kept in $log so that an
// interrupted run can be resumed; every page that could not be fetched,
// parsed or saved is appended to $error as one "<i>_<j>" line.
set_time_limit(0);
header('Content-type: text/html; charset=utf-8');

$log = "./log.txt";     // resume marker: last saved record as "<i>_<j>"
$error = "./error.txt"; // failure list: one "<i>_<j>" per line
$sql_file = "./" . time() . ".sql";

// Both output files must exist up front: filesize() is called on the dump
// before the first write.
if (!file_exists($sql_file)) {
    touch($sql_file);
}
if (!file_exists($error)) {
    touch($error);
}

require_once("Snoopy.class.php");
$snoopy = new Snoopy();

// Default start position, overridden by a well-formed resume marker.
$i = 1;
$j = 1;
if (file_exists($log)) {
    $st = file_get_contents($log);
    if ($st && preg_match('/^(\d+)_(\d+)$/', trim($st), $tt)) {
        $i = (int) $tt[1];
        // The marker names the last record that was already saved, so resume
        // with the one after it instead of emitting it a second time.
        $j = (int) $tt[2] + 1;
    }
}

for (; $i < 10; $i++) { // upper bound: adjust by hand
    for (; $j < 100; $j++) { // upper bound: adjust by hand
        $saved = false;

        $snoopy->fetch("http://www.iq888.com/joke/{$i}/{$j}.htm");
        // The status line is expected to end in "OK" for a successful fetch.
        $code = substr(trim($snoopy->response_code), -2, 2);

        if ($code === 'OK') {
            $content = $snoopy->results;

            // Extract the first title and body paragraph; a page lacking
            // either one is treated as a failure rather than stored empty.
            $has_title = preg_match("|<p class=\"containtit\">(.*)</p>|Uis", $content, $title);
            $has_body = preg_match("|<p id=\"ptxt\">(.*)</p>|Uis", $content, $cc);

            if ($has_title === 1 && $has_body === 1) {
                // Escape quotes/backslashes so scraped text cannot break the
                // generated statement.
                // NOTE(review): addslashes() is byte-wise; this assumes the
                // fetched text is in an encoding where that is safe (e.g.
                // UTF-8) - confirm the site's charset.
                $titles = addslashes(trim(strip_tags($title[1])));
                $newcontent = addslashes(trim($cc[1]));

                // Built fresh for every record so each statement is written
                // to the dump exactly once.
                $sql = "INSERT INTO `boblog_blogs` (`blogid`, `title`, `pubtime`, `authorid`, `replies`, `tbs`, `views`, `property`, `category`, `tags`, `sticky`, `htmlstat`, `ubbstat`, `emotstat`, `content`, `editorid`, `edittime`, `weather`, `mobile`, `pinged`, `permitgp`, `starred`, `blogpsw`, `frontpage`, `entrysummary`, `comefrom`, `originsrc`, `blogalias`) VALUES
('".$j."','".$titles."','".time()."',1, 0, 0, 0, 0, 0, '', 0, 0, 1, 1, '".$newcontent."', 0, 0, 'sunny', 0, '', '', 0, '', 0, '', '', '', '');
";

                // filesize() results are cached per path; drop the cache so
                // the size reflects the writes made earlier in this run.
                clearstatcache();
                if (filesize($sql_file) > (2 * 1024 * 1024)) {
                    // Dump exceeds 2M: continue in "<old base name>_1.sql".
                    $sql_file = substr($sql_file, 0, -4) . "_1.sql";
                }

                // FILE_APPEND creates the file when it does not exist yet.
                if (file_put_contents($sql_file, $sql, FILE_APPEND) !== false) {
                    // Advance the resume marker only once the record is on disk.
                    file_put_contents($log, $i . "_" . $j);
                    $saved = true;
                }
            }
        }

        if (!$saved) {
            // Record the page that could not be fetched, parsed or saved.
            file_put_contents($error, $i . "_" . $j . "\n", FILE_APPEND);
        }
    }
    // A resumed $j applies to the first category only; every following
    // category starts again from the first article.
    $j = 1;
}
echo time()."采集完成";
?>

