文章采集api(终于找到解决方案了,这是一个值得庆祝的事情..)
优采云 发布时间: 2022-02-11 00:09 文章采集api(终于找到解决方案了,这是一个值得庆祝的事情..)
终于找到了解决办法,值得庆贺....
原来是因为微信在源码中添加了反采集代码,把文章源码中的这一段去掉就好了!
具体代码如下:
/**
 * Fetches the article whose address was POSTed as "wurl", strips WeChat's
 * anti-scraping marker from the HTML, extracts text with QueryList and hands
 * the result to the view as "conD".
 *
 * When the address is missing, is not an http(s) URL, or cannot be fetched,
 * "conD" is assigned an empty array and the view is still rendered.
 */
public function getCon(){
    header('Content-type: text/html; charset=utf-8');
    import('Vendor.QL.QueryList');

    // Address of the article to collect (user supplied).
    $w_url = isset($_POST['wurl']) ? trim($_POST['wurl']) : '';
    // Example address for manual testing:
    // $w_url='http://mp.weixin.qq.com/s?__biz=MzA5NzQ5OTMxMA==&mid=2650621512&idx=1&sn=2059946e820805c0d62a450aa3af62be&chksm=88960789bfe18e9f47417eb45cd8efe458af9e93fea3e8e4e242ea2376fd3e4c69f5218293cb&scene=0#wechat_redirect';

    $data = array();

    // Accept only http(s): the value is untrusted, and file_get_contents()
    // would otherwise happily read local paths or php:// streams.
    // NOTE(review): consider also restricting the host (e.g. mp.weixin.qq.com)
    // if arbitrary remote sites are not meant to be fetched.
    if (preg_match('#^https?://#i', $w_url)) {
        // Download the article source.
        $html = file_get_contents($w_url);

        if ($html !== false) {
            // Remove the anti-scraping marker WeChat embeds in the page; its fake
            // <body>/<head>/<html> tags confuse the selector engine.
            // NOTE(review): the marker text was lost when this snippet was
            // published (the search string was empty); restored here — confirm
            // against a current article source.
            $html = str_replace('<!--headTrap<body></body><head></head><html></html>-->', '', $html);

            $data = \QueryList::Query($html, array(
                // Collection rules:
                // 'rule name' => array('jQuery selector', 'attribute to collect'),
                'titleTag' => array('title', 'text'),
                // 'title' => array('#activity-name','text'),
                'content' => array('body', 'text'),
                // 'image' => array('img','src'),
                // WeChat-specific rules
                'contentWx' => array('#js_content', 'text'),
                // 'imageWx' => array('img','data-src'),
                // 'conText' => array('.rich_media_content>p','text'),
            ))->data;

            foreach ($data as $k => $v) {
                // The imageWx rule is disabled above, so only post-process the
                // value when it is actually present (avoids an undefined index).
                // presumably cut_str() trims the query string after '?' — verify.
                if (isset($v['imageWx'])) {
                    $data[$k]['imageWx'] = $this->cut_str($v['imageWx'], '?', 0);
                }
            }
        }
    }

    // Pass the collected rows to the template and render it.
    $this->assign('conD', $data);
    $this->display();
}
// Requests the profile of the user identified by `openid` from the WeChat
// user/info endpoint and prints the response before and after re-decoding.
String token = AccessTokenTool.getAccessToken();
String urlTemplate = "https://api.weixin.qq.com/cgi-bin/user/info?access_token=ACCESS_TOKEN&openid=OPENID&lang=zh_CN";
// Raw JSON exactly as returned by HttpUtil.sendGet
String jsonResult = HttpUtil.sendGet(urlTemplate.replace("OPENID", openid).replace("ACCESS_TOKEN", token));
System.out.println(jsonResult);
// Re-decoded JSON: turn the characters back into bytes as ISO-8859-1 and read
// those bytes as UTF-8.
// NOTE(review): this assumes HttpUtil.sendGet decoded the UTF-8 response body
// as ISO-8859-1 — confirm; if it already decodes as UTF-8 this step corrupts the text.
// Charset constants replace the string names, so a typo cannot slip through
// and no UnsupportedEncodingException has to be handled.
String json = new String(
        jsonResult.getBytes(java.nio.charset.StandardCharsets.ISO_8859_1),
        java.nio.charset.StandardCharsets.UTF_8);
System.out.println(json);
坐下来动手写代码吧。没有哪项技能是不经过多年的刻意练习就能轻易掌握的。