文章采集api(终于找到解决方案了,这是一个值得庆祝的事情..)

优采云 发布时间: 2022-02-11 00:09

  文章采集api(终于找到解决方案了,这是一个值得庆祝的事情..)

  终于找到了解决办法,值得庆贺....

  原来是因为微信在源码中添加了反采集代码,把文章源码中的这一段去掉就好了!

  具体代码如下:

  /**
   * Fetch the article page whose address was posted as "wurl", extract its
   * title and text with QueryList, and render the result through the view.
   *
   * The extracted rows are assigned to the template variable "conD" and the
   * default template is displayed. When the address is missing, is not an
   * http(s) URL, or cannot be fetched, an empty result set is rendered.
   */
  public function getCon(){
      header('Content-type: text/html; charset=utf-8');
      import('Vendor.QL.QueryList');

      // Article address received from the client.
      $w_url = isset($_POST['wurl']) ? trim($_POST['wurl']) : '';
      // Sample article address for testing:
      // $w_url='http://mp.weixin.qq.com/s?__biz=MzA5NzQ5OTMxMA==&mid=2650621512&idx=1&sn=2059946e820805c0d62a450aa3af62be&chksm=88960789bfe18e9f47417eb45cd8efe458af9e93fea3e8e4e242ea2376fd3e4c69f5218293cb&scene=0#wechat_redirect';

      // Only fetch http(s) addresses: the value is client-supplied, and
      // file_get_contents() would otherwise also open local paths and other
      // stream wrappers.
      $html = false;
      if (preg_match('#^https?://#i', $w_url)) {
          // Download the article source.
          $html = file_get_contents($w_url);
      }

      $data = array();
      if ($html !== false) {
          // Strip the anti-scraping markup WeChat embeds in the page source.
          // NOTE(review): the published snippet had an empty search string here
          // (a no-op); the headTrap comment below is the marker this fix is
          // meant to remove -- confirm against a live page source.
          $html = str_replace('<!--headTrap<body></body><head></head><html></html>-->', '', $html);

          $data = \QueryList::Query($html, array(
              // Extraction rules:
              // 'rule name' => array('jQuery selector', 'attribute to collect'),
              'titleTag' => array('title', 'text'),
              // 'title' => array('#activity-name', 'text'),
              'content' => array('body', 'text'),
              // 'image' => array('img', 'src'),
              // WeChat-specific rules
              'contentWx' => array('#js_content', 'text'),
              // 'imageWx' => array('img', 'data-src'),
              // 'conText' => array('.rich_media_content>p', 'text'),
          ))->data;

          foreach ($data as $k => $v) {
              // The 'imageWx' rule is disabled above, so the key is only
              // present when that rule is re-enabled; skip rows without it
              // instead of reading an undefined index.
              if (isset($v['imageWx'])) {
                  $data[$k]['imageWx'] = $this->cut_str($v['imageWx'], '?', 0);
              }
          }
      }

      // Hand the result to the view.
      // print_r($data);
      $this->assign('conD', $data);
      $this->display();
  }

   // Query the WeChat cgi-bin/user/info endpoint for the given openid.
   String token = AccessTokenTool.getAccessToken();

   // URL template; the ACCESS_TOKEN and OPENID placeholders are substituted below.
   String URL = "https://api.weixin.qq.com/cgi-bin/user/info?access_token=ACCESS_TOKEN&openid=OPENID&lang=zh_CN";
   String requestUrl = URL.replace("OPENID", openid).replace("ACCESS_TOKEN", token);

   // Raw JSON exactly as returned by the HTTP helper.
   String jsonResult = HttpUtil.sendGet(requestUrl);
   System.out.println(jsonResult);

   // Re-decoded JSON: turn the string back into ISO-8859-1 bytes and read them as UTF-8.
   // NOTE(review): presumably HttpUtil.sendGet decodes the response as ISO-8859-1 -- confirm;
   // if it already decodes as UTF-8 this step corrupts non-ASCII text.
   // StandardCharsets constants avoid the checked UnsupportedEncodingException and the
   // charset-name lookup that the String-based overloads incur.
   String json = new String(
         jsonResult.getBytes(java.nio.charset.StandardCharsets.ISO_8859_1),
         java.nio.charset.StandardCharsets.UTF_8);
   System.out.println(json);

  坐下来动手写代码吧。没有哪一项技能是不经过多年的刻意练习就能轻易掌握的。

0 个评论

要回复文章请先登录或注册


官方客服QQ群

微信人工客服

QQ人工客服


线