Connecting the cookie and token code with the previous article: the full crawling flow
Getting the URLs
Since we already sorted out the cookie and the token in the previous article, all we need to do now is attach these two parameters to each request to get what we want.
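As a minimal sketch of that pattern (the cookie values and token below are hypothetical placeholders; the real ones come from the previous article's login code):

import requests

# Hypothetical placeholders; the real values come from the previous article
cookies = {'slave_sid': 'xxx'}
token = '1234567890'
# The backend endpoints used here are all called the same way:
# session cookies on the request plus the token as a query parameter
resp = requests.get('https://mp.weixin.qq.com/cgi-bin/searchbiz',
                    cookies=cookies,
                    params={'token': token, 'f': 'json'})
print(resp.status_code)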
First, search for the official-account name: in the backend, open Material Management (素材管理), click New image-text material (新建图文素材), then click Hyperlink (超链接), which takes you to the search page. From the request it fires, extract the URL, headers, and parameters, and work out which parameters change: random is a random float between 0 and 1, query is the search keyword, and token is the value we extracted earlier.
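Put together, the search request looks roughly like this; cookies and token are again placeholders for the real values:

import random
import requests

cookies = {'slave_sid': 'xxx'}   # placeholder for the logged-in cookies
token = '1234567890'             # placeholder for the parsed token
params = {
    'action': 'search_biz',
    'token': token,
    'lang': 'zh_CN',
    'f': 'json',
    'ajax': '1',
    'random': random.random(),   # random float in [0, 1)
    'query': 'python',           # the search keyword
    'begin': 0,                  # page offset: page number * 5
    'count': '5'
}
res = requests.get('https://mp.weixin.qq.com/cgi-bin/searchbiz',
                   cookies=cookies, params=params)
# Each hit carries the account's nickname and its fakeid
print(res.json().get('list', []))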
Next, let's look at the request and endpoint used to fetch the articles themselves.
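It is the appmsg endpoint with action=list_ex, where the fakeid returned by the search above identifies the account. A rough sketch with placeholder values:

import random
import requests

cookies = {'slave_sid': 'xxx'}   # placeholder for the logged-in cookies
token = '1234567890'             # placeholder for the parsed token
fakeid = 'MzA5MzUwMDAwMA=='      # placeholder fakeid from the search step
params = {
    'token': token,
    'lang': 'zh_CN',
    'f': 'json',
    'ajax': '1',
    'random': random.random(),
    'action': 'list_ex',
    'begin': 0,        # offset into the account's article list
    'count': 5,        # articles per request
    'fakeid': fakeid,
    'type': 9
}
res = requests.get('https://mp.weixin.qq.com/cgi-bin/appmsg',
                   cookies=cookies, params=params)
# app_msg_list holds the articles; each entry has a link and a digest
print(res.json().get('app_msg_list', []))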
That completes the page analysis, so we can start writing the code. This code plugs into the code from the previous article.
import time
import json
import random
import csv
from selenium import webdriver
from lxml import html
import requests
import re
# Get the cookies and token
class C_ookie:
    # Initialization
    def __init__(self):
        self.html = ''

    # Log in through a real browser and save the cookies
    def get_cookie(self):
        cooki = {}
        url = 'https://mp.weixin.qq.com'
        Browner = webdriver.Chrome()
        Browner.get(url)
        # Locate the account input box
        ID = Browner.find_element_by_name('account')
        # Locate the password input box
        PW = Browner.find_element_by_name('password')
        # Fill in your own account and password here
        id = ''
        pw = ''
        # id = input('Enter the account: ')
        # pw = input('Enter the password: ')
        ID.send_keys(id)
        PW.send_keys(pw)
        # Find the login button and click it
        Browner.find_element_by_class_name('btn_login').click()
        # Wait for the QR code to be scanned
        time.sleep(10)
        cks = Browner.get_cookies()
        for ck in cks:
            cooki[ck['name']] = ck['value']
        ck1 = json.dumps(cooki)
        print(ck1)
        # Persist the cookies so the next stage can reuse them
        with open('ck.txt', 'w') as f:
            f.write(ck1)
        self.html = Browner.page_source
        Browner.quit()
# Fetch the articles
class getEssay:
    def __init__(self):
        # Load the cookies saved by C_ookie
        with open('ck.txt', 'r') as f:
            cookie = f.read()
        self.cookie = json.loads(cookie)
        # Get the token: with valid cookies the home page redirects
        # to a URL that carries token=...
        self.header = {
            "HOST": "mp.weixin.qq.com",
            "User-Agent": 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'
        }
        m_url = 'https://mp.weixin.qq.com'
        response = requests.get(url=m_url, cookies=self.cookie)
        self.token = re.findall(r'token=(\d+)', str(response.url))[0]
        # (nickname, fakeid) pairs collected by getGname
        self.fakeid = []
    # Get the official-account info (nickname and fakeid)
    def getGname(self):
        # Request headers
        headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Host': 'mp.weixin.qq.com',
            'Referer': 'https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit_v2&action=edit&isNew=1&type=10&token=%d&lang=zh_CN' % int(self.token),
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest'
        }
        # Search endpoint
        url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
        # query = input('Enter the official-account keyword to search: ')
        # begin = int(input('Enter the start page: '))
        query = 'python'
        begin = 0
        # Each page holds five results
        begin *= 5
        # Request parameters
        data = {
            'action': 'search_biz',
            'token': self.token,
            'lang': 'zh_CN',
            'f': 'json',
            'ajax': '1',
            'random': random.random(),
            'query': query,
            'begin': begin,
            'count': '5'
        }
        # Request the page and parse the JSON it returns
        res = requests.get(url=url, cookies=self.cookie, headers=headers, params=data)
        name_js = json.loads(res.text)
        biz_list = name_js['list']
        for i in biz_list:
            time.sleep(1)
            fakeid = i['fakeid']
            nickname = i['nickname']
            print(nickname, fakeid)
            self.fakeid.append((nickname, fakeid))
    # Get the article URLs
    def getEurl(self):
        url = 'https://mp.weixin.qq.com/cgi-bin/appmsg?'
        headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Host': 'mp.weixin.qq.com',
            'Referer': 'https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit_v2&action=edit&isNew=1&type=10&token=%d&lang=zh_CN' % int(self.token),
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest'
        }
        # Walk the fakeid list and request each account's article links
        for i in self.fakeid:
            time.sleep(1)
            fake = i[1]
            data = {
                'token': self.token,
                'lang': 'zh_CN',
                'f': 'json',
                'ajax': '1',
                'random': random.random(),
                'action': 'list_ex',
                'begin': 0,
                'count': 5,
                'fakeid': fake,
                'type': 9
            }
            res = requests.get(url, cookies=self.cookie, headers=headers, params=data)
            link_l = json.loads(res.text)
            self.parJson(link_l)
    # Parse the JSON and pull out each article's URL
    def parJson(self, link_l):
        l = link_l['app_msg_list']
        for i in l:
            link = i['link']
            name = i['digest']
            self.saveData(name, link)

    # Append the data to a CSV file
    def saveData(self, name, link):
        # newline='' stops csv from inserting blank rows on Windows
        with open('link.csv', 'a', encoding='utf8', newline='') as f:
            w = csv.writer(f)
            w.writerow((name, link))
        print('ok')
C = C_ookie()
C.get_cookie()
G = getEssay()
G.getGname()
G.getEurl()
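After the run, link.csv holds one row per article, with the digest in the first column and the link in the second, five articles per account with the parameters above.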
That's the whole crawling process; I hope it helps.