Connecting the cookie and token code with the previous article: the full crawling flow



Getting the URL

Now that the cookie and token problem is solved, we just need to attach these two parameters to our requests to get the data we want.
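As a quick recap, here is a minimal sketch of that handoff, assuming the previous article's code saved the login cookies to ck.txt: requesting mp.weixin.qq.com with valid cookies redirects to a URL that carries the token.

import json
import re

import requests

# Load the cookies saved by the previous article's code.
with open('ck.txt', 'r') as f:
    cookie = json.loads(f.read())

# With valid cookies, mp.weixin.qq.com redirects to a URL containing the token.
response = requests.get('https://mp.weixin.qq.com', cookies=cookie)
token = re.findall(r'token=(\d+)', str(response.url))[0]
print(token)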

First, we'll search for an official account by name.


In the admin backend, open Material Management (素材管理) and click to create a new image-and-text message.


Click "Hyperlink", which takes you to this page.


Here we grab the URL, the headers, and the request parameters, and work out which parameters change between requests.

The random parameter is a random float between 0 and 1, query is the search keyword, and token is the value we extracted earlier.
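Putting those pieces together, the search request looks roughly like this. This is a minimal sketch using the same endpoint and parameter names as the full script below, not an official API reference:

import json
import random
import re

import requests

# Reuse the saved cookies and extract the token as shown above.
with open('ck.txt', 'r') as f:
    cookie = json.loads(f.read())
token = re.findall(r'token=(\d+)', str(requests.get('https://mp.weixin.qq.com', cookies=cookie).url))[0]

search_url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
params = {
    'action': 'search_biz',
    'token': token,
    'lang': 'zh_CN',
    'f': 'json',
    'ajax': '1',
    'random': random.random(),  # random float between 0 and 1
    'query': 'python',          # the search keyword
    'begin': 0,                 # result offset, in steps of 5
    'count': '5'
}
res = requests.get(search_url, cookies=cookie, params=params)
# Each entry in 'list' carries the account's 'nickname' and 'fakeid'.
for acct in res.json()['list']:
    print(acct['nickname'], acct['fakeid'])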

Next, let's look at the request and interface used to fetch an account's articles.
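That request goes to the appmsg endpoint. Here is another sketch, with the parameters taken from the full script below; the fakeid placeholder is something you would paste in from the search result above:

import json
import random
import re

import requests

# Reuse the cookie and token exactly as in the sketches above.
with open('ck.txt', 'r') as f:
    cookie = json.loads(f.read())
token = re.findall(r'token=(\d+)', str(requests.get('https://mp.weixin.qq.com', cookies=cookie).url))[0]
fakeid = ''  # paste a fakeid returned by the search sketch above

list_url = 'https://mp.weixin.qq.com/cgi-bin/appmsg?'
params = {
    'token': token,
    'lang': 'zh_CN',
    'f': 'json',
    'ajax': '1',
    'random': random.random(),
    'action': 'list_ex',
    'begin': 0,        # offset into the account's article list
    'count': 5,
    'fakeid': fakeid,  # identifies the account found through the search
    'type': 9
}
res = requests.get(list_url, cookies=cookie, params=params)
# Each entry in 'app_msg_list' carries the article 'link' and its 'digest'.
for item in res.json()['app_msg_list']:
    print(item['digest'], item['link'])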


That completes the page analysis, so let's write the code. This time it links up with the code from the previous article.

import time
import json
import random
import csv
import re

import requests
from selenium import webdriver


# Get the cookies and the token
class C_ookie:

    def __init__(self):
        self.html = ''

    # log in with Selenium and save the cookies to a file
    def get_cookie(self):
        cooki = {}
        url = 'https://mp.weixin.qq.com'
        Browner = webdriver.Chrome()
        Browner.get(url)
        # locate the account input box
        ID = Browner.find_element_by_name('account')
        # locate the password input box
        PW = Browner.find_element_by_name('password')
        # fill in your own account and password here
        id = ''
        pw = ''
        # id = input('Enter the account: ')
        # pw = input('Enter the password: ')
        ID.send_keys(id)
        PW.send_keys(pw)
        # find the login button and click it
        Browner.find_element_by_class_name('btn_login').click()
        # wait while the QR code is scanned
        time.sleep(10)
        cks = Browner.get_cookies()
        for ck in cks:
            cooki[ck['name']] = ck['value']
        ck1 = json.dumps(cooki)
        print(ck1)
        with open('ck.txt', 'w') as f:
            f.write(ck1)
        self.html = Browner.page_source


# Fetch the articles
class getEssay:

    def __init__(self):
        # load the saved cookies
        with open('ck.txt', 'r') as f:
            cookie = f.read()
        self.cookie = json.loads(cookie)
        # get the token from the URL we are redirected to
        self.header = {
            "HOST": "mp.weixin.qq.com",
            "User-Agent": 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'
        }
        m_url = 'https://mp.weixin.qq.com'
        response = requests.get(url=m_url, cookies=self.cookie)
        self.token = re.findall(r'token=(\d+)', str(response.url))[0]
        # (nickname, fakeid) pairs
        self.fakeid = []

    # search official accounts and collect their fakeids
    def getGname(self):
        # request headers
        headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Host': 'mp.weixin.qq.com',
            'Referer': 'https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit_v2&action=edit&isNew=1&type=10&token=%d&lang=zh_CN' % int(self.token),
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest'
        }
        # search endpoint
        url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
        # query = input('Enter a keyword to search official accounts for: ')
        # begin = int(input('Enter the start page: '))
        query = 'python'
        begin = 0
        begin *= 5
        # request parameters
        data = {
            'action': 'search_biz',
            'token': self.token,
            'lang': 'zh_CN',
            'f': 'json',
            'ajax': '1',
            'random': random.random(),
            'query': query,
            'begin': begin,
            'count': '5'
        }
        # request the endpoint and parse the result
        res = requests.get(url=url, cookies=self.cookie, headers=headers, params=data)
        name_js = json.loads(res.text)
        for i in name_js['list']:
            time.sleep(1)
            fakeid = i['fakeid']
            nickname = i['nickname']
            print(nickname, fakeid)
            self.fakeid.append((nickname, fakeid))

    # fetch the article URLs
    def getEurl(self):
        url = 'https://mp.weixin.qq.com/cgi-bin/appmsg?'
        headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Host': 'mp.weixin.qq.com',
            'Referer': 'https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit_v2&action=edit&isNew=1&type=10&token=%d&lang=zh_CN' % int(self.token),
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest'
        }
        # loop over the fakeids and fetch each account's article links
        for i in self.fakeid:
            time.sleep(1)
            fake = i[1]
            data = {
                'token': self.token,
                'lang': 'zh_CN',
                'f': 'json',
                'ajax': '1',
                'random': random.random(),
                'action': 'list_ex',
                'begin': 0,
                'count': 5,
                'fakeid': fake,
                'type': 9
            }
            res = requests.get(url, cookies=self.cookie, headers=headers, params=data)
            link_l = json.loads(res.text)
            self.parJson(link_l)

    # parse the JSON and extract the article URLs
    def parJson(self, link_l):
        for i in link_l['app_msg_list']:
            link = i['link']
            name = i['digest']
            self.saveData(name, link)

    # append the data to a CSV file
    def saveData(self, name, link):
        with open('link.csv', 'a', encoding='utf8', newline='') as f:
            w = csv.writer(f)
            w.writerow((name, link))
        print('ok')


C = C_ookie()
C.get_cookie()
G = getEssay()
G.getGname()
G.getEurl()

That's the whole crawling process. I hope it helps.
