网页抓取港股实时行情(港股所有的行情数据,如何获取港交所页面的Token?)

优采云 发布时间: 2021-10-09 17:23

  网页抓取港股实时行情(港股所有的行情数据,如何获取港交所页面的Token?)

  GitHub地址:(原链接在转载时丢失,仅保留了文件名)读取港交所数据.py

  爬取港交所数据最大的问题是如何在港交所页面获取token。拿到token后,就可以在港交所界面请求数据了。

  下面的Python程序首先请求港交所首页,从页面源码中提取Token值,然后带着该Token请求行情接口。接口返回的数据是在JSON外面包了一层回调(JSONP形式,例如 NULL({...})),去掉外层包装后即可用json库解析。

  完整的json数据如下:

  {

"data": {

"responsecode": "000",

"responsemsg": "",

"quote": {

"hi": "74.350",

"rs_stock_flag": false,

"fiscal_year_end": "31 Dec 2018",

"hist_closedate": "30 May 2019",

"replication_method": null,

"amt_os": "3,856,240,500",

"primaryexch": "HKEX",

"ric": "0001.HK",

"product_subtype": null,

"db_updatetime": "31 May 2019 09:36",

"mkt_cap_u": "B",

"am_u": "M",

"ew_sub_right": "",

"secondary_listing": false,

"ew_amt_os_cur": null,

"ccy": "HKD",

"management_fee": "",

"ew_underlying_code": null,

"trdstatus": "N",

"nav": "",

"original_offer_price": "",

"issue": "",

"asset_class": null,

"eps": 10.1109,

"inline_upper_strike_price": "",

"sedol": "BW9P816",

"am": "697.27",

"iv": "",

"ew_strike": "",

"as": "74.100",

"geographic_focus": null,

"incorpin": "Cayman Islands",

"etp_baseCur": null,

"ew_amt_os": "",

"bd": "74.050",

"registrar": "Computershare Hong Kong Investor Services Ltd.",

"depositary": null,

"exotic_type": null,

"callput_indicator": null,

"primary_market": null,

"underlying_index": null,

"lot": "500",

"lo52": "72.800",

"shares_issued_date": "30 Apr 2019",

"premium": "",

"strike_price_ccy": null,

"yield": "",

"vo_u": "M",

"base_currency": null,

"coupon": "",

"expiry_date": "",

"chairman": "Li Tzar Kuoi Victor",

"underlying_ric": "0001.HK",

"hi52": "92.500",

"issuer_name": "CK Hutchison Holdings Ltd.",

"h_share_flag": false,

"ew_sub_per_from": "",

"div_yield": "4.28",

"interest_payment_date": "-",

"updatetime": "31 May 2019 16:08",

"aum_date": "",

"lo": "73.050",

"mkt_cap": "285.55",

"f_aum_hkd": null,

"ew_sub_per_to": "",

"ls": "74.050",

"nav_date": "",

"csic_classification": null,

"floating_flag": false,

"issued_shares_note": null,

"eff_gear": "",

"board_lot_nominal": "",

"hsic_ind_classification": "Conglomerates - Conglomerates",

"ew_desc": null,

"inception_date": "",

"nc": "+1.050",

"aum": "",

"vo": "9.41",

"secondary_listing_flag": false,

"listing_date": "1 Nov 1972",

"as_at_label": "as at",

"ew_amt_os_dat": "",

"nm": "CK Hutchison Holdings Ltd.",

"nm_s": "CKH HOLDINGS",

"sym": "1",

"inline_lower_strike_price": "",

"listing_category": "Primary Listing",

"ew_strike_cur": null,

"exotic_warrant_indicator": null,

"investment_focus": null,

"call_price": "",

"tck": "0.050",

"strike_price": "",

"summary": "CK Hutchison Holdings Limited is an investment holding company mainly engaged in the retail business. Along with subsidiaries, the Company operates its business through five segments: the Retail segment, the Telecommunications segment, the Infrastructure segment, the Ports and Related Services segment, and the Husky Energy segment. The Retail segment is involved in the manufacturing and sale of health and beauty products, as well as consumer electronics and electrical appliances. It also operates supermarkets, as well as manufactures and distributes bottled water and beverage products. The Telecommunications segment provides mobile telecommunications and data services by 3 Group Europe, Hutchison Telecommunications Hong Kong Holdings, and Hutchison Asia Telecommunications. The Infrastructure segment is involved in the energy infrastructure, transportation infrastructure, water infrastructure, waste management, waste-to-energy and infrastructure related businesses.",

"op": "73.050",

"aum_u": "",

"nav_ccy": null,

"os": "",

"wnt_gear": "",

"transfer_of_listing_date": "",

"hsic_sub_sector_classification": "Conglomerates",

"amt_ccy": null,

"domicile_country": null,

"entitlement_ratio": "",

"product_type": "EQTY",

"office_address": "48th Floor

Cheung Kong Center

2 Queen's Road Central

Hong Kong",

"pc": "+1.44",

"days_to_expiry": null,

"underlying_code": null,

"pe": "7.32",

"eps_ccy": "HKD",

"hdr": false,

"launch_date": "",

"hc": "73.000",

"isin": "KYG217651051",

"moneyness": ""

}

},

"qid": "NULL"

}

  在程序中,我随机选取了几个字段进行输出

  #coding=utf-8

#!/usr/bin/python

# 导入requests库

import requests

# 导入文件操作库

import os

import re

import bs4

from bs4 import BeautifulSoup

import sys

import json

# 主方法

# Locates the placeholder marker and, within the next 120 characters, the
# `return "<token>"` statement that carries the real token value.
_TOKEN_PATTERN = re.compile(
    r'Base64-AES-Encrypted-Token.{0,120}?return\s*"([^"]+)"', re.DOTALL
)


def _extract_token(page_text):
    """Return the quoted token that follows the marker in the page source."""
    match = _TOKEN_PATTERN.search(page_text)
    if match is None:
        raise RuntimeError(
            'Could not find a token after "Base64-AES-Encrypted-Token" '
            'in the HKEX page'
        )
    return match.group(1)


def _parse_jsonp(body):
    """Decode a ``callback({...})`` response body into a Python object."""
    text = body.strip()
    if text.startswith('{'):
        # Already plain JSON, nothing to unwrap.
        return json.loads(text)
    start = text.find('(')
    end = text.rfind(')')
    if start == -1 or end <= start:
        raise ValueError('Unexpected quote response: %r' % text[:80])
    return json.loads(text[start + 1:end])


def main(sym=1):
    """Fetch and print a few quote fields for one HKEX-listed stock.

    The function downloads the HKEX home page, extracts the access token
    embedded in its source, calls the equity-quote widget endpoint with that
    token and prints the token, the request URL, the decoded payload and the
    ``hi``, ``db_updatetime``, ``amt_os``, ``ric`` and ``primaryexch`` fields.

    Args:
        sym: Stock code without leading zeros (``1`` for 00001). Defaults to
            ``1``, which is what the original hard-coded URL requested.

    Raises:
        RuntimeError: If no token can be found in the home page.
        ValueError: If the quote response is not JSON or a JSONP wrapper.
        requests.RequestException: On network errors, timeouts or HTTP
            error status codes.
    """
    # Send a browser-like User-Agent with both requests.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
    house = 'https://www.hkex.com.hk/?sc_lang=EN'

    res = requests.get(house, headers=headers, timeout=30)
    res.raise_for_status()

    # Search the raw HTML rather than BeautifulSoup's extracted text, so the
    # lookup does not depend on whether <script> contents are included there.
    token = _extract_token(res.text)
    print('找到了token:' + token)

    # NOTE(review): the token is interpolated verbatim, exactly as before.
    # Passing it through `params=` would re-encode it; confirm whether the
    # value in the page is already URL-encoded before changing this.
    result = 'https://www1.hkex.com.hk/hkexwidget/data/getequityquote?sym=%s&token=%s&lang=eng&qid=NULL&callback=NULL' % (sym, token)
    print(result)

    res = requests.get(result, headers=headers, timeout=30)
    res.raise_for_status()

    # Decode once and reuse; the wrapper is removed by locating the
    # parentheses instead of assuming a fixed five-character prefix.
    jsonstr = _parse_jsonp(res.text)
    print(jsonstr)

    quote = jsonstr['data']['quote']
    for field in ('hi', 'db_updatetime', 'amt_os', 'ric', 'primaryexch'):
        print(quote[field])

    # To keep a copy of the payload on disk, call for example:
    # WriteTxt(jsonstr, 'D:/', 'bbb1')

def WriteTxt(message, path, filmname):
    """Append a message to the text file ``_<filmname>.txt`` inside ``path``.

    Each call appends an empty line, a line containing only ``:``, and then
    the string form of ``message``. The file is opened in append mode with
    UTF-8 encoding, so it is created if it does not exist yet.

    Args:
        message: Content to store; it is converted with ``%s`` formatting.
        path: Directory that holds the file.
        filmname: Base file name; ``_`` is prepended and ``.txt`` appended.
    """
    # Wrap the message in a one-element tuple so that a tuple-valued message
    # is written as-is instead of being unpacked as format arguments.
    entry = '\n:\n%s' % (message,)
    target = os.path.join(path, '_' + filmname + '.txt')
    with open(target, 'a', encoding='utf-8') as f:
        f.write(entry)

# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()

  输出如下:

  74.350

31 May 2019 09:36

3,856,240,500

0001.HK

HKEX

  有一点需要说明。在这篇文章中,我只是以代码00001为例,所以向接口发出的请求只返回了这一只股票的数据。如果要每天批量爬取所有港股行情数据,首先要建立一张包含所有港股股票代码的代码表,然后遍历代码表,逐个请求每个股票代码对应的数据。

  获取数据的核心请求链接是:

  https://www1.hkex.com.hk/hkexwidget/data/getequityquote?sym=1&token=%s&lang=eng&qid=NULL&callback=NULL

  其中,链接中sym=1的地方就是对应的股票代码,这里的股票代码是00001,链接中前面的零都要去掉。同理,如果要获取股票代码00002的数据,那么需要在链接中写sym=2

  每次替换sym后对应的数字,就可以得到对应股票的行情数据。

  C#版:使用C#爬取港交所股市数据-附C#源码

0 个评论

要回复文章请先登录注册


官方客服QQ群

微信人工客服

QQ人工客服


线