网页表格抓取(【每日一题】法比奥布拉加托的使用和提取表格 )
优采云 发布时间: 2021-11-07 23:02网页表格抓取(【每日一题】法比奥布拉加托的使用和提取表格
)
法比奥布拉加托 1
为什么我使用 .getElementsByTagName 方法提取表格时,没有提取到表格中包含的所有数据?请注意,这是一个需要向下滚动才能看到全部内容的页面。
' Downloads the Yahoo Finance "history" page for the stock chosen on the
' ufHistorico form and copies the text of every <tr> cell found in the
' returned HTML into the worksheet, one table row per sheet row.
'
' Inputs (read from the form, not passed as parameters):
'   ufHistorico.txtDtInicial - start date, converted to seconds since 1970-01-01
'   ufHistorico.txtDtFinal   - end date, converted the same way
'   ufHistorico.cbAcoes.Text - ticker symbol inserted into the URL
'
' NOTE(review): only rows present in the HTML returned by the server are
' written; rows the page adds later via script are not in this response.
Public Sub getHistoricCotation()
    Dim mainURL As String
    Dim elem As Object, tRow As Object
    Dim S As String
    Dim R As Long, C As Long
    Dim initial_date As String, final_date As String
    Dim stock As String
    Dim respHeaders As String
    Dim strCookie As String

    ' Both dates are shifted forward by one day (86400 s) after conversion.
    initial_date = DateDiff("s", "1/1/1970 00:00:00", ufHistorico.txtDtInicial) + 86400
    final_date = DateDiff("s", "1/1/1970 00:00:00", ufHistorico.txtDtFinal) + 86400
    stock = ufHistorico.cbAcoes.Text

    mainURL = "https://finance.yahoo.com/quote/" & stock & "/history?period1=" & initial_date & "&period2=" & final_date & "&interval=1d&filter=history&frequency=1d"

    With CreateObject("WinHttp.WinHttpRequest.5.1")
        ' First request: used only to obtain a cookie from the response headers.
        .Open "GET", mainURL, False
        ' The request must actually be sent before response headers can be read.
        .send
        respHeaders = .getAllResponseHeaders

        ' Take the text between the first "Cookie:" and the following ";".
        ' Guarded so a response without a cookie header does not raise
        ' "subscript out of range".
        If InStr(1, respHeaders, "Cookie:") > 0 Then
            strCookie = Split(Split(respHeaders, "Cookie:")(1), ";")(0)
        End If

        ' Second request: same URL, replaying the cookie with a browser User-Agent.
        .Open "GET", mainURL, False
        If Len(strCookie) > 0 Then .setRequestHeader "Cookie", strCookie
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
        .send
        S = .responseText
    End With

    With CreateObject("htmlfile")
        .body.innerHTML = S
        ' R counts rows already written, C counts cells written in the current row.
        For Each elem In .getElementsByTagName("tr")
            For Each tRow In elem.Cells
                C = C + 1
                ' Unqualified Cells: presumably targets the active sheet - confirm against where this module lives.
                Cells(R + 1, C) = tRow.innerText
            Next tRow
            C = 0
            R = R + 1
        Next elem
    End With
End Sub
SIM卡
您的脚本能够解析到的部分是静态内容,而无法解析到的部分是动态生成的。不过好消息是,该表格的全部内容都可以在页面源代码的某些 script 标签中找到。我编写了一个脚本,用来从中提取所需的部分。接下来您只需使用任意 JSON 转换器或正则表达式来处理这些内容即可。
下面的脚本展示了如何从页面源代码中获取所有相关数据:
' Requests the Yahoo Finance history page for MGLU3.SA and prints to the
' Immediate window the first regex capture found after the text
' "HistoricalPriceStore ... prices" in the page source.
' Prints nothing when the pattern does not match.
Sub FetchHistoricalPrice()
    Const mainUrl As String = "https://finance.yahoo.com/quote/MGLU3.SA/history?p=MGLU3.SA"
    Dim pageSource As String
    Dim http As Object
    Dim rx As Object
    Dim hits As Object

    ' Synchronous GET; the raw response body is the text searched below.
    Set http = CreateObject("MSXML2.XMLHTTP")
    http.Open "GET", mainUrl, False
    http.send
    pageSource = http.responseText

    ' The capture group starts at the first "[" after "prices" and ends at
    ' a "]" that is immediately followed by a comma.
    Set rx = CreateObject("VBScript.RegExp")
    rx.Global = True
    rx.MultiLine = True
    rx.Pattern = "HistoricalPriceStore[\s\S]+prices[^[]+(.*?]),"

    Set hits = rx.Execute(pageSource)
    If hits.Count = 0 Then Exit Sub

    Debug.Print hits(0).SubMatches(0)
End Sub