vba xmlhttp 抓取网页(我想从单个网站页面(带有XMLHTTP请求)中抓取一个)

优采云 发布时间: 2021-12-26 07:12

  vba xmlhttp 抓取网页(我想从单个网站页面(带有XMLHTTP请求)中抓取一个)

  我想从单个网站页面(使用 XML HTTP 请求)抓取网站(以提取产品价格)。但在运行此脚本之前,我需要首先选择正确的商店(存储在浏览器 cookie 变量中,或者如果可能,以任何其他方式/请求包括在内),因为价格因商店而异。

  我已经创建了一个有效的代码,但是需要很长时间才能运行,所以我认为必须有一种更快更干净的方法:)。我还需要收录

应用程序以等待网站执行这些步骤。

  我当前的 VBA 代码:

  选择正确的存储(并将其保存在浏览器 cookie 中)

   Sub SetStore()

Dim IE As New SHDocVw.InternetExplorer

Dim HTMLDoc As MSHTML.HTMLDocument

Dim HTMLSearchbox As MSHTML.IHTMLElement

Dim HTMLSearchboxes As MSHTML.IHTMLElementCollection

Dim HTMLButton As MSHTML.IHTMLElement

Dim HTMLButtons As MSHTML.IHTMLElementCollection

Dim HTMLSearchButton As MSHTML.IHTMLElement

Dim HTMLSearchButtons As MSHTML.IHTMLElementCollection

Dim HTMLStoreID As MSHTML.IHTMLElement

Dim HTMLStoreIDs As MSHTML.IHTMLElementCollection

Dim HTMLSaveStore As MSHTML.IHTMLElement

Dim HTMLSaveStores As MSHTML.IHTMLElementCollection

'set on False to hide IE screen

IE.Visible = True

'navigate to url with limited content

IE.navigate "https://www.jumbo.com/content/algemene-voorwaarden/"

Do While IE.readyState READYSTATE_COMPLETE

Loop

Set HTMLDoc = IE.document

Set HTMLButtons = HTMLDoc.getElementsByTagName("button")

For Each HTMLButton In HTMLButtons

If HTMLButton.getAttribute("data-jum-action") = "openHomeStoreFinder" Then

HTMLButton.Click

Exit For

End If

Next HTMLButton

Application.Wait Now + #12:00:02 AM#

Set HTMLSearchboxes = HTMLDoc.getElementsByTagName("input")

For Each HTMLSearchbox In HTMLSearchboxes

If HTMLSearchbox.getAttribute("id") = "searchTerm__DkKYx4XylsAAAFJktpb2Guy" Then

'input field store name/location to show search results

HTMLSearchbox.Value = "Oosterhout"

Application.Wait Now + #12:00:03 AM#

HTMLSearchbox.Click

Exit For

End If

Next HTMLSearchbox

Set HTMLSearchButtons = HTMLDoc.getElementsByTagName("button")

For Each HTMLSearchButton In HTMLSearchButtons

If HTMLSearchButton.getAttribute("data-jum-filter") = "search" Then

HTMLSearchButton.Click

Exit For

End If

Next HTMLSearchButton

Application.Wait Now + #12:00:05 AM#

Set HTMLStoreIDs = HTMLDoc.getElementsByTagName("li")

For Each HTMLStoreID In HTMLStoreIDs

'oosterhout = YC8KYx4XB88AAAFIDcIYwKxJ

'nieuwegein = 84IKYx4XziUAAAFInSYYwKrH

'vaassen = JYYKYx4XC1oAAAFItvcYwKxJ

'brielle = OG8KYx4XP4wAAAFIlsEYwKxK

If HTMLStoreID.getAttribute("data-jum-store-id") = "YC8KYx4XB88AAAFIDcIYwKxJ" Then

HTMLStoreID.Click

Application.Wait Now + #12:00:03 AM#

Exit For

End If

Next HTMLStoreID

Set HTMLSaveStores = HTMLDoc.getElementsByTagName("button")

For Each HTMLSaveStore In HTMLSaveStores

If HTMLSaveStore.getAttribute("data-jum-action") = "saveHomeStore" Then

HTMLSaveStore.Click

Exit For

End If

Next HTMLSaveStore

'IE.Quit

End Sub

  从产品页面中提取数据(IE HTTP请求,使用cookie存储值)

  Sub GetJumboPriceIE()

Dim IE As New SHDocVw.InternetExplorer

Dim HTMLDoc As MSHTML.HTMLDocument

Dim JumInputs As MSHTML.IHTMLElementCollection

Dim JumInput As MSHTML.IHTMLElement

Dim JumPrice As MSHTML.IHTMLElement

Dim JumboPrice As Double

Dim Price_In_Cents_Tag As String

Dim SKU_tag As String, SKU_url As String

SKU_tag = "173140KST"

SKU_url = "https://www.jumbo.com/lu-bastogne-koeken-original-260g/173140KST/"

IE.Visible = False

IE.navigate SKU_url

Do While IE.readyState READYSTATE_COMPLETE

Loop

Set HTMLDoc = IE.document

IE.Quit

Set JumInputs = HTMLDoc.getElementsByTagName("input")

Price_In_Cents_Tag = "PriceInCents_" & SKU_tag

Set JumPrice = HTMLDoc.getElementById(Price_In_Cents_Tag)

JumboPrice = JumPrice.getAttribute("value") / 100

Debug.Print JumboPrice

End Sub

  上面的代码有效,但我希望使用以下 XML HTTP 请求代码(但使用正确的存储)。价格是 1,39。

  从产品页面提取数据(使用 XML HTTP 请求),但不使用 cookie 值

  Sub GetJumboPriceXML()

Dim XMLReq As New MSXML2.XMLHTTP60

Dim HTMLDoc As New MSHTML.HTMLDocument

Dim JumInputs As MSHTML.IHTMLElementCollection

Dim JumInput As MSHTML.IHTMLElement

Dim JumPrice As MSHTML.IHTMLElement

Dim JumboPrice As Double

Dim Price_In_Cents_Tag As String

Dim SKU_tag As String, SKU_url As String

SKU_tag = "173140KST"

SKU_url = "https://www.jumbo.com/lu-bastogne-koeken-original-260g/173140KST/"

XMLReq.Open "GET", SKU_url, False

XMLReq.send

If XMLReq.Status 200 Then

MsgBox "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText

Exit Sub

End If

HTMLDoc.body.innerHTML = XMLReq.responseText

Set JumInputs = HTMLDoc.getElementsByTagName("input")

Price_In_Cents_Tag = "PriceInCents_" & SKU_tag

Set JumPrice = HTMLDoc.getElementById(Price_In_Cents_Tag)

JumboPrice = JumPrice.getAttribute("value") / 100

Debug.Print JumboPrice

End Sub

  代码没有使用正确的商店并输出我不想要的价格(打印的价格是1.48)。

  综上所述:

  如果未选择商店(未设置 cookie),则以下 URL 现在的费用为 1.48 欧元。

  我希望 VB 脚本将商店设置为“Jumbo Oosterhout Nieuwe Bouwlingstraat”,然后在产品列表中抓取预定义的产品 URL 并提取价格(上面的 URL 是 €1,39)。

  然后将商店设置为另一个本地商店“Jumbo Brielle Thoelaverweg”并抓取相同的产品 URL 列表。上面的 URL 是 1.48 欧元。

  您可以通过单击页面右上角的位置图钉图标来选择其他商店。

  非常感谢您的帮助

0 个评论

要回复文章请先登录注册


官方客服QQ群

微信人工客服

QQ人工客服


线