vba xmlhttp 抓取网页(MFaizanFarooqExcel工作表中的地址字段变为空白结果(r,4) )

优采云 发布时间: 2022-04-08 05:17

  vba xmlhttp 抓取网页(MFaizanFarooqExcel工作表中的地址字段变为空白结果(r,4)

)

  M费赞法鲁克

  Excel 工作表中的地址字段变为空白

  结果(r, 4) = comment("街道地址")

  在网页抓取期间;我正在从 网站 导入字段并相应地导入数据,请指导我。

  表 A1 =

  Option Explicit

Public Sub GetRestuarantInfo()

Dim s As String, re As Object, p As String, page As Long, r As String, json As Object 'Variable Definations

Const START_PAGE As Long = 2

Const END_PAGE As Long = 4

Const RESULTS_PER_PAGE As Long = 40

p = "\[{""@context"".*?\]"

Set re = CreateObject("VBScript.RegExp")

Application.ScreenUpdating = False

With CreateObject("MSXML2.XMLHTTP")

For page = START_PAGE To END_PAGE ' Run for loop for defined Page numbers

.Open "GET", Sheet1.Range("A1") & page, False

.send

If .Status = 200 Then

s = .responseText

r = GetValue(re, s, p)

If r "Not Found" Then

Set json = JsonConverter.ParseJson(r)

WriteOutResults page, RESULTS_PER_PAGE, json

End If

End If

Next

End With

Application.ScreenUpdating = True

End Sub

Public Sub WriteOutResults(ByVal page As Long, ByVal RESULTS_PER_PAGE As Long, ByVal json As Object)

Dim sheetName As String, results(), r As Long, headers(), ws As Worksheet

ReDim results(1 To RESULTS_PER_PAGE, 1 To 4)

sheetName = "page" & page ' This module is just to write results pagewise which is not needed in your case

headers = Array("Name", "Website", "Tel", "Address") 'Defination of headers

If Not WorksheetExists(sheetName) Then ' Creation of sheets

Set ws = ThisWorkbook.Worksheets.Add

ws.Name = sheetName

Else

ThisWorkbook.Worksheets(sheetName).ClearContents

End If

With ws

Dim review As Object

For Each review In json ' Bringing results from Json to excel sheet

r = r + 1

results(r, 1) = review("name") 'write results of name field

results(r, 2) = review("url") 'write results of url field

results(r, 3) = review("telephone") 'write results of telephone field

results(r, 4) = review("streetAddress") 'write results of telephone field

Next

.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers

.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results

End With

End Sub

Public Function GetValue(ByVal re As Object, inputString As String, ByVal pattern As String) As String

With re

.Global = True

.MultiLine = True

.IgnoreCase = False 'Use case-insensitive matching.

.pattern = pattern 'The pattern (written in Regex) which you want to match against (e.g. “(.*)”)

'Test (string) – returns True if the pattern can be matched agaist the provided string

'Web Link: https://analystcave.com/excel-regex-tutorial/

If .Test(inputString) Then

GetValue = .Execute(inputString)(0)

Else

GetValue = "Not found"

End If

End With

End Function

Public Function WorksheetExists(ByVal sName As String) As Boolean

WorksheetExists = Evaluate("ISREF('" & sName & "'!A1)")

End Function

  我一直在尝试获取以下代码的地址范围,我是 VBA 新手,因此将不胜感激。

  哈尔

  地址的json访问路径不同

  review("address") 给出字典

  

  您需要按键访问各个行,或者像我一样,连接字典的所有项目以为完整地址创建一个空格分隔的字符串。

  VBA:

  Option Explicit

Public Sub GetRestuarantInfo()

Dim s As String, re As Object, p As String, page As Long, r As String, json As Object 'Variable Definations

Const START_PAGE As Long = 2

Const END_PAGE As Long = 4

Const RESULTS_PER_PAGE As Long = 40

p = "\[{""@context"".*?\]"

Set re = CreateObject("VBScript.RegExp")

Application.ScreenUpdating = False

With CreateObject("MSXML2.XMLHTTP")

For page = START_PAGE To END_PAGE ' Run for loop for defined Page numbers

.Open "GET", "https://www.yellowpages.com/atlanta-ga/restaurants?page=" & page, False

.send

If .Status = 200 Then

s = .responseText

r = GetValue(re, s, p)

If r "Not Found" Then

Set json = JsonConverter.ParseJson(r)

WriteOutResults page, RESULTS_PER_PAGE, json

End If

End If

Next

End With

Application.ScreenUpdating = True

End Sub

Public Sub WriteOutResults(ByVal page As Long, ByVal RESULTS_PER_PAGE As Long, ByVal json As Object)

Dim sheetName As String, results(), r As Long, headers(), ws As Worksheet

ReDim results(1 To RESULTS_PER_PAGE, 1 To 4)

sheetName = "page" & page ' This module is just to write results pagewise which is not needed in your case

headers = Array("Name", "Website", "Tel", "Address") 'Defination of headers

If Not WorksheetExists(sheetName) Then ' Creation of sheets

Set ws = ThisWorkbook.Worksheets.Add

ws.Name = sheetName

Else

Set ws = ThisWorkbook.Worksheets(sheetName)

ws.Cells.ClearContents

End If

With ws

Dim review As Object

For Each review In json ' Bringing results from Json to excel sheet

r = r + 1

results(r, 1) = review("name") 'write results of name field

results(r, 2) = review("url") 'write results of url field

results(r, 3) = review("telephone") 'write results of telephone field

results(r, 4) = Replace$(Join$(review("address").items, " "), "PostalAddress ", vbNullString) 'write results of telephone field

Next

.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers

.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results

End With

End Sub

Public Function GetValue(ByVal re As Object, inputString As String, ByVal pattern As String) As String

With re

.Global = True

.MultiLine = True

.IgnoreCase = False 'Use case-insensitive matching.

.pattern = pattern 'The pattern (written in Regex) which you want to match against (e.g. “(.*)”)

'Test (string) – returns True if the pattern can be matched agaist the provided string

'Web Link: https://analystcave.com/excel-regex-tutorial/

If .test(inputString) Then

GetValue = .Execute(inputString)(0)

Else

GetValue = "Not found"

End If

End With

End Function

Public Function WorksheetExists(ByVal sName As String) As Boolean

WorksheetExists = Evaluate("ISREF('" & sName & "'!A1)")

End Function

0 个评论

要回复文章请先登录注册


官方客服QQ群

微信人工客服

QQ人工客服


线