Im using this URL https://www.morningstar.com/stocks/xtks/1407/dividends
and the table with the upcoming dividends are displayed on my browser
I inspect the page and try to catch the content of the table
I tried this code :
Dim html As New htmlDocument
Dim HTTP As Object, Elem As Object, Quote As Object
Dim iTicker As Integer, nTicker As Integer, BBG_Ticker As String, DES As String, TYP As String, MKT_STATUS As String
Dim vHeader As Variant, vData As Variant, i As Integer, j As Integer, t As Integer, k As Integer, nFields As Integer, x As Integer
Dim n As Integer
Set HTTP = CreateObject("MSXML2.XMLHTTP")
ReDim vWebPxLast(0)
With HTTP
.Open "GET", URL, True
.Send
End With
While HTTP.ReadyState <> 4
DoEvents
Wend
html.Body.innerHTML = HTTP.responseText
Crash HTTP.responseText
If HTTP.Status = 200 Then
Set Elem = html.getElementsByClassName("dividends-recent")
but the Elem object is empty
I used selenium with chrome and it worked
Related
I'm trying to scrape the href of each model from this webpage: https://www.aprilia.com/en_EN/index.
The html showing the href data can be seen only after clicking on two buttons (the one on the top right corner and the one on the left called "Models"), one after the other.
Sub get_info()
Dim ie As Object
Dim address, str_chk As String
Dim my_data As Object
Dim oHTML_Element As IHTMLElement
Dim i As Long
address = "https://www.aprilia.com/en_EN/index"
Set ie = CreateObject("InternetExplorer.Application")
ie.navigate address 'the one mentioned above
ie.Visible = False
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
For Each oHTML_Element In ie.document.getElementsByName("button")
If oHTML_Element.className = "header__menu-services__nav button button--icon" Then
oHTML_Element.Click
End If
Next
Application.Wait Now + #12:00:05 AM#
For Each oHTML_Element In ie.document.getElementsByName("Models")
oHTML_Element.Click
Next
Application.Wait Now + #12:00:05 AM#
'==>
Set my_data = html.getElementsByClassName("card-product card-product--family")
For Each elem In my_data
For i = 0 To elem.getElementsByTagName("a").Length - 1
str_chk = elem.getElementsByTagName("a")(i).href
ws.Cells(9 + j, 7).Value = str_chk
j = j + 1
Next i
Next elem
ie.Quit
Set ie = Nothing
End Sub
I got
Error '424' - Object Required
where I set my_data.
I guess that means that I'm not able to click on the two buttons and, as a consequence, html code is not available.
***************** Revised code:
Sub get_info22()
Dim address As String
Dim ie, ELE, nodes As Object
Dim i As Long
Dim t As Date
Const MAX_WAIT_SEC As Long = 10
address = "https://www.aprilia.com/en_EN/index"
Set ie = CreateObject("InternetExplorer.Application")
ie.navigate address 'the one mentioned above
ie.Visible = False
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
'************** click on first button
t = Timer
Do
On Error Resume Next
Set ELE = ie.document.querySelector(".header__menu-services__nav")
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While ELE Is Nothing
If Not ELE Is Nothing Then
ELE.Click
End If
While ie.Busy Or ie.readyState <> 4: DoEvents: Wend
'************** click on second button
Do
On Error Resume Next
Set ELE = ie.document.querySelector("li > button")
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While ELE Is Nothing
If Not ELE Is Nothing Then
ELE.Click
End If
While ie.Busy Or ie.readyState <> 4: DoEvents: Wend
'************** get href for each model
Set nodes = ie.document.querySelectorAll(".card-product--family")
ReDim hrefs(nodes.Length - 1)
For i = 0 To nodes.Length - 1
hrefs(i) = nodes.Item(i).href
ActiveSheet.Cells(9 + i, 7).Value = hrefs(i)
Next
Try first to use more precise selectors. For the first button use:
ie.document.querySelector(".header__menu-services__nav").click
That targets the element by one of its classes. Then have a pause e.g.
While ie.Busy Or ie.ReadyState<>4:DoEvents:Wend
Or, use an explicit wait time or loop until next desired element is present.
Then target the next element with type selectors and child combinator as you want the first child button within a li element:
ie.document.querySelector("li > button").click
Then you need another wait.
Finally, you can use a single class from the target elements, with the links, and extract the href attributes and store in an array (for example)
Dim nodes As Object, hrefs(), i As Long
Set nodes = ie.Document.querySelectorAll(".card-product")
ReDim hrefs(nodes.Length - 1)
For i = 0 To nodes.Length - 1
hrefs(i) = nodes.Item(i).href
Next
EDIT:
Seems page uses ajax to retrieve the listings which makes this easier. I show to versions. The first where I grab just those links you describe after two button clicks; the second, where I grab model subtype links as well.
In both I mimic the request the page makes to get that info. In the first I then parse the returned json with a json parser and pull out the model links. With the second, I regex out all href info ie. all submodels.
Json library:
I use jsonconverter.bas. Download raw code from here and add to standard module called JsonConverter . You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime. Remove the top Attribute line from the copied code.
1)
Option Explicit
Public Sub ScrapeModelLinks1()
Dim data As Object, links() As Variant, s As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.aprilia.com/en_EN/aprilia/en/index?ajax=true", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responseText
End With
Set data = JsonConverter.ParseJson(s)("pageData")("main")("component-06")("items")
ReDim links(data.Count)
Dim item As Long, base As String
base = "https://www.aprilia.com"
For item = 1 To data.Count
links(item) = base & data(item)("href")
Next
Stop
End Sub
Public Sub ScrapeModelLinks2()
'grab all href which will include model subtypes
Dim s As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.aprilia.com/en_EN/aprilia/en/index?ajax=true", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responseText
End With
Dim re As Object, matches As Object, links() As Variant
Set re = CreateObject("VBScript.RegExp")
re.Pattern = """href"":""(.*?)"""
re.Global = True
Set matches = re.Execute(s)
ReDim links(matches.Count - 1)
Dim item As Long, base As String
base = "https://www.aprilia.com"
For item = 0 To matches.Count - 1
links(item) = base & matches(item).submatches(0)
Next
Stop
End Sub
Regex explanation:
Unfortunately my old account got deleted when changing website hosts. I looked throughout the site for a solution but only found people dealing with entire tables rather than just trying to obtain one value. I am having an issue grabbing Net Assets with innerText from the following webpage:
https://finance.yahoo.com/quote/UMBWX?p=UMBWX
Here is my attempt:
Sub FetchAssets()
Dim HTML As New HTMLDocument, elem As Object, URL As String
URL = https://finance.yahoo.com/quote/UMBWX?p=UMBWX
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
Application.Wait Now + 1 / (24 * 60 * 60# * 2)
.send
Application.Wait Now + 1 / (24 * 60 * 60# * 2)
HTML.body.innerHTML = .responseText
Set elem = HTML.querySelector("NET_ASSETS-value")
MsgBox elem.innerText
End With
End Sub
This attempt results in: Run-time error '91': Object variable or With block variable not set
I have tried the exact same code and swapped elem.innerText with elem.innerHTML and it results in the same error. The error happens on the MsgBox line of code, it sets elem fine.
Due to my reputation being under 10 I can not embed an image, but here is the line of code from the webpage:
<td class="Ta(end) Fw(600) Lh(14px)" data-test="NET_ASSETS-value" data-reactid="89"><span class="Trsdu(0.3s) " data-reactid="90">539.25M</span></td>
Try this
Sub FetchAssets()
Dim html As New HTMLDocument, elem As Object, sURL As String
sURL = "https://finance.yahoo.com/quote/UMBWX?p=UMBWX"
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", sURL, False
.send
html.body.innerHTML = .responseText
Set elem = html.querySelector("td[data-test='NET_ASSETS-value']")
MsgBox elem.innerText
End With
End Sub
I need to get the page number in order to extract text from that specific page in a .PDF document. I am using Excel VBA function that makes use of the JSObject from the Acrobat Type Library 10.0
Here is the code snippet and the code hicks up on when I am trying to reference the pageNum property from Doc object. I am trying to avoid the AV Layer and use the PD Layer only, so my macro runs in the background only and doesn't invoke Acrobat Application.
Function getTextFromPDF_JS(ByVal strFilename As String) As String
Dim pdDoc As New AcroPDDoc
Dim pdfPage As Acrobat.AcroPDPage
Dim pdfBookmark As Acrobat.AcroPDBookmark
Dim jso As Object
Dim BookMarkRoot As Object
Dim vBookmark As Variant
Dim objSelection As AcroPDTextSelect
Dim objHighlight As AcroHiliteList
Dim currPage As Integer
Dim strText As String
Dim BM_flag As Boolean
Dim count As Integer
Dim word As Variant
strText = ""
If (pdDoc.Open(strFilename)) Then
Set jso = pdDoc.GetJSObject
Set BookMarkRoot = jso.BookMarkRoot
vBookmark = jso.BookMarkRoot.Children
'Add a function call to see if a particular bookmark exists within the .PDF
Set pdfBookmark = CreateObject("AcroExch.PDBookmark")
BM_flag = pdfBookmark.GetByTitle(pdDoc, "Title Page")
If (BM_flag) Then
For i = 0 To UBound(vBookmark)
If vBookmark(i).Name = "Title Page" Then
vBookmark(i).Execute
jso.pageNum
Set pdfPage = pdDoc.AcquirePage(pageNum)
Set objHighlight = New AcroHiliteList
objHighlight.Add 0, 10000 ' Adjust this up if it's not getting all the text on the page
Set objSelection = pdfPage.CreatePageHilite(objHighlight)
If Not objSelection Is Nothing Then
For tCount = 0 To objSelection.GetNumText - 1
strText = strText & objSelection.GetText(tCount)
Next tCount
End If
Exit For
End If
pdDoc.Close
End If
End If
getTextFromPDF_JS = strText
End Function
jso.pageNum = 0; set a page number
pageNo = jso.pageNum; get a page number
edit: 3.3.19
Mmmh, it seems you have to work with AVDoc in order to get the current actual page via jso.pageNum . Also if you work with AVdoc the Acobat window stay hidden in the background. Example:
strFilename = "d:\Test2.pdf"
set avDoc = CreateObject("AcroExch.AVDoc")
If (avDoc.Open(strFilename,"")) Then
Set pdDoc = avDoc.getPDDoc()
Set jso = pdDoc.GetJSObject
pageNo = jso.pageNum
msgbox(pageNo)
end if
I'm trying to fix an old macro working with a function GetData that retrieve the whole page in the response string. (Can't give the real URL here)
Function getDataFor(tva As String) As String
' Early binding, set Tools - Reference - Microsoft XML,v6.0
Dim oDOM As New MSXML2.DOMDocument30
Dim oNodeList As IXMLDOMNodeList
Dim oReq As MSXML2.XMLHTTP
Set oReq = New MSXML2.XMLHTTP
oReq.Open "GET", "https://xxxxxxx.asp?name=" & tva, False
oReq.send
While oReq.readyState <> 4
Wend
Dim response As String
Dim name As String
Dim hasLoginBelspo As String
response = oReq.responseText
Dim token1 As Long
Dim token2 As Long
token1 = InStr(response, "company.name=")
token2 = InStr(response, "company.address=")
On Error GoTo Proceed
name = Mid(response, token1 + 14, token2 - (token1 + 16))
On Error GoTo 0
token1 = InStr(response, "parent.SetLoginInfo")
hasLoginBelspo = Replace(Mid(response, token1 + 28, 5), ",", "")
GoTo Proceed2
Proceed:
name = ""
hasLoginBelspo = ""
Proceed2:
getDataFor = name & "," & hasLoginBelspo
End Function
The concerned page is loaded and the needed data is written between javascript tags:
Here is a copy of what I can find in the source page when I paste the URL in a browser.
<script language="javascript">
var company=new Object();company.number="xxx";company.name="xxx";company.address="xxx";company.zip="xxx";company.city="xxx";parent.SetLoginInfo(company,false,true,"xxx","");
</script>
All the data replaced by xxx is what I need. In the browser, I get it, but with the macro, sometimes those xxx data are empty. It should be because the library used to load the javascript may not be the good one (MSXML2).
Do you have an idea of what I need the change, or what kind of library should I use to be sure the javascript data will be catch?
Thanks a lot
I have a webmethod which will return an object with strings(i.e)str is object and str(0) would be string. str(0)'s length would be 27601. likewise all other array members having the same length.(i.e) str(1)'s length is 27601 also str(2),str(3)......
my webmethod return the exact object. but on the client side javascript doesn't accept the value it immediately fire the error method.
sample code is,
PageMethods.paging("","",pospara,"position_select","stored Procedure",function sucMulti(result){
alert(result);
});
web method is
<WebMethod()> _
<ScriptMethod()> _
Public Shared Function paging(ByVal query As String, ByVal tbl As String, ByVal para() As Object, ByVal spname As String, ByVal cmdtype As String) As Object()
Dim dsrt As New DataSet, dbacc As New dataaccess
If cmdtype = "stored Procedure" Then
dsrt = dbacc.retds1(spname, conn, para)
Else
dsrt = dbacc.retds(query, tbl, conn)
End If
'here dsrt will have 19 rows. so i split them by 8.
Dim no As Integer
Dim rows As Integer = 8
Dim r As Integer
no = Floor(dsrt.Tables(0).Rows.Count / rows)
If dsrt.Tables(0).Rows.Count < rows Then
r = 0
Else
r = dsrt.Tables(0).Rows.Count Mod rows
End If
Dim start As Integer = 0
Dim last As Integer = 7
Dim str(0) As Object
Dim dv As New DataView
dv = dsrt.Tables(0).DefaultView
If r <> 0 Then
no += 1
End If
If no >= 1 Then
ReDim str(no - 1)
For i As Integer = 1 To no
Dim ds As New DataSet
Dim dt As New DataTable
dt = dsrt.Tables(0).Clone
dt.Rows.Clear()
For j As Integer = start To last
dt.ImportRow(dsrt.Tables(0).Rows(j))
Next
start = (rows * i)
If r <> 0 And i = no - 1 Then
last = last + r
Else
last = start + 7
End If
ds.Tables.Add(dt)
str(i - 1) = ds.GetXml
Next
Else
str(0) = dsrt.GetXml
End If
Dim len As Integer = str(0).ToString.Length
Return str
End Function
everything goes fine . str will contain 4 rows, each row would have a string with 27601 length. but javascript doesn't alert the result. why?
I found a solution that to increase the jason max length by the following.
<system.web.extensions>
<scripting>
<webServices>
<jsonSerialization maxJsonLength="5000"/>
</webServices>
</scripting>
I put the above code inside the tag of web Config file.It works but it gives tooltip error message on the first tag. any suggestions to correct it?
the error message is,
"the element 'configuration' in namespace 'http://schemas.microsoft.com/.NetConfiguration/v2.0' has invalid child element 'system.we.extensions' in namespace 'http://schemas.microsoft.com/.NetConfiguration/v2.0'. List of possible elements expected: 'system.net' in namespace 'http://schemas.microsoft.com/.NetConfiguration/v2.0'"