ers,
I am trying to create a vba based html web scraper to gather some data. The data I am trying to scrape is from javascript that looks similar to this:
<script src="https://d1fkb6ohkxde1z.cloudfront.net/assets/common-bundle- a24f15b690f23a668028e412a6d5243a1b0eeac497627def104363c0941da290.js"></script>
<script src="https://d1fkb6ohkxde1z.cloudfront.net/assets/private-57441f796e1722a6986c300e765cb19ea439a663b341a68647ebdb60252a0682.js"></script>
<script>
$('#onboardingModal').modal('show')
var silo = $profarmer.currentSilo;
silo.service_url = 'https://prices.profarmergrain.com.au/';
silo.clear_service_url = 'https://www.cleargrain.com.au';
silo.bhc_site_name = 'Port Adelaide Zone';
silo.bhc_site_type = 'PortZone';
silo.port_name = 'Port Adelaide';
silo.bhc_site_type = 'PortZone';
silo.bulk_handler_code = 'Zone';
silo.bulk_handler_name = 'Port Prices';
silo.grain_type = 'Wheat';
silo.grain_season = '2015';
silo.initBinGradeSelector(true, 'APW1', [{"code":"APW1","description":"Australian Premium White Wheat No 1","seasons":["2015","2016","2014"]},{"code":"H1","description":"Hard Wheat No 1","seasons":["2015","2016","2014"]},{"code":"H2","description":"Hard Wheat No 2","seasons":["2015","2016","2014"]},{"code":"HPS1","description":"Wheat - High Protein Screenings No1","seasons":["2015"]},{"code":"AUH2","description":"Australian Hard Varieties Wheat (Utility Grade)","seasons":["2015","2014"]},{"code":"APW1 FMG","description":"APW1 Floating Spread Multi-Grade","seasons":["2015","2016"]},{"code":"APW1 MG","description":"Multigrade APW1","seasons":["2015","2016","2017"]},{"code":"ASW1","description":"Australian Standard White Varieties Wheat","seasons":["2016","2015","2014"]},{"code":"AGP1","description":"Australian General Purpose No.1","seasons":["2015","2016","2014"]},{"code":"AUW1","description":"Utility Wheat","seasons":["2015","2014"]},{"code":"FED1","description":"Feed Wheat","seasons":["2015","2016"]},{"code":"SFW1","description":"Stock Feed Winter Wheat","seasons":["2015"]},{"code":"DR1","description":"Durum Wheat No.1 Grade - Protein 13.2%","seasons":["2015"]},{"code":"DR1 MG","description":"Multigrade Durum No.1","seasons":["2016"]},{"code":"DR2","description":"Durum Wheat No.2 Grade - Protein Target 12.0%","seasons":["2015"]},{"code":"DR3","description":"Durum Wheat No.3 Protein No Min","seasons":["2015"]},{"code":"SFE1","description":"Soft Wheat No.1 Grade","seasons":["2015"]}]);
silo.pricing_label = 'Port Zone Bid';
silo.port_equivalent = false;
silo.tradable = false;
silo.freight_rate = 0.0;
React.mount({
"price-chart": $profarmer.components.PriceChart,
"price-search": $profarmer.components.PriceSearch,
"merchant-prices": $profarmer.components.MerchantPrices,
"clear-grain-prices": $profarmer.components.ClearGrainPrices,
"market-commentary": $profarmer.components.MarketCommentary,
}, {
props: {
snapshot: {"bhc_site_name":"Port Adelaide Zone","port_name":"Port Adelaide","grain_type_name":"Wheat","grain_season_start_year":2015,"bulk_handler_code":"Zone","bulk_handler_name":"Port Prices","market_prices":{"APW1":{"2015":{"port_price":"266.0","price":"266.0","price_high_movement":"-4.0","prices_last_updated_at":"2016-05-31T06:10:05.000Z"},"2016":{"port_price":"255.0","price":"255.0","price_high_movement":"-6.0","prices_last_
And it continues with a few hundred more prices in a similar format. Ideally I would scrape everything from the line "props:" onwards. I have simple code to rearrange the text once it is in Excel, but my VBA code to scrape the HTML/JavaScript is having some trouble.
' Opens the ProFarmer price page in Internet Explorer, finds the inline
' <script> block that contains the "props:" payload and writes everything from
' "props:" onwards into Sheet7 of the active workbook, starting at cell A1.
' Long payloads are split over consecutive rows of column A.
Sub ie_open()
    Const PAGE_URL As String = "https://www.profarmergrain.com.au/prices/port-adelaide-zone/Zone/wheat"
    Const MARKER As String = "props:"
    Const CHUNK As Long = 32000     ' stays below Excel's 32,767 characters-per-cell limit

    Dim wb As Workbook
    Dim ws As Worksheet
    Dim TxtRng As Range
    Dim ie As Object
    Dim scriptTag As Object
    Dim scriptText As String
    Dim payload As String
    Dim markerPos As Long
    Dim rowOffset As Long

    Set ie = CreateObject("INTERNETEXPLORER.APPLICATION")
    ie.NAVIGATE PAGE_URL
    ie.Visible = True

    ' Wait until the browser is both idle and fully loaded.
    While ie.Busy Or ie.ReadyState <> 4
        DoEvents
    Wend

    ' "src" is an attribute of the external <script> tags, not an element id, so
    ' getElementById("src") finds nothing. The data lives in an inline <script>
    ' block without an id, so scan every script element for the marker text.
    For Each scriptTag In ie.document.getElementsByTagName("script")
        scriptText = scriptTag.innerHTML & ""
        markerPos = InStr(1, scriptText, MARKER, vbTextCompare)
        If markerPos > 0 Then
            payload = Mid$(scriptText, markerPos)
            Exit For
        End If
    Next scriptTag

    If Len(payload) = 0 Then
        MsgBox "No <script> block containing """ & MARKER & """ was found.", vbExclamation
        Exit Sub
    End If

    Set wb = ActiveWorkbook
    Set ws = wb.Sheets("Sheet7")
    Set TxtRng = ws.Range("A1")

    ' Write the payload in cell-sized pieces. The leading apostrophe forces Excel
    ' to store each piece as text even when it happens to start with "=", "+" or "-".
    Do While Len(payload) > 0
        TxtRng.Offset(rowOffset, 0).Value = "'" & Left$(payload, CHUNK)
        payload = Mid$(payload, CHUNK + 1)
        rowOffset = rowOffset + 1
    Loop
End Sub
I am not sure if there is a better way to scrape an element. Instead of by id or tag, is there a way to scrape by either line number or by a string?
Cheers!
Related
I am a teacher designing a tool to take attendance from Zoom usage data (the days we live in!)
Zoom provides reports that you can copy and paste and stay formatted as a table. Like this:
Luke Name Name
lr#email.com
09/10/2020 08:22:03 AM 09/10/2020 08:33:36 AM 12
Barbara Name Name
bar#email.com
09/10/2020 08:22:12 AM 09/10/2020 08:31:57 AM 10
Joaquin Name Name Name
joa#email.com
09/10/2020 08:22:12 AM 09/10/2020 08:31:59 AM 10
Rafaella Name Name
raf#email.com
09/10/2020 08:22:18 AM 09/10/2020 08:31:55 AM 10
Andrea Name Name
and#email.com
09/10/2020 08:22:19 AM 09/10/2020 08:32:14 AM 10
Sara Name Name Name
sar#email.com
09/10/2020 08:22:20 AM 09/10/2020 08:31:56 AM 10
If this is posted into a text editor or a Google Sheet it does format into rows and columns correctly.
My code right now takes that long string of info and cuts out anything that is not an email and then checks the email against a class list in order to see who was absent. What I now want to do is to make it check for late students. Ideally the user would input the string and the time class started and it would display absent students and separately late students.
I think the part I can't figure out is how the user can copy paste the info into a prompt or into an html textarea and then correctly write it to rows and columns so I can manipulate the data.
Code:
//this function copies the info to a spreadsheet and sets formulas in order to find just emails and check them against a student list
/**
 * Asks the user to paste the attendance text, then rebuilds the
 * "Attendance Handling" sheet: the student e-mail column of the active sheet
 * is copied into column B, the pasted text is stored in E2 and split into
 * column A, and helper formulas in columns C, D, F and G derive the
 * absent / guest lists. Finishes by calling showAttendance().
 * Does nothing when the prompt is cancelled.
 */
function checkAttendance() {
  var classSheet = SpreadsheetApp.getActiveSheet();
  var ui = SpreadsheetApp.getUi();

  var reply = ui.prompt("Student Attendance", "Paste emails of students who attended.", ui.ButtonSet.OK_CANCEL);
  if (reply.getSelectedButton() == ui.Button.CANCEL) {
    return;
  }
  var pastedText = reply.getResponseText();

  var handling = SpreadsheetApp.getActive().getSheetByName("Attendance Handling");

  // Start from a clean work area below the header row.
  handling.getRange(2, 1, 200, 100).clear();

  // C: class-list entries (B) that do not appear among the pasted values (A).
  handling.getRange(2, 3).setFormula("=FILTER(B2:B, ISNA(MATCH(B2:B, A2:A, 0)))");

  // B: copy of the "Student Email" column from the sheet that was active.
  var emailColumn = getColNum2("Student Email");
  var lastRow = classSheet.getLastRow();
  var sourceEmails = classSheet.getRange(2, emailColumn, lastRow - 1, 1);
  sourceEmails.copyTo(handling.getRange(2, 2, lastRow, 1));

  // A: the pasted text (held in E2) split on spaces, one token per row.
  handling.getRange(2, 1).setFormula('=TRANSPOSE(SPLIT(E2, " "))');

  // D: pasted tokens (A) that are not in the class list (B).
  handling.getRange(2, 4).setFormula('=FILTER(A2:A, ISNA(MATCH(A2:A, B2:B, 0)))');

  // E2: the raw pasted text itself.
  handling.getRange(2, 5).setValue(pastedText);

  // F: keep a D value only when it contains "#" and no "(", then fill the formula down 200 rows.
  var filterCell = handling.getRange(2, 6);
  filterCell.setFormula('=if(AND(IFERROR(SEARCH("#", D2), "") <> "", IFERROR(SEARCH("(", D2), "") = ""), D2, "")');
  filterCell.copyTo(handling.getRange(2, 6, 200, 1));

  // G: the non-empty entries of F.
  handling.getRange(2, 7).setFormula('=FILTER(F2:F, F2:F <> "")');

  showAttendance();
}
/**
 * Reads the computed lists from the "Attendance Handling" sheet (column C for
 * absent students, column G for guests), injects them into the 'attendance'
 * HTML template as `Absent` and `Present`, and shows the result in a modal
 * dialog titled 'Class Attendance'.
 */
function showAttendance() {
  var atHandle = SpreadsheetApp.getActive().getSheetByName("Attendance Handling");
  var rows1 = getLastRow(3);
  var rows2 = getLastRow(7);

  // Joins the values of one column (from row 2 down to lastRow) with newlines.
  // getRange() throws when asked for fewer than one row, so an empty list
  // yields an empty string instead of an exception.
  var columnText = function (column, lastRow) {
    var count = lastRow - 1;
    if (!(count >= 1)) {
      return "";
    }
    return atHandle.getRange(2, column, count, 1).getValues().join("\n");
  };

  var absent = columnText(3, rows1);
  var present = columnText(7, rows2);

  var t = HtmlService.createTemplateFromFile('attendance');
  t.Absent = absent;
  t.Present = present;

  // Declared locally: the original assigned to an undeclared `html`, which
  // creates an implicit global (and is an error in strict mode).
  var html = t.evaluate().setWidth(600).setHeight(345);
  SpreadsheetApp.getUi().showModalDialog(html, 'Class Attendance');
}
<!DOCTYPE html>
<html>
<head>
<base target="_top">
<!--
  Attendance dialog template. Expects two template variables:
    Absent  - newline-separated list shown in the first textarea
    Present - newline-separated list shown in the second textarea
  (style removed for space)
-->
</head>
<body>
<form>
<div>
Absent (in student list but not in pasted values):
<br>
<textarea id="tofield" name="tofield" rows="9" cols="60"><?!= Absent ?></textarea>
<br>
Guests (present in pasted values but not in student list):
<br>
<!-- Element ids must be unique within a document, so the second textarea gets its own id/name. -->
<textarea id="guestfield" name="guestfield" rows="9" cols="60"><?!= Present ?></textarea>
<br>
<p>
</p>
</div>
</form>
Note: this information is not saved. Please use or copy and paste this information as needed beforing closing.
<p>
<button type="button" id="btn" style="margin-left: 8px; font-size: 15px; padding: 5px" onclick='google.script.host.close();' class="sbtn btn btn-secondary btn-c">Close</button>
</p>
</body>
</html>
You need to use something like google.script.run.doSomething(data) to pass values from the client-side to the server-side code, where doSomething is a server-side function and data is a variable holding the value / object that you want to pass to the server side.
Note: There are some limitations regarding what values/object types can be passed to the server side, e.g. Date objects can't be passed, but you can pass a string or the corresponding milliseconds.
The following code converts the sample data input into a bidimensional array that can be passed to sheet by using setValues(values).
var data = `Luke Name Name
lr#email.com
09/10/2020 08:22:03 AM 09/10/2020 08:33:36 AM 12
Barbara Name Name
bar#email.com
09/10/2020 08:22:12 AM 09/10/2020 08:31:57 AM 10
Joaquin Name Name Name
joa#email.com
09/10/2020 08:22:12 AM 09/10/2020 08:31:59 AM 10
Rafaella Name Name
raf#email.com
09/10/2020 08:22:18 AM 09/10/2020 08:31:55 AM 10
Andrea Name Name
and#email.com
09/10/2020 08:22:19 AM 09/10/2020 08:32:14 AM 10
Sara Name Name Name
sar#email.com
09/10/2020 08:22:20 AM 09/10/2020 08:31:56 AM 10`;

/**
 * Converts a pasted Zoom usage report into a two-dimensional array suitable
 * for Range.setValues().
 *
 * The report is read as groups of three non-empty lines:
 *   1. participant name
 *   2. participant e-mail
 *   3. "<join time> <leave time> <duration>"
 *
 * The third line is matched with a pattern instead of fixed character offsets,
 * so it works whether the fields are separated by one space, several spaces
 * or tabs.
 *
 * @param {string} text Raw report text.
 * @returns {Array<Array>} One row per participant:
 *   [name, email, joinDate (Date), leaveDate (Date), duration (string)].
 * @throws {Error} If the text does not consist of complete three-line records
 *   or a times line does not have the expected shape.
 */
function parseZoomReport(text) {
  const stamp = '(\\d{1,2}/\\d{1,2}/\\d{4}\\s+\\d{1,2}:\\d{2}:\\d{2}\\s+[AP]M)';
  const timesLine = new RegExp('^' + stamp + '\\s+' + stamp + '\\s+(\\S+)$', 'i');

  // Drop blank lines (e.g. a trailing newline from the paste) and surrounding whitespace.
  const lines = String(text)
    .split(/\r?\n/)
    .map(function (line) { return line.trim(); })
    .filter(function (line) { return line !== ''; });

  if (lines.length % 3 !== 0) {
    throw new Error('Expected groups of 3 lines (name, email, times) but got ' + lines.length + ' lines.');
  }

  const rows = [];
  for (let i = 0; i < lines.length; i += 3) {
    const parts = timesLine.exec(lines[i + 2]);
    if (!parts) {
      throw new Error('Unrecognised times line for "' + lines[i] + '": ' + lines[i + 2]);
    }
    rows.push([lines[i], lines[i + 1], new Date(parts[1]), new Date(parts[2]), parts[3]]);
  }
  return rows;
}

const matrix = parseZoomReport(data);
console.info(matrix);
I suggest you use the above on the server side, as mentioned previously, because Date objects can't be passed from the client side to the server side. Just be sure that the Apps Script project, the Spreadsheet and Zoom are using the same timezone; otherwise you will have to improve it to handle the timezone differences appropriately.
Resources
Client-to-Server communication | HTML Service | Google Apps Script
You can construct a new Date object from the parsed-out times and compare them. If you can get the string representation of the date, you can pass it to the constructor of Date directly:
new Date('December 17, 1995 03:24:00');
Once you have the dates you can use operators to compare them, and determine if the student was late or not.
/**
 * Tells whether a student arrived after the class started.
 *
 * @param {Date|string|number} classStart Class start time (Date, date string
 *   accepted by the Date constructor, or epoch milliseconds).
 * @param {Date|string|number} arrival The student's join time, e.g. the cell
 *   value read from the sheet, in any of the same forms.
 * @returns {boolean} true when `arrival` is strictly later than `classStart`.
 * @throws {Error} If either argument cannot be interpreted as a date.
 */
function isLate(classStart, arrival) {
  // Normalise either a Date or a parseable value to epoch milliseconds.
  var toMillis = function (value, label) {
    var ms = value instanceof Date ? value.getTime() : new Date(value).getTime();
    if (isNaN(ms)) {
      throw new Error('isLate: ' + label + ' is not a valid date: ' + value);
    }
    return ms;
  };

  var startMs = toMillis(classStart, 'classStart');
  var studentArrival = toMillis(arrival, 'arrival');

  // The original subtracted the raw date string from a misspelled variable;
  // the comparison has to be arrival time against class start time.
  return studentArrival - startMs > 0;
}
On this page: https://www.check24.de/dsl/vergleich/
I tried to implement a crawler for bandwidth checks of different providers via HtmlUnit, version 2.31.
If you manually fill in the Address field on the page, you will see a pop-up that shows the progress of the bandwidth check, then you have the available Internet bandwidths for the requested address on the same page.
The requested address is then shown in labels (in the place where the text input fields were at first).
In my attempt to write a crawler using HtmlUnit, although the same page was returned (after a longer waiting time), the input field was not replaced by a label in the fieldset (id="tko-vcheck-done-wrapper") that shows the address.
Here is my Code:
/**
 * Fills in the address form on the check24 DSL comparison page, submits the
 * availability check and extracts the provider bandwidths from the result rows.
 *
 * @param zip    postal code
 * @param city   city name
 * @param street street name
 * @param hno    house number
 * @return the map produced by {@code findMaxSpeed} for the result rows, or an
 *         empty map if anything goes wrong (the exception is printed)
 */
public Map<String, Integer> checkProviderBandWidthsByAddress(String zip, String city, String street, String hno){
    // try-with-resources closes the client on every path. The original called
    // webClient.close() in a finally block, which throws a NullPointerException
    // (masking the real error) when getWebCient() itself fails.
    try (WebClient webClient = getWebCient()) {
        HtmlPage page = webClient.getPage("https://www.check24.de/dsl/vergleich/");

        HtmlTextInput inputZipCity = (HtmlTextInput) page.getElementById("c24api_ac_widget_zipcity");
        HtmlHiddenInput inputZip = (HtmlHiddenInput) page.getElementById("c24api_ac_widget_zipcode");
        HtmlHiddenInput inputCity = (HtmlHiddenInput) page.getElementById("c24api_ac_widget_city");
        HtmlTextInput inputStreet = (HtmlTextInput) page.getElementById("c24api_ac_widget_street");
        HtmlTextInput inputStreetNumber = (HtmlTextInput) page.getElementById("c24api_ac_widget_streetnumber");
        HtmlButton buttonCheck = (HtmlButton) page.getElementById("tko-filter-vcheck-submit");

        inputZipCity.setValueAttribute(zip + " " + city);
        inputZipCity.fireEvent(Event.TYPE_INPUT);
        webClient.waitForBackgroundJavaScriptStartingBefore(1000);

        inputZip.setValueAttribute(zip);
        inputCity.setValueAttribute(city);
        inputStreet.setValueAttribute(street);
        inputStreetNumber.setValueAttribute(hno);

        buttonCheck.click();
        webClient.waitForBackgroundJavaScriptStartingBefore(30000);

        // Read the page currently held by the window instead of the object
        // returned by click(): background JavaScript may have replaced or
        // updated the page after the click returned.
        // NOTE(review): presumably this is why the wrapper element was not found - confirm against the live site.
        page = (HtmlPage) page.getEnclosingWindow().getEnclosedPage();

        // Address summary element; may still be null if the check has not finished.
        DomElement done = page.getElementById("tko-vcheck-done-wrapper");

        // XPath attribute references use '@'; "#class" is not valid XPath.
        List<DomElement> providers = page.getByXPath("//div[contains(@class, 'tko-result-row tko-clearfix')]");
        Map<String, Integer> bandWidths = findMaxSpeed(providers);
        return bandWidths;
    } catch (Exception e) {
        e.printStackTrace();
        return Collections.emptyMap();
    }
}
/**
 * Builds the WebClient used for crawling: Firefox 52 emulation with
 * JavaScript, redirects and cookies enabled, CSS processing disabled and CSS
 * errors silenced, insecure SSL accepted, pop-up blocker off, AJAX calls
 * resynchronized and refreshes handled by a waiting handler.
 *
 * @return the configured client; the caller is responsible for closing it
 */
public static WebClient getWebCient(){
    final WebClient client = new WebClient(BrowserVersion.FIREFOX_52); // also tried with Other

    // Handlers and controllers attached to the client itself.
    client.setRefreshHandler(new WaitingRefreshHandler());
    client.setCssErrorHandler(new SilentCssErrorHandler());
    client.setAjaxController(new NicelyResynchronizingAjaxController());
    client.getCookieManager().setCookiesEnabled(true);

    // Switches on the client's options object.
    client.getOptions().setJavaScriptEnabled(true);
    client.getOptions().setCssEnabled(false);
    client.getOptions().setUseInsecureSSL(true);
    client.getOptions().setRedirectEnabled(true);
    client.getOptions().setPopupBlockerEnabled(false);

    return client;
}
If someone has an idea to solve the problem, I would be very happy about it
Pages like this horrible monster are a challenge for HtmlUnit.
But if you are a bit patient, then it will work.
(I'm using HtmlUnit release 2.32)
Have added some comments to the sample code; hope that helps.
And please take the code as proof of concept, there was not enough time for writing good code.
/**
 * Proof of concept: opens the check24 DSL comparison page, types a sample
 * address (50126, Hauptstr. 10) into the form, submits the availability check
 * and prints the text of the "tko-result-sorting-text" element.
 * After every interaction it waits for background JavaScript to settle.
 */
public static void main(String[] args) throws Exception {
    try (final WebClient client = new WebClient(BrowserVersion.FIREFOX_60)) {
        HtmlPage page = client.getPage("https://www.check24.de/dsl/vergleich/");

        // The page kicks off a lot of JavaScript on load; give it time to
        // finish so the form actually reacts to typing.
        wait(client, 60);

        HtmlTextInput zipCityField = (HtmlTextInput) page.getElementById("c24api_ac_widget_zipcity");
        zipCityField.type("50126");
        wait(client, 30);

        // Debug aid: System.out.println(page.getElementById("tko-result-filter-form-acsuggest").asXml());

        HtmlTextInput streetField = (HtmlTextInput) page.getElementById("c24api_ac_widget_street");
        HtmlTextInput houseNumberField = (HtmlTextInput) page.getElementById("c24api_ac_widget_streetnumber");

        streetField.type("Hauptstr.");
        wait(client, 10);

        houseNumberField.type("10");
        wait(client, 10);

        HtmlButton submit = (HtmlButton) page.getElementById("tko-filter-vcheck-submit");
        submit.click();
        wait(client, 240);

        // Take whatever page the window holds now rather than the original reference.
        HtmlPage refreshedPage = (HtmlPage) page.getEnclosingWindow().getEnclosedPage();

        // Debug aid: System.out.println(refreshedPage.asText());
        String summary = refreshedPage.getElementById("tko-result-sorting-text").getTextContent();
        System.out.println(summary);
    }
}
/**
 * Polls the client's background JavaScript in one-second slices until at most
 * one job remains or the given number of seconds has elapsed, and prints a
 * notice if more than one job is still pending at the end.
 *
 * @param webClient client whose background JavaScript is awaited
 * @param seconds   upper bound for the total waiting time
 */
private static void wait(WebClient webClient, int seconds) {
    final long deadline = System.currentTimeMillis() + seconds * 1000;

    int pendingJobs;
    do {
        pendingJobs = webClient.waitForBackgroundJavaScript(1000);
    } while (pendingJobs > 1 && deadline > System.currentTimeMillis());

    // A single remaining job is tolerated: there seems to always be one in the
    // queue (maybe some kind of heartbeat).
    if (pendingJobs > 1) {
        System.out.println("Still some js is running " + pendingJobs);
    }
}
At least this produces something like
68 Tarife verfügbar von 12,91 € bis 107,47 € (Durchschnitt pro Monat)
Same text is shown on the web site when running with a real browser.
I am attempting to automate a certain process for my business by clicking on certain links on a webpage, inputting business data into a search box and comparing it with existing data. I am, however, unsuccessful at clicking a JavaScript-based link, which is also part of a table, using VBA. The website's relevant HTML is as below:
<tr class="odd"> <td> <a onclick="viewCompanyDetails('B214273', '1529481460070');" href="javascript:;">Alinda Infrastructure Fund III (Euro) GP SARL</a></td> <td><a onclick="viewCompanyDetails('B214273', '1529481460070');" href="javascript:;">B214273</a></td> </tr>
Here is my poor attempt at doing the project,
' Looks a company up on the Luxembourg Business Registers site through
' Internet Explorer:
'   1. opens the company-consultation page,
'   2. follows the consultation menu link,
'   3. types the trade name from Results!Y1 into the search box and submits,
'   4. clicks the result link whose text equals Results!N1.
' Progress messages are appended to txtConsole as it goes.
Private Sub CommandButton1_Click()
    Dim targetName As String
    Dim linkHref As String

    Set IE = CreateObject("InternetExplorer.Application")

    ' "&currentMenuLabel" had been mangled into the currency sign followed by
    ' "tMenuLabel" (the "&curren" prefix was decoded as an HTML entity).
    my_url = "https://www.lbr.lu/mjrcs/jsp/DisplayConsultDocumentsActionNotSecured.action?FROM_MENU=true&time=1528967707649&currentMenuLabel=menu.item.companyconsultation"

    consoletext = consoletext & "Connection established to Luxembourg Business Registers on www.rcsl.lu via Internet Explorer..." & vbNewLine & vbNewLine
    txtConsole.Value = consoletext
    consoletext = consoletext & "Looking up Registre De Commerce et des Societes." & vbNewLine & vbNewLine
    txtConsole.Value = consoletext

    With IE
        .Visible = True
        .navigate my_url
        Do Until Not IE.Busy And IE.readyState = 4
            DoEvents
        Loop
    End With

    ' Links are <a> elements; "href" is an attribute, not a tag name. The href
    ' property is an absolute URL and its time= value changes between visits,
    ' so match on the stable parts instead of comparing the whole string.
    For Each objlink In IE.document.getElementsByTagName("a")
        linkHref = objlink.href & ""
        If InStr(1, linkHref, "DisplayConsultDocumentsActionNotSecured.action", vbTextCompare) > 0 _
           And InStr(1, linkHref, "currentMenuLabel=menu.item.companyconsultation", vbTextCompare) > 0 Then
            objlink.Click
            Exit For
        End If
    Next objlink

    Do Until Not IE.Busy And IE.readyState = 4
        DoEvents
    Loop

    consoletext = consoletext & "Looking up " & Sheets("Results").Range("N1").Value & " in the registry." & vbNewLine & vbNewLine
    txtConsole.Value = consoletext

    Dim TradeName As String
    TradeName = ThisWorkbook.Sheets("Results").Range("Y1").Value
    IE.document.getElementById("companyName").Value = TradeName

    Set objSubmit = IE.document.getElementsByTagName("input")
    For Each btn In objSubmit
        If btn.Value Like "Search" Then
            btn.Click
            Exit For    ' one click is enough; the page starts navigating
        End If
    Next

    Do Until Not IE.Busy And IE.readyState = 4
        DoEvents
    Loop

    ' The result links are <a href="javascript:;" onclick="viewCompanyDetails(...)">
    ' elements inside the table rows. "tr" is a tag name, not a class name, and
    ' the anchors themselves are what has to be clicked.
    targetName = Trim$(Sheets("Results").Range("N1").Value & "")
    For Each entitylink In IE.document.getElementsByTagName("a")
        If Trim$(entitylink.innerText & "") = targetName _
           And InStr(1, entitylink.href & "", "javascript:", vbTextCompare) = 1 Then
            entitylink.Click
            Exit For
        End If
    Next entitylink

    txtXMLpath.Value = ""
End Sub
This is what the webpage content looks like,
enter image description here
I have hidden the name of the entity to protect the identity of the client. I need to click the result on the first row per the image.
I will be much obliged if you could provide any help. Again, I'm a beginner in VBA and have no idea how to proceed here. Please let me know should you require further clarification
You can use .querySelector to get the element you want which is the first with
' The attribute value is wrapped in double quotes (doubled inside the VBA string) because it contains single quotes itself.
.document.querySelector("a[onclick=""viewCompanyDetails('B214273', '1529481460070');""]")
Then you may need to do either:
' Click the first anchor whose onclick attribute matches exactly.
.document.querySelector("a[onclick=""viewCompanyDetails('B214273', '1529481460070');""]").Click
or
' The event name must be a double-quoted string; a single quote starts a comment in VBA.
.document.querySelector("a[onclick=""viewCompanyDetails('B214273', '1529481460070');""]").fireEvent "onclick"
CSS selector:
The selector returns 2 items from your HTML but .querySelector will only return the first of these when applying the selector to the HTML. The first is the required "SARL".
More info on .fireEvent method:
fireEvent method: Fires a specified event on the object.
Syntax: object.fireEvent(bstrEventName, pvarEventObject,
pfCancelled)
It has a boolean return value which if TRUE tells you the event was successfully fired.
.querySelector method:
The Document method querySelector() returns the first Element
within the document that matches the specified selector, or group of
selectors. If no matches are found, null is returned.
Syntax: element = document.querySelector(selectors); <== note the ";"
is not used in VBA
Edit:
If .querySelector method is not supported you could try looping the a tags and clicking when a particular string is found e.g.
' Fallback when querySelector is unavailable: walk every <a> element and click
' the first one whose text contains the company name.
Dim c As Object, n As Object
Const WANTED_TEXT As String = "Alinda Infrastructure Fund III (Euro) GP SARL"

Set c = ie.document.getElementsByTagName("a")
For Each n In c
    If InStr(1, n.innerText, WANTED_TEXT) <= 0 Then
        ' not the link we are after - keep looking
    Else
        n.Click
        Exit For
    End If
Next n
Without more HTML to target the specific a tags of interest you can do a general write out of all a tags with:
' Diagnostic dump: writes the text of every <a> element on the page into
' column A of the active sheet, one link per row starting at row 1.
Dim c As Object, n As Object, counter As Long
Dim outSheet As Object

Set c = ie.document.getElementsByTagName("a")
Set outSheet = ActiveSheet
For Each n In c
    counter = counter + 1
    outSheet.Cells(counter, 1) = n.innerText
Next n
More HTML is needed to target this better; perhaps there is a className or Id to identify the element housing these a tags?
For your link to write table to sheet:
Option Explicit
' Drives Internet Explorer through a company search on www.lbr.lu and copies
' the first "commonTable" result table into the active sheet:
'   - opens the site and follows the "View the person's file" link,
'   - enters "Alinda" in the companyName field and clicks the first "button" element,
'   - writes every <td> of every <tr> in each <tbody> to the sheet, one table
'     row per sheet row starting at A1,
'   - closes the browser.
' Declaring the table as HTMLTable relies on early binding to the HTML object library.
Public Sub GetInfo()
    Dim ie As Object
    Dim links As Object, link As Object
    Dim hTable As HTMLTable
    Dim bodies As Object, body As Object
    Dim bodyRows As Object, rowEl As Object
    Dim rowCells As Object, cellEl As Object
    Dim target As Object
    Dim outRow As Long, outCol As Long

    Application.ScreenUpdating = False
    Set ie = CreateObject("InternetExplorer.Application")

    ie.Visible = True
    ie.navigate "https://www.lbr.lu/mjrcs"

    ' Loop until loaded
    Do While ie.Busy Or ie.readyState < 4
        DoEvents
    Loop

    ' Follow the menu link identified by its exact caption.
    Set links = ie.document.getElementsByTagName("a")
    For Each link In links
        If link.innerText = "View the person's file" Then
            link.Click
            Exit For
        End If
    Next link

    Do While ie.Busy Or ie.readyState < 4
        DoEvents
    Loop

    ' Fill in the search term and submit.
    ie.document.getElementById("companyName").innerText = "Alinda"
    ie.document.getElementsByClassName("button")(0).Click

    Do While ie.Busy Or ie.readyState < 4
        DoEvents
    Loop

    Set hTable = ie.document.getElementsByClassName("commonTable")(0)

    ' Copy the table cell by cell.
    outRow = 1
    Set target = ActiveSheet
    Set bodies = hTable.getElementsByTagName("tbody")
    For Each body In bodies
        Set bodyRows = body.getElementsByTagName("tr")
        For Each rowEl In bodyRows
            Set rowCells = rowEl.getElementsByTagName("td")
            outCol = 1
            For Each cellEl In rowCells
                target.Cells(outRow, outCol).Value = cellEl.innerText
                outCol = outCol + 1
            Next cellEl
            outRow = outRow + 1
        Next rowEl
    Next body

    ' Remember to quit the application
    ie.Quit

    Application.ScreenUpdating = True
End Sub
I'm trying to automate my website by setting predetermined monthly featured videos.
I have JavaScript files already saved w/ the annual data for that particular year - e.g. choose_2017_video.js as well as 2018 & 2019 files. Each image URL text and description text I set in arrays but I can't seem to get them to display. Each array element corresponds to a month [0-11].
The getMonth() method will be the way of retrieving the data.
Somehow, I need to import the song info. into the HTML roughly like this:
<h2 align="center">Video of the month: javascript:song_info[mnth];</h2>
I also need to be able to import the corresponding image path which is saved in a parallel array (of filenames).
// Parallel arrays indexed by month number (0 = January ... 11 = December):
// song_info holds the description, img_URL the screenshot file name.
// JavaScript arrays are not declared with a size in brackets, and the current
// month comes from a Date object (there is no built-in today() function).
var song_info = new Array(12), img_URL = new Array(12), mnth = new Date().getMonth();
song_info[7] = "Newsboys - God's Not Dead"; // example data
img_URL[7] = "Newsboys-Gods_Not_Dead_video.JPG";
This site won't let me correctly describe how I'll display the img code using the img_URL element.
Can someone give me examples of how I can import the song information into the h2 code example and img src code?
This might be done a lot easier in an ASP script since I'm more familiar w/ BASIC. JS wasn't even conceived until after I had entered the workforce after a couple yrs. in college.
My intent is to declare new arrays and a pointer variable. The pointer is supposed to determine the month from the current date. One array holds the description of the song while the other holds the filename of the screenshot to be used to launch the YouTube URL in a different browser tab. If I wanted to embed the video, I would just use HTML5 code. It is called by a script src="JSfilename" from an HTML file.
Here is one of the JS files:
// Featured video data for one year. Both arrays are indexed by month number
// (0 = January ... 11 = December) and must stay in the same order:
//   song_info[m] - artist and title shown on the page
//   img_URL[m]   - file name of the matching screenshot
// mnth is the current month, used as the index into both arrays.
var song_info = [
  "Sidewalk Prophets - Help Me Find It",
  "TobyMac with Kirk Franklin & Mandisa - Lose Your Soul",
  "MercyMe - Dear Younger Me",
  "Kari Jobe - I Am Not Alone",
  "Danny Gokey - Tell Your Heart to Beat Again",
  "Hawk Nelson - Drops In the Ocean",
  "Plumb - Exhale",
  "Newsboys - God's Not Dead",
  "Francesca Battistelli - Holy Spirit",
  "Brandon Heath - Give Me Your Eyes",
  "Matthew West - Strong Enough",
  "Jordan Feliz - The River"
];

var img_URL = [
  "Sidewalk_Prophets-Help_Me_Find_It_video.png",
  "TobyMac-LoseMySoul_video.png",
  "MercyMe-DearYoungerMe_video.png",
  "KariJobe-IAmNotAlone_video.png",
  "DannyGokey-TellYourHeartToBeatAgain_video3.PNG",
  "HawkNelson-DropsInTheOcean_video.PNG",
  "Plumb-Exhale_video.PNG",
  "Newsboys-Gods_Not_Dead_video.JPG",
  "FrancescaBattistelli-HolySpirit_video.JPG",
  "BrandonHeath-GiveMeYourEyes_video.JPG",
  "Matthew_West-StrongEnough_video.JPG",
  "JordanFeliz-TheRiver_video.PNG"
];

// `var name[12]` is not valid JavaScript and there is no built-in today();
// the month index comes from a Date instance.
var mnth = new Date().getMonth();
So to be clear, you want something like
<h2 align="center">Video of the month: {your_random_video_name_from_your_JSFile}</h2>
So why not just doing this :
<h2 align="center" id="video_name">Video of the month:</h2>
<img src="#" alt="" id="video_preview"/>
<script>
// Appends this month's song description to the heading and points the preview
// image at this month's screenshot. Both arrays are indexed by month number
// (0 = January ... 11 = December).
var today = new Date();
var song_info = [], img_URL = [], mnth = today.getMonth();
song_info[9] = "CHVRCHES - Leave A Trace"; // example data
img_URL[9] = "http://diymag.com/media/img/Artists/C/Chvrches/October-cover/_1500x1000_crop_center-center_75/chvrches-mike-massaro-diy-2015-05.jpg";
song_info[10] = "Newsboys - God's Not Dead"; // example data
img_URL[10] = "Newsboys-Gods_Not_Dead_video.JPG";

// Months without an entry are left alone; otherwise the page would show the
// literal text "undefined" and request an image called "undefined".
if (song_info[mnth] !== undefined) {
  // A text node keeps characters such as "&" or "<" in a title from being
  // parsed as markup; the leading space separates it from the colon.
  document.getElementById("video_name").appendChild(document.createTextNode(" " + song_info[mnth]));
}
var image = document.getElementById("video_preview");
if (img_URL[mnth] !== undefined) {
  image.src = img_URL[mnth];
}
</script>
Example JSFiddle
I have a web page that asks the user for a paragraph of text, then performs some operation on it. To demo it to lazy users, I'd like to add an "I feel lucky" button that will grab some random text from Wikipedia and populate the inputs.
How can I use Javascript to fetch a sequence of text from a random Wikipedia article?
I found some examples of fetching and parsing articles using the Wikipedia API, but they tend to be server side. I'm looking for a solution that runs entirely from the client and doesn't get scuppered by same origin policy.
Note random gibberish is not sufficient; I need human-readable sentences that make sense.
My answer builds on the technique suggested here.
The tricky part is formulating the correct query string:
http://en.wikipedia.org/w/api.php?action=query&generator=random&prop=extracts&exchars=500&format=json&callback=onWikipedia
generator=random selects a random page
prop=extracts and exchars=500 retrieves a 500 character extract
format=json returns JSON-formatted data
callback= causes that data to be wrapped in a function call so it can be treated like any other <script> and injected into your page (see JSONP), thus bypassing cross-domain barriers.
requestid can optionally be added, with a new value each time, to avoid stale results from the browser cache (required in IE9)
The page served by the query is something that looks like this (I've added whitespace for readability):
onWikipedia(
{"query":
{"pages":
{"12362520":
{"pageid":12362520,
"ns":0,
"title":"Power Building",
"extract":"<p>The <b>Power Building<\/b> is a historic commercial building in
the downtown of Cincinnati, Ohio, United States. Built in 1903, it
was designed by Harry Hake. It was listed on the National Register
of Historic Places on March 5, 1999. One week later, a group of
buildings in the northeastern section of downtown was named a
historic district, the Cincinnati East Manufacturing and Warehouse
District; the Power Building is one of the district's contributing
properties.<\/p>\n<h2> Notes<\/h2>"
} } } }
)
Of course you'll get a different article each time.
Here's a full, working example which you can try out on JSBin.
<HTML><BODY>
<p><textarea id="textbox" style="width:350px; height:150px"></textarea></p>
<p><button type="button" id="button" onclick="startFetch(100, 500)">
Fetch random Wikipedia extract</button></p>
<script type="text/javascript">
var textbox = document.getElementById("textbox");
var button = document.getElementById("button");
var tempscript = null, minchars, maxchars, attempts;
/**
 * Starts a JSONP request for a random Wikipedia extract by injecting a
 * temporary <script> element; the response calls onFetchComplete(data).
 *
 * @param {number} minimumCharacters Shortest acceptable extract (ignored on retry).
 * @param {number} maximumCharacters Value sent as exchars (ignored on retry).
 * @param {boolean} [isRetry] True when called again after a too-short result;
 *   the saved limits and the attempt counter are then kept as they are.
 */
function startFetch(minimumCharacters, maximumCharacters, isRetry) {
  if (tempscript) return; // a fetch is already in progress
  if (!isRetry) {
    attempts = 0;
    minchars = minimumCharacters; // save params in case retry needed
    maxchars = maximumCharacters;
    button.disabled = true;
    button.style.cursor = "wait";
  }
  tempscript = document.createElement("script");
  tempscript.type = "text/javascript";
  tempscript.id = "tempscript";
  // If the request fails, no callback ever arrives; clean up so the button
  // does not stay disabled and later fetches are not blocked.
  tempscript.onerror = function () {
    if (tempscript && tempscript.parentNode) {
      tempscript.parentNode.removeChild(tempscript);
    }
    tempscript = null;
    button.disabled = false;
    button.style.cursor = "auto";
  };
  // https: a plain-http script is blocked as mixed content on HTTPS pages.
  // grnnamespace=0: restrict the random generator to the article namespace,
  // so talk pages and other non-article pages are not returned.
  tempscript.src = "https://en.wikipedia.org/w/api.php"
    + "?action=query&generator=random&grnnamespace=0&prop=extracts"
    + "&exchars=" + encodeURIComponent(maxchars)
    + "&format=json&callback=onFetchComplete&requestid="
    + Math.floor(Math.random()*999999).toString();
  document.body.appendChild(tempscript);
  // onFetchComplete invoked when finished
}
/**
 * JSONP callback for the request started by startFetch(). Removes the
 * temporary script element, extracts the plain text of the first returned
 * page and either shows it in the textbox (re-enabling the button) or retries
 * when the text is not longer than the requested minimum. After the retry
 * budget is used up, whatever text was received is shown.
 *
 * @param {Object} data Parsed API response; expected shape is
 *   data.query.pages[<id>].extract, but any part may be missing.
 */
function onFetchComplete(data) {
  document.body.removeChild(tempscript);
  tempscript = null;

  // A response without query/pages/extract (API error, page with no extract)
  // used to throw here and leave the button disabled for good. Treat it as an
  // empty extract so the normal retry path handles it.
  var pages = data && data.query && data.query.pages;
  var page = pages ? getFirstProp(pages) : null;
  var s = "";
  if (page && typeof page.extract === "string") {
    // htmlDecode yields null when the first node is not a text node.
    s = htmlDecode(stripTags(page.extract)) || "";
  }

  if (s.length > minchars || attempts++ > 5) {
    textbox.value = s;
    button.disabled = false;
    button.style.cursor = "auto";
  } else {
    startFetch(0, 0, true); // retry
  }
}
/**
 * Returns the value of the first enumerable property encountered when
 * iterating over obj with for...in, or undefined when there is none.
 */
function getFirstProp(obj) {
  var firstValue;
  for (var key in obj) {
    firstValue = obj[key];
    break;
  }
  return firstValue;
}
// This next bit borrowed from Prototype / hacked together
// You may want to replace with something more robust
/**
 * Removes opening tags (with or without attributes) and closing tags from s,
 * leaving the text between them untouched.
 */
function stripTags(s) {
  var anyTag = /<\w+(\s+("[^"]*"|'[^']*'|[^>])+)?>|<\/\w+>/gi;
  var textOnly = s.replace(anyTag, "");
  return textOnly;
}
/**
 * Decodes HTML entities by letting the browser parse the input inside a
 * detached <div>. Returns the nodeValue of the first resulting child node, or
 * an empty string when parsing produced no nodes.
 */
function htmlDecode(input){
  var holder = document.createElement("div");
  holder.innerHTML = input;
  if (holder.childNodes.length === 0) {
    return "";
  }
  return holder.childNodes[0].nodeValue;
}
</script>
</BODY></HTML>
One downside of generator=random is you often get talk pages or generated content that are not actual articles. If anyone can improve the query string to limit it to quality articles, that would be great!