I have the following script with Puppeteer that works correctly; this code extracts all the information from the table.
// Prints the time, home team and away team of every fixture row found on the
// schedule page, three console lines per fixture.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // Navigate first: querying the DOM before goto() would inspect a blank page.
    await page.goto("https://www.mismarcadores.com/baloncesto/espana/liga-endesa/partidos/");

    // Returns the textContent of every element matching `selector`, evaluated
    // inside the page so only plain strings travel back to Node.
    const readTexts = (selector) =>
      page.$$eval(selector, (cells) => cells.map((cell) => cell.textContent));

    const time = await readTexts('table tr .time');
    const teamHome = await readTexts('table tr .team-home');
    const teamAway = await readTexts('table tr .team-away');

    // The three lists are index-aligned; `time` drives the iteration.
    for (let i = 0; i < time.length; i++) {
      console.log(time[i]);
      console.log(teamHome[i]);
      console.log(teamAway[i]);
    }
  } finally {
    // Always release the browser process, even when navigation or scraping fails.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Now I am trying to write this in a better way, and I have the following code.
// Second attempt: build one { time, teamHome, teamAway } object per table row
// in a single page.evaluate call and log the result.
// NOTE(review): this script never calls browser.close().
(async () => {
const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.goto("https://www.mismarcadores.com/baloncesto/espana/liga-endesa/partidos/");
console.log("started evalating");
// NOTE(review): the callback below has a block body with no `return`, so the
// mapped array is discarded and the callback itself yields undefined.
var data = await page.evaluate(() => {
Array.from(
document.querySelectorAll('table tr')
).map(row => {
return {
// NOTE(review): each querySelector gives an Element (or null when the row has
// no such cell), not the cell's text.
time: row.querySelector(".time"),
teamHome: row.querySelector(".team-home"),
teamAway: row.querySelector(".team-away")
};
});
});
console.log(data);
})();
When I try to execute the second script, I receive `undefined`.
The goal is to convert the first script into the form of the second script.
Could anyone help me?
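You need to `return` the array from the `page.evaluate` callback (without a `return` the callback yields `undefined`), to select the tr elements more specifically (for example by adding the .stage-scheduled class), and to return the .textContent properties instead of the elements themselves. Try this: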
// Collect one plain { time, teamHome, teamAway } record per scheduled row.
// Everything inside the callback runs in the page, so only strings come back.
var data = await page.evaluate(() => {
  const scheduledRows = document.querySelectorAll('table tr.stage-scheduled');
  // Text of the first descendant of `row` that matches `selector`.
  const textOf = (row, selector) => row.querySelector(selector).textContent;
  return Array.from(scheduledRows, (row) => ({
    time: textOf(row, ".time"),
    teamHome: textOf(row, ".team-home"),
    teamAway: textOf(row, ".team-away"),
  }));
});
Related
I'm trying to navigate through all the pagination pages, collect the deal links, and console.log them. The problem is that it won't click through to the next page: page.click() does not work inside page.evaluate(), so the click has to be written in plain in-page JavaScript, and that does not work either.
const puppeteer = require("puppeteer");
// Attempt to walk every page of the deals listing from inside a single
// page.evaluate call, collecting the href of each deal card into one list.
(async () => {
const browser = await puppeteer.launch({
headless: false,
slowMo: 20,
defaultViewport: null,
});
const page = await browser.newPage();
await page.goto("https://www.amazon.com.tr/deals?ref_=nav_cs_gb", {
waitUntil: "networkidle2",
});
//await page.waitForSelector('[data-testid="grid-deals-container"]');
const siktir = await page.evaluate(() => {
var while_status = true;
var list = [];
// NOTE(review): this loop is synchronous. Timer callbacks scheduled inside it
// cannot run until the loop has finished, and the loop only finishes once
// `.a-last a` is absent from the document.
while (while_status) {
// Schedules an empty callback; it does not pause the loop.
setTimeout(() => {}, 5000);
// Number of direct children of the deals grid.
let sayi = document.querySelector('[data-testid="grid-deals-container"]')
.children.length;
for (let i = 0; i < sayi; i++) {
// href of the first child's first child's first child of the i-th grid entry.
list.push(
document
.querySelector('[data-testid="grid-deals-container"]')
.children.item(i)
.children.item(0)
.children.item(0)
.children.item(0).href
);
}
// No "next" link in the document: stop after this iteration.
if (document.querySelector(".a-last a") === null) {
while_status = false;
}
// NOTE(review): the click is only scheduled here, also on the iteration where
// `.a-last a` was just found to be null.
setTimeout(() => {
document.querySelector(".a-last a").click();
}, 3000);
}
return list;
});
console.log(siktir);
//await page.click(".a-last a",{delay:3000});
await browser.close();
})();
A little help would be appreciated.
const puppeteer = require("puppeteer");
// Walks the deals listing page by page from Node: for every page it logs the
// page number and the list of deal links, then clicks the "next" link.
(async () => {
  // Plain timer-based pause; avoids the deprecated page.waitForTimeout helper.
  const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const browser = await puppeteer.launch({
    headless: true,
    slowMo: 20,
    defaultViewport: null,
  });
  try {
    const page = await browser.newPage();
    await page.goto("https://www.amazon.com.tr/deals?ref_=nav_cs_gb", {
      waitUntil: "networkidle2",
    });

    // The text of this pagination item is used as the number of pages to visit.
    // It arrives as a string, so parse it before using it as a loop bound.
    const pageCountText = await page.$eval(
      "li.a-disabled:nth-child(6)",
      (item) => item.textContent
    );
    const numberOfDivs = Number.parseInt(pageCountText, 10);
    if (Number.isNaN(numberOfDivs)) {
      throw new Error(`Could not read the page count from "${pageCountText}"`);
    }
    console.log(numberOfDivs);

    for (let pageNumber = 1; pageNumber <= numberOfDivs; pageNumber++) {
      // Give the grid time to render the current page before reading it.
      await delay(3000);
      console.log(pageNumber);

      const links = await page.evaluate(() => {
        const grid = document.querySelector('[data-testid="grid-deals-container"]');
        // Each grid entry nests its link three levels down (first child each time).
        return Array.from(
          grid.children,
          (entry) => entry.children.item(0).children.item(0).children.item(0).href
        );
      });
      console.log(links);

      // The "next" link may be missing on the final page; clicking a missing
      // selector would throw, so stop instead.
      const nextLink = await page.$(".a-last a");
      if (nextLink === null) {
        break;
      }
      await nextLink.click();
    }
  } finally {
    // Release the browser even if scraping fails part-way through.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
It still needs some fixes, but at least I can get the links of the products.
I have this code in a Puppeteer script. I need to access the information that is part of a table. I've tried the page.$$eval() function, but nothing is logged to the console. What's wrong with the code?
// Logs in to the gateway using credentials from environment variables, then
// opens GATEWAY_PAGE and runs $$eval over the rows of the #calllog table.
// NOTE(review): this script never calls browser.close().
(async() => {
const browser = await puppeteer.launch({
headless: false
});
const page = await browser.newPage();
// NOTE(review): this goto is not awaited; the waitForSelector below starts
// while the navigation is still in flight.
page.goto(process.env.GATEWAY_ADDRESS, { waitUntil: ['load', 'networkidle2']});
const pwdField = await page.waitForSelector('#srp_password');
await pwdField.type(process.env.GATEWAY_PASSWORD);
const submitBtn = await page.waitForSelector('#sign-me-in');
await submitBtn.click();
// NOTE(review): waitForNavigation is only called after the click has been
// awaited; if that navigation already finished, this may never resolve - TODO confirm.
// The promise chain below is neither awaited nor given a rejection handler.
page.waitForNavigation().then( (response) => {
page.goto(process.env.GATEWAY_PAGE, { waitUntil: ['load', 'networkidle2']}).then( (response) => {
// NOTE(review): the $$eval callback is executed in the page, so this
// console.log presumably writes to the browser's console rather than to
// Node's stdout - confirm against the Puppeteer docs. The callback returns
// nothing, and the $$eval promise itself is not used.
page.$$eval('#calllog > tbody > tr', (rows) => {
console.log(rows);
// let rowsData = [];
// rows.forEach( (row) => {
// console.log(row);
// });
});
});
});
})();
I'm not a pro, just trying to scrape some data from a website.
Someone here helped me select the first "frame", but I need to scrape data from the third frame and concatenate the data from frame 1 + frame 2 + frame 3 into a single result. This is the site
This is what I have:
const puppeteer = require('puppeteer');
// Opens the page, clicks one <option> in the frame named 'stanga' and one in
// the frame named 'centru', then returns the innerText of every element
// matching "body > font".
let scrape = async() => {
const browser = await puppeteer.launch({
headless: false,
slowMo: 250
});
const page = await browser.newPage();
// NOTE(review): the URL is an empty string here.
// NOTE(review): the option key is spelled `waituntil`; Puppeteer documents it
// as `waitUntil`, so this setting is presumably ignored - confirm.
await page.goto('', {
waituntil: "networkidle0"
});
const frame = await page.frames().find(f => f.name() === 'stanga');
const button = await frame.$('body > form > font > select > option:nth-child(12)');
// NOTE(review): click() is not awaited; only the fixed 1000 ms pause follows it.
button.click();
await page.waitFor(1000);
const frame1 = await page.frames().find(a => a.name() ==='centru');
const select = await frame1.$('body > form > font > select > option:nth-child(1)');
await page.waitFor(500);
// NOTE(review): click() is not awaited here either.
select.click();
await page.waitFor(500);
// NOTE(review): this $$eval is called on `page`, not on a frame, so the
// selector is evaluated against the top-level document.
const result = await page.$$eval("body > font", (options) => {
const timpi = options.map(option => option.innerText);
return timpi
});
await browser.close();
return result;
};
// Run the scraper and print whatever it resolves to.
scrape().then((value) => {
console.log(value);
});
Thank you for any help.
I have fixed your script:
const puppeteer = require('puppeteer');
/**
 * Scrapes the frames of the page and returns one entry per recognised frame.
 *
 * For the frames named 'stanga' and 'centru' the first <option> is clicked and
 * the innerText of all options is collected; for the frame named 'dreapta' the
 * body text is split into lines.
 *
 * @returns {Promise<Array<Object<string, string[]>>>} A list of single-key
 *   objects of the form `{ [frameName]: string[] }`, in page.frames() order.
 */
const scrape = async () => {
  // Timer-based pause; replaces the deprecated page.waitFor(ms).
  const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const browser = await puppeteer.launch({headless: false});
  try {
    const page = await browser.newPage();
    await page.goto('http://example.com/txt', { waitUntil: "networkidle2" });

    const optionSelector = 'body > form > font > select > option';
    const expectedFrames = ['stanga', 'centru'];
    const scrapedText = [];

    // innerText of every option currently present in `frame`.
    const getOptions = (frame) =>
      frame.$$eval(optionSelector, (options) => options.map((option) => option.innerText));

    for (const frame of page.frames()) {
      const name = frame.name();
      if (expectedFrames.includes(name)) {
        // Clicking the first option makes the next frame load its content;
        // the pause gives that load time to finish.
        await frame.click(optionSelector);
        await delay(1000);
        scrapedText.push({[name]: await getOptions(frame)});
      } else if (name === 'dreapta') {
        const bodyText = await frame.$eval('body', (elm) => elm.innerText);
        scrapedText.push({[name]: bodyText.split(/\n+/g)});
      }
    }
    return scrapedText;
  } finally {
    // Close the browser on success and on failure alike.
    await browser.close();
  }
};

scrape()
  .then((value) => {
    console.log(value);
  })
  .catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
OUTPUT:
[{
stanga: ['Mures','A Saguna', 'A.Guttenbrun_1', ... and more items]
},
{
centru: ['[0] E3']
},
{
dreapta: ['Linia: E3','2019-07-25 23:19:40','Sosire1: 23:39','Sosire2: 23:41']
}]
You have to improve your scraper so that it not only clicks on the select, but also reads the selected item's value from the select element.
// Pick the frame whose name is "stanga".
const frame = page.frames().find((candidate) => candidate.name() === "stanga");
// Handle to the 12th <option> of the select inside that frame.
const select1 = await frame.$("body > form > font > select > option:nth-child(12)");
// Read the option's text inside the frame and bring it back as a string.
const select1Value = await frame.evaluate((option) => option.textContent, select1);
select1Value will hold the text of the selected item in the select box. The same must be done for select2 in the next frame.
In your code, you don't select frame3; that's why you cannot read data from it.
I have updated your code, and this is the result I could get from it:
$ node scrape.js
Frame1: AT_Miresei_1
Frame2: [1] E1
Frame3: Linia: E12019-07-25 22:29:13Sosire1: 22:55 Sosire2: 23:00
This is what I ended up with, but there is a lot to improve (code quality and readability).
const puppeteer = require("puppeteer");
/**
 * Selects one option in each of the first two frames and reads the resulting
 * text of the third frame.
 *
 * @returns {Promise<string>} Three lines: "Frame1: <option text>",
 *   "Frame2: <option text>" and "Frame3: <trimmed body text>".
 * @throws {Error} When one of the expected frames or options is missing.
 */
const scrape = async () => {
  // Timer-based pause; replaces the deprecated page.waitFor(ms).
  const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Finds a frame by name and fails loudly instead of dereferencing undefined.
  const requireFrame = (page, name) => {
    const found = page.frames().find((candidate) => candidate.name() === name);
    if (!found) {
      throw new Error(`Frame "${name}" not found`);
    }
    return found;
  };

  // Resolves an option handle inside `frame`, failing loudly when absent.
  const requireOption = async (frame, selector) => {
    const handle = await frame.$(selector);
    if (handle === null) {
      throw new Error(`No element matches "${selector}" in frame "${frame.name()}"`);
    }
    return handle;
  };

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // The option key is `waitUntil`; the lower-case spelling is not recognised.
    await page.goto("http://ratt.ro/txt", {
      waitUntil: "networkidle0"
    });

    // Frame 1
    const frame = requireFrame(page, "stanga");
    const button = await requireOption(
      frame,
      "body > form > font > select > option:nth-child(12)"
    );
    const select1Value = await frame.evaluate((option) => option.textContent, button);
    await button.click();
    await delay(1000);

    // Frame 2
    const frame1 = requireFrame(page, "centru");
    const select = await requireOption(
      frame1,
      "body > form > font > select > option:nth-child(1)"
    );
    const select2Value = await frame1.evaluate((option) => option.textContent, select);
    await delay(200);
    await select.click();
    await delay(200);

    // Frame 3
    const frame3 = requireFrame(page, "dreapta");
    const frame3Text = await frame3.$eval("body", (body) => body.textContent);

    return `Frame1: ${select1Value}\nFrame2: ${select2Value}\nFrame3: ${frame3Text.trim()}`;
  } finally {
    // Close the browser on success and on failure alike.
    await browser.close();
  }
};

scrape()
  .then((value) => {
    console.log(value);
  })
  .catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
I'm trying to implement an async `each` loop in Node.js.
I have a variable html which contains the page content. There I want to iterate through all divs that have a particular class. Inside those divs, there are some links that I want to navigate to and get some content from as well. Basically, since `each` expects a synchronous function, it doesn't wait for the asynchronous code inside the callback to finish.
I tried to do it like this:
// Loads mainUrl, takes the page HTML, and for every `.data-row` in it tries to
// open the row's detail link in a second tab and read that page's title.
const browser = await puppeteer.launch({
headless: true
});
const page = await browser.newPage();
const page2 = await browser.newPage();
const mainUrl = "http ... ";
const html = await page.goto(mainUrl)
.then(function() {
return page.content();
});
// NOTE(review): `function() => { ... }` mixes function-expression and arrow
// syntax and is not valid JavaScript.
// NOTE(review): the callbacks below use `await` without being declared async.
await $('.data-row', html).each(function() => {
const url = await $(this).find(".link-details a").attr("href");
// NOTE(review): this navigation is not awaited, and the title below is read
// from `page`, not from `page2` where the detail URL is being opened.
page2.goto(url)
.then(function() {
const title = await page.evaluate(el => el.innerHTML, await page.$('#title'));
// do other things
});
// do other things
// create a json with data add it to a list
});
But the title gives undefined and it's executed after the loop finishes executing ... What can I do here?
I've edited your code to show how Puppeteer was supposed to be used. Your main problem here was using jQuery where it was not needed and attempting to await things that were not asynchronous; while mixing in a promise chain.
// Opens the main page, collects the detail-link URL of every `.data-row`, then
// visits each URL in a second tab and logs that page's document title.
(async () => {
  const browser = await puppeteer.launch({
    headless: true
  });
  try {
    const page = await browser.newPage();
    const page2 = await browser.newPage();
    const mainUrl = "http ... ";

    await page.goto(mainUrl);
    await page.waitForSelector('.data-row');

    // DOM nodes cannot be handed back from the page to Node, so the hrefs are
    // extracted as plain strings inside the page. Rows without a detail link
    // are skipped.
    const urls = await page.$$eval('.data-row', (rows) =>
      rows
        .map((row) => row.querySelector('.link-details a'))
        .filter((link) => link !== null)
        .map((link) => link.href)
    );

    // A plain for...of loop keeps the navigations sequential: each goto is
    // awaited before the title is read and before the next URL is opened.
    for (const url of urls) {
      await page2.goto(url);
      const title = await page2.evaluate(() => document.title);
      console.log(title);
    }
  } finally {
    // Release the browser even when a navigation fails.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
You can't await jQuery.each, so you can try doing the following.
// Turn the matched rows into a real array so they can be walked with for...of,
// which (unlike .each) lets every step be awaited in order.
const rows = $('.data-row', html).toArray();
for (const row of rows) {
  // Wrap the current row; `this` is not bound to the row in a for...of loop.
  const url = $(row).find(".link-details a").attr("href");
  // Wait for the detail page to load in the second tab before reading from it.
  await page2.goto(url);
  const title = await page2.$eval('#title', (el) => el.innerHTML);
  // do other things
  // create a json with data add it to a list
}
I am having a problem getting the code in the beforeAll function to finish and wait for the promise that resolves storyLinks. The console.log at the end of the snippet prints undefined, but I need it to print the hrefs of the stories in my Storybook. I cannot wrap this in an async function because the testing pipeline gets clogged on failure.
const puppeteer = require('puppeteer');
const { toMatchImageSnapshot } = require('jest-image-snapshot');
expect.extend({ toMatchImageSnapshot });
const timeout = 5000;
// Suite that connects to a running browser, clicks the component entries on
// http://localhost:8080 and collects { href, name } for every story link.
describe('visual tests', () => {
let page, browser, storyLinks;
const selector = `a[href*="selectedStory="]`;
beforeAll(async() => {
// NOTE(review): browserWSEndpoint is not defined anywhere in this snippet.
browser = await puppeteer.connect({browserWSEndpoint});
page = await browser.newPage();
await page.goto('http://localhost:8080');
await page.evaluate(() => {
const components = Array.from(document.querySelectorAll('div[data-name]'));
// Starts at index 1, so the first matched element is never clicked.
for(let i = 1; i < components.length; i++) {
components[i].addEventListener('click',() => {});
components[i].click();
}
});
// `selector` is passed as an argument because the callback runs in the page
// and cannot see variables from this file.
storyLinks = await page.evaluate((selector) => {
const stories = Array.from(document.querySelectorAll(selector));
const links = stories.map(story => {
let href = story.href;
// Replace every non-alphanumeric character with '-', then collapse the first
// run of two or more dashes (the second regex has no `g` flag).
let name = story.text.replace(/[^A-Z0-9]/ig, '-').replace(/-{2,}/,'-');
// Text between "selectedKind=" and the next '&' in the href.
let component = href.match(/selectedKind=(.*?)\&/).pop();
return {href: href, name: component + '-' + name};
});
return links;
}, selector);
}, timeout);
afterAll(async () => {
await page.close();
await browser.disconnect();
})
// NOTE(review): this statement sits directly in the describe body, while
// storyLinks is only assigned inside the async beforeAll callback above.
console.log(storyLinks);
// NOTE(review): `timeout` is passed to describe as an extra argument here.
}, timeout);
There are a few things I notice that might be causing your issues. You need to add async to your describe block. Also, `describe` groups together multiple tests, so you're missing an `it` or `test` block. The Jest docs also recommend adding `expect.assertions(NUM_OF_ASSERTIONS)` to asynchronous tests. I'd do something like:
const puppeteer = require('puppeteer');
const { toMatchImageSnapshot } = require('jest-image-snapshot');
expect.extend({ toMatchImageSnapshot });
const timeout = 5000;
/**
 * Loads http://localhost:8080, clicks every `div[data-name]` element and
 * collects the story links that are present afterwards.
 *
 * @param {object} page - Puppeteer page; only `goto` and `evaluate` are used.
 * @returns {Promise<Array<{href: string, name: string}>>} One entry per
 *   matching anchor; `name` is "<selectedKind value>-<sanitised link text>".
 */
async function myStoryLinkTest(page) {
  const selector = `a[href*="selectedStory="]`;
  await page.goto('http://localhost:8080');

  // Click each component entry so that its story links are rendered.
  await page.evaluate(() => {
    Array.from(document.querySelectorAll('div[data-name]')).forEach((item) => {
      item.addEventListener('click', () => {});
      item.click();
    });
  });

  // The callback runs inside the page and cannot see this function's locals,
  // so `selector` has to be handed over explicitly as an evaluate argument.
  const storyLinks = await page.evaluate((linkSelector) => {
    return Array.from(document.querySelectorAll(linkSelector), (story) => {
      const href = story.href;
      // Non-alphanumerics become '-'; the first run of dashes is collapsed.
      const name = story.text.replace(/[^A-Z0-9]/gi, '-').replace(/-{2,}/, '-');
      // Text between "selectedKind=" and the next '&' in the href.
      const component = href.match(/selectedKind=(.*?)\&/).pop();
      return { href: href, name: component + '-' + name };
    });
  }, selector);

  return storyLinks;
}
// Suite wiring: connect to the running browser once, run the story-link check,
// then release the page and the connection.
// The describe callback is deliberately synchronous: Jest collects tests while
// it runs and does not support a describe callback that returns a Promise.
// All asynchronous work lives in the hooks and in the test itself.
describe('visual tests', () => {
  let page;
  let browser;

  beforeAll(async () => {
    browser = await puppeteer.connect({ browserWSEndpoint });
    page = await browser.newPage();
  });

  afterAll(async () => {
    // Either may be unset if beforeAll failed part-way through.
    if (page) {
      await page.close();
    }
    if (browser) {
      await browser.disconnect();
    }
  });

  it('should do something with storyLinks', async () => {
    // Guarantees the assertion below actually ran before the test resolves.
    expect.assertions(1);
    const storyLinkResult = await myStoryLinkTest(page);
    expect(storyLinkResult).toEqual('Some value you expect');
  }, timeout);
});