node - Access table content using puppeteer - javascript

I have this code in a Puppeteer script. I need to access the information contained in a table. I've tried the page.$$eval() function, but nothing is logged to the console. What's wrong with the code?
// Logs in to the gateway, opens the page given by GATEWAY_PAGE and prints the
// cell text of every row of the #calllog table from Node.
(async () => {
  const browser = await puppeteer.launch({
    headless: false,
  });
  try {
    const page = await browser.newPage();
    // Await the navigation so a failed load is reported here instead of
    // becoming an unhandled rejection.
    await page.goto(process.env.GATEWAY_ADDRESS, { waitUntil: ['load', 'networkidle2'] });
    const pwdField = await page.waitForSelector('#srp_password');
    await pwdField.type(process.env.GATEWAY_PASSWORD);
    const submitBtn = await page.waitForSelector('#sign-me-in');
    // Register the navigation wait before clicking so the navigation cannot
    // complete before anyone is listening for it.
    await Promise.all([page.waitForNavigation(), submitBtn.click()]);
    await page.goto(process.env.GATEWAY_PAGE, { waitUntil: ['load', 'networkidle2'] });
    // The $$eval callback runs inside the page: a console.log there writes to
    // the browser console, not to Node. Return plain strings (DOM nodes cannot
    // be serialized back to Node) and log the result on the Node side.
    const rows = await page.$$eval('#calllog > tbody > tr', (trs) =>
      trs.map((tr) => Array.from(tr.cells, (cell) => cell.textContent.trim()))
    );
    console.log(rows);
  } finally {
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

Related

javascript puppeteer next page problem when i evaluate

I'm trying to navigate through all the pagination pages, collect the deal links, and console.log them. The problem is that it won't click through to the next page: page.click() does not work inside page.evaluate(), so the click has to be written in plain JavaScript, and that doesn't work either.
const puppeteer = require("puppeteer");
// Attempts to collect the deal links of every result page from inside a single
// page.evaluate() call, then logs the collected list and closes the browser.
(async () => {
const browser = await puppeteer.launch({
headless: false,
slowMo: 20,
defaultViewport: null,
});
const page = await browser.newPage();
await page.goto("https://www.amazon.com.tr/deals?ref_=nav_cs_gb", {
waitUntil: "networkidle2",
});
//await page.waitForSelector('[data-testid="grid-deals-container"]');
// Everything inside this callback runs in the page, not in Node.
const siktir = await page.evaluate(() => {
var while_status = true;
var list = [];
// NOTE(review): this loop is synchronous and never yields to the event loop,
// so timers scheduled inside it cannot fire while it is still running.
while (while_status) {
// setTimeout only schedules a callback; it does not pause the loop.
setTimeout(() => {}, 5000);
// Number of child elements of the deals grid container.
let sayi = document.querySelector('[data-testid="grid-deals-container"]')
.children.length;
// Push the href found three levels below each grid child.
for (let i = 0; i < sayi; i++) {
list.push(
document
.querySelector('[data-testid="grid-deals-container"]')
.children.item(i)
.children.item(0)
.children.item(0)
.children.item(0).href
);
}
// The loop ends only when no ".a-last a" link is present.
if (document.querySelector(".a-last a") === null) {
while_status = false;
}
// The click is merely scheduled here. While ".a-last a" exists the loop keeps
// iterating over the same page content and the scheduled click never runs.
setTimeout(() => {
document.querySelector(".a-last a").click();
}, 3000);
}
return list;
});
console.log(siktir);
//await page.click(".a-last a",{delay:3000});
await browser.close();
})();
A little help would be appreciated.
const puppeteer = require("puppeteer");
// Walks the paginated deals listing, logging the product links found on each
// page, and always closes the browser when done.
(async () => {
  const browser = await puppeteer.launch({
    headless: true,
    slowMo: 20,
    defaultViewport: null,
  });
  try {
    const page = await browser.newPage();
    await page.goto("https://www.amazon.com.tr/deals?ref_=nav_cs_gb", {
      waitUntil: "networkidle2",
    });
    // Text of the sixth pagination item; it is used as the number of pages.
    const numberOfDivs = await page.evaluate(() => {
      return document.querySelector("li.a-disabled:nth-child(6)").textContent;
    });
    console.log(numberOfDivs);
    // textContent is a string; convert it explicitly instead of relying on
    // implicit coercion in the loop condition.
    const pageCount = Number.parseInt(numberOfDivs, 10);
    for (let sayfa = 1; sayfa <= pageCount; sayfa++) {
      // Fixed 3 s pause before reading each page; presumably this gives the
      // grid time to render after a page change.
      await new Promise((resolve) => setTimeout(resolve, 3000));
      console.log(sayfa);
      const lale = await page.evaluate(() => {
        // Look the container up once instead of once per card.
        const container = document.querySelector('[data-testid="grid-deals-container"]');
        // The link sits three levels below each direct child of the grid.
        return Array.from(
          container.children,
          (card) => card.children.item(0).children.item(0).children.item(0).href
        );
      });
      console.log(lale);
      // N pages need only N - 1 "next" clicks; do not click after the last page.
      if (sayfa < pageCount) {
        await page.click(".a-last a");
      }
    }
  } finally {
    // Close the browser even when a selector is missing or a click fails.
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
This still needs fixing, but at least I can get the links of the products.

Puppeteer querySelector returns empty object [duplicate]

Recently I started to crawl the web using Puppeteer. Below is a code for extracting a specific product name from the shopping mall.
const puppeteer = require('puppeteer');
// Searches Naver Shopping for '양말' and prints the inner text of up to ten
// 'div.info > a.tit' result links.
(async () => {
  const width = 1600;
  const height = 1040;
  const option = { headless: false, args: [`--window-size=${width},${height}`] };
  const browser = await puppeteer.launch(option);
  try {
    const page = await browser.newPage();
    await page.setViewport({ width, height });
    // page.goto() itself resolves once the page has loaded, so no separate
    // waitForNavigation() promise is needed around it.
    await page.goto('https://shopping.naver.com/home/p/index.nhn');
    const searchBox = '.co_srh_input';
    // Wait for the element that is actually needed instead of sleeping for a
    // fixed amount of time.
    await page.waitForSelector(searchBox);
    await page.type(searchBox, '양말', { delay: 100 });
    await page.keyboard.press('Enter');
    await page.waitForSelector('div.info > a.tit');
    const stores = await page.evaluate(() => {
      const links = Array.from(document.querySelectorAll('div.info > a.tit'));
      return links.map((link) => link.innerText).slice(0, 10); // keep only the first 10 products
    });
    console.log(stores);
  } finally {
    // Release the browser even if a selector never appears.
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
I have a question. How can I output the crawled results to an HTML document (without using the database)? Please use sample code to explain it.
I used what was seen on blog.kowalczyk.info
const puppeteer = require("puppeteer");
const fs = require("fs");
/**
 * Loads https://www.google.com/ in a headless browser and saves the rendered
 * HTML to index.html in the current working directory.
 *
 * @returns {Promise<void>} Resolves once the file is written and the browser is closed.
 */
async function run() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto("https://www.google.com/", { waitUntil: "networkidle2" });
    // hacky defensive move but I don't know a better way:
    // wait a bit so that the browser finishes executing JavaScript
    // (plain timer, so it does not depend on the deprecated page.waitFor()).
    await new Promise((resolve) => setTimeout(resolve, 1 * 1000));
    const html = await page.content();
    fs.writeFileSync("index.html", html);
  } finally {
    // Close the browser even if navigation or the file write fails.
    await browser.close();
  }
}
run().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
fs.writeFile()
You can use the following write_file function that returns a Promise that resolves or rejects when fs.writeFile() succeeds or fails.
Then, you can await the Promise from within your anonymous, asynchronous function and check whether or not the data was written to the file:
'use strict';
const fs = require('fs');
const puppeteer = require('puppeteer');
/**
 * Writes `data` to `file` as UTF-8 text using the callback-based fs.writeFile().
 *
 * @param {string} file - Path of the file to write.
 * @param {string} data - Text to write.
 * @returns {Promise<boolean>} Resolves with `true` once the write succeeds;
 *   rejects with the underlying `Error` when the write fails.
 */
const write_file = (file, data) => new Promise((resolve, reject) => {
  fs.writeFile(file, data, 'utf8', (error) => {
    if (error) {
      console.error(error);
      // Reject with the real error (not `false`) so callers keep the cause
      // and the stack trace.
      reject(error);
    } else {
      resolve(true);
    }
  });
});
// Extracts up to ten result titles from the page and writes them to
// example.html, reporting a failed write on stderr.
(async () => {
  // ...
  const stores = await page.evaluate(() => {
    return Array.from(document.querySelectorAll('div.info > a.tit'), link => link.innerText).slice(0, 10); // keep only the first 10 products
  });
  // write_file() signals failure by rejecting, which makes `await` throw, so a
  // bare `=== false` comparison would never be reached. Handle both a
  // rejection and a resolved `false`.
  let written;
  try {
    written = await write_file('example.html', stores.toString());
  } catch (error) {
    written = false;
  }
  if (written === false) {
    console.error('Error: Unable to write stores to example.html.');
  }
  // ...
})(); // invoke the async function; without the call nothing would run

Puppeteer to listen for map.on('load') from within Node

Using Puppeteer to listen for map.on('load') from within Node.
// Opens index.html and forwards the page's map 'load' event to Node's console.
(async () => {
  const browser = await puppeteer.launch({ headless: false, devtools: true });
  const page = await browser.newPage();
  function nodeLog(msg) {
    console.log(msg);
  }
  // The page.evaluate() callback executes inside the browser, where Node-side
  // functions such as nodeLog do not exist. exposeFunction() installs
  // window.nodeLog in the page and routes each call back to the Node function.
  await page.exposeFunction('nodeLog', nodeLog);
  page.on('load', async () => {
    await page.evaluate(() => {
      window.map.on('load', () => {
        console.log("This runs on the index.html js but I do not need that");
        nodeLog("WHY IS THIS NOT WORKING??");
      });
    });
  });
  await page.goto(`file:${__dirname + '/index.html'}`);
})();
waitForSelector should work, e.g. when using a selector from the fully rendered map. Alternatively, listen for the map's bounds_changed or idle event, which are triggered once the map is fully loaded. The map's load event might fire too soon.
Here's a working example, which I've just put together:
const puppeteer = require('puppeteer');
const url = 'https://developers-dot-devsite-v2-prod.appspot.com/maps/documentation/javascript/examples/full/map-simple';
// Kick off the run. run() now tracks the whole browser session, so any failure
// inside it reaches this catch handler. The message is logged right after the
// session has been started, while it is still in progress.
run().catch(error => {
  console.log(error)
});
console.log('entering asynchronous execution.')

/**
 * Opens the sample map page, registers an 'idle' listener on window.map that
 * appends a marker <div id="puppeteer-map-idle"> to the document, and waits up
 * to 5 seconds for that marker to appear.
 *
 * @returns {Promise<void>} Resolves once the marker element has been found;
 *   rejects if launching, navigating or waiting fails.
 */
async function run() {
  const browser = await puppeteer.launch({devtools: true, headless: false});
  const page = await browser.newPage();
  await page.goto(url);
  await page.evaluate(() => {
    // Runs in the page: signal "map idle" to Node through a DOM marker that
    // waitForSelector() can observe.
    window.map.addListener('idle', function(){
      console.log('the map is idle now');
      var div = document.createElement('div');
      div.setAttribute('id', 'puppeteer-map-idle');
      window.document.body.append(div);
    });
  });
  await page.waitForSelector('#puppeteer-map-idle', {
    timeout: 5000
  });
  console.log('selector #puppeteer-map-idle has been found.');
  /* in here the map should be fully loaded. */
  // await browser.close();
}
Admittedly that's kind of a workaround, but the DOM manipulation can be observed.
I also figured out how to return information. I reread the docs and got some understanding. I was not understanding the context.
// Prints `msg` on Node's console. The arrow function must call console.log;
// merely returning the function reference would print nothing.
const nodeLog = msg => console.log(msg);
// The value returned by the page.evaluate() callback is handed back to Node,
// where it is passed to nodeLog.
const msg = await page.evaluate(() => 'this is working');
nodeLog(msg);

Scraping a table with Puppeteer

I have the following Puppeteer script, which works correctly; it extracts all the information from the table.
// Prints, for each entry found, the text of the .time, .team-home and
// .team-away elements inside the page's tables.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto("https://www.mismarcadores.com/baloncesto/espana/liga-endesa/partidos/");
    // Reads the text of every element matching `selector`, in document order.
    const readColumn = (selector) =>
      page.$$eval(selector, (cells) => cells.map((cell) => cell.textContent));
    const time = await readColumn('table tr .time');
    const teamHome = await readColumn('table tr .team-home');
    const teamAway = await readColumn('table tr .team-away');
    // The three arrays are read by the same index, as parallel columns.
    for (let i = 0; i < time.length; i++) {
      console.log(time[i]);
      console.log(teamHome[i]);
      console.log(teamAway[i]);
    }
  } finally {
    // Close the browser even if navigation or scraping fails.
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Now I try to create this in a better way and I have the following code.
// Attempts to build one { time, teamHome, teamAway } object per table row in a
// single page.evaluate() call and logs the result.
// NOTE(review): the browser is never closed in this script.
(async () => {
const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.goto("https://www.mismarcadores.com/baloncesto/espana/liga-endesa/partidos/");
console.log("started evalating");
// The callback below has a block body without a `return`, so it yields
// undefined and `data` ends up undefined.
var data = await page.evaluate(() => {
Array.from(
document.querySelectorAll('table tr')
).map(row => {
return {
// These are DOM elements (or null when a row has no such cell), not text.
// NOTE(review): element handles are presumably not transferable to Node as-is;
// returning their textContent would be needed as well.
time: row.querySelector(".time"),
teamHome: row.querySelector(".team-home"),
teamAway: row.querySelector(".team-away")
};
});
});
console.log(data);
})();
When I try to execute the second script, I receive undefined.
The goal is to convert the first script into the form of the second script.
Could anyone help me?
You need to make the tr selector more specific (e.g. by adding the .stage-scheduled class) and return the .textContent properties instead of the elements themselves. Try this:
// Builds one { time, teamHome, teamAway } record of plain strings for every
// table row carrying the stage-scheduled class.
var data = await page.evaluate(() => {
  const scheduledRows = document.querySelectorAll('table tr.stage-scheduled');
  // Text of the first descendant of `row` matching `selector`.
  const textOf = (row, selector) => row.querySelector(selector).textContent;
  return [...scheduledRows].map((row) => ({
    time: textOf(row, ".time"),
    teamHome: textOf(row, ".team-home"),
    teamAway: textOf(row, ".team-away"),
  }));
});

How to print an HTML document using Puppeteer?

Recently I started to crawl the web using Puppeteer. Below is a code for extracting a specific product name from the shopping mall.
const puppeteer = require('puppeteer');
// Searches Naver Shopping for '양말' and prints the inner text of up to ten
// 'div.info > a.tit' result links.
(async () => {
  const width = 1600;
  const height = 1040;
  const option = { headless: false, args: [`--window-size=${width},${height}`] };
  const browser = await puppeteer.launch(option);
  try {
    const page = await browser.newPage();
    await page.setViewport({ width, height });
    // page.goto() itself resolves once the page has loaded, so no separate
    // waitForNavigation() promise is needed around it.
    await page.goto('https://shopping.naver.com/home/p/index.nhn');
    const searchBox = '.co_srh_input';
    // Wait for the element that is actually needed instead of sleeping for a
    // fixed amount of time.
    await page.waitForSelector(searchBox);
    await page.type(searchBox, '양말', { delay: 100 });
    await page.keyboard.press('Enter');
    await page.waitForSelector('div.info > a.tit');
    const stores = await page.evaluate(() => {
      const links = Array.from(document.querySelectorAll('div.info > a.tit'));
      return links.map((link) => link.innerText).slice(0, 10); // keep only the first 10 products
    });
    console.log(stores);
  } finally {
    // Release the browser even if a selector never appears.
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
I have a question. How can I output the crawled results to an HTML document (without using the database)? Please use sample code to explain it.
I used what was seen on blog.kowalczyk.info
const puppeteer = require("puppeteer");
const fs = require("fs");
/**
 * Loads https://www.google.com/ in a headless browser and saves the rendered
 * HTML to index.html in the current working directory.
 *
 * @returns {Promise<void>} Resolves once the file is written and the browser is closed.
 */
async function run() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto("https://www.google.com/", { waitUntil: "networkidle2" });
    // hacky defensive move but I don't know a better way:
    // wait a bit so that the browser finishes executing JavaScript
    // (plain timer, so it does not depend on the deprecated page.waitFor()).
    await new Promise((resolve) => setTimeout(resolve, 1 * 1000));
    const html = await page.content();
    fs.writeFileSync("index.html", html);
  } finally {
    // Close the browser even if navigation or the file write fails.
    await browser.close();
  }
}
run().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
fs.writeFile()
You can use the following write_file function that returns a Promise that resolves or rejects when fs.writeFile() succeeds or fails.
Then, you can await the Promise from within your anonymous, asynchronous function and check whether or not the data was written to the file:
'use strict';
const fs = require('fs');
const puppeteer = require('puppeteer');
/**
 * Writes `data` to `file` as UTF-8 text using the callback-based fs.writeFile().
 *
 * @param {string} file - Path of the file to write.
 * @param {string} data - Text to write.
 * @returns {Promise<boolean>} Resolves with `true` once the write succeeds;
 *   rejects with the underlying `Error` when the write fails.
 */
const write_file = (file, data) => new Promise((resolve, reject) => {
  fs.writeFile(file, data, 'utf8', (error) => {
    if (error) {
      console.error(error);
      // Reject with the real error (not `false`) so callers keep the cause
      // and the stack trace.
      reject(error);
    } else {
      resolve(true);
    }
  });
});
// Extracts up to ten result titles from the page and writes them to
// example.html, reporting a failed write on stderr.
(async () => {
  // ...
  const stores = await page.evaluate(() => {
    return Array.from(document.querySelectorAll('div.info > a.tit'), link => link.innerText).slice(0, 10); // keep only the first 10 products
  });
  // write_file() signals failure by rejecting, which makes `await` throw, so a
  // bare `=== false` comparison would never be reached. Handle both a
  // rejection and a resolved `false`.
  let written;
  try {
    written = await write_file('example.html', stores.toString());
  } catch (error) {
    written = false;
  }
  if (written === false) {
    console.error('Error: Unable to write stores to example.html.');
  }
  // ...
})(); // invoke the async function; without the call nothing would run

Categories