trying to navigate all pagination , get deals links , and console.log them.but problem is: it wont click the next page since page.click function not works inside page.evaluate() need to write them with js and not works
const puppeteer = require("puppeteer");
(async () => {
const browser = await puppeteer.launch({
headless: false,
slowMo: 20,
defaultViewport: null,
});
const page = await browser.newPage();
await page.goto("https://www.amazon.com.tr/deals?ref_=nav_cs_gb", {
waitUntil: "networkidle2",
});
//await page.waitForSelector('[data-testid="grid-deals-container"]');
const siktir = await page.evaluate(() => {
var while_status = true;
var list = [];
while (while_status) {
setTimeout(() => {}, 5000);
let sayi = document.querySelector('[data-testid="grid-deals-container"]')
.children.length;
for (let i = 0; i < sayi; i++) {
list.push(
document
.querySelector('[data-testid="grid-deals-container"]')
.children.item(i)
.children.item(0)
.children.item(0)
.children.item(0).href
);
}
if (document.querySelector(".a-last a") === null) {
while_status = false;
}
setTimeout(() => {
document.querySelector(".a-last a").click();
}, 3000);
}
return list;
});
console.log(siktir);
//await page.click(".a-last a",{delay:3000});
await browser.close();
})();
A lil help would be good
const puppeteer = require("puppeteer");
(async () => {
const browser = await puppeteer.launch({
headless: true,
slowMo: 20,
defaultViewport: null,
});
const page = await browser.newPage();
await page.goto("https://www.amazon.com.tr/deals?ref_=nav_cs_gb", {
waitUntil: "networkidle2",
});
const numberOfDivs = await page.evaluate(() => {
return document.querySelector("li.a-disabled:nth-child(6)").textContent;
});
console.log(numberOfDivs);
var sayfa = 0;
for (let i = 0; i < numberOfDivs; i++) {
await page.waitForTimeout(3000);
sayfa++;
console.log(sayfa);
var lale = await page.evaluate(() => {
let list = [];
var sayi = document.querySelector('[data-testid="grid-deals-container"]')
.children.length;
for (let i = 0; i < sayi; i++) {
list.push(
document
.querySelector('[data-testid="grid-deals-container"]')
.children.item(i)
.children.item(0)
.children.item(0)
.children.item(0).href
);
}
return list;
});
console.log(lale);
await page.click(".a-last a");
}
await browser.close();
})();
Still need to get fixed but at least i can get the links of the products.
Related
This is working correctly it but it only loops over the first 20 people then it stops. As I scroll down the chat window new grid cells are populating and the previous ones are disappearing.
Here is a piece of code that I've found online but I can't get it to scroll inside of a div. How can I get this running with my code?:
function extractItems() {
const extractedElements = document.querySelectorAll('#container > div.blog-post');
const items = [];
for (let element of extractedElements) {
items.push(element.innerText);
}
return items;
}
async function scrapeItems(
page,
extractItems,
itemCount,
scrollDelay = 800,
) {
let items = [];
try {
let previousHeight;
while (items.length < itemCount) {
items = await page.evaluate(extractItems);
previousHeight = await page.evaluate('document.body.scrollHeight');
await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
await page.waitForFunction(`document.body.scrollHeight > ${previousHeight}`);
await page.waitForTimeout(scrollDelay);
}
} catch(e) { }
return items;
}
Here is mine:
const fs = require('fs');
const puppeteer = require('puppeteer');
function extractItems() {
const extractedElements = document.querySelectorAll('#root > div.Fpg8t > div.BL7do > nav > div:nth-child(1) > div');
const items = [];
for (let element of extractedElements) {
items.push(element.innerText);
}
return items;
}
async function scrapeItems(
page,
extractItems,
itemCount,
scrollDelay = 800,
) {
let items = [];
try {
let previousHeight;
while (items.length < itemCount) {
items = await page.evaluate(extractItems);
previousHeight = await page.evaluate('document.body.scrollHeight');
await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
await page.waitForFunction(`document.body.scrollHeight > ${previousHeight}`);
await page.waitForTimeout(scrollDelay);
}
} catch (e) { }
return items;
}
(async () => {
const browser = await puppeteer.launch({
headless: false,
executablePath: 'C:/Program Files/Google/Chrome/Application/chrome.exe',
ignoreDefaultArgs: true,
args: [
'--remote-debugging-port=9444',
'--user-data-dir=D:/chrome-profiles/Snapchat1',
'--no-first-run',
'--no-default-browser-check',
`--window-size=1920,1080`,
]
});
await new Promise(r => setTimeout(r, 2000));
const pages = await browser.pages();
const page = pages[0];
await page.setViewport({
width: 1920,
height: 1200,
deviceScaleFactor: 1,
});
await page.goto('https://web.snapchat.com/');
await new Promise(function (resolve) { setTimeout(resolve, 5000) });
const items = await scrapeItems(page, extractItems, 50);
fs.writeFileSync('./items.txt', items.join('\n') + '\n');
})();
I am attempting to write a script that locates the largest image on a page. The first step of this process would be to retrieve all the image sources on a particular website. This is where I am stuck.
const puppeteer = require('puppeteer');
function ImageFetcher(pageURL, partName) {
return new Promise( async (resolve, reject) => {
try {
const browser = await puppeteer.launch({
headless: false,
});
const page1 = await browser.newPage();
await page1.goto(pageURL);
try {
const images = await page.$$eval("img", els => els.map(x => x.getAttribute("src")));
console.log(images);
} catch(e) {console.log("ERR Locator")};
await page1.close();
await browser.close();
return resolve();
} catch(e) {console.log(`Error Image Fetcher Part Name: ${partName}`)};
});
}
async function start() {
pageURL = "https://www.grainger.com/product/NVENT-CADDY-Cushioned-Pipe-Clamp-1RVC3";
partName = "10000";
ImageFetcher(pageURL, partName);
} start();
//ERR Locator
How about this:
const puppeteer = require("puppeteer");
let testing = async () => {
const browser = await puppeteer.launch({
headless: false
});
const page = await browser.newPage();
await page.goto('https://www.grainger.com/product/NVENT-CADDY-Cushioned-Pipe-Clamp-1RVC3');
const image = await extractLargestImage(page);
return image;
};
async function extractLargestImage(page) {
return page.evaluate(() => {
let imgs = document.querySelectorAll('img');
let largestImgSrc = 'none yet';
let largestImgSize = 0;
for (var img of imgs) {
let imgSize = Number(img.height) * Number(img.width);
if (imgSize > largestImgSize) {
largestImgSize = imgSize;
largestImgSrc = img.src;
}
}
return largestImgSrc;
});
}
testing().then((value) => {
console.dir(value, {'maxArrayLength': null});
});
I have this code in a puppeteer script. I need to access to the informations that are part of a table. I've tried with the page.$$eval() function but nothing is logged into console. What's wrong with the code?
(async() => {
const browser = await puppeteer.launch({
headless: false
});
const page = await browser.newPage();
page.goto(process.env.GATEWAY_ADDRESS, { waitUntil: ['load', 'networkidle2']});
const pwdField = await page.waitForSelector('#srp_password');
await pwdField.type(process.env.GATEWAY_PASSWORD);
const submitBtn = await page.waitForSelector('#sign-me-in');
await submitBtn.click();
page.waitForNavigation().then( (response) => {
page.goto(process.env.GATEWAY_PAGE, { waitUntil: ['load', 'networkidle2']}).then( (response) => {
page.$$eval('#calllog > tbody > tr', (rows) => {
console.log(rows);
// let rowsData = [];
// rows.forEach( (row) => {
// console.log(row);
// });
});
});
});
})();
I want to select a p which is the only one with a classname. But why doesn't the javascript code work?
Note: I can't do elements[i].className =="test", because the class name changes every time I reopen the website.
When I run this in the console I get:
undefined
undefined
undefined
const puppeteer = require('puppeteer');
async function test() {
const browser = await puppeteer.launch({ headless: false, timeout: 0};
const page2 = await browser.newPage();
await page2.goto("localhost");
await page2.waitFor(2530);
const elements = await page2.evaluate(() => [...document.querySelectorAll('p')]);
for (var i = 0; i < elements.length; i++) {
if(elements[i].className){
console.log(elements[i].innerText);
}
}
}
test();
<p>text1</p>
<p class="test">text2</p>
<p>text3</p>
const puppeteer = require('puppeteer');
async function test() {
const browser = await puppeteer.launch({ headless: false, timeout: 0};
const page2 = await browser.newPage();
await page2.goto("localhost");
await page2.waitFor(2530);
const elements = await page2.evaluate(() => [...document.querySelectorAll('p')]);
for (var i = 0; i < elements.length; i++) {
if(elements[i].className){
console.log(elements[i].innerText[]);
}
}
}
test();
Change this line
console.log(elements[i].innerText[]);
to
console.log(elements[i].innerText);
And use this to return an array of paragraphs from the evaluate
const textsArray = await page2.evaluate(
() => [...document.querySelectorAll('p')].map(elem => elem.innerText)
);
This will return an array of all the text from the paragraph tags or else you can manipulate map to return an objects inside array with their class name also,if you want both
Like This
const textsArray = await page2.evaluate(() =>
[...document.querySelectorAll('p')].map(elem =>
{ return {class: elem.className, text:elem.innerText}
)
);
async function test() {
const browser = await puppeteer.launch({ headless: false, timeout: 0};
const page2 = await browser.newPage();
await page2.goto("localhost");
await page2.waitFor(2530);
let elements = await page.$$('p');
for (let i = 0; i < elements.length; i++) {
let el_class = await page.evaluate(el => el.getAttribute('class'), elements[i]);
console.log(el_class);
}
}
I have thefollowing script with puppeter that works correctly , this code extract all information about table.
(async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
const tableRows = await page.$$('table > tbody tr');
await page.goto("https://www.mismarcadores.com/baloncesto/espana/liga-endesa/partidos/");
const time = await page.evaluate(() => {
const tables = Array.from(document.querySelectorAll('table tr .time'));
return tables.map(table => table.textContent)
});
const teamHome = await page.evaluate(() => {
const tables = Array.from(document.querySelectorAll('table tr .team-home'));
return tables.map(table => table.textContent)
});
const teamAway = await page.evaluate(() => {
const tables = Array.from(document.querySelectorAll('table tr .team-away'));
return tables.map(table => table.textContent)
});
for (let i = 0; i < time.length; i++) {
console.log(time[i]);
console.log(teamHome[i]);
console.log(teamAway[i]);
}
await browser.close();
})();
Now I try to create this in a better way and I have the following code.
(async () => {
const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.goto("https://www.mismarcadores.com/baloncesto/espana/liga-endesa/partidos/");
console.log("started evalating");
var data = await page.evaluate(() => {
Array.from(
document.querySelectorAll('table tr')
).map(row => {
return {
time: row.querySelector(".time"),
teamHome: row.querySelector(".team-home"),
teamAway: row.querySelector(".team-away")
};
});
});
console.log(data);
})();
When I try to execute the second script I receive and undefined.
The result will be to pass the first script to second script.
Could anyone helps to me ?
You need to specify tr elements more (like by adding .stage-scheduled class) and to return .textContent properties instead fo elements themselves. Try this:
var data = await page.evaluate(() => {
return Array.from(
document.querySelectorAll('table tr.stage-scheduled')
).map(row => {
return {
time: row.querySelector(".time").textContent,
teamHome: row.querySelector(".team-home").textContent,
teamAway: row.querySelector(".team-away").textContent,
};
});
});