How do I scroll through just the side bar of a page and not the whole page?
I saw this solution, but I don't know exactly how to get x and y. Using this doc as an example, how do I scroll through the scroll view in the sidebar?
There are some ways you can scroll this part to its bottom:
'use strict';
const puppeteer = require('puppeteer');
/**
 * Demonstrates five ways of scrolling the `#column2` sidebar of the Node.js
 * API docs to its bottom. The page is reloaded before every technique so each
 * one starts from a freshly loaded sidebar.
 *
 * The browser is left open on purpose (headless: false) so the result of the
 * last technique stays visible.
 */
(async function main() {
  const DOCS_URL = 'https://nodejs.org/api/';
  // Pause between techniques so each result can be watched in the window.
  const PAUSE_MS = 3000;

  // Each function is serialized and run inside the page by page.evaluate(),
  // so it must be self-contained and may only touch page-side globals.
  const scrollTechniques = [
    // 1. Assign scrollTop directly.
    () => {
      const container = document.querySelector('#column2');
      container.scrollTop = container.scrollHeight;
    },
    // 2. Element.scroll()
    () => {
      const container = document.querySelector('#column2');
      container.scroll(0, container.scrollHeight);
    },
    // 3. Element.scrollBy()
    () => {
      const container = document.querySelector('#column2');
      container.scrollBy(0, container.scrollHeight);
    },
    // 4. Element.scrollTo()
    () => {
      const container = document.querySelector('#column2');
      container.scrollTo(0, container.scrollHeight);
    },
    // 5. Bring the last list item of the sidebar into view.
    () => {
      const lastElementInContainer = [...document.querySelectorAll('#column2 li')].pop();
      lastElementInContainer.scrollIntoView();
    },
  ];

  try {
    const browser = await puppeteer.launch({
      headless: false,
      args: ['--start-maximized'],
    });
    const [page] = await browser.pages();
    await page.setViewport({ width: 1280, height: 600 });

    for (const [index, scrollSidebar] of scrollTechniques.entries()) {
      if (index > 0) {
        // A plain timer replaces the deprecated page.waitFor(ms) and works
        // with every Puppeteer version.
        await new Promise((resolve) => setTimeout(resolve, PAUSE_MS));
      }
      await page.goto(DOCS_URL);
      await page.evaluate(scrollSidebar);
    }
  } catch (err) {
    console.error(err);
  }
})();
Related
I'm trying to scrape a web page. After following some tutorials, I found out how to scrape different products and how to change the page, but not how to do both at the same time. I tried several ways to do it, but couldn't figure it out.
This is my scraping code:
const puppeteer = require('puppeteer');
const xlsx = require("xlsx");
/**
 * Opens a product page and reads its title, price and stock text.
 *
 * @param {string} url - Address of the product page to open.
 * @param {object} page - Page object providing `goto` and `$eval`.
 * @returns {Promise<{title: string, price: string, instock: string}>}
 *   `title` and `price` are the `textContent` of the matched elements;
 *   `instock` is the `innerText` of the availability element.
 */
async function getPageData(url, page) {
  const readText = (node) => node.textContent;

  await page.goto(url);

  const title = await page.$eval(".product_main h1", readText);
  const price = await page.$eval(".price_color", readText);
  const instock = await page.$eval(".instock.availability", (node) => node.innerText);

  return { title, price, instock };
}
/**
 * Collects the product links shown on the books.toscrape.com front page.
 *
 * Launches its own browser and always closes it again, even when navigation
 * or the query fails.
 *
 * @returns {Promise<string[]>} The `href` of every
 *   `.product_pod .image_container a` anchor on the page.
 */
async function getLinks(){
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://books.toscrape.com/');
    // Previously the links were computed but never returned to the caller.
    return await page.$$eval('.product_pod .image_container a', (anchors) =>
      anchors.map((anchor) => anchor.href));
  } finally {
    await browser.close();
  }
}
/**
 * Scrapes a single product page and logs the extracted data.
 * The browser is closed when done, whether or not scraping succeeded.
 */
async function main(){
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    const data = await getPageData("https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html",page);
    console.log(data);
  } finally {
    await browser.close();
  }
}
// Report failures instead of leaving an unhandled promise rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
And here is my code to change the page:
const puppeteer = require('puppeteer');
const xlsx = require("xlsx");
// Walks through the catalogue by following the "next" pager link until no
// such link is left on the page.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://books.toscrape.com/');
    while (await page.$(".pager .next a")) {
      // Start waiting for the navigation before clicking, so the load cannot
      // finish unnoticed; this replaces the fixed 3-second sleep.
      await Promise.all([
        page.waitForNavigation(),
        page.click(".pager .next a"),
      ]);
    }
  } finally {
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
});
I have this code:
const puppeteer = require("puppeteer");
// Opens the Serie A odds page, clicks the "change market" label and then the
// "1X2 ESITO FINALE" entry of the market list.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a");
  // XPath selects attributes with "@"; the expression must be a single-line string.
  const [button1] = await page.$x('//div[@class="marketBar_changeMarketLabel__l0vzl"]/p');
  await button1.click();
  // NOTE: nothing here waits for the market list to appear after the click,
  // so the lookup below can come back empty and button2 be undefined.
  const [button2] = await page.$x('//div[@class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]');
  await button2.click();
})();
The problem is that after clicking button1 the page changes, and Puppeteer immediately executes the following line of code. Instead, I want it to wait for the new page to be loaded, because otherwise it will throw an error since it can't find button2.
I found this solution on stackoverflow:
const puppeteer = require("puppeteer");
/**
 * Creates a promise that settles once a timer has elapsed.
 *
 * @param {number} time - Timeout handed to `setTimeout`, in milliseconds.
 * @returns {Promise<undefined>} Resolves with no value when the timer fires.
 */
function delay(time) {
  const armTimer = (done) => {
    setTimeout(done, time);
  };
  return new Promise(armTimer);
}
// Same flow as before, but pauses for a fixed four seconds after the first
// click to give the market list time to appear.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a");
  // XPath selects attributes with "@"; the expression must be a single-line string.
  const [button1] = await page.$x('//div[@class="marketBar_changeMarketLabel__l0vzl"]/p');
  await button1.click();
  await delay(4000);
  const [button2] = await page.$x('//div[@class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]');
  await button2.click();
})();
But of course this isn't the best solution.
I think you have to modify your code a bit:
// Begin waiting for the navigation before the click is dispatched; if the
// wait only starts after click() has resolved, a fast navigation can already
// be over and the wait would then run into its timeout.
await Promise.all([
  page.waitForNavigation({ waitUntil: 'networkidle2' }),
  button1.click(),
]);
For reference, see the documentation.
I found a solution, here's the code:
const puppeteer = require("puppeteer");
// Opens the Serie A odds page and clicks two elements in turn, each time
// waiting until the element's XPath matches before clicking it.
(async () => {
  // XPath selects attributes with "@".
  const CHANGE_MARKET_XPATH = '//div[@class="marketBar_changeMarketLabel__l0vzl"]/p';
  const FINAL_RESULT_XPATH = '//div[@class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]';

  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a");

  // waitForXPath resolves to the handle of the matched element, so a second
  // lookup with page.$x is not needed.
  const button1 = await page.waitForXPath(CHANGE_MARKET_XPATH);
  await button1.click();

  const button2 = await page.waitForXPath(FINAL_RESULT_XPATH);
  await button2.click();
})();
The URL I'm talking about is https://www.vudu.com/content/movies/movieslist. I'm trying to scroll through the part where the movies are. When I use the following code, it doesn't work.
// Runs in the page: scrolls the window down by one viewport height.
await page.evaluate(function scrollOneScreen() {
  window.scrollBy(0, window.innerHeight);
});
I think this is because you naturally have to hover over the movies before that part can scroll. How can I solve this problem? Thanks!
My full code:
const puppeteer = require('puppeteer');
// Loads the movies list and scrolls the window down by one viewport height,
// then keeps the script alive for two more seconds.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.setViewport({ width: 1280, height: 800 });
  // Declared with const; the bare assignment created an implicit global.
  const url = "https://www.vudu.com/content/movies/movieslist";
  await page.goto(url, {waitUntil: 'load'});
  // NOTE(review): this scrolls the window itself, not an inner scrollable
  // element of the page.
  await page.evaluate( () => {
    window.scrollBy(0, window.innerHeight);
  });
  // A plain timer replaces the deprecated page.waitFor(ms).
  await new Promise((resolve) => setTimeout(resolve, 2000));
})();
You can use mouse.move() for this, but it seems this would not fix the issue as the document body is not scrollable. You need to find the scrollable element to scroll just it:
'use strict';
const puppeteer = require('puppeteer');
/**
 * Opens the movies list and scrolls the `.nr-pt-40` element down by its own
 * visible height (clientHeight) once that element exists in the page.
 * The browser is left open so the result can be inspected.
 */
(async function main() {
  try {
    // defaultViewport: null disables Puppeteer's fixed default viewport.
    const browser = await puppeteer.launch({ headless: false, defaultViewport: null });
    const [page] = await browser.pages();
    await page.goto('https://www.vudu.com/content/movies/movieslist');
    await page.waitForSelector('.nr-pt-40');
    // The callback returns nothing, so its result is not stored.
    await page.evaluate(() => {
      const container = document.querySelector('.nr-pt-40');
      container.scrollBy(0, container.clientHeight);
    });
  } catch (err) {
    console.error(err);
  }
})();
It's not the window that you need to scroll, but the div that wraps the thumbnails.
Try getting a handle to the wrapper using:
// Take the first poster wrapper, walk two levels up to the element treated
// as the scrollable wrapper, and scroll it down by 500 pixels.
const first = document.querySelector(".contentPosterWrapper");
const wrapper = first.parentNode.parentNode;
wrapper.scrollBy(0, 500);
My problem is that I need to set the comment selector to "all comments" with Puppeteer, but the comments don't render after Puppeteer clicks on the correct button ("all the comments"); the comment section just disappears. I will provide the code and a video of the browser in action.
const $ = require('cheerio');
const puppeteer = require('puppeteer');
const url = 'https://www.facebook.com/pg/SamsungGlobal/posts/';
/**
 * Opens the Facebook posts page, scrolls down with the keyboard, switches the
 * first post's comment ordering to "all comments", expands further comments
 * and saves a screenshot to ./srn4.png.
 *
 * Errors raised while working on the comment section are logged and do not
 * abort the run; the browser is always closed at the end.
 */
const main = async () => {
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const browser = await puppeteer.launch({
    headless: false,
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({
      width: 1920,
      height: 1080
    });
    await page.goto(url, {
      waitUntil: 'networkidle2',
      timeout: 0
    });
    // Awaited so the key presses below start only after the click is done.
    await page.mouse.click(50, 540, {});
    for (let a = 0; a < 18; a++) {
      // The former setTimeout(() => {}, 16) scheduled an empty callback and
      // delayed nothing; this actually pauses 16 ms between key presses.
      await sleep(16);
      await page.keyboard.press('ArrowDown');
    }
    const bodyHTML = await page.evaluate(() => document.body.innerHTML);
    const id = `#${$("._427x ._4-u2.mbm._4mrt", bodyHTML).attr('id')}`; // selects id of first post
    try {
      const sortMenu = await page.$(`${id} a._21q1`); // "most relevant" from the first post
      await sortMenu.evaluate((el) => el.click());
      await page.click('div[data-ordering="RANKED_UNFILTERED"]'); // selects "all the comments"
      const moreComments = await page.$(`${id} a._42ft`); // "more comments"
      await moreComments.evaluate((el) => el.click());
      await page.waitForSelector(`${id} a._5v47.fss`); // wait for the "others" in facebook comments
      // page.$$ resolves to an array of handles, so each one is clicked in
      // turn; calling evaluate() on the array itself throws a TypeError.
      const otherLinks = await page.$$(`${id} a._5v47.fss`);
      for (const link of otherLinks) {
        await link.evaluate((el) => el.click());
      }
      await page.screenshot({
        path: "./srn4.png"
      });
    } catch (e) {
      console.log(e);
    }
    // Keep the window open briefly before closing, as the original timer did.
    await sleep(1500);
  } finally {
    await browser.close();
  }
};
main().catch((err) => {
  console.log(err);
});
That's the video of the full execution process: https://youtu.be/jXpSOBfVskg
That's a slow-motion capture of the moment it clicks on the menu: https://youtu.be/1OgfFNokxsA
You can try a variant with selectors:
'use strict';
const puppeteer = require('puppeteer');
/**
 * Opens the Facebook posts page and clicks the two comment-ordering controls
 * one after the other, each time waiting until the control is present.
 * Any failure is written to the console.
 */
(async function main() {
  // Ordering controls to click, in this order.
  const orderingSelectors = [
    '[data-ordering="RANKED_THREADED"]',
    '[data-ordering="RANKED_UNFILTERED"]',
  ];

  try {
    const browser = await puppeteer.launch({ headless: false });
    const openPages = await browser.pages();
    const page = openPages[0];

    await page.goto('https://www.facebook.com/pg/SamsungGlobal/posts/');

    for (const selector of orderingSelectors) {
      await page.waitForSelector(selector);
      await page.click(selector);
    }
  } catch (err) {
    console.error(err);
  }
})();
// Quoted from the question: clicks at the fixed coordinates x=50, y=540 with
// an empty options object; the promise returned by the call is not awaited.
page.mouse.click(50, 540, {});
This is not necessarily going to work. What are you trying to click? You need to use CSS selectors to find the elements that you want to click.
Also, dynamic elements might not appear in the page right away. You should use waitForSelector as needed.
I want to take a screenshot with puppeteer and it's working for one post. But I want to make it iterate.
If it were a normal function, I could just write the function name at the end of the code so that it would iterate. But this is an async function, so I don't know how to iterate it.
const puppeteer = require('puppeteer');
// Number of the post to capture; incremented after a successful screenshot.
let postNumber = 1;
// Text of the first element with class "text" on the post page; used in the
// screenshot file name.
let by;
// Captures the `.boardList` element of one post as ./image/<post>-<by>.png.
// The browser is closed even when a step fails, so it is not left running.
(async () => {
  const browser = await puppeteer.launch({
    executablePath: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
    userDataDir: 'C:\\Users\\{computerName}\\AppData\\Local\\Google\\Chrome\\User Data',
    headless: false // default is true
  });
  try {
    const page = await browser.newPage();
    await page.goto(`https://band.us/band/{someNumbers}/post/${postNumber}`, {
      waitUntil: 'networkidle2'
    });
    const element = await page.$('.boardList');
    by = await page.evaluate(() => document.getElementsByClassName('text')[0].textContent);
    console.log(by);
    await element.screenshot({
      path: `./image/${postNumber}-${by}.png`
    });
    console.log(`SAVED : ${postNumber}-${by}.png`);
    postNumber++;
  } finally {
    await browser.close();
  }
})();
After the function has finished, the postNumber variable should be increased by one, and then the code should run again with the new URL.
As you want to run the code one iteration after another, a normal for (or while) loop can be used. async/await code works fine with these.
You can use a for in your case like this:
// Skeleton: the browser and the page are created once, before the loop, and
// the same page object is used by every iteration.
(async () => {
const browser = await puppeteer.launch(/* ... */);
const page = await browser.newPage();
// postNumber runs from 1 to 9; every awaited step of one iteration finishes
// before the next iteration starts.
for (let postNumber = 1; postNumber < 10; postNumber++) {
await page.goto(/* ... */);
let element = await page.$('.boardList');
// ...
}
// Close the browser only after the loop has finished.
await browser.close();
})();
You can use any appropriate loop, such as a while loop:
'use strict';
const puppeteer = require('puppeteer');
// Captures the `.boardList` element of posts 1 to 10, one after another, as
// ./image/<post>-<by>.png, where <by> is the text of the first element with
// class "text" on the post page. A post without a `.boardList` element is
// skipped, and the browser is closed even when a step fails.
(async () => {
  const browser = await puppeteer.launch({
    executablePath: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
    userDataDir: 'C:\\Users\\{computerName}\\AppData\\Local\\Google\\Chrome\\User Data',
    headless: false // default is true
  });
  try {
    const page = await browser.newPage();
    let postNumber = 1;
    while (postNumber <= 10) {
      await page.goto(`https://band.us/band/{someNumbers}/post/${postNumber}`, {
        waitUntil: 'networkidle2'
      });
      const element = await page.$('.boardList');
      if (element === null) {
        // page.$ resolves to null when nothing matches; move on to the next
        // post instead of failing on element.screenshot().
        console.log(`SKIPPED : ${postNumber} (no .boardList element)`);
        postNumber++;
        continue;
      }
      const by = await page.evaluate(() => document.getElementsByClassName('text')[0].textContent);
      console.log(by);
      await element.screenshot({
        path: `./image/${postNumber}-${by}.png`
      });
      console.log(`SAVED : ${postNumber}-${by}.png`);
      postNumber++;
    }
  } finally {
    await browser.close();
  }
})();