How do I point the mouse to a specific part of the page and then scroll? (Puppeteer) - javascript

The URL I'm talking about is https://www.vudu.com/content/movies/movieslist. I'm trying to scroll through the part where the movies are. When I use the following code, it doesn't work.
await page.evaluate( () => {
window.scrollBy(0, window.innerHeight);
});
I think this is because you naturally have to hover over the movies before that part can scroll. How can I solve this problem? Thanks!
My full code:
const puppeteer = require('puppeteer');
(async () => {
const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.setViewport({ width: 1280, height: 800 });
url = "https://www.vudu.com/content/movies/movieslist"
await page.goto(url, {waitUntil: 'load'});
await page.evaluate( () => {
window.scrollBy(0, window.innerHeight);
});
await page.waitFor(2000);
})()

You can use mouse.move() for this, but it seems this would not fix the issue as the document body is not scrollable. You need to find the scrollable element to scroll just it:
'use strict';
const puppeteer = require('puppeteer');
(async function main() {
try {
const browser = await puppeteer.launch({ headless: false, defaultViewport: null });
const [page] = await browser.pages();
await page.goto('https://www.vudu.com/content/movies/movieslist');
await page.waitForSelector('.nr-pt-40');
const data = await page.evaluate(() => {
const container = document.querySelector('.nr-pt-40');
container.scrollBy(0, container.clientHeight);
});
// await browser.close();
} catch (err) {
console.error(err);
}
})();

It's not the window that you need to scroll, but the div that wraps the thunbnails.
Try getting a handle to the wrapper using:
var first = document.querySelector(".contentPosterWrapper");
var wrapper = first.parentNode.parentNode;
wrapper.scrollBy(0, 500);

Related

Puppeteer: line of code being executed before others

I have this code:
const puppeteer = require("puppeteer");
(async () => {
const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.goto("https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a");
const [button1] = await
page.$x('//div[#class="marketBar_changeMarketLabel__l0vzl"]/p');
button1.click();
const [button2] = await page.$x('//div[#class="listItem_container__2IdVR white
marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2
ESITO FINALE"]');
button2.click();
})();
The proble is that after clicking button1 the page change and puppeteer executes immediately the following line of code, instead I want it to wait for the new page to be loaded becuase otherwise It will throw an error since It can't find button2.
I found this solution on stackoverflow:
const puppeteer = require("puppeteer");
function delay(time) {
return new Promise(function (resolve) {
setTimeout(resolve, time);
});
}
(async () => {
const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.goto("https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a");
const [button1] = await
page.$x('//div[#class="marketBar_changeMarketLabel__l0vzl"]/p');
button1.click();
await delay(4000);
const [button2] = await page.$x('//div[#class="listItem_container__2IdVR white
marketList_listItemHeight__1aiAJ
marketList_bgColorGrey__VdrVK"]/p[text()="1X2
ESITO FINALE"]');
button2.click();
})();
But of course this in't the best solution.
I think you have to modify a bit in your code:
await button1.click();
await page.waitForNavigation({waitUntil: 'networkidle2'});
For reference, see the documentation.
I found a solution, here's the code:
const puppeteer = require("puppeteer");
(async () => {
const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.goto("https://www.sisal.it/scommesse
matchpoint/quote/calcio/serie-a");
await page.waitForXPath('//div[#class="marketBar_changeMarketLabel__l0vzl"]/p');
const [button1] = await page.$x('//div[#class="marketBar_changeMarketLabel__l0vzl"]/p');
await button1.click();
await page.waitForXPath('//div[#class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]');
const [button2] = await page.$x('//div[#class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]');
button2.click();
})();

How do I define a variable as a scraped element using Puppeteer

const puppeteer = require('puppeteer');
(async () => {
const browser = await puppeteer.launch({
headless: false,
defaultViewport: null
})
const page = await browser.newPage()
await page.goto('https://www.supremenewyork.com/shop/sweatshirts/ftq968f24/lhrblx1z5')
var productName = await page.evaluate(() => {
document.querySelector('div[id="details"] > p[itemprop="model"]').innerText;
})
console.log(productName);
})()
When I run my code that is supposed to grab the name of the supreme item, it says undefined when it's supposed to log it in the console.
You are neither returning anything from the page.evaluate nor are you setting the value of productName. Try something like this instead that uses $eval to return the innerText of the matching element:
const puppeteer = require("puppeteer");
(async () => {
const browser = await puppeteer.launch({
headless: false,
defaultViewport: null,
});
const page = await browser.newPage();
await page.goto(
"https://www.supremenewyork.com/shop/sweatshirts/ftq968f24/lhrblx1z5"
);
const productName = await page.$eval(
'div[id="details"] > p[itemprop="model"]',
(el) => el.innerText
);
console.log(productName);
})();
If you prefer to use evaluate it would look like:
const puppeteer = require("puppeteer");
(async () => {
const browser = await puppeteer.launch({
headless: false,
defaultViewport: null,
});
const page = await browser.newPage();
await page.goto(
"https://www.supremenewyork.com/shop/sweatshirts/ftq968f24/lhrblx1z5"
);
const productName = await page.evaluate(() => {
// notice the return
return document.querySelector('div[id="details"] > p[itemprop="model"]').innerText;
});
console.log(productName);
})();
If innerText doesn't return anything you may instead need to use something like textContent.
Hopefully that helps!

Puppeteer not working as expected when clicking button

My problem is that I need to set the comment selector to "all comments" whit puppeteer but the comments don't render after that puppeteer clicks on the correct button, "all the comments", the comment section just disappears, I will provide the code and a video of the browser in action.
const $ = require('cheerio');
const puppeteer = require('puppeteer');
const url = 'https://www.facebook.com/pg/SamsungGlobal/posts/';
const main = async () => {
const browser = await puppeteer.launch({
headless: false,
args: ['--no-sandbox', '--disable-setuid-sandbox']
});
const page = await browser.newPage();
await page.setViewport({
width: 1920,
height: 1080
});
await page.goto(url, {
waitUntil: 'networkidle2',
timeout: 0
});
page.mouse.click(50, 540, {});
for (var a = 0; a < 18; a++) {
setTimeout(() => {}, 16);
await page.keyboard.press('ArrowDown');
}
let bodyHTML = await page.evaluate(() => document.body.innerHTML);
var id = "#" + $("._427x ._4-u2.mbm._4mrt", bodyHTML).attr('id'); // selects id of first post
try {
var exp = await page.$(`${id} a._21q1`); // clicks on "most relevant" from the first post
await exp.evaluate(exp => exp.click());
await page.click('div[data-ordering="RANKED_UNFILTERED"]'); // selects "all the comments"
var exp = await page.$(`${id} a._42ft`); // should click on "more comments" but it doesn't load
await exp.evaluate(exp => exp.click());
await page.waitForSelector(`${id} a._5v47.fss`); // wait for the "others" in facebook comments
var exp = await page.$$(`${id} a._5v47.fss`);
await exp.evaluate(exp => exp.click());
await page.screenshot({
path: "./srn4.png"
});
// var post = await page.$eval(id + " .userContentWrapper", el => el.innerHTML);
// console.log("that's the post " + post);
} catch (e) {
console.log(e);
}
setTimeout(async function() {
await browser.close(); //close after some time
}, 1500);
};
main();
That's the video of the full execution process: https://youtu.be/jXpSOBfVskg
That's a slow motion of the moment it click on the menu: https://youtu.be/1OgfFNokxsA
You can try a variant with selectors:
'use strict';
const puppeteer = require('puppeteer');
(async function main() {
try {
const browser = await puppeteer.launch({ headless: false });
const [page] = await browser.pages();
await page.goto('https://www.facebook.com/pg/SamsungGlobal/posts/');
await page.waitForSelector('[data-ordering="RANKED_THREADED"]');
await page.click('[data-ordering="RANKED_THREADED"]');
await page.waitForSelector('[data-ordering="RANKED_UNFILTERED"]');
await page.click('[data-ordering="RANKED_UNFILTERED"]');
} catch (err) {
console.error(err);
}
})();
page.mouse.click(50, 540, {});
This is not going to work necessarily. What are you trying to click? You need to use CSS selectors to find elements that you want to click.
Also, dynamic elements might not appear in the page right away. You should use waitForSelector as needed.

How to iterate async function in puppeteer with NodeJS

I want to take a screenshot with puppeteer and it's working for one post. But I want to make it iterate.
If it's normal function I can just wrote the function name in the last side of the code so that it can iterate. But this is async function so I don't know how to iterate it.
const puppeteer = require('puppeteer');
let postNumber = 1;
let by;
(async () => {
const browser = await puppeteer.launch({
executablePath: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
userDataDir: 'C:\\Users\\{computerName}\\AppData\\Local\\Google\\Chrome\\User Data',
headless: false
}); // default is true
const page = await browser.newPage();
await page.goto(`https://band.us/band/{someNumbers}/post/${postNumber}`, {
waitUntil: 'networkidle2'
});
let element = await page.$('.boardList');
by = await page.evaluate(() => document.getElementsByClassName('text')[0].textContent);
console.log(by);
await element.screenshot({
path: `./image/${postNumber}-${by}.png`
});
console.log(`SAVED : ${postNumber}-${by}.png`)
postNumber++;
await browser.close();
})();
After the function is finished, the postNumber variable should be increase by one. And then run the code again by new URLs.
As you want to run the code one iteration after another, a normal for (or while) loop can be used. async/await code works fine with these.
You can use a for in your case like this:
(async () => {
const browser = await puppeteer.launch(/* ... */);
const page = await browser.newPage();
for (let postNumber = 1; postNumber < 10; postNumber++) {
await page.goto(/* ... */);
let element = await page.$('.boardList');
// ...
}
await browser.close();
})();
You can use any appropriate loop, like while-loop:
'use strict';
const puppeteer = require('puppeteer');
(async () => {
const browser = await puppeteer.launch({
executablePath: 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
userDataDir: 'C:\\Users\\{computerName}\\AppData\\Local\\Google\\Chrome\\User Data',
headless: false
}); // default is true
const page = await browser.newPage();
let postNumber = 1;
while (postNumber <= 10) {
await page.goto(`https://band.us/band/{someNumbers}/post/${postNumber}`, {
waitUntil: 'networkidle2'
});
const element = await page.$('.boardList');
const by = await page.evaluate(() => document.getElementsByClassName('text')[0].textContent);
console.log(by);
await element.screenshot({
path: `./image/${postNumber}-${by}.png`
});
console.log(`SAVED : ${postNumber}-${by}.png`)
postNumber++;
}
await browser.close();
})();

Scroll through the side bar of a page using Puppeteer

How do I scroll through just the side bar of a page and not the whole page?
I saw this solution but I don't know exactly how to get x and y. Using this doc as an example, how do I scroll through the scroll view on the side bar.
There are some ways you can scroll this part to its bottom:
'use strict';
const puppeteer = require('puppeteer');
(async function main() {
try {
const browser = await puppeteer.launch({
headless: false,
args: ['--start-maximized'],
});
const [page] = await browser.pages();
await page.setViewport({ width: 1280, height: 600 });
await page.goto('https://nodejs.org/api/');
await page.evaluate(() => {
const container = document.querySelector('#column2');
container.scrollTop = container.scrollHeight;
});
await page.waitFor(3000);
await page.goto('https://nodejs.org/api/');
await page.evaluate(() => {
const container = document.querySelector('#column2');
container.scroll(0, container.scrollHeight);
});
await page.waitFor(3000);
await page.goto('https://nodejs.org/api/');
await page.evaluate(() => {
const container = document.querySelector('#column2');
container.scrollBy(0, container.scrollHeight);
});
await page.waitFor(3000);
await page.goto('https://nodejs.org/api/');
await page.evaluate(() => {
const container = document.querySelector('#column2');
container.scrollTo(0, container.scrollHeight);
});
await page.waitFor(3000);
await page.goto('https://nodejs.org/api/');
await page.evaluate(() => {
const lastElementInContainer = [...document.querySelectorAll('#column2 li')].pop();
lastElementInContainer.scrollIntoView();
});
} catch (err) {
console.error(err);
}
})();

Categories