How do I type something into an input box with Puppeteer? - javascript

I'm doing experiments for a bot I'm making, but for some reason I can't get it to type into the input box on YouTube.
// Opens YouTube in a visible browser window and attempts to type into the search box.
(async () => {
const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.goto('https://www.youtube.com/?hl=hr&gl=HR');
// NOTE(review): goto() above has already been awaited, so this call waits for a
// further navigation — confirm that one actually occurs after the initial load.
await page.waitForNavigation({
waitUntil: 'networkidle0',
});
// Types the literal string "text" into the element matching #search.
await page.type('#search', `text`)
// await browser.waitForTarget(() => false)
// await browser.close();
})();
#search is the id of the YouTube search bar, but it isn't working for some reason.

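Your problem is the extra page.waitForNavigation({ waitUntil: 'networkidle0' }) call after page.goto(). Wait for the search input itself instead: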
// Opens YouTube and types into the search box once the input exists.
// The async arrow function is wrapped in parentheses so that the trailing
// `})();` actually invokes it.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto('https://www.youtube.com/?hl=hr&gl=HR');
  // Wait for the concrete element instead of a navigation event.
  await page.waitForSelector('input#search');
  await page.type('input#search', `text`);
})();

Related

Capture a screenshot as a table using Puppeteer

I am learning to scrape items from a website using Puppeteer. I am using table data from Basketball reference.com to practice. What I have done so far is use the puppeteer to Search the stats of my favorite player (Stephen Curry), access the table page, and take a screenshot of the page which then finishes the scraping process and closes the browser. However, I cannot seem to scrape the table I need and I am completely stuck.
The following is the code I have written so far:
const puppeteer = require("puppeteer");
// Searches basketball-reference.com for "Stephen Curry", clicks through to a
// second page and saves a screenshot as StephenCurryStats.png.
async function run() {
const browser = await puppeteer.launch({
headless: false,
ignoreHTTPSErrors: true,
});
const page = await browser.newPage();
await page.goto(`https://www.basketball-reference.com/`);
await page.waitForSelector("input[name=search]");
// Sets the search field's value directly in the page instead of typing it.
await page.$eval("input[name=search]", (el) => (el.value = "Stephen Curry"));
await page.click('input[type="submit"]');
// NOTE(review): secondPageLink is not declared anywhere in this snippet.
await page.waitForSelector(`a[href='${secondPageLink}']`, { visible: true });
await page.click(`a[href='${secondPageLink}']`);
// NOTE(review): waitForSelector is called here without a selector argument.
await page.waitForSelector();
await page.screenshot({
path: `StephenCurryStats.png`,
});
await page.close();
await browser.close();
}
run();
I am trying to scrape the PER GAME table on the following link and take its screenshot. However, I cannot seem to find the right selector to pick and scrape and I am very confused.
The URL is https://www.basketball-reference.com/players/c/curryst01.html
There seems to be at least a couple of issues here. I'm not sure what secondPageLink refers to or the intent behind await page.waitForSelector() (throws TypeError: Cannot read properties of undefined (reading 'startsWith') on my version). I would either select the first search result with .search-item-name a[href] or skip that page entirely by clicking on the first autocompleted name in the search after using page.type(). Even better, you can build the query string URL (e.g. https://www.basketball-reference.com/search/search.fcgi?search=stephen+curry) and navigate to that in your first goto.
The final page loads a video and a ton of Google ad junk. Best to block all requests that aren't relevant to the screenshot.
const puppeteer = require("puppeteer"); // ^16.2.0
// Kept outside the async function so the trailing .finally() can close it.
let browser;

// Searches basketball-reference.com for Stephen Curry, opens the first
// search suggestion, removes the ad-block element and saves a full-page
// screenshot. Requests to any origin outside the allow-list are aborted.
(async () => {
  browser = await puppeteer.launch({headless: true});
  const [page] = await browser.pages();
  const url = "https://www.basketball-reference.com/";
  await page.setViewport({height: 600, width: 1300});

  // Only requests whose URL starts with one of these prefixes are let through.
  const allowed = [
    "https://www.basketball-reference.com",
    "https://cdn.ssref.net"
  ];
  await page.setRequestInterception(true);
  page.on("request", request => {
    const isAllowed = allowed.some(prefix => request.url().startsWith(prefix));
    if (!isAllowed) {
      request.abort();
      return;
    }
    request.continue();
  });

  await page.goto(url, {waitUntil: "domcontentloaded"});
  await page.type('input[name="search"]', "Stephen Curry");

  // Shorthand: wait for a selector and resolve to its element handle.
  const waitFor = sel => page.waitForSelector(sel);

  const firstResult = await waitFor(".search-results-item");
  await firstResult.click();

  const adBlockEl = await waitFor(".adblock");
  await adBlockEl.evaluate(el => el.remove());

  await page.waitForNetworkIdle();
  await page.screenshot({
    path: "StephenCurryStats.png",
    fullPage: true
  });
})()
  .catch(err => console.error(err))
  .finally(() => browser?.close());
If you just want to capture the per game table:
// same boilerplate above this line
// Fragment: relies on `page` and `url` from the preceding snippet.
await page.goto(url, {waitUntil: "domcontentloaded"});
await page.type('input[name="search"]', "Stephen Curry");
// Shorthand: wait for a selector and resolve to whatever waitForSelector returns.
const $ = sel => page.waitForSelector(sel);
await (await $(".search-results-item")).click();
const table = await $("#per_game");
// Click .scroll_note only if page.$ found it (the ?. skips the click otherwise).
await (await page.$(".scroll_note"))?.click();
// Screenshot just the #per_game element rather than the whole page.
await table.screenshot({path: "StephenCurryStats.png"});
But I'd probably want a CSV for maximum ingestion:
// Fragment: relies on `page` and `url` from the earlier boilerplate.
await page.goto(url, {waitUntil: "domcontentloaded"});
await page.type('input[name="search"]', "Stephen Curry");
// Shorthand: wait for a selector and resolve to whatever waitForSelector returns.
const $ = sel => page.waitForSelector(sel);
await (await $(".search-results-item")).click();
// Waits until, in the page, a button under #all_per_game-playoffs_per_game
// whose text includes "CSV" can be found.
const btn = await page.waitForFunction(() =>
[...document.querySelectorAll("#all_per_game-playoffs_per_game li button")]
.find(e => e.textContent.includes("CSV"))
);
// Click via the DOM's own click() inside the page.
await btn.evaluate(el => el.click());
// Read the trimmed text of the last child node of #csv_per_game.
const csv = await (await $("#csv_per_game"))
.evaluate(el => [...el.childNodes].at(-1).textContent.trim());
// Naive split into rows and cells; quoted commas are not handled.
const table = csv.split("\n").map(e => e.split(",")); // TODO use proper CSV parser
console.log(table);

Page loads in regular chrome but not in puppeteer

I am trying to load a page, http://www.nhc.gov.cn/wjw/index.shtml, on puppeteer as part of a covid-tracking program. The page loads very quickly in the regular chrome browser, but when I load it in puppeteer, the page load fails with a 412. What can I do to get the page to load and fully simulate a regular browser going to the page?
The code for reproduction of this phenomenon is below:
const puppeteer = require('puppeteer-core');
// Loads the NHC index page with a Chrome binary at a relative path and saves a screenshot.
(async () => {
const browser = await puppeteer.launch({ executablePath: '..\\executables\\chrome.exe', headless: false, args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu'] });
const page = await browser.newPage();
// Expose browser and page as properties of the Node global object.
Object.assign(global, { browser, page });
// Forward console messages from the page to the Node console.
page.on('console', msg => console.log(`chrome[${msg.text()}]`));
await page.goto('http://www.nhc.gov.cn/wjw/index.shtml', { waitUntil: 'networkidle0' });
// NOTE(review): presumably a fixed 15 s pause; page.waitFor is deprecated in
// newer Puppeteer releases — confirm against the installed version.
await page.waitFor(15000);
await page.screenshot({path: 'nhc_scrape.png'});
await browser.close();
})();
Thank you in advance for your help!
You can use puppeteer-extra with the StealthPlugin.
https://www.npmjs.com/package/puppeteer-extra-plugin-stealth
Here is my code:
const puppeteer = require('puppeteer-extra')
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
// Loads the NHC index page through puppeteer-extra with the stealth plugin
// enabled and saves a screenshot once the '.inLists' element is present.
(async () => {
  puppeteer.use(StealthPlugin());
  const browser = await puppeteer.launch({headless: false, ignoreHTTPSErrors: true});
  try {
    const page = await browser.newPage();
    await page.goto('http://www.nhc.gov.cn/wjw/index.shtml');
    // Wait for page content rather than a fixed delay.
    await page.waitForSelector('.inLists');
    await page.screenshot({path: 'nhc_scrape.png'});
  } finally {
    // Always release the browser, even if navigation or the wait fails.
    await browser.close();
  }
})();

How to click on popup contents in Puppeteer?

I open the 'deliver to' popup but am not able to click on the input field and enter information.
// Opens the store-flyer page, clicks the city label and then tries to type a
// postal code into an input it expects to find in a newly created page.
(async () => {
const browser = await puppeteer.launch({headless: false});
const page = await browser.newPage();
const url = 'https://www.tntsupermarket.com/eng/store-flyer';
await page.goto(url, {waitUntil: 'networkidle0'});
// Resolves with target.page() for the next target the browser reports as created.
const newPagePromise = new Promise(x => browser.once('targetcreated', target => x(target.page())));
// Click the city label from inside the page.
await page.evaluate(()=> {
document.querySelector('span[class="deliverCss-city-FJJ"]').click();
});
// NOTE(review): this only settles if the click above causes a new target to be created.
const popup = await newPagePromise;
await popup.waitForSelector('input[aria-label="Enter your Postal Code"]');
await popup.focus('input[aria-label="Enter your Postal Code"]');
await popup.click('input[aria-label="Enter your Postal Code"]');
await popup.keyboard.type('a2b');
})();
The pop-up isn't a new page, just a modal element that's shown with JS and without navigation. Removing the navigation promise gives a pretty clear result:
const puppeteer = require("puppeteer"); // ^13.5.1
// Kept outside the async function so the trailing .finally() can close it.
let browser;

// Opens the store-flyer page, clicks the city label to show the modal and
// types a postal code into the modal's input on the same page.
(async () => {
  browser = await puppeteer.launch({headless: false});
  const [page] = await browser.pages();
  const url = "https://www.tntsupermarket.com/eng/store-flyer";
  await page.goto(url, {
    waitUntil: "networkidle0",
    timeout: 90000,
  });

  // Click through the DOM's own click() inside the page.
  const cityLabel = await page.waitForSelector('span[class="deliverCss-city-FJJ"]');
  await cityLabel.evaluate(el => el.click());

  const postalInput = await page.waitForSelector('input[aria-label="Enter your Postal Code"]');
  await postalInput.type("a2b");

  await page.waitForTimeout(30000); // just to show that the state is as we wish
})()
  .catch(err => console.error(err))
  .finally(() => browser?.close());
This is a bit slow; there's an annoying pop-up you might wish to click off instead of using "networkidle0":
// ... same code
// Fragment: relies on `page` and `url` from the preceding snippet.
await page.goto(url, {waitUntil: "domcontentloaded", timeout: 90000});
// Wait for the #closeActivityPop element and click it before continuing.
const closeEl = await page.waitForSelector("#closeActivityPop");
await closeEl.click();
const cityEl = await page.waitForSelector('span[class="deliverCss-city-FJJ"]');
// same code ...
On quick glance, if the page is cached, the pop-up might not show, so you might want to abort page.waitForSelector("#closeActivityPop"); after 30 seconds or so and continue with the code without clicking on it, depending on how flexible you want the script to be.

Puppeteer: line of code being executed before others

I have this code:
const puppeteer = require("puppeteer");
// Opens the Serie A odds page and clicks two elements located by XPath.
// NOTE: the click() calls are left un-awaited exactly as in the question;
// this snippet reproduces the timing problem being asked about.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a");
  // XPath attribute tests use @class; each expression must be one unbroken string.
  const [button1] = await page.$x('//div[@class="marketBar_changeMarketLabel__l0vzl"]/p');
  button1.click();
  const [button2] = await page.$x('//div[@class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]');
  button2.click();
})();
The problem is that after clicking button1 the page changes and Puppeteer immediately executes the following line of code. Instead, I want it to wait for the new page to load, because otherwise it throws an error since it can't find button2.
I found this solution on stackoverflow:
const puppeteer = require("puppeteer");
/**
 * Returns a promise that resolves (with no value) once a setTimeout of
 * `time` milliseconds fires.
 * @param {number} time - delay handed to setTimeout, in milliseconds
 * @returns {Promise<void>}
 */
function delay(time) {
  return new Promise((resolve) => {
    setTimeout(resolve, time);
  });
}
// Same flow as the question's snippet, with a fixed 4-second pause between
// the first click and the lookup of the second element.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a");
  // XPath attribute tests use @class; each expression must be one unbroken string.
  const [button1] = await page.$x('//div[@class="marketBar_changeMarketLabel__l0vzl"]/p');
  button1.click();
  // Fixed pause standing in for "wait until the next view has rendered".
  await delay(4000);
  const [button2] = await page.$x('//div[@class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]');
  button2.click();
})();
But of course this isn't the best solution.
I think you have to modify a bit in your code:
// Start waiting for the navigation before the click is dispatched, so a
// navigation that completes quickly cannot be missed by a late listener.
await Promise.all([
  page.waitForNavigation({waitUntil: 'networkidle2'}),
  button1.click(),
]);
For reference, see the documentation.
I found a solution, here's the code:
const puppeteer = require("puppeteer");
// Opens the Serie A odds page and clicks two elements, waiting for each
// XPath to match before looking the element up and clicking it.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a");

  // XPath attribute tests use @class; each expression is one unbroken string.
  const marketLabelXPath = '//div[@class="marketBar_changeMarketLabel__l0vzl"]/p';
  const finalResultXPath = '//div[@class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]';

  await page.waitForXPath(marketLabelXPath);
  const [button1] = await page.$x(marketLabelXPath);
  await button1.click();

  // The second element only appears after the first click has taken effect.
  await page.waitForXPath(finalResultXPath);
  const [button2] = await page.$x(finalResultXPath);
  // Awaited so a failed click rejects this function instead of floating.
  await button2.click();
})();

iframe is not loaded in puppeteer

My code:
// Loads example.com, fills a username/password form inside an "accounts"
// iframe, submits it and returns the page's cookies.
// NOTE(review): this arrow function is neither assigned nor invoked in the snippet.
async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto('https://example.com' , {
waitUntil: 'networkidle2'
});
await page.screenshot({path: 'home.png', fullPage: true});
let frames = await page.frames();
// NOTE(review): second screenshot written to the same path as the one above.
await page.screenshot({path: 'home.png', fullPage: true});
// Picks the first frame whose URL contains "https://accounts.";
// find() yields undefined if no such frame is in the list at this moment.
var iFrame = frames.find(f => f.url().indexOf("https://accounts.") > -1);
// NOTE(review): email and password are not declared in this snippet.
const usernameInput = await iFrame.waitForSelector("[name=username]" , {visible : true});
await usernameInput.type(email);
const passwordInput = await iFrame.waitForSelector("[name=password]" , {visible : true});
await passwordInput.type(password);
// The navigation wait is created before the click and awaited after it.
const navigationPromisePortal = page.waitForNavigation();
await iFrame.click(".primary");
await navigationPromisePortal;
var cookies = await page.cookies();
await browser.close();
return cookies;
}
I'm trying to get cookies from example.com, but every time I run this the accounts iframe is not loaded. The page is built with React. How do I solve this problem?

Categories