I have a string whose value changes, and I need to put this string into an input field.
(async () => {
  let pageNum = 1004327;
  let browser = await puppeteer.launch({
    headless: true,
  });
  let page = await browser.newPage();
  while (1) {
    await page.goto(`${link}${pageNum}`);
    page.setDefaultTimeout(0);
    let html = await page.evaluate(async () => {
      let mail = document.getElementById(
        "ctl00_phWorkZone_DbLabel8"
      ).innerText;
      let obj = {
        email: mail,
        link: window.location.href,
      };
      return obj;
    });
    if (Object.values(html)[0]) {
      await staff.type(
        "textarea[name=mail_address]",
        Object.values(html).join("\n"),
        {
          delay: 0,
        }
      );
      console.log(pageNum);
      pageNum++;
      staff.click("input[name=save]", { delay: 0 });
    }
  }
})();
I used the .type() method, and it works, but I need something faster.
The .type() method fills the input as if a human were typing it. Instead of using that, try the .keyboard.sendCharacter() method (source). It fills the input instantly, without simulating keystrokes.
Here is an example of how to use it:
const puppeteer = require('puppeteer');

(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://stackoverflow.com/q/75395199/12715723");
  let input = await page.$('input[name="q"]');
  await input.click();
  await page.keyboard.sendCharacter('test');
})();
I'm not sure how the input works on the page since it requires auth, but in many cases (depending on how the site listens for changes on the element) you can set the value using browser JS:
await staff.$eval(
  'textarea[name="mail_address"]',
  (el, value) => el.value = value,
  Object.values(html).join("\n")
);
Runnable, minimal example:
const puppeteer = require("puppeteer"); // ^19.6.3

const html = `<!DOCTYPE html><html><body>
<textarea name="mail_address"></textarea></body></html>`;

let browser;
(async () => {
  browser = await puppeteer.launch();
  const [page] = await browser.pages();
  await page.setContent(html);
  const sel = 'textarea[name="mail_address"]';
  await page.$eval(
    sel,
    (el, value) => el.value = value,
    "testing"
  );
  console.log(await page.$eval(sel, el => el.value)); // => testing
})()
  .catch(err => console.error(err))
  .finally(() => browser?.close());
If this doesn't work, you can try triggering a change handler on the element with a trusted Puppeteer keyboard action or manually with an untrusted JS browser event. You can also use keyboard.sendCharacter when focus is on the element, as pointed out in this answer:
await page.focus(selector);
await page.keyboard.sendCharacter("foobar");
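For the untrusted-event route, here's a minimal sketch (assuming the page reacts to plain input/change events on the textarea; the selector is taken from your code):
await page.$eval("textarea[name=mail_address]", (el, value) => {
  el.value = value;
  // Fire the events many frameworks listen for so the new value is noticed:
  el.dispatchEvent(new Event("input", { bubbles: true }));
  el.dispatchEvent(new Event("change", { bubbles: true }));
}, "testing");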
Note that in your original code, .setDefaultTimeout(0); is risky because it can cause the script to hang forever without raising an error. staff.click and page.authenticate need await since they return promises.
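For example, a sketch of those two fixes applied to the lines above (keeping your staff handle as-is):
page.setDefaultTimeout(30000); // a finite timeout fails loudly instead of hanging
await staff.click("input[name=save]", { delay: 0 }); // click() returns a promise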
Related
I'm trying to create an application to search for my music on some sites that post illegal content so I can ask them to delete it later.
I am facing this problem in Puppeteer: when I try to press Enter on the search input, I get this error: Error: Execution context was destroyed, most likely because of a navigation.
I have two files. One, urlScrapper.js, contains my script and an array with the names of my songs:
import InfringementFinder from './InfringementFinder.js';
const songs = ['Artist Name - Song Name', 'Artist Name - Song Name'];
const irscCodes = ['XXXXXXXXXX', 'XXXXXXXXXX'];
InfringementFinder(songs, irscCodes).then(() => {
  console.log('Search complete!');
}).catch((error) => {
  console.error(error);
});
and InfringementFinder.js:
import puppeteer from 'puppeteer';

const InfringementFinder = async (songs, irscCodes) => {
  const browser = await puppeteer.launch();
  const page = await browser.newPage();
  const mainPage = 'https://example.com/';
  await page.goto(mainPage);

  // This enters the search term in the input field
  await page.type('.search-field', 'Artist Name - Song Name'); // this is supposed to come from my prop, but somehow it doesn't work

  // Trigger the search by submitting the form
  const searchSubmit = await page.waitForSelector('.search-submit');
  await searchSubmit.press('Enter');

  // Wait for the search results to load
  await page.waitForSelector('.g1-frame');

  // This finds the first entry-content element containing the information
  const entryContent = await page.$('.g1-frame');
  if (!entryContent) return;

  // This presses Enter on the element
  await entryContent.press('Enter');

  // Extract the relevant information
  try {
    const data = await page.evaluate(() => {
      const trackElements = Array.from(document.querySelectorAll('li', 'ol', 'a', 'href', 'strong', 'span', 'p', 'div', 'class'))
        .filter(el => el.innerText.includes('Artist Name - Song Name'));
      const tracks = trackElements.map(trackElement => {
        const trackName = trackElement.innerText.split(' – ')[0];
        return { trackName };
      });
      const downloadLinks = Array.from(document.querySelectorAll('.dl-btn'))
        .map(link => link.getAttribute('href'));
      return { tracks, downloadLinks };
    });
    console.log('Data:', data);
  } catch (error) {
    console.error(error);
  } finally {
    await browser.close();
  }
};

export default InfringementFinder;
It only works if I scrape a page where I already know my music is posted, using a different version of the code, but the idea is to search the whole website through the search input.
The logic is as follows: You click on the search input, type the name of the song, navigate to another page, click on your music, navigate to another page, and scrape the name of the songs and links to illegal downloads.
Your error is probably due to the following code:
await entryContent.press('Enter'); // triggers a nav
// Extract the relevant information immediately
// without waiting for nav to complete
try {
  const data = await page.evaluate(() => {
I'd either wait for a nav here or wait for the selector on the next page you're about to access with evaluate.
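For example, a sketch of the first option (the second selector is just one you already use in your evaluate code):
// Wait for the navigation triggered by the Enter press before evaluating:
await Promise.all([
  page.waitForNavigation(),
  entryContent.press('Enter'),
]);
// ...or, alternatively, wait for an element you expect on the next page:
await page.waitForSelector('.dl-btn');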
Also,
document.querySelectorAll('li', 'ol', 'a', 'href', 'strong', 'span', 'p', 'div', 'class')
doesn't make sense: querySelectorAll only accepts one parameter, and 'class' isn't a name of an HTML element. It's a good idea to test this in the browser first, because it's plain JS.
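If the intent was to match several element types at once, querySelectorAll takes a single comma-separated selector string (dropping 'href' and 'class', which aren't element names), for example:
const elements = document.querySelectorAll('li, ol, a, strong, span, p, div');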
I don't see 'Artist Name - Song Name' anywhere on the page.
This code:
await page.waitForSelector('.g1-frame');
// This finds the first entry-content element containing the information
const entryContent = await page.$('.g1-frame');
if (!entryContent) return;
could just be:
const entryContent = await page.waitForSelector('.g1-frame');
It's common to assume you need to navigate the site as the user would: go to the homepage, type in the search term, press Enter...
Better is to look at the query string of the search page and build your own, avoiding the extra nav and fuss of dealing with the DOM. Here's an example:
const puppeteer = require("puppeteer"); // ^19.6.3

const baseUrl = "<Your base URL, ending in .net>";
const searchTerm = "autechre";

let browser;
(async () => {
  browser = await puppeteer.launch();
  const [page] = await browser.pages();
  const url = `${baseUrl}?s=${encodeURIComponent(searchTerm)}`;
  await page.setJavaScriptEnabled(false);
  await page.setRequestInterception(true);
  page.on("request", request => {
    if (request.resourceType() === "document") {
      request.continue();
    }
    else {
      request.abort();
    }
  });
  await page.goto(url, {waitUntil: "domcontentloaded"});
  await (await page.$(".g1-frame")).click();
  const trackListEl = await page.waitForSelector(".entry-content > .greyf12");
  const tracks = await trackListEl.$$eval("li", els => {
    const fields = ["artist", "track"];
    return els.map(e =>
      Object.fromEntries(
        e.textContent
          .split(/ *– */)
          .map((e, i) => [fields[i], e.trim()]),
      )
    );
  });
  const downloadLinks = await page.$$eval(".dl-btn", els =>
    els.map(e => e.getAttribute("href"))
  );
  console.log({tracks, downloadLinks});
})()
  .catch(err => console.error(err))
  .finally(() => browser?.close());
Note that we don't need to execute JS and we're blocking almost all resource requests, so we can speed up the scrape significantly by switching to fetch/axios and a simple HTML parser like Cheerio:
const cheerio = require("cheerio"); // 1.0.0-rc.12

const baseUrl = "<Your base URL, ending in .net>";
const searchTerm = "autechre";
const url = `${baseUrl}?s=${encodeURIComponent(searchTerm)}`;

const get = url =>
  fetch(url) // Node 18 or install node-fetch, or use another library like axios
    .then(res => {
      if (!res.ok) {
        throw Error(res.statusText);
      }
      return res.text();
    });

get(url)
  .then(html =>
    get(cheerio.load(html)(".entry-title a").attr("href"))
  )
  .then(html => {
    const $ = cheerio.load(html);
    const tracks = [...$(".entry-content > .greyf12 li")].map(
      e => {
        const fields = ["artist", "track"];
        return Object.fromEntries(
          $(e)
            .text()
            .split(/ *– */)
            .map((e, i) => [fields[i], e.trim()])
        );
      }
    );
    const downloadLinks = [...$(".dl-btn")].map(e =>
      $(e).attr("href")
    );
    console.log({tracks, downloadLinks});
  });
The code is simpler, and on my machine, twice as fast as Puppeteer.
I want to type into 2 inputs at the same time, but in fact both texts end up in the second input.
const puppeteer = require("puppeteer");

(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://example.com");
  await Promise.all([
    page.type("#user", "user"),
    page.type("#password", "password"),
  ]);
  await browser.close();
})();
The second input looks like upsaesrsword
The behavior is intended.
Related issue on GitHub:
https://github.com/puppeteer/puppeteer/issues/1958
Alternative solution:
await page.$eval(
  '#user',
  (handle, text) => {
    handle.value = text;
    handle.dispatchEvent(new Event('change', {bubbles: true}));
  },
  'user'
);
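A sketch of how that could look for both of your fields, with a small hypothetical setValue helper (assuming the page reacts to a plain change event):
const setValue = (page, selector, text) =>
  page.$eval(selector, (el, value) => {
    el.value = value;
    el.dispatchEvent(new Event('change', { bubbles: true }));
  }, text);

await Promise.all([
  setValue(page, '#user', 'user'),
  setValue(page, '#password', 'password'),
]);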
I am trying to use Puppeteer to scrape the date element of each sold listing on eBay.
However, when I run my code, I get an error that says "Cannot read properties of null (reading 'textContent')". This is my code:
async function scrape() {
  const browser = await puppeteer.launch({
    headless: false
  });
  const page = await browser.newPage();
  await page.goto('https://www.ebay.com/sch/i.html?_from=R40&_nkw=Blastoise+%282%2F102%29+%5BCelebrations%3A+Classic+Collection%5D&_sacat=0&Graded=No&_dcat=183454&rt=nc&LH_Sold=1&LH_Complete=1');
  await page.waitForSelector('.s-item');
  let cards = await page.evaluate(() => {
    let cardElement = document.body.querySelectorAll('.s-item')
    let cards = Object.values(cardElement).map(x => {
      return {
        date: x.querySelector('.s-item__title--tagblock span.POSITIVE').textContent ?? null
      }
    })
    return cards
  })
  console.log(cards)
}
scrape();
How can I solve this?
The first item in the result list is sort of a dummy/template/placeholder node that has no data in it. You can use .slice(1) or .s-item:not(:first-child) to skip that first node.
While you're at it, you might as well make your waitForSelector more precise to match the element you're about to select (although this line isn't necessary, as I'll explain below), use domcontentloaded to speed up goto and use ?.textContent so you can see undefined rather than a crash, which is how I debugged the problem. .textContent ?? null doesn't work because the throw happens before ?? null has the chance to evaluate.
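To illustrate in plain JS why ?. helps where ?? alone doesn't:
const el = null;           // what querySelector returns when nothing matches
// el.textContent ?? null  // throws: the property access runs before ?? gets a chance
el?.textContent ?? null    // => null, no throw
Here is the full example with those changes applied: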
const puppeteer = require("puppeteer"); // ^14.1.1

let browser;
(async () => {
  browser = await puppeteer.launch({headless: true});
  const [page] = await browser.pages();
  const url = "https://www.ebay.com/sch/i.html?_from=R40&_nkw=Blastoise+%282%2F102%29+%5BCelebrations%3A+Classic+Collection%5D&_sacat=0&Graded=No&_dcat=183454&rt=nc&LH_Sold=1&LH_Complete=1";
  await page.goto(url, {waitUntil: "domcontentloaded"});
  await page.waitForSelector(".s-item .s-item__title--tagblock span.POSITIVE");
  const cards = await page.evaluate(() =>
    [...document.querySelectorAll(".s-item:not(:first-child)")].map(x => ({
      date: x
        .querySelector(".s-item__title--tagblock span.POSITIVE")
        ?.textContent
    }))
  );
  console.log(cards);
})()
  .catch(err => console.error(err))
  .finally(() => browser?.close());
Better yet, skip Puppeteer entirely and make an HTTP request with Axios or fetch, then use Cheerio to parse the static HTML, which already has the data you need:
const axios = require("axios");
const cheerio = require("cheerio");

const url = "https://www.ebay.com/sch/i.html?_from=R40&_nkw=Blastoise+%282%2F102%29+%5BCelebrations%3A+Classic+Collection%5D&_sacat=0&Graded=No&_dcat=183454&rt=nc&LH_Sold=1&LH_Complete=1";

axios.get(url).then(({data}) => {
  const $ = cheerio.load(data);
  const cards = [];
  $(".s-item:not(:first-child)").each(function (i, e) {
    $(this).find(".s-item__title--tagblock .POSITIVE").each(function (i, e) {
      cards.push($(this).text().trim());
    });
  });
  console.log(cards);
})
  .catch(err => console.error(err));
I am trying to scrape the "key features" part of https://www.alpinestars.com/products/stella-missile-v2-1-piece-suit-1 using Puppeteer. However, whenever I use a selector that works in the Chrome console for that page, my code's output is always an empty array or object. For example, both document.querySelectorAll("#key\ features > p") and document.getElementById('key features') return empty arrays or objects when run through my code, but they work in the Chrome console.
I have attached my code below:
const puppeteer = require('puppeteer');

async function getDescripData(url) {
  const browser = await puppeteer.launch({headless: true});
  const page = await browser.newPage();
  await page.goto(url);
  const descripFeatures = await page.evaluate(() => {
    const tds = Array.from(document.getElementById('key features'))
    console.log(tds)
    return tds.map(td => td.innerText)
  });
  console.log(descripFeatures)
  await browser.close();
  return {
    features: descripFeatures
  }
}
How should I go about overcoming this issue?
Thanks in advance!
Your problem is in Array.from: you are passing it a non-iterable object (the single element returned by getElementById), so you get an empty array back rather than the elements you want.
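For comparison, a quick sketch you can run in the browser console (element names taken from the question; the exact result depends on the page):
const el = document.getElementById('key features'); // a single element, not a list
Array.from(el);                                      // => [] — an element isn't iterable
Array.from(el.children).map(td => td.innerText);     // iterate its child elements instead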
This works for me:
const puppeteer = require('puppeteer');

const url = 'https://www.alpinestars.com/products/stella-missile-v2-1-piece-suit-1';

(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: null,
    args: ['--start-maximized'],
    devtools: true
  });
  const page = (await browser.pages())[0];
  await page.goto(url);
  const descripFeatures = await page.evaluate(() => {
    const tds = document.getElementById('key features').innerText;
    return tds.split('• ');
  });
  console.log(descripFeatures)
  await browser.close();
})();
My problem is that I need to set the comment selector to "all comments" with Puppeteer, but the comments don't render after Puppeteer clicks on the correct button ("all the comments"); the comment section just disappears. I will provide the code and a video of the browser in action.
const $ = require('cheerio');
const puppeteer = require('puppeteer');

const url = 'https://www.facebook.com/pg/SamsungGlobal/posts/';

const main = async () => {
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--no-sandbox', '--disable-setuid-sandbox']
  });
  const page = await browser.newPage();
  await page.setViewport({
    width: 1920,
    height: 1080
  });
  await page.goto(url, {
    waitUntil: 'networkidle2',
    timeout: 0
  });
  page.mouse.click(50, 540, {});
  for (var a = 0; a < 18; a++) {
    setTimeout(() => {}, 16);
    await page.keyboard.press('ArrowDown');
  }
  let bodyHTML = await page.evaluate(() => document.body.innerHTML);
  var id = "#" + $("._427x ._4-u2.mbm._4mrt", bodyHTML).attr('id'); // selects id of first post
  try {
    var exp = await page.$(`${id} a._21q1`); // clicks on "most relevant" from the first post
    await exp.evaluate(exp => exp.click());
    await page.click('div[data-ordering="RANKED_UNFILTERED"]'); // selects "all the comments"
    var exp = await page.$(`${id} a._42ft`); // should click on "more comments" but it doesn't load
    await exp.evaluate(exp => exp.click());
    await page.waitForSelector(`${id} a._5v47.fss`); // wait for the "others" in facebook comments
    var exp = await page.$$(`${id} a._5v47.fss`);
    await exp.evaluate(exp => exp.click());
    await page.screenshot({
      path: "./srn4.png"
    });
    // var post = await page.$eval(id + " .userContentWrapper", el => el.innerHTML);
    // console.log("that's the post " + post);
  } catch (e) {
    console.log(e);
  }
  setTimeout(async function() {
    await browser.close(); // close after some time
  }, 1500);
};

main();
That's the video of the full execution process: https://youtu.be/jXpSOBfVskg
That's a slow motion of the moment it click on the menu: https://youtu.be/1OgfFNokxsA
You can try a variant with selectors:
'use strict';

const puppeteer = require('puppeteer');

(async function main() {
  try {
    const browser = await puppeteer.launch({ headless: false });
    const [page] = await browser.pages();
    await page.goto('https://www.facebook.com/pg/SamsungGlobal/posts/');
    await page.waitForSelector('[data-ordering="RANKED_THREADED"]');
    await page.click('[data-ordering="RANKED_THREADED"]');
    await page.waitForSelector('[data-ordering="RANKED_UNFILTERED"]');
    await page.click('[data-ordering="RANKED_UNFILTERED"]');
  } catch (err) {
    console.error(err);
  }
})();
page.mouse.click(50, 540, {});
This is not necessarily going to work. What are you trying to click? You need to use CSS selectors to find the elements you want to click.
Also, dynamic elements might not appear in the page right away. You should use waitForSelector as needed.
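For example, using one of the selectors from the snippet above:
// Wait until the element exists in the DOM, then click it by CSS selector
// instead of by fixed screen coordinates:
const ordering = await page.waitForSelector('[data-ordering="RANKED_THREADED"]');
await ordering.click();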