browser started with puppeteer is not available out of try-catch block - javascript

Here is the example code:
"use strict";
const puppeteer = require("puppeteer");
(async () => {
try {
const browser = await puppeteer.launch();
console.log(`browser=${browser}`);
var cnt_pages = (await browser.pages()).length;
console.log(`${cnt_pages} pages`);
} catch (error) {
console.error(error);
console.error(`can not launch`);
process.exit();
}
console.log(`browser=${browser}`);
var cnt_pages = (await browser.pages()).length;
console.log(`cnt_pages ${cnt_pages}`);
input("continue?");
})();
As a result, I get
(node:13408) UnhandledPromiseRejectionWarning: ReferenceError: browser is not defined
at S:\!kyxa\!code\play_chrome_cdp\nodejs_1\!node_tutorial\!play_async\try_catch_browser.js:15:26
at processTicksAndRejections (internal/process/task_queues.js:93:5)
at emitUnhandledRejectionWarning (internal/process/promises.js:168:15)
at processPromiseRejections (internal/process/promises.js:247:11)
at processTicksAndRejections (internal/process/task_queues.js:94:32)
(node:13408) ReferenceError: browser is not defined
at S:\!kyxa\!code\play_chrome_cdp\nodejs_1\!node_tutorial\!play_async\try_catch_browser.js:15:26
at processTicksAndRejections (internal/process/task_queues.js:93:5)
(node:13408) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
at emitDeprecationWarning (internal/process/promises.js:180:11)
at processPromiseRejections (internal/process/promises.js:249:13)
at processTicksAndRejections (internal/process/task_queues.js:94:32)
browser=[object Object]
1 pages
As I see, the browser is available and working in the try block. But after the try-catch block it is not available.
Could you please explain what is happening?

I've explored the issue. I define the browser value inside the try block but then use it after the try/catch. Variables declared with const (and let) are block-scoped, so browser only exists inside the try block.
This is the working code:
"use strict";
const puppeteer = require("puppeteer");
// Launch a browser, print how many pages it has, then use the same browser
// again after the try/catch.
(async () => {
  // Declared outside `try` so the binding is still in scope after the
  // try/catch; a `const`/`let` declared inside `try` is visible only there.
  let browser = null;
  try {
    browser = await puppeteer.launch();
    console.log(`browser=${browser}`);
    const cnt_pages = (await browser.pages()).length;
    console.log(`${cnt_pages} pages`);
  } catch (error) {
    console.error(error);
    console.error(`can not launch`);
    // Exit with a non-zero status so the caller can tell the launch failed.
    process.exit(1);
  }
  console.log(`browser=${browser}`);
  const cnt_pages = (await browser.pages()).length;
  console.log(`cnt_pages ${cnt_pages}`);
})();

You can elevate let browser out of the block and remove the const, but even after fixing this scoping issue, the browser resource still isn't closed, and any errors that might occur after the try/catch blocks are uncaught. Here's my preferred Puppeteer boilerplate that handles these situations:
const puppeteer = require("puppeteer");
const scrape = async page => {
// write your code here
const url = "https://www.example.com";
await page.goto(url, {waitUntil: "domcontentloaded"});
console.log(await page.title());
};
let browser;
(async () => {
browser = await puppeteer.launch();
const [page] = await browser.pages();
await scrape(page);
})()
.catch(err => console.error(err))
.finally(() => browser?.close());

Related

Node.js Puppeteer - 'Error: Evaluation failed: Error: Cannot focus non-HTMLElement' with YouTube Search input

I am trying to type into YouTube's search input using Puppeteer.
Code as follows:
(async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto('https://youtube.com');
await page.type('#search','a');
...
Here is the error I get:
throw new Error('Evaluation failed: ' + (0, util_js_1.getExceptionMessage)(exceptionDetails));
^
Error: Evaluation failed: Error: Cannot focus non-HTMLElement
at pptr://__puppeteer_evaluation_script__:3:23
at ExecutionContext._ExecutionContext_evaluate (/Users/benjaminrubin/node_modules/puppeteer/lib/cjs/puppeteer/common/ExecutionContext.js:286:15)
at processTicksAndRejections (node:internal/process/task_queues:96:5)
at async ExecutionContext.evaluate (/Users/benjaminrubin/node_modules/puppeteer/lib/cjs/puppeteer/common/ExecutionContext.js:117:16)
at async ElementHandle.evaluate (/Users/benjaminrubin/node_modules/puppeteer/lib/cjs/puppeteer/common/JSHandle.js:105:16)
at async ElementHandle.focus (/Users/benjaminrubin/node_modules/puppeteer/lib/cjs/puppeteer/common/ElementHandle.js:486:9)
at async ElementHandle.type (/Users/benjaminrubin/node_modules/puppeteer/lib/cjs/puppeteer/common/ElementHandle.js:516:9)
at async DOMWorld.type (/Users/benjaminrubin/node_modules/puppeteer/lib/cjs/puppeteer/common/DOMWorld.js:449:9)
at async /Users/benjaminrubin/Documents/Software Dev Education/Scraping with Node JS/youtubeScrape.js:60:9
I could not figure out what exactly is wrong. Several examples across the web use the exact same format. What exactly does 'Cannot focus non-HTMLElement' mean?
This is a tricky one. Google sites are notorious for breaching the "one id on a page" rule, so there are actually two elements with the id search:
<ytd-searchbox id="search"> <!-- the one you are actually selecting -->
... bunch of nodes ...
<input id="search"> <!-- the one you think you're selecting -->
await page.type('#search','a'); types into ytd-searchbox, which isn't a standard HTML element, so Puppeteer fails with the Error: Cannot focus non-HTMLElement error.
The fix is to use input#search instead:
const puppeteer = require("puppeteer"); // ^19.1.0
let browser;
(async () => {
browser = await puppeteer.launch();
const [page] = await browser.pages();
await page.goto("https://youtube.com", {waitUntil: "domcontentloaded"});
await page.type("input#search", "hello world");
await page.screenshot({path: "youtube.png"});
})()
.catch(err => console.error(err))
.finally(() => browser?.close());
Although the above solution may work, this is a good example of where simply encoding your search as a URL parameter and navigating directly to the results page is easier and more efficient:
const puppeteer = require("puppeteer");
let browser;
(async () => {
browser = await puppeteer.launch();
const [page] = await browser.pages();
const q = encodeURIComponent("your search here");
const url = `https://www.youtube.com/results?search_query=${q}`;
await page.goto(url, {waitUntil: "networkidle2"});
await page.screenshot({path: "youtube.png"});
})()
.catch(err => console.error(err))
.finally(() => browser?.close());

cannot read properties of undefined 'textContent'

I have a simple piece of code
// Test from the question: log in to GitHub, then print the text of the
// h3 heading whose class attribute is "text-normal".
describe('My First Puppeeteer Test', () => {
it('Should launch the browser', async function() {
const browser = await puppeteer.launch({ headless: false})
const page = await browser.newPage()
await page.goto('https://github.com/login')
await page.type('#login_field', testLogin)
await page.type('#password', testPassword)
await page.click('[name="commit"]')
await page.waitForNavigation()
// XPath attribute tests are written with "@": //h3[@class="..."]
let [element] = await page.$x('//h3[@class="text-normal"]')
let helloText = await page.evaluate(element => element.textContent, element);
console.log(helloText);
browser.close();
})
})
Everything worked before but today I get an error + my stacktrace:
Error: Evaluation failed: TypeError: Cannot read properties of undefined (reading 'textContent')
at puppeteer_evaluation_script:1:21
at ExecutionContext._evaluateInternal (node_modules\puppeteer\lib\cjs\puppeteer\common\ExecutionContext.js:221:19)
at processTicksAndRejections (node:internal/process/task_queues:96:5)
at async ExecutionContext.evaluate (node_modules\puppeteer\lib\cjs\puppeteer\common\ExecutionContext.js:110:16)
at async Context. (tests\example.tests.js:16:22)
How can I resolve this?
Kind regards
While I haven't tested the code due to the login and I assume your selectors are correct, the main problem is almost certainly that
await page.click('[name="commit"]')
await page.waitForNavigation()
creates a race condition. The docs clarify:
Bear in mind that if click() triggers a navigation event and there's a separate page.waitForNavigation() promise to be resolved, you may end up with a race condition that yields unexpected results. The correct pattern for click and wait for navigation is the following:
const [response] = await Promise.all([
page.waitForNavigation(waitOptions),
page.click(selector, clickOptions),
]);
As a side point, it's probably better to do waitForXPath rather than $x, although this seems less likely the root problem. Don't forget to await all promises such as browser.close().
const puppeteer = require("puppeteer");
// Log in to GitHub and print the text of the h3.text-normal heading.
// `browser` lives outside the async function so `.finally()` can close it
// even when one of the steps throws.
let browser;
(async () => {
  browser = await puppeteer.launch({headless: true});
  const [page] = await browser.pages();
  await page.goto('https://github.com/login');
  await page.type('#login_field', testLogin);
  await page.type('#password', testPassword);

  // Start waiting for the navigation together with the click. Awaiting
  // click() first and only then calling waitForNavigation() is a race.
  await Promise.all([
    page.waitForNavigation(),
    page.click('[name="commit"]'),
  ]);

  // waitForXPath waits for the heading to appear, whereas $x only returns
  // whatever matches at the moment it is called.
  const el = await page.waitForXPath('//h3[@class="text-normal"]');
  // Alternative: const el = await page.waitForSelector("h3.text-normal");
  const text = await el.evaluate(node => node.textContent);
  console.log(text);
})()
  .catch(err => console.error(err))
  // Returning the close() promise from finally() makes the chain wait for it,
  // which replaces the un-awaited browser.close() call.
  .finally(() => browser?.close());
Additionally, if you're using Jest, once you get things working, you might want to move the browser and page management to beforeEach/afterEach or beforeAll/afterAll blocks. It's faster to use the same browser instance for all test cases, and pages can be opened and closed before/after each case.

playwright Error: UnhandledPromiseRejectionWarning: TimeoutError: waiting for firstPage Proxy failed: timeout 30000ms exceeded

const playwright = require("playwright");
(async () => {
for (const browserType of ["chromium", "firefox", "webkit"]) {
const browser = await playwright[browserType].launch();
const context = await browser.newContext();
const page = await context.newPage('https://google.com/');
await page.screenshot({ path: `example-${browserType}.png` });
}
})();
I have tried to run this snippet. It takes a screenshot with Chromium and Firefox as expected, but it throws a timeout error when it tries to take a screenshot with WebKit, even though the local WebKit build is installed with Playwright.

Puppeteer: Simpler way to handle pages created on clicking a[target="_blank"]; wait for loading and include timeouts

Overview
I'm looking for a simpler way to handle clicking on links which open new pages (like target="_blank" anchor tags).
Here handle means:
get the new page object
wait for the new tab to load (with timeout)
Steps to reproduce
Tell us about your environment:
Puppeteer version: ^1.11.0
Platform / OS version: 64-bit, win 10 pro
URLs (if applicable): none
Node.js version: v10.15.0
I've looked at related issues:
https://github.com/GoogleChrome/puppeteer/issues/386
https://github.com/GoogleChrome/puppeteer/issues/3535
https://github.com/GoogleChrome/puppeteer/issues/978
and more
What steps will reproduce the problem?
I've included the code snippet below
I'm trying to:
Get the object for the new page when clicking on a link opens a new tab. (The links are dynamically generated, capturing href might not be the most elegant way)
Wait till the new page loads (with timeout). I'd like it if you can use page.waitForNavigation for consistency
close the tab and return the earlier tab to continue further operations
Please include code that reproduces the issue.
// as referenced here on #386 : https://github.com/GoogleChrome/puppeteer/issues/386#issuecomment-425109457
const getNewPageWhenLoaded = async () => {
return new Promise(x =>
global.browser.on('targetcreated', async target => {
if (target.type() === 'page') {
const newPage = await target.page();
const newPagePromise = new Promise(y =>
newPage.once('domcontentloaded', () => y(newPage))
);
const isPageLoaded = await newPage.evaluate(
() => document.readyState
);
return isPageLoaded.match('complete|interactive')
? x(newPage)
: x(newPagePromise);
}
})
);
};
const newPagePromise = getNewPageWhenLoaded();
await page.click('my-link'); // or just do await page.evaluate(() => window.open('https://www.example.com/'));
const newPage = await newPagePromise;
What is the expected result?
An easier and consistent way to handle new tabs
What happens instead?
The developer has to write what looks like plumbing (internal/ low level) commands.
Usage of waitForTarget might simplify this, but I've not been able to get the predicate to return the right types. Here's my non-functional code
private async getNewPageWhenLoaded() {
const newTarget = await this._browser.waitForTarget(async (target) => {
const newPage = await target.page();
await newPage.waitForNavigation(this._optionsNavigation);
// const newPagePromise = new Promise(() => newPage.once('load', () => x(newPage)));
return await newPage.evaluate("true");
});
return await newTarget.page();
}
// elsewhere in the code
const newPagePromise = this.getNewPageWhenLoaded();
await resultItem.element.click();
const newPage = <Page>await newPagePromise;
//I get the following error
DevTools listening on ws://127.0.0.1:31984/devtools/browser/bf86648d-d52d-42d8-a392-629bf96211d4
(node:5564) UnhandledPromiseRejectionWarning: Error: Navigation failed because browser has disconnected!
at CDPSession.LifecycleWatcher._eventListeners.helper.addEventListener (<path-to-my-project>\node_modules\puppeteer\lib\FrameManager.js:1181:107)
at CDPSession.emit (events.js:182:13)
at CDPSession._onClosed (<path-to-my-project>\node_modules\puppeteer\lib\Connection.js:231:10)
at Connection._onMessage (<path-to-my-project>\node_modules\puppeteer\lib\Connection.js:103:19)
at WebSocketTransport._ws.addEventListener.event (<path-to-my-project>\node_modules\puppeteer\lib\WebSocketTransport.js:41:24)
at WebSocket.onMessage (<path-to-my-project>\node_modules\ws\lib\event-target.js:120:16)
at WebSocket.emit (events.js:182:13)
at Receiver.receiverOnMessage (<path-to-my-project>\node_modules\ws\lib\websocket.js:741:20)
at Receiver.emit (events.js:182:13)
at Receiver.dataMessage (<path-to-my-project>\node_modules\ws\lib\receiver.js:417:14)
-- ASYNC --
at Frame.<anonymous> (<path-to-my-project>\node_modules\puppeteer\lib\helper.js:144:27)
at Page.waitForNavigation (<path-to-my-project>\node_modules\puppeteer\lib\Page.js:644:49)
at Page.<anonymous> (<path-to-my-project>\node_modules\puppeteer\lib\helper.js:145:23)
at newTarget._browser.waitForTarget (<path-to-my-project>\pageObjects\MyPage.js:104:27)
at process._tickCallback (internal/process/next_tick.js:68:7)
(node:5564) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 1)
(node:5564) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
(node:5564) UnhandledPromiseRejectionWarning: TimeoutError: Navigation Timeout Exceeded: 300000ms exceeded
at Promise.then (<path-to-my-project>\node_modules\puppeteer\lib\FrameManager.js:1276:21)
-- ASYNC --
at Frame.<anonymous> (<path-to-my-project>\node_modules\puppeteer\lib\helper.js:144:27)
at Page.waitForNavigation (<path-to-my-project>\node_modules\puppeteer\lib\Page.js:644:49)
at Page.<anonymous> (<path-to-my-project>\node_modules\puppeteer\lib\helper.js:145:23)
at newTarget._browser.waitForTarget (<path-to-my-project>\pageObjects\MyPage.js:104:27)
at process._tickCallback (internal/process/next_tick.js:68:7)
(node:5564) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 2)
Note: issue I've created on github: https://github.com/GoogleChrome/puppeteer/issues/3718
First, run the click; then remove "global" inside the promise and declare browser as a constant outside the promise:
const browser = await puppeteer.launch();
await page.click('my-link');
/**
 * Resolve with the Page of the next page-type target once its DOM is usable.
 *
 * Subscribes to the browser's `targetcreated` event and ignores non-page
 * targets. It resolves with the new page when `document.readyState` is
 * already "interactive" or "complete", and otherwise when the page emits
 * `domcontentloaded`.
 *
 * NOTE(review): only targets created after this call are observed, so call
 * it before triggering the action that opens the new tab.
 *
 * @returns {Promise<object>} promise for the newly opened page
 * @throws the promise rejects if fetching or inspecting the new page fails
 */
const getNewPageWhenLoaded = () =>
  new Promise((resolve, reject) => {
    const onTargetCreated = async target => {
      if (target.type() !== 'page') {
        return;
      }
      // Unsubscribe as soon as a page target shows up so repeated calls do
      // not pile up listeners on the browser.
      browser.off('targetcreated', onTargetCreated);
      try {
        const newPage = await target.page();
        // Subscribe before reading readyState so the event cannot fire
        // unnoticed between the check and the subscription.
        const domReady = new Promise(done =>
          newPage.once('domcontentloaded', () => done(newPage))
        );
        const readyState = await newPage.evaluate(
          () => document.readyState
        );
        resolve(/complete|interactive/.test(readyState) ? newPage : domReady);
      } catch (err) {
        // Report failures to the caller instead of leaving an unhandled
        // rejection inside the event handler.
        reject(err);
      }
    };
    browser.on('targetcreated', onTargetCreated);
  });
const newPagePromise = getNewPageWhenLoaded();
const newPage = await newPagePromise;

Puppeteer Unhandled Rejected at: Promise - Why exception?

I am new to both Puppeteer and JavaScript. I am trying to automate some simple tasks, only that the elements appear within iframes - but I have resolved this. What I am unclear about is the exception thrown when I uncomment await browser.close().
My code:
const baseUrl = "https://test-environment.com/ABH2829.html?token=dhdj7s8383937hndkeie8j3jebd";
const puppeteer = require('puppeteer');
const expect = require('chai').expect;
const clickClothingButton = async () => {
try {
const browser = await puppeteer.launch({
headless: false,
slowMo: 250,
});
const page = await browser.newPage();
await page.setViewport({width: 1280, height: 800});
process.on('unhandledRejection', (reason, p) => {
console.error('Unhandled Rejected at: Promise', p, 'reason:', reason);
browser.close();
});
await page.goto(baseUrl, {waitUntil: 'networkidle2'});
const navigationPromise = page.waitForNavigation({timeout: 3000});
await page.waitForSelector('.widget-title');
const frame = page.frames().find(frame => frame.name() === 'iframe');
const clothingButton = await frame.$('#clothing-button');
clothingButton.click();
await navigationPromise;
await browser.close();
} catch (error) {
console.log(error);
throw new Error(error);
}
};
clickClothingButton();
Now this runs fine, but I always get the following:
Unhandled Rejected at: Promise Promise {
<rejected> Error: TimeoutError: Navigation Timeout Exceeded: 3000ms exceeded
If I try to just:
await browser.close();
Then it barfs with:
Unhandled Rejected at: Promise Promise {
<rejected> { Error: Protocol error (Runtime.callFunctionOn): Target closed.
What's the best way of handling this gracefully, and why can't I just close the browser? Bear in mind I'm still learning about Promises and the contracts that must be fulfilled for them.
First of all, the site you appear to be accessing requires authentication.
You can use page.authenticate() to provide credentials for the HTTP authentication:
await page.authenticate({
username: 'username',
password: 'password',
});
Additionally, the timeout you set for page.waitForNavigation() is only 3000 ms (3 seconds), while the default is 30000 ms (30 seconds), so if it takes longer than the set amount of time to load the page, you are going to receive a TimeoutError.
I would strongly recommend allowing at least the default maximum navigation time of 30 seconds for the navigation to occur. You can use the timeout option in page.waitForNavigation() or use page.setDefaultNavigationTimeout().
Lastly, elementHandle.click() returns a Promise, so you need to await clothingButton.click():
await clothingButton.click();

Categories