How to take screenshots for multiple pages automatically using Puppeteer - javascript

I have a list of URLs in a text file. I want to take a screenshot of each page automatically by reading each URL from the text file using Puppeteer.
const puppeteer = require('puppeteer');
/**
 * Opens `url` in a freshly launched browser and saves a full-page PNG
 * named `${site_name}.png`.
 *
 * @param {string} url - Address of the page to capture.
 * @param {string} site_name - Output file name without the `.png` extension.
 * @param {number} [settleMs=1000] - Pause after navigation, in milliseconds,
 *   before the screenshot is taken.
 * @returns {Promise<void>} Resolves once the screenshot has been written and
 *   the browser has been closed; rejects if launching, navigating or
 *   capturing fails.
 */
async function doScreenCapture(url, site_name, settleMs = 1000) {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    // Size the viewport before navigating so the page lays out once at the target size.
    await page.setViewport({ width: 1382, height: 717 });
    await page.goto(url);
    // Plain timer rather than page.waitFor, which is deprecated and absent from
    // recent Puppeteer releases.
    await new Promise((resolve) => setTimeout(resolve, settleMs));
    console.log('do screen capture running');
    await page.screenshot({ path: `${site_name}.png`, fullPage: true });
  } finally {
    // Always release the browser (and its pages), even when a step above throws.
    await browser.close();
  }
}
// Reads linksList.txt, splits it on newlines, and calls doScreenCapture for each
// entry, passing "image" + index as the output name.
async function run() {
console.log('running');
var fs = require("fs");
var text = fs.readFileSync("linksList.txt").toString().split("\n");
for (var i = 0; i < text.length; ++i) {
// NOTE(review): doScreenCapture is async and is not awaited here, so the
// "completed" message below is logged before the capture has finished.
doScreenCapture(text[i], "image"+i)
console.log("image"+i+" completed");
// NOTE(review): `page` is not declared anywhere in run(); it is a local of
// doScreenCapture, so this line throws "ReferenceError: page is not defined".
await page.waitFor(5000);
}
}
// Start the script. The promise returned by run() is neither awaited nor given a .catch() handler.
run();
This is the error I got while running the code
(node:77868) UnhandledPromiseRejectionWarning: ReferenceError: page is not defined
at run
at Object.
at Module._compile (module.js:653:30)
at Object.Module._extensions..js (module.js:664:10)
at Module.load (module.js:566:32)
at tryModuleLoad (module.js:506:12)
at Function.Module._load (module.js:498:3)
at Function.Module.runMain (module.js:694:10)
at startup (bootstrap_node.js:204:16)
at bootstrap_node.js:625:3
(node:77868) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 1)
(node:77868) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
do screen capture running.

I see two issues here:
`page` is not defined inside the `run` function (it only exists inside `doScreenCapture`), and that is what raises the exception. You can move the waiting into `doScreenCapture`.
`doScreenCapture` is an async function, so you should call it as `await doScreenCapture(...)` in order for the pages to be opened one after another rather than all at once.

Related

Troubles to show the browser using puppeteer from WSL2

I have trouble running my Puppeteer script with the "headless" option set to false in order to show the browser.
I'm working with WSL2; here is my script:
const puppeteer = require('puppeteer');
(async () => {
  console.log('Start Scraping...');
  // Here's the problem: this launch call, with headless: false, is the one that
  // fails in the output quoted below.
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://es.wikipedia.org/wiki/Node.js');
  } finally {
    // Close the browser even if opening the page or navigating throws.
    await browser.close();
  }
  console.log('End Scraping...');
})();
This is the output when i try to run it
Start Scraping...
(node:3057) UnhandledPromiseRejectionWarning: Error: Failed to launch the browser process!
[3068:3068:0429/043013.248542:ERROR:browser_main_loop.cc(1393)] Unable to open X display.
TROUBLESHOOTING: https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md
at onClose (/home/nilsonkr/courses/nodejs/foundaments/node_modules/puppeteer/lib/cjs/puppeteer/node/BrowserRunner.js:194:20)
at ChildProcess.<anonymous> (/home/nilsonkr/courses/nodejs/foundaments/node_modules/puppeteer/lib/cjs/puppeteer/node/BrowserRunner.js:185:79)
at ChildProcess.emit (events.js:327:22)
at Process.ChildProcess._handle.onexit (internal/child_process.js:277:12)
(Use `node --trace-warnings ...` to show where the warning was created)
(node:3057) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:3057) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
When I set that value to true, or just leave it out, it works, but I am still wondering what the reason is and how I can fix it so that the browser is shown.
I have already tried adding these flags, but that didn't work either:
// Attempted variant: the same headless: false launch, plus the two sandbox-related flags.
const browser = await puppeteer.launch({
  headless: false,
  args: ['--no-sandbox', '--disable-setuid-sandbox'],
});

Trouble with uploading files to firebase storage

I'm learning about puppeteer and firebase at the moment. What I am trying to do is create a pdf of a web page and upload to firebase storage. This is my code.
const puppeteer = require('puppeteer');
const fs = require('fs').promises;
const firebase = require('firebase');
require("firebase/storage");
// Page to render, taken from the first command-line argument.
const url = process.argv[2];
if (!url) {
  // Throw a real Error (not a bare string) so the failure carries a stack trace.
  throw new Error("Please provide URL as a first argument");
}
const firebaseConfig = {
  // Firebase Config Goes here
};
// Initialize Firebase
firebase.initializeApp(firebaseConfig);
/**
 * Generates a PDF of the page at the module-level `url`.
 *
 * @returns {Promise<*>} Resolves with whatever `page.pdf()` produces (the PDF
 *   data) after the browser has been closed; rejects if launching, navigating
 *   or rendering fails.
 */
async function run() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 60000 });
    // To also write the file to disk, pass e.g. { path: 'api.pdf', format: 'A4' } to page.pdf().
    return await page.pdf();
  } finally {
    // Close the browser even when navigation or PDF rendering throws.
    await browser.close();
  }
}
// NOTE(review): run() is an async function, so myOutput is a pending Promise here,
// not the PDF data; nothing awaits it before the upload below.
const myOutput = run();
#Upload to Firebase based on the instruction here https://firebase.google.com/docs/storage/web/upload-files
var storageRef = firebase.storage().ref();
// NOTE(review): child() is given myOutput rather than a string path such as
// "filename.pdf"; this is the call that raises "childPath.split is not a function".
storageRef.child(myOutput).put(myOutput)
However, I'm running into this error when executing my code
$ node screenshot.js https://en.wikipedia.org/wiki/Aung_San_Suu_Kyi
C:\Users\ppham\NucampFolder\Test\node_modules\@firebase\storage\dist\index.cjs.js:1040
.split('/')
^
TypeError: childPath.split is not a function
at child (\Test\node_modules\@firebase\storage\dist\index.cjs.js:1040:10)
at getChild (\Test\node_modules\@firebase\storage\dist\index.cjs.js:2610:19)
at ReferenceCompat.child (Test\node_modules\@firebase\storage\dist\index.cjs.js:2833:25)
at Object.<anonymous> (C:\Users\ppham\NucampFolder\Test\screenshot.js:55:12)
at Module._compile (internal/modules/cjs/loader.js:936:30)
at Object.Module._extensions..js (internal/modules/cjs/loader.js:947:10)
at Module.load (internal/modules/cjs/loader.js:790:32)
at Function.Module._load (internal/modules/cjs/loader.js:703:12)
at Function.Module.runMain (internal/modules/cjs/loader.js:999:10)
at internal/main/run_main_module.js:17:11
It is saying that childPath.split is not a function. I'm pretty confused at this since I've already installed all the firebase packages. I've been searching for this error for a while but no luck so far. Anyone knows how to resolve this issue?
===============================================================================
Edit #1: As Frank pointed out below, I have to change storageRef.child(myOutput).put(myOutput) to something like storageRef.child("filename.pdf").put(myOutput). It runs but I'm running into this error afterwards.
$ node screenshot.js https://google.com
Promise { <pending> }
(node:20636) UnhandledPromiseRejectionWarning: TypeError: Cannot read property 'byteLength' of undefined
at C:\Users\ppham\NucampFolder\Test\node_modules\@firebase\storage\dist\index.cjs.js:833:40
at Array.forEach (<anonymous>)
at Function.FbsBlob.getBlob (C:\Users\ppham\NucampFolder\Test\node_modules\@firebase\storage\dist\index.cjs.js:832:25)
at multipartUpload (C:\Users\ppham\NucampFolder\Test\node_modules\@firebase\storage\dist\index.cjs.js:1519:24)
at C:\Users\ppham\NucampFolder\Test\node_modules\@firebase\storage\dist\index.cjs.js:2003:31
at C:\Users\ppham\NucampFolder\Test\node_modules\@firebase\storage\dist\index.cjs.js:1900:21
at processTicksAndRejections (internal/process/task_queues.js:85:5)
(node:20636) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 1)
(node:20636) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
This seems to imply that myOutput doesn't contain anything. I thought that since I return the PDF from the run function, it would be passed on to the upload call? I've been reading the Puppeteer documentation and couldn't find any reason why this wouldn't work. Does anyone know why this is not valid?
The error message is saying that the value you're passing to child() is not a string.
Since you're calling:
storageRef.child(myOutput).put(myOutput)
That kind of makes sense. My guess is that you want to pass a file name into child(), like:
storageRef.child("filename.pdf").put(myOutput)

Javascript is losing a backtrace in the catch block

The following code sample:
async function expectResolutionErrorCode(object) { // invoked below with `undefined` as `object`
try {
await Promise.resolve(1); // the function suspends here once before touching `object`
object.f1() // caught error
} catch (error) {
object.f2() // uncaught error
}
}
expectResolutionErrorCode(undefined).catch(err => console.log(err)); // logs the error thrown from the catch block
Produces the following backtrace when run using node v14:
$ node test.ts
TypeError: Cannot read property 'f2' of undefined
at expectResolutionErrorCode (/Users/bogdan/makabu/unstoppable/resolution/test.ts:6:12)
If I comment out the await statement at line 3 from the sample, the backtrace is complete:
TypeError: Cannot read property 'f2' of undefined
at expectResolutionErrorCode (/Users/bogdan/makabu/unstoppable/resolution/test.ts:6:12)
at Object.<anonymous> (/Users/bogdan/makabu/unstoppable/resolution/test.ts:10:1)
at Module._compile (internal/modules/cjs/loader.js:1201:30)
at Object.Module._extensions..js (internal/modules/cjs/loader.js:1221:10)
at Module.load (internal/modules/cjs/loader.js:1050:32)
at Function.Module._load (internal/modules/cjs/loader.js:938:14)
at Function.executeUserEntryPoint [as runMain] (internal/modules/run_main.js:71:12)
at internal/main/run_main_module.js:17:47
So the await statement clearly causes node to lose some part of the backtrace.
Why does it behave like that?
Is there any workaround to maintain the backtrace while still using await statements inside try catch?
I tested this behavior in Chrome and Safari and the stacktrace is lost in almost the same way: https://jsfiddle.net/61kdv8fx/
It's because the stack has unwound. Remember that await is syntactic sugar for hooking the settlement of a promise. So when you reach the await in your code, the function returns. Later, when the code continues because the promise was settled, the stack is shallow because promise reactions are called directly.
JavaScript tool makers are very aware of this and working on async call stacks to address it.
However, I should note that when I try to replicate what you're seeing on Node v14, I don't see it, because V8 already has async call stacks. Example:
async function example() { // rejects with "boom2", thrown from its catch block after one await
try {
await Promise.resolve(1); // Could just be `await 1;`, but I wanted to match the question
throw new Error("boom1"); // caught by the catch clause just below
} catch (e) {
throw new Error("boom2"); // replaces boom1; this is the error that rejects example()
}
}
async function a() { // a -> b -> c -> example: a chain of awaited async calls
await b();
}
async function b() {
await c()
}
async function c() {
await example();
}
a(); // the returned promise is not caught, so the rejection is reported as unhandled
When I run it:
$ node example.js
(node:5342) UnhandledPromiseRejectionWarning: Error: boom2
at example (/path/to/example.js:6:15)
at async c (/path/to/example.js:19:5)
at async b (/path/to/example.js:15:5)
at async a (/path/to/example.js:11:5)
(Use `node --trace-warnings ...` to show where the warning was created)
(node:5342) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:5342) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
I get the same sort of output on modern versions of Chrome and other browsers using a recent V8. But Firefox v78 still loses the context; I'm sure the SpiderMonkey team are working on it.

Why does not work Puppeteer-firefox on servers?

I'm trying to run on Windows Server 2008 and 2016
Are there any solutions to launch it? I searched a lot but found nothing.
I get an error:
C:\Users\Administrator\Desktop\_t>node app.js
ERROR: The process "1212" not found.
(node:4276) UnhandledPromiseRejectionWarning: Error: Failed to launch Firefox!
at onClose (C:\Users\Administrator\Desktop\_t\node_modules\puppeteer-firefox
\lib\Launcher.js:263:14)
at Interface.helper.addEventListener (C:\Users\Administrator\Desktop\_t\node
_modules\puppeteer-firefox\lib\Launcher.js:252:50)
at Interface.emit (events.js:187:15)
at Interface.close (readline.js:379:8)
at Socket.onend (readline.js:157:10)
at Socket.emit (events.js:187:15)
at endReadableNT (_stream_readable.js:1094:12)
at process._tickCallback (internal/process/next_tick.js:63:19)
-- ASYNC --
at Puppeteer.<anonymous> (C:\Users\Administrator\Desktop\_t\node_modules\pup
peteer-firefox\lib\helper.js:31:27)
at C:\Users\Administrator\Desktop\_t\a.js:253:21
at afterLogin (C:\Users\Administrator\Desktop\_t\a.js:277:4)
at Object.start (C:\Users\Administrator\Desktop\_t\a.js:101:2)
at Object.<anonymous> (C:\Users\Administrator\Desktop\_t\app.js:156:7)
at Module._compile (internal/modules/cjs/loader.js:689:30)
at Object.Module._extensions..js (internal/modules/cjs/loader.js:700:10)
at Module.load (internal/modules/cjs/loader.js:599:32)
at tryModuleLoad (internal/modules/cjs/loader.js:538:12)
at Function.Module._load (internal/modules/cjs/loader.js:530:3)
at Function.Module.runMain (internal/modules/cjs/loader.js:742:12)
at startup (internal/bootstrap/node.js:282:19)
at bootstrapNodeJSCore (internal/bootstrap/node.js:743:3)
(node:4276) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This
error originated either by throwing inside of an async function without a catch
block, or by rejecting a promise which was not handled with .catch(). (rejection
id: 1)
(node:4276) [DEP0018] DeprecationWarning: Unhandled promise rejections are depre
cated. In the future, promise rejections that are not handled will terminate the
Node.js process with a non-zero exit code.
On Ubuntu server:
(node:7869) UnhandledPromiseRejectionWarning: Error: Failed to launch Firefox!
at onClose (/root/node_modules/puppeteer-firefox/lib/Launcher.js:263:14)
at Interface.helper.addEventListener (/root/node_modules/puppeteer-firefox/lib/Launcher.js:252:50)
at Interface.emit (events.js:194:15)
at Interface.close (readline.js:379:8)
at Socket.onend (readline.js:157:10)
at Socket.emit (events.js:194:15)
at endReadableNT (_stream_readable.js:1103:12)
at process._tickCallback (internal/process/next_tick.js:63:19)
-- ASYNC --
at Puppeteer.<anonymous> (/root/node_modules/puppeteer-firefox/lib/helper.js:31:27)
at /root/a.js:255:21
at afterLogin (/root/a.js:269:4)
at Object.start (/root/a.js:102:2)
at Object.<anonymous> (/root/app.js:156:7)
at Module._compile (internal/modules/cjs/loader.js:689:30)
at Object.Module._extensions..js (internal/modules/cjs/loader.js:700:10)
at Module.load (internal/modules/cjs/loader.js:599:32)
at tryModuleLoad (internal/modules/cjs/loader.js:538:12)
at Function.Module._load (internal/modules/cjs/loader.js:530:3)
at Function.Module.runMain (internal/modules/cjs/loader.js:742:12)
at startup (internal/bootstrap/node.js:283:19)
at bootstrapNodeJSCore (internal/bootstrap/node.js:743:3)
(node:7869) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 1)
(node:7869) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.
Does it work on at least some server?
Everything works fine on my Desktop computer.
Requires a version for working with the Firefox
Puppeteer-firefox
I had the same problem (Failed to launch Firefox) on my Windows VPS.
So I went to node_modules/puppeteer-firefox/.local-browser/.../firefox.exe and tried to open it. I got the 0xc00007b error when I tried to open the exe file.
Download "All in One Runtimes" for Windows and install all of the included software. I did this and it worked for me.
Try it and tell me!
Edit: (sorry for the bad English)
Please don't use the puppeteer-firefox package anymore. It has been deprecated.
Experimental support for Firefox is included in the official puppeteer package now.
Here an example from the Puppeteer repository in how to run your script against Firefox:
(async () => {
  // `puppeteer` and `firefoxOptions` are expected to be defined by the
  // surrounding script; neither is shown in this excerpt.
  const browser = await puppeteer.launch(firefoxOptions);
  try {
    const page = await browser.newPage();
    console.log(await browser.version());
    await page.goto('https://news.ycombinator.com/');
    // Extract articles from the page.
    const resultsSelector = '.storylink';
    // The callback uses `document`, i.e. it is evaluated in the page, so the
    // selector is handed over as an argument instead of being closed over.
    const links = await page.evaluate((selector) => {
      const anchors = Array.from(document.querySelectorAll(selector));
      return anchors.map((anchor) => {
        const title = anchor.textContent.trim();
        return `${title} - ${anchor.href}`;
      });
    }, resultsSelector);
    console.log(links.join('\n'));
  } finally {
    // Shut the browser down even when navigation or extraction throws.
    await browser.close();
  }
})();
If there is still a problem in launching Firefox, then please let us know and we can get it fixed.

Understanding why Puppeteer wont load a website

I have some simple code
const puppeteer = require('puppeteer');
/**
 * Opens https://www.paniniamerica.net in a browser launched with
 * `headless: false` and saves a screenshot to panini.png.
 *
 * @returns {Promise<void>} Resolves after the screenshot has been written and
 *   the browser has been closed; rejects if launching, navigating or
 *   capturing fails.
 */
async function getPic() {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.paniniamerica.net');
    await page.screenshot({ path: 'panini.png' });
  } finally {
    // Close the browser even if navigation or the screenshot fails, so a failed
    // run does not leave a browser process behind.
    await browser.close();
  }
}
// Run the capture; the promise returned by getPic() has no rejection handler attached.
getPic();
I noticed that this specific website just times out. Other websites like google.com or even stackoverflow.com work fine. Is this website blocking this type of behavior?
This is the error it spits out.
(node:63995) UnhandledPromiseRejectionWarning: Error: Navigation failed because browser has disconnected!
at CDPSession.LifecycleWatcher._eventListeners.helper.addEventListener (/Users//Desktop/test/node_modules/puppeteer/lib/LifecycleWatcher.js:46:107)
at CDPSession.emit (events.js:182:13)
at CDPSession._onClosed (/Users//Desktop/test/node_modules/puppeteer/lib/Connection.js:215:10)
at Connection._onMessage (/Users//Desktop/test/node_modules/puppeteer/lib/Connection.js:105:17)
at WebSocketTransport._ws.addEventListener.event (/Users//Desktop/test/node_modules/puppeteer/lib/WebSocketTransport.js:44:24)
at WebSocket.onMessage (/Users//Desktop/test/node_modules/ws/lib/event-target.js:120:16)
at WebSocket.emit (events.js:182:13)
at Receiver.receiverOnMessage (/Users//Desktop/test/node_modules/ws/lib/websocket.js:789:20)
at Receiver.emit (events.js:182:13)
at Receiver.dataMessage (/Users//Desktop/test/node_modules/ws/lib/receiver.js:422:14)
-- ASYNC --
at Frame.<anonymous> (/Users//Desktop/test/node_modules/puppeteer/lib/helper.js:111:15)
at Page.goto (/Users//Desktop/test/node_modules/puppeteer/lib/Page.js:674:49)
at Page.<anonymous> (/Users//Desktop/test/node_modules/puppeteer/lib/helper.js:112:23)
at getPic (/Users//Desktop/test/test.js:6:14)
at process._tickCallback (internal/process/next_tick.js:68:7)
(node:63995) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). (rejection id: 1)
(node:63995) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.

Categories