await and async switching the order of the functions - javascript

I just got a problem with my server NodeJS. I think it has to do with await/async
What I want to do is making a system that use Socket to create Email (with nodemailer) from PDF (with puppeteer) and then sending them to an Email adresse.
So I have to create the PDF before sending it by Email.
This is my Code:
async function CreatePDF(){ // Fonction cree
console.log('Trying to make the PDF');
try{
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.setContent('<body><style>' + HeadHTML + '</style>'+ PageHTML + '</body>');
await page.emulateMedia('screen');
await page.pdf({
path: path.join(__dirname, 'Nouveau dossier', NumberOfTheFile + '.pdf'),
format: "A4",
printBackground: true
});
console.log('PDF Created!');
SendEmail(); //Doesn't use async or await
} catch (e)
{
console.log('Error: ', e)
}
}
function SendEmail(){
console.log('Trying to send the email');
//Put everything to send Email here.
}
The problem is that the PDF is created after the system try to send the email. I don't know if there is a way to make puppeteer work without await/async or if I can make sure that things go in the correct direction.
On the console i get:
Trying to send the email
...
PDF Created

Related

Authenticate using cookies for web scraping?

I built an app that uses Puppeteer to scrape data from LinkedIn. I log in using email and password but would like to pass in cookies to authenticate. Here is what I currently use:
const puppeteer = require("puppeteer");
(async () => {
try {
const browser = await puppeteer.launch({ headless: true });
const page = await browser.newPage();
await page.goto("https://www.linkedin.com/login");
await page.waitForSelector(loginBtn);
await page.type("#username", username);
await page.type("#password", password);
await page.click(loginBtn, { delay: 30 });
await browser.close();
} catch (error) {
console.log(`Our error = ${error}`);
}
})();
I've seen websites like Phantombuster that use "li_at" cookies to authenticate. https://i.imgur.com/PI8fzao.png
How can I authenticate using cookies?
Disclaimer: I work at Phantombuster ;)
Since logging in sets a cookie in your browser on success, you can replace that step with the direct result:
await page.setCookie({ name: "li_at", value: "[cookie here]", domain: "www.linkedin.com" })
You should then be able to goto any of the website page as if you were authenticated by the login form.

UnhandledPromiseRejectionWarning: Error: Evaluation failed theme is not defined

Before I start the question, I am new in JavaScript, and I have very basic knowledge of async js, but i need to solve this so i can have my first project functional.
I am trying to build a scraping app using Node and Puppeteer. Basically, the user enters a URL ("link" in the code below), puppeteer goes trough the website code, tries to find the specific piece and returns the data. That part I got working so far.
The problem is when a user enters a URL of a site that doesn't have that piece of code. In that case, I get UnhandledPromiseRejectionWarning: Error: Evaluation failed theme is not defined
What do I do so when there is an error like that, I can catch it and redirect the page instead of Getting Internal Server error.
app.post("/results", function(req, res) {
var link = req.body.link;
(async link => {
const browser = await puppeteer.launch({ args: ['--no-sandbox'] })
const page = await browser.newPage()
await page.goto(link, { waitUntil: 'networkidle2'})
const data = await page.evaluate('theme.name');
await browser.close()
return data
})(link)
.then(data => {
res.render("index", {data: data, siteUrl: link});
})
})
You can extend the async part to the whole route handler and do whatever you want on catch:
app.post('/results', async (req, res) => {
try {
const link = req.body.link
const browser = await puppeteer.launch({ args: ['--no-sandbox'] })
const page = await browser.newPage()
await page.goto(link, { waitUntil: 'networkidle2'})
const data = await page.evaluate('theme.name')
await browser.close()
res.render("index", {data: data, siteUrl: link})
} catch(e) {
// redirect or whatever
res.redirect('/')
}
});

Rendering a page after generating PDF with puppeteer?

I'm currently trying to generate a PDF with puppeteer, then render a page with a "thank you" message to the user. Once the user hits that page, the Puppeteer PDF will hopefully begin downloading on the user's machine. But I'm having some troubles.
I can successfully redirect the user to the page I want them on after collecting some basic info from a form:
app.post("/generatepdf", function (req, res) {
UserPdfRequest.create({ email: req.body.email, companyName: req.body.companyName }, function (err, createdRequest) {
if (err) {
console.log(err);
} else {
console.log(createdRequest);
res.redirect("/" + createdRequest._id + "/pdf-download");
}
})
});
Then, I send them to my route which handles finding the user in question, generating the PDF, then rendering the Thank You page:
app.get("/:companyId/pdf-download", function (req, res) {
UserPdfRequest.findById(req.params.companyId, function (err, foundRequest) {
if (err) {
console.log(err);
} else {
console.log(foundRequest);
(async () => {
const browser = await puppeteer.launch()
const page = await browser.newPage()
const url = 'http://localhost:3000/' + req.params.companyId + '/pdf-download';
await page.goto(url, {waitUntil: 'networkidle0'});
const buffer = await page.pdf({ format: "A4", printBackground: true });
res.type('application/pdf')
res.send(buffer)
browser.close()
})()
res.render("pdfDownload", { email: foundRequest.email, companyName: foundRequest.companyName });
}
})
});
But when I land on the Thank You page, my PDF does not begin downloading. Furthermore, my console.log(foundRequest) seems to log over and over again very rapidly in my terminal, and I also receive the following errors:
https://imgur.com/ZsApRHn
I know I'm probably in over my head here given I don't have much experience with async. I'm sure this is a simple fix I'm missing; however, any help (and explanation) would be extremely valuable and appreciated. Thank you for your time!
You are calling send and render on the same response object. You can either send the data or send html but you cannot do it for the same request.
Usually it is workarounded by opening a new tab for downloading.

How to accept a JavaScript alert popup in Puppeteer?

I have a script which uses Puppeteer to automatically log in to a corporate portal. The login uses SAML. So, when puppeteer opens up an instance of chromium and visits the page, a popup appears on screen to confirm the identity of the user. All I need to do is either manually click on "OK" button or press Enter from keyboard.
I have tried to simulate the pressing of the Enter key using puppeteer but it does not work.
The login screen -
Script -
const puppeteer = require('puppeteer');
async function startDb() {
const browser = await puppeteer.launch({
headless:false,
defaultViewport:null
});
const page = await browser.newPage();
await page.goto("https://example.com");
await page.waitFor(3000);
await page.keyboard.press('Enter');
console.log('Opened')
};
startDb();
**EDIT **
There is a solution proposed in this issue:
Basically just intercept the request, then fire the request off yourself using your favorite httpclient lib, and repond to the intercepted request with the response info.
const puppeteer = require('puppeteer');
const request = require('request');
const fs = require('fs');
(async () => {
const browser = await puppeteer.launch();
let page = await browser.newPage();
// Enable Request Interception
await page.setRequestInterception(true);
// Client cert files
const cert = fs.readFileSync('/path/to/cert.crt.pem');
const key = fs.readFileSync('/path/to/cert.key.pem');
page.on('request', interceptedRequest => {
// Intercept Request, pull out request options, add in client cert
const options = {
uri: interceptedRequest.url(),
method: interceptedRequest.method(),
headers: interceptedRequest.headers(),
body: interceptedRequest.postData(),
cert: cert,
key: key
};
// Fire off the request manually (example is using using 'request' lib)
request(options, function(err, resp, body) {
// Abort interceptedRequest on error
if (err) {
console.error(`Unable to call ${options.uri}`, err);
return interceptedRequest.abort('connectionrefused');
}
// Return retrieved response to interceptedRequest
interceptedRequest.respond({
status: resp.statusCode,
contentType: resp.headers['content-type'],
headers: resp.headers,
body: body
});
});
});
await page.goto('https://client.badssl.com/');
await browser.close();
})();
Before page.goto(), put this code:
page.on('dialog', async dialog => {
await dialog.accept();
});

Puppeteer - How can I get the current page (application/pdf) as a buffer or file?

Using Puppeteer (https://github.com/GoogleChrome/puppeteer), I have a page that's a application/pdf. With headless: false, the page is loaded though the Chromium PDF viewer, but I want to use headless. How can I download the original .pdf file or use as a blob with another library, such as (pdf-parse https://www.npmjs.com/package/pdf-parse)?
Since Puppeteer does not currently support navigation to a PDF document in headless mode via page.goto() due to the upstream issue, you can use page.setRequestInterception() to enable request interception, and then you can listen for the 'request' event and detect whether the resource is a PDF before using the request client to obtain the PDF buffer.
After obtaining the PDF buffer, you can use request.abort() to abort the original Puppeteer request, or if the request is not for a PDF, you can use request.continue() to continue the request normally.
Here's a full working example:
'use strict';
const puppeteer = require('puppeteer');
const request_client = require('request-promise-native');
(async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', request => {
if (request.url().endsWith('.pdf')) {
request_client({
uri: request.url(),
encoding: null,
headers: {
'Content-type': 'applcation/pdf',
},
}).then(response => {
console.log(response); // PDF Buffer
request.abort();
});
} else {
request.continue();
}
});
await page.goto('https://example.com/hello-world.pdf').catch(error => {});
await browser.close();
})();
Grant Miller's solution didn't work for me because I was logged in the website. But if the pdf is public this solution works out well.
The solution for my case was to add the cookies
await page.setRequestInterception(true);
page.on('request', async request => {
if (request.url().indexOf('exibirFat.do')>0) { //This condition is true only in pdf page (in my case of course)
const options = {
encoding: null,
method: request._method,
uri: request._url,
body: request._postData,
headers: request._headers
}
/* add the cookies */
const cookies = await page.cookies();
options.headers.Cookie = cookies.map(ck => ck.name + '=' + ck.value).join(';');
/* resend the request */
const response = await request_client(options);
//console.log(response); // PDF Buffer
buffer = response;
let filename = 'file.pdf';
fs.writeFileSync(filename, buffer); //Save file
} else {
request.continue();
}
});

Categories