Create PDF using Node.js and Puppeteer - javascript

I have an issue with PDF generation.
const puppeteer = require("puppeteer");
const fs = require("fs-extra");
const path = require("path");
const hbs = require("handlebars");
const data = require("./database.json");
const { v4 } = require("uuid");

const compile = async function (templateName, data) {
  const filePath = path.join(process.cwd(), "views", `${templateName}.hbs`);
  const html = await fs.readFile(filePath, "utf-8");
  return hbs.compile(html)(data);
};

async function createPDF(data) {
  try {
    const pdfFileName = v4();
    const browser = await puppeteer.launch();
    const page = await browser.newPage();
    console.log(data); // <-- logs the object as expected
    const content = await compile("index", data); // <-- problem here
    await page.setContent(content);
    await page.emulateMedia("screen"); // renamed to emulateMediaType in newer Puppeteer
    await page.pdf({
      path: path.join(__dirname, `pdfs/${pdfFileName}.pdf`),
      format: "A4",
      printBackground: true,
    });
    await browser.close();
  } catch (error) {
    console.log(error);
  }
}
Everything works fine otherwise: I can generate the new PDF file, but for some reason the data object is undefined inside the Handlebars template.
Any idea how to solve it?
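No answer is recorded here, but since the template itself isn't shown, one thing worth ruling out is how the template references the context: Handlebars resolves expressions against the root of the object you pass to the compiled template, so a context of { users: [...] } must be read as {{users}}, not {{data.users}}. A minimal self-contained sketch (the template string and the users field are illustrative, not from the question):

const hbs = require("handlebars");

// Illustrative template: expressions resolve against the root of the
// context object, so the data below is read via {{#each users}}.
const template = hbs.compile("<ul>{{#each users}}<li>{{name}}</li>{{/each}}</ul>");

const data = { users: [{ name: "Ada" }, { name: "Grace" }] };
console.log(template(data)); // <ul><li>Ada</li><li>Grace</li></ul>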

Related

Electron Spotify not opening in desktop mode

Electron is not opening Spotify in desktop mode, as you can see in the screenshots below.
Here is the code:
const { BrowserWindow, app } = require("electron");
const pie = require("puppeteer-in-electron");
const puppeteer = require("puppeteer-core");
const fs = require("fs");
const path = require("path");

const main = async () => {
  const cookiesPath = path.join(__dirname, "cookies/open.spotify.com.cookies.json");
  const cookies = JSON.parse(fs.readFileSync(cookiesPath, "utf8")); // readFileSync is synchronous, no await needed
  await pie.initialize(app);
  const browser = await pie.connect(app, puppeteer);
  const window = new BrowserWindow();
  const url = "https://example.com/";
  await window.loadURL(url);
  const page = await pie.getPage(browser, window);
  await page.goto("https://open.spotify.com");
  for (const cookie of cookies) {
    if (cookie.name !== "ig_lang") {
      await page.setCookie(cookie);
    }
  }
  await page.reload();
};

main();
Note: I'm using puppeteer-in-electron so that I can automate web processes even in Electron.
But that is not the issue here, because the problem persists even if I use Electron normally, without Puppeteer.
This is how it should've been: https://cdn.discordapp.com/attachments/1026704902925324410/1026710664611377202/unknown.png
This is how it is: https://cdn.discordapp.com/attachments/1026704902925324410/1026704903055343626/Screenshot_42.png
Hope I've explained it well.
Thanks
Install the package "https://github.com/castlabs/electron-releases#v20.0.0+wvcus" like this:
npm install "https://github.com/castlabs/electron-releases#v20.0.0+wvcus" --save-dev
Also import components:
const { BrowserWindow, app, components } = require("electron");
To open in desktop mode, just add a userAgent:
window.loadURL(url, {
  userAgent: "Chrome/105.0.0.0",
});
And create the BrowserWindow only after app and components are ready:
app.whenReady().then(async () => {
  await components.whenReady();
  main();
});
Full code:
const { BrowserWindow, app, components } = require("electron");
const pie = require("puppeteer-in-electron");
const puppeteer = require("puppeteer-core");
const fs = require("fs");
const path = require("path");

pie.initialize(app);

const main = async () => {
  const cookiesPath = path.join(
    __dirname,
    "cookies/open.spotify.com.cookies.json",
  );
  const cookies = JSON.parse(fs.readFileSync(cookiesPath, "utf8"));
  const browser = await pie.connect(app, puppeteer);
  const window = new BrowserWindow();
  const url = "https://example.com/";
  await window.loadURL(url, {
    userAgent: "Chrome/105.0.0.0",
  });
  const page = await pie.getPage(browser, window);
  await page.goto("https://open.spotify.com");
  for (const cookie of cookies) {
    if (cookie.name !== "ig_lang") {
      await page.setCookie(cookie);
    }
  }
  await page.reload();
};

app.whenReady().then(async () => {
  await components.whenReady();
  main();
});
Short version of the code:
const { BrowserWindow, app, components } = require("electron");

const main = () => {
  const window = new BrowserWindow();
  const url = "https://open.spotify.com";
  window.loadURL(url, {
    userAgent: "Chrome/105.0.0.0",
  });
};

app.whenReady().then(async () => {
  await components.whenReady();
  main();
});

Puppeteer too much CPU

I'm trying to run this script, which visits a site with different IPs through a proxy pool, but after a few visits the CPU rises to 100% and everything slows down more and more. Could you help me optimize it?
I should say up front that I am not a programmer, and I thank anyone who can help me solve this problem.
const express = require('express');
const app = express();
const port = process.env.PORT || 8080;
const validUrl = require('valid-url');

const parseUrl = function (url) {
  url = decodeURIComponent(url);
  if (!/^(?:f|ht)tps?:\/\//.test(url)) {
    url = 'https://' + url;
  }
  return url;
};

const getRandomDevice = () => {
  const puppeteer = require('puppeteer');
  const devices = Object.entries(puppeteer.devices);
  return devices[Math.floor(Math.random() * devices.length)][1];
};

app.get('/', function (req, res) {
  // const url = parseUrl(req.query.url);
  const url = 'https://www.example.com';
  const tries = req.query.tries || 100000;
  if (validUrl.isWebUri(url)) {
    console.log('Handling: ' + url);
    (async () => {
      const puppeteer = require('puppeteer');
      const browser = await puppeteer.launch({
        headless: true,
        // userDataDir: './myUserDataDir',
        args: [
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--proxy-server=EXAMPLE-POOL-PROXY:13012'
        ]
      });
      let [page] = await browser.pages();
      for (let i = 0; i < tries; i++) {
        // enable request interception
        await page.setRequestInterception(true);
        const device = getRandomDevice();
        await page.setUserAgent(device.userAgent);
        await page.setViewport(device.viewport);
        // add header for the navigation requests
        page.on('request', request => {
          // Add a new header for navigation request.
          const headers = request.headers();
          headers['User-Agent'] = device.userAgent;
          headers['user-agent'] = device.userAgent;
          request.continue({ headers });
        });
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 1500000 });
        try {
          //console.log(page);
        } catch (error) {
          console.error(error);
        } finally {
          // console.log(urls);
          // console.log(await page._client.send('Network.getAllCookies'));
          // await page.screenshot().then(function (buffer) {
          //   res.setHeader('Content-Disposition', 'attachment;filename="' + url + '.png"');
          //   res.setHeader('Content-Type', 'image/png');
          //   res.send(buffer);
          // });
          await page.screenshot({ path: 'screenshot-' + i + '.png', fullPage: true });
          // If everything correct then no 'HeadlessChrome' sub string on userAgent
          console.log(await page.evaluate(() => navigator.userAgent));
          page = await browser.newPage();
        }
      }
      setTimeout(async () => {
        await browser.close();
      }, 500);
    })();
  } else {
    res.send('Invalid url: ' + url);
  }
});

app.listen(port, function () {
  console.log('App listening on port ' + port);
});
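No answer is recorded for this one, but two things in the loop stand out that would explain the growth: each iteration ends with browser.newPage() while the previous page is never closed, so pages (each with its own renderer process) accumulate, and every iteration also attaches a fresh 'request' listener. A possible restructuring, as an untested sketch that reuses one page and registers the listener once (the visit count and target URL are placeholders):

// A sketch, not tested against the original proxy setup: one page, one
// request listener, a device updated per visit, and no pages left open.
const puppeteer = require('puppeteer');

(async () => {
  const browser = await puppeteer.launch({
    headless: true,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--proxy-server=EXAMPLE-POOL-PROXY:13012'
    ]
  });
  const [page] = await browser.pages();
  await page.setRequestInterception(true); // enable once, not per iteration

  let device = null; // the listener reads whatever the loop last assigned
  page.on('request', request => {
    const headers = request.headers();
    if (device) headers['user-agent'] = device.userAgent;
    request.continue({ headers });
  });

  const devices = Object.entries(puppeteer.devices); // KnownDevices in newer Puppeteer
  for (let i = 0; i < 100; i++) { // placeholder visit count
    device = devices[Math.floor(Math.random() * devices.length)][1];
    await page.setUserAgent(device.userAgent);
    await page.setViewport(device.viewport);
    await page.goto('https://www.example.com', { waitUntil: 'networkidle2' });
    await page.screenshot({ path: 'screenshot-' + i + '.png', fullPage: true });
  }
  await browser.close();
})();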

How can I send a custom url screenshot on discord.js?

So, I'm trying to make a command: you write "!screenshot" followed by a custom URL, and the bot takes a screenshot of that URL and sends it back to the channel the command came from. I have tried many ways, but I can't make it work.
const Discord = require("discord.js");
const fetch = require('node-fetch');
const cheerio = require("cheerio");
const puppeteer = require('puppeteer');

module.exports = {
  name: 'screenshot',
  description: 'makes a screenshot and sends it back',
  usage: '[command name]',
  cooldown: 5,
  execute(message, args) {
    const data = [];
    //const url = message.client;
    if (message.content.startsWith("!screenshot ")) {
      const url = message.content.substr("!screenshot ".length);
    }
  },
  async run(client, message, args) {
    const browser = await puppeteer.launch();
    const page = await browser.newPage();
    //const page = await context.newPage();
    //const url = args.join(' '); // or something similar?
    await page.goto(url, { waitUntil: 'networkidle2' });
    console.log(await page.content());
    await page.screenshot({ path: 'screenshot.png' });
    let screenshot = await page.screenshot();
    await browser.close();
    message.channel.send("Screenshot", { files: [screenshot] });
  }
};
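There is no answer in the thread, but the core problem is visible in the code: url is declared inside execute and is out of scope in run, so page.goto(url, ...) throws. A sketch of a single handler that does everything in one place (assuming discord.js v12, which the send(content, options) signature above suggests; the attachment name is illustrative):

const puppeteer = require('puppeteer');

module.exports = {
  name: 'screenshot',
  description: 'makes a screenshot and sends it back',
  usage: '[url]',
  cooldown: 5,
  async execute(message, args) {
    const url = args[0]; // e.g. "!screenshot https://example.com"
    if (!url) return message.channel.send('Please provide a URL.');
    const browser = await puppeteer.launch();
    try {
      const page = await browser.newPage();
      await page.goto(url, { waitUntil: 'networkidle2' });
      const screenshot = await page.screenshot(); // resolves to a Buffer
      await message.channel.send('Screenshot', {
        files: [{ attachment: screenshot, name: 'screenshot.png' }],
      });
    } finally {
      await browser.close(); // close even if goto/screenshot fails
    }
  },
};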

How to get the selector of an element from a web page with more than one HTML document?

I'm trying to get information from a web page using Puppeteer, but I can't find the selector that I need. I suppose that's because the page contains more than one HTML document, and I can't find a way to get the data I need.
[screenshot: the inspection of the page]
That's the code:
const puppeteer = require('puppeteer');

(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto('https://www.arrivia.com/careers/job-openings/');
  await page.waitForSelector('.job-search-result');
  const data = await page.evaluate(() => {
    const elements = document.querySelectorAll('.job-search-result .job-btn-container a');
    const vacancies = [];
    for (const element of elements) {
      vacancies.push(element.href);
    }
    return vacancies;
  });
  console.log(data.length);
  const vacancies = [];
  for (let i = 0; i <= 2; i++) {
    const urljob = data[i];
    await page.goto(data[i]);
    await page.waitForSelector(".app-title"); // that's one of the selectors that I can't find
    // from here on I get an error
    const jobs = await page.evaluate((urljob) => {
      const job = {};
      job.title = document.querySelector(".app-title").innerText;
      job.location = document.querySelector(".location").innerText;
      job.url = urljob;
      return job;
    }, urljob);
    vacancies.push(jobs);
  }
  console.log(vacancies);
  //await page.screenshot({ path: 'xx1.jpg' });
  await browser.close();
})();
Iframes are not always the easiest things to deal with in Puppeteer. But a way to bypass this is to access the URL of the iframe directly, instead of the page which hosts the iframe. It's also faster:
const puppeteer = require("puppeteer");

(async () => {
  const browser = await puppeteer.launch({ headless: false, defaultViewport: null });
  const page = await browser.newPage();
  await page.goto("https://www.arrivia.com/careers/job-openings/", {
    waitUntil: "domcontentloaded",
  });
  const jobUrls = await page.$$eval(".job-search-result .job-btn-container a",
    els => els.map(el => el.href));
  const vacancies = [];
  for (let i = 0; i < 10; i++) { // don't forget to replace 10 with jobUrls.length later
    const url = jobUrls[i];
    const jobId = /job_id=(\d+)/.exec(url)[1]; // Extract the ID from the link
    await page.goto(
      `https://boards.greenhouse.io/embed/job_app?token=${jobId}`, // Go to iframe URL
      { waitUntil: "domcontentloaded" }
    );
    vacancies.push({
      title: await page.$eval(".app-title", el => el.innerText),
      location: await page.$eval(".location", el => el.innerText),
      url,
    });
  }
  console.log(vacancies);
  await browser.close();
})();
Output:
[
  {
    title: 'Director of Account Management',
    location: 'Scottsdale, AZ',
    url: 'https://www.arrivia.com/careers/job/?job_id=2529695'
  },
  {
    title: "Site Admin and Director's Assistant",
    location: 'Albufeira, Portugal',
    url: 'https://www.arrivia.com/careers/job/?job_id=2540303'
  },
  ...
]
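For completeness, the iframe can also be handled directly, without the URL trick, through Puppeteer's frame API. A sketch (the "#grnhse_iframe" selector is a guess at the id Greenhouse embeds usually use; verify it in DevTools):

const puppeteer = require("puppeteer");

(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  // One of the job URLs collected above:
  await page.goto("https://www.arrivia.com/careers/job/?job_id=2529695");
  // Get a handle on the iframe element, then on its frame object.
  const frameHandle = await page.waitForSelector("#grnhse_iframe");
  const frame = await frameHandle.contentFrame();
  await frame.waitForSelector(".app-title");
  console.log({
    title: await frame.$eval(".app-title", el => el.innerText),
    location: await frame.$eval(".location", el => el.innerText),
  });
  await browser.close();
})();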

Fetch - Download Excel File does not work. File Size 1 kb

I am trying to download a file, but it does not work after the download. I do get files, but their size is 1 KB, which is not the actual file size.
If I use fetchResp.text(), I am not able to open the file.
Here is the full code.
I think the problem could be here: return await fetchResp.text();
This is an example; it is also important to set cookies, because I want to download data behind a login.
How do I handle Puppeteer cookies together with fetch?
What if I put the fetch call outside page.evaluate? Will { credentials: "include" } still work?
Thanks in advance for your help.
const puppeteer = require("puppeteer");
const cheerio = require("cheerio");
const fs = require("fs");

(async () => {
  const browser = await puppeteer.launch({
    args: ["--no-sandbox"],
    headless: false,
    slowMo: 30,
  });
  const page = await browser.newPage();
  await page.goto(
    "https://file-examples.com/index.php/sample-documents-download/sample-xls-download/"
  );
  const content = await page.content();
  const $ = cheerio.load(content);
  const listings = $("#table-files > tbody > tr:has(a)")
    .map((index, element) => {
      const URL = $(element).find("a").attr("href");
      const Filename = $(element).find("a").attr("href").split("/").pop();
      //.replace(/^.*[\\\/]/g, "");
      const name = $(element)
        .find("td:nth-child(1)")
        .text()
        .trim()
        .replace("\n", "");
      return {
        Filename,
        URL,
      };
    })
    .get();
  for (let val of listings) {
    const downloadUrl = val.URL;
    const Filename = val.Filename;
    console.log(val);
    const downloadedContent = await page.evaluate(async (downloadUrl) => {
      const fetchResp = await fetch(downloadUrl, { credentials: "include" });
      return await fetchResp.text();
    }, downloadUrl);
    fs.writeFile(`./${Filename}`, downloadedContent, () =>
      console.log("Wrote file")
    );
  }
  await page.close();
  await browser.close();
})();
The main problem here is that you are getting the file contents as plain text, which would be fine if you wanted a plain-text file, but you need to write an Excel file, so you need a blob or an arrayBuffer, neither of which can be returned from page.evaluate. See https://github.com/puppeteer/puppeteer/issues/3722
So you don't need to fetch the Excel files with page.evaluate at all: after collecting the links you can download them directly from Node using the https module and stream the contents to files, which is easier in this case and also less code. You'll need these modifications.
First, require the https module:
const https = require('https');
Then close Puppeteer after getting the links, since we don't need it anymore:
.get();
await page.close();
await browser.close();
Call the function when looping through the links:
for (let val of listings) {
  const downloadUrl = val.URL;
  const Filename = val.Filename;
  console.log(val);
  var file = await getFile(downloadUrl, Filename);
}
Finally, create a function to download the file, outside of your main code block. Returning a promise makes the await in the loop actually wait for each download to finish:
function getFile(downloadUrl, Filename) {
  return new Promise((resolve, reject) => {
    const writeStream = fs.createWriteStream(Filename);
    https.get(downloadUrl, function (res) {
      res.pipe(writeStream);
      writeStream.on('finish', () => {
        console.log('No more data in response.');
        resolve();
      });
    }).on('error', reject);
  });
}
Full snippet:
const puppeteer = require('puppeteer');
const cheerio = require("cheerio");
const fs = require("fs");
const https = require('https');

(async () => {
  const browser = await puppeteer.launch({
    args: ["--no-sandbox"],
    headless: false,
    slowMo: 30,
  });
  const page = await browser.newPage();
  await page.goto(
    "https://file-examples.com/index.php/sample-documents-download/sample-xls-download/"
  );
  const content = await page.content();
  const $ = cheerio.load(content);
  const listings = $("#table-files > tbody > tr:has(a)")
    .map((index, element) => {
      const URL = $(element).find("a").attr("href");
      const Filename = $(element).find("a").attr("href").split("/").pop();
      //.replace(/^.*[\\\/]/g, "");
      const name = $(element)
        .find("td:nth-child(1)")
        .text()
        .trim()
        .replace("\n", "");
      return {
        Filename,
        URL,
      };
    })
    .get();
  await page.close();
  await browser.close();
  for (let val of listings) {
    const downloadUrl = val.URL;
    const Filename = val.Filename;
    console.log(val);
    // call the function with each link and filename
    var file = await getFile(downloadUrl, Filename);
  }
})();

// send the request and stream the response to a file
function getFile(downloadUrl, Filename) {
  return new Promise((resolve, reject) => {
    const writeStream = fs.createWriteStream(Filename);
    https.get(downloadUrl, function (res) {
      res.pipe(writeStream);
      writeStream.on('finish', () => {
        console.log('No more data in response.');
        resolve();
      });
    }).on('error', reject);
  });
}
EDIT: I saw your comment. You can send cookies by modifying the GET request like this, but remember the same-domain policy for cookies:
function getFile(downloadUrl, Filename) {
  const url = new URL(downloadUrl);
  const options = {
    hostname: url.hostname,
    path: url.pathname,
    method: 'GET',
    headers: {
      'Cookie': 'myCookie=myvalue'
    }
  };
  return new Promise((resolve, reject) => {
    const writeStream = fs.createWriteStream(Filename);
    const req = https.request(options, function (res) {
      res.pipe(writeStream);
      writeStream.on('finish', () => {
        console.log('No more data in response.');
        resolve();
      });
    });
    req.on('error', reject);
    req.end();
  });
}
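Since the question asks how to reuse Puppeteer's login cookies for the download, one way, as a sketch (assuming the download URL is on the same domain as the logged-in page), is to read them with page.cookies() before closing the browser and serialize them into the Cookie header:

// Sketch: serialize Puppeteer's session cookies into a Cookie header.
// Call this before browser.close() and pass the result into getFile.
async function getCookieHeader(page) {
  const cookies = await page.cookies(); // cookies for the current page's URL
  return cookies.map(c => `${c.name}=${c.value}`).join('; ');
}

// Usage inside the main block, before closing the browser:
//   const cookieHeader = await getCookieHeader(page);
// and in the request options:
//   headers: { 'Cookie': cookieHeader }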
