I'm trying to download a file using Playwright. The file is generated on the fly, which means the URL doesn't end with '.pdf', and forcing that extension into the URL doesn't start the download. I was able to navigate until the PDF is generated, but how can I download it now?
here is my code:
const { chromium } = require('playwright');
const path = require('path');

(async () => {
  // acceptDownloads must be enabled on the context or the download event never fires.
  const browser = await chromium.launch({
    headless: true,
  });
  try {
    const context = await browser.newContext({
      acceptDownloads: true
    });
    const page = await context.newPage();
    const downloadPath = path.join(__dirname, 'edt');

    // Log in.
    await page.goto('https://planif.eppe.ui/jsp/custom/eppee/easyMyPlanning.jsp');
    await page.getByLabel('Identifiant :').click();
    await page.getByLabel('Identifiant :').fill('myusername');
    await page.getByLabel('Identifiant :').press('Tab');
    await page.getByLabel('Mot de passe :').fill('mysecretpassword');
    await page.getByLabel('Mot de passe :').press('Enter');

    // Pick the planning and open the PDF export popup window.
    await page.getByRole('combobox').selectOption('10');
    await page.getByRole('button', { name: 'Submit' }).click();
    await page.locator('.footer > a').click();

    const popupPromise = page.waitForEvent('popup');
    await page.frameLocator('frame[name="link"]').getByRole('link', { name: 'Export PDF...' }).click();
    const popup = await popupPromise;

    // Register the download listener BEFORE clicking the button that triggers it,
    // otherwise the event can be missed.
    const downloadPromise = popup.waitForEvent('download');
    await popup.getByRole('button', { name: 'Submit' }).click();
    const download = await downloadPromise;
    await download.saveAs(path.join(downloadPath, 'edt.pdf'));
  } finally {
    // BUG FIX: the browser was leaked whenever any step above threw
    // (e.g. a selector timeout); always release it.
    await browser.close();
  }
})();
with this code I end up with the browser waiting for me to start the download — how can I automate this?
PS: with this code and with my browser in non-headless mode I end up on this page, and if I manually press the download button it works.
Related
So, I'm trying to make a command. You write '!screenshot' followed by a custom URL; the bot should take a screenshot of that URL and send it back to the respective channel. I've tried many ways but I can't make it work.
const Discord = require("discord.js");
const fetch = require('node-fetch');
const cheerio = require("cheerio");
const puppeteer = require('puppeteer');
module.exports = {
name: 'screenshot',
description: 'makes a screenshot and sends it back',
usage: '[command name]',
cooldown: 5,
execute(message, args) {
const data = [];
//const url = message.client;
if (message.content.startsWith("!screenshot ")) {
const url = message.content.substr("!screenshot ".length);
};
},
async run(client, message, args) {
const browser = await puppeteer.launch();
const page = await browser.newPage();
//const page = await context.newPage();
//const url = args.join(' '); // or something similar?
await page.goto(url, { waitUntil: 'networkidle2' });
console.log(await page.content());
await page.screenshot({path: 'screenshot.png'});
let screenshot = await page.screenshot();
await browser.close();
message.channel.send("Screenshot", {files: [screenshot]});
}
}
I'm trying to get information from a web page using Puppeteer, but I can't find the selector that I need. I suppose that's because the page contains more than one HTML document, and I can't find a way to get the data I need.
the inspection of the page
Here's the code:
const puppeteer = require('puppeteer');

(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto('https://www.arrivia.com/careers/job-openings/');
  await page.waitForSelector('.job-search-result');

  // Collect the detail-page link of every listed job.
  const data = await page.evaluate(() => {
    const elements = document.querySelectorAll('.job-search-result .job-btn-container a');
    const vacancies = []; // was an implicit global
    for (const element of elements) {
      vacancies.push(element.href);
    }
    return vacancies;
  });
  console.log(data.length);

  const vacancies = [];
  for (let i = 0; i <= 2; i++) {
    const urljob = data[i];
    await page.goto(urljob);
    // NOTE(review): .app-title appears to live inside an embedded iframe, so
    // waiting on the top document may time out — confirm where the selector lives.
    await page.waitForSelector(".app-title");
    // BUG FIX: urljob must be passed as the second argument of evaluate(),
    // otherwise it is undefined inside the page context. Also removed a stray
    // `close` token after `return job;` that broke the syntax.
    const jobs = await page.evaluate((urljob) => {
      const job = {};
      job.title = document.querySelector(".app-title").innerText;
      job.location = document.querySelector(".location").innerText;
      job.url = urljob;
      return job;
    }, urljob);
    vacancies.push(jobs);
  }
  console.log(vacancies);
  await browser.close();
})();
Iframes are not always the easiest things to deal with, in Puppeteer. But a way to bypass this could be to access directly the URL of the iframe, instead of accessing the page which hosts the iframe. It's also faster:
const puppeteer = require("puppeteer");

(async () => {
  const browser = await puppeteer.launch({ headless: false, defaultViewport: null });
  const page = await browser.newPage();
  await page.goto("https://www.arrivia.com/careers/job-openings/", {
    waitUntil: "domcontentloaded",
  });
  const jobUrls = await page.$$eval(".job-search-result .job-btn-container a",
    els => els.map(el => el.href));
  const vacancies = [];
  // don't forget to replace 10 with jobUrls.length later
  const limit = Math.min(10, jobUrls.length); // avoid indexing past the list
  for (let i = 0; i < limit; i++) {
    const url = jobUrls[i];
    const match = /job_id=(\d+)/.exec(url); // Extract the ID from the link
    if (!match) continue; // skip links without a job_id instead of crashing on null[1]
    const jobId = match[1];
    await page.goto(
      `https://boards.greenhouse.io/embed/job_app?token=${jobId}`, // Go to iframe URL
      { waitUntil: "domcontentloaded" }
    );
    vacancies.push({
      title: await page.$eval(".app-title", el => el.innerText),
      location: await page.$eval(".location", el => el.innerText),
      url,
    });
  }
  console.log(vacancies);
  await browser.close();
})();
Output:
[
{
title: 'Director of Account Management',
location: 'Scottsdale, AZ',
url: 'https://www.arrivia.com/careers/job/?job_id=2529695'
},
{
title: "Site Admin and Director's Assistant",
location: 'Albufeira, Portugal',
url: 'https://www.arrivia.com/careers/job/?job_id=2540303'
},
...
]
I am trying to download a file, but it does not work: I do get files, but their size is 1 KB, which is not the actual file size.
If I use fetchResp.text(), the downloaded file cannot be opened.
Here is full code.
I think the problem could be here: return await fetchResp.text();
This is an example; it is also important to set cookies, because I want to download data behind a login.
How to handle puppeteer cookies and fetch?
What if I put the fetch function outside page.evaluate — will { credentials: "include" } still work?
Thanks in advance for your help.
const puppeteer = require("puppeteer");
const cheerio = require("cheerio");
const fs = require("fs");

(async () => {
  const browser = await puppeteer.launch({
    args: ["--no-sandbox"],
    headless: false,
    slowMo: 30,
  });
  const page = await browser.newPage();
  await page.goto(
    "https://file-examples.com/index.php/sample-documents-download/sample-xls-download/"
  );

  // Scrape every download link (href + trailing filename) out of the table.
  const content = await page.content();
  const $ = cheerio.load(content);
  const listings = $("#table-files > tbody > tr:has(a)")
    .map((index, element) => {
      const URL = $(element).find("a").attr("href");
      const Filename = $(element).find("a").attr("href").split("/").pop();
      return {
        Filename,
        URL,
      };
    })
    .get();

  for (let val of listings) {
    const downloadUrl = val.URL;
    const Filename = val.Filename;
    console.log(val);
    // BUG FIX: fetchResp.text() decodes the binary .xls as UTF-8 and corrupts
    // it (hence the tiny broken files). Read the body as an ArrayBuffer and
    // ship it out of the page as base64, since evaluate() can only return
    // JSON-serializable values; fetch runs in-page, so session cookies apply.
    const base64Content = await page.evaluate(async (downloadUrl) => {
      const fetchResp = await fetch(downloadUrl, { credentials: "include" });
      const buffer = await fetchResp.arrayBuffer();
      let binary = "";
      const bytes = new Uint8Array(buffer);
      for (let i = 0; i < bytes.length; i++) {
        binary += String.fromCharCode(bytes[i]);
      }
      return btoa(binary);
    }, downloadUrl);
    fs.writeFile(`./${Filename}`, Buffer.from(base64Content, "base64"), () =>
      console.log("Wrote file")
    );
  }
  await page.close();
  await browser.close();
})();
The main problem here is that you are getting the file contents as just text, which would be fine if you wanted a plain text file, but you need to write an excel file, so you will need blob or an arrayBuffer, both of which cannot be returned from the page.evaluate method. See https://github.com/puppeteer/puppeteer/issues/3722
So you don't need to fetch the excel files using the page.evaluate function from puppeteer, you can directly get them from node using https module after getting all the links and then stream the contents to the files, which is easier in this case and also less code. You'll need these modifications
First require the https module
const https = require('https');
Then close puppeteer after getting the links, since we don't need it anymore
.get();
await page.close();
await browser.close();
Call the function here, when looping through the links
// Download each scraped listing sequentially (getFile is defined below).
for (let val of listings) {
const downloadUrl = val.URL;
const Filename = val.Filename;
console.log(val);
var file = await getFile(downloadUrl, Filename);
}
Finally, you need to create a function to read/write the file, outside of your main code block
// Read from downloadUrl and stream the response into Filename.
// BUG FIX: the original returned undefined, so `await getFile(...)` was a
// no-op and the process could move on before the file was written. It now
// returns a Promise that settles when the write stream finishes. The unused
// `var data = ''` accumulator is gone, and https.get already calls
// req.end() internally, so the explicit call was redundant.
function getFile(downloadUrl, Filename) {
  return new Promise((resolve, reject) => {
    const writeStream = fs.createWriteStream(Filename);
    const req = https.get(downloadUrl, function (res) {
      res.pipe(writeStream);
      // Resolve when the file is flushed to disk, not merely when the
      // response body ends.
      writeStream.on('finish', () => {
        console.log('No more data in response.');
        resolve(Filename);
      });
    });
    req.on('error', reject);
    writeStream.on('error', reject);
  });
}
Full snippet
const puppeteer = require('puppeteer');
const cheerio = require("cheerio");
const fs = require("fs");
const https = require('https');

(async () => {
  const browser = await puppeteer.launch({
    args: ["--no-sandbox"],
    headless: false,
    slowMo: 30,
  });
  const page = await browser.newPage();
  await page.goto(
    "https://file-examples.com/index.php/sample-documents-download/sample-xls-download/"
  );

  // Scrape every download link (href + trailing filename) out of the table.
  const content = await page.content();
  const $ = cheerio.load(content);
  const listings = $("#table-files > tbody > tr:has(a)")
    .map((index, element) => {
      const URL = $(element).find("a").attr("href");
      const Filename = $(element).find("a").attr("href").split("/").pop();
      // (removed an unused `name` local that was extracted from td:nth-child(1))
      return {
        Filename,
        URL,
      };
    })
    .get();

  // Puppeteer is only needed to collect the links; release it before downloading.
  await page.close();
  await browser.close();

  for (let val of listings) {
    const downloadUrl = val.URL;
    const Filename = val.Filename;
    console.log(val);
    // call the function with each link and filename
    await getFile(downloadUrl, Filename);
  }
})();
// Send the request and stream the response to a file.
// BUG FIX: the original returned undefined, so `await getFile(...)` did not
// actually wait for the download — it now returns a Promise that resolves
// once the write stream has flushed everything to disk, and rejects on
// request or write errors. https.get calls req.end() itself, so the explicit
// call was redundant and has been dropped.
function getFile(downloadUrl, Filename) {
  return new Promise((resolve, reject) => {
    const writeStream = fs.createWriteStream(Filename);
    const req = https.get(downloadUrl, function (res) {
      res.pipe(writeStream);
      // 'finish' on the write stream, not 'end' on the response: the data is
      // only guaranteed on disk when the stream finishes.
      writeStream.on('finish', () => {
        console.log('No more data in response.');
        resolve(Filename);
      });
    });
    req.on('error', reject);
    writeStream.on('error', reject);
  });
}
EDIT Saw your comment, you can send cookies by modifying the get request like this, but remember about the same domain policy for cookies
// Variant of getFile that sends a Cookie header (for downloads behind a login).
// Remember the same-domain policy: the cookie must belong to the target host.
function getFile(downloadUrl, Filename) {
  const url = new URL(downloadUrl);
  const options = {
    hostname: url.hostname,
    // BUG FIX: url.pathname alone silently drops the query string
    // (?id=... parameters), requesting the wrong resource.
    path: url.pathname + url.search,
    method: 'GET',
    headers: {
      'Cookie': 'myCookie=myvalue'
    }
  };
  // Return a Promise so callers can await the completed write; the original
  // returned undefined and offered no error handling.
  return new Promise((resolve, reject) => {
    const writeStream = fs.createWriteStream(Filename);
    const req = https.request(options, function (res) {
      res.pipe(writeStream);
      // Resolve when the file is flushed, not merely when the response ends.
      writeStream.on('finish', () => {
        console.log('No more data in response.');
        resolve(Filename);
      });
    });
    req.on('error', reject);
    writeStream.on('error', reject);
    req.end();
  });
}
I have an issue with pdf generation.
const puppeteer = require("puppeteer");
const fs = require("fs-extra");
const path = require("path");
const hbs = require("handlebars");
const data = require("./database.json");
const { v4 } = require("uuid");
// Load views/<templateName>.hbs from the working directory and render it
// with the supplied data, returning the resulting HTML string.
const compile = async function(templateName, data) {
  const templatePath = path.join(process.cwd(), "views", `${templateName}.hbs`);
  const source = await fs.readFile(templatePath, "utf-8");
  const render = hbs.compile(source);
  return render(data);
};
/**
 * Renders the "index" handlebars template with `data` and writes the result
 * to pdfs/<uuid>.pdf. Errors are logged rather than rethrown (best-effort,
 * as in the original). Note: stray `<-- object` / `<--- problem here`
 * annotations that made the original lines invalid JS were removed.
 */
async function createPDF(data) {
  let browser;
  try {
    const pdfFileName = v4();
    browser = await puppeteer.launch();
    const page = await browser.newPage();
    console.log(data); // expected to be a plain object for the template
    // NOTE(review): if `data` is undefined inside the template, check what the
    // caller passes here — compile() simply forwards it to handlebars.
    const content = await compile("index", data);
    await page.setContent(content);
    await page.emulateMedia("screen");
    await page.pdf({
      path: path.join(__dirname, `pdfs/${pdfFileName}.pdf`),
      format: "A4",
      printBackground: true
    });
  } catch (error) {
    console.log(error)
  } finally {
    // BUG FIX: the browser leaked whenever page.pdf() (or any earlier await)
    // threw; always close it when it was launched.
    if (browser) await browser.close();
  }
};
Everything works fine. I can generate my new pdf file, but for some reason, my data object in handlebars templates is undefined.
Any idea how to solve it?
I want to click a link that contains certain text. I have tried using an X-Path expression and that is not working. The application I am testing is Multi Page Application so I am not sure if the new page is generated.
The HTML:
<a class="text-major ev-pick-this-event" href="/cgi-bin/ncommerce3/SEGetEventInfo?ticketCode=GS%3AAMTX%3AHUSKERS%3ASLP2%3A&linkID=BQFN80-AMTX&shopperContext=&pc=&caller=&appCode=&groupCode=SLP&cgc=&dataAccId=129&locale=en_US&siteId=ev_BQFN80-AMTX">HUSKERS - SLP2 - Ranges</a>
What I have tried:
DislplayEventList.js
class DisplayEventListPage {
  /**
   * Clicks the "HUSKERS - SLP2 - Ranges" event link on the event list page.
   *
   * BUG FIX: page.$x() resolves to an ARRAY of element handles, so calling
   * .click() on the array itself threw "slp2ItemLink.click is not a
   * function". Destructure the first match and click that, failing loudly
   * when the link is absent.
   *
   * @param {object} page - puppeteer Page (or compatible) object
   */
  async clickEventListLink(page) {
    const [slp2ItemLink] = await page.$x(
      '//a[contains(., "HUSKERS - SLP2 - Ranges")]'
    );
    if (!slp2ItemLink) {
      throw new Error('Event link "HUSKERS - SLP2 - Ranges" not found');
    }
    await slp2ItemLink.click();
  }
}
module.exports = DisplayEventListPage;
navigate.test.js
// Jest + jest-puppeteer navigation test. Relies on the global `page` that
// jest-puppeteer injects and on page-object classes for each screen.
const path = require("path");
const config = require("config");
const url = config.get("url");
const DisplayGroupListPage = require("../pageObjects/DisplayGroupList");
const DisplayEventListPage = require("../pageObjects/DisplayEventList");
let groupListPage = new DisplayGroupListPage();
let eventListPage = new DisplayEventListPage();
describe("Test Navigation", () => {
// Drive the app from the group list to the target event before asserting.
beforeAll(async () => {
await page.goto(url);
await page.screenshot({ path: "groupListPage.png" });
await groupListPage.clickGroupName(page);
await page.screenshot({ path: "eventListPage.png" });
// Fixed pause for the multi-page navigation to settle.
// NOTE(review): page.waitFor is deprecated in newer puppeteer — confirm version.
await page.waitFor(3000);
const pageURL = await page.url();
console.log(pageURL);
await eventListPage.clickEventListLink(page);
});
// Verify the event detail page loaded by checking the document title.
it('should be titled "evenue 1.5 | Online Ticket Office | HUSKERS - SLP2 - Ranges', async () => {
await expect(page.title()).resolves.toMatch(
"evenue 1.5 | Online Ticket Office | HUSKERS - SLP2 - Ranges"
);
});
});
Error I receive:
TypeError: slp2ItemLink.click is not a function
try with contains selector:
// After a 3-second delay (so the page and jQuery are loaded), click the
// anchor whose text contains the target string, then log a marker.
setTimeout(function () {
  const eventLink = $("a:contains('HUSKERS - SLP2 - Ranges')");
  eventLink.trigger("click");
  console.log("go");
}, 3000);
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<a class="text-major ev-pick-this-event" href="/cgi-bin/ncommerce3/SEGetEventInfo?ticketCode=GS%3AAMTX%3AHUSKERS%3ASLP2%3A&linkID=BQFN80-AMTX&shopperContext=&pc=&caller=&appCode=&groupCode=SLP&cgc=&dataAccId=129&locale=en_US&siteId=ev_BQFN80-AMTX" target="_blank">HUSKERS - SLP2 - Ranges</a>
Take into consideration that the user must first interact with the page, or the click trigger will be ignored.