Download pdf file with puppeteer - javascript

The code should download the pdf file but it doesn't.
Basically, I want to download a PDF file from a link and save it to my machine.
const puppeteer = require("puppeteer");
// Opens the sample PDF in a visible browser window and then, over a raw
// DevTools Protocol session, asks the browser to allow downloads into a
// local folder.
// NOTE(review): the download behavior is only configured after page.goto()
// has resolved — confirm whether it has to be set before navigating.
// NOTE(review): the browser is never closed by this snippet.
(async () => {
  const pdfUrl = "https://www.thecampusqdl.com/uploads/files/pdf_sample_2.pdf";
  const launchOptions = { headless: false };

  const chromium = await puppeteer.launch(launchOptions);
  const tab = await chromium.newPage();
  await tab.goto(pdfUrl);

  const cdp = await tab.target().createCDPSession();
  const downloadSettings = {
    behavior: "allow",
    downloadPath: "C:/my folder/",
  };
  await cdp.send("Browser.setDownloadBehavior", downloadSettings);
})();

Related

How to download historical-data (csv format) from investing.com with Puppeteer Js?

I tried this piece of code to download historical data in csv format from investing.com.
//collector.mjs
import puppeteer from "puppeteer";
import path from "path";
// Loads the Tesla historical-data page, allows downloads into ./csvData via
// a DevTools Protocol session, clicks the download control from inside the
// page, and closes the browser.
// NOTE(review): the browser is closed right after the click is dispatched —
// confirm whether the download has had time to finish at that point.
(async () => {
  const historyUrl = "https://www.investing.com/equities/tesla-motors-historical-data";
  const downloadSelector = "span[class='download-data_text__Myrn3']";

  const chromium = await puppeteer.launch();
  const tab = await chromium.newPage();
  await tab.setUserAgent('Chrome/105.0.0.0');
  await tab.goto(historyUrl, { waitUntil: "networkidle2" });

  const cdp = await tab.target().createCDPSession();
  await cdp.send('Page.setDownloadBehavior', {
    behavior: 'allow',
    downloadPath: path.resolve("./csvData"),
  });

  // The click is performed by script running inside the page itself.
  await tab.evaluate((selector) => {
    const control = document.querySelector(selector);
    control.click();
  }, downloadSelector);

  await chromium.close();
})();
What I get every time is a "TSLA Historical Data.csv.crdownload" file instead.
So, how do I get this file (pictured below) in my csvData folder instead?
Use the page.setViewport().
//collector.mjs
import puppeteer from "puppeteer";
import path from "path";
// Same flow as the question's collector script, but with a 1920x1080 viewport
// set before navigating, the Browser.* variant of setDownloadBehavior, and a
// Puppeteer-level click on the download control.
// NOTE(review): the browser is still closed immediately after the click —
// confirm the download has completed by then.
(async () => {
  const historyUrl = "https://www.investing.com/equities/tesla-motors-historical-data";
  const viewport = { width: 1920, height: 1080 };

  const chromium = await puppeteer.launch();
  const tab = await chromium.newPage();
  await tab.setUserAgent('Chrome/105.0.0.0');
  await tab.setViewport(viewport);
  await tab.goto(historyUrl, { waitUntil: "networkidle2" });

  const cdp = await tab.target().createCDPSession();
  await cdp.send('Browser.setDownloadBehavior', {
    behavior: 'allow',
    downloadPath: path.resolve("./csvData"),
  });

  await tab.click('.download-data_text__Myrn3');
  await chromium.close();
})();
Done,

Integrate Node.js code using puppeteer in JMeter

How can I integrate Node.js code using puppeteer in JMeter?
const puppeteer = require('puppeteer');
// Opens the TestProject blog in a visible browser, clicks the second "Docs"
// link, saves a screenshot to example.png, and closes the browser.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://blog.testproject.io/');

    // page.click() expects a CSS selector, so the XPath expression is
    // resolved with page.$x() and the resulting element handle is clicked.
    // The attribute test uses XPath's `@title` syntax.
    const [docsLink] = await page.$x("(//a[@title='Docs'][contains(.,'Docs')])[2]");
    if (!docsLink) {
      throw new Error('Docs link not found on the page');
    }
    await docsLink.click();

    await page.screenshot({ path: 'example.png' });
  } finally {
    // Release the browser even when a step above throws.
    await browser.close();
  }
})();

Scraping into different pages

I'm trying to scrape a web page, and after following some tutorials I found out how to scrape different products and how to change the page, but not both at the same time. I tried several ways to do it, but couldn't figure it out.
This is my scraping code:
const puppeteer = require('puppeteer');
const xlsx = require("xlsx");
/**
 * Navigates `page` to `url` and reads one product's details from it.
 *
 * @param {string} url - Address of the product page to open.
 * @param {object} page - Page object; its goto() and $eval() methods are used.
 * @returns {Promise<{title: *, price: *, instock: *}>} The textContent of the
 *   heading and price elements and the innerText of the availability element.
 */
async function getPageData(url, page) {
  await page.goto(url);

  const readText = (node) => node.textContent;
  const title = await page.$eval(".product_main h1", readText);
  const price = await page.$eval(".price_color", readText);
  const instock = await page.$eval(".instock.availability", (node) => node.innerText);

  return { title, price, instock };
}
/**
 * Opens the books.toscrape.com front page in its own browser and collects
 * the product links found there.
 *
 * @returns {Promise<string[]>} The href of every anchor matching
 *   `.product_pod .image_container a`.
 */
async function getLinks() {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://books.toscrape.com/');
    // The links used to be computed and then discarded; return them so the
    // caller can actually visit each product page.
    return await page.$$eval('.product_pod .image_container a', (anchors) =>
      anchors.map((anchor) => anchor.href));
  } finally {
    // This function launched the browser, so it is responsible for closing it.
    await browser.close();
  }
}
/**
 * Entry point: launches a visible browser, scrapes one hard-coded book page
 * through getPageData(), and logs the resulting record.
 * NOTE(review): the browser is left open when this function finishes.
 */
async function main() {
  const bookUrl = "https://books.toscrape.com/catalogue/a-light-in-the-attic_1000/index.html";

  const chromium = await puppeteer.launch({ headless: false });
  const tab = await chromium.newPage();

  const record = await getPageData(bookUrl, tab);
  console.log(record);
}

main();
And here is my code to change the page:
const puppeteer = require('puppeteer');
const xlsx = require("xlsx");
// Walks through the catalogue by clicking the pager's "next" link for as
// long as one is present, pausing three seconds after each click.
(async () => {
  const nextLinkSelector = ".pager .next a";

  const chromium = await puppeteer.launch({ headless: false });
  const tab = await chromium.newPage();
  await tab.goto('https://books.toscrape.com/');

  for (;;) {
    // page.$() yields a falsy value once no "next" link matches.
    const nextLink = await tab.$(nextLinkSelector);
    if (!nextLink) {
      break;
    }
    await tab.click(nextLinkSelector);
    await tab.waitForTimeout(3000);
  }
})();

Page loads in regular chrome but not in puppeteer

I am trying to load a page, http://www.nhc.gov.cn/wjw/index.shtml, on puppeteer as part of a covid-tracking program. The page loads very quickly in the regular chrome browser, but when I load it in puppeteer, the page load fails with a 412. What can I do to get the page to load and fully simulate a regular browser going to the page?
The code for reproduction of this phenomenon is below:
const puppeteer = require('puppeteer-core');
// Reproduction script: launches a local chrome.exe through puppeteer-core,
// mirrors the page's console output, loads the NHC index page, waits, takes
// a screenshot, and closes the browser.
(async () => {
  const launchOptions = {
    executablePath: '..\\executables\\chrome.exe',
    headless: false,
    args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu'],
  };

  const browser = await puppeteer.launch(launchOptions);
  const page = await browser.newPage();

  // Expose both handles as globals.
  global.browser = browser;
  global.page = page;

  // Echo every console message emitted by the page.
  page.on('console', (msg) => {
    console.log(`chrome[${msg.text()}]`);
  });

  await page.goto('http://www.nhc.gov.cn/wjw/index.shtml', { waitUntil: 'networkidle0' });
  await page.waitFor(15000);
  await page.screenshot({ path: 'nhc_scrape.png' });
  await browser.close();
})();
Thank you in advance for your help!
You can use puppeteer-extra with the StealthPlugin.
https://www.npmjs.com/package/puppeteer-extra-plugin-stealth
Here is my code :
const puppeteer = require('puppeteer-extra')
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
// Registers the stealth plugin on puppeteer-extra, loads the NHC index page,
// waits for the `.inLists` element to appear, takes a screenshot, and closes
// the browser.
(async () => {
  puppeteer.use(StealthPlugin());

  const launchOptions = { headless: false, ignoreHTTPSErrors: true };
  const chromium = await puppeteer.launch(launchOptions);
  const tab = await chromium.newPage();

  await tab.goto('http://www.nhc.gov.cn/wjw/index.shtml');
  await tab.waitForSelector('.inLists');
  await tab.screenshot({ path: 'nhc_scrape.png' });

  await chromium.close();
})();

Puppeteer: line of code being executed before others

I have this code:
const puppeteer = require("puppeteer");
// Opens the Serie A odds page, clicks the "change market" label, then
// immediately looks up and clicks the "1X2 ESITO FINALE" market entry.
//
// The XPath literals are written on a single line (a quoted string cannot
// span lines) and use XPath's `@class` attribute syntax.
//
// The click() calls are deliberately left un-awaited and no wait is inserted
// between them, exactly as posted: this snippet reproduces the timing problem
// the question describes.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a");

  const [button1] = await page.$x('//div[@class="marketBar_changeMarketLabel__l0vzl"]/p');
  button1.click();

  const [button2] = await page.$x('//div[@class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]');
  button2.click();
})();
The problem is that after clicking button1 the page changes and Puppeteer immediately executes the following line of code. Instead, I want it to wait for the new page to be loaded, because otherwise it will throw an error since it can't find button2.
I found this solution on stackoverflow:
const puppeteer = require("puppeteer");
/**
 * Returns a promise that is resolved by a setTimeout() callback.
 *
 * @param {number} time - Delay passed to setTimeout(), in milliseconds.
 * @returns {Promise<undefined>} Resolves with no value once the timer fires.
 */
function delay(time) {
  return new Promise((resolve) => {
    setTimeout(resolve, time);
  });
}
// Workaround variant: same flow as the original snippet, but with a fixed
// four-second pause (via delay()) between the first click and the lookup of
// the second button.
//
// The XPath literals are written on a single line (a quoted string cannot
// span lines) and use XPath's `@class` attribute syntax. The click() calls
// are left un-awaited, as posted.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a");

  const [button1] = await page.$x('//div[@class="marketBar_changeMarketLabel__l0vzl"]/p');
  button1.click();

  // Fixed pause standing in for "wait until the new content has loaded".
  await delay(4000);

  const [button2] = await page.$x('//div[@class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]');
  button2.click();
})();
But of course this isn't the best solution.
I think you have to modify your code a bit:
// Start waiting for the navigation before the click is dispatched. If the
// click is awaited first, the navigation can finish before
// waitForNavigation() starts listening, and the wait then times out.
await Promise.all([
  page.waitForNavigation({ waitUntil: 'networkidle2' }),
  button1.click(),
]);
For reference, see the documentation.
I found a solution, here's the code:
const puppeteer = require("puppeteer");
// Opens the Serie A odds page and clicks through to the "1X2 ESITO FINALE"
// market, waiting for each element to appear before clicking it instead of
// sleeping for a fixed time.
(async () => {
  const pageUrl = "https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a";
  // XPath attribute tests use `@class`; each expression is kept on one line
  // because a quoted string literal cannot span lines.
  const changeMarketXPath = '//div[@class="marketBar_changeMarketLabel__l0vzl"]/p';
  const finalResultXPath = '//div[@class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]';

  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto(pageUrl);

  // waitForXPath() resolves with the matching element handle, so a second
  // lookup with page.$x() is not needed.
  const button1 = await page.waitForXPath(changeMarketXPath);
  await button1.click();

  const button2 = await page.waitForXPath(finalResultXPath);
  // Await the final click so that a failure rejects this function's promise
  // rather than a detached one.
  await button2.click();
})();

Categories