How to download historical-data (csv format) from investing.com with Puppeteer Js? - javascript

I tried this piece of code to download historical data in csv format from investing.com.
//collector.mjs
import puppeteer from "puppeteer";
import path from "path";
// Launches a browser, opens the Tesla historical-data page, allows downloads
// into ./csvData, clicks the download link and then closes the browser.
(async ()=>{
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.setUserAgent('Chrome/105.0.0.0');
await page.goto("https://www.investing.com/equities/tesla-motors-historical-data", {
waitUntil: "networkidle2",
});
// Open a raw DevTools-protocol session on this page's target.
const client = await page.target().createCDPSession();
// Allow downloads; path.resolve turns the relative folder into an absolute path.
await client .send('Page.setDownloadBehavior', {
behavior: 'allow',
downloadPath: path.resolve("./csvData"),
});
// The click is issued from inside the page via the DOM element's own click().
await page.evaluate(()=>{
document.querySelector("span[class='download-data_text__Myrn3']").click();
});
// NOTE(review): the browser is closed immediately after the click, with no
// wait for the download to finish — presumably why only the partial
// ".crdownload" file is left behind; confirm.
await browser.close();
})();
What I get every time is a "TSLA Historical Data.csv.crdownload" file instead.
So, how do I get the finished CSV file (picture below) in my csvData folder instead?

Use the page.setViewport().
//collector.mjs
import puppeteer from "puppeteer";
import path from "path";
// Downloads the Tesla historical-data CSV from investing.com into ./csvData.
//
// The browser must stay open until Chrome has finished writing the file:
// while a download is in progress Chrome stores it under a temporary
// "*.crdownload" name, and closing the browser at that point leaves only the
// partial file behind. After clicking the link we therefore poll the download
// folder until a new file without the ".crdownload" suffix appears.
(async () => {
  // Local import so this snippet stays self-contained.
  const { mkdir, readdir } = await import("node:fs/promises");

  const DOWNLOAD_TIMEOUT_MS = 30_000;
  const POLL_INTERVAL_MS = 250;

  const downloadDir = path.resolve("./csvData");
  await mkdir(downloadDir, { recursive: true });

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.setUserAgent('Chrome/105.0.0.0');
    await page.setViewport({
      width: 1920,
      height: 1080,
    });
    await page.goto("https://www.investing.com/equities/tesla-motors-historical-data", {
      waitUntil: "networkidle2",
    });

    const client = await page.target().createCDPSession();
    await client.send('Browser.setDownloadBehavior', {
      behavior: 'allow',
      downloadPath: downloadDir,
    });

    // Remember what is already in the folder so only the new file is watched.
    const existingFiles = new Set(await readdir(downloadDir));

    await page.click('.download-data_text__Myrn3');

    const deadline = Date.now() + DOWNLOAD_TIMEOUT_MS;
    let isDownloadComplete = false;
    while (!isDownloadComplete && Date.now() < deadline) {
      await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
      const newFiles = (await readdir(downloadDir)).filter((name) => !existingFiles.has(name));
      isDownloadComplete =
        newFiles.length > 0 && newFiles.every((name) => !name.endsWith('.crdownload'));
    }
    if (!isDownloadComplete) {
      throw new Error(`Download did not finish within ${DOWNLOAD_TIMEOUT_MS} ms`);
    }
  } finally {
    // Always release the browser, even when navigation or the download fails.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Done,

Related

Integrate Node.js code using puppeteer in JMeter

How can I integrate Node.js code using puppeteer in JMeter?
const puppeteer = require('puppeteer');
// Opens the TestProject blog in a visible browser, clicks the second "Docs"
// link and saves a screenshot to example.png.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://blog.testproject.io/');

    // A bare XPath is not a valid selector for page.click(), so the
    // expression is resolved with page.$x() and the handle is clicked.
    // Attribute tests in XPath use "@" (the posted "#title" is not valid XPath).
    const docsXPath = "(//a[@title='Docs'][contains(.,'Docs')])[2]";
    const [docsLink] = await page.$x(docsXPath);
    if (!docsLink) {
      throw new Error(`No element matches XPath: ${docsXPath}`);
    }
    await docsLink.click();

    await page.screenshot({ path: 'example.png' });
  } finally {
    // Close the browser even if navigation or the click fails.
    await browser.close();
  }
})();

Page loads in regular chrome but not in puppeteer

I am trying to load a page, http://www.nhc.gov.cn/wjw/index.shtml, on puppeteer as part of a covid-tracking program. The page loads very quickly in the regular chrome browser, but when I load it in puppeteer, the page load fails with a 412. What can I do to get the page to load and fully simulate a regular browser going to the page?
The code for reproduction of this phenomenon is below:
const puppeteer = require('puppeteer-core');
// Repro: opens the NHC index page in a visible Chrome started from a local
// executable, takes a screenshot and closes the browser.
(async () => {
const browser = await puppeteer.launch({ executablePath: '..\\executables\\chrome.exe', headless: false, args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-gpu'] });
const page = await browser.newPage();
// Expose both handles as globals (presumably for interactive debugging).
Object.assign(global, { browser, page });
// Mirror the page's console output into the Node console.
page.on('console', msg => console.log(`chrome[${msg.text()}]`));
await page.goto('http://www.nhc.gov.cn/wjw/index.shtml', { waitUntil: 'networkidle0' });
// Presumably a fixed 15 s pause before the screenshot — TODO confirm against
// the installed Puppeteer version's page.waitFor semantics.
await page.waitFor(15000);
await page.screenshot({path: 'nhc_scrape.png'});
await browser.close();
})();
Thank you in advance for your help!
You can use puppeteer-extra with the StealthPlugin.
https://www.npmjs.com/package/puppeteer-extra-plugin-stealth
Here is my code :
const puppeteer = require('puppeteer-extra')
const StealthPlugin = require("puppeteer-extra-plugin-stealth");
// Loads the NHC index page through puppeteer-extra with the stealth plugin,
// waits for the ".inLists" element and saves a screenshot.
(async () => {
  puppeteer.use(StealthPlugin());
  const browser = await puppeteer.launch({ headless: false, ignoreHTTPSErrors: true });
  try {
    const page = await browser.newPage();
    await page.goto('http://www.nhc.gov.cn/wjw/index.shtml');
    // Wait for real page content instead of sleeping for a fixed time.
    await page.waitForSelector('.inLists');
    await page.screenshot({ path: 'nhc_scrape.png' });
  } finally {
    // Without this, a failed goto/waitForSelector would leave the browser running.
    await browser.close();
  }
})();

Puppeteer: line of code being executed before others

I have this code:
const puppeteer = require("puppeteer");
// Repro from the question: opens the Serie A odds page, clicks the
// "change market" label and then tries to click the "1X2 ESITO FINALE" entry.
// The XPath literals are restored to single-line strings with "@class"
// attribute tests so the snippet parses and the expressions are valid XPath.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a");
  const [button1] = await page.$x('//div[@class="marketBar_changeMarketLabel__l0vzl"]/p');
  // Kept as posted: the click is not awaited and nothing waits for the new
  // content, so the lookup below runs immediately. This is the behaviour the
  // question is asking about.
  button1.click();
  const [button2] = await page.$x('//div[@class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]');
  button2.click();
})();
The problem is that after clicking button1 the page changes and Puppeteer immediately executes the following line of code. Instead, I want it to wait for the new page to be loaded, because otherwise it will throw an error since it can't find button2.
I found this solution on stackoverflow:
const puppeteer = require("puppeteer");
/**
 * Returns a promise that settles once the given timeout has elapsed.
 *
 * @param {number} time - Delay passed to setTimeout, in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) when the timer fires.
 */
function delay(time) {
  return new Promise((done) => {
    setTimeout(done, time);
  });
}
// Workaround from the question: same flow as the first attempt, but with a
// fixed 4 s pause after the first click so the next view has time to render.
// The XPath literals are restored to single-line strings with "@class"
// attribute tests so the snippet parses and the expressions are valid XPath.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a");
  const [button1] = await page.$x('//div[@class="marketBar_changeMarketLabel__l0vzl"]/p');
  button1.click();
  // Fixed sleep kept as posted; it is the workaround under discussion.
  await delay(4000);
  const [button2] = await page.$x('//div[@class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]');
  button2.click();
})();
But of course this isn't the best solution.
I think you have to modify a bit in your code:
// Start listening for the navigation before the click is dispatched; awaiting
// the click first can miss a navigation that completes before
// waitForNavigation() is called, which would then time out.
await Promise.all([
  page.waitForNavigation({ waitUntil: 'networkidle2' }),
  button1.click(),
]);
For reference, see the documentation.
I found a solution, here's the code:
const puppeteer = require("puppeteer");
// Opens the Serie A odds page, clicks the "change market" label and then the
// "1X2 ESITO FINALE" entry, waiting for each element to exist before clicking.
(async () => {
  const marketLabelXPath = '//div[@class="marketBar_changeMarketLabel__l0vzl"]/p';
  const finalResultXPath = '//div[@class="listItem_container__2IdVR white marketList_listItemHeight__1aiAJ marketList_bgColorGrey__VdrVK"]/p[text()="1X2 ESITO FINALE"]';

  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://www.sisal.it/scommesse-matchpoint/quote/calcio/serie-a");

  // waitForXPath resolves with the matching element handle, so a second
  // page.$x() lookup for the same expression is unnecessary.
  const button1 = await page.waitForXPath(marketLabelXPath);
  await button1.click();

  const button2 = await page.waitForXPath(finalResultXPath);
  // Await the click so a failure rejects this function instead of being lost.
  await button2.click();
})();

How do i type something into an input box with the puppeteer?

I'm doing experiments for a bot I'm making, but for some reason I can't get it to type into the input box on YouTube.
// Repro: opens YouTube in a visible browser and tries to type into the
// element with id "search".
(async () => {
const browser = await puppeteer.launch({ headless: false });
const page = await browser.newPage();
await page.goto('https://www.youtube.com/?hl=hr&gl=HR');
// NOTE(review): goto() has already been awaited above, so this waits for a
// further navigation — presumably none happens and the call stalls until it
// times out, so page.type() below is never reached; confirm.
await page.waitForNavigation({
waitUntil: 'networkidle0',
});
await page.type('#search', `text`)
// await browser.waitForTarget(() => false)
// await browser.close();
})();
The #search is the id for the youtube search bar but it isn't working for some reason
Your problem is waitUntil: 'networkidle0'
// Opens YouTube in a visible browser and types into the search box once the
// input element is present in the DOM.
(async () => {
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto('https://www.youtube.com/?hl=hr&gl=HR');
  // Wait for the element itself rather than for network idleness.
  await page.waitForSelector('input#search');
  await page.type('input#search', 'text');
})();

iframe is not loaded in puppeteer

My code:
// Repro: loads the page, looks for the frame whose URL contains
// "https://accounts.", fills in the login form inside it, submits it and
// returns the page cookies.
// NOTE(review): `puppeteer`, `email` and `password` are not defined in this
// snippet — presumably supplied by the surrounding code; confirm.
async () => {
const browser = await puppeteer.launch();
const page = await browser.newPage();
await page.goto('https://example.com' , {
waitUntil: 'networkidle2'
});
await page.screenshot({path: 'home.png', fullPage: true});
// The frame list is read once, here.
let frames = await page.frames();
// Second screenshot overwrites the same home.png written above.
await page.screenshot({path: 'home.png', fullPage: true});
// NOTE(review): if no frame URL contains "https://accounts." at this point,
// find() yields undefined and the next line throws — presumably the failure
// described in the question (frame attached later by the React app); confirm.
var iFrame = frames.find(f => f.url().indexOf("https://accounts.") > -1);
const usernameInput = await iFrame.waitForSelector("[name=username]" , {visible : true});
await usernameInput.type(email);
const passwordInput = await iFrame.waitForSelector("[name=password]" , {visible : true});
await passwordInput.type(password);
// The navigation wait is created before the click and awaited after it.
const navigationPromisePortal = page.waitForNavigation();
await iFrame.click(".primary");
await navigationPromisePortal;
var cookies = await page.cookies();
await browser.close();
return cookies;
}
I'm trying to get cookies from example.com, but every time the accounts iframe is not loaded. The page is built with React. How do I solve this problem?

Categories