Getting blank screenshot from Puppeteer - javascript

I have used puppeteer to capture the screenshot of my page in React JS. But it is taking a blank screenshot instead of the actual charts present on the page. Here is my code.
const puppeteer = require('puppeteer');
const url = process.argv[2];
if (!url) {
throw "Please provide URL as a first argument";
}
async function run () {
return new Promise(async (resolve, reject) => {
try {
const browser = await puppeteer.launch({args: ['--no-sandbox', '--disable-setuid-sandbox'],headless: true, ignoreHTTPSErrors:true});
const page = await browser.newPage();
await page.goto(url, {
timeout: 30000,
waitUntil: "networkidle0"
});
await page.content();
let imgDataBase64 = await page.screenshot({quality:100, fullPage: true, encoding: "base64", type: "jpeg"});
await browser.close();
return resolve(imgDataBase64);
} catch (e) {
return reject(e);
}
})
}
run().then(console.log).catch(console.error);
The reason for the same could be document is getting loaded first before the chart loads. And puppeteer takes the screenshot as soon as the document loads. Can anyone please help me with this? We have to be sure that there is no delay in chart loading after the document is loaded so that screenshot can be captured properly. Please help. Thanks in advance.

Related

Using wappalyzer and puppeteer in node.js

I am trying to build a scraper to monitor web projects automatically.
So far so good, the script is running, but now I want to add a feature that automatically analyses what libraries I used in the projects. The most powerful script for this job is wappalyser. They have a node package (https://www.npmjs.com/package/wappalyzer) and it's written that you can use it combined with pupperteer.
I managed to run pupperteer and to log the source code of the sites in the console, but I don't get the right way to pass the source code to the wappalyzer analyse function.
Do you guys have a hint for me?
I tryed this code but a am getting a TypeError: url.split is not a function
function getLibarys(url) {
(async () => {
const browser = await puppeteer.launch({ headless: true });
const page = await browser.newPage();
await page.goto(url);
// get source code with puppeteer
const html = await page.content();
const wappalyzer = new Wappalyzer();
(async function () {
try {
await wappalyzer.init()
// Optionally set additional request headers
const headers = {}
const site = await wappalyzer.open(page, headers)
// Optionally capture and output errors
site.on('error', console.error)
const results = await site.analyze()
console.log(JSON.stringify(results, null, 2))
} catch (error) {
console.error(error)
}
await wappalyzer.destroy()
})()
await browser.close()
})()
}
Fixed it by using the sample code from wappalyzer.
function getLibarys(url) {
const Wappalyzer = require('wappalyzer');
const options = {
debug: false,
delay: 500,
headers: {},
maxDepth: 3,
maxUrls: 10,
maxWait: 5000,
recursive: true,
probe: true,
proxy: false,
userAgent: 'Wappalyzer',
htmlMaxCols: 2000,
htmlMaxRows: 2000,
noScripts: false,
noRedirect: false,
};
const wappalyzer = new Wappalyzer(options)
;(async function() {
try {
await wappalyzer.init()
// Optionally set additional request headers
const headers = {}
const site = await wappalyzer.open(url, headers)
// Optionally capture and output errors
site.on('error', console.error)
const results = await site.analyze()
console.log(JSON.stringify(results, null, 2))
} catch (error) {
console.error(error)
}
await wappalyzer.destroy()
})()
}
I do not know if you still need an answer to this. But this is what a wappalyzer collaborator told me:
Normally you'd run Wappalyzer like this:
const Wappalyzer = require('wappalyzer')
const wappalyzer = new Wappalyzer()
await wappalyzer.init() // Launches a Puppeteer instance
const site = await wappalyzer.open(url)
If you want to use your own browser instance, you can skip wappalyzer.init() and assign the instance to wappalyzer.browser:
const Wappalyzer = require('wappalyzer')
const wappalyzer = new Wappalyzer()
wappalyzer.browser = await puppeteer.launch() // Use your own Puppeteer launch logic
const site = await wappalyzer.open(url)
You can find the discussion here.
Hope this helps.

UnhandledPromiseRejectionWarning: Error: Evaluation failed theme is not defined

Before I start the question, I am new in JavaScript, and I have very basic knowledge of async js, but i need to solve this so i can have my first project functional.
I am trying to build a scraping app using Node and Puppeteer. Basically, the user enters a URL ("link" in the code below), puppeteer goes trough the website code, tries to find the specific piece and returns the data. That part I got working so far.
The problem is when a user enters a URL of a site that doesn't have that piece of code. In that case, I get UnhandledPromiseRejectionWarning: Error: Evaluation failed theme is not defined
What do I do so when there is an error like that, I can catch it and redirect the page instead of Getting Internal Server error.
app.post("/results", function(req, res) {
var link = req.body.link;
(async link => {
const browser = await puppeteer.launch({ args: ['--no-sandbox'] })
const page = await browser.newPage()
await page.goto(link, { waitUntil: 'networkidle2'})
const data = await page.evaluate('theme.name');
await browser.close()
return data
})(link)
.then(data => {
res.render("index", {data: data, siteUrl: link});
})
})
You can extend the async part to the whole route handler and do whatever you want on catch:
app.post('/results', async (req, res) => {
try {
const link = req.body.link
const browser = await puppeteer.launch({ args: ['--no-sandbox'] })
const page = await browser.newPage()
await page.goto(link, { waitUntil: 'networkidle2'})
const data = await page.evaluate('theme.name')
await browser.close()
res.render("index", {data: data, siteUrl: link})
} catch(e) {
// redirect or whatever
res.redirect('/')
}
});

I can't go from a page to another using page.goto() - Puppeteer

I'm trying to make a InstagramBot that logs in and then go to some profile, my code worked yesterday for awhile and than it just stopped working .
I've tried to clone my repository from github, but it does'n work either, sometimes it works again, but if I try to create another function, the code just ignore the line of the code that changes the page.
I've also tried to create a new page and then in this new page use the goto function and it worked, but the account doesn keep logged in
The version of puppeteer that I'm using: 1.16.0
The version of node.js that I'm using: v10.15.3
const puppeteer = require('puppeteer');
const BASE_URL = "https://www.instagram.com/accounts/login/?hl=en&source=auth_switcher";
const instagram = {
browser: null,
page: null,
profile_url: null,
initialize: async (profile) => {
instagram.browser = await puppeteer.launch({
headless: false
})
instagram.profile_url = await "https://www.instagram.com/" + profile;
instagram.page = await instagram.browser.newPage();
await instagram.page.goto(BASE_URL, {waitUntil: 'networkidle2'});
},
login: async(username, password) =>{
await instagram.page.waitFor(1000);
await instagram.page.type('input[name="username"]', username);
await instagram.page.type('input[name="password"', password);
await instagram.page.click('button[type="submit"]');
await instagram.page.waitFor(1500);
await console.log(instagram.profile_url);
await instagram.page.goto(instagram.profile_url, {timeout: 0, waitUntil: 'domcontentloaded'}); // the code just ignore this line
await instagram.page.waitFor(1000);
},
getPhotosLinks: async() => {
console.log("Do something here");
}
}
module.exports = instagram;
It doesn't give any error message, just doesn't work
Replace
await instagram.page.click('button[type="submit"]');
await instagram.page.waitFor(1500);
with
await Promise.all([
instagram.page.click('button[type="submit"]');,
instagram.page.waitForNavigation()
]);
and see if it works

Puppeteer doesn't show a calendar that is shown after clicking a text field

I'm using Puppeteer for doing some web scraping and I'm having troubles. The website I'm trying to scrape is this one and I'm trying to create a screenshot of a calendar that appears after clicking the button "Reserve now" > "Dates".
const puppeteer = require('puppeteer');
const fs = require('fs');
void (async () => {
try {
const browser = await puppeteer.launch({headless: false});
const page = await browser.newPage();
await page.goto('https://www.marriott.com/hotels/travel/reumd-le-meridien-ra-beach-hotel-and-spa');
await page.setViewport({ width: 1920, height: 938 });
await page.waitForSelector('.m-hotel-info > .l-container > .l-header-section > .l-m-col-2 > .m-button');
await page.click('.m-hotel-info > .l-container > .l-header-section > .l-m-col-2 > .m-button');
await page.waitForSelector('.modal-content');
await page.waitFor(5000);
await page.waitForSelector('.js-recent-search-inputs .js-datepick-container .l-h-field-input')
await page.click('.js-recent-search-inputs .js-datepick-container .l-h-field-input');
await page.waitFor(5000);
await page.screenshot({ path: 'myscreenshot.png'});
await browser.close();
} catch (error) {
console.log(error);
}
})()
This is what myscreenshot.png should contain:
but I'm getting this instead:
As you can see, myscreenshot.png doesn't contain the calendar. I don't understand what I'm doing wrong since I click on the right node and I even give time enough to it for loading everything.
Thank you in advance!
Edit: I forgot to say that I have also tried Puppeteer recorder in order to achieve this and I haven't had luck either.
As you have many .l-h-field-input elements, I would try being more specific there.
This worked for me:
await page.click('.js-recent-search-inputs .js-datepick-container .l-h-field-input');

Puppeteer: Grabbing html from page that doesn't refresh after input tag button is clicked

I am trying to grab some html after a input tag button is clicked. I am clicking the button with page.evaluate() since page.click() does not seem to work for an input tag button. I have tried visual debugging with headless:false in the puppeteer launch options to verify that the browser indeed navigated to the point after the button is clicked. I am unsure as to why page.content() returns the html before the button is clicked rather than the html after the event happens.
const puppeteer = require('puppeteer');
const url = 'http://www.yvr.ca/en/passengers/flights/departing-flights';
const fs = require('fs');
const tomorrowSelector = '#flights-toggle-tomorrow'
puppeteer.launch().then(async browser => {
const page = await browser.newPage();
await page.goto(url);
await page.evaluate((selector)=>document.querySelector(selector).click(),tomorrowSelector);
let html = await page.content();
await fs.writeFile('index.html', html, function(err){
if (err) console.log(err);
console.log("Successfully Written to File.");
});
await browser.close();
});
You can click on the label for the radio. Also, you need to wait for some sign of changed state (for XHR/fetch response or new selectors). For example, this code works for me, but you can use any other condition or just wait for some seconds.
const fs = require('fs');
const puppeteer = require('puppeteer');
const url = 'http://www.yvr.ca/en/passengers/flights/departing-flights';
const tomorrowLabelSelector = 'label[for=flights-toggle-tomorrow]';
const tomorrowLabelSelectorChecked = '.yvr-form__toggle:checked + label[for=flights-toggle-tomorrow]';
puppeteer.launch({ headless: false }).then(async (browser) => {
const page = await browser.newPage();
await page.goto(url);
await Promise.all([
page.click(tomorrowLabelSelector),
page.waitForSelector(tomorrowLabelSelectorChecked),
]);
const html = await page.content();
await fs.writeFile('index.html', html, (err) => {
if (err) console.log(err);
console.log('Successfully Written to File.');
});
// await browser.close();
});

Categories