I am having a problem getting the code in the beforeAll function to finish and wait for the promise that resolves storyLinks. The console.log at the end of the snippet prints undefined, but I need it to print the hrefs of the stories in my Storybook. I cannot wrap this in an async function because the testing pipeline gets clogged on failure.
const puppeteer = require('puppeteer');
const { toMatchImageSnapshot } = require('jest-image-snapshot');
expect.extend({ toMatchImageSnapshot });
const timeout = 5000;
// Visual test suite from the question: beforeAll connects to a browser, opens
// the page at localhost:8080, clicks the component entries and stores one
// {href, name} object per story link in `storyLinks`.
describe('visual tests', () => {
let page, browser, storyLinks;
const selector = `a[href*="selectedStory="]`;
beforeAll(async() => {
// `browserWSEndpoint` is not defined anywhere in this snippet.
browser = await puppeteer.connect({browserWSEndpoint});
page = await browser.newPage();
await page.goto('http://localhost:8080');
await page.evaluate(() => {
const components = Array.from(document.querySelectorAll('div[data-name]'));
// The loop starts at index 1, so the first matched element is never clicked.
for(let i = 1; i < components.length; i++) {
components[i].addEventListener('click',() => {});
components[i].click();
}
});
// `selector` is handed to the callback as the trailing evaluate() argument.
storyLinks = await page.evaluate((selector) => {
const stories = Array.from(document.querySelectorAll(selector));
const links = stories.map(story => {
let href = story.href;
// Non-alphanumeric characters become '-'; the second replace has no `g`
// flag, so only the first run of dashes is collapsed.
let name = story.text.replace(/[^A-Z0-9]/ig, '-').replace(/-{2,}/,'-');
// Throws if the href contains no `selectedKind=...&` part (match() returns null).
let component = href.match(/selectedKind=(.*?)\&/).pop();
return {href: href, name: component + '-' + name};
});
return links;
}, selector);
}, timeout);
afterAll(async () => {
await page.close();
await browser.disconnect();
})
// This statement sits directly in the describe body, not in a hook or a test.
// `storyLinks` is only assigned inside the beforeAll callback above, and the
// question reports that this line prints undefined.
console.log(storyLinks);
}, timeout);
There are a few things I notice that might be causing your issues. You need to add async to your describe block. Also, "describe" groups together multiple tests, so you're missing an it or test block. The Jest docs also recommend calling expect.assertions(NUM_OF_ASSERTIONS) in asynchronous tests. I'd do something like:
const puppeteer = require('puppeteer');
const { toMatchImageSnapshot } = require('jest-image-snapshot');
expect.extend({ toMatchImageSnapshot });
const timeout = 5000;
/**
 * Opens the local Storybook page, clicks every component entry and collects
 * one link descriptor per story anchor.
 *
 * @param {object} page - Puppeteer page; only `goto` and `evaluate` are used.
 * @returns {Promise<Array<{href: string, name: string}>>} One entry per anchor
 *   matching the story selector. `name` is `<selectedKind>-<link text>` with
 *   every run of non-alphanumeric characters in the link text replaced by a
 *   single dash.
 */
async function myStoryLinkTest(page) {
  const selector = `a[href*="selectedStory="]`;
  await page.goto('http://localhost:8080');

  await page.evaluate(() => {
    // Runs inside the page: click each component entry.
    for (const item of document.querySelectorAll('div[data-name]')) {
      item.addEventListener('click', () => {});
      item.click();
    }
  });

  // The callback is executed in the page and cannot close over `selector`,
  // so the value must be passed as an extra evaluate() argument. Without it
  // the callback receives `undefined` and matches no story links.
  return page.evaluate((storySelector) => {
    return Array.from(document.querySelectorAll(storySelector), (story) => {
      const { href } = story;
      // `g` on the second replace collapses every run of dashes, not just the first.
      const name = story.text.replace(/[^A-Z0-9]/gi, '-').replace(/-{2,}/g, '-');
      const component = href.match(/selectedKind=(.*?)&/).pop();
      return { href, name: `${component}-${name}` };
    });
  }, selector);
}
// Suite wiring for the story-link check: the browser connection and page are
// created once in beforeAll, released in afterAll, and the link collection
// itself runs inside a test so that it can be awaited.
// NOTE(review): Jest's documentation describes `describe` callbacks as
// synchronous (only hooks and tests are awaited) — confirm that the `async`
// on the describe callback below is actually wanted.
describe('visual tests', async () => {
let page, browser;
beforeAll(async () => {
// `browserWSEndpoint` is not defined in this snippet; it must come from the surrounding setup.
browser = await puppeteer.connect({ browserWSEndpoint });
page = await browser.newPage();
});
afterAll(async () => {
await page.close();
await browser.disconnect();
});
it('should do something with storyLinks', async () => {
// Declares that exactly one assertion is expected to run in this test.
expect.assertions(1);
const storyLinkResult = await myStoryLinkTest(page);
// Placeholder expectation: replace the string with the value the links should equal.
expect(storyLinkResult).toEqual('Some value you expect');
}, timeout);
});
Related
The following code runs on every one of my requests, and I'm afraid that it's trying to launch the browser every time and causing server issues on Heroku. I want to launch Puppeteer like a singleton instance, where I only launch it once and after that my requests just trigger browser.newPage(). I'm not experienced enough in JS to resolve this.
// Question snippet: every invocation launches its own browser, opens a page,
// registers a response listener and then navigates to `url`.
// `puppeteer`, `url` and `handleResponse` are not defined in this snippet.
(async () => {
const browser = await puppeteer.launch({ headless: true});
const page = await browser.newPage();
// The handler below runs for each 'response' event emitted by the page.
await page.on('response', interceptedResponse =>{
// `status` is assigned but never read.
let status = interceptedResponse.status();
interceptedResponse.text()
.then((text) => {
handleResponse(text)
// The browser is closed as soon as one response body has been handled;
// the promise returned by close() is not awaited.
browser.close();
})
.catch(err => {
console.error(`interceptedResponse error: ${err}`)
browser.close();
});
});
await page.goto(url);
})();
You can create a class that handles this for you. It may not be an "official" singleton, but it does what you want:
Check out browser.js:
var puppeteer = require('puppeteer')
/**
 * Lazily launches ONE shared Puppeteer browser and hands out pages from it.
 *
 * The browser is launched on the first `getBrowser()` / `newPage()` call.
 * Calls that arrive while the launch is still in progress share that launch
 * instead of starting additional browsers.
 */
class PuppeteerApi {
  /** The launched browser, or null while none is running. */
  browser = null;

  /** Promise of the launch currently in progress, or null. */
  #launching = null;

  /**
   * @param {object} config - Options forwarded to `puppeteer.launch`.
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Replaces the launch options; only affects browsers launched afterwards.
   * @param {object} config - Options forwarded to `puppeteer.launch`.
   */
  setConfig(config) {
    this.config = config;
  }

  /**
   * Launches a fresh browser with the current config.
   * @returns {Promise<object>} The launched browser.
   */
  async newBrowser() {
    return puppeteer.launch(this.config);
  }

  /**
   * Returns the shared browser, launching it on first use.
   * @returns {Promise<object>} The shared browser instance.
   */
  async getBrowser() {
    if (this.browser) {
      return this.browser;
    }
    if (!this.#launching) {
      // Remember the launch promise synchronously, before any await, so that
      // concurrent callers reuse it rather than each launching a browser.
      this.#launching = this.newBrowser()
        .then((browser) => {
          this.browser = browser;
          return browser;
        })
        .finally(() => {
          // Cleared on success and failure alike; a failed launch can be retried.
          this.#launching = null;
        });
    }
    return this.#launching;
  }

  /**
   * Opens a new page in the shared browser.
   * @returns {Promise<object>} The new page.
   */
  async newPage() {
    const browser = await this.getBrowser();
    return browser.newPage();
  }

  /**
   * Takes a page back once the caller is done with it.
   * @param {object} page - A page previously returned by `newPage()`.
   */
  async handBack(page) {
    // close the page or even reuse it?
    await page.close();
    // you could add logic for closing the whole browser instance depending on
    // what you want.
  }

  /**
   * Closes the shared browser, if there is one. A later `getBrowser()` call
   * launches a new browser.
   */
  async shutdown() {
    // Wait for a launch in progress so that its browser is not orphaned.
    const browser = this.#launching
      ? await this.#launching.catch(() => null)
      : this.browser;
    this.browser = null;
    if (browser) {
      await browser.close();
    }
  }
}
const config = {
headless: false
}
const browserApi = new PuppeteerApi(config)
export default browserApi
// Use it like this:
// import and set the config once.
const browserApi = require('./browser.js');
const config = { headless: true };
// The semicolon matters: without it the parenthesised function below would be
// parsed as a call on the result of setConfig(...).
browserApi.setConfig(config);
// In a request handler you could do this:
(async () => {
  const page = await browserApi.newPage();
  // do some stuff..
  // In the end hand the page back, either for closing it
  // or maybe for putting it in a pool.
  await browserApi.handBack(page);
})();
I do not know the behaviour of puppeteer when for example 30 pages would be opened. Here would be an example which could open a given amount of browser instances in parallel.
var puppeteer = require('puppeteer')
/**
 * Hands out pages from a pool of at most `browserLimit` browsers, picked in
 * round-robin order. Each browser is launched lazily the first time its slot
 * is used.
 */
class PuppeteerApi {
  /** Launched browsers, indexed by slot. */
  browsers = [];

  /** Slot that the next `getBrowser()` call will use. */
  index = 0;

  /** Launch promise per slot, so concurrent callers share one launch. */
  #launches = [];

  /**
   * @param {number} browserLimit - Maximum number of browsers; must be >= 1.
   * @param {object} config - Options forwarded to `puppeteer.launch`.
   * @throws {Error} If `browserLimit` is not a number that is at least 1.
   */
  constructor(browserLimit, config) {
    // `!(x >= 1)` also rejects NaN, which `x < 1` would let through.
    if (typeof browserLimit !== 'number' || !(browserLimit >= 1)) {
      throw new Error('BrowserLimit needs atleast to be 1!!');
    }
    this.config = config;
    this.browserLimit = browserLimit;
  }

  /**
   * Replaces the launch options; only affects browsers launched afterwards.
   * @param {object} config - Options forwarded to `puppeteer.launch`.
   */
  setConfig(config) {
    this.config = config;
  }

  /**
   * Launches a fresh browser with the current config.
   * @returns {Promise<object>} The launched browser.
   */
  async newBrowser() {
    return puppeteer.launch(this.config);
  }

  /**
   * Returns the browser of the next slot, launching it if necessary.
   * @returns {Promise<object>} The browser for the chosen slot.
   */
  async getBrowser() {
    if (this.index >= this.browserLimit) {
      this.index = 0;
    }
    // Claim the slot and advance the index before any await. Otherwise
    // concurrent callers would all see the same index, launch several
    // browsers for one slot, and read slots that were never filled.
    const slot = this.index++;
    if (!this.#launches[slot]) {
      const pending = this.newBrowser().then(
        (browser) => {
          this.browsers[slot] = browser;
          return browser;
        },
        (err) => {
          // Free the slot so that a later call can retry the launch.
          if (this.#launches[slot] === pending) {
            this.#launches[slot] = undefined;
          }
          throw err;
        },
      );
      this.#launches[slot] = pending;
    }
    return this.#launches[slot];
  }

  /**
   * Opens a new page in the next browser of the pool.
   * @returns {Promise<object>} The new page.
   */
  async newPage() {
    const browser = await this.getBrowser();
    return browser.newPage();
  }

  /**
   * Closes a page previously returned by `newPage()`.
   * @param {object} page - The page to close.
   */
  async handBack(page) {
    await page.close();
  }

  /**
   * Closes every browser of the pool, including ones still launching, and
   * resets the pool so that it can be used again.
   */
  async shutdown() {
    const launches = this.#launches.filter(Boolean);
    this.#launches = [];
    this.index = 0;
    const settled = await Promise.allSettled(launches);
    this.browsers = [];
    const closing = settled
      .filter((result) => result.status === 'fulfilled')
      .map((result) => result.value.close());
    await Promise.all(closing);
  }
}
const config = {
headless: false
}
const limit = 5
const browserApi = new PuppeteerApi(limit, config)
export default browserApi
If you prefer a functional style (which is less code), it is quick to adapt. Here is the first example:
var puppeteer = require('puppeteer')
// Shared browser instance; stays null until the first page is requested.
let browser = null;
// Promise of the launch currently in progress, shared by concurrent callers.
let launching = null;
// Options handed to puppeteer.launch; replace them via setPuppeteerConfig().
let config = {
  headless: false,
};

// Launches a browser with the current module-level config. An arrow function
// at module level has no usable `this`, so `config` is read directly.
const newBrowser = async () => puppeteer.launch(config);

/**
 * Replaces the launch options; only affects browsers launched afterwards.
 * @param {object} _config - Options forwarded to `puppeteer.launch`.
 */
export const setPuppeteerConfig = (_config) => {
  config = _config;
};

// Returns the shared browser, launching it on first use. The launch promise
// is stored before any await so that concurrent callers share one launch.
const getBrowser = async () => {
  if (browser) {
    return browser;
  }
  if (!launching) {
    launching = newBrowser()
      .then((instance) => {
        browser = instance;
        return instance;
      })
      .finally(() => {
        launching = null;
      });
  }
  return launching;
};

/**
 * Opens a new page in the shared browser.
 * @returns {Promise<object>} The new page.
 */
export const getPage = async () => {
  const instance = await getBrowser();
  return instance.newPage();
};

/**
 * Closes a page previously returned by `getPage()`.
 * @param {object} page - The page to close.
 */
export const handback = async (page) => {
  await page.close();
};

/**
 * Closes the shared browser, if there is one. A later `getPage()` call
 * launches a new browser.
 */
export const shutdown = async () => {
  const instance = launching ? await launching.catch(() => null) : browser;
  browser = null;
  if (instance) {
    await instance.close();
  }
};
// usage:
// The semicolon matters: without it the parenthesised function below would be
// parsed as an argument list for the value returned by require().
const { setPuppeteerConfig, shutdown, getPage, handback } = require('./browser');
// setconfig..
(async () => {
  const page = await getPage();
  // do some stuff..
  await handback(page);
})(); // the trailing () actually runs the function
Feel free to leave a comment if anything is not working as intended.
I am attempting to write a script that locates the largest image on a page. The first step of this process would be to retrieve all the image sources on a particular website. This is where I am stuck.
const puppeteer = require('puppeteer');
// Question snippet: launches a browser, opens `pageURL` and tries to log the
// `src` attribute of every <img> on the page. `partName` is only used in the
// outer error message.
function ImageFetcher(pageURL, partName) {
return new Promise( async (resolve, reject) => {
try {
const browser = await puppeteer.launch({
headless: false,
});
const page1 = await browser.newPage();
await page1.goto(pageURL);
try {
// NOTE(review): `page` is not declared in this function — the page handle
// created above is `page1`. Any error thrown on this line is swallowed by the
// catch below and only reported as "ERR Locator".
const images = await page.$$eval("img", els => els.map(x => x.getAttribute("src")));
console.log(images);
} catch(e) {console.log("ERR Locator")};
await page1.close();
await browser.close();
return resolve();
// The outer catch only logs; neither resolve nor reject is called on this
// path, so the returned promise never settles after an outer error.
} catch(e) {console.log(`Error Image Fetcher Part Name: ${partName}`)};
});
}
// Entry point of the question snippet: calls ImageFetcher for one hard-coded product page.
async function start() {
// `pageURL` and `partName` are assigned without const/let/var in this snippet.
pageURL = "https://www.grainger.com/product/NVENT-CADDY-Cushioned-Pipe-Clamp-1RVC3";
partName = "10000";
// The promise returned by ImageFetcher is neither awaited nor returned.
ImageFetcher(pageURL, partName);
} start();
//ERR Locator
How about this:
const puppeteer = require("puppeteer");
/**
 * Launches a browser, opens the product page and resolves to the `src` of the
 * largest image found by `extractLargestImage`.
 *
 * The browser is closed in every case, including when navigation or the
 * extraction throws, so that no browser process is left running.
 *
 * @returns {Promise<string>} The value returned by `extractLargestImage`.
 */
const testing = async () => {
  const browser = await puppeteer.launch({
    headless: false,
  });
  try {
    const page = await browser.newPage();
    await page.goto('https://www.grainger.com/product/NVENT-CADDY-Cushioned-Pipe-Clamp-1RVC3');
    // `return await` keeps the extraction inside the try, so the finally
    // block runs only after it has finished.
    return await extractLargestImage(page);
  } finally {
    await browser.close();
  }
};
/**
 * Finds the image with the largest area (height * width) on the page.
 *
 * @param {object} page - Puppeteer page; only `evaluate` is used.
 * @returns {Promise<string>} The `src` of the first image with the largest
 *   area, or 'none yet' when no image has an area greater than zero.
 */
async function extractLargestImage(page) {
  return page.evaluate(() => {
    // Everything in this callback runs inside the page.
    const winner = { src: 'none yet', area: 0 };
    for (const candidate of document.querySelectorAll('img')) {
      const area = Number(candidate.height) * Number(candidate.width);
      // Only a strictly larger area replaces the current winner, so ties
      // keep the earlier image.
      if (!(area > winner.area)) {
        continue;
      }
      winner.area = area;
      winner.src = candidate.src;
    }
    return winner.src;
  });
}
testing().then((value) => {
console.dir(value, {'maxArrayLength': null});
});
The page.on is recognized by the async for loop at the bottom as finished and ready to run the function again, but it's not actually done. It still needs to run everything up to page.close. How do I let the async function know that it is done after page.close, not page.on? Let me know if you need any more info, thanks.
const puppeteer = require('puppeteer');
const fs = require('fs');
const req = require('request');
const got = require('got');
const NodeID3 = require('node-id3');
const readline = require('readline');
const selectors = require('./selectors');
// Question snippet: opens `url` in a new page of `browser` and registers a
// request listener. When a request URL contains `baseUrl`, the listener
// downloads and tags the song, aborts that request and closes the page.
const getDownloadUrl = async (url, browser) => {
const page = await browser.newPage();
await page.goto(url);
// Interception is switched on after the navigation above, and the result of
// this call is not awaited.
page.setRequestInterception(true);
await page._client.send('Page.setDownloadBehavior', {behavior: 'allow', downloadPath: './Songs'})
const baseUrl = 'https://cf-hls-media.sndcdn.com/media/';
// NOTE(review): this `await` applies to the return value of page.on(), not to
// the work done inside the listener — presumably why the caller's loop moves
// on before page.close() has run, as the question describes.
await page.on('request', async (request) => {
if(request.url().includes(baseUrl)){
const downloadUrl = fixUrl(request.url());
const info = await getSongInfo(page);
// The return value of downloadSong is not awaited.
downloadSong(downloadUrl, info.title);
await tagSong(info);
await request.abort();
await page.close();
} else {
request.continue();
}
});
};
const fixUrl = (url) => {
...
};
const downloadSong = (url, title) => {
...
};
const getSongInfo = async (page) => {
...
};
const tagSong = async (info) => {
...
};
// Reads '../Song Urls.csv' line by line, collects every line as a URL and,
// once the file has been read completely, processes the URLs one after
// another in a single browser.
(() => {
const readInterface = readline.createInterface({
input: fs.createReadStream('../Song Urls.csv'),
output: process.stdout,
console: false,
terminal: false,
});
let urls = [];
readInterface.on('line', function(line) {
urls.push(line);
}).on('close', async () => {
const browser = await puppeteer.launch({headless: false});
// Sequential on purpose: each iteration awaits getDownloadUrl before the next starts.
for (let i = 0; i < urls.length; i++) {
const url = urls[i];
await getDownloadUrl(url, browser);
}
});
})();
/*
Issue: The loop recognizes that the getDownloadUrl function is done even though it's
not and continues anyways.
*/
await only works with promises, and page.on looks to be a callback-based event listener, not something that returns a promise. If you want to be able to await it, you will need to create a promise around it.
// Wraps the callback-based listener in a promise so that the caller can await
// the whole download. The promise resolves once the matching request has been
// handled and the page is closed, and rejects if handling it fails.
await new Promise((resolve, reject) => {
  page.on('request', async (request) => {
    if (!request.url().includes(baseUrl)) {
      request.continue();
      return;
    }
    try {
      const downloadUrl = fixUrl(request.url());
      const info = await getSongInfo(page);
      downloadSong(downloadUrl, info.title);
      await tagSong(info);
      await request.abort();
      await page.close();
      resolve();
    } catch (err) {
      // Without this, an error in the handler would be an unhandled rejection
      // and the awaited promise would stay pending forever, stalling the loop.
      reject(err);
    }
  });
});
I'm trying to read the dom content from indian superleague for example goals, attacking, mins per goal, etc using class and object. It gives an error like this
Evaluation failed: TypeError: Cannot read property 'textContent' of undefined
at puppeteer_evaluation_script:7:64
Here's the code
config.js
const puppeteer = require('puppeteer')
/**
 * Holds the shared Puppeteer state in `param`: the Chrome executable path and
 * start URL from the beginning, plus `browser` and `page` once `connect()`
 * has run.
 */
class Puppeteer {
  constructor() {
    const path = 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe';
    const url = 'https://indiansuperleague.com';
    this.param = { path, url };
  }

  /** Launches Chrome in headful mode, opens a page and navigates to `param.url`. */
  async connect() {
    const { param } = this;
    const launchOptions = { executablePath: param.path, headless: false };
    param.browser = await puppeteer.launch(launchOptions);
    param.page = await param.browser.newPage();
    // timeout: 0 is passed straight through to page.goto.
    await param.page.goto(param.url, { timeout: 0 });
  }

  /** Closes the browser stored in `param.browser`. */
  async disconnect() {
    const { browser } = this.param;
    await browser.close();
  }
}
module.exports = Puppeteer
states.js
// Question snippet: reads the first "title" and first "number" element of the
// page stored in `param.page` and logs them as {key, num}.
class States{
constructor(param){
// `param` is the shared object whose `page` property is read in fetchData().
this.param = param
}
async fetchData(){
const page = this.param.page
const res = await page.evaluate(() => {
const title = 'si-fkt-sctn-title', value = 'si-fkt-sctn-number'
// const titleArray = document.getElementsByClassName(title)
// const valueArray = document.getElementsByClassName(value)
// If no element with the class exists when this runs, `[0]` is undefined and
// `.textContent` throws — the TypeError quoted in the question.
let key = document.getElementsByClassName(title)[0].textContent.trim()
let num = document.getElementsByClassName(value)[0].textContent.trim()
/* for(let i=0; i<titleArray.length; i++){
key[i] = titleArray[i].textContent.trim()
num[i] = valueArray[i].textContent.trim()
// Object.defineProperty(temp, key, {value:num,writable: true,configurable: true,enumerable: true})
} */
return {key, num}
})
// The result is only logged; fetchData has no return statement, so callers
// awaiting it receive undefined.
console.log(res)
}
}
module.exports = States
app.js
const Puppeteer = require('./config')
const States = require('./modules/states')
const puppeteer = new Puppeteer()
const states = new States(puppeteer.param)
// Connects, fetches the data once, then closes the browser; any error from
// the chain is logged by the final catch.
puppeteer.connect().then(async() => {
let res = await states.fetchData()
// fetchData() as shown in states.js has no return statement, so `res` is undefined here.
console.log(res)
await puppeteer.disconnect()
}).catch(e => console.log(e))
What is the solution?
The elements may be created dynamically after some time. You can try to use page.waitForSelector() before retrieving the data from them. For example:
'use strict';
const puppeteer = require('puppeteer');
// Opens the site, waits until both statistic elements exist, logs their text
// and closes the browser — also when one of the steps fails.
(async function main() {
  let browser;
  try {
    browser = await puppeteer.launch();
    const [page] = await browser.pages();

    await page.goto('https://indiansuperleague.com');

    // The elements may be created dynamically, so wait for both before reading them.
    await Promise.all([
      page.waitForSelector('.si-fkt-sctn-title'),
      page.waitForSelector('.si-fkt-sctn-number'),
    ]);

    const data = await page.evaluate(() => {
      return [
        document.querySelector('.si-fkt-sctn-title').textContent,
        document.querySelector('.si-fkt-sctn-number').textContent,
      ];
    });
    console.log(data);
  } catch (err) {
    console.error(err);
  } finally {
    // Closing here rather than at the end of the try block prevents a leaked
    // browser process when navigation or a selector wait throws.
    if (browser) {
      await browser.close();
    }
  }
})();
Output:
[ ' Goals', ' 63' ]
I have this string, which consists of the following:
// Question snippet: the backtick in front of ${prefix} ends the template
// literal opened on the first line, so the text after it is no longer part of
// the string.
const string = `describe('Test', () => {
it('found', async () => {
await createUser();
const test = await agent.get(`${prefix}/test`)
});
it('array', async () => {
await createUser();
const test = await agent.get(`${prefix}/test`)
});
});`
When I try to console.log this string, I get an error saying that ${prefix} is an "Unexpected identifier". When I try to replace it with something else, I get the same error.
Use concatenation between the different parts of the string:
// Three template literals joined with `+`. The middle one, `${prefix}/test`,
// is evaluated when the string is built, so the result contains the current
// value of `prefix` followed by /test, without quotes around it.
const string = `describe('Test', () => {
it('found', async () => {
await createUser();
const test = await agent.get(`+`${prefix}/test`+`)
});
it('array', async () => {
await createUser();
const test = await agent.get(`+`${prefix}/test`+`)
});
});`
You don't need backticks inside the string.
// One template literal: ${prefix} is interpolated when the string is built,
// so the generated text contains the value of `prefix` followed by /test,
// without quotes around it.
const string = `describe('Test', () => {
it('found', async () => {
await createUser();
const test = await agent.get(${prefix}/test)
});
it('array', async () => {
await createUser();
const test = await agent.get(${prefix}/test)
});
});`
Add quote marks:
// One template literal: ${prefix} is interpolated when the string is built,
// and the surrounding single quotes are kept, so the generated text contains
// a quoted '<value of prefix>/test' argument.
const string = `describe('Test', () => {
it('found', async () => {
await createUser();
const test = await agent.get('${prefix}/test')
});
it('array', async () => {
await createUser();
const test = await agent.get('${prefix}/test')
});
});`