How can I select this (html anchor element to click and navigate to Tutorial page) query using puppeteer ?
I was doing this and it is not working
const puppeteer = require('puppeteer');
const url = process.argv[2];
if (!url) {
throw "Please provide URL as a first argument";
}
async function run() {
const browser = await puppeteer.launch({
headless: false,
defaultViewport: null,
slowMo: 10,
args: ['--start-maximzed', '--disable-notifications']
});
const page = await browser.newPage();
await page.goto(url);
await page.waitForSelector(".python-navigation .navigation.menu .tier-1.element-3 a", {
visible: true
});
await page.click(".python-navigation .navigation.menu .tier-1.element-3 a");
await page.waitForSelector(".documentation-banner .download-buttons ", {
visible: true
});
const elem = await page.$$('.documentation-banner .download-buttons a');
await elem[0].click();
await page.waitForSelector(".contentstable", {
visible: true
});
elem = await page.$$('.contentstable')[0].$$('tbody')[0].$$('tr')[0].$$('td')[0].$$('p')[1];
await elem.click();
await page.pdf({path: 'pdfGenerated.pdf',format:"A4"});
console.log("Success");
browser.close();
}
run();
What should I write instead of this line elem = await page.$$('.contentstable')[0].$$('tbody')[0].$$('tr')[0].$$('td')[0].$$('p')[1]; ?
for click on an element you can use this code:
await page.click(".contentstable tbody tr td p:nth-child(2) a");
Related
I'm trying to download a file using playwright, this file is generated which means the url doesn't ends with a '.pdf' and forcing this link in the url doesn't start the download. I was abble to navigate until the pdf is generated but how can I download it now ?
here is my code:
const {chromium} = require('playwright');
const path = require('path');
(async () => {
const browser = await chromium.launch({
headless: true,
});
const context = await browser.newContext({
acceptDownloads: true
});
const page = await context.newPage();
const downloadPath = path.join(__dirname, 'edt');
await page.goto('https://planif.eppe.ui/jsp/custom/eppee/easyMyPlanning.jsp');
await page.getByLabel('Identifiant :').click();
await page.getByLabel('Identifiant :').fill('myusername');
await page.getByLabel('Identifiant :').press('Tab');
await page.getByLabel('Mot de passe :').fill('mysecretpassword');
await page.getByLabel('Mot de passe :').press('Enter');
await page.getByRole('combobox').selectOption('10');
await page.getByRole('button', { name: 'Submit' }).click();
await page.locator('.footer > a').click();
const page1Promise = page.waitForEvent('popup');
await page.frameLocator('frame[name="link"]').getByRole('link', { name: 'Export PDF...' }).click();
const page1 = await page1Promise;
const downloadPromise = page1.waitForEvent('download');
await page1.getByRole('button', { name: 'Submit' }).click();
const download = await downloadPromise;
await download.saveAs(path.join(downloadPath, 'edt.pdf'));
await browser.close();
})();
with this code I end up having the browser waiting for me to try to download the file how can I automate this ?
ps: with this code and with my browser in non headless mode I end up on this page end if I manualy press the download button it works
I use puppeteer to get data about the store. I search using the p.shop-page-content__text_large, span.shop-list-item__address selectors, but I ran into such a problem that only one of them can be present on the page. I tried to solve the problem in the following way, but it does not work. Tell me how can this be fixed?
const puppeteer = require('puppeteer');
const browser = await puppeteer.launch({
headless: false,
slowMo: 150,
});
const cities = [{'CITY': 'Town1', 'LINK': '/shops/town1/'}, {'CITY': 'Town2', 'LINK': '/shops/town2/'}];
async function getData(page, selector) {
return await page.$$eval(selector, info => info.map((data) => {
let str = data.textContent.trim(),
from = str.search(','),
to = str.length;
return {
'COUNTRY': 'unknow',
'STREET' : str.substring(from, to)
}
}));
}
const result = [];
for (let val of cities) {
console.log(val.LINK, val.CITY);
const page = await browser.newPage();
await page.goto('https://www.example-site.ru' + val.LINK);
data = await page.waitForFunction('.shop-page-content').then(async() => {
console.log('ok');
return await getData(page, 'p.shop-page-content__text_large');
}).catch(async (e) => {
console.log('fail');
await page.waitForSelector('.shops-info__section');
return await getData(page, 'span.shop-list-item__address');
// result.push(data);
});
result.push(data);
await browser.close();
}
console.log(result);
It turned out like this:
const browser = await puppeteer.launch({
headless: false,
slowMo: 150,
});
const cities = [{'CITY': 'Town1', 'LINK': '/shops/town1/'}, {'CITY': 'Town2', 'LINK': '/shops/town2/'}];
const page = await browser.newPage();
const result = [];
for (let val of cities) {
await page.goto('https://www.example-site.ru' + val.LINK);
const list = await page.evaluate(() => {
const data = [];
const elements = document.querySelectorAll('p.shop-page-content__text_large').length
? document.querySelectorAll('p.shop-page-content__text_large')
: document.querySelectorAll('span.shop-list-item__address');
for (const element of elements) {
data.push(element.innerText);
}
return data;
});
result.push({
link: val.LINK,
city: val.CITY,
list
})
}
await browser.close();
So, I try to do a command. You need to write '!screenshot" + a custom url. It needs to take that url and take a screenshot of it and send it back to the respective channel. I tried many ways but I cant make it work.
const Discord = require("discord.js");
const fetch = require('node-fetch');
const cheerio = require("cheerio");
const puppeteer = require('puppeteer');
module.exports = {
name: 'screenshot',
description: 'makes a screenshot and sends it back',
usage: '[command name]',
cooldown: 5,
execute(message, args) {
const data = [];
//const url = message.client;
if (message.content.startsWith("!screenshot ")) {
const url = message.content.substr("!screenshot ".length);
};
},
async run(client, message, args) {
const browser = await puppeteer.launch();
const page = await browser.newPage();
//const page = await context.newPage();
//const url = args.join(' '); // or something similar?
await page.goto(url, { waitUntil: 'networkidle2' });
console.log(await page.content());
await page.screenshot({path: 'screenshot.png'});
let screenshot = await page.screenshot();
await browser.close();
message.channel.send("Screenshot", {files: [screenshot]});
}
}
I try get information from a web page using puppeteer, but in I don't to find the selector tha I need, I suppose that's because the page contain more than one documents html and I can't to find the way for to get the data that I need.
the inpection of the page
that´s the code:
const puppeteer = require('puppeteer');
(async ()=>{
const browser = await puppeteer.launch({headless:false});
const page = await browser.newPage();
await page.goto('https://www.arrivia.com/careers/job-openings/');
await page.waitForSelector('.job-search-result');
const data = await page.evaluate(()=>{
const elements = document.querySelectorAll('.job-search-result .job-btn-container a');
vacancies = [];
for(element of elements){
vacancies.push(element.href);
}
return vacancies;
});
console.log(data.length);
const vacancies = [];
for (let i = 0; i <=2; i++){
var urljob = data[i];
await page.goto(data[i]);
await page.waitForSelector(".app-title"); //that´s one of the selectors that I can´t to find
from here I get an error`enter code here`
const jobs = await page.evaluate((urljob)=> {
const job = {};
job.title = document.querySelector(".app-title").innerText;
job.location = document.querySelector(".location").innerText;
job.url = urljob;
return job;close
});
vacancies.push(jobs);
}
console.log(vacancies);
//await page.screenshot({ path: 'xx1.jpg'});
await browser.close()
})();
Iframes are not always the easiest things to deal with, in Puppeteer. But a way to bypass this could be to access directly the URL of the iframe, instead of accessing the page which hosts the iframe. It's also faster:
const puppeteer = require("puppeteer");
(async () => {
const browser = await puppeteer.launch({ headless: false, defaultViewport: null });
const page = await browser.newPage();
await page.goto("https://www.arrivia.com/careers/job-openings/", {
waitUntil: "domcontentloaded",
});
const jobUrls = await page.$$eval(".job-search-result .job-btn-container a",
els => els.map(el => el.href));
const vacancies = [];
for (let i = 0; i < 10; i++) { // don't forget to replace 10 with jobUrls.length later
const url = jobUrls[i];
const jobId = /job_id=(\d+)/.exec(url)[1]; // Extract the ID from the link
await page.goto(
`https://boards.greenhouse.io/embed/job_app?token=${jobId}`, // Go to iframe URL
{ waitUntil: "domcontentloaded" }
);
vacancies.push({
title: await page.$eval(".app-title", el => el.innerText),
location: await page.$eval(".location", el => el.innerText),
url,
});
}
console.log(vacancies);
await browser.close();
})();
Output:
[
{
title: 'Director of Account Management',
location: 'Scottsdale, AZ',
url: 'https://www.arrivia.com/careers/job/?job_id=2529695'
},
{
title: "Site Admin and Director's Assistant",
location: 'Albufeira, Portugal',
url: 'https://www.arrivia.com/careers/job/?job_id=2540303'
},
...
]
I am trying to take screenshots of all the table elements in a web page. Now for someweb pages it is working perfectly fine, I am able to take take pictures. But some websites are not working. I am able to take screenshots but they are mostly white or gray here is the code iam using.
const puppeteer = require('puppeteer');
const jsonfile = require('jsonfile');
const getWebImages = async(pageToGo, link) => {
puppeteer.launch({
args: ['--start-maximized'],
headless: false,
defaultViewport: null
}).then(async (browser) => {
const page = await browser.newPage();
await page.goto(pageToGo, {waitUntil: 'networkidle2', timeout: 60000});
const VIEWPORT = {width: 1366, height: 768 }; // Your default values
boxes2 = [];
const getData = async (link) => {
return page.evaluate(async (link) => {
return await new Promise(resolve => {
var rects = [];
const element = document.querySelectorAll('table');
element.forEach(function (item, index) {
var box = item.getBoundingClientRect();
rects.push({
x: box.left,
y: box.left,
width: box.width,
height: box.height,
id: index
})
})
return resolve(rects);
})
}, link);
}
const getImages = async (rect) => {
for (const item of rect) {
try {
await page.screenshot({
path: 'data\\_table_' + item.id + '.png',
clip: {
x: item.x,
y: item.y,
width: item.width,
height: item.height
}
});
} catch (e) {
console.log(e)
}
}
}
boxes2 = await getData(link);
images = await getImages(boxes2);
console.log(boxes2)
await browser.close();
});
}
getWebImages("https://www.csb.gc.ca/rates/", 11);
I have tried different screen sizes and other things like waiting for everything to load. When i see in the browser, i can clearly see the page loads and after it loads, the screenshots are taken but the images are either just white screens same size as tabel area.
NOTE: Just a note that i also downloaded some of the pages offline and even that is not working.
My problem was that i was setting my viewport after goto(), I changed the code to this;
`const puppeteer = require('puppeteer');
async function run(url) {
let browser = await puppeteer.launch({ headless: true });
let page = await browser.newPage();
const VIEWPORT = { width: 1360, height: 780}
boxes2 = [];
await page.setViewport(VIEWPORT);
await page.goto(url, { waitUntil: 'domcontentloaded'});
await page.waitFor('table');
await page.waitForSelector('table');
const el = await page.$$('table');
for(let i = 0; i < el.length; i++){
// await console.log(el[i].getBoundingClientRect());
await el[i].screenshot({
path: 'link_' +i+ '.png',
})
}
// await processScreens(el, page);
await page.close();
await browser.close();
}