I'm trying to build an automation for my LinkedIn account so that I can do something custom whenever I receive a message.
I'm using Puppeteer with a MutationObserver inside a page.evaluate call, after loading my LinkedIn profile with my li_at session cookie.
But the observer never fires, even when I can see the node's textContent changing.
const puppeteer = require('puppeteer');
/**
 * Opens LinkedIn with the `li_at` session cookie and forwards every change of
 * the messaging badge text to `puppeteerMutationListener` on the Node side.
 *
 * Any failure while setting up is logged with `console.error`. The browser is
 * intentionally left open so the in-page observer keeps running.
 */
(async function main() {
  // The nav item is observed rather than the badge itself: `childList` on the
  // badge alone misses in-place edits of its text node (those are
  // `characterData` records), and an observer bound to the badge goes silent
  // if the page replaces the badge element.
  const NAV_ITEM_SELECTOR = '#messaging-nav-item';
  const BADGE_SELECTOR = '.nav-item__badge-count';

  try {
    const browser = await puppeteer.launch({ devtools: true, headless: false });
    const page = await browser.newPage();
    await page.setBypassCSP(true);
    await page.setDefaultNavigationTimeout(0);
    await page.setCookie({
      name: 'li_at',
      value: 'putYourSessionCookieHere',
      domain: '.www.linkedin.com',
    });
    await page.goto('https://www.linkedin.com', { waitUntil: 'networkidle2' });
    await page.exposeFunction('puppeteerMutationListener', puppeteerMutationListener);

    await page.evaluate((navItemSelector, badgeSelector) => {
      const navItem = document.querySelector(navItemSelector);
      if (navItem === null) {
        // Fail with a readable message instead of the TypeError that
        // `observe(null, ...)` would raise.
        throw new Error(`Element not found: ${navItemSelector}`);
      }

      // An absent badge is reported as an empty string.
      const readBadgeText = () => {
        const badge = navItem.querySelector(badgeSelector);
        return badge === null ? '' : badge.textContent;
      };

      let previousText = readBadgeText();
      const observer = new MutationObserver(() => {
        // Compare snapshots instead of reading removedNodes[0]/addedNodes[0],
        // which are undefined for characterData records and one-sided changes.
        const currentText = readBadgeText();
        if (currentText === previousText) {
          return;
        }
        const oldText = previousText;
        previousText = currentText;
        window.puppeteerMutationListener(oldText, currentText);
      });
      observer.observe(navItem, {
        childList: true,
        characterData: true,
        subtree: true,
      });
    }, NAV_ITEM_SELECTOR, BADGE_SELECTOR);
  } catch (err) {
    console.error(err);
  }
})();
/**
 * Node-side callback exposed to the page: prints one transition per call.
 *
 * @param {*} oldValue - Value before the change.
 * @param {*} newValue - Value after the change.
 * @returns {void}
 */
function puppeteerMutationListener(oldValue, newValue) {
  const transition = `${oldValue} -> ${newValue}`;
  console.log(transition);
}
To reproduce the issue, you would need:
A LinkedIn account
A helpful coworker with a LinkedIn account who is up for messaging you, OR you can change the textContent of the node yourself
Any ideas on why this may be happening?
Related
I want to get live-changing data from an external website with Puppeteer. Is this possible without reloading the external website every time?
Here is a small example that logs "Births today" from https://www.worldometers.info/ in real time using MutationObserver. (Another option is monitoring the page's network activity, but that usually requires more analysis of the site; this site does not seem to use network requests for the counter and instead updates its data with pure JS according to some statistics.)
import puppeteer from 'puppeteer';
// Streams the "Births today" counter from worldometers.info to stdout,
// rewriting a single terminal line on every change.
const browser = await puppeteer.launch();

try {
  // Use the first page the browser reports instead of opening a new one.
  const [page] = await browser.pages();
  await page.goto('https://www.worldometers.info/');
  await page.waitForSelector('#c3 span.counter-number');

  // Node-side sink: move to column 0, clear the line, print the new value.
  const renderBirths = (birthsNumber) => {
    process.stdout.cursorTo(0);
    process.stdout.clearLine(0);
    process.stdout.write(`Births today: ${birthsNumber}.`);
  };
  await page.exposeFunction('logInPuppeteer', renderBirths);

  // Page-side: push the counter's text to Node whenever its subtree mutates.
  await page.evaluate(() => {
    const counter = document.querySelector('#c3 span.counter-number');
    const onMutation = () => {
      window.logInPuppeteer(counter.innerText);
    };
    new MutationObserver(onMutation).observe(counter, {
      characterData: true,
      childList: true,
      subtree: true,
    });
  });
} catch (err) {
  console.error(err);
}
I'm using Puppeteer.js to crawl some URLs with Puppeteer's default Chromium browser. Everything works well, but when I run the crawling script and do other things in the meantime, so that the focus is no longer on Puppeteer's Chromium window, it stops working: it waits far too long for elements and aborts operations. In other words, Puppeteer is paused (or frozen).
P.S. I'm also using the puppeteer-extra and puppeteer-extra-plugin-stealth NPM packages for advanced options.
Here is how I create the browser and the page:
async initiateCrawl(isDisableAsserts) {
// Set the browser.
this.isPlannedClose = false;
const browser = await puppeteerExtra.launch({
headless: false,
args: [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--start-maximized',
'--disable-background-timer-throttling',
'--disable-backgrounding-occluded-windows',
'--disable-renderer-backgrounding'
]
});
const pid = browser.process().pid;
browser.on('disconnected', () => {
systemUtils.killProcess(pid);
if (!this.isPlannedClose) {
systemUtils.exit(Status.BROWSER_CLOSE, Color.RED, 0);
}
});
process.on('SIGINT', () => {
this.close(browser, true);
});
// Set the page and close the first empty tab.
const page = await browser.newPage();
const pages = await browser.pages();
if (pages.length > 1) {
await pages[0].close();
}
await page.setRequestInterception(true);
await page.setJavaScriptEnabled(false);
await page.setDefaultNavigationTimeout(this.timeout);
page.on('request', (request) => {
if (isDisableAsserts && ['image', 'stylesheet', 'font', 'script'].indexOf(request.resourceType()) !== -1) {
request.abort();
} else {
request.continue();
}
});
return {
browser: browser,
page: page
};
}
I already looked at:
https://github.com/puppeteer/puppeteer/issues/3339
https://github.com/GoogleChrome/chrome-launcher/issues/169
https://www.gitmemory.com/issue/GoogleChrome/puppeteer/3339/530620329
Not working solutions:
// Attempt 1: open a DevTools-protocol session on the page's target and send
// Page.setWebLifecycleState with state 'active'. `page` comes from outside this snippet.
const session = await page.target().createCDPSession();
// The Page domain is enabled before the lifecycle command is sent.
await session.send('Page.enable');
await session.send('Page.setWebLifecycleState', {state: 'active'});
// Attempt 2: command-line switches collected for the browser launch.
// NOTE(review): judging by their names these are meant to stop Chromium from
// throttling background/occluded windows - confirm against the Chromium switch list.
const chromeArgs = [
'--disable-background-timer-throttling',
'--disable-backgrounding-occluded-windows',
'--disable-renderer-backgrounding'
];
// Attempt 3: launch options with a long switch list, headed mode and `timeout: 0`,
// followed by the launch itself.
var ops = {args:[
// NOTE(review): Chromium's kiosk switch is spelled `--kiosk`; `--kiosks` is presumably a typo - confirm.
'--kiosks',
'--disable-background-timer-throttling',
'--disable-backgrounding-occluded-windows',
'--disable-renderer-backgrounding',
'--disable-canvas-aa',
'--disable-2d-canvas-clip-aa',
'--disable-gl-drawing-for-tests',
'--disable-dev-shm-usage',
'--no-zygote',
'--use-gl=desktop',
'--enable-webgl',
'--hide-scrollbars',
'--mute-audio',
'--start-maximized',
'--no-first-run',
'--disable-infobars',
'--disable-breakpad',
// `tempFolder` is defined outside this snippet.
'--user-data-dir='+tempFolder,
'--no-sandbox',
'--disable-setuid-sandbox'
], headless: false, timeout:0 };
// `puppeteer`, `browser` and `page` are assigned without a declaration here: they are
// either declared outside this snippet or end up as implicit globals (an error in strict mode).
puppeteer = require('puppeteer');
browser = await puppeteer.launch(ops);
page = await browser.newPage();
Has anyone faced this issue before and have any idea how to solve this? Thanks.
My issue was solved when I updated to the latest puppeteer version (9.0.0).
Hello, I'm trying to take a screenshot with Playwright, but the EU cookie-law popup shows up in my screenshots. How can I remove it?
Here is my browser parameters.
// Launches headless Firefox with one custom user preference.
const browser = await playwright.firefox.launch({
headless: true,
firefoxUserPrefs: {
// NOTE(review): value 2 is presumably Firefox's "reject all cookies" policy - confirm.
// Blocking cookies would not by itself hide a consent banner that the page renders.
"network.cookie.cookieBehavior": 2
}
});
But it doesn't work.
Thanks for your help.
Use the playwright API to click the element. I'm using the text selector in the example below, but you can use any selector.
const { webkit } = require('playwright');
/**
 * Opens npmjs.com in WebKit, clicks the element whose text is "Accept", and
 * saves a screenshot to `screenshot.png`.
 *
 * The browser is closed even when a step fails, and any failure is logged
 * instead of surfacing as an unhandled rejection.
 */
(async () => {
  const browser = await webkit.launch({ headless: false });
  try {
    const page = await browser.newPage();
    await page.goto('https://npmjs.com');
    // Text selector: clicks the element whose text matches "Accept".
    await page.click('text=Accept');
    await page.screenshot({ path: 'screenshot.png' });
  } finally {
    // Runs on success and on failure, so a failed click cannot leak the browser.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Before I start the question: I am new to JavaScript and have only very basic knowledge of async JS, but I need to solve this so I can get my first project working.
I am trying to build a scraping app using Node and Puppeteer. Basically, the user enters a URL ("link" in the code below), Puppeteer goes through the website's code, tries to find a specific piece, and returns the data. That part I have working so far.
The problem is when a user enters a URL of a site that doesn't have that piece of code. In that case, I get UnhandledPromiseRejectionWarning: Error: Evaluation failed theme is not defined
What do I do so when there is an error like that, I can catch it and redirect the page instead of Getting Internal Server error.
// POST /results: evaluates `theme.name` on the submitted URL and renders it.
// NOTE: no rejection handler is attached to the scraping promise, so a failure
// in any awaited step surfaces as an unhandled rejection.
app.post("/results", function (req, res) {
  const siteUrl = req.body.link;

  // Loads the page in a fresh browser and returns the evaluated value.
  const scrapeThemeName = async (url) => {
    const browser = await puppeteer.launch({ args: ['--no-sandbox'] });
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'networkidle2' });
    const themeName = await page.evaluate('theme.name');
    await browser.close();
    return themeName;
  };

  scrapeThemeName(siteUrl).then((themeName) => {
    res.render("index", { data: themeName, siteUrl: siteUrl });
  });
});
You can extend the async part to the whole route handler and do whatever you want on catch:
/**
 * POST /results: evaluates `theme.name` on the submitted URL and renders it.
 *
 * On any failure (launch, navigation, evaluation, rendering) the error is
 * logged and the client is redirected to `/`. Once launched, the browser is
 * closed whether or not scraping succeeds.
 */
app.post('/results', async (req, res) => {
  try {
    const link = req.body.link;
    const browser = await puppeteer.launch({ args: ['--no-sandbox'] });
    let data;
    try {
      const page = await browser.newPage();
      await page.goto(link, { waitUntil: 'networkidle2' });
      data = await page.evaluate('theme.name');
    } finally {
      // Close before responding, and also when goto/evaluate throws, so a
      // failed request does not leave a browser process behind.
      await browser.close();
    }
    res.render("index", { data: data, siteUrl: link });
  } catch (e) {
    // Keep a trace of the failure instead of swallowing it.
    console.error(e);
    res.redirect('/');
  }
});
I'm trying to make an Instagram bot that logs in and then goes to some profile. My code worked yesterday for a while and then it just stopped working.
I've tried to clone my repository from GitHub, but that doesn't work either. Sometimes it works again, but if I try to create another function, the code just ignores the line that changes the page.
I've also tried creating a new page and calling goto on that new page, and it worked, but the account doesn't stay logged in.
The version of puppeteer that I'm using: 1.16.0
The version of node.js that I'm using: v10.15.3
const puppeteer = require('puppeteer');
const BASE_URL = "https://www.instagram.com/accounts/login/?hl=en&source=auth_switcher";
/**
 * Small Instagram automation helper that keeps one browser and one page on
 * the object itself. Call `initialize` before `login`.
 */
const instagram = {
  browser: null,
  page: null,
  profile_url: null,

  /**
   * Launches a headed browser, stores the target profile URL, and opens the
   * login page.
   *
   * @param {string} profile - Profile name appended to the Instagram base URL.
   * @returns {Promise<void>}
   */
  initialize: async (profile) => {
    instagram.browser = await puppeteer.launch({
      headless: false,
    });
    // Plain string: nothing to await here.
    instagram.profile_url = `https://www.instagram.com/${profile}`;
    instagram.page = await instagram.browser.newPage();
    await instagram.page.goto(BASE_URL, { waitUntil: 'networkidle2' });
  },

  /**
   * Fills in and submits the login form, then navigates to the stored
   * profile URL.
   *
   * @param {string} username - Text typed into the username input.
   * @param {string} password - Text typed into the password input.
   * @returns {Promise<void>}
   */
  login: async (username, password) => {
    await instagram.page.waitFor(1000);
    await instagram.page.type('input[name="username"]', username);
    // The attribute selector was missing its closing bracket.
    await instagram.page.type('input[name="password"]', password);
    await instagram.page.click('button[type="submit"]');
    await instagram.page.waitFor(1500);
    console.log(instagram.profile_url);
    // Reported by the author as the step that appears to be skipped.
    await instagram.page.goto(instagram.profile_url, { timeout: 0, waitUntil: 'domcontentloaded' });
    await instagram.page.waitFor(1000);
  },

  /**
   * Placeholder; currently only logs a message.
   *
   * @returns {Promise<void>}
   */
  getPhotosLinks: async () => {
    console.log("Do something here");
  },
};
module.exports = instagram;
It doesn't give any error message, just doesn't work
Replace
// Submits the form, then pauses for a fixed 1500 ms whether or not any
// navigation started by the click has finished.
await instagram.page.click('button[type="submit"]');
await instagram.page.waitFor(1500);
with
// Start the click and the navigation wait together so the navigation cannot
// complete before anything is listening for it. (The stray `;` inside the
// array literal was a syntax error.)
await Promise.all([
  instagram.page.click('button[type="submit"]'),
  instagram.page.waitForNavigation(),
]);
and see if it works