I've accessed the spotify website (open.spotify.com) through playwright fine, but when i manually navigate around trying to play a song and click play nothing happens. I've tried both Firefox and Chromium and it seems that they meet the version requirement for spotify. So im not sure what is going wrong.
const baseURL = 'https://open.spotify.com/'
const playwright = require('playwright')
const browserType = ['firefox']
function sleep(ms) {
Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
}
async function login(email, password, proxy) {
const browser = await playwright[browserType].launch({headless:false})
/*const browser = await playwright[browserType].launch({
headless: false,
proxy: {
server: proxy,
},
});*/
const page = await browser.newPage();
await page.goto('https://accounts.spotify.com/en/login?continue=https%3A%2F%2Fopen.spotify.com%2F');
let emailField = page.getByPlaceholder('Email address or username')
let passwordField = page.getByPlaceholder('Password')
await emailField.type(email, {delay: 5})
await passwordField.type(password, {delay: 5})
let loginButton = page.getByTestId('login-button')
await loginButton.click()
}
Related
I'm trying to scrape video url of Instagram videos using puppeteer but unable to do it. it is returning null as a response
here is my code
async function getVideo(){
const launch = await puppeteer.launch({headless: true});
const page = await launch.newPage();
await page.goto('https://www.instagram.com/p/CfW5u5UJmny/?hl=en');
const video = await page.evaluate(() => {
return document.querySelector('video').src;
});
console.log(video); returns null
await launch.close();
}
example ur: https://instagram.fluh1-1.fna.fbcdn.net/v/t50.16885-16/290072800_730588251588660_5005285215058589375_n.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLjcyMC5pZ3R2LmJhc2VsaW5lIiwicWVfZ3JvdXBzIjoiW1wiaWdfd2ViX2RlbGl2ZXJ5X3Z0c19vdGZcIl0ifQ&_nc_ht=instagram.fluh1-1.fna.fbcdn.net&_nc_cat=100&_nc_ohc=ROJWkaOqkQcAX_z-_Ls&edm=AP_V10EBAAAA&vs=440468611258459_2442386419&_nc_vs=HBksFQAYJEdPQW9TaEUwaURaVmQ1Z0NBTC0yRkV0aVdIWkZidlZCQUFBRhUAAsgBABUAGCRHTEdvVHhGMWFjUUpsMzhDQUZNT0c1cV8wT3c1YnZWQkFBQUYVAgLIAQAoABgAGwGIB3VzZV9vaWwBMRUAACaa%2BO%2FYnLPeQBUCKAJDMywXQCDdsi0OVgQYEmRhc2hfYmFzZWxpbmVfMV92MREAdewHAA%3D%3D&ccb=7-5&oh=00_AfCBrACQlXOqmbGSWRk_6Urv_fmHJUFDIt-8w6EO0_UcHQ&oe=638D6CBD&_nc_sid=4f375e
You are loading the Instagram page. Since it takes a little while to load, I used setTimeout function to wait. Puppeteer also has many inbuilt functions you can use to obtain the src, such as the following.
async function getVideo(){
const launch = await puppeteer.launch({headless: false});
const page = await launch.newPage();
await page.goto('https://www.instagram.com/p/CfW5u5UJmny/?hl=en');
setTimeout(async () => {
let src = await page.$eval("video", n => n.getAttribute("src"))
console.log(src);
await launch.close();
}, 1000)
}
I am trying to build a scraper to monitor web projects automatically.
So far so good, the script is running, but now I want to add a feature that automatically analyses what libraries I used in the projects. The most powerful script for this job is wappalyser. They have a node package (https://www.npmjs.com/package/wappalyzer) and it's written that you can use it combined with pupperteer.
I managed to run pupperteer and to log the source code of the sites in the console, but I don't get the right way to pass the source code to the wappalyzer analyse function.
Do you guys have a hint for me?
I tryed this code but a am getting a TypeError: url.split is not a function
function getLibarys(url) {
(async () => {
const browser = await puppeteer.launch({ headless: true });
const page = await browser.newPage();
await page.goto(url);
// get source code with puppeteer
const html = await page.content();
const wappalyzer = new Wappalyzer();
(async function () {
try {
await wappalyzer.init()
// Optionally set additional request headers
const headers = {}
const site = await wappalyzer.open(page, headers)
// Optionally capture and output errors
site.on('error', console.error)
const results = await site.analyze()
console.log(JSON.stringify(results, null, 2))
} catch (error) {
console.error(error)
}
await wappalyzer.destroy()
})()
await browser.close()
})()
}
Fixed it by using the sample code from wappalyzer.
function getLibarys(url) {
const Wappalyzer = require('wappalyzer');
const options = {
debug: false,
delay: 500,
headers: {},
maxDepth: 3,
maxUrls: 10,
maxWait: 5000,
recursive: true,
probe: true,
proxy: false,
userAgent: 'Wappalyzer',
htmlMaxCols: 2000,
htmlMaxRows: 2000,
noScripts: false,
noRedirect: false,
};
const wappalyzer = new Wappalyzer(options)
;(async function() {
try {
await wappalyzer.init()
// Optionally set additional request headers
const headers = {}
const site = await wappalyzer.open(url, headers)
// Optionally capture and output errors
site.on('error', console.error)
const results = await site.analyze()
console.log(JSON.stringify(results, null, 2))
} catch (error) {
console.error(error)
}
await wappalyzer.destroy()
})()
}
I do not know if you still need an answer to this. But this is what a wappalyzer collaborator told me:
Normally you'd run Wappalyzer like this:
const Wappalyzer = require('wappalyzer')
const wappalyzer = new Wappalyzer()
await wappalyzer.init() // Launches a Puppeteer instance
const site = await wappalyzer.open(url)
If you want to use your own browser instance, you can skip wappalyzer.init() and assign the instance to wappalyzer.browser:
const Wappalyzer = require('wappalyzer')
const wappalyzer = new Wappalyzer()
wappalyzer.browser = await puppeteer.launch() // Use your own Puppeteer launch logic
const site = await wappalyzer.open(url)
You can find the discussion here.
Hope this helps.
I'm trying to build an automation for my Linkedin account so that whenever I get a message, I want to do something custom with it.
I'm using Puppeteer and MutationObserver inside page.evaluate call after loading my Linkedin profile with my li_at session cookie.
But it fails to fire an event even when I see the node changing the textContent.
const puppeteer = require('puppeteer');
(async function main() {
try {
const browser = await puppeteer.launch({ devtools: true, headless: false });
const page = await browser.newPage();
await page.setBypassCSP(true);
await page.setDefaultNavigationTimeout(0);
await page.setCookie({
'name': 'li_at',
'value': 'putYourSessionCookieHere',
'domain': '.www.linkedin.com'
})
await page.goto('https://www.linkedin.com', {waitUntil: 'networkidle2'});
await page.exposeFunction('puppeteerMutationListener', puppeteerMutationListener);
await page.evaluate(() => {
const target = document.querySelector('#messaging-nav-item .nav-item__badge-count');
const observer = new MutationObserver((mutationsList) => {
for (const mutation of mutationsList) {
window.puppeteerMutationListener(
mutation.removedNodes[0].textContent,
mutation.addedNodes[0].textContent,
);
}
});
observer.observe(
target,
{ childList: true},
);
});
} catch (err) {
console.error(err);
}
})();
function puppeteerMutationListener(oldValue, newValue) {
console.log(`${oldValue} -> ${newValue}`);
}
To reproduce the issue, you would need:
A Linkedin account
A helpful coworker with a linkedin account up for messaging you OR you can change the textContent of the Node by yourself
Any ideas on why this may be happening?
I'm practicing with headless browser and I plan to make a little viewerbot. The goal would be to be able to put a site where a stream is broadcasted and to be able to choose a number of viewers to send on the stream with the possibility to increase the number or to reduce it without relaunching the app.
Currently I have some problems with the use of puppeteer-cluster.
1/ I can't find a way to handle the number of active tasks at the same time, how to add or remove at any time. That is to say my number of viewers in this case. Would Puppeteer be better than Puppeteer-cluster for my use?
2/ When the tasks of the cluster go live if I have a timeout problem on a single task it's all the others that crash as well. How can I fix that?
3/ Once the task is launched how to make sure that it never ends, that the viewer is on the page without being detected AFK or that the task is finished.
const {Cluster} = require('puppeteer-cluster');
const vanillaPuppeteer = require('puppeteer')
const {addExtra} = require('puppeteer-extra')
const Stealth = require('puppeteer-extra-plugin-stealth')
async function main() {
const puppeteer = addExtra(vanillaPuppeteer)
puppeteer.use(Stealth())
let viewers = 3;
let live = 'https://a-live-stream.com';
const browserArgs = [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-infobars'
];
const proxies = [
'proxy:port',
'proxy:port',
'proxy:port',
];
let perBrowserOptions = [];
for (let i = 0; i < viewers; i++) {
perBrowserOptions = [...perBrowserOptions, {args: browserArgs.concat(['--proxy-server=' + proxies[i]])}]
}
const cluster = await Cluster.launch({
puppeteerOptions: {
headless: false,
args: browserArgs,
executablePath: 'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe'
},
monitor: false,
puppeteer,
concurrency: Cluster.CONCURRENCY_BROWSER,
maxConcurrency: viewers,
perBrowserOptions: perBrowserOptions
});
cluster.on('taskerror', (err, data) => {
console.log(`Error crawling ${data}: ${err.message}`);
});
const viewer = async ({page, data: url}) => {
await page.goto(url, {waitUntil: 'networkidle2'})
const element = await page.$('iframe')
await element.click()
console.log('#Viewer live')
await page.waitFor(3000000)
console.log('#Closed')
};
cluster.queue(live, viewer)
cluster.queue(live, viewer)
cluster.queue(live, viewer)
await cluster.idle()
await cluster.close()
}
main().catch(console.warn)
I'm trying to make a InstagramBot that logs in and then go to some profile, my code worked yesterday for awhile and than it just stopped working .
I've tried to clone my repository from github, but it does'n work either, sometimes it works again, but if I try to create another function, the code just ignore the line of the code that changes the page.
I've also tried to create a new page and then in this new page use the goto function and it worked, but the account doesn keep logged in
The version of puppeteer that I'm using: 1.16.0
The version of node.js that I'm using: v10.15.3
const puppeteer = require('puppeteer');
const BASE_URL = "https://www.instagram.com/accounts/login/?hl=en&source=auth_switcher";
const instagram = {
browser: null,
page: null,
profile_url: null,
initialize: async (profile) => {
instagram.browser = await puppeteer.launch({
headless: false
})
instagram.profile_url = await "https://www.instagram.com/" + profile;
instagram.page = await instagram.browser.newPage();
await instagram.page.goto(BASE_URL, {waitUntil: 'networkidle2'});
},
login: async(username, password) =>{
await instagram.page.waitFor(1000);
await instagram.page.type('input[name="username"]', username);
await instagram.page.type('input[name="password"', password);
await instagram.page.click('button[type="submit"]');
await instagram.page.waitFor(1500);
await console.log(instagram.profile_url);
await instagram.page.goto(instagram.profile_url, {timeout: 0, waitUntil: 'domcontentloaded'}); // the code just ignore this line
await instagram.page.waitFor(1000);
},
getPhotosLinks: async() => {
console.log("Do something here");
}
}
module.exports = instagram;
It doesn't give any error message, just doesn't work
Replace
await instagram.page.click('button[type="submit"]');
await instagram.page.waitFor(1500);
with
await Promise.all([
instagram.page.click('button[type="submit"]');,
instagram.page.waitForNavigation()
]);
and see if it works