PhantomJS is a headless WebKit browser. I can open a url with this and get content of a page that updates every second.
But I need to get the content of many (100) pages at the same time.
All pages must be opened concurrently and refresh every second.
It's possible for one page, but I don't know how to retrieve from multiple pages at once.
This is the example code from the PhantomJS website:
console.log('Loading a web page');
var page = require('webpage').create();
var url = 'http://www.phantomjs.org/';
page.open(url, function (status) {
//Page is loaded!
phantom.exit();
});
May I use many PhantomJS instances at one time ? I doesn't seem the best way. Does any body know how to open just one PhantomJS instance and get content from several pages?
Here is the code, I used before to parse the items for the E-shop and putting HTML code for each page of these items
I hope that it will help you!
var RenderUrlsToFile, system, url_string_for_array;
var arrayOfUrls = new Array();
system = require("system");
RenderUrlsToFile = function(urls, callbackPerUrl, callbackFinal) {
var getFilename, next, page, retrieve, urlIndex, webpage, link_name, sex;
var fs = {};
fs = require('fs');
urlIndex = 0;
webpage = require("webpage");
page = null;
// getFilename = function() {
// return "parsed/" + urlIndex + ".png";
// };
next = function(status, url, file) {
page.close();
callbackPerUrl(status, url, file);
return retrieve();
};
retrieve = function() {
var url;
if (urls.length > 0) {
url = urls.shift();
urlIndex++;
page = webpage.create();
page.viewportSize = {
width: 800,
height: 600
};
page.settings.userAgent = "Phantom.js bot";
return page.open("http://" + url, function(status) {
var file;
// file = getFilename();
if (status === "success") {
return window.setTimeout((function() {
// page.render(file);
var js = page.evaluate(function () {
return document;
});
fs.write('your_file_path'.html', js.all[0].outerHTML, 'w');
return next(status, url, file);
}), 100);
} else {
return next(status, url, file);
}
});
} else {
return callbackFinal();
}
};
return retrieve();
};
if (system.args.length > 1) {
arrayOfUrls = Array.prototype.slice.call(system.args, 1);
} else {
------------MAIN PART OF CODE FOR YOUR QUESTION------
For example: I need to parse the items on the E-shop, so I take the first page and then I set "for" for the exactly numbe of pages
url_string_for_array = "www.lamoda.ru/c/559/accs-muzhskieaksessuary/?genders=men&page=1";
for(var k=2; k<20; k++)
{
url_string_for_array += ",www.lamoda.ru/c/559/accs-muzhskieaksessuary/?genders=men&page="+k;
}
arrayOfUrls = url_string_for_array.split(',');
}
RenderUrlsToFile(arrayOfUrls, (function(status, url, file) {
if (status !== "success") {
return console.log("Unable to render '" + url + "'");
} else {
return console.log("Rendered '" + url + "'");
}
}), function() {
return phantom.exit();
});
Related
I am making a Chrome extension for version 87.0.4280.66.
It's a little fun script library to mess around with apps.
Right now, I'm making a Chrome extension that when you click a button, it adds on to an Instructure link to make it play confetti.
I'm making it check if there are parameters present or not, as well as if it is an Instructure link too.
The problem is when I call the function, it doesn't run it at all.
Here is the popup.js that is responsible for updating the URL to add the parameter confetti=true.
let confetti = document.getElementById('confetti');
const regEx = /https:\/\/cbsd\.instructure\.com\/courses\/([0-9]{5})\/assignments\/([0-9]{6})\?module_item_id=([0-9]{7})|https:\/\/cbsd\.instructure\.com\/courses\/([0-9]{5})\/assignments\/([0-9]{6})/g;
var getParams = function (url) {
var params = {};
var parser = document.createElement('a');
parser.href = url;
var query = parser.search.substring(1);
var vars = query.split('&');
for (var i = 0; i < vars.length; i++) {
var pair = vars[i].split('=');
params[pair[0]] = decodeURIComponent(pair[1]);
}
return params;
};
function error(message)
{
var notification = {
type: "basic",
title: "Error",
iconUrl: "images/error.ico",
message: message
}
chrome.notifications.create(notification);
}
function isCanvas(domain)
{
return regEx.test(domain);
}
function isMod(dom)
{
var parameters = getParams(dom);
if (parameters) {
return(true)
} else {
return(false)
}
}
confetti.onclick = function(element) {
chrome.tabs.query({active: true, lastFocusedWindow: true}, tabs => {
let domain = tabs[0].url;
var canvasCheck = isCanvas(domain)
if (canvasCheck === true) {
error(isMod(domain).toString());
/*/ if (andOrQuestion) {
let newUrl = domain + "&confetti=true";
chrome.tabs.update({url: newUrl});
} else {
let newUrl = domain + "?confetti=true"
chrome.tabs.update({url: newUrl});
} /*/
} else {
// error("You are not on an assignment page!")
}
});
}
Thank you for taking the time to answer this question and have a nice day!
I am trying to develop a chrome extension which parses data from the current tab and post it to a url which does processing on the data. In certain cases the page may need to be redirected so that certain get parameters are present. My popup.js can successfully do the redirect, but I need to click on the extension a second time to get it to run properly. Note: it runs properly if the page has the correct parameters. How can I adjust this so that the code reruns after the redirect and posts the new source to the specified url.
Here is my popup.js:
var url = "";
chrome.runtime.onMessage.addListener(function(request, sender) {
chrome.tabs.query({'active': true, 'lastFocusedWindow': true}, function (tabs) {
url = tabs[0].url;
if (url.search("[?&]view=list") == -1)
{
url = setGetParameter(url,'view','list');
chrome.tabs.update(tabs[0].id,{url:url});
process(request);
}
});
process(request);
});
function process(request) {
if (request.action == "getSource") {
message.innerText = request.source;
var data = new FormData();
data.append('source',request.source);
var xhttp = new XMLHttpRequest();
xhttp.onreadystatechange = function() {
if (xhttp.readyState == XMLHttpRequest.DONE) {
message.innerText = xhttp.responseText;
}
}
xhttp.open("POST","http://127.0.0.1:5000/api/scraper",true);
xhttp.send(data);
}
}
function onWindowLoad() {
var message = document.querySelector('#message');
chrome.tabs.executeScript(null, {
file: "getPagesSource.js"
}, function() {
// If you try and inject into an extensions page or the webstore/NTP you'll get an error
if (chrome.runtime.lastError) {
message.innerText = 'There was an error injecting script : \n' + chrome.runtime.lastError.message;
}
});
}
function setGetParameter(url, paramName, paramValue)
{
var hash = location.hash;
url = url.replace(hash, '');
if (url.indexOf(paramName + "=") >= 0)
{
var prefix = url.substring(0, url.indexOf(paramName));
var suffix = url.substring(url.indexOf(paramName));
suffix = suffix.substring(suffix.indexOf("=") + 1);
suffix = (suffix.indexOf("&") >= 0) ? suffix.substring(suffix.indexOf("&")) : "";
url = prefix + paramName + "=" + paramValue + suffix;
}
else
{
if (url.indexOf("?") < 0)
url += "?" + paramName + "=" + paramValue;
else
url += "&" + paramName + "=" + paramValue;
}
return url + hash;
}
window.onload = onWindowLoad;
Take a look at webRequest.onBeforeRedirect, this event fired when a redirect is about to be executed. You can listen to this event and do your logic again.
Don't forget to declare webRequest permission along with host permissions for any hosts whose network requests you want to access in you manifest.json.
Somehow I keep getting an "error code 5" when trying to set the following right.
What I want to do, is copy an existing file from the assets in android to an accessible spot on the android device to be able to share it across other apps (like mail).
Here is my code example:
window.requestFileSystem = window.requestFileSystem || window.webkitRequestFileSystem;
var storagefolder = cordova.file.dataDirectory;
var storagefolderpointer;
console.log("storage folder: " + storagefolder);
// Check for support.
if (window.requestFileSystem) {
console.log("filesystem beschikbaar");
var getFSfail = function () {
console.log('Could not open filesystem');
};
var getFSsuccess = function(fs) {
var getDIRsuccess = function (dir) {
console.debug('Got dirhandle');
cachedir = dir;
fileurl = fs.root.fullPath + '/' + storagefolder;
storagefolderpointer = dir;
};
var getDIRfail = function () {
console.log('Could not open directory');
};
console.debug('Got fshandle');
FS = fs;
FS.root.getDirectory(storagefolder, {create:true,exclusive:false}, getDIRsuccess, getDIRfail);
};
window.requestFileSystem(LocalFileSystem.PERSISTENT, 0, getFSsuccess, getFSfail);
setTimeout(function() {
console.log("directory beschikbaar");
var suc = function(entry){
var goe = function(){
console.log("copy success");
};
var fou = function(){
console.log("copy NOT NOT success");
};
entry.copyTo(storagefolder, "vcard.vcf", goe, fou);
};
var fai = function(e){
console.log("fail getFile: " + e.code);
};
window.resolveLocalFileSystemURL(storagefolderpointer + "www/visitekaart/vcard.vcf", suc, fai);
}, 1000);
} else {
console.log("filesystem NOT NOT NOT available");
}
have you use cordovaFile plugin instead?,you can use blob to read the content of your files than write a new one on android sdcard using cordovaFile plugin
$cordovaFile.writeFile('appdata/file.txt', blob, 0).then(function(fileEntry) {
//success
}, function(err) {
//err
}
I am trying to load some data from my JSON file using AJAX. The file is called external-file.json. Here is the code, it includes other parts that haven't got to do with the data loading.The part I'm not sure of begins in the getViaAjax funtion. I can't seem to find my error.
function flip(){
if(vlib_front.style.transform){
el.children[1].style.transform = "";
el.children[0].style.transform = "";
} else {
el.children[1].style.transform = "perspective(600px) rotateY(-180deg)";
el.children[0].style.transform = "perspective(600px) rotateY(0deg)";
}
}
var vlib_front = document.getElementById('front');
var el = document.getElementById('flip3D');
el.addEventListener('click', flip);
var word = null; var explanation = null;
var i=0;
function updateDiv(id, content) {
document.getElementById(id).innerHTML = content;
document.getElementById(id).innerHTML = content;
}
updateDiv('the-h',word[i]);
updateDiv('the-p',explanation[i])
function counter (index, step){
if (word[index+step] !== undefined) {
index+=step;
i=index;
updateDiv('the-h',word[index]);
updateDiv('the-p',explanation[index]);
}
}
var decos = document.getElementById('deco');
decos.addEventListener('click', function() {
counter(i,-1);
}, false);
var incos = document.getElementById('inco');
incos.addEventListener('click', function() {
counter(i,+1);
}, false);
function getViaAjax("external-file.json", callback) { // url being the url to external File holding the json
var r = new XMLHttpRequest();
r.open("GET", "external-file.json", true);
r.onload = function() {
if(this.status < 400 && this.status > 199) {
if(typeof callback === "function")
callback(JSON.parse(this.response));
} else {
console.log("err");// server reached but gave shitty status code}
};
}
r.onerror = function(err) {console.log("error Ajax.get "+url);console.log(err);}
r.send();
}
function yourLoadingFunction(jsonData) {
word = jsonData.words;
explanation = jsonData.explanation;
updateDiv('the-h',word[i]);
updateDiv('the-p',explanation[i])
// then call whatever it is to trigger the update within the page
}
getViaAjax("external-file.json", yourLoadingFunction)
As #light said, this:
function getViaAjax("external-file.json", callback) { // url being the url to external File holding the json
var r = new XMLHttpRequest();
r.open("GET", "external-file.json", true);
Should be:
function getViaAjax(url, callback) { // url being the url to external File holding the json
var r = new XMLHttpRequest();
r.open("GET", url, true);
I built up a quick sample that I can share that might help you isolate your issue. Stand this up in a local http-server of your choice and you should see JSON.parse(xhr.response) return a javascript array containing two objects.
There are two files
data.json
index.html
data.json
[{
"id":1,
"value":"foo"
},
{
"id":2,
"value":"bar"
}]
index.html
<html>
<head>
</head>
<body onload="so.getJsonStuffs()">
<h1>so.json-with-ajax</h1>
<script type="application/javascript">
var so = (function(){
function loadData(data){
var list = document.createElement("ul");
list.id = "data-list";
data.forEach(function(element){
var item = document.createElement("li");
var content = document.createTextNode(JSON.stringify(element));
item.appendChild(content);
list.appendChild(item);
});
document.body.appendChild(list);
}
var load = function()
{
console.log("Initializing xhr");
var xhr = new XMLHttpRequest();
xhr.onload = function(e){
console.log("response has returned");
if(xhr.status > 200
&& xhr.status < 400) {
var payload = JSON.parse(xhr.response);
console.log(payload);
loadData(payload);
}
}
var uri = "data.json";
console.log("opening resource request");
xhr.open("GET", uri, true);
xhr.send();
}
return {
getJsonStuffs : load
}
})();
</script>
</body>
</html>
Running will log two Javascript objects to the Dev Tools console as well as add a ul to the DOM containing a list item for every object inside the data.json array
Hi i am downloading selected links using chrome extension but I can't set downloads location. All the urls downloaded to default location of chrome. i know we can't do it because of security reason. can we prompt directory chooser dialog in chrome extension popup from here user can select the Download path.Need any information from my side let me know.
Is this possible at all? Any suggestions on how to go about it?
Thanks in advance
My code
function downloadFile(url, onSuccess,arrayOfUrl,zip) {
var xhr = new XMLHttpRequest();
xhr.open('GET', url, true);
xhr.responseType = "blob";
xhr.onreadystatechange = function () {
if (xhr.readyState == 4) {
if (onSuccess)
{
onDownloadComplete(xhr.response, arrayOfUrl,zip)
}
}
}
xhr.send("null");
}
function onDownloadComplete(blobData,urls,zip ){
if (count < urls.length) {
blobToBase64(blobData, function(binaryData){
var fileName = urls[count].substring(urls[count].lastIndexOf('/')+1);
zip.file(fileName+".docx", binaryData, {base64: true});
if (count < urls.length -1){
count++;
downloadFile(urls[count], onDownloadComplete, urls,zip);
}
else {
var content = zip.generate();
var zipName = 'download.zip';
var a = document.createElement('a');
a.href = "data:application/zip;base64," + content;
a.download = zipName;
a.click();
count = 0;
}
});
}
}
popup.js
function onDownloadComplete(blobData,urls,zip ){
if (count < urls.length) {
blobToBase64(blobData, function(binaryData){
// add downloaded file to zip:
var fileName = urls[count].substring(urls[count].lastIndexOf('/')+1);
// zip.file(fileName, binaryData, {base64: true});
zip.file(fileName+".docx", binaryData, {base64: true}); //file"+count+".docx"
if (count < urls.length -1){
count++;
downloadFile(urls[count], onDownloadComplete, urls,zip);
}
else {
chrome.runtime.getBackgroundPage(function () {
zipAndSaveFiles(zip);});
}
});
}
}
**background.js**
function zipAndSaveFiles(zip)
{
var content = zip.generate(zip);
var zipName = 'download.zip';
var dataURL = 'data:application/zip;base64,' + content;
chrome.downloads.download({
url: dataURL,
filename: zipName,
saveAs: true
});
}
Since you are generating and downloading just one ZIP file, you can use the chrome.downloads.download() method. E.g.:
var content = zip.generate();
var zipName = 'download.zip';
var dataURL = 'data:application/zip;base64,' + content;
chrome.downloads.download({
url: dataURL,
filename: zipName,
saveAs: true
});
count = 0;
If you omit the display of a SaveAs dialog, then you can only specify a file name that is inside the user-defined download folder or in a subfolder of it.
Regarding the issue with the popup (see comment below):
You should call the function from your background-page, not the popup. E.g. you could use chrome.runtime.sendMessage/onMessage to pass a message to your background-page:
In background.js:
...
function zipAndSaveFiles(...) { ... }
chrome.runtime.onMessage.addListener(function(msg, sender) {
if ((msg.action === 'zipAndSave')
&& (msg.params !== undefined)) {
zipAndSaveFiles(msg.params);
}
});
In popup.js:
...
chrome.runtime.sendMessage({
action: 'zipAndSave',
params: ['url1', 'url2', 'url3']
});