I'm using the below CasperJS script to recursively parse the (multi-page) search results provided by google for the query site:https://www.launchgood.com/project/.
var links = [];
var casper = require('casper').create();
function getLinks() {
var currentLinks = document.querySelectorAll('h3.r a');
return Array.prototype.map.call(currentLinks, function(e) {
rawHref = e.getAttribute('href');
urlPattern = /.*(https?[:/]+[^&]+).*/g;
cleanHref = urlPattern.exec(rawHref);
return cleanHref[1];
});
Array.prototype.push.apply(links, currentLinks);
this.echo(' - ' + currentLinks.join('\n - '));
}
function parseAndContinue() {
links = this.evaluate(getLinks);
// now click 'Next'
if(this.exists('a.fl')) {
this.thenClick('a.fl');
this.then(parseAndContinue);
} else {
this.exit();
}
}
casper.start('http://google.com/ncr', function() {
// search from google form
this.fill('form[action="/search"]',
{ q: 'site:https://www.launchgood.com/project/' }, true);
});
casper.then(parseAndContinue);
casper.run();
This seems to continually search the 2nd page, over and over again in a never-ending loop -- instead of advancing to the next page.
What am I doing wrong?
Your looks fine apart from not printing anything. getLinks is a function that is evaluated in the page context. this refers to the global object, which is window, inside of the page context. You have no access to casper inside of the page context, because it is sandboxed and only primitive objects can be passed in or out. It has no access to variables defined outside of it (no access to links).
function getLinks() {
var currentLinks = document.querySelectorAll('h3.r a');
return Array.prototype.map.call(currentLinks, function(e) {
var rawHref = e.getAttribute('href');
var urlPattern = /.*(https?[:/]+[^&]+).*/g;
var cleanHref = urlPattern.exec(rawHref);
return cleanHref[1];
});
}
function parseAndContinue() {
var links = this.evaluate(getLinks);
console.log(JSON.stringify(links, undefined, 4));
// now click 'Next'
if(this.exists('a.fl')) {
this.thenClick('a.fl');
this.then(parseAndContinue);
} else {
this.exit();
}
}
Additionally, no code after the return statement will be executed.
Please be more careful and don't create global variables left and right.
Related
I have a PHP page that loads two JS files at the end. In the first file I have this...
// global variables
var refineSearchStorage = {};
// function calls
window.addEventListener("load", function() {
refineSearchStorage.get();
});
refineSearchStorage = {
data : null, // empty storage
get : function() {
refineSearchStorage.data = localStorage.getItem("refineSearchStorage");
if(refineSearchStorage.data === null) {
refineSearchStorage.data = { refineKeywords: '' };
refineSearchStorage.save();
}
else {
refineSearchStorage.data = JSON.parse(refineSearchStorage.data);
}
},
add : function(x) {
refineSearchStorage.data.refineKeywords = x;
refineSearchStorage.save();
},
save : function() {
localStorage.setItem("refineSearchStorage", JSON.stringify(refineSearchStorage.data));
}
};
Inline javascript calls Function 1 from the middle of the page. It is created by PHP after a search result...
<script>
window.addEventListener('load', function () {
searchActions('{$keywords_human}');
});
</script>
Function 1 appears in the 2nd JS page and the result is Uncaught TypeError: Cannot set property 'refineKeywords' of null... instead of adding to the localStorage.
function searchActions(x) {
refineSearchStorage.add(x);
}
Function 2 below is called with the click of a button and adds to a localStorage variable without issue. It is also located on the 2nd JS page...
function keywordAdd(y) {
var existingParams = refineSearchStorage.data.refineKeywords;
var param = y.toLowerCase();
var newParams;
newParams = (existingParams + ' ' + param).trim();
refineSearchStorage.add(newParams);
}
Function 1 used to work, but I did something to break it when I split the functions on to different pages. What did I do?
It's because
window.addEventListener('load', function () {
searchActions('{$keywords_human}');
});
gets called before
window.addEventListener("load", function() {
refineSearchStorage.get();
});
That error means that your data attribute is null
(screenshot) See what I mean by that
convert add to
add : function(x) {
refineSearchStorage.data = refineSearchStorage.data || {}; // this might help
refineSearchStorage.data.refineKeywords = x;
refineSearchStorage.save();
},
Whole day I have been trying to get my script working on a specific website.
Basically I want this script to only run on specific domains.
Here is the code:
document.addEventListener("DOMContentLoaded", function(event) {
// URL variables
let fullURL = window.location.host;
let fullURL2 = window.location.host + window.location.pathname;
var timer = setInterval(deletor, 1);
alert(fullURL2);
alert(fullURL);
if (fullURL === "net.adjara.com") {
function deletor() {
timer;
var slider = document.querySelector("#slider-con");
var bannerTop = document.querySelector("#MainContent > div:nth-child(2)")
var bannerMiddle = document.querySelector("#MainContent > iframe");
var bannerRandom = document.querySelector("#MainContent > div:nth-child(3)");
if (slider) {
slider.parentNode.removeChild(slider);
}
if (bannerTop) {
bannerTop.parentNode.removeChild(bannerTop);
}
if (bannerMiddle) {
bannerMiddle.parentNode.removeChild(bannerMiddle);
}
if (bannerRandom) {
bannerRandom.parentNode.removeChild(bannerRandom);
}
console.log("worked!");
};
} else {
console.log("false!");
return false;
}
});
I do get "false" console logged when I go to other site than mentioned, but the script doesn't execute when the domain matches. I even added an alert to see what the value was of the variable and copy-pasted that.
When the function is defined inside the if, the variable will be hoisted, but it will not have a value
console.log(test)
if (new Date()){
function test() {}
}
so you are setting the interval with undefined.
Move the interval after you define the function.
I know the title is a little bit confusion, here is the details:
Say I have a custom object defined in javascript, and there is a public member defined in it:
function Test()
{
this.testArray = [];
}
And I have two methods for this object, one is read out some xml file and filled into the array:
Test.prototype.readXML = function()
{
var self = this;
$.get('assest/xml/mydata.xml', function(d){
$(d).find("item").each(function(){
var item = new Item;
item.ID = ($(this).attr("ID"));
item.body = ($(this).find("body").text());
});
self.testArray.push(item);
});
}
And another function, which will display the content into the HTML page.
Test.prototype.appendInfo = function()
{
var i;
for (i=0; i<testArray.length;i++)
{
$('#testdisplay').append(testArray[i].ID +"<br />");
$('#testdisplay').append(testArray[i].body = "<br /");
}
}
However, the display function continue gives me error that the testArray is not defined. I'm not sure where is the problem, since I put the display function behind the reading function. I expect that the data will be stored in the array and could be accessed anytime I need them.
Hope some one will kindly help me about this! Thank you!
}
}
So I notice two problems with your code.
First when you do your ajax call you need to pass a deferred back to the user. Since ajax calls are async it may not finish right away.
So your readXML function should do this. It should return the jquery get.
Test.prototype.readXML = function() {
var self = this;
return $.get('assest/xml/mydata.xml', function(d){
$(d).find("item").each(function(){
var item = new Item;
item.ID = ($(this).attr("ID"));
item.body = ($(this).find("body").text());
});
self.testArray.push(item);
});
}
Next you your second function append was just missing some context.
Test.prototype.appendInfo = function() {
var i;
for (i=0; i<this.testArray.length;i++) {
$('#testdisplay').append(this.testArray[i].ID +"<br />");
$('#testdisplay').append(this.testArray[i].body = "<br /");
}
}
So your code should look like this.
var mytest = new Test();
mytest.readXML().done(function(){
mytest.appendInfo();
}).fail(function(){
// put some fallback code here
});
Updated:
Added additional this's.
There is no testArray in your appendInfo() function, that's why it says it's not defined. You should use this.testArray instead.
Every time you want to use a variable declared inside your scope, but outside the function you are using, you must use this.yourVariable
How can I match whole JavaScript function on current page? My JavaScript function is:
window.onbeforeunload = function()
{
if( !success )
{
success = true;
alert('Think first! Click CANCEL on the next page to receive a new gift.\n\n');
window.location = 'http://www.mysite.com/';
return '\nClick CANCEL\n\nOver thousands of gifts have been given out, get yours today!\n\n';
}
}
and I want dynamically match and replace this function to:
window.onbeforeunload=function()
{
return ExitPopup();
}
Please help me to resolve this.
can't you do
var newFunction = function()
{
return ExitPopup();
}
and then
window.onbeforeunload = newFunction;
I have a json object retrieved from server in my $(document).ready(...); that has an string that I would like to resolve to a function also defined within $(document).ready(...); so, for example:
$(document).ready(function{
$.getJSON(/*blah*/,function(data){/*more blah*/});
function doAdd(left,right) {
return left+right;
}
function doSub(left,right) {
return left-right;
}
});
with json string:
{"doAdd":{"left":10,"right":20}}
One way I thought about was creating an associative array of the function before loading the json:
var assocArray=...;
assocArray['doAdd'] = doAdd;
assocArray['doSub'] = doSub;
Using eval or window[](); are no good as the function may not be called for some time, basically I want to link/resolve but not execute yet.
Change your JSON to
{method: "doAdd", parameters : {"left":10,"right":20}}
Then do
var method = eval(json.method);
// This doesn't call it. Just gets the pointer
Or (haven't tried this)
var method = this[json.method]
How about something like this?
$(function(){
// Function to be called at later date
var ressolvedFunc = null;
// Ajax call
$.getJSON(/*blah*/,function(data){
// Generate one function from another
ressolvedFunc = (function(data) {
var innerFunc;
var left = data.left;
var right = data.right;
// Detect action
for (action in data) {
if (action == "doAdd")
innerFunc = function() {
return left + right;
};
else
innerFunc = function() {
return left - right;
};
}
return innerFunc;
})(data);
});
});
The anonymous function returns fresh function, with the new values stored within the enclosure. This should allow you to call the function at later date with the data previously retrieved from the GET request.
Rich
try this:
var doX = (function() {
var
data = [],
getDo = function(action) {
for(var d in data) {
if (data[d][action]) {
return data[d];
}
}
return null;
};
return {
set: function(sdata) {
data.push(sdata);
},
doAdd: function() {
var add = getDo("doAdd");
if (!add)
return 0;
return add.doAdd.left + add.doAdd.right;
},
doSub: function() {
var sub = getDo("doSub");
if (!sub)
return 0;
return sub.doAdd.left + sub.doAdd.right;
}
};
})();
$(document).ready(function{
$.getJSON(/*blah*/,function(data){ doX.set(data); });
});