variables transaction in phantomjs, scraping web page - javascript

I need to scrape url's from one page, I have made this loop using phantomjs. But it isn't working and I don't know why.
function() {
var f = fs.open('parse.txt', 'a');
for (var x = 0; x <= 15; x++) {
var hrefs = page.evaluate(function(x) {
return $('.login').eq(x).attr('href');
}, 'hrefs');
f.write(hrefs + '\r\n');;
}
f.close();
}
I have tried to do this with an array, but it failed also.
var array = [];
page.evaluate(function(array){
for (var z = 0; z<=15; z++) {
array.push($('.login').eq(z).attr('href'));
}
}, array);
console.log(array.length); // 0

Here's what worked for me.
// var webpage = require('webpage');
var page = require('webpage').create();
var fs = require('fs');
var system = require('system');
var address = "https://jquery.org";
console.log("Opening page : " + address);
// var page = webpage.create();
page.open(address, function(status) {
console.log('Status? '+status);
if ( status !== 'success') {
console.log("Failed to load the address...");
phantom.exit();
}
var f = fs.open('parse.txt', 'a');
for (var x = 0; x<=15; x++ ) {
var href = page.evaluate(function(x) {
return $('a').eq(x).attr('href');
}, x);
console.log(href);
f.write(href + '\r\n');
}
f.close();
phantom.exit();
});
Some notes.
In the first example, you didn't need to remove "x" from function x, you needed to pass x into page.evaluate as the variable that contained the value to be passed to your function once it was in the browser.
i.e.
page.evaluate(function(x) {...}, x);
instead of
page.evaluate(function(x){...}, hrefs)
and definitely not
page.evaluate(function() { return x; })
given that understanding the appropriate way to accomplish the second example is :
var array = page.evaluate(function() {
var result = [];
for (var z = 0; z<=15; z++) {
result.push($('.login').eq(z).attr('href'));
}
return result;
});

Related

Swapping JSON for Firebase

I want to make a form for a live D3 sankey diagram Link to source code here but I want to use firebase not JSON.
// load the data
d3.json("ConTech.json", function(error, graph) {
var nodeMap = {};
graph.nodes.forEach(function(x) { nodeMap[x.name] = x; });
graph.links = graph.links.map(function(x) {
return {
source: nodeMap[x.source],
target: nodeMap[x.target],
value: x.value
};
});
sankey
.nodes(graph.nodes)
.links(graph.links)
.layout(32);
The above is what I need to edit to refer to my firebase database.
This is my data from my form in firebase
This is my attempt to extract the data into an array and seems to give me what I need.
function gotData(data) {
//console.log(data.val());
var links = data.val();
var nodes = data.val()
var lkeys = Object.keys(links);
var nkeys = Object.keys(nodes);
//console.log(keys);
for (var i = 0; i < lkeys.length; i++) {
var k = lkeys[i];
var source = links[k].source;
var target = links[k].target;
if (source !== undefined){
slist.push(source);
}
if (target !== undefined){
tlist.push(target);
}
//console.log(source, target);
}
for (var x = 0; x < nkeys.length; x++) {
var kn = nkeys[x];
var name = nodes[kn].name;
if (name !== undefined){
nlist.push(name);
}
//console.log(name);
}
//console.log(slist, tlist, nlist);
//console.log(source, target, name);
}
function errData(err) {
console.log('broke!');
console.log('err');
}
function getData () {
console.log(slist, tlist, nlist);
}
However im struggling to swap
"d3.json("ConTech.json", function(error, graph)"
for the right implementaiton.

Google script - parse HTML from Website Forum - and Write Data to Sheet

I'm getting HTML from a forum url, and parsing the post count of the user from their profile page. I don't know how to write the parsed number into the Google spreadsheet.
It should go account by account in column B till last row and update the column A with count.
The script doesn't give me any errors, but it doesn't set the retrieved value into the spreadsheet.
function msg(message){
Browser.msgBox(message);
}
function onOpen() {
var ui = SpreadsheetApp.getUi();
ui.createMenu("Update")
.addItem('Update Table', 'updatePosts')
.addToUi();
}
function getPostCount(profileUrl){
var html = UrlFetchApp.fetch(profileUrl).getContentText();
var sliced = html.slice(0,html.search('Posts Per Day'));
sliced = sliced.slice(sliced.search('<dt>Total Posts</dt>'),sliced.length);
postCount = sliced.slice(sliced.search("<dd> ")+"<dd> ".length,sliced.search("</dd>"));
return postCount;
}
function updatePosts(){
if(arguments[0]===false){
showAlert = false;
} else {
showAlert=true;
}
var spreadSheet = SpreadsheetApp.getActiveSpreadsheet();
var accountSheet = spreadSheet.getSheetByName("account-stats");
var statsLastCol = statsSheet.getLastColumn();
var accountCount = accountSheet.getLastRow();
var newValue = 0;
var oldValue = 0;
var totalNewPosts = 0;
for (var i=2; i<=accountCount; i++){
newValue = parseInt(getPostCount(accountSheet.getRange(i, 9).getValue()));
oldValue = parseInt(accountSheet.getRange(i, 7).getValue());
totalNewPosts = totalNewPosts + newValue - oldValue;
accountSheet.getRange(i, 7).setValue(newValue);
statsSheet.getRange(i,statsLastCol).setValue(newValue-todaysValue);
}
if(showAlert==false){
return 0;
}
msg(totalNewPosts+" new post found!");
}
function valinar(needle, haystack){
haystack = haystack[0];
for (var i in haystack){
if(haystack[i]==needle){
return true;
}
}
return false;
}
The is the first time I'm doing something like this and working from an example from other site.
I have one more question. In function getPostCount I send the function profileurl. Where do I declare that ?
Here is how you get the URL out of the spreadsheet:
function getPostCount(profileUrl){
var ss = SpreadsheetApp.getActiveSpreadsheet();
var thisSheet = ss.getSheetByName("List1");
var getNumberOfRows = thisSheet.getLastRow();
var urlProfile = "";
var sliced = "";
var A_Column = "";
var arrayIndex = 0;
var rngA2Bx = thisSheet.getRange(2, 2, getNumberOfRows, 1).getValues();
for (var i = 2; i < getNumberOfRows + 1; i++) { //Start getting urls from row 2
//Logger.log('count i: ' + i);
arrayIndex = i-2;
urlProfile = rngA2Bx[arrayIndex][0];
//Logger.log('urlProfile: ' + urlProfile);
var html = UrlFetchApp.fetch(urlProfile).getContentText();
sliced = html.slice(0,html.search('Posts Per Day'));
var postCount = sliced.slice(sliced.search("<dd> ")+"<dd> ".length,sliced.search("</dd>"));
sliced = sliced.slice(sliced.search('<dt>Total Posts</dt>'),sliced.length);
postCount = sliced.slice(sliced.search("<dd> ")+"<dd> ".length,sliced.search("</dd>"));
Logger.log('postCount: ' + postCount);
A_Column = thisSheet.getRange(i, 1);
A_Column.setValue(postCount);
};
}
You're missing var in front of one of your variables:
postCount = sliced.slice(sliced.search("<dd> ")+"<dd> ".length,sliced.search("</dd>"));
That won't work. Need to put var in front. var postCount = ....
In this function:
function updatePosts(){
if(arguments[0]===false){
showAlert = false;
} else {
showAlert=true;
}
There is no array named arguments anywhere in your code. Where is arguments defined and how is it getting any values put into it?

Get anchor tag values in jQuery

I have a html tag like this.
<a class="employee_details" target="_blank" href="index1.php?name=user1&id=123">User</a>
I need to get the two parameter values in jquery
<script type="text/javascript">
$(function () {
$('.employee_details').click(function () {
var status_id = $(this).attr('href').split('name');
alert(status_id[0]);
});
});
</script>
Any help in getting both the parameter values in two variables in javascript.
I want to get user1 and 123 in two variables using jQuery
Thanks
Kimz
You can use URLSearchParams as a most up-to-date and modern solution:
let href = $(this).attr('href');
let pars = new URLSearchParams(href.split("?")[1]);
console.log(pars.get('name'));
Supported in all modern browsers and no jQuery needed!
Original answer:
Try this logic:
var href = $(this).attr('href');
var result = {};
var pars = href.split("?")[1].split("&");
for (var i = 0; i < pars.length; i++)
{
var tmp = pars[i].split("=");
result[tmp[0]] = tmp[1];
}
console.log(result);
So you'll get the parameters as properties on result object, like:
var name = result.name;
var id = result.id;
Fiddle.
An implemented version:
var getParams = function(href)
{
var result = {};
var pars = href.split("?")[1].split("&");
for (var i = 0; i < pars.length; i++)
{
var tmp = pars[i].split("=");
result[tmp[0]] = tmp[1];
}
return result;
};
$('.employee_details').on('click', function (e) {
var params = getParams($(this).attr("href"));
console.log(params);
e.preventDefault();
return false;
});
Fiddle.
$(function() {
$('.employee_details').on("click",function(e) {
e.preventDefault(); // prevents default action
var status_id = $(this).attr('href');
var reg = /name=(\w+).id=(\w+)/g;
console.log(reg.exec(status_id)); // returns ["name=user1&id=123", "user1", "123"]
});
});
// [0] returns `name=user1&id=123`
// [1] returns `user1`
// [2] returns `123`
JSFiddle
NOTE: Better to use ON method instead of click
Not the most cross browser solution, but probably one of the shortest:
$('.employee_details').click(function() {
var params = this.href.split('?').pop().split(/\?|&/).reduce(function(prev, curr) {
var p = curr.split('=');
prev[p[0]] = p[1];
return prev;
}, {});
console.log(params);
});
Output:
Object {name: "user1", id: "123"}
If you need IE7-8 support this solution will not work, as there is not Array.reduce.
$(function () {
$('.employee_details').click(function () {
var query = $(this).attr('href').split('?')[1];
var vars = query.split('&');
for (var i = 0; i < vars.length; i++) {
var pair = vars[i].split('=');
var varName = decodeURIComponent(pair[0]);
var varValue = decodeURIComponent(pair[1]);
if (varName == "name") {
alert("name = " + varValue);
} else if (varName == "id") {
alert("id = " + varValue);
}
}
});
});
It's not very elegant, but here it is!
var results = new Array();
var ref_array = $(".employee_details").attr("href").split('?');
if(ref_array && ref_array.length > 1) {
var query_array = ref_array[1].split('&');
if(query_array && query_array.length > 0) {
for(var i = 0;i < query_array.length; i++) {
results.push(query_array[i].split('=')[1]);
}
}
}
In results has the values. This should work for other kinds of url querys.
It's so simple
// function to parse url string
function getParam(url) {
var vars = [],hash;
var hashes = url.slice(url.indexOf('?') + 1).split('&');
for (var i = 0; i < hashes.length; i++) {
hash = hashes[i].split('=');
vars.push(hash[0]);
vars[hash[0]] = hash[1];
}
return vars;
}
// your code
$(function () {
$('.employee_details').click(function (e) {
e.preventDefault();
var qs = getParam($(this).attr('href'));
alert(qs["name"]);// user1
var status_id = $(this).attr('href').split('name');
});
});

jquery html(array) doesn't insert all items in array

When I run the javascript code below, it load specified amount of images from Flickr.
By var photos = photoGroup.getPhotos(10) code, I get 10 images from cache.
Then, I can see the object has exactly 10 items by checking console.log(photos);
But actual image appeared on the page is less than 10 items...
I have no idea why this work this way..
Thank you in advance.
<html>
<head>
<script src="http://code.jquery.com/jquery-2.1.0.min.js"></script>
<script>
var PhotoGroup = function(nativePhotos, callback) {
var _cache = new Array();
var numberOfPhotosLoaded = 0;
var containerWidth = $("#contents").css('max-width');
var containerHeight = $("#contents").css('max-height');
$(nativePhotos).each(function(key, photo) {
$("<img src='"+"http://farm" + photo["farm"] + ".staticflickr.com/" + photo["server"] + "/" + photo["id"] + "_" + photo["secret"] + "_b.jpg"+"'/>")
.attr("alt", photo['title'])
.attr("data-cycle-title", photo['ownername'])
.load(function() {
if(this.naturalWidth >= this.naturalHeight) {
$(this).attr("width", containerWidth);
} else {
$(this).attr("height", containerHeight);
}
_cache.push(this);
if(nativePhotos.length == ++numberOfPhotosLoaded)
callback();
})
});
var getRandom = function(max) {
return Math.floor((Math.random()*max)+1);
}
this.getPhotos = function(numberOfPhotos) {
var photoPool = new Array();
var maxRandomNumber = _cache.length-1;
while(photoPool.length != numberOfPhotos) {
var index = getRandom(maxRandomNumber);
if($.inArray(_cache[index], photoPool))
photoPool.push(_cache[index]);
}
return photoPool;
}
}
var Contents = function() {
var self = this;
var contentTypes = ["#slideShowWrapper", "#video"];
var switchTo = function(nameOfContent) {
$(contentTypes).each(function(contentType) {
$(contentType).hide();
});
switch(nameOfContent) {
case("EHTV") :
$("#video").show();
break;
case("slideShow") :
$("#slideShowWrapper").show();
break;
default :
break;
}
}
this.startEHTV = function() {
switchTo("EHTV");
document._video = document.getElementById("video");
document._video.addEventListener("loadstart", function() {
document._video.playbackRate = 0.3;
}, false);
document._video.addEventListener("ended", startSlideShow, false);
document._video.play();
}
this.startSlideShow = function() {
switchTo("slideShow");
var photos = photoGroup.getPhotos(10)
console.log(photos);
$('#slideShow').html(photos);
}
var api_key = '6242dcd053cd0ad8d791edd975217606';
var group_id = '2359176#N25';
var flickerAPI = 'http://api.flickr.com/services/rest/?jsoncallback=?';
var photoGroup;
$.getJSON(flickerAPI, {
api_key: api_key,
group_id: group_id,
format: "json",
method: "flickr.groups.pools.getPhotos",
}).done(function(data) {
photoGroup = new PhotoGroup(data['photos']['photo'], self.startSlideShow);
});
}
var contents = new Contents();
</script>
</head>
<body>
<div id="slideShow"></div>
</body>
</html>
I fix your method getRandom() according to this article, and completely re-write method getPhotos():
this.getPhotos = function(numberOfPhotos) {
var available = _cache.length;
if (numberOfPhotos >= available) {
// just clone existing array
return _cache.slice(0);
}
var result = [];
var indices = [];
while (result.length != numberOfPhotos) {
var r = getRandom(available);
if ($.inArray(r, indices) == -1) {
indices.push(r);
result.push(_cache[r]);
}
}
return result;
}
Check full solution here: http://jsfiddle.net/JtDzZ/
But this method still slow, because loop may be quite long to execute due to same random numbers occurred.
If you care about performance, you need to create other stable solution. For ex., randomize only first index of your images sequence.

passing page scraped data in the URL

In my Chrome extension, I'm trying to scrape information from the current tab (in content.js) and send it as parameter to the provided URL (background.js). It seems like I can scrape everything from the tab and append it to the URL except the values of input tags. Here's my code:
content.js:
var elements = new Array("form","h1","input","td","textarea","time","title","var");
//declare an array for found elements
var foundElements = new Array();
//declare an array for found ids
var foundIds = new Array();
//this counter is used to hold positions in the element array.
var elementCounter = 0;
//this counter is used to hold positions in the foundIds array
var idsCounter = 0;
//this counter is used to hold positions in the classCounter array.
var classCounter = 0;
//and we're going to output everything in a giantic string.
var output = "URL=" + document.URL;
//scrape the page for all elements
for (var i = 0; i < elements.length; i++)
{
var current = document.getElementsByTagName(elements[i]);
if(current.length>0)
{
for (var z=0; z<current.length; z++)
{
var inTxt = current[z].innerText;
output += "&" + elements[i] + "=" + inTxt;
}
elementCounter++;
//now that we have an array of a tag, check it for IDs and classes.
for (var y = 0; y<current.length; y++)
{
//check to see if the element has an id
if(current[y].id)
{
//these should be unique
var hit = false;
for (var x = 0; x<foundIds.length; x++)
{
if(foundIds[x]==current[y].id)
{
hit=true;
}
}
//if there was no hit...
if(!hit)
{
foundIds[idsCounter]=current[y].id;
idsCounter++;
var currVal = current[y].value;
output+="&" + current[y].id + "=" + currVal;
}
}
//now we pull the classes
var classes = current[y].classList;
if(classes.length>0)
{
for (var x = 0; x<classes.Length; x++)
{
var hit = false;
for (var z = 0; z<foundClasses.length; z++)
{
if(foundClasses[z]==classes[x])
{
hit=true;
}
}
//if there was not a hit
if(!hit)
{
foundClasses[classCounter]=classes[x];
classCounter++;
output+="&" + classes[x] + "=";
}
}
}
}
}
}
chrome.runtime.sendMessage({data: output});
background.js:
var output2;
chrome.extension.onMessage.addListener(function(request, sender, sendResponse) {
output2 = "text_input1=";
output2 += request.data;
});
chrome.browserAction.onClicked.addListener(function() {
chrome.tabs.create({url: "http://www.google.com?" + output2}, function(tab) {
chrome.tabs.executeScript(tab.id, {file: "content.js"}, function() {
sendMessage();
});
});
});
Does anyone know why the input tags values are passed as blank?
Because you're trying to get the input text by using current[z].innerText.
However, you have to use current[z].value for inputs.

Categories