Reading, parsing, iterating through JSON with Node.js - javascript

I'm trying to parse Reddit's RSS feed to grab the titles of front page articles, and having some trouble. Source code below:
// Fetch Reddit's front-page RSS feed, parse the XML with xml2js, print item
// titles, and write the parsed result to index.html.
//var util = require('util');
//var cheerio = require('cheerio');
var fs = require('fs');
var request = require('request');
var parseString = require('xml2js').parseString;
// NOTE(review): `url` is assigned without var/let/const, so it is an implicit global.
url = 'http://www.reddit.com/.xml';
request(url, function(error, response, xml){
// NOTE(review): `error` is never checked before `xml` is handed to the parser.
parseString(xml, function(err, result) {
// Narrow `result` down to the first channel object of the parsed feed.
result = result.rss.channel[0];
console.log(result.item[0]['title']); // works fine, gets first title
// This loop walks every key of the channel object itself, not the entries of result.item.
for(var key in result){
console.log(result[key]['title']); // returns a bunch of 'undefined'
}
//console.log(util.inspect(result,false,null));
// NOTE(review): `result` is an object at this point, not a string or Buffer — confirm what should be written.
fs.writeFile("index.html", result, function(err){
if(err) { return console.log(err); }
return console.log("File saved.");
});
});
});

You get undefined because you should be iterating over result.item instead of just result. For example:
// Enumerate the keys of result.item rather than the keys of result itself,
// and print the title stored under each key.
for(var key in result.item) {
console.log(result.item[key]['title']);
}
Additionally, you should just use a regular for-loop instead of using for..in, since it seems like result.item is just a plain array. For example:
// Walk the item list by numeric index and print each entry's title.
var items = result.item;
var position = 0;
while (position < items.length) {
  console.log(items[position].title);
  position += 1;
}

Related

Why my JSON file is showing all the values in a single key?

I scraped a website in a Node.js environment. I parsed the data into JSON and saved it in a file named output.json.
But instead of creating a separate entry for each value, the whole data set gets stored under a single key. I used the "each" function to loop over the elements and store them separately, but that does not seem to be working.
What may be the possible glitch?
Here is the code:
// Download the BSE announcements page, collect header text with cheerio, and
// save the collected records to output.json.
var cheerio = require('cheerio');
var request = require('request');
var fs = require('fs');
// Receives one { Header: ... } object per iteration of the each() loop below.
var allJSONdata = [];
request ('http://www.bseindia.com/corporates/ann.aspx',
function (error, response, html) {
if(!error && response.statusCode == 200){
var $ = cheerio.load(html);
var allRecords = $('div.content');
// The callback runs once per element matched by 'div.content'.
allRecords.each(function(index, element){
// find() can match many cells inside one element; text() is called once on the whole match.
var title = $(element).find('td.TTHeadergrey').text();
var tempData = {
Header:title
}
allJSONdata.push(tempData);
});
}
// NOTE(review): this write also runs when the request failed, and `err` is ignored in the callback.
fs.writeFile('output.json', JSON.stringify(allJSONdata), function(err){
console.log('successfully saved');
})
});
And my output.json data is following:
[{"Header":" /*all heads in this single key*/ "}]
You are not using the right selectors, so... Change this:
var allRecords = $('div.content');
allRecords.each(function(index, element){
var title = $(element).find('td.TTHeadergrey').text();
var tempData = {
Header:title
}
allJSONdata.push(tempData);
});
to this:
// Select the header cells directly so that every cell produces its own record.
var allRecords = $('.content .TTHeadergrey');
allRecords.each(function(position, cell){
  // Only the cells at offsets 0 and 1 within each run of five are kept.
  var offset = position % 5;
  if (offset !== 0 && offset !== 1) {
    return;
  }
  allJSONdata.push({
    Header: $(cell).text()
  });
});

JavaScript: Appending a post request json string to existing json file

I have a simple express app that takes a post request with some JSON data. I'd like to take that data and append it to an existing json file (If it exists). Key value pairs may be different. My current version pushes an object to an array of objects. Ideally, I'd like to add just another key/value pair:
// POST /notes: collect the raw request body, push the parsed object onto the
// `notes` array read from data/notes.json, write the file back, and respond
// with the updated JSON text.
app.post('/notes', function(req, res){
var body = "";
// Accumulate the request body chunk by chunk.
req.on('data', function(chunk){
body += chunk;
});
req.on('end', function(){
fs.readFile(__dirname + '/data/notes.json', function(err, data){
// NOTE(review): this throw happens inside an asynchronous callback — confirm how it is meant to be handled.
if (err) throw err;
console.log(body);
var fileObj = JSON.parse(data.toString());
var postObj = JSON.parse(body);
// Assumes the file's top-level object has a `notes` array — TODO confirm.
fileObj.notes.push(postObj);
var returnjson = JSON.stringify(fileObj);
fs.writeFile(__dirname + '/data/notes.json', returnjson, function(err){
if (err) throw err;
res.send(returnjson);
});
});
});
});
Example of what might be in notes.json:
{"note": "Dear Diary: The authorities have removed the black pants from the couch"}
This works, but I'm having trouble wrapping my head around appending whatever json comes in the post (Let's assume there is no nested data in this case).
EDIT: Not the same as just appending to a file. Needs to append to an object within a file.
You can simply iterate through the post object with the help of for ... in loop, and add its properties to the file object. Keep in mind that in this case, if property keys are identical, their value will be overwritten. To avoid it you can make a verification with the help of Object.prototype.hasOwnProperty().
// POST /notes: merge the top-level properties of the posted JSON object into
// data/notes.json and respond with the updated JSON text.
// Keys that already exist in the file are overwritten by the posted values.
// Responds 400 for a malformed body and 500 when the file cannot be read,
// parsed, or written, instead of throwing inside an asynchronous callback.
app.post('/notes', function(req, res){
  var notesPath = __dirname + '/data/notes.json';
  var body = "";
  req.on('data', function(chunk){
    body += chunk;
  });
  req.on('end', function(){
    console.log(body);

    var postObj;
    try {
      postObj = JSON.parse(body);
    } catch (parseErr) {
      // Malformed client input must not take the server down.
      return res.status(400).send('Request body is not valid JSON');
    }
    if (postObj === null || typeof postObj !== 'object' || Array.isArray(postObj)) {
      return res.status(400).send('Request body must be a JSON object');
    }

    fs.readFile(notesPath, function(err, data){
      var fileObj;
      if (err && err.code === 'ENOENT') {
        // The notes file does not exist yet: start from an empty object.
        fileObj = {};
      } else if (err) {
        return res.status(500).send('Could not read notes file');
      } else {
        try {
          fileObj = JSON.parse(data.toString());
        } catch (fileErr) {
          return res.status(500).send('Notes file does not contain valid JSON');
        }
      }

      Object.keys(postObj).forEach(function(key){
        // Assigning to "__proto__" would replace the object's prototype
        // instead of creating a property, so that key is skipped.
        if (key === '__proto__') { return; }
        fileObj[key] = postObj[key];
      });

      var returnjson = JSON.stringify(fileObj);
      fs.writeFile(notesPath, returnjson, function(writeErr){
        if (writeErr) {
          return res.status(500).send('Could not write notes file');
        }
        res.send(returnjson);
      });
    });
  });
});
Here is a for ... in loop to use if you don't want to overwrite properties. New properties are generated with suffixes like _1, _2, etc. You could also use something like shortid to be sure that property names do not repeat, but that would be uglier and less readable.
// Copy every property of postObj into fileObj without overwriting anything:
// when a key is already taken, the value is stored under the first free
// "<key>_<n>" name (n = 1, 2, ...). The counter restarts for every key.
for (var key in postObj) {
  var newKey = key;
  var suffix = 0;
  // Called through Object.prototype so a stored key named "hasOwnProperty"
  // cannot shadow the check.
  while (Object.prototype.hasOwnProperty.call(fileObj, newKey)) {
    suffix++;
    newKey = key + '_' + suffix;
  }
  fileObj[newKey] = postObj[key];
}

Nodes http request before an async.map

I'm having trouble receiving an array from an HTTP request before using async.map to launch queries for its entries.
My server side controller code below (express 4) :
'use strict';
var _ = require('lodash');
var request = require('request');
var asynce = require('async');
// Handler: builds one blockedForSpam URL per entry of a static list, requests
// them all through async.map, and responds with the collected values as JSONP.
exports.index = function (req, res) {
// Replaces the first "/" of the string with its percent-encoded form.
function cleanip(str) {
return str.replace("/", "%2F");
}
var myUrls = [];
var IpBlockedForSpam = [];
var list = ["127.0.0.1/32", "192.168.0.1/32"];
for (var i in list) {
myUrls.push("http://localhost:9000/myapi/ip/blockedForSpam/" + cleanip(list[i]));
}
asynce.map(myUrls, function (url, callback) {
request(url, function (error, response, html) {
// NOTE(review): `error` is only forwarded after the body has already been parsed.
var r = JSON.parse(html);
// Copy every enumerable value of the parsed response into the shared result array.
for (var i in r) {
IpBlockedForSpam.push(r[i]);
}
callback(error, html);
});
}, function (err, results) {
// NOTE(review): `err` and `results` are not used; the shared array is sent as-is.
res.jsonp(IpBlockedForSpam);
});
};
This code works when the list variable is static.
What I want to achieve is to fill this variable from an HTTP request like this:
// Request the list from the API and store the parsed response body.
request("http://localhost:9000/myapi/ip", function(error, response, body) {
//console.log(body);
// NOTE(review): `remotelist` is not declared in this snippet, and push() appends the parsed value as one single element.
remotelist.push(JSON.parse(body));
});
Calling http://localhost:9000/myapi/ip return :
[
"127.0.0.1/32",
"192.168.0.1/32"
]
I tried many things without success because, most of the time, my async method is launched before the HTTP request that retrieves the list has completed.
Another thing: is it possible to avoid URLs like http://localhost:9000/myapi/* and use only /myapi/* ?
Thank you in advance for any suggestions; maybe I am wrong to use this method.
See you.
You can just put the code inside the request() callback so that the list is obtained first and only when it has been retrieved run the rest of the code:
'use strict';
var _ = require('lodash');
var request = require('request');
var asynce = require('async');
exports.index = function (req, res) {
function cleanip(str) {
return str.replace("/", "%2F");
}
var myUrls = [];
var IpBlockedForSpam = [];
var list = ["127.0.0.1/32", "192.168.0.1/32"];
request("http://localhost:9000/myapi/ip", function(error, response, body) {
// add the IP address array retrieved from this request
list = list.concat(JSON.parse(body));
list.forEach(function(item, i) {
myUrls.push("http://localhost:9000/myapi/ip/blockedForSpam/" + cleanip(item));
});
asynce.map(myUrls, function (url, callback) {
request(url, function (error, response, html) {
var r = JSON.parse(html);
for (var i in r) {
IpBlockedForSpam.push(r[i]);
}
callback(error, html);
});
}, function (err, results) {
res.jsonp(IpBlockedForSpam);
});
});
};
P.S. It is not considered good practice to iterate arrays with for (var i in r), because that is a property iteration that will accidentally include any enumerable properties of the array, not just the array items.
I got it now, below working code :
'use strict';
var _ = require('lodash');
var request = require('request');
var asynce = require('async');
exports.index = function(req, res) {
function cleanip(str) {
return str.replace("/", "%2F");
}
var myUrls = [];
var IpBlockedForSpam = [];
//var list = ["127.0.0.1/32", "192.168.0.1/32"];
var list = [];
request("http://localhost:9000/myapi/ip", function(error, response, body) {
list = list.concat(JSON.parse(body));
list.forEach(function(item, i) {
myUrls.push("http://localhost:9000/myapi/ip/blockedForSpam/" + cleanip(item));
});
asynce.map(myUrls, function(url, callback) {
request(url, function(error, response, html) {
var r = JSON.parse(html);
r.forEach(function(item, i) {
IpBlockedForSpam.push(r[i]);
});
callback(error, html);
});
}, function(err, results) {
res.jsonp(IpBlockedForSpam);
});
});
};
Some brackets were not closed, and I had typed concact instead of concat (fixing that helped me really understand this approach) :)
One last thing: is it possible to avoid URLs like http://localhost:9000/myapi/* and use only /myapi/* ?
Thank you #jfriend00

Get array data from angularjs

First of all, sorry for my English. I'm wondering how to get array data out of AngularJS so that I can save it with Node.js.
Here is my angularjs script:
// AngularJS module with a single controller that keeps an in-memory list of
// { title, greet } entries.
angular.module('myAddList', [])
.controller('myAddListController', function(){
var addList = this;
addList.lists = [];
// Append the current listTitle/listGreet values as a new entry, then clear both fields.
addList.tambah = function(){
addList.lists.push({title:addList.listTitle,greet:addList.listGreet});
addList.listTitle = '', addList.listGreet = '';
}
// Remove one entry from the list, located with indexOf.
// NOTE(review): when `list` is not found, indexOf returns -1 and splice(-1, 1) removes the last entry.
addList.hapusList = function(list){
addList.lists.splice(addList.lists.indexOf(list), 1);
}
});
and here is my nodejs:
var fs = require("fs");
// NOTE(review): `d` is not used anywhere in this snippet.
var d = new Date();
// Placeholder payload that saveFile() serialises.
var myJson = {title : {
"lists": []
}
};
// Write myJson as JSON text to "<value of the #namafile element>.json".
// NOTE(review): uses both `document` and `fs`, so it presumably runs where DOM and Node APIs coexist — confirm.
function saveFile(){
fs.writeFile( document.getElementById("namafile").value + ".json", JSON.stringify( myJson ), "utf8", function(err) {
if(err) {
return console.log(err);
}else if(!err){
console.log("The file was saved!");
}
});
}
I think "myJson" should come from the AngularJS array, which is "addList.lists = [];", but I don't know how to do that. Or maybe there is an alternative way?
-- Edit --
I think the only solution is to save the array to localStorage and then save it in JSON format. But I have another problem: it replaces all whitespace with the character "\", which is so annoying.
Here is the code (with a few changes). Let's assume we have already stored the array in localStorage and want to save it using Node.js:
var fs = require("fs");
var myJson = {
key: "myvalue"
};
// NOTE(review): `d` is not used anywhere in this snippet.
var d = new Date();
// Value stored under "invoice" in localStorage, read once when the script loads.
var locS = localStorage.getItem("invoice");
// Write locS, passed through JSON.stringify, to "<value of the #namaFile element>.json".
function saveFile(){
var nama = document.getElementById("namaFile").value;
// NOTE(review): if locS is already JSON text, stringify wraps it in quotes and escapes every inner quote with a backslash — confirm.
fs.writeFile( nama + ".json", JSON.stringify( locS ), "utf8", function(err) {
if(err) {
return console.log(err);
}else if(!err){
console.log("The file was saved!");
}
});
}
// NOTE(review): the file content is only available as `data` inside the callback; confirm what this assignment to myJson is expected to hold.
myJson = fs.readFile("undefined.json", "utf8", function (err,data) {
if (err) {
return console.log(err);
}
console.log(JSON.parse(data));
// `data` is the raw text of the file (read as "utf8"), so data[2] is its third character.
console.log(data[2]);});
if i run this code, it give me a nice output
console.log(JSON.parse(data));
and when i tried this
console.log(data[2]);
it give me "\" as an output, btw here is the json file
"{\"tax\":13,\"invoice_number\":10,\"customer_info\":{\"name\":\"Mr. John Doe\",\"web_link\":\"John Doe Designs Inc.\",\"address1\":\"1 Infinite Loop\",\"address2\":\"Cupertino, California, US\",\"postal\":\"90210\"},\"company_info\":{\"name\":\"Metaware Labs\",\"web_link\":\"www.metawarelabs.com\",\"address1\":\"123 Yonge Street\",\"address2\":\"Toronto, ON, Canada\",\"postal\":\"M5S 1B6\"},\"items\":[{\"qty\":10,\"description\":\"Gadget\",\"cost\":9.95,\"$$hashKey\":\"004\"}]}"
Make an $http request to your Node.js server like this:
// AngularJS module with a controller that keeps an in-memory list of
// { title, greet } entries and can send that list to the server.
angular.module('myAddList', [])
  .controller('myAddListController', function($http){ // inject $http
    var addList = this;
    addList.lists = [];

    // Append the current form values as a new entry, then clear both fields.
    addList.tambah = function(){
      addList.lists.push({title:addList.listTitle,greet:addList.listGreet});
      addList.listTitle = '';
      addList.listGreet = '';
    };

    // Remove the given entry; an entry that is not in the list is ignored
    // (splice(-1, 1) would otherwise delete the last element).
    addList.hapusList = function(list){
      var index = addList.lists.indexOf(list);
      if (index !== -1) {
        addList.lists.splice(index, 1);
      }
    };

    // POST the current list to the server. This is a method so it can be
    // called once the list has entries, not while the controller is still
    // being constructed and the list is empty.
    // .then(onSuccess, onError) is used because .success() accepts a single
    // callback and no longer exists in AngularJS 1.6+.
    addList.simpanList = function(){
      return $http.post('your server url', addList.lists).then(function(successResponse){
        //do stuff with response
      }, function(errorResponse){
        //do stuff with error response
      });
    };
  });
Then you need a route on the server that accepts this POST request, and the handler for that route is where you perform the file save operation.

node.js + cheerio scrape: Passing an array of urls to download?

Firstly, here is my code as I've progressed so far:
var http = require("http");
// Utility function that downloads a URL and invokes
// callback with the data.
// On a request error the callback is invoked with null instead.
function download(url, callback) {
http.get(url, function(res) {
var data = "";
// Concatenate the response body as chunks arrive.
res.on('data', function (chunk) {
data += chunk;
});
res.on("end", function() {
callback(data);
});
}).on("error", function() {
callback(null);
});
}
var cheerio = require("cheerio");
var url = "http://www.bloglovin.com/en/blogs/1/2/all";
// Collected results, and the index of the next slot to fill.
var myArray = [];
var a = 0;
// Download `url` and record name, follow link and description for every ".content" element.
var getLinks = function(){download(url, function(data) {
if (data) {
// console.log(data);
var $ = cheerio.load(data);
$(".content").each(function(i, e) {
var blogName = $(e).find(".blog-name").text();
var followLink = $(e).find("a").attr("href");
var blogSite = $(e).find(".description").text();
// Each slot starts as a one-element array [a]; the scraped fields are then attached to it under string keys.
myArray[a] = [a];
myArray[a]["blogName"] = blogName;
myArray[a]["followLink"] = "http://www.bloglovin.com"+followLink;
myArray[a]["blogSite"] = blogSite;
a++;
// Logs the whole array after every element, not just once at the end.
console.log(myArray);
});
}
});
}
getLinks();
As you can see, followLink is appended to the site's base URL to build each blog's follow URL. I'd like to pass each of those URLs through the same download function, so that effectively I scrape each of those pages using the same CSS rules and add the results to the multidimensional array entry for the corresponding blogger.
How can I go about this?
I do something similar in one of my scraping jobs, but I use the async.js library to accomplish. Note that I'm also using the request module and cheerio.js in my scraping. I fetch and scrape rows of data from a single webpage, but suspect you could do something similar to fetch URLs and request / scrape them in the same manner.
I also admit this is quite basic coding, certainly could be optimized with a bit of refactoring. Hope it gives you some ideas at least...
First, I use request to fetch the page and call my parse function -
var url = 'http://www.target-website.com';

/**
 * Request `url` and hand the response to parsePage.
 *
 * @param {Function} lastCallback - invoked by parsePage when parsing is done,
 *   or directly with the error when the request itself fails.
 */
function fetchPage(lastCallback) {
  request(url, function(err, resp, body) {
    if (err) {
      // `resp` can be undefined when the request fails, so the error itself
      // is logged rather than resp.statusCode.
      console.log('web request error:' + err.message);
      return lastCallback(err);
    }
    parsePage(err, resp, body, lastCallback);
  });
}
Next, in my parsePage function, I load the website into Cheerio, fetch the HTML of each data row into an array, push my parseRow function and each HTML segment into another array, and use async.parallel to process each iteration -
// HTML of the table rows found by the most recent parsePage() call.
var rows = [];

/**
 * Load the page into cheerio, collect the HTML of every row of
 * div#targetTable, and run parseRow on each row through async.parallel.
 *
 * @param {*} err - passed through to parseRow unchanged.
 * @param {*} resp - passed through to parseRow unchanged.
 * @param {string} body - HTML of the downloaded page.
 * @param {Function} callback1 - called once after all rows were processed.
 */
function parsePage(err, resp, body, callback1) {
  var $ = cheerio.load(body);

  // Start from an empty list so rows from an earlier call are not parsed again.
  rows = [];
  $('div#targetTable tr').each(function(i, elem) {
    rows.push($(this).html());
  });

  // One task per row; each task reports completion through its own callback.
  var scrRows = rows.map(function(row) {
    return function(callback2) {
      parseRow(err, resp, row);
      callback2();
    };
  });

  // Runs once, after the task list is complete (not once per row).
  async.parallel(scrRows, function(parallelErr) {
    callback1(parallelErr);
  });
}
Inside your loop, just create an object with the properties you scrape then push that object onto your array.
// Collect the scraped fields of one blog in a plain object and append it to
// the result array. Properties are separated by commas; a semicolon inside
// an object literal is a syntax error.
var blogInfo = {
  blogName: blogName,
  followLink: "http://www.bloglovin.com" + followLink,
  blogSite: blogSite
};
myArray.push(blogInfo);
You have defined a = 0; So
myArray[a] = [a]; // => myArray[0] = [0]; myArray[0] becomes an array with 0 as only member in it
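The following statements do not throw, but they do not add array elements either: a string key on an array only creates an ordinary property on the array object, which is not counted in length and is skipped by JSON.stringify.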
myArray[a]["blogName"] = blogName;
myArray[a]["followLink"] = "http://www.bloglovin.com"+followLink;
myArray[a]["blogSite"] = blogSite;
Instead try this:
// Describe the current blog as a plain object that also records its position,
// append it to the result array, and print the array so far.
var blogEntry = {};
blogEntry.index = a;
blogEntry.blogName = blogName;
blogEntry.followLink = "http://www.bloglovin.com" + followLink;
blogEntry.blogSite = blogSite;
myArray.push(blogEntry);
console.log(myArray);

Categories