Don't Understand what's wrong with my Code, FreeCodeCamp (NodeJS) - javascript

I am studying nodejs from freecodecamp and nodeschool. I was stuck several hours to find the mistake of my code and still I can't understand the difference.
It would be awesome if someone explain what is wrong with my code.
Learnyounode problem name FILETRED LS.
My Code
var fs = require('fs'), path = require('path');
var directoryName = process.argv[2];
var extentionName = '.'+process.argv[3];
fs.readdir(directoryName, function(err, data) {
if (err) {
throw err;
}
var newExt = path.extname(data);
for (var i = 0; i < data.length; i++) {
if (newExt.toString() === extentionName.toString()) {
console.log(data[i]);
}
}
});
Solution Code
var fs = require('fs')
var path = require('path')
var folder = process.argv[2]
var ext = '.' + process.argv[3]
fs.readdir(folder, function (err, files) {
if (err) return console.error(err)
files.forEach(function(file) {
if (path.extname(file) === ext) {
console.log(file)
}
});
});

Your problem is you are trying to get the extension name (path.extname) of a list of files, while it should be done in the for loop.
Your error:
var newExt = path.extname(data);
The solution:
for (var i = 0; i < data.length; i++) {
var newExt = path.extname(data[i]);
if (newExt.toString() === extentionName.toString()) {
console.log(data[i]);
}
}
As a side note, newExt and extentionName are already strings, and thus, toString() is unnecessary.

Related

node.js webtorrent collect all files by magnet link

I have not used node.js before.
Have a .txt file with list of magnet links.
Want to write a json file with list of all files contained in these links.
var WebTorrent = require('webtorrent');
var fs = require('fs');
var client = new WebTorrent();
var array = fs.readFileSync('yop.txt').toString().split("\n");
i = 0;
while (i < array.length) {
//console.log(array[i]);
var magnetURI = array[i];
n = 0;
client.add(magnetURI, function (torrent) {
torrent.files.forEach(function (file) {
//console.log( file.name)
jsonString = JSON.stringify({'book': file.name});
fs.appendFile("data.json", jsonString, function (err) {
if (err) {console.log(err);} else { n++ }
});
if (n == torrent.files.length) {i++ }
})
})
}
when run gives the following error
Sorry for such a terrible code.
var WebTorrent = require('webtorrent')
var fs = require('fs')
var stream = fs.createWriteStream("2.txt");
var client = new WebTorrent()
var array = fs.readFileSync('yop.txt').toString().split("\n");
i = 0;
function parseMagnet (uri){
var magnetURI = uri[i]
console.log(magnetURI)
client.add(magnetURI, function (torrent) {
torrent.files.forEach(function (file) {
writeStr = (uri[i]+ '\n'+ file.name+ '\n');
stream.write(writeStr);
console.log(file.name)
});
console.log('Done !')
console.log(i)
i += 1
parseMagnet(array);
client.remove(magnetURI);
})
}
parseMagnet(array)

Wondering how to debug Node.js learnyounode "filter ls" exercise

I'm working through the learnyounode exercises. Specifically "filter ls". If you're not familiar with this exercise please refer to this description of it.
Here is the official solution:
var fs = require('fs');
var path = require('path');
fs.readdir(process.argv[2], function (err, list) {
list.forEach(function (filename) {
if (path.extname(filename) === '.' + process.argv[3]) {
console.log(filename);
}
});
});
Here is my solution:
var fs = require('fs');
var path = require('path');
var directory = process.argv[2];
var fileExtension = "." + process.argv[3];
function sortFiles (callback){
fs.readdir(directory, function(err,files){
if(err) return callback(err);
callback(null,files);
});
};
sortFiles(function(err,files){
var fileExt = "";
for(var i = 0; i < files; i++){
debugger;
fileExt = path.extname(files[i]);
if(fileExt === fileExtension){
console.log(files[i]);
};
};
});
The problem:
My code outputs nothing. As far as I can tell, there biggest difference between my code and the official solution is the following:
list.forEach(function (filename) {
if (path.extname(filename) === '.' + process.argv[3]) {
console.log(filename);
}
vs:
for(var i = 0; i < files; i++){
fileExt = path.extname(files[i]);
if(fileExt === fileExtension){
console.log(files[i]);
};
};
2 Questions:
1) How do I debug this code? I tried node debug node.js. From that I can see that when my directory is a folder that contains a cs file and a js file, files is an array, but each index contains "".
2) Why is my approach of a simple for loop not working? Why did they chose .foreach? Is there any reason other than it being more terse?
1) https://github.com/node-inspector/node-inspector
2) Forgot to check the length:
for(var i = 0; i < files.length; i++){
--^--
But forEach reads better, and it is more idiomatic.

Express render broken after save to firebase

I am writing an express app to generate a google map from geo coordinates out of photos. I am attempting to use firebase to save data about the images. The code is fully working except when I save the photo data to firebase it breaks the map rendering on the next page showing connection errors to all my local files in the console like so
So the page is rendering but the map doesn't load and nor do the images. The data I am saving to firebase is actually saving though, and If I remove the function that saves the data to firebase everything works as expected. I think it may have something to do with the way the response is being pushed but I am at a loss. In any other page where I am saving data to firebase it works fine.
Here is the code for the route that is generating the photo data and saving it to firebase:
var express = require('express');
var router = express.Router();
var util = require('util');
var fs = require('fs');
var im = require('imagemagick');
var stormpath = require('express-stormpath');
var _ = require('lodash')
var Firebase = require('firebase');
router.post("/:campaignId", stormpath.loginRequired, function(req, res, next) {
function gatherImages(files, callback) {
//accept single image upload
if (!_.isArray(files)) {
files = [files];
}
var uploads = [];
var count = 0;
files.forEach(function(file) {
fs.exists(file.path, function(exists) {
if (exists) {
var name = req.body[file.originalname];
console.log(name);
var path = file.path;
var upFile = file.name;
uploads.push({
file: upFile,
imgPath: path,
caption: name || 'no comment'
});
count++;
}
if (files.length === count) {
callback(uploads);
}
});
});
}
function getGeoLoc(path, callback) {
im.readMetadata('./' + path, function(error, metadata) {
var geoCoords = false;
if (error) throw error;
if (metadata.exif.gpsLatitude && metadata.exif.gpsLatitudeRef) {
var lat = getDegrees(metadata.exif.gpsLatitude.split(','));
var latRef = metadata.exif.gpsLatitudeRef;
if (latRef === 'S') {
lat = lat * -1;
}
var lng = getDegrees(metadata.exif.gpsLongitude.split(','));
var lngRef = metadata.exif.gpsLongitudeRef;
if (lngRef === 'W') {
lng = lng * -1;
}
var coordinate = {
lat: lat,
lng: lng
};
geoCoords = coordinate.lat + ' ' + coordinate.lng;
console.log(geoCoords);
}
callback(geoCoords);
});
}
function getDegrees(lat) {
var degrees = 0;
for (var i = 0; i < lat.length; i++) {
var cleanNum = lat[i].replace(' ', '');
var parts = cleanNum.split('/');
var coord = parseInt(parts[0]) / parseInt(parts[1]);
if (i == 1) {
coord = coord / 60;
} else if (i == 2) {
coord = coord / 3600;
}
degrees += coord;
}
return degrees.toFixed(6);
}
function processImages(uploads, callback) {
var finalImages = [];
var count = 0;
uploads.forEach(function(upload) {
var path = upload.imgPath;
getGeoLoc(path, function(geoCoords) {
upload.coords = geoCoords;
finalImages.push(upload);
count++;
if (uploads.length === count) {
callback(finalImages);
}
});
});
}
function saveImageInfo(finalImages, callback) {
var campaignId = req.param('campaignId');
var user = res.locals.user;
var count = 0;
var campaignPhotosRef = new Firebase('https://vivid-fire-567.firebaseio.com/BSB/userStore/' + user.username + '/campaigns/' + campaignId + '/photos');
finalImages.forEach(function(image) {
campaignPhotosRef.push(image, function(err) {
if (err) {
console.log(err);
} else {
count++;
if (finalImages.length === count) {
callback(finalImages);
} else {
return;
}
}
});
});
}
if (req.files) {
if (req.files.size === 0) {
return next(new Error("Why didn't you select a file?"));
}
gatherImages(req.files.imageFiles, function(uploads) {
processImages(uploads, function(finalImages) {
saveImageInfo(finalImages, function(finalImages) {
var campaignId = req.param('campaignId');
console.log(res.req.next);
res.render("uploadMapPage", {
title: "File(s) Uploaded Successfully!",
files: finalImages,
campaignId: campaignId,
scripts: ['https://maps.googleapis.com/maps/api/js?key=AIzaSyCU42Wpv6BtNO51t7xGJYnatuPqgwnwk7c', '/javascripts/getPoints.js']
});
});
});
});
}
});
module.exports = router;
This is the only file I have written trying to push multiple objects to firebase. This is my first time using Firebase and Stormpath so any help would be greatly appreciated. Also one other thing that may be helpful is the error from the terminal being output when the issue happens:
POST /uploaded/-JapMLDYzPnbtjvt001X 200 690.689 ms - 2719
/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:24
?a:null}function Db(a){try{a()}catch(b){setTimeout(function(){throw b;},Math.f
^
TypeError: Property 'next' of object #<IncomingMessage> is not a function
at fn (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:899:25)
at EventEmitter.app.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/application.js:532:5)
at ServerResponse.res.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:904:7)
at /Users/jpribesh/Desktop/Code/BanditSignBoss/routes/campaigns.js:20:25
at Array.forEach (native)
at /Users/jpribesh/Desktop/Code/BanditSignBoss/routes/campaigns.js:16:18
at /Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:25:533
at Db (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:24:165)
at Ye (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:124:216)
at Ze (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:123:818)
UPDATE: It seems that the connection errors are inconsistent. Sometimes the images display just fine, sometimes only some of the images get a connection error, and other times everything including the google map script gets a connection error. This is really throwing me off no idea what the issue is. Any help or suggestions is greatly appreciated!
UPDATE 2: I changed the function saving the image data to firebase to use the firebase push function callback (to indicate completion) and added a length check on the forEach loop running to save each image's data. See updated code above. I am now getting the following error for each image that is uploaded in the terminal, but the connection errors are gone:
Error: Can't set headers after they are sent.
at ServerResponse.OutgoingMessage.setHeader (http.js:689:11)
at ServerResponse.header (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:666:10)
at ServerResponse.send (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:146:12)
at fn (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:900:10)
at View.exports.renderFile [as engine] (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/jade/lib/jade.js:325:12)
at View.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/view.js:93:8)
at EventEmitter.app.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/application.js:530:10)
at ServerResponse.res.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:904:7)
at /Users/jpribesh/Desktop/Code/BanditSignBoss/routes/campaigns.js:20:25
at Array.forEach (native)
OK I finally figured out the issue here. I did a few things to remedy my problem. First I converted the route to use next properly to separate out each part of the route out, it processes the images, then saves, then renders. Here is the updated code from that file:
var express = require('express');
var router = express.Router();
var util = require('util');
var fs = require('fs');
var im = require('imagemagick');
var stormpath = require('express-stormpath');
var _ = require('lodash')
var Firebase = require('firebase');
function processData(req, res, next) {
function gatherImages(files, callback) {
//accept single image upload
if (!_.isArray(files)) {
files = [files];
}
var uploads = [];
var count = 0;
files.forEach(function(file) {
fs.exists(file.path, function(exists) {
if (exists) {
var name = req.body[file.originalname];
console.log(name);
var path = file.path;
var upFile = file.name;
uploads.push({
file: upFile,
imgPath: path,
caption: name || 'no comment'
});
count++;
}
if (files.length === count) {
callback(uploads);
}
});
});
}
function getGeoLoc(path, callback) {
im.readMetadata('./' + path, function(error, metadata) {
var geoCoords = false;
if (error) throw error;
if (metadata.exif.gpsLatitude && metadata.exif.gpsLatitudeRef) {
var lat = getDegrees(metadata.exif.gpsLatitude.split(','));
var latRef = metadata.exif.gpsLatitudeRef;
if (latRef === 'S') {
lat = lat * -1;
}
var lng = getDegrees(metadata.exif.gpsLongitude.split(','));
var lngRef = metadata.exif.gpsLongitudeRef;
if (lngRef === 'W') {
lng = lng * -1;
}
var coordinate = {
lat: lat,
lng: lng
};
geoCoords = coordinate.lat + ' ' + coordinate.lng;
console.log(geoCoords);
}
callback(geoCoords);
});
}
function getDegrees(lat) {
var degrees = 0;
for (var i = 0; i < lat.length; i++) {
var cleanNum = lat[i].replace(' ', '');
var parts = cleanNum.split('/');
var coord = parseInt(parts[0]) / parseInt(parts[1]);
if (i == 1) {
coord = coord / 60;
} else if (i == 2) {
coord = coord / 3600;
}
degrees += coord;
}
return degrees.toFixed(6);
}
function processImages(uploads, callback) {
var finalImages = [];
var count = 0;
uploads.forEach(function(upload) {
var path = upload.imgPath;
getGeoLoc(path, function(geoCoords) {
upload.coords = geoCoords;
finalImages.push(upload);
count++;
if (uploads.length === count) {
callback(finalImages);
}
});
});
}
if (req.files) {
if (req.files.size === 0) {
return next(new Error("Why didn't you select a file?"));
}
gatherImages(req.files.imageFiles, function(uploads) {
processImages(uploads, function(finalImages) {
req.finalImages = finalImages;
req.campaignId = req.param('campaignId');
next();
});
});
}
}
function saveImageInfo(req, res, next) {
var user = res.locals.user;
var count = 0;
var campaignPhotosRef = new Firebase('https://vivid-fire-567.firebaseio.com/BSB/userStore/' + user.username + '/campaigns/' + req.campaignId + '/photos');
var finalImages = req.finalImages;
finalImages.forEach(function(image) {
campaignPhotosRef.push(image, function(err) {
if (err) {
console.log(err);
} else {
console.log('Data saved successfully: ' + image);
count++;
if (req.finalImages.length === count) {
next();
}
}
});
});
}
router.post("/:campaignId", stormpath.loginRequired, processData, saveImageInfo, function(req, res) {
res.render("uploadMapPage", {
title: "File(s) Uploaded Successfully!",
files: req.finalImages,
campaignId: req.campaignId,
scripts: ['https://maps.googleapis.com/maps/api/js?key=AIzaSyCU42Wpv6BtNO51t7xGJYnatuPqgwnwk7c', '/javascripts/getPoints.js']
});
});
module.exports = router;
Then I realized in the tracestack I included in my question part of it was tracing back to another file I was using firebase in. I was using a call to .on() instead of using .once() when pulling my data. After reorganizing my route and changing all my calls to .on to .once for firebase data everything is now working properly. I think the real issue here was the use of .on() on my firebase calls instead of .once() as the .on() watches for events continually rather than .once which obviously only watches for it once.

Conceptual: Create a counter in an external function

I've written some javascript to successfully download hundreds of files from an external site, using wget at the core.
After downloading all of the files, I would like to do some stuff with them. The issue is, the files aren't of equal size. So, the last wget formed isn't necessarily the last file downloaded, meaning I can't really tell when the last file has completed.
I do, however, know how many files there are in total, and the number associated with each wget.
I have 3 js files, [parseproducts.js] ==> [createurl.js] ==> [downloadurl.js]
Using this information, how can I tell when all of the files have been downloaded?
I tried creating a "ticker" function in another file but the function resets itself on each instance, so it doesn't work at all!
Edit: Code added Didn't do this initially because I didn't think people would want to trawl through it! I'm new to programming/javascript/node. Please let me know if there's something that I could do better (I'm sure most of it could be more efficient!)
parseproducts.js
var fs = require('fs');
var iset = require('./ticker.js');
var createurl = require('./createurl.js');
var array = [];
filename = 'productlist.txt';
fs.readFile(filename, 'utf8', function(err, data) {
if (err) throw err;
content = data;
parseFile();
});
function parseFile() {
var stringarray = String(content).split(";");
for (var index = 0; index < stringarray.length; ++index) {
createurl(stringarray[index],index,stringarray.length);
console.log(index+'/'+stringarray.length+' sent.');
if (index === 0) {
iset(true,stringarray.length);
} else {
iset (false,stringarray.length);
}
};
};
createurl.js
function create(partnumber,iteration,total) {
var JSdownloadURL = require('./downloadurl.js');
JSdownloadURL(createurl(partnumber),partnumber,iteration,total);
function createurl(partnumber) {
var URL = ('"https://data.icecat.biz/xml_s3/xml_server3.cgi?prod_id='+partnumber+';vendor=hp;lang=en;output=productxml"');
return URL;
};
};
module.exports = create;
downloadurl.js
function downloadurl(URL,partnumber,iteration,total) {
// Dependencies
var fs = require('fs');
var url = require('url');
var http = require('http');
var exec = require('child_process').exec;
var spawn = require('child_process').spawn;
var checkfiles = require('./checkfiles.js');
// App variables
var file_url = URL;
var DOWNLOAD_DIR = './downloads/';
// We will be downloading the files to a directory, so make sure it's there
var mkdir = 'mkdir -p ' + DOWNLOAD_DIR;
var child = exec(mkdir, function(err, stdout, stderr) {
if (err) throw err;
else download_file_wget(file_url);
});
// Function to download file using wget
var download_file_wget = function(file_url) {
// compose the wget command
var wget = 'wget --http-user="MyAccount" --http-password="MyPassword" -P ' + DOWNLOAD_DIR + ' ' + file_url;
// excute wget using child_process' exec function
var child = exec(wget, function(err, stdout, stderr) {
if (err) throw err;
else console.log(iteration+'/'+total+' downloaded. '+partnumber + ' downloaded to ' + DOWNLOAD_DIR);
});
};
};
module.exports = downloadurl;
Failed attempt ticker.js
function iset(bol,total) {
if (bol === true) {
var i = 0;
} else {
var i = 1;
};
counter(i, total);
}
function counter(i,total) {
var n = n + i;
if (n === (total - 1)) {
var checkfiles = require('./checkfiles.js');
checkfiles(total);
} else {
console.log('nothing done');
};
}
module.exports = iset;
Update In response to answer
This is what my code looks like now. However, I get the error
child_process.js:945
throw errnoException(process._errno, 'spawn');
^
Error: spawn EMFILE
// Dependencies
var fs = require('fs');
var url = require('url');
var http = require('http');
var exec = require('child_process').exec;
var spawn = require('child_process').spawn;
var checkfiles = require('./checkfiles.js');
function downloadurl(URL,partnumber,iteration,total,clb) {
// App variables
var file_url = URL;
var DOWNLOAD_DIR = './downloads/';
// We will be downloading the files to a directory, so make sure it's there
var mkdir = 'mkdir -p ' + DOWNLOAD_DIR;
var child = exec(mkdir, function(err, stdout, stderr) {
if (err) throw err;
else download_file_wget(file_url);
});
var child = exec(mkdir, function(err, stdout, stderr) {
if (err) {
clb(err);
} else {
var wget = 'wget --http-user="amadman114" --http-password="Chip10" -P ' + DOWNLOAD_DIR + ' ' + file_url;
// excute wget using child_process' exec function
var child = exec(wget, function(err, stdout, stderr) {
if (err) {
clb(err);
} else {
console.log(iteration+'/'+total+' downloaded. '+partnumber + ' downloaded to ' + DOWNLOAD_DIR);
clb(null); // <-- you can pass more args here if you want, like result
// as a general convention callbacks take a form of
// callback(err, res1, res2, ...)
}
});
}
});
};
function clb() {
var LIMIT = 100,
errs = [];
for (var i = 0; i < LIMIT; i++) {
downloadurl(URL,partnumber,iternation,total, function(err) {
if (err) {
errs.push(err);
}
LIMIT--;
if (!LIMIT) {
finalize(errs);
}
});
}
}
function finalize(errs) {
// you can now check for err
//or do whatever stuff to finalize the code
}
module.exports = downloadurl;
OK, so you have this function downloadurl. What you need to do is to pass one more argument to it: the callback. And please, move requirements outside the function and don't define a function in a function unless necessary:
var fs = require('fs');
// other dependencies and constants
function downloadurl(URL,partnumber,iteration,total, clb) { // <-- new arg
// some code
var child = exec(mkdir, function(err, stdout, stderr) {
if (err) {
clb(err);
} else {
var wget = 'wget --http-user="MyAccount" --http-password="MyPassword" -P ' + DOWNLOAD_DIR + ' ' + file_url;
// excute wget using child_process' exec function
var child = exec(wget, function(err, stdout, stderr) {
if (err) {
clb(err);
} else {
console.log(iteration+'/'+total+' downloaded. '+partnumber + ' downloaded to ' + DOWNLOAD_DIR);
clb(null); // <-- you can pass more args here if you want, like result
// as a general convention callbacks take a form of
// callback(err, res1, res2, ...)
}
});
}
});
};
This look nicer, doesn't it? Now when you call that function multiple times you do:
var LIMIT = 100,
errs = [];
for (var i = 0; i < LIMIT; i++) {
downloadurl(..., function(err) {
if (err) {
errs.push(err);
}
LIMIT--;
if (!LIMIT) {
finalize(errs);
}
});
}
function finalize(errs) {
// you can now check for err
//or do whatever stuff to finalize the code
}
That's a general idea. You have to tweak it to your needs (in particular you have to modify the intermediate function to accept a callback as well). Of course there are libraries which will take care of most this for you like kriskowal's Q (Q.all) or caolan's async (async.parallel).
Not sure if I have understood the problem correctly as I don't see the code. I have worked on creating a download engine. I used to make background AJAX calls to download files. After every successful download or 'onComplete' event I used to increment one variable to keep track of downloaded files. Provdided user won't refresh the page till all the download is complete. Else the download counter can be saved in LocalStorage also.

How can I get node.js to return data once all operations are complete

I am just learning server-side JavaScript so please bear with any glaring mistakes I've made.
I am trying to write a file parser that operates on HTML files in a directory and returns a JSON string once all files have been parsed. I started it with a single file and it works fine. it loads the resource from Apache running on the same machine, injects jquery, does the parsing and returns my JSON.
var request = require('request'),
jsdom = require('jsdom'),
sys = require('sys'),
http = require('http');
http.createServer(function (req, res) {
request({uri:'http://localhost/tfrohe/Car3E.html'}, function (error, response, body) {
if (!error && response.statusCode == 200) {
var window = jsdom.jsdom(body).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
// jQuery is now loaded on the jsdom window created from 'body'
var emps = {};
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step++;
var name = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = name.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2];
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
emps = JSON.stringify(emps);
//console.log(emps);
res.writeHead(200, {'Content-Type': 'text/plain'});
res.end(emps);
});
} else {
res.writeHead(200, {"Content-Type": "text/plain"});
res.end("empty");
//console.log(response.statusCode);
}
});
}).listen(8124);
Now I am trying to extend this to using the regular file system (fs) and get all HTML files in the directory and parse them the same way and return a single combined JSON object once all files have been parsed. Here is what I have so far but it does not work.
var sys = require("sys"),
fs = require("fs"),
jsdom = require("jsdom"),
emps = {};
//path = '/home/inet/www/media/employees/';
readDirectory = function(path) {
fs.readdir(path, function(err, files) {
var htmlfiles = [];
files.forEach(function(name) {
if(name.substr(-4) === "html") {
htmlfiles.push(name);
}
});
var count = htmlfiles.length;
htmlfiles.forEach(function(filename) {
fs.readFile(path + filename, "binary", function(err, data) {
if(err) throw err;
window = jsdom.jsdom(data).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step++;
var empname = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = empname.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2]
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
});
});
});
});
}
readDirectory('/home/inet/www/media/employees/', function() {
console.log(emps);
});
In this particular case, there are 2 html files in the directory. If i console.log(emps) during the htmlfiles.forEach() it shows me the results from the first file then the results for both files together the way I expect. how do I get emps to be returned to readDirectory so i can output it as desired?
Completed Script
After the answers below, here is the completed script with a httpServer to serve up the detail.
var sys = require('sys'),
fs = require("fs"),
http = require('http'),
jsdom = require('jsdom'),
emps = {};
var timed = setInterval(function() {
emps = {};
readDirectory('/home/inet/www/media/employees/', function(emps) {
});
}, 3600000);
readDirectory = function(path, callback) {
fs.readdir(path, function(err, files) {
var htmlfiles = [];
files.forEach(function(name) {
if(name.substr(-4) === "html") {
htmlfiles.push(name);
}
});
var count = htmlfiles.length;
htmlfiles.forEach(function(filename) {
fs.readFile(path + filename, "binary", function(err, data) {
if(err) throw err;
window = jsdom.jsdom(data).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
var imagecount = jquery("tr td img").length;
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step += 1;
var empname = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = empname.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2]
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
count -= 1;
if (count <= 0) {
callback(JSON.stringify(emps));
}
});
});
});
});
}
var init = readDirectory('/home/inet/www/media/employees/', function(emps) {
});
http.createServer(function (req, res) {
res.writeHead(200, {'Content-Type': 'text/plain'});
res.end(JSON.stringify(emps));
}).listen(8124);
That sure is a lot of code a couple of mistakes.
You're never calling the callback function you supply to readDirectory
You need to keep track of the files you have parsed, when you parsed all of them, call the callback and supply the emps
This should work:
var sys = require("sys"),
fs = require("fs"),
jsdom = require("jsdom"),
//path = '/home/inet/www/media/employees/';
// This is a nicer way
function readDirectory(path, callback) {
fs.readdir(path, function(err, files) {
// make this local
var emps = {};
var htmlfiles = [];
files.forEach(function(name) {
if(name.substr(-4) === "html") {
htmlfiles.push(name);
}
});
// Keep track of the number of files we have parsed
var count = htmlfiles.length;
var done = 0;
htmlfiles.forEach(function(filename) {
fs.readFile(path + filename, "binary", function(err, data) {
if(err) throw err;
window = jsdom.jsdom(data).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step++;
var empname = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = empname.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2]
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
// As soon as all have finished call the callback and supply emps
done++;
if (done === count) {
callback(emps);
}
});
});
});
});
}
readDirectory('/home/inet/www/media/employees/', function(emps) {
console.log(emps);
});
You seem to be doing this a tad wrong
readDirectory('/home/inet/www/media/employees/', function() {
console.log(emps);
});
But you've defined your function as:
readDirectory = function(path) {
Where is the callback argument? Try this:
readDirectory = function(path, callback) {
then under emps[last + ",_" + first] = jquery(this).children('img').attr('src'); put
callback.call(null, emps);
Your callback function will be called however many times your loop goes on for. If you want it to return all of them at once, you'll need to get a count of how many times the loop is going to run for, count up until that number then call your callback when the emps array is full of the data you need.

Categories