I've written some javascript to successfully download hundreds of files from an external site, using wget at the core.
After downloading all of the files, I would like to do some stuff with them. The issue is, the files aren't of equal size. So, the last wget formed isn't necessarily the last file downloaded, meaning I can't really tell when the last file has completed.
I do, however, know how many files there are in total, and the number associated with each wget.
I have 3 js files, [parseproducts.js] ==> [createurl.js] ==> [downloadurl.js]
Using this information, how can I tell when all of the files have been downloaded?
I tried creating a "ticker" function in another file but the function resets itself on each instance, so it doesn't work at all!
Edit: Code added. I didn't do this initially because I didn't think people would want to trawl through it! I'm new to programming/JavaScript/Node. Please let me know if there's something that I could do better (I'm sure most of it could be more efficient!).
parseproducts.js
var fs = require('fs');
var iset = require('./ticker.js');
var createurl = require('./createurl.js');
var array = [];
filename = 'productlist.txt';
fs.readFile(filename, 'utf8', function(err, data) {
if (err) throw err;
content = data;
parseFile();
});
// Splits the loaded product list on ";" and dispatches every part number to
// createurl, logging progress and notifying the ticker after each dispatch.
// Reads the module-level `content` that the readFile callback filled in.
function parseFile() {
  var parts = String(content).split(";");
  var total = parts.length;
  parts.forEach(function (partnumber, position) {
    createurl(partnumber, position, total);
    console.log(position + '/' + total + ' sent.');
    // Only the very first dispatch passes `true` to the ticker.
    iset(position === 0, total);
  });
}
createurl.js
function create(partnumber,iteration,total) {
var JSdownloadURL = require('./downloadurl.js');
JSdownloadURL(createurl(partnumber),partnumber,iteration,total);
function createurl(partnumber) {
var URL = ('"https://data.icecat.biz/xml_s3/xml_server3.cgi?prod_id='+partnumber+';vendor=hp;lang=en;output=productxml"');
return URL;
};
};
module.exports = create;
downloadurl.js
function downloadurl(URL,partnumber,iteration,total) {
// Dependencies
var fs = require('fs');
var url = require('url');
var http = require('http');
var exec = require('child_process').exec;
var spawn = require('child_process').spawn;
var checkfiles = require('./checkfiles.js');
// App variables
var file_url = URL;
var DOWNLOAD_DIR = './downloads/';
// We will be downloading the files to a directory, so make sure it's there
var mkdir = 'mkdir -p ' + DOWNLOAD_DIR;
var child = exec(mkdir, function(err, stdout, stderr) {
if (err) throw err;
else download_file_wget(file_url);
});
// Function to download file using wget
var download_file_wget = function(file_url) {
// compose the wget command
var wget = 'wget --http-user="MyAccount" --http-password="MyPassword" -P ' + DOWNLOAD_DIR + ' ' + file_url;
// excute wget using child_process' exec function
var child = exec(wget, function(err, stdout, stderr) {
if (err) throw err;
else console.log(iteration+'/'+total+' downloaded. '+partnumber + ' downloaded to ' + DOWNLOAD_DIR);
});
};
};
module.exports = downloadurl;
Failed attempt ticker.js
// Failed attempt at a completion counter shared between downloads.
// iset(bol, total): turns the "first call" flag into an increment
// (0 when bol is true, 1 otherwise) and forwards it to counter().
function iset(bol,total) {
if (bol === true) {
var i = 0;
} else {
var i = 1;
};
counter(i, total);
}
// counter(i, total): intended to accumulate the increments and run checkfiles
// once the running sum reaches total - 1.
function counter(i,total) {
// `n` is declared with `var` inside this function, so every call gets a fresh
// local that is still undefined when `n + i` is evaluated; the result is NaN.
var n = n + i;
// NaN is never strictly equal to any value, so this branch is not taken.
if (n === (total - 1)) {
var checkfiles = require('./checkfiles.js');
checkfiles(total);
} else {
console.log('nothing done');
};
}
module.exports = iset;
Update In response to answer
This is what my code looks like now. However, I get the error
child_process.js:945
throw errnoException(process._errno, 'spawn');
^
Error: spawn EMFILE
// Dependencies
var fs = require('fs');
var url = require('url');
var http = require('http');
var exec = require('child_process').exec;
var spawn = require('child_process').spawn;
var checkfiles = require('./checkfiles.js');
/**
 * Downloads one file with wget into ./downloads/ and reports completion.
 *
 * @param {string} URL         URL handed to wget (already shell-quoted by the caller).
 * @param {string} partnumber  Part number, used only in the progress message.
 * @param {number} iteration   Index of this download, used only in the progress message.
 * @param {number} total       Total number of downloads, used only in the progress message.
 * @param {function} clb       Node-style callback: clb(err) on failure, clb(null) on success.
 */
function downloadurl(URL,partnumber,iteration,total,clb) {
  var DOWNLOAD_DIR = './downloads/';
  // We will be downloading the files to a directory, so make sure it's there.
  // (The earlier duplicate exec(mkdir) that called the no-longer-defined
  // download_file_wget has been removed; it ran mkdir twice and then threw.)
  var mkdir = 'mkdir -p ' + DOWNLOAD_DIR;
  exec(mkdir, function(err) {
    if (err) {
      return clb(err);
    }
    // Placeholder credentials: never commit real ones, load them from
    // configuration or the environment instead.
    var wget = 'wget --http-user="MyAccount" --http-password="MyPassword" -P ' + DOWNLOAD_DIR + ' ' + URL;
    // execute wget using child_process' exec function
    exec(wget, function(err) {
      if (err) {
        return clb(err);
      }
      console.log(iteration+'/'+total+' downloaded. '+partnumber + ' downloaded to ' + DOWNLOAD_DIR);
      // As a general convention callbacks take the form callback(err, res1, res2, ...)
      clb(null);
    });
  });
}
function clb() {
var LIMIT = 100,
errs = [];
for (var i = 0; i < LIMIT; i++) {
downloadurl(URL,partnumber,iternation,total, function(err) {
if (err) {
errs.push(err);
}
LIMIT--;
if (!LIMIT) {
finalize(errs);
}
});
}
}
function finalize(errs) {
// you can now check for err
//or do whatever stuff to finalize the code
}
module.exports = downloadurl;
OK, so you have this function downloadurl. What you need to do is to pass one more argument to it: the callback. And please, move requirements outside the function and don't define a function in a function unless necessary:
var fs = require('fs');
// other dependencies and constants
function downloadurl(URL,partnumber,iteration,total, clb) { // <-- new arg
// some code
var child = exec(mkdir, function(err, stdout, stderr) {
if (err) {
clb(err);
} else {
var wget = 'wget --http-user="MyAccount" --http-password="MyPassword" -P ' + DOWNLOAD_DIR + ' ' + file_url;
// excute wget using child_process' exec function
var child = exec(wget, function(err, stdout, stderr) {
if (err) {
clb(err);
} else {
console.log(iteration+'/'+total+' downloaded. '+partnumber + ' downloaded to ' + DOWNLOAD_DIR);
clb(null); // <-- you can pass more args here if you want, like result
// as a general convention callbacks take a form of
// callback(err, res1, res2, ...)
}
});
}
});
};
This looks nicer, doesn't it? Now, when you call that function multiple times, you do:
var LIMIT = 100,
errs = [];
for (var i = 0; i < LIMIT; i++) {
downloadurl(..., function(err) {
if (err) {
errs.push(err);
}
LIMIT--;
if (!LIMIT) {
finalize(errs);
}
});
}
function finalize(errs) {
// you can now check for err
//or do whatever stuff to finalize the code
}
That's a general idea. You have to tweak it to your needs (in particular you have to modify the intermediate function to accept a callback as well). Of course there are libraries which will take care of most this for you like kriskowal's Q (Q.all) or caolan's async (async.parallel).
Not sure if I have understood the problem correctly, as I don't see the code. I have worked on creating a download engine. I used to make background AJAX calls to download files. After every successful download or 'onComplete' event, I incremented a variable to keep track of the downloaded files. This works provided the user doesn't refresh the page until all the downloads are complete; otherwise, the download counter can be saved in LocalStorage as well.
Related
I want to download multiple files from the web using this code:
var fs = require('fs');
var http = require('http');
var request = require('request');
var file;
for(var i = 1; i <= 5; i++) {
//CHECK IF REMOTE FILE EXISTS
request('http://webaddress.com/filename' + i + '.jar', function (err, resp) {
//IF EXISTS DO
if (resp.statusCode == 200) {
//DOWNLOAD DATA AND CREATE A NEW .JAR FILE
file = fs.createWriteStream('D:\\filename' + i + '.jar');
http.get('http://webaddress.com/filename' + i + '.jar', function(response) {
response.pipe(file);
file.on('finish', function() {
file.close();
});
});
}
//FILE DOES NOT EXIST
});
}
The result I want is: multiple files downloaded with filenames filename1-5.jar. The result I am getting is just 1 file with filename filename5.jar (or the last value of the i var in the loop). What am I doing wrong?
As Ionut said, your requests are asynchronous, so you need to wait for each of them to complete:
let fs = require('fs');
let request = require('request');
/**
 * Downloads `uri` into `filename`.
 *
 * A HEAD request is sent first; the body is streamed to disk only when the
 * server answers with status 200.
 *
 * @param {string} uri       Remote address of the file.
 * @param {string} filename  Local path to write to.
 * @returns {Promise} Resolves once the written file has been closed. Rejects
 *   with the request error, a stream error, or the non-200 status code.
 */
let download = (uri, filename) => {
  return new Promise((resolve, reject) => {
    request.head(uri, (err, res) => {
      // On a failed request `res` is undefined, so check `err` before reading it.
      if (err) {
        reject(err);
        return;
      }
      if (res.statusCode !== 200) {
        reject(res.statusCode);
        return;
      }
      request(uri)
        .on('error', reject) // network failure while downloading
        .pipe(fs.createWriteStream(filename))
        .on('error', reject) // failure while writing to disk
        .on('close', resolve);
    });
  });
};
let promises = [];
for(let i = 1; i <= 5; i++) {
promises.push(download('http://webaddress.com/filename' + i + '.jar', 'D:\\filename' + i + '.jar'));
}
Promise.all(promises).then(() => {
process.exit(0);
});
Your request is asynchronous, and its callback executes only after your loop has finished, hence the 5 in the filename. A solution is to treat each iteration separately by moving the code into a new function and calling that function inside the loop:
var fs = require('fs');
var http = require('http');
var request = require('request');
var file;
/**
 * Checks whether remote file number `i` exists and, if it does, downloads it
 * to D:\filename<i>.jar.
 *
 * Each call captures its own `i` and its own write stream, so downloads that
 * are in flight at the same time cannot write into each other's files.
 *
 * @param {number} i  Number used in both the remote and the local file name.
 * @returns whatever `request` returns for the existence check.
 */
function customRequest(i){
  var remoteUrl = 'http://webaddress.com/filename' + i + '.jar';
  //CHECK IF REMOTE FILE EXISTS
  return request(remoteUrl, function(err, resp) {
    // `resp` is undefined when the request itself failed.
    if (err) {
      console.error(err);
      return;
    }
    //IF EXISTS DO
    if (resp.statusCode === 200) {
      //DOWNLOAD DATA AND CREATE A NEW .JAR FILE
      // Keep the stream local: a module-level `file` would be overwritten by
      // the other requests before this download's response arrives.
      var file = fs.createWriteStream('D:\\filename' + i + '.jar');
      http.get(remoteUrl, function(response) {
        response.pipe(file);
        file.on('finish', function() {
          file.close();
        });
      });
    }
    //FILE DOES NOT EXIST
  });
}
for (var i = 1; i <= 5; i++) {
customRequest(i)
}
I am studying Node.js through freeCodeCamp and NodeSchool. I have been stuck for several hours trying to find the mistake in my code, and I still can't understand the difference.
It would be awesome if someone could explain what is wrong with my code.
The learnyounode problem is named FILTERED LS.
My Code
var fs = require('fs'), path = require('path');
var directoryName = process.argv[2];
var extentionName = '.'+process.argv[3];
fs.readdir(directoryName, function(err, data) {
if (err) {
throw err;
}
var newExt = path.extname(data);
for (var i = 0; i < data.length; i++) {
if (newExt.toString() === extentionName.toString()) {
console.log(data[i]);
}
}
});
Solution Code
var fs = require('fs')
var path = require('path')
var folder = process.argv[2]
var ext = '.' + process.argv[3]
fs.readdir(folder, function (err, files) {
if (err) return console.error(err)
files.forEach(function(file) {
if (path.extname(file) === ext) {
console.log(file)
}
});
});
Your problem is you are trying to get the extension name (path.extname) of a list of files, while it should be done in the for loop.
Your error:
var newExt = path.extname(data);
The solution:
for (var i = 0; i < data.length; i++) {
var newExt = path.extname(data[i]);
if (newExt.toString() === extentionName.toString()) {
console.log(data[i]);
}
}
As a side note, newExt and extentionName are already strings, and thus, toString() is unnecessary.
I have a text file which simply lists some URL's. I'm trying to take each line from the text file, and add it to an array of urls for further operation.
var fs = require('fs'),
Urls = [];
var returnURLS = function(file) {
var read = function(callback) {
fs.readFile(file, function(err, logData){
if (err) throw err;
var text = logData.toString();
var lines = text.split('\n');
lines.forEach(function(line, callback){
var url = "http://www." + line;
Urls.push(url);
});
callback();
});
};
var giveBackAnswer = function() {
console.log("1: ", Urls);
return Urls;
};
read(giveBackAnswer);
};
console.log("2: ", returnURLS('textFileWithListOfURLs.txt'));
My console output clearly shows that the file system operations have not completed by the time the program is supposed to log the results, but that the results eventually do show up.
2: the urls are undefined
1: [ 'http://www.cshellsmassage.com',
'http://www.darsanamartialarts.com',
'http://www.davidgoldbergdc.com',
'http://www.dayspaofbroward.com',.... (etc)
What is the best way to get these functions to operate synchronously?
1) Compile the Urls array through file system operations
2) Print the array to the console once it has been filled
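Well, your function returns undefined. This is because a JavaScript function returns undefined unless it explicitly returns a value, and returnURLS has no return statement of its own: the return inside giveBackAnswer only returns from that inner callback, long after returnURLS itself has finished.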
If you would like to hook on your function using callbacks, it has to take a callback itself and then you'd place your continuation in that callback:
/**
 * Reads `file`, turns every line into "http://www." + line, appends the
 * results to the shared `Urls` array and reports back through `whenDone`.
 *
 * @param {string} file        Path of the text file listing one host per line.
 * @param {function} whenDone  Node-style callback: whenDone(err) on failure,
 *                             whenDone(null, Urls) on success.
 */
var returnURLS = function(file, whenDone) {
  fs.readFile(file, function(err, logData){
    if (err) {
      // Stop here: logData is undefined on failure, so carrying on would
      // throw and the callback would be followed by a crash.
      return whenDone(err);
    }
    logData.toString().split('\n').forEach(function(line){
      Urls.push("http://www." + line);
    });
    console.log("1: ", Urls);
    whenDone(null, Urls);
  });
};
Which would let you do:
returnURLS("textFileWithList.txt", function(err, list){
console.log("2: ", list);
});
The alternative solution using promises (bluebird) would look something like:
// `Promise` must be bluebird here (e.g. var Promise = require("bluebird")).
// promisifyAll adds an "...Async" twin for every callback-style method of the
// module, which is where readFileAsync comes from; Promise.promisify only
// wraps a single function and would not create that method.
var fs = Promise.promisifyAll(require("fs"));
// Resolves with an array holding "http://www." + line for each line of `file`.
var returnURLS = function(file) {
  return fs.readFileAsync(file).then(function(logData){
    var text = logData.toString();
    var lines = text.split('\n');
    return lines.map(function(line){
      return "http://www." + line;
    });
  });
};
Which would let you do:
returnURLS("url.txt").then(function(data){
console.log("Got data!", data);
});
You could use fs.readFileSync in that simple case :
// Synchronous variant: blocks until `file` has been read, appends
// "http://www." + line for every line to the shared Urls array and returns
// that same array.
var returnURLS = function(file) {
  var rows = fs.readFileSync(file).toString().split('\n');
  for (var k = 0; k < rows.length; k++) {
    Urls.push("http://www." + rows[k]);
  }
  return Urls;
};
That's perfectly OK when you don't need parallelism, like in this small utility program.
But the solution you'll reapply everywhere else is to be wholly asynchronous by not returning the result but passing it as argument to a callback :
/**
 * Asynchronously reads `file`, appends "http://www." + line for every line to
 * the shared Urls array and passes that array to `callback`.
 *
 * @param {string} file        Path of the text file listing one host per line.
 * @param {function} callback  Called as callback(Urls) once the file is processed.
 */
var fetchURLS = function(file, callback) {
  fs.readFile(file, function(err, logData){
    if (err) throw err;
    var text = logData.toString();
    var lines = text.split('\n');
    lines.forEach(function(line){
      var url = "http://www." + line;
      Urls.push(url);
    });
    callback(Urls);
  });
};
fetchURLS('textFileWithListOfURLs.txt', function(urls){
console.log("2: ", urls);
});
When your code grows in complexity, it becomes convenient to use promises to reduce the "callback hell".
Wrap the function with a callback
var fs = require('fs'),
Urls = [];
function doit(cb){
var returnURLS = function(file) {
var read = function(callback) {
fs.readFile(file, function(err, logData){
if (err) throw err;
var text = logData.toString();
var lines = text.split('\n');
lines.forEach(function(line, callback){
var url = "http://www." + line;
Urls.push(url);
});
callback();
});
};
var giveBackAnswer = function() {
console.log("1: ", Urls);
return Urls;
};
read(giveBackAnswer);
};
cb(returnURLS);
}
doit(function(result){
console.log("2: ", result('textFileWithListOfURLs.txt'));
});
I am writing an express app to generate a google map from geo coordinates out of photos. I am attempting to use firebase to save data about the images. The code is fully working except when I save the photo data to firebase it breaks the map rendering on the next page showing connection errors to all my local files in the console like so
So the page is rendering but the map doesn't load and nor do the images. The data I am saving to firebase is actually saving though, and If I remove the function that saves the data to firebase everything works as expected. I think it may have something to do with the way the response is being pushed but I am at a loss. In any other page where I am saving data to firebase it works fine.
Here is the code for the route that is generating the photo data and saving it to firebase:
var express = require('express');
var router = express.Router();
var util = require('util');
var fs = require('fs');
var im = require('imagemagick');
var stormpath = require('express-stormpath');
var _ = require('lodash')
var Firebase = require('firebase');
router.post("/:campaignId", stormpath.loginRequired, function(req, res, next) {
function gatherImages(files, callback) {
//accept single image upload
if (!_.isArray(files)) {
files = [files];
}
var uploads = [];
var count = 0;
files.forEach(function(file) {
fs.exists(file.path, function(exists) {
if (exists) {
var name = req.body[file.originalname];
console.log(name);
var path = file.path;
var upFile = file.name;
uploads.push({
file: upFile,
imgPath: path,
caption: name || 'no comment'
});
count++;
}
if (files.length === count) {
callback(uploads);
}
});
});
}
function getGeoLoc(path, callback) {
im.readMetadata('./' + path, function(error, metadata) {
var geoCoords = false;
if (error) throw error;
if (metadata.exif.gpsLatitude && metadata.exif.gpsLatitudeRef) {
var lat = getDegrees(metadata.exif.gpsLatitude.split(','));
var latRef = metadata.exif.gpsLatitudeRef;
if (latRef === 'S') {
lat = lat * -1;
}
var lng = getDegrees(metadata.exif.gpsLongitude.split(','));
var lngRef = metadata.exif.gpsLongitudeRef;
if (lngRef === 'W') {
lng = lng * -1;
}
var coordinate = {
lat: lat,
lng: lng
};
geoCoords = coordinate.lat + ' ' + coordinate.lng;
console.log(geoCoords);
}
callback(geoCoords);
});
}
/**
 * Converts split GPS components into decimal degrees.
 *
 * Each component is a "numerator/denominator" string. Index 0 is added as is,
 * index 1 is divided by 60 and index 2 by 3600 (i.e. the components are
 * treated as degrees, minutes and seconds). A component without a
 * "/denominator" part is taken as a whole number instead of yielding NaN.
 *
 * @param {string[]} lat  Components, e.g. ["34/1", " 30/1", " 1234/100"].
 * @returns {string} The sum formatted with six decimal places.
 */
function getDegrees(lat) {
  var divisors = [1, 60, 3600];
  var degrees = 0;
  for (var i = 0; i < lat.length; i++) {
    var parts = lat[i].trim().split('/');
    // Always parse as base 10; default the denominator to 1 when absent.
    var numerator = parseInt(parts[0], 10);
    var denominator = parts.length > 1 ? parseInt(parts[1], 10) : 1;
    degrees += (numerator / denominator) / (divisors[i] || 1);
  }
  return degrees.toFixed(6);
}
function processImages(uploads, callback) {
var finalImages = [];
var count = 0;
uploads.forEach(function(upload) {
var path = upload.imgPath;
getGeoLoc(path, function(geoCoords) {
upload.coords = geoCoords;
finalImages.push(upload);
count++;
if (uploads.length === count) {
callback(finalImages);
}
});
});
}
function saveImageInfo(finalImages, callback) {
var campaignId = req.param('campaignId');
var user = res.locals.user;
var count = 0;
var campaignPhotosRef = new Firebase('https://vivid-fire-567.firebaseio.com/BSB/userStore/' + user.username + '/campaigns/' + campaignId + '/photos');
finalImages.forEach(function(image) {
campaignPhotosRef.push(image, function(err) {
if (err) {
console.log(err);
} else {
count++;
if (finalImages.length === count) {
callback(finalImages);
} else {
return;
}
}
});
});
}
if (req.files) {
if (req.files.size === 0) {
return next(new Error("Why didn't you select a file?"));
}
gatherImages(req.files.imageFiles, function(uploads) {
processImages(uploads, function(finalImages) {
saveImageInfo(finalImages, function(finalImages) {
var campaignId = req.param('campaignId');
console.log(res.req.next);
res.render("uploadMapPage", {
title: "File(s) Uploaded Successfully!",
files: finalImages,
campaignId: campaignId,
scripts: ['https://maps.googleapis.com/maps/api/js?key=AIzaSyCU42Wpv6BtNO51t7xGJYnatuPqgwnwk7c', '/javascripts/getPoints.js']
});
});
});
});
}
});
module.exports = router;
This is the only file I have written trying to push multiple objects to firebase. This is my first time using Firebase and Stormpath so any help would be greatly appreciated. Also one other thing that may be helpful is the error from the terminal being output when the issue happens:
POST /uploaded/-JapMLDYzPnbtjvt001X 200 690.689 ms - 2719
/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:24
?a:null}function Db(a){try{a()}catch(b){setTimeout(function(){throw b;},Math.f
^
TypeError: Property 'next' of object #<IncomingMessage> is not a function
at fn (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:899:25)
at EventEmitter.app.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/application.js:532:5)
at ServerResponse.res.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:904:7)
at /Users/jpribesh/Desktop/Code/BanditSignBoss/routes/campaigns.js:20:25
at Array.forEach (native)
at /Users/jpribesh/Desktop/Code/BanditSignBoss/routes/campaigns.js:16:18
at /Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:25:533
at Db (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:24:165)
at Ye (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:124:216)
at Ze (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:123:818)
UPDATE: It seems that the connection errors are inconsistent. Sometimes the images display just fine, sometimes only some of the images get a connection error, and other times everything including the google map script gets a connection error. This is really throwing me off no idea what the issue is. Any help or suggestions is greatly appreciated!
UPDATE 2: I changed the function saving the image data to firebase to use the firebase push function callback (to indicate completion) and added a length check on the forEach loop running to save each image's data. See updated code above. I am now getting the following error for each image that is uploaded in the terminal, but the connection errors are gone:
Error: Can't set headers after they are sent.
at ServerResponse.OutgoingMessage.setHeader (http.js:689:11)
at ServerResponse.header (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:666:10)
at ServerResponse.send (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:146:12)
at fn (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:900:10)
at View.exports.renderFile [as engine] (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/jade/lib/jade.js:325:12)
at View.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/view.js:93:8)
at EventEmitter.app.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/application.js:530:10)
at ServerResponse.res.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:904:7)
at /Users/jpribesh/Desktop/Code/BanditSignBoss/routes/campaigns.js:20:25
at Array.forEach (native)
OK I finally figured out the issue here. I did a few things to remedy my problem. First I converted the route to use next properly to separate out each part of the route out, it processes the images, then saves, then renders. Here is the updated code from that file:
var express = require('express');
var router = express.Router();
var util = require('util');
var fs = require('fs');
var im = require('imagemagick');
var stormpath = require('express-stormpath');
var _ = require('lodash')
var Firebase = require('firebase');
function processData(req, res, next) {
/**
 * Builds the list of uploads that are actually present on disk.
 *
 * For every file that can be accessed, an entry { file, imgPath, caption } is
 * collected; the caption is looked up in req.body under the file's original
 * name and falls back to 'no comment'. The callback fires exactly once, after
 * every file has been checked, even when some files are missing or the list
 * is empty.
 *
 * @param {Object|Object[]} files  One upload or an array of uploads
 *                                 (each read for .path, .originalname, .name).
 * @param {function} callback      Called as callback(uploads).
 */
function gatherImages(files, callback) {
  //accept single image upload
  var list = Array.isArray(files) ? files : [files];
  var uploads = [];
  var checked = 0;
  if (list.length === 0) {
    return callback(uploads);
  }
  list.forEach(function(file) {
    // fs.access replaces the deprecated fs.exists; no error means the file is there.
    fs.access(file.path, function(accessErr) {
      if (!accessErr) {
        var name = req.body[file.originalname];
        console.log(name);
        uploads.push({
          file: file.name,
          imgPath: file.path,
          caption: name || 'no comment'
        });
      }
      // Count every finished check, not only the successful ones; otherwise a
      // single missing file would keep the callback from ever being called.
      checked++;
      if (checked === list.length) {
        callback(uploads);
      }
    });
  });
}
function getGeoLoc(path, callback) {
im.readMetadata('./' + path, function(error, metadata) {
var geoCoords = false;
if (error) throw error;
if (metadata.exif.gpsLatitude && metadata.exif.gpsLatitudeRef) {
var lat = getDegrees(metadata.exif.gpsLatitude.split(','));
var latRef = metadata.exif.gpsLatitudeRef;
if (latRef === 'S') {
lat = lat * -1;
}
var lng = getDegrees(metadata.exif.gpsLongitude.split(','));
var lngRef = metadata.exif.gpsLongitudeRef;
if (lngRef === 'W') {
lng = lng * -1;
}
var coordinate = {
lat: lat,
lng: lng
};
geoCoords = coordinate.lat + ' ' + coordinate.lng;
console.log(geoCoords);
}
callback(geoCoords);
});
}
/**
 * Converts split GPS components into decimal degrees.
 *
 * Each component is a "numerator/denominator" string. Index 0 is added as is,
 * index 1 is divided by 60 and index 2 by 3600 (i.e. the components are
 * treated as degrees, minutes and seconds). A component without a
 * "/denominator" part is taken as a whole number instead of yielding NaN.
 *
 * @param {string[]} lat  Components, e.g. ["34/1", " 30/1", " 1234/100"].
 * @returns {string} The sum formatted with six decimal places.
 */
function getDegrees(lat) {
  var divisors = [1, 60, 3600];
  var degrees = 0;
  for (var i = 0; i < lat.length; i++) {
    var parts = lat[i].trim().split('/');
    // Always parse as base 10; default the denominator to 1 when absent.
    var numerator = parseInt(parts[0], 10);
    var denominator = parts.length > 1 ? parseInt(parts[1], 10) : 1;
    degrees += (numerator / denominator) / (divisors[i] || 1);
  }
  return degrees.toFixed(6);
}
function processImages(uploads, callback) {
var finalImages = [];
var count = 0;
uploads.forEach(function(upload) {
var path = upload.imgPath;
getGeoLoc(path, function(geoCoords) {
upload.coords = geoCoords;
finalImages.push(upload);
count++;
if (uploads.length === count) {
callback(finalImages);
}
});
});
}
if (req.files) {
if (req.files.size === 0) {
return next(new Error("Why didn't you select a file?"));
}
gatherImages(req.files.imageFiles, function(uploads) {
processImages(uploads, function(finalImages) {
req.finalImages = finalImages;
req.campaignId = req.param('campaignId');
next();
});
});
}
}
/**
 * Express middleware: pushes every entry of req.finalImages to the campaign's
 * photo list in Firebase and then hands over to the next handler.
 *
 * next() is called exactly once: without arguments after all pushes succeeded
 * (or when there is nothing to save), or with the error of the first push
 * that failed, so the request can no longer hang on a failed write.
 */
function saveImageInfo(req, res, next) {
  var user = res.locals.user;
  var campaignPhotosRef = new Firebase('https://vivid-fire-567.firebaseio.com/BSB/userStore/' + user.username + '/campaigns/' + req.campaignId + '/photos');
  var finalImages = req.finalImages;
  var remaining = finalImages.length;
  var failed = false;
  if (remaining === 0) {
    return next();
  }
  finalImages.forEach(function(image) {
    campaignPhotosRef.push(image, function(err) {
      if (failed) {
        // An earlier push already reported its error through next(err).
        return;
      }
      if (err) {
        failed = true;
        console.log(err);
        return next(err);
      }
      console.log('Data saved successfully: ' + image);
      remaining--;
      if (remaining === 0) {
        next();
      }
    });
  });
}
router.post("/:campaignId", stormpath.loginRequired, processData, saveImageInfo, function(req, res) {
res.render("uploadMapPage", {
title: "File(s) Uploaded Successfully!",
files: req.finalImages,
campaignId: req.campaignId,
scripts: ['https://maps.googleapis.com/maps/api/js?key=AIzaSyCU42Wpv6BtNO51t7xGJYnatuPqgwnwk7c', '/javascripts/getPoints.js']
});
});
module.exports = router;
Then I realized that part of the stack trace I included in my question traced back to another file in which I was using Firebase. There, I was calling .on() instead of .once() when pulling my data. After reorganizing my route and changing all of my Firebase .on() calls to .once(), everything now works properly. I think the real issue was the use of .on() instead of .once(): .on() keeps listening and invokes its callback every time the data changes, whereas .once() invokes it only a single time.
I am just learning server-side JavaScript so please bear with any glaring mistakes I've made.
I am trying to write a file parser that operates on HTML files in a directory and returns a JSON string once all files have been parsed. I started it with a single file and it works fine. it loads the resource from Apache running on the same machine, injects jquery, does the parsing and returns my JSON.
var request = require('request'),
jsdom = require('jsdom'),
sys = require('sys'),
http = require('http');
http.createServer(function (req, res) {
request({uri:'http://localhost/tfrohe/Car3E.html'}, function (error, response, body) {
if (!error && response.statusCode == 200) {
var window = jsdom.jsdom(body).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
// jQuery is now loaded on the jsdom window created from 'body'
var emps = {};
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step++;
var name = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = name.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2];
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
emps = JSON.stringify(emps);
//console.log(emps);
res.writeHead(200, {'Content-Type': 'text/plain'});
res.end(emps);
});
} else {
res.writeHead(200, {"Content-Type": "text/plain"});
res.end("empty");
//console.log(response.statusCode);
}
});
}).listen(8124);
Now I am trying to extend this to using the regular file system (fs) and get all HTML files in the directory and parse them the same way and return a single combined JSON object once all files have been parsed. Here is what I have so far but it does not work.
var sys = require("sys"),
fs = require("fs"),
jsdom = require("jsdom"),
emps = {};
//path = '/home/inet/www/media/employees/';
readDirectory = function(path) {
fs.readdir(path, function(err, files) {
var htmlfiles = [];
files.forEach(function(name) {
if(name.substr(-4) === "html") {
htmlfiles.push(name);
}
});
var count = htmlfiles.length;
htmlfiles.forEach(function(filename) {
fs.readFile(path + filename, "binary", function(err, data) {
if(err) throw err;
window = jsdom.jsdom(data).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step++;
var empname = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = empname.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2]
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
});
});
});
});
}
readDirectory('/home/inet/www/media/employees/', function() {
console.log(emps);
});
In this particular case, there are 2 html files in the directory. If i console.log(emps) during the htmlfiles.forEach() it shows me the results from the first file then the results for both files together the way I expect. how do I get emps to be returned to readDirectory so i can output it as desired?
Completed Script
After the answers below, here is the completed script with a httpServer to serve up the detail.
var sys = require('sys'),
fs = require("fs"),
http = require('http'),
jsdom = require('jsdom'),
emps = {};
var timed = setInterval(function() {
emps = {};
readDirectory('/home/inet/www/media/employees/', function(emps) {
});
}, 3600000);
readDirectory = function(path, callback) {
fs.readdir(path, function(err, files) {
var htmlfiles = [];
files.forEach(function(name) {
if(name.substr(-4) === "html") {
htmlfiles.push(name);
}
});
var count = htmlfiles.length;
htmlfiles.forEach(function(filename) {
fs.readFile(path + filename, "binary", function(err, data) {
if(err) throw err;
window = jsdom.jsdom(data).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
var imagecount = jquery("tr td img").length;
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step += 1;
var empname = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = empname.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2]
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
count -= 1;
if (count <= 0) {
callback(JSON.stringify(emps));
}
});
});
});
});
}
var init = readDirectory('/home/inet/www/media/employees/', function(emps) {
});
http.createServer(function (req, res) {
res.writeHead(200, {'Content-Type': 'text/plain'});
res.end(JSON.stringify(emps));
}).listen(8124);
That sure is a lot of code, with a couple of mistakes:
You're never calling the callback function you supply to readDirectory
You need to keep track of the files you have parsed, when you parsed all of them, call the callback and supply the emps
This should work:
var sys = require("sys"),
    fs = require("fs"),
    jsdom = require("jsdom");
//path = '/home/inet/www/media/employees/';

/**
 * Parses every file in `path` whose name ends in "html" and collects a map of
 * "Last,_First" -> image src taken from the table cells that contain an <img>.
 *
 * @param {string} path        Directory to scan; it is concatenated directly
 *                             with each file name, so it must end with "/".
 * @param {function} callback  Called once as callback(emps) after all matching
 *                             files have been parsed (immediately with an
 *                             empty object when there are none).
 */
function readDirectory(path, callback) {
  fs.readdir(path, function(err, files) {
    // Surface the real error instead of failing later on an undefined `files`.
    if (err) throw err;
    // make this local
    var emps = {};
    var htmlfiles = files.filter(function(name) {
      return name.substr(-4) === "html";
    });
    // Nothing to parse: report the empty result now, otherwise the callback
    // would never be reached.
    if (htmlfiles.length === 0) {
      callback(emps);
      return;
    }
    // Keep track of the number of files we have parsed
    var done = 0;
    htmlfiles.forEach(function(filename) {
      fs.readFile(path + filename, "binary", function(err, data) {
        if(err) throw err;
        // Declared locally so the files do not share an implicit global.
        var window = jsdom.jsdom(data).createWindow();
        jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
          jquery("tr td img").parent().parent().each(function(){
            var step = 0;
            jquery(this).children().each(function(index){
              if (jquery(this).children('img').attr('src') !== undefined) {
                step++;
                var empname = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
                var name_parts = empname.split(",");
                var last = name_parts[0];
                var first = name_parts[1].split(/\u00a0/g)[2];
                emps[last + ",_" + first] = jquery(this).children('img').attr('src');
              }
            });
          });
          // As soon as all have finished call the callback and supply emps
          done++;
          if (done === htmlfiles.length) {
            callback(emps);
          }
        });
      });
    });
  });
}
readDirectory('/home/inet/www/media/employees/', function(emps) {
console.log(emps);
});
You seem to be doing this a tad wrong
readDirectory('/home/inet/www/media/employees/', function() {
console.log(emps);
});
But you've defined your function as:
readDirectory = function(path) {
Where is the callback argument? Try this:
readDirectory = function(path, callback) {
then under emps[last + ",_" + first] = jquery(this).children('img').attr('src'); put
callback.call(null, emps);
Your callback function will be called however many times your loop goes on for. If you want it to return all of them at once, you'll need to get a count of how many times the loop is going to run for, count up until that number then call your callback when the emps array is full of the data you need.