Related
This script is to get the title of the webpage where the URL of the website will be passed from an excel file, check to see if the title contains the keyword, and then store that domain in the new excel file.
There is no issue with the partial code, but the title comparison does not work as expected. Does anyone have an idea how to fix it?
here is my code
var request = require("request");
var cheerio = require("cheerio");
const xlsx = require("xlsx");
jsonData = [{ Domain: "blogger.com" }, { Domain: "stackoverflow.com" }];
function fetchTitle(url, onComplete = null) {
request(url, function (error, response, body) {
var output = url; // default to URL
if (!error && (response && response.statusCode) === 200) {
var $ = cheerio.load(body);
console.log(`URL = ${url}`);
var title = $("head > title").text().trim();
console.log(`Title = ${title}`);
output = `[${title}] (${url})`;
var keywords = ["Developers", "blog"];
var results = [];
var UrlArray = [];
for (var i = 0; i < keywords.length; i++) {
var match = title.match(new RegExp(keywords.join("|"), "g"));
results.push(keywords[i]);
}
if (match.length > 0) {
UrlArray.push({
Domain: url,
Keywords: results,
Title: output,
});
finalJsonData = JSON.stringify(UrlArray);
const ws = xlsx.utils.json_to_sheet(UrlArray);
const wb = xlsx.utils.book_new();
xlsx.utils.book_append_sheet(wb, ws, "Responses");
xlsx.writeFile(wb, "output.xlsx");
}
} else {
console.log(
`Error = ${error}, code = ${response && response.statusCode}`
);
}
console.log(`output = ${output} \n\n`);
if (onComplete) onComplete(output);
});
}
jsonData.forEach(function (table) {
var tableName = table.Domain;
var URL = "http://" + tableName;
fetchTitle(URL);
});
When I execute the script, I am able to get the title, but when I compare it with the keyword, it is not working as expected. Keywords are not being stored. You can see how the output looks after executing the script.
The script shows that both domains have keywords, but only blogger is stored in the spreadsheet, even then keywords aren't stored
you're overwriting the file on each loop,
keywords is an array, so it doesn't get saved, furthermore, keywords column will always contain all keywords, not the matching ones...
as requests are async, you need to track them all, and write results only when all requests are finished.
try this:
match case insensitive, and store only matching keywords for that site, not all (I also added "no match" for domains with no match)
store results outside the loop
move writing results into a separate function
add request counter and callback to track requests
write results when requests are done
the code:
var request = require("request");
var cheerio = require("cheerio");
const xlsx = require("xlsx");
const jsonData = [{ Domain: "blogger.com" }, { Domain: "stackoverflow.com" }];
var UrlArray = [];
function writeResults() {
const finalJsonData = JSON.stringify(UrlArray);
const ws = xlsx.utils.json_to_sheet(UrlArray);
const wb = xlsx.utils.book_new();
xlsx.utils.book_append_sheet(wb, ws, "Responses");
xlsx.writeFile(wb, "output.xlsx");
}
function fetchTitle(url, onComplete = null) {
request(url, function (error, response, body) {
var output = url; // default to URL
if (!error && (response && response.statusCode) === 200) {
var $ = cheerio.load(body);
console.log(`URL = ${url}`);
var title = $("head > title").text().trim();
console.log(`Title = ${title}`);
output = `[${title}] (${url})`;
var keywords = ["Developers", "blog"];
var results = [];
for (var i = 0; i < keywords.length; i++) {
let match = title.match(new RegExp(keywords[i], "gi"));
if (match && match.length > 0) {
results.push(keywords[i]);
}
}
UrlArray.push({
Domain: url,
Keywords: results.length > 0 ? results.join(', ') : 'no match',
Title: output,
});
} else {
console.log(
`Error = ${error}, code = ${response && response.statusCode}`
);
}
console.log(`output = ${output} \n\n`);
if (onComplete) onComplete(output);
});
}
let counter = 0;
jsonData.forEach(function (table) {
var tableName = table.Domain;
var URL = "http://" + tableName;
fetchTitle(URL, ()=>{
counter++;
if(counter === jsonData.length) {
console.log(`all ${counter} requests done`);
writeResults();
}
});
});
I wrote an app that displays images by ID and all the images save and located in S3.
I have 2 instances and 1 LB in my AWS machine and the index.js is located in both instances.
In my index.js I wrote the path to my S3 bucket and I get an error message like this :
Blockquote
This XML file does not appear to have any style information associated with it. The document tree is shown below.
The code in index.js :
//Get Images Names by Id / Color from MongoDB
var mongoose = require('mongoose');
var db = mongoose.connect('mongodb://db_usr:db_pass#ds023550.mlab.com:23550/db_ringapp2016_g');
var userSchema = require('./1_define_schema');
var size = 0;
var express = require('express'),
url = require('url'),
app = express();
var fs = require('fs');
var http = require('http');
var Upload = require('s3-uploader');
var restify = require('restify');
app.listen(process.env.PORT || 3000);
// connection error
mongoose.connection.once('error', function (err) {
console.log('connectiob error' + err);
});
//connecting to DB
mongoose.connection.once('open', function () {
console.log("============================");
console.log("Connected Successfully to DB");
console.log("============================");
userSchema.find({}, function(err, user){
if(err) throw err;
//first route - call first get function from WS.
app.get('/AllPictures' ,
function (req, res) {
console.log("DB: Get all Pictures");
res.status(200).json(user[0].Name);
});
})
//second route - call second function from WS (by Id).
app.get('/PicById/:Id/:Size', function (req, res) {
userSchema.find({Id:req.params.Id} , function(err, user){
if(err) throw err;
console.log("DB: Get Pic by ID: "+req.params.Id+" from MongoDB");
console.log("Name Pic: "+user[0].Name);
console.log("Size Pic: "+req.params.Size);
var temp = user[0].Name.split(".");
var result = temp[0]+req.params.Size+"."+temp[1];
console.log(result);
res.send('<!DOCTYPE HTML><html><head></head><body><img src="https://s3-us-west-2.amazonaws.com/galshaharbucket/'+result+'"></body></html>');
console.log("============================");
})
})
//thired route - call thired get from WS (by Color).
app.get('/PicByColor/:Color', function (req, res) {
userSchema.find({Color:req.params.Color}, function(err, user){
if(err) throw err;
var names = [];
console.log("DB: Get Pic by Color: "+req.params.Color+" from MongoDB");
console.log("--------------------");
for(var i = 0; i<user.length; ++i){
var temp = user[i].Name.split(".");
names [i] = temp[0]+"S."+temp[1];
console.log("Name Pic: "+names [i]);
console.log("--------------------");
}
var temp = [];
for(var i = 0; i<user.length; ++i){
temp [i] = '<img src="https://s3-us-west-2.amazonaws.com/galshaharbucket/'+names [i]+'">'
}
var result = temp.join("");
res.send('<!DOCTYPE HTML><html><head></head><body>'+result+'</body></html>');
console.log("============================");
})
})
app.get('/GetAllPictures', function (req, res) {
userSchema.find({}, function(err, user){
if(err) throw err;
var names = [];
console.log("DB: Get All Pictures from MongoDB");
for(var i = 0; i<user.length; ++i){
var temp = user[i].Name.split(".");
names [i] = temp[0]+"S."+temp[1];
}
var temp = [];
for(var i = 0; i<user.length; ++i){
temp [i] = '<img src="https://s3-us-west-2.amazonaws.com/galshaharbucket/'+names [i]+'">'
}
var result = temp.join("");
res.send('<!DOCTYPE HTML><html><head></head><body>'+result+'</body></html>');
console.log("============================");
})
})
// Upload an image
app.get('/Upload/:Path', function (req, res) {
var knox = require('knox').createClient({
key: 'AKIAJ4ROKKJBECGFSYIA'
, secret: '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'
, bucket: 'galshaharbucket'
});
var file = req.params.Path;
console.log(file);
var upload_name = "upload_"+ file; // or whatever you want it to be called
knox.putFile(file, upload_name, {
"Content-Type": "image/jpeg"
}, function (err, result) {
if (err != null) {
return console.log(err);
} else {
console.log("Uploaded to amazon S3");
console.log("--------------------");
}
});
})
});
// The function that recieve the name from mongo and display it
function getImageById(){
Input = document.getElementById("imageId");
Size = document.getElementById("imageSize");
size=Size.value;
alert(size);
if(Input.value==""){
alert("Please Enter Id Number Between 1-33");
return;
}
url = "https://s3-us-west-2.amazonaws.com/galshaharbucket/PicById/"+Input.value+"/"+Size.value;
//url = 'https://s3-us-west-2.amazonaws.com/galshaharbucket/'+name;
if(Size.value=="L"){
popupWindow = window.open(
url,'popUpWindow','height=658,width=1120,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
}
if(Size.value=="M"){
popupWindow = window.open(
url,'popUpWindow','height=525,width=820,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
}
if(Size.value=="S"){
popupWindow = window.open(
url,'popUpWindow','height=330,width=520,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
}
}
function getImageByColor(str){
Input = document.getElementById("imageColor");
if(Input.value==""){
alert("Please Enter a Color: red / green / blue / yellow");
return;
}
else{
path = "https://s3-us-west-2.amazonaws.com/galshaharbucket/PicByColor/"+Input.value;
popupWindow = window.open(
path,'popUpWindow','height=608,width=1020,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
}
}
function uploadImage(){
var fileChooser = document.getElementById('path');
var results = document.getElementById('results');
var file = fileChooser.files[0];
alert(file.name);
path = "https://s3-us-west-2.amazonaws.com/galshaharbucket/Upload/"+file.name+"";
popupWindow = window.open(
path,'popUpWindow','height=608,width=1020,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
}
function getAllPics(){
path = "https://s3-us-west-2.amazonaws.com/galshaharbucket/GetAllPictures";
popupWindow = window.open(
path,'popUpWindow','height=608,width=1020,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
}
How I can display the image correctly without an error ? What can cause to this error ?
Thank you,
Tom
Edit your bucket policy and make sure to have something like (or use AWS policy generator under the Bucket Permission section).
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "PublicReadGetObject",
"Effect": "Allow",
"Principal": "*",
"Action": "s3:GetObject",
"Resource": "arn:aws:s3:::galshaharbucket/*"
}
]
}
if galshaharbucket is your bucket name so it grants everyone access to the objects in the specified folder.
you can read more about bucket policies
I am writing an express app to generate a google map from geo coordinates out of photos. I am attempting to use firebase to save data about the images. The code is fully working except when I save the photo data to firebase it breaks the map rendering on the next page showing connection errors to all my local files in the console like so
So the page is rendering but the map doesn't load and nor do the images. The data I am saving to firebase is actually saving though, and If I remove the function that saves the data to firebase everything works as expected. I think it may have something to do with the way the response is being pushed but I am at a loss. In any other page where I am saving data to firebase it works fine.
Here is the code for the route that is generating the photo data and saving it to firebase:
var express = require('express');
var router = express.Router();
var util = require('util');
var fs = require('fs');
var im = require('imagemagick');
var stormpath = require('express-stormpath');
var _ = require('lodash')
var Firebase = require('firebase');
router.post("/:campaignId", stormpath.loginRequired, function(req, res, next) {
function gatherImages(files, callback) {
//accept single image upload
if (!_.isArray(files)) {
files = [files];
}
var uploads = [];
var count = 0;
files.forEach(function(file) {
fs.exists(file.path, function(exists) {
if (exists) {
var name = req.body[file.originalname];
console.log(name);
var path = file.path;
var upFile = file.name;
uploads.push({
file: upFile,
imgPath: path,
caption: name || 'no comment'
});
count++;
}
if (files.length === count) {
callback(uploads);
}
});
});
}
function getGeoLoc(path, callback) {
im.readMetadata('./' + path, function(error, metadata) {
var geoCoords = false;
if (error) throw error;
if (metadata.exif.gpsLatitude && metadata.exif.gpsLatitudeRef) {
var lat = getDegrees(metadata.exif.gpsLatitude.split(','));
var latRef = metadata.exif.gpsLatitudeRef;
if (latRef === 'S') {
lat = lat * -1;
}
var lng = getDegrees(metadata.exif.gpsLongitude.split(','));
var lngRef = metadata.exif.gpsLongitudeRef;
if (lngRef === 'W') {
lng = lng * -1;
}
var coordinate = {
lat: lat,
lng: lng
};
geoCoords = coordinate.lat + ' ' + coordinate.lng;
console.log(geoCoords);
}
callback(geoCoords);
});
}
function getDegrees(lat) {
var degrees = 0;
for (var i = 0; i < lat.length; i++) {
var cleanNum = lat[i].replace(' ', '');
var parts = cleanNum.split('/');
var coord = parseInt(parts[0]) / parseInt(parts[1]);
if (i == 1) {
coord = coord / 60;
} else if (i == 2) {
coord = coord / 3600;
}
degrees += coord;
}
return degrees.toFixed(6);
}
function processImages(uploads, callback) {
var finalImages = [];
var count = 0;
uploads.forEach(function(upload) {
var path = upload.imgPath;
getGeoLoc(path, function(geoCoords) {
upload.coords = geoCoords;
finalImages.push(upload);
count++;
if (uploads.length === count) {
callback(finalImages);
}
});
});
}
function saveImageInfo(finalImages, callback) {
var campaignId = req.param('campaignId');
var user = res.locals.user;
var count = 0;
var campaignPhotosRef = new Firebase('https://vivid-fire-567.firebaseio.com/BSB/userStore/' + user.username + '/campaigns/' + campaignId + '/photos');
finalImages.forEach(function(image) {
campaignPhotosRef.push(image, function(err) {
if (err) {
console.log(err);
} else {
count++;
if (finalImages.length === count) {
callback(finalImages);
} else {
return;
}
}
});
});
}
if (req.files) {
if (req.files.size === 0) {
return next(new Error("Why didn't you select a file?"));
}
gatherImages(req.files.imageFiles, function(uploads) {
processImages(uploads, function(finalImages) {
saveImageInfo(finalImages, function(finalImages) {
var campaignId = req.param('campaignId');
console.log(res.req.next);
res.render("uploadMapPage", {
title: "File(s) Uploaded Successfully!",
files: finalImages,
campaignId: campaignId,
scripts: ['https://maps.googleapis.com/maps/api/js?key=AIzaSyCU42Wpv6BtNO51t7xGJYnatuPqgwnwk7c', '/javascripts/getPoints.js']
});
});
});
});
}
});
module.exports = router;
This is the only file I have written trying to push multiple objects to firebase. This is my first time using Firebase and Stormpath so any help would be greatly appreciated. Also one other thing that may be helpful is the error from the terminal being output when the issue happens:
POST /uploaded/-JapMLDYzPnbtjvt001X 200 690.689 ms - 2719
/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:24
?a:null}function Db(a){try{a()}catch(b){setTimeout(function(){throw b;},Math.f
^
TypeError: Property 'next' of object #<IncomingMessage> is not a function
at fn (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:899:25)
at EventEmitter.app.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/application.js:532:5)
at ServerResponse.res.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:904:7)
at /Users/jpribesh/Desktop/Code/BanditSignBoss/routes/campaigns.js:20:25
at Array.forEach (native)
at /Users/jpribesh/Desktop/Code/BanditSignBoss/routes/campaigns.js:16:18
at /Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:25:533
at Db (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:24:165)
at Ye (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:124:216)
at Ze (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/firebase/lib/firebase-node.js:123:818)
UPDATE: It seems that the connection errors are inconsistent. Sometimes the images display just fine, sometimes only some of the images get a connection error, and other times everything including the google map script gets a connection error. This is really throwing me off no idea what the issue is. Any help or suggestions is greatly appreciated!
UPDATE 2: I changed the function saving the image data to firebase to use the firebase push function callback (to indicate completion) and added a length check on the forEach loop running to save each image's data. See updated code above. I am now getting the following error for each image that is uploaded in the terminal, but the connection errors are gone:
Error: Can't set headers after they are sent.
at ServerResponse.OutgoingMessage.setHeader (http.js:689:11)
at ServerResponse.header (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:666:10)
at ServerResponse.send (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:146:12)
at fn (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:900:10)
at View.exports.renderFile [as engine] (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/jade/lib/jade.js:325:12)
at View.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/view.js:93:8)
at EventEmitter.app.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/application.js:530:10)
at ServerResponse.res.render (/Users/jpribesh/Desktop/Code/BanditSignBoss/node_modules/express/lib/response.js:904:7)
at /Users/jpribesh/Desktop/Code/BanditSignBoss/routes/campaigns.js:20:25
at Array.forEach (native)
OK I finally figured out the issue here. I did a few things to remedy my problem. First I converted the route to use next properly to separate out each part of the route out, it processes the images, then saves, then renders. Here is the updated code from that file:
var express = require('express');
var router = express.Router();
var util = require('util');
var fs = require('fs');
var im = require('imagemagick');
var stormpath = require('express-stormpath');
var _ = require('lodash')
var Firebase = require('firebase');
function processData(req, res, next) {
function gatherImages(files, callback) {
//accept single image upload
if (!_.isArray(files)) {
files = [files];
}
var uploads = [];
var count = 0;
files.forEach(function(file) {
fs.exists(file.path, function(exists) {
if (exists) {
var name = req.body[file.originalname];
console.log(name);
var path = file.path;
var upFile = file.name;
uploads.push({
file: upFile,
imgPath: path,
caption: name || 'no comment'
});
count++;
}
if (files.length === count) {
callback(uploads);
}
});
});
}
function getGeoLoc(path, callback) {
im.readMetadata('./' + path, function(error, metadata) {
var geoCoords = false;
if (error) throw error;
if (metadata.exif.gpsLatitude && metadata.exif.gpsLatitudeRef) {
var lat = getDegrees(metadata.exif.gpsLatitude.split(','));
var latRef = metadata.exif.gpsLatitudeRef;
if (latRef === 'S') {
lat = lat * -1;
}
var lng = getDegrees(metadata.exif.gpsLongitude.split(','));
var lngRef = metadata.exif.gpsLongitudeRef;
if (lngRef === 'W') {
lng = lng * -1;
}
var coordinate = {
lat: lat,
lng: lng
};
geoCoords = coordinate.lat + ' ' + coordinate.lng;
console.log(geoCoords);
}
callback(geoCoords);
});
}
function getDegrees(lat) {
var degrees = 0;
for (var i = 0; i < lat.length; i++) {
var cleanNum = lat[i].replace(' ', '');
var parts = cleanNum.split('/');
var coord = parseInt(parts[0]) / parseInt(parts[1]);
if (i == 1) {
coord = coord / 60;
} else if (i == 2) {
coord = coord / 3600;
}
degrees += coord;
}
return degrees.toFixed(6);
}
function processImages(uploads, callback) {
var finalImages = [];
var count = 0;
uploads.forEach(function(upload) {
var path = upload.imgPath;
getGeoLoc(path, function(geoCoords) {
upload.coords = geoCoords;
finalImages.push(upload);
count++;
if (uploads.length === count) {
callback(finalImages);
}
});
});
}
if (req.files) {
if (req.files.size === 0) {
return next(new Error("Why didn't you select a file?"));
}
gatherImages(req.files.imageFiles, function(uploads) {
processImages(uploads, function(finalImages) {
req.finalImages = finalImages;
req.campaignId = req.param('campaignId');
next();
});
});
}
}
function saveImageInfo(req, res, next) {
var user = res.locals.user;
var count = 0;
var campaignPhotosRef = new Firebase('https://vivid-fire-567.firebaseio.com/BSB/userStore/' + user.username + '/campaigns/' + req.campaignId + '/photos');
var finalImages = req.finalImages;
finalImages.forEach(function(image) {
campaignPhotosRef.push(image, function(err) {
if (err) {
console.log(err);
} else {
console.log('Data saved successfully: ' + image);
count++;
if (req.finalImages.length === count) {
next();
}
}
});
});
}
router.post("/:campaignId", stormpath.loginRequired, processData, saveImageInfo, function(req, res) {
res.render("uploadMapPage", {
title: "File(s) Uploaded Successfully!",
files: req.finalImages,
campaignId: req.campaignId,
scripts: ['https://maps.googleapis.com/maps/api/js?key=AIzaSyCU42Wpv6BtNO51t7xGJYnatuPqgwnwk7c', '/javascripts/getPoints.js']
});
});
module.exports = router;
Then I realized in the tracestack I included in my question part of it was tracing back to another file I was using firebase in. I was using a call to .on() instead of using .once() when pulling my data. After reorganizing my route and changing all my calls to .on to .once for firebase data everything is now working properly. I think the real issue here was the use of .on() on my firebase calls instead of .once() as the .on() watches for events continually rather than .once which obviously only watches for it once.
I am using express.js, mongoose, jquery and socket.io
I am trying to pass the object "allFightScores" to the socket on clientside. Here is where I am requesting information from mongoose in my routes/index.js:
var models = require('../models/index.js');
var passport = require('passport');
var crawl = require('../crawler.js');
var flash = require('connect-flash');
var express = require('express');
var app = express();
var server = require('http').createServer(app);
var io = require('socket.io').listen(server);
exports.submit_scores = function(req, res){
var scored_fight = new models.UserScore({
"f1": req.body.f1,
"f2": req.body.f2,
"f1_roundScores": req.body.f1_roundScores,
"f2_roundScores": req.body.f2_roundScores,
"f1_score": req.body.f1_score,
"f2_score": req.body.f2_score,
"user_email": req.body.user_email
});
models.UserScore.find({
"f1": scored_fight.f1,
"f2": scored_fight.f2,
"f1_score": scored_fight.f1_score,
"f2_score": scored_fight.f2_score,
"user_email": scored_fight.user_email
}, function(err, data){
if (data.length === 0){
scored_fight.save(function(err, user_fight){
if (err) {
return "error";
}
else {
models.UserScore.find({"f1": user_fight.f1, "f2": user_fight.f2}, function(err, allFightScores){
console.log("from index-routes " +allFightScores);
io.sockets.emit('show scores', allFightScores)
})
}
})
//put a callback on the user_scored_fight data, also emit that data with the average scores;
res.json(scored_fight);
}
else if (data[0].f1 === scored_fight.f1 && data[0].f2 === scored_fight.f2 && data[0].user_email === scored_fight.user_email) {
res.json(200);
console.log("data already judged.");
}
})
}
Here is where I am catching the data on my clientside (public/javascripts/script.js):
jQuery(function($){
socket = io.connect();
var $group_f1_score = $('#gf1_score');
var $group_f2_score = $('#gf2_score');
socket.on('show scores', function(mongooseData){
console.log("mongooseData from scripts " + mongooseData)
$group_f1_score.empty();
$group_f2_score.empty();
//sum fighter scores for all user submissions
var f1_sumScore = 0;
var f2_sumScore = 0;
for (var i = 0; i < mongooseData.length; i++){
f1_sumScore += mongooseData[i].f1_score;
f2_sumScore += mongooseData[i].f2_score;
}
//get the simple average
var f1_avgScore = f1_sumScore/mongooseData.length;
var f2_avgScore = f2_sumScore/mongooseData.length;
$group_f1_score.append(f1_avgScore);
$group_f2_score.append(f2_avgScore);
})
})
I am not sure why the data is not emitting to my clientside and am out of ideas. Am I querying the data and passing it in the callback correctly?
I can't see this in your code:
server.listen(port);
I am just learning server-side JavaScript so please bear with any glaring mistakes I've made.
I am trying to write a file parser that operates on HTML files in a directory and returns a JSON string once all files have been parsed. I started it with a single file and it works fine. it loads the resource from Apache running on the same machine, injects jquery, does the parsing and returns my JSON.
var request = require('request'),
jsdom = require('jsdom'),
sys = require('sys'),
http = require('http');
http.createServer(function (req, res) {
request({uri:'http://localhost/tfrohe/Car3E.html'}, function (error, response, body) {
if (!error && response.statusCode == 200) {
var window = jsdom.jsdom(body).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
// jQuery is now loaded on the jsdom window created from 'body'
var emps = {};
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step++;
var name = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = name.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2];
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
emps = JSON.stringify(emps);
//console.log(emps);
res.writeHead(200, {'Content-Type': 'text/plain'});
res.end(emps);
});
} else {
res.writeHead(200, {"Content-Type": "text/plain"});
res.end("empty");
//console.log(response.statusCode);
}
});
}).listen(8124);
Now I am trying to extend this to using the regular file system (fs) and get all HTML files in the directory and parse them the same way and return a single combined JSON object once all files have been parsed. Here is what I have so far but it does not work.
var sys = require("sys"),
fs = require("fs"),
jsdom = require("jsdom"),
emps = {};
//path = '/home/inet/www/media/employees/';
readDirectory = function(path) {
fs.readdir(path, function(err, files) {
var htmlfiles = [];
files.forEach(function(name) {
if(name.substr(-4) === "html") {
htmlfiles.push(name);
}
});
var count = htmlfiles.length;
htmlfiles.forEach(function(filename) {
fs.readFile(path + filename, "binary", function(err, data) {
if(err) throw err;
window = jsdom.jsdom(data).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step++;
var empname = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = empname.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2]
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
});
});
});
});
}
readDirectory('/home/inet/www/media/employees/', function() {
console.log(emps);
});
In this particular case, there are 2 html files in the directory. If i console.log(emps) during the htmlfiles.forEach() it shows me the results from the first file then the results for both files together the way I expect. how do I get emps to be returned to readDirectory so i can output it as desired?
Completed Script
After the answers below, here is the completed script with a httpServer to serve up the detail.
var sys = require('sys'),
fs = require("fs"),
http = require('http'),
jsdom = require('jsdom'),
emps = {};
var timed = setInterval(function() {
emps = {};
readDirectory('/home/inet/www/media/employees/', function(emps) {
});
}, 3600000);
readDirectory = function(path, callback) {
fs.readdir(path, function(err, files) {
var htmlfiles = [];
files.forEach(function(name) {
if(name.substr(-4) === "html") {
htmlfiles.push(name);
}
});
var count = htmlfiles.length;
htmlfiles.forEach(function(filename) {
fs.readFile(path + filename, "binary", function(err, data) {
if(err) throw err;
window = jsdom.jsdom(data).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
var imagecount = jquery("tr td img").length;
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step += 1;
var empname = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = empname.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2]
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
count -= 1;
if (count <= 0) {
callback(JSON.stringify(emps));
}
});
});
});
});
}
var init = readDirectory('/home/inet/www/media/employees/', function(emps) {
});
http.createServer(function (req, res) {
res.writeHead(200, {'Content-Type': 'text/plain'});
res.end(JSON.stringify(emps));
}).listen(8124);
That sure is a lot of code a couple of mistakes.
You're never calling the callback function you supply to readDirectory
You need to keep track of the files you have parsed, when you parsed all of them, call the callback and supply the emps
This should work:
var sys = require("sys"),
fs = require("fs"),
jsdom = require("jsdom"),
//path = '/home/inet/www/media/employees/';
// This is a nicer way
function readDirectory(path, callback) {
fs.readdir(path, function(err, files) {
// make this local
var emps = {};
var htmlfiles = [];
files.forEach(function(name) {
if(name.substr(-4) === "html") {
htmlfiles.push(name);
}
});
// Keep track of the number of files we have parsed
var count = htmlfiles.length;
var done = 0;
htmlfiles.forEach(function(filename) {
fs.readFile(path + filename, "binary", function(err, data) {
if(err) throw err;
window = jsdom.jsdom(data).createWindow();
jsdom.jQueryify(window, 'http://ajax.googleapis.com/ajax/libs/jquery/1.4.4/jquery.min.js', function (window, jquery) {
jquery("tr td img").parent().parent().each(function(){
var step = 0;
jquery(this).children().each(function(index){
if (jquery(this).children('img').attr('src') !== undefined) {
step++;
var empname = jquery(this).parent().next().next().children('td:nth-child('+step+')').children().children().text();
var name_parts = empname.split(",");
var last = name_parts[0];
var name_parts = name_parts[1].split(/\u00a0/g);
var first = name_parts[2]
emps[last + ",_" + first] = jquery(this).children('img').attr('src');
}
});
});
// As soon as all have finished call the callback and supply emps
done++;
if (done === count) {
callback(emps);
}
});
});
});
});
}
readDirectory('/home/inet/www/media/employees/', function(emps) {
console.log(emps);
});
You seem to be doing this a tad wrong
readDirectory('/home/inet/www/media/employees/', function() {
console.log(emps);
});
But you've defined your function as:
readDirectory = function(path) {
Where is the callback argument? Try this:
readDirectory = function(path, callback) {
then under emps[last + ",_" + first] = jquery(this).children('img').attr('src'); put
callback.call(null, emps);
Your callback function will be called however many times your loop goes on for. If you want it to return all of them at once, you'll need to get a count of how many times the loop is going to run for, count up until that number then call your callback when the emps array is full of the data you need.