I'm trying to scrape data from a word document with node.js.
My current problem is that the console.log inside the juice block below prints the expected value of the variable. If I move it outside the juice block, the value is completely lost. I tried putting return
/**
 * Question version: tries to return the "margin-left" of `content`.
 *
 * When `content` has no inline margin-left, the element is written into a
 * temporary .htm file and juice is run on that file; the margin is then read
 * from innerLoad("p") on juice's output.
 *
 * Both fs.writeFile and juice take completion callbacks, so the final
 * `return newMargin` executes before the juice callback has assigned a value.
 *
 * @param id      Suffix for the temp file "bin/temp/temp_<id>.htm".
 * @param content Object exposing css() and toString() — presumably a cheerio selection; confirm.
 * @returns The inline margin-left, or undefined when the juice fallback is taken.
 */
function getMargin(id, content){
var newMargin = content.css("margin-left");
if(newMargin === undefined){
var htmlOfTarget = content.toString(),
whereToCut = theRaw.indexOf("<div class=WordSection1>");
// Rebuild a full page: everything in theRaw before WordSection1, then the target element.
fs.writeFile("bin/temp/temp_" + id + ".htm", theRaw.slice(0, whereToCut) + htmlOfTarget + "</body> </html>", function (err){
if (err) {
throw err;
}
});
// juice is started right away; it does not wait for the writeFile callback above.
juice("bin/temp/temp_" + id + ".htm", function (err, html) {
if (err) {
throw err;
}
var innerLoad = cheerio.load(html);
newMargin = innerLoad("p").css("margin-left");
console.log(newMargin); // the value is available here, inside the juice callback
});
}
console.log(newMargin);// undefined on the fallback path: the juice callback has not run yet
return newMargin;
}
I think the problem lies with fs.writeFile and juice being async functions. I just have no idea how to get around it. I have to be able to call getMargin at certain points, in sequential order.
As mentioned in comment, change your program flow to run in callbacks, after async code has completed...
/**
 * Resolves the "margin-left" of `content` and hands it to `callback`.
 *
 * If `content` carries an inline margin-left it is reported directly.
 * Otherwise the element is written into a temporary page
 * ("bin/temp/temp_<id>.htm"), juice is run on that file once the write has
 * completed, and the margin is read from innerLoad("p") on juice's output.
 *
 * The callback is always invoked asynchronously and exactly once, on every
 * path, so callers can sequence their work inside it.
 *
 * @param {string|number} id  Suffix used for the temp file name.
 * @param {object} content    Object exposing css() and toString()
 *                            (presumably a cheerio selection — confirm).
 * @param {function} callback Node-style callback: callback(err, margin).
 */
function getMargin(id, content, callback){
  var inlineMargin = content.css("margin-left");
  if (inlineMargin !== undefined) {
    // Defer so the callback never fires before getMargin() has returned.
    process.nextTick(function () {
      callback(null, inlineMargin);
    });
    return;
  }

  var tempPath = "bin/temp/temp_" + id + ".htm";
  var whereToCut = theRaw.indexOf("<div class=WordSection1>");
  var page = theRaw.slice(0, whereToCut) + content.toString() + "</body> </html>";

  fs.writeFile(tempPath, page, function (writeErr) {
    if (writeErr) {
      // Throwing here could not be caught by the caller; report it instead.
      return callback(writeErr);
    }
    // Only start juice once the file is known to be on disk.
    juice(tempPath, function (juiceErr, html) {
      if (juiceErr) {
        return callback(juiceErr);
      }
      var innerLoad = cheerio.load(html);
      callback(null, innerLoad("p").css("margin-left"));
    });
  });
}
// Usage: hand getMargin a continuation; it is run once the async work is complete.
getMargin("myId", "myContent", () => {
  // continue program execution in here....
});
Related
I am closing my writing connection in a "finally" block but even after going into this block the program seems to go back into the try block.
Here is the code :
/**
 * Question version: writes one heading per asset row into myDoc and issues a
 * follow-up query per asset, then calls end() from a `finally` block.
 *
 * The try/catch/finally only wraps the *call* to con.query; the query
 * callbacks run later, so "a" is logged (and end() is called) before any "b".
 *
 * @param startX X position passed to myDoc.text().
 * @param startY Y position of the first heading; advanced by 50 per asset.
 */
function printAllAssetsAndThreats(startX, startY) {
try {
con.query("SELECT * FROM Kunde1Assets;", function (err, result, fields) {
for (var i in result) {
console.log("b");
myDoc.font('Helvetica-Bold')
.fontSize(20)
.text('str', startX, startY);
var xxx = result[i].KundenAssetID;
startY = startY + 50;
// now fetch all threats for this asset:
// NOTE(review): the id is concatenated into the SQL text, followed by a space before the closing quote — confirm this is intended.
var sql = "SELECT DISTINCT c.AID, b.GID, b.Name, c.Name AS Asset FROM Kunde1Verbindungen a, Gefährdungen b, Kunde1Assets c WHERE a.KundenAssetID = \"" + xxx + " \"AND a.GID = b.GID AND c.KundenAssetID = a.KundenAssetID";
con.query(sql, function (err, result2, fields) {
// The loop body is empty: the threat rows are not used yet.
for (var i2 in result2) {
}
});
// ---- end of per-asset work ----
}
});
} catch (e) {
// Errors raised inside the query callbacks do not reach this handler.
} finally {
// Runs as soon as con.query() has been called, not when its callback finishes.
console.log("a");
end();
}
}
So even after "a" has been printed, "b" is still printed in the console.
Thank you for your help !!!
Tim
EDIT
I have tried rewriting the code, e.g. with callbacks, but it's still not working as it is supposed to.
// Call site: the document is closed from the callback handed to start().
start(()=>{
myDoc.end();
});
/**
 * Question's callback attempt: draws a heading, loads all assets and starts
 * one start2() lookup per asset.
 *
 * @param callback Meant to run after the last asset; as written it is never
 *                 invoked (see the notes on the two `if` blocks below).
 */
function start(callback){
console.log("hello");
myDoc.font('Helvetica-Bold')
.fontSize(20)
.text('str', 44,44);
con.query("SELECT * FROM Kunde1Assets;", function (err, result, fields) {
console.log("hello");
var counter=0;
for(var i in result){
console.log("hello");
// start2 is given a no-op callback, so this loop never learns when its query has finished.
start2(result[i].KundenAssetID, ()=>{});
counter++;
if (counter==result.length){
// `break` leaves the loop immediately, so the callback() after it is unreachable.
console.log("yo");break;callback();
}
// Not reached when counter equals result.length, because of the break above.
if (counter==result.length){
console.log("yo");callback();
}
}
//
});
}
/**
 * Question's callback attempt: queries the threats linked to one asset and
 * writes a line into myDoc for each returned row.
 *
 * @param kaid     Asset id (KundenAssetID) concatenated into the SQL text.
 * @param callback Accepted but never invoked in this version.
 */
function start2(kaid, callback){
var sql = "SELECT DISTINCT c.AID, b.GID, b.Name, c.Name AS Asset FROM Kunde1Verbindungen a, Gefährdungen b, Kunde1Assets c WHERE a.KundenAssetID = \"" + kaid + " \"AND a.GID = b.GID AND c.KundenAssetID = a.KundenAssetID";
con.query(sql, function (err, result2, fields) {
for (var i2 in result2) {
console.log(kaid +"---"+ result2[i2].Name);
// Every row is drawn with the same placeholder text at the same position (44, 44).
myDoc.font('Helvetica-Bold')
.fontSize(20)
.text('str', 44,44);
}
});
}
EDIT:
Tried it with promises (not working yet)
// Stream the document to node.pdf and close it once start()'s promise resolves.
myDoc.pipe(fs.createWriteStream('node.pdf'));
// `promise` is a top-level variable; start() below also refers to it by this name.
var promise = start();
promise.then(function(result){
console.log(result);
myDoc.end();
})
/**
 * Question's promise attempt: loads all assets, writes each asset name into
 * myDoc and starts a start2() lookup per asset.
 *
 * @returns A Promise resolved with the asset rows as soon as the loop has
 *          finished starting the lookups (it does not wait for them).
 */
function start(){
return new Promise (function(resolve, reject){
con.query("SELECT * FROM Kunde1Assets;", function (err, result, fields) {
console.log(result);
for (var i in result){
myDoc.font('Helvetica-Bold')
.fontSize(20)
.text(result[i].Name, 30, 20+(i*30));
// promise2 is assigned but never used afterwards.
var promise2 = start2 (result[i].KundenAssetID);
// This .then is attached to `promise` (not declared in this function), not to promise2.
promise.then(function(name){
for (var i2 in name){
myDoc.font('Helvetica-Bold')
.fontSize(20)
// The first argument is a string literal, so the literal text 'result[i].Name' is drawn.
.text('result[i].Name', 30, 20+(i2*30));
}
});
}
resolve(result);
// `Error` is the global constructor, so this condition is always truthy;
// the reject() comes after resolve() and cannot change the settled promise.
if (Error) reject();
});
});
}
/**
 * Question's promise attempt: queries the threats linked to one asset.
 *
 * @param kaid Asset id (KundenAssetID) concatenated into the SQL text.
 * @returns A Promise resolved with the query's result rows (result2).
 */
function start2(kaid){
return new Promise(function( resolve, reject){
var sql = "SELECT DISTINCT c.AID, b.GID, b.Name, c.Name AS Asset FROM Kunde1Verbindungen a, Gefährdungen b, Kunde1Assets c WHERE a.KundenAssetID = \"" + kaid + " \"AND a.GID = b.GID AND c.KundenAssetID = a.KundenAssetID";
con.query(sql, function (err, result2, fields) {
// The loop body only contains a disabled debug log.
for (var i2 in result2) {
//console.log(kaid +"---"+ result2[i2].Name);
}
resolve(result2);
// `Error` is the global constructor (always truthy), not the `err` argument;
// reject() after resolve() has no effect.
if (Error) reject();
});
});
}
Your try/catch/finally block is running synchronously. It will hit the finally within the first tick of the JS event loop. However (I assume, based on the structure of the callback), the con.query() function is asynchronous. That means that its callback will run at least one tick after your finally. Therefore, the query callback will always run after everything inside the finally.
If you want it to work as expected, look into asynchronous methods of achieving the try/catch/finally structure. There are million ways to skin a cat in regards to this, the async library, promises, etc. Or just keep any code you want to run after the connection inside the connection callback.
Either way, the issue is a combination of sync and async code running alongside each other.
I'm trying to generate a hashchain using the following code:
var async = require('async');
var _ = require('lodash');
// Not referenced by the code shown in this snippet.
var offset = 1e7;
// Total number of hashes to generate.
var games = 1e7;
// Countdown of hashes still to generate; decremented by the insert tasks in loop().
var game = games;
// Current head of the chain; each insert task replaces it with genGameHash(serverSeed).
var serverSeed = '238asd1231hdsad123nds7a182312nbds1';
/**
 * Question version: generates the hash chain in batches of up to 1000 inserts
 * and re-invokes itself until `game` reaches 0.
 *
 * The insert tasks never invoke the `cb` they receive and query() is called
 * without a completion callback, so async.parallel is never told that a task
 * has finished.
 *
 * @param cb Called once all hashes have been generated.
 */
function loop(cb) {
// Batch size: at most 1000, or whatever is left.
var parallel = Math.min(game, 1000);
var inserts = _.range(parallel).map(function() {
// This inner `cb` is the per-task callback from async.parallel; it shadows loop's `cb`.
return function(cb) {
serverSeed = genGameHash(serverSeed);
game--;
query('INSERT INTO `hash` SET `hash` = ' + pool.escape(serverSeed));
};
});
async.parallel(inserts, function(err) {
if (err) throw err;
// Report overall progress as a percentage.
var pct = 100 * (games - game) / games;
console.log('PROGRESS: ' + pct.toFixed(2) + '%')
if (game > 0){
loop(cb);
}else {
console.log('Done');
cb();
}
});
}
// Start generating; the final seed is logged once loop() reports completion.
loop(function() {
console.log('Finished with SEED: ', serverSeed);
});
When I run this code it generates a hash chain of 1k hashes, while I'm trying to generate a chain of 1m hashes. It seems like async isn't working properly, but I have no idea why: there are no errors in the console, nothing that points out a flaw.
Any ideas?
Can you run it with a smaller number of games (about 3000)?
Your parallel function never sends the done signal because the callback of each inserts item is never triggered. I think the query function takes two parameters: query(sql: string, callback?: (err, result) => void) (TypeScript style).
I suggest you change your logic and flow as in the code block below:
// Build one task per insert. Every task must call `cb` exactly once so that
// async.parallel can tell when the batch has finished.
var inserts = _.range(parallel).map(function() {
  return function(cb) {
    serverSeed = genGameHash(serverSeed);
    query('INSERT INTO `hash` SET `hash` = ' + pool.escape(serverSeed), function(err) {
      if (err) {
        // The chain head has already advanced, so a failed insert would leave
        // a gap in the stored chain: report it instead of carrying on silently.
        return cb(err);
      }
      // Count the hash only once it has actually been stored.
      game--;
      cb();
    });
  };
});
In your code you have used async.parallel. I think that is not a good idea: too many connections would be opened (1m). async.parallelLimit is recommended for this case.
I posted a question before and realized my problem actually was async functions. I managed to work out most of it, but I got one little problem left. Using async I used waterfall to create an order for the some queries...
exports.getMenu = function(id_restaurant, callback){
async.waterfall([
async.apply(firstQuery, id_restaurant),
secondQuery,
thirdQuery,
fourthQuery,
formMenu
], function(err, result){
if(err){
console.log(err);
}
callback(result);
});
};
Everything works until fourthQuery, where I have to loop to get all dishes of a menu.
/**
 * Question version: starts one dish query per menu section.
 *
 * The final callback at the bottom runs right after pool.getConnection() has
 * been called, i.e. before any query callback has stored its rows, so the
 * next waterfall step receives an empty `dishes` array.
 *
 * @param array_totalP      Not used in this function.
 * @param array_nombresSecc Section names; its length is used as the loop bound and it is passed on unchanged.
 * @param array_secciones   Section ids, one query parameter each.
 * @param callback          Waterfall callback: callback(err, dishes, array_nombresSecc).
 */
function fourthQuery(array_totalP, array_nombresSecc, array_secciones, callback){
var size = array_nombresSecc.length;
// These two re-declarations assign each parameter to itself.
var array_secciones = array_secciones;
var array_nombresSecc = array_nombresSecc;
var dishes = [];
pool.getConnection(function(err, connection) {
if(err) {
console.log(err);
callback(true);
return;
}
for (var i = 0; i < size; i++) {
connection.query("SELECT name, price FROM menu_product WHERE id_seccion = ? AND active = 1", [array_secciones[i]],
function(err, results2) {
if(err) {
console.log(err);
callback(true);
return;
}
console.log("Result query 4 " + JSON.stringify(results2));
// `i` is a single function-scoped variable shared by every callback; once the
// loop has finished it equals `size`, so each result is written to dishes[size].
dishes[i] = results2;
console.log("VALOR PLATILLOS EN i : " + JSON.stringify(dishes[i]));
// this prints the result but only if it has a value over 2
});
};
}); // pool
console.log("I'm sending " + dishes); // this logs an empty array
callback(null, dishes, array_nombresSecc);
};
So what I can see from printing the value of 'i' on each pass is that it always has the value 2, because that is the value of 'size'. Also, even though it's saving the results at index 2, I believe the callback is being called before the for loop is done, because my fifth function is receiving an empty array.
How can i make my code wait to callback until my for loop is done?
NOTE: Sorry, part of my code is in spanish, tried to translate the important parts of it.
There are a few ways to handle this, one is to look into promise architecture. Promise.all will let you supply one callback to handle the values from each child promise.
To use what you've already got, however, I'd push the values into your dishes array, rather than assigning them specifically to i indexes, then check the size of that array at the end of each connection. When the array length matches the size, fire the callback. (as seen below)
If you need a way to tie each result to that specific i value, I'd recommend pushing them as an object
// Keep an index next to each result so the original order can be restored later.
// NOTE(review): `i` must be the index of the query that produced `results2`; with a
// `var` loop counter the callback sees the counter's final value — confirm how `i` is captured.
dishes.push({'index': i, 'dish': results2})
Afterward, if you need the array of just dishes, you can sort the array by that index value and run a map function.
// Put the entries back into index order, then unwrap them to the bare dish lists.
dishes.sort(function (left, right) {
  return left.index - right.index;
});
dishes = dishes.map(function (entry) {
  return entry.dish;
});
Here's the code adjusted:
/**
 * Loads the dishes of every menu section and passes them on once ALL queries
 * have completed.
 *
 * - Results are stored by section index, so `dishes[k]` always belongs to
 *   `array_secciones[k]` regardless of the order in which queries finish.
 * - `callback` is invoked exactly once: with the first error seen, or with the
 *   complete result.
 * - The pooled connection is released after the last query has settled.
 * - With no sections, the callback still fires (with an empty array).
 *
 * @param array_totalP      Not used here; received from the previous waterfall step.
 * @param array_nombresSecc Section names; its length defines how many sections are queried.
 * @param array_secciones   Section ids, one per query.
 * @param callback          callback(err) on failure, callback(null, dishes, array_nombresSecc) on success.
 */
function fourthQuery(array_totalP, array_nombresSecc, array_secciones, callback) {
  var size = array_nombresSecc.length;
  var dishes = new Array(size);

  if (size === 0) {
    // No query would ever complete, so report the empty result directly.
    process.nextTick(function() {
      callback(null, dishes, array_nombresSecc);
    });
    return;
  }

  pool.getConnection(function(err, connection) {
    if (err) {
      console.log(err);
      callback(err);
      return;
    }

    var pending = size;
    var firstError = null;

    // Records one query outcome; finishes the whole step after the last one.
    function settle(index, queryErr, results2) {
      if (queryErr) {
        console.log(queryErr);
        firstError = firstError || queryErr;
      } else {
        console.log("Result query 4 " + JSON.stringify(results2));
        dishes[index] = results2;
      }

      pending--;
      if (pending > 0) {
        return;
      }

      connection.release();
      if (firstError) {
        callback(firstError);
        return;
      }
      console.log("I'm sending " + JSON.stringify(dishes));
      callback(null, dishes, array_nombresSecc);
    }

    // A function call per section gives each query callback its own `index`.
    function runQuery(index) {
      connection.query("SELECT name, price FROM menu_product WHERE id_seccion = ? AND active = 1", [array_secciones[index]],
        function(queryErr, results2) {
          settle(index, queryErr, results2);
        });
    }

    for (var i = 0; i < size; i++) {
      runQuery(i);
    }
  });
}
Since you're already using the async, I would suggest replacing the for() loop in fourthQuery with async.each().
The updated fourthQuery would look like this:
/**
 * Loads the dishes of every menu section with async.map and passes them on
 * once all queries have completed.
 *
 * async.map collects the per-section results in the same order as
 * `array_secciones`, so no shared accumulator or index bookkeeping is needed.
 * A failing query is reported to `callback` instead of being masked as success.
 *
 * @param array_totalP      Not used here; received from the previous waterfall step.
 * @param array_nombresSecc Section names, passed on unchanged.
 * @param array_secciones   Section ids, one query each.
 * @param callback          callback(err) on failure, callback(null, dishes, array_nombresSecc) on success.
 */
function fourthQuery(array_totalP, array_nombresSecc, array_secciones, callback){
  pool.getConnection(function(err, connection) {
    if (err) {
      console.log(err);
      callback(err);
      return;
    }

    async.map(array_secciones,
      function(item, itemCallback) {
        // Runs once for each item in array_secciones.
        connection.query("SELECT name, price FROM menu_product WHERE id_seccion = ? AND active = 1", [item],
          function(queryErr, results2) {
            if (queryErr) {
              console.log(queryErr);
              return itemCallback(queryErr);
            }
            console.log("Result query 4 " + JSON.stringify(results2));
            return itemCallback(null, results2);
          });
      },
      function(mapErr, dishes) {
        // Runs after all items have been processed, or as soon as one fails.
        connection.release();
        if (mapErr) {
          callback(mapErr);
          return;
        }
        console.log("I'm sending " + JSON.stringify(dishes));
        callback(null, dishes, array_nombresSecc);
      });
  }); // pool
}
Alternatively, you can use async.map(), which handles gathering the results in the final callback so doesn't rely on the dishes variable.
Is it possible to increment the variable licznik in this block of code?
I tried something like this, but I always get 0. Could someone explain what I'm doing wrong?
// For every row, create a trade offer and add each matching inventory item
// that is not yet recorded in the `used` table.
rows.forEach(function(record) {
  var licznik = 0;
  var offer = manager.createOffer('76561198252553560');
  inventory.forEach(function(item) {
    if (licznik <= record.amount) {
      if (item.market_hash_name == record.real_name) {
        var asid = item.assetid;
        // The IIFE parameter is a separate variable that shadows the outer
        // `licznik`: it receives a copy of the current value (0), and the
        // increment below only changes that copy.
        (function(licznik) {
          // Placeholder instead of string concatenation, as in the INSERT below.
          connection.query('SELECT count(id) as wynik FROM used where asset_id = ?', [asid], function(err, wiersze) {
            if (err) throw err;
            if (wiersze[0].wynik == 0) {
              var employee = {
                asset_id: asid,
                trans_id: record.tid
              };
              connection.query('INSERT INTO used SET ?', employee, function(err, res) {
                if (err) throw err;
                offer.addMyItem(item);
                console.log(licznik);
                licznik++;
              });
            }
          });
        })(licznik);
      }
    }
  });
});
As the comment on your original question points out, I have no context for what this code is actually trying to accomplish. What I can tell you is that your callbacks supplied to connection.query are NOT fired on each iteration of the forEach. The whole reason connection.query takes a callback is because you don't know when the operation will complete. Node is designed to be asynchronous so all it does on each iteration of the forEach loop is begin the query. The callback supplied to the query could be invoked at any time which also means that a query that fired after another query could potentially fire its callback before the callback from the first query. It just depends on how long each query takes.
If you need licznik to be incremented on every iteration of the forEach then you need to increment it after your if statement.
// Sketch: count every inventory item synchronously, inside the forEach body,
// instead of inside an asynchronous query callback.
rows.forEach(function(record) {
var licznik = 0;
var offer = manager.createOffer('76561198252553560');
inventory.forEach(function(item) {
if(licznik <= record.amount) {
// .... omitted for brevity
}
licznik++; // <-- increment here, outside of the closure.
});
});
Again, I have zero clue what you're actually trying to do with that variable so this may not solve your real problem, but that's the way to get it to increment in that loop.
PS - You may not be understanding that you actually have two licznik variables here. You create a new one when you wrap all your logic in a closure function like you did. If you change the variable declared at the top of your closure function you'll see that it's not the same variable as the one outside the closure function.
// Same logic as the question, with the closure parameter renamed to make it
// visible that two different counters are involved.
rows.forEach(function(record) {
  var licznik = 0;
  var offer = manager.createOffer('76561198252553560');
  inventory.forEach(function(item) {
    if (licznik <= record.amount) {
      if (item.market_hash_name == record.real_name) {
        var asid = item.assetid;
        (function(licznik2) { // <-- notice this is a new variable
          // Placeholder instead of string concatenation, as in the INSERT below.
          connection.query('SELECT count(id) as wynik FROM used where asset_id = ?', [asid], function(err, wiersze) {
            if (err) throw err;
            if (wiersze[0].wynik == 0) {
              var employee = {
                asset_id: asid,
                trans_id: record.tid
              };
              connection.query('INSERT INTO used SET ?', employee, function(err, res) {
                if (err) throw err;
                offer.addMyItem(item);
                console.log(licznik2);
                // Only the closure's own copy changes; the outer `licznik` stays untouched.
                licznik2++;
              });
            }
          });
        })(licznik);
      }
    }
  });
});
UPDATED CODE: Hi, I'm new to JavaScript programming and I'm getting an undefined variable when trying to assign a variable from a method.
I'm using node.js and creating a redis server using the redis-client in the "client variable".
// Question script: reads "global:nextPostId" and tries to use the value right away.
var redis = require("redis");
var client = redis.createClient();
client.on("error", function (err) {
console.log("Error " + err); });
var numberPosts;
client.get("global:nextPostId", function(err, replies) {
// Assigned only when the reply callback runs.
numberPosts = replies;
console.log(numberPosts);
});
// Executes before the callback above has run, so numberPosts is still undefined here.
console.log(numberPosts);
When I call console.log inside the call back function it returns the proper value, however when I call the console.log outside of the callback function it returns "undefined". I'm trying to assign the value that is inside the callback function to the global variable numberPosts.
Any help is much appreciated, thanks.
Matt
I believe this will work:
// Use the value inside the callback, the only place where it is available.
client.get("global:nextPostId", function (err, reply) {
  if (err) {
    console.log("Error " + err);
    return;
  }
  if (reply === null) {
    // A null reply has no toString(); report it instead of crashing.
    console.log("global:nextPostId is not set");
    return;
  }
  console.log("Number of posts: " + reply.toString());
});
The call is asynchronous, so it has no return value. Instead, you have to use a callback function; only there do you have the value returned by the server.
Edit: to assign the return value to global variable, first declare global variable:
// Holder for the post count; stays an empty string until a value is assigned to it.
var _numOfPosts = "";
Then:
// Store the reply in the global once it arrives; until then the global keeps its old value.
client.get("global:nextPostId", function (err, reply) {
  if (err) {
    console.log("Error " + err);
    return;
  }
  // A null reply has no toString(); leave the global unchanged in that case.
  if (reply !== null) {
    _numOfPosts = reply.toString();
  }
});
However, the value won't be available until the asynchronous call has finished, so your original code can't work. There is no direct return value to store.
You can set timer to some reasonable response time, then have the code using the global variable in there.
Edit II: in order to call the method again once it's finished, have such code:
// Number of times GetNextPost() has been invoked so far.
var _nextPostCallCount = 0;

/**
 * Reads "global:nextPostId" and, as long as the reply is a positive number,
 * issues the next read from inside the reply callback.
 *
 * Stops when the reply is missing, not a positive number, or an error is
 * reported, and refuses to run more than the hard-coded number of times.
 * The parsed value is stored in the outer `numberPosts` variable.
 */
function GetNextPost() {
  //debug
  console.log("GetNextPost called already " + _nextPostCallCount + " times");

  //sanity check:
  if (_nextPostCallCount > 1000) {
    console.log("too many times, aborting");
    return;
  }

  // Count this call before the request is issued, so the number logged in the
  // callback is the same whether the reply arrives immediately or later.
  _nextPostCallCount++;
  var callNumber = _nextPostCallCount;

  //invoke method:
  client.get("global:nextPostId", function(err, replies) {
    if (err) {
      console.log("Error " + err);
      return;
    }
    // A missing reply cannot be converted with toString(); treat it as "not a number".
    numberPosts = replies == null ? NaN : parseInt(String(replies), 10);
    console.log("num of replies #" + callNumber + ": " + numberPosts);

    //stop condition: anything that is not a positive number ends the chain
    if (!isNaN(numberPosts) && numberPosts > 0) {
      GetNextPost();
    }
  });
}
// Start the first request; follow-up requests are issued from inside the reply callback.
GetNextPost();
This will call the method over and over until the result is 0 or you pass some hard coded limit to prevent endless loop.
Try this instead to see errors:
// Demonstration script: logs errors and replies, and shows that the return
// value of client.get() is not the stored value.
var redis = require("redis");
// Declared with `var`: assigning to an undeclared name creates an implicit
// global and is a ReferenceError in strict mode / ES modules.
var client = redis.createClient();

client.on("error", function (err) {
  console.log("Error " + err);
});

//note the error logging
var numberPosts = client.get("global:nextPostId", function (error, response) {
  if (error) {
    console.log("async: " + error);
  } else {
    console.log("programming: " + response);
  }
});

// Runs before the callback above; numberPosts holds whatever client.get()
// returned synchronously, not the reply.
console.log("is lotsa fun: " + numberPosts);
As Shadow Wizard has pointed out you are trying to use numberPosts before there is something in it, as client.get() hasn't returned anything.
Read this to get a handle on node.js flow:
http://www.scribd.com/doc/40366684/Nodejs-Controlling-Flow
I was facing the same issue when I applied the MVC pattern.
To solve the problem, I passed a render function as a callback.
In the posts Model
exports.get = function(id,render) {
client.incr('post:id:'+id, function(err, reply) {
render(reply);
});
};
In the posts Controller
exports.get = function(req, res) {
posts.get('001', function (data){res.render('index',{post:data});});
};