OK, so I have a situation where I cannot just fire thousands of requests to an API server.
I have a Node process (no UI) that I need to have process each API response/update sequentially, waiting for completion before sending the next request.
I may be making this more complicated than I think - not sure. I can only figure out how to do this with recursive calls, but this results in a stack overflow as there can be thousands of records. The general process is this:
get rows from SQL table with ID's (result)
formulate and send of an API call to retrieve ID's info
if returned data has image data, write it back to SQL table
wait on this process so not to bombard API server with thousands of requests all at once
repeat until last ID is processed (can be thousands, more than stack space)
Here's sample code (not actual so ignore syntax errors if any)...
UPDATED: actual running code with sensitive items removed
var g_con = null; //...yeah I know, globals are bad
//
// [ found updating ]
//
function getSetImage(result, row, found) {
if(row >= result.length) { //...exit on no row or last row processed
con.end();
return;
}
item = result[row]; //...next SQL row
if((item !== undefined) && (item.autoid !== undefined)) {
//...assemble API and send request
//
let url = 'https://...API header...'
+ item.autoid
+ '...API params...';
request(url, (error, response, body) => {
if(response.statusCode !== 200)
throw('Server is not responding\n' + response.statusMessage);
let imageData = JSON.parse(body);
if((imageData.value[0] !== undefined) &&
(imageData.value[0].DETAIL !== undefined) &&
(imageData.value[0].DETAIL.Value.length) ) {
//...post back to SQL
//
found++;
console.log('\n' + item.autoid + '/['+ item.descr + '], ' + 'Found:' + found);
qry = 'update inventory set image = "'+imageData.value[0].DETAIL.Value+'" where autoid = "'+item.autoid+'";';
g_con.query(qry, (err) => {
if (err) {
console.log('ERROR:',err.message, '\nSQL:['+err.sql+']\n');
throw err.message;
}
});
row++;
setTimeout(()=>{getSetImage(result, row, found)}, 0); //...nested call after SQL
} else {
row++;
process.stdout.write('.'); //...show '.' for record, but no image
setTimeout(()=>{getSetImage(result, row, found)}, 0); //...nested call after SQL
}
}); //...request callback
}
// } else {
// throw '\nERROR! result['+row+'] undefined? Images found: '+found;
// }
}
//
// [ main lines ]
//
(() => {
let params = null;
try {
params = JSON.parse(fs.readFileSync('./config.json'));
//...load autoids array from SQL inventory table - saving autoids
// autoids in INVENTRY join on par_aid's in INVENTRYIMAGES
//
g_con = mysql.createConnection(params.SQLConnection);
g_con.connect((err) => { if(err) {
console.log('ERROR:',err.message);
throw err.message;
}
});
//...do requested query and return data or an error
//
let qry = 'select autoid, descr from inventory order by autoid;';
g_con.query(qry, (err, results, flds) => {
if (err || flds === undefined) {
console.log('ERROR:',err.message, '\nSQL:['+err.sql+']\n');
throw err.message;
}
console.log('Results length:',results.length);
let row = 0;
let found = 0;
getSetImage(results, row, found);
});
}
catch (err) {
console.log('Error parsing config parameters!');
console.log(err);
}
})();
So here's the answer using Promises (except for MySQL):
//
// [ found updating ]
//
async function getSetImage(data) {
for(let item of data) {
if(item && item.autoid) {
//...assemble API and send request
//
let url = g_URLHeader + g_URLPartA + item.autoid + g_URLPartB;
let image = await got(url).json().catch(err => {
console.log(err);
err.message = 'API server is not responding';
throw err;
});
if(image && image.value[0] && image.value[0].DETAIL &&
image.value[0].DETAIL.Value.length ) {
console.log('\nFound: ['+item.autoid+' - '+item.descr
+ '] a total of ' + g_found + ' in ' + g_count + ' rows');
g_found++;
//...post back to SQL
//
let qry = 'update inventory set image = "'
+ image.value[0].DETAIL.Value
+ '" where autoid = "'
+ item.autoid+'";';
await g_con.query(qry, (err) => {
if (err) {
console.log('ERROR:',err.message, '\nSQL:['+err.sql+']\n');
throw err.message;
}
});
} else {
process.stdout.write('.'); //...show '.' for record, but no image
} //...if/else image.value
g_count++;
} //...if item
} //...for()
}
As I've said in all my comments, this would be a ton simpler using promises and async/await. To do that, you need to switch all your asynchronous operations over to equivalents that use promises.
Here's a general outline based on the original pseudo-code you posted:
// use got() for promise version of request
const got = require('got');
// use require("mysql2/promise" for promise version of mysql
async function getSetImage(data) {
for (let item of data) {
if (item && item.id) {
let url = uriHeader + uriPartA + item.id + uriPartB;
let image = await got(url).json().catch(err => {
// log and modify error, then rethrow
console.log(err);
err.msg = 'API Server is not responding\n';
throw err;
});
if (image.value && image.value.length) {
console.log('\nFound image for ' + item.id + '\n');
let qry = 'update inventory set image = "' + image.value + '" where id = "' + item.id + '";';
await con.query(qry).catch(err => {
console.log('ERROR:', err.message, '\nSQL:[' + err.sql + ']\n');
throw err;
});
}
} else {
// no image data found
process.stdout.write('.'); //...show '.' for record, but no image
}
}
}
//...sql query is done, returning "result" - data rows
getSetImage(result).then(() => {
console.log("all done");
}).catch(err => {
console.log(err);
});
Some notes about this code:
The request() library is no longer getting new features and is in maintenance mode and you need to change to a different library to get built-in promise support. You could use request-promise (also in maintenance mode), but I recommend one of the newer libraries such as got() that is more actively being developed. It has some nice features (automatically checks status for you to be 2xx, built-in JSON parsing, etc...) which I've used above to save code.
mysql2/promise has built-in promise support which you get with const mysql = require('mysql2/promise');. I'd recommend you switch to it.
Because of the user of async/await here, you can just loop through your data in a regular for loop. And, no recursion required. And, no stack build-up.
The way promises work by default, any rejected promises will automatically terminate the flow here. The only reason I'm using .catch() in a couple places is just for custom logging and tweaking of the error object. I then rethrow which propagates the error back to the caller for you.
You can tweak the error handling to your desire. The usual convention with promises is to throw an Error object (not a string) and that's often what callers are expecting to see if the promise rejects.
This code can be easily customized to log errors and continue on to subsequent items in the array. Your original code did not appear to do that so I wrote it to abort if it got an error.
Related
Here's the complete code I'm trying to run. I will, however, censor the url of the website for personal reasons. I am trying to scrape titles from a very slow website which occasionally sends error status codes in the 4xx range because that, so to handle that I throw an error then retry fetching the same pages after a couple of seconds. The problem is that this error is never caught by the "catch" block. Any idea what I am doing wrong?
const URL = "https://webpage.com/page=";
const SELECTOR = ".post-title.entry-title>a";
const MAX_CONCURRENT_FETCH = 5;
const NB_OF_PAGES = 125;
const ERROR_WAIT_TIME = 20000;
const titles = [];
const parser = new DOMParser();
function fetchPages() {
helper(1, MAX_CONCURRENT_FETCH);
function helper(first, last) {
const requests = [];
console.log("\n" + "*".repeat(40));
for (let i = first; i <= last; i++) {
requests.push(fetch(URL + i));
console.log(`Fetching page: ${i}`);
}
console.log("*".repeat(40) + "\n");
try {
Promise.all(requests).then(responses => {
responses.forEach(async(response, i) => {
/* if no code errors, parse the page, extract the titles then add them to the "titles" array */
if (response.ok) {
const htmlPage = await response.text();
console.log("\n" + "*".repeat(40));
console.log(`Extracting titles from page: ${first+i}`);
console.log("*".repeat(40) + "\n");
const htmlObject = parser.parseFromString(htmlPage, "text/html");
htmlObject.querySelectorAll(SELECTOR).forEach(node => {
titles.push(node.textContent);
console.log(`Title: ${node.textContent}`);
});
}
/* code error, get out of the forEach method by throwing an error */
else {
console.log("*".repeat(40) + "\n");
console.log("Throwing error...");
throw {
response: response
}
}
});
/* keep fetching until the last page */
if (last + 1 < NB_OF_PAGES)
helper(last + 1, last + MAX_CONCURRENT_FETCH);
/* once all pages have been fetched, show the result on screen */
else showResult();
});
}
/* catch the error that was throw inside the forEach method, show the status code error and how long before the pages will be fetched again */
catch (err) {
console.log("Error captured...");
console.log(`Status Code: ${err.response.status}, retrying in ${ERROR_WAIT_TIME/1000} seconds.`);
console.log("*".repeat(40) + "\n");
setTimeout(() => helper(first, last), ERROR_WAIT_TIME);
}
}
}
function showResult() {
const uniqueTitles = [...new Set(titles)].sort();
const titlesUl = document.createElement("ul");
uniqueTitles.forEach(title => {
const titleLi = document.createElement("li");
titleLi.textContent = title;
titlesUl.appendChild(titleLi);
});
document.body.innerHTML = "";
document.body.appendChild(titlesUl);
}
fetchPages();
The try...catch statement contains Promises.all(). This is a non-blocking asynchronous function. The try...catch only applies to initializing the Promises.all() function, it does not catch resolved Promises.
Also note that you are sending all fetch requests in parallel, this may overload the server somewhat. It would be more polite to fire requests one by one using async await.
I'm new to learning Node.js, so I'm still getting used to asynchronous programming and callbacks. I'm trying to insert a record into a MS SQL Server database and return the new row's ID to my view.
The mssql query is working correctly when printed to console.log. My problem is not knowing how to properly return the data.
Here is my mssql query - in addJob.js:
var config = require('../../db/config');
async function addJob(title) {
var sql = require('mssql');
const pool = new sql.ConnectionPool(config);
var conn = pool;
let sqlResult = '';
let jobID = '';
conn.connect().then(function () {
var req = new sql.Request(conn);
req.query(`INSERT INTO Jobs (Title, ActiveJD) VALUES ('${title}', 0) ; SELECT ##IDENTITY AS JobID`).then(function (result) {
jobID = result['recordset'][0]['JobID'];
conn.close();
//This prints the correct value
console.log('jobID: ' + jobID);
}).catch(function (err) {
console.log('Unable to add job: ' + err);
conn.close();
});
}).catch(function (err) {
console.log('Unable to connect to SQL: ' + err);
});
// This prints a blank
console.log('jobID second test: ' + jobID)
return jobID;
}
module.exports = addJob;
This is my front end where a modal box is taking in a string and passing it to the above query. I want it to then receive the query's returned value and redirect to another page.
// ADD NEW JOB
$("#navButton_new").on(ace.click_event, function() {
bootbox.prompt("New Job Title", function(result) {
if (result != null) {
var job = {};
job.title = result;
$.ajax({
type: 'POST',
data: JSON.stringify(job),
contentType: 'application/json',
url: 'jds/addJob',
success: function(data) {
// this just prints that data is an object. Is that because I'm returning a promise? How would I unpack that here?
console.log('in success:' + data);
// I want to use the returned value here for a page redirect
//window.location.href = "jds/edit/?jobID=" + data;
return false;
},
error: function(err){
console.log('Unable to add job: ' + err);
}
});
} else {
}
});
});
And finally here is the express router code calling the function:
const express = require('express');
//....
const app = express();
//....
app.post('/jds/addJob', function(req, res){
let dataJSON = JSON.stringify(req.body)
let parsedData = JSON.parse(dataJSON);
const addJob = require("../models/jds/addJob");
let statusResult = addJob(parsedData.title);
statusResult.then(result => {
res.send(req.body);
});
});
I've been reading up on promises and trying to figure out what needs to change here, but I'm having no luck. Can anyone provide any tips?
You need to actually return a value from your function for things to work. Due to having nested Promises you need a couple returns here. One of the core features of promises is if you return a Promise it participates in the calling Promise chain.
So change the following lines
jobID = result['recordset'][0]['JobID'];
to
return result['recordset'][0]['JobID']
and
req.query(`INSERT INTO Jobs (Title, ActiveJD) VALUES ('${title}', 0) ; SELECT ##IDENTITY AS JobID`).then(function (result) {
to
return req.query(`INSERT INTO Jobs (Title, ActiveJD) VALUES ('${title}', 0) ; SELECT ##IDENTITY AS JobID`).then(function (result) {
and
conn.connect().then(function () {
to
return conn.connect().then(function () {
You may need to move code around that is now after the return. You would also be well served moving conn.close() into a single .finally on the end of the connect chain.
I recommend writing a test that you can use to play around with things until you get it right.
const jobId = await addJob(...)
console.log(jobId)
Alternatively rewrite the code to use await instead of .then() calls.
I am using Web3 to get a list of smart contract and then iterate (loop) through each of them to get multiple variables of the smart contracts. Unfortunately, I am not able to execute a function once all the async calls within my loop are done.
Logic:
Get the number of Games
For i = 0 until i < Games
Get the smart contract address (from Smart Contract)
Get the Start Time value (from Smart Contract)
Get the End Time value (from Smart Contract)
(Once all calls of the loop are done)
Order the Games by Start Time
Display the Games
When I do console.log(contractInstanceGame) (step 3) after my loop, the array is empty as the previous calls are not completed.
Code:
var contractInstanceGame = [];
contractAddressRegistry = '0xc0b55bff524b953a5248ccb5a60b00647052ae8b';
// Fetch all the contract addresses
let contractRegistry = web3.eth.contract(contractAbiRegistry);
let contractInstanceRegistry = contractRegistry.at(contractAddressRegistry);
contractInstanceRegistry.numberOfGames(function(err, res) {
if (!err) {
let numberOfGames = res.toNumber();
for (i = 0; i < numberOfGames; i++) {
let contractGame = web3.eth.contract(contractAbiGame);
contractInstanceRegistry.games(i, function(err, res) {
if (!err) {
// Create the object
contractInstanceGame[i] = [];
contractInstanceGame[i]['Caller'] = contractGame.at(res);
contractInstanceGame[i]['Address'] = res;
// Get the Start Time
contractInstanceGame[i]['Caller'].startTime(function(err, res) {
if (!err) {
contractInstanceGame[i]['StartTime'] = res.toNumber();
} else {
console.error("Could not get the Game start time: " + err);
}
});
// Get the End Time
contractInstanceGame[i]['Caller'].endTime(function(err, res) {
if (!err) {
contractInstanceGame[i]['EndTime'] = res.toNumber();
} else {
console.error("Could not get the Game end time: " + err);
}
});
} else {
console.error("Could not get the Game contract address: " + err);
}
});
}
console.log(contractInstanceGame);
// Perform the Order of contractInstanceGame by Start Time`
// Display contractInstanceGame
} else {
console.error("Could not get the number of Games: " + err);
}
EDIT:
Examples of the solutions I tried:
Using then() on the call itself does not work as I am facing the following error:
inpage.js:14 Uncaught Error: The MetaMask Web3 object does not support synchronous methods like eth_call without a callback parameter. See https://github.com/MetaMask/faq/blob/master/DEVELOPERS.md#dizzy-all-async---think-of-metamask-as-a-light-client for details.
contractInstanceRegistry.numberOfGames()
.then(function(x){
console.log(x);
});
I also tried to Promisifed and use await, but I am facing the error: Uncaught SyntaxError: await is only valid in async function
let numberOfGames = promisify(cb => contractInstanceRegistry.numberOfGames(cb));
let numberOfGamesX = await numberOfGames;
Im currently learning Node JS and Javascript. Im trying to develop an application to read and download Mangas.
First i want to build up a Database. Here is where i encounter the problem.
When i run my program on my server which has 4GB of RAM (to fill my DB) i get the Fatal Error Javascript heap out of memory.
When i run the same program on my local computer with 8GB of RAM, everything works as its supposed to.
Here is the code where i fill up my DB with Manga Chapters.
function insertChapters(callback){
sql_selectAll("Mangas", function (selectError, selectResult) {
if(!selectError){
selectResult.forEach(function (mangaItem, mangaIndex) {
gin.mangafox.chapters(mangaItem.Title)
.then(chapters =>{
chapters.forEach(function (chapterItem) {
var Chapter = {
Title: chapterItem.name,
NR: chapterItem.chap_number,
URL: chapterItem.src,
MangaID: mangaItem.MangaID,
MangaName: mangaItem.Title,
VolumeNR: chapterItem.volume
};
sql_insertInto("Chapters", Chapter, function (insertError, insertResult) {
if(!insertError){
var insertedChapter =
"------------------------------------------------------------------------\n" +
" Added new Chapter: " + Chapter.NR + " For: " + mangaItem.Title + "\n" +
"------------------------------------------------------------------------\n";
callback(null,insertedChapter ,insertResult);
}
else{
if(insertError.code === "ER_DUP_ENTRY") {
var dupEntry = "------------------------------------------------------------------------\n" +
" Duplicate Entry: Chapter: " + Chapter.NR + " For: " + mangaItem.Title + "\n" +
"------------------------------------------------------------------------\n"
callback(null, dupEntry, null);
}
else{
callback(insertError, null, null);
}
}
})
})
})
.catch(fetchChapterError => {
callback(fetchChapterError, null, null);
})
})
}
else{
callback(selectError, null, null);
}
});
}`
I dont really know how to solve this problem, because im not sure what the problem is:
Is the problem simply that i dont have enough RAM in my server?
Do i have a problem with my code? Am i leaking memory somewhere?
Is it possible that my code needs that much memory?
Thank you so much in advance, i appreciate every help i can get.
EDIT:
function sql_selectAll(tableName, callback){
var sql = 'SELECT * FROM `' + tableName + '`';
connection.query(sql, function (err, selectAllResult) {
callback(err, selectAllResult);
})
}
function sql_insertInto(tableName, insertionObject, callback) {
var sql = 'insert into ' + tableName + ' set ?';
connection.query(sql, insertionObject, function (err, insertResult) {
callback(err, insertResult);
});
}
You are calling the mangafox endpoint for every SQL result simultaneously, rather than one at a time or in chunks. You can try using async/await for this. I'm not familiar with the sql API you are using, but assuming that the methods sql_selectAll and sql_insertInto return promises if they aren't given a callback, you can rewrite your function to something like this:
async function insertChapters(callback) {
try {
const mangas = await sql_selectAll("Mangas");
for (const mangaItem of mangas) {
const chapters = await gin.mangafox.chapters(mangaItem.Title);
for (const chapterItem of chapters) {
const Chapter = {
Title: chapterItem.name,
NR: chapterItem.chap_number,
URL: chapterItem.src,
MangaID: mangaItem.MangaID,
MangaName: mangaItem.Title,
VolumeNR: chapterItem.volume
};
try {
const insertResult = await sql_insertInto("Chapters", Chapter);
const insertedChapter =
"------------------------------------------------------------------------\n" +
" Added new Chapter: " + Chapter.NR + " For: " + mangaItem.Title + "\n" +
"------------------------------------------------------------------------\n";
callback(null, insertedChapter, insertResult);
} catch (error) {
if (error.code === "ER_DUP_ENTRY") {
const dupEntry = "------------------------------------------------------------------------\n" +
" Duplicate Entry: Chapter: " + Chapter.NR + " For: " + mangaItem.Title + "\n" +
"------------------------------------------------------------------------\n";
callback(null, dupEntry, null);
} else {
throw error;
}
}
}
}
} catch (error) {
callback(error, null, null);
}
}
Notice the await keywords - these are allowed in async functions and essentially tell the JS engine to pause execution of the async function and do something else until the awaited promise resolves.
Also notice that you can use regular old fashioned try/catch blocks when handling promises using the await keyword! I didn't do anything special to handle the fetchChapterError because it will be handled by the outermost catch block! Also, for the insertion errors, if it's not a duplicate entry, we can just rethrow the error and let that get caught by the outermost catch block as well.
If your SQL functions do not return promises, then on Node version 8 (latest) you can use util.promisify:
const util = require('util');
sql_selectAll = util.promisify(sql_selectAll);
If you are not using Node 8, then you can use another promisify implementation (for example, check out bluebird), or you can write your own pretty easily (read the MDN article on promises).
I am trying to build a result_arr of location objects to send as a response, but I am not sure how to send the response only when the entire array has been built. The response contains an empty array, but result_arr array is filled after the response has already been sent.
function handle_getLocations(req, res, done){
var con_id = req.body["contractor_id"];
console.log("Contractor ID :" + con_id.toString());
var result_arr = new Array();
employee.getActiveByContractor(con_id, function(err, employees){
if (err) {
console.log("Logging error in json:\n");
res.json({"code" : 100, "status" : "Error in connection database"});
return;
};
if(employees.length === 0) done(null);
for(var i=0;i<employees.length;i++){
assignment.getLocationsByEmployeeID(employees[i].employee_id, function(err, locations){
if (err) {
console.log("Logging error in json:\n");
res.json({"code" : 100, "status" : "Error in connection database"});
return;
};
console.log("Number of locations: " + locations.length.toString());
for(var j=0;j<locations.length;j++){
console.log("Assignment is: " + locations[j].assignment_id.toString());
location.getAllByID(locations[j].location_id, function(err, loc){
if (err) {
console.log("Logging error in json:\n");
res.json({"code" : 100, "status" : "Error in connection database"});
return;
};
var loc_obj = {};
loc_obj.display_name = loc[0].display_name;
loc_obj.location_id = loc[0].location_id;
console.log("Location is: " + loc_obj.display_name);
console.log("Location ID is: " + loc_obj.location_id.toString());
result_arr.push(loc_obj);
console.log(result_arr);
done(result_arr);
});
};
});
};
});
};
I know that in nodejs the idea is to not make blocking calls, but I am not sure how to make sure all of the information is sent in the response.
You are calling many asynchronous functions in the loop and do not have any logic to check when all they are completed to send the response back to the client.
I modified your code a bit to add the logic in VannilaJS way which is very messy below but working code.
Anyways I would suggest you to use promise based/asynchronous modules
like async, bluebird etc to handle this nicely. Using them, you
can improve readability and easy maintainability in your code to get
rid of callback hells and other disadvantages.
async http://caolan.github.io/async/
bluebird https://github.com/petkaantonov/bluebird
You can read more about this on the below link,
https://strongloop.com/strongblog/node-js-callback-hell-promises-generators/
function handle_getLocations(req, res, done){
var con_id = req.body["contractor_id"];
console.log("Contractor ID :" + con_id.toString());
var result_arr = new Array();
employee.getActiveByContractor(con_id, function(err, employees){
if (err) {
console.log("Logging error in json:\n");
res.json({"code" : 100, "status" : "Error in connection database"});
return;
};
if(employees.length === 0) done(null);
var employeesChecked = 0;
var errors = [];
function sendResponse(){
if(employeesChecked === employees.length) {
res.json(result_arr);
//done(result_arr); // If required, uncomment this line and comment the above line
}
}
for(var i=0;i<employees.length;i++){
assignment.getLocationsByEmployeeID(employees[i].employee_id, function(err, locations){
var locationsChecked = 0;
if (err) {
console.log(err);
errors.push(err);
++employeesChecked;
sendResponse();
} else {
console.log("Number of locations: " + locations.length.toString());
for(var j=0;j<locations.length;j++){
console.log("Assignment is: " + locations[j].assignment_id.toString());
location.getAllByID(locations[j].location_id, function(err, loc){
++locationsChecked;
if (err) {
console.log(err);
errors.push(err);
} else {
var loc_obj = {};
loc_obj.display_name = loc[0].display_name;
loc_obj.location_id = loc[0].location_id;
console.log("Location is: " + loc_obj.display_name);
console.log("Location ID is: " + loc_obj.location_id.toString());
result_arr.push(loc_obj);
console.log(result_arr);
}
if(locationsChecked === locations.length) {
++employeesChecked;
}
sendResponse();
});
}
}
});
}
});
}
In order not to consume much time during the request-response life time, you need to separate each logic in a single endpoint, but sometimes as your case, you may need to hit the database more than a time to fetch data that depends on another, so assuming that employee.getActiveByContractor returning promise and as it's an async method so you need to to chain it with .then like this:
employee.getActiveByContractor(con_id)
.then(function(employees) {
Also, you my need to read about Promise.
As Basim says, this is a good time to use Promises.
getLocationsByEmployeeID and getAllByID are async so they won't be done by the time the loop is finished and you send your response.
Promises are built into the latest Node.js version.
Learn here: https://www.udacity.com/course/javascript-promises--ud898
Suggestion:
Create promise wrappers for getLocationsByEmployeeID and getAllByID
Use Promise.all to make sure every getLocationsByEmployeeID and getAllByID are complete
return your http response within Promise.all's "success" callback