Im trying to make a webscraper(educational puposes), and I got really far, but this little issue is bugging me.
I made a request callback function, and im trying to get lines 75-78 to work. However to get this to work, I need PDF_LISTS and PDF_LINKS to initilaze to the right values.
I've already tried to make them global variables, and what not, for some reason that doesnt work. So my question is: How do I make a callback function that will call that for loop (75-78) and succesfully initilaze PDF_LISTS and PDF_LINKS to the correct values ?
(Dont worry I use this on educational content, with the prof's permission). First time posting here!
// URL_LINKS has the pdf links of the pages
PDF_LINKS = [];
// URL_LIST has the names of the pdf links
PDF_LIST = [];
function fillPDF(callback) {
request(url, function(err, res, body) {
$ = cheerio.load(body);
links = $('a'); //jquery get all hyperlinks
$(links).each(function(i, link) {
var value = $(link).attr('href');
// creates objects to hold the file
if (value.substring(value.length - 3, value.length) == "pdf") {
PDF_LINKS[i] = $(link).attr('href');
PDF_LIST[i] = $(link).text();
}
})
});
}
// must decleare fillPDF variable or else you wont initilze teh variables
fillPDF() {
//HERE I WANT PDF_LINKS and PDF_LIST to be intialized to 33.....
}
for (j = 0; j < PDF_LIST.length; j++) {
request(PDF_LINKS[j]).pipe(fs.createWriteStream(PDF_LIST[j]));
}
You may push your values into arrays using array's push method, avoiding array's element to be undefined.
You can put your final for loop into a function, and then use fillPDF();
You also need to call fillPDF's callback once the request is over.
PDF_LINKS = [];
PDF_LIST = [];
function fillPDF(callback) {
request(url, function(err, res, body) {
$ = cheerio.load(body);
links = $('a');
$(links).each(function(i, link) {
var value = $(link).attr('href');
if (value.slice(-3) == "pdf") {
PDF_LINKS.push(value);
PDF_LIST.push($(link).text());
}
})
callback();
});
}
function writePDF() {
for (j = 0; j < PDF_LIST.length; j++) {
request(PDF_LINKS[j]).pipe(fs.createWriteStream(PDF_LIST[j]));
}
}
fillPDF(writePDF);
Related
I have two functions that I am trying to run when I load the page. dataRetrieve() gets the data from a firebase collection. populate() is supposed to populate a boxlist with the entries retrieved from dataRetrieve(). The main problem is that it lists the array as empty when I run populate() after dataRetrieve() no matter what I try. The last thing I tried was this:
async function dataRetrieve(){
const getAdmins = firebase.functions().httpsCallable('getAdmins');
// Passing params to data object in Cloud functinon
getAdmins({}).then((results) => {
admins = results;
console.log("admins retrieved");
console.log(admins);
}).then(() => {
populate();
});
}
async function populate(){
let list = document.getElementById("user-list");
//loop through users in out Users object and add them to the list
for (var i = 0; i < admins.length; i++) {
let newItem = document.createElement('option');
newItem.innerHTML = admins[i].first + " " +admins[i].last;
newItem.id = admins[i].uid;
if (i == 0) {
newItem.className = "active";
}
console.log(newItem.innerHTML + " " + newItem.id)
list.appendChild(newItem);
}
updateResponse(list.firstChild);
list.size = admins.length;
console.log(document.getElementById("user-list").size)
//collect all the list items
let listItems = list.querySelectorAll('option');
//loop through the list itmes and add a click listener to each that toggles the 'active' state
for (var i = 0; i < listItems.length; i ++) {
listItems[i].addEventListener('click', function(e) {
if (!e.target.classList.contains('active')) {
for (var i = 0; i < listItems.length; i ++) {
listItems[i].classList.remove('active');
}
e.target.classList.add('active');
updateResponse(e.target);
}
})
}
}
also, admins is a global variable listed at the start of the script:
var admins = [];
I am trying to run all this onload so I can instantly generate the list
I thought that .next would cause it to wait to get the values before running, but even making results a parameter and transferring it directly into the function that way gives an undefined array. I don't understand why the function insists on calling on old data. Pls help.
I'm not sure what updateResponse function does. If it's not returning a promise then I'd make the populate function synchronous first. Also do you really need to use admins array somewhere else apart from populate function that it is a global variable? If not then I'd just pass it as a parameter.
async function dataRetrieve() {
const getAdmins = firebase.functions().httpsCallable('getAdmins');
// Passing params to data object in Cloud function
const results = await getAdmins({})
console.log("admins retrieved");
console.log(results);
// Passing results in populate function
populate(results.data)
// If your function returns an array, pass the array itself
}
function populate(admins) {
let list = document.getElementById("user-list");
//loop through users in out Users object and add them to the list
// Using a for-of loop instead so no need to worry about checking the length here
for (const admin of admins) {
let newItem = document.createElement('option');
newItem.innerHTML = admin.first + " " + admin.last;
newItem.id = admin.uid;
//if (i == 0) {
// newItem.className = "active";
//}
console.log(newItem.innerHTML + " " + newItem.id)
list.appendChild(newItem);
}
updateResponse(list.firstChild);
// rest of the logic
}
I guess you know how to check when the page loads. call the retrieve function when the page is loaded. Then you should call the populate function at the end of the retrieve function. this makes sure that the populate function is called after you get all the data
I have an CSV parsing function in JavaScript which gets data (movie names) from CSV and gets data using Ajax call in loop.
movies = new Array();
for (var i = 1; i < allData.length; i++) {
var mName = allData[i][0];
var mPath = allData[i][1];
// console.log(decodeURIComponent(mName));
$.get(apiCall, function showData(data) {
if (data) {
mData = data.results;
if (mData.length > 1) {
var urlData = new URLSearchParams(this.url);
var movie_name = urlData.get('query');
movies.push(movie_name);
}
}
})
}
If data got more then one record for any movie it will save it as a conflict in array.
Problem is, I can access movies array inside inner if (but it is in iteration so I can't use that) and at loop end it is not accessible. How can I access that?
You should not make api calls inside a for loop. Instead do this,
movies = new Array();
function makeApiCallForEntireArray(index, arr, cb){
if(arr.length == index){
cb(true);
return;
}
$.get(apiCall, function showData(data) {
if (data) {
mData = data.results;
if (mData.length > 1) {
var urlData = new URLSearchParams(this.url);
var movie_name = urlData.get('query');
movies.push(movie_name);
}
}
makeApiCallForEntireArray(index+1, arr, cb);
})
}
makeApiCallForEntireArray(0, allData, function(){
//api calls finished
//movie accesssible here with all the data
});
You will not be able to access the content added in movies array at the end of the loop because ajax requests are still in progress. You need to do this some other way so that you can be sure that its end of asynch ajax calls.
Im going to use the answer of #Jaromanda X in my question here Can't get the summation in for loop
Promise.all(allData.map(function(d) {
return $.get(apiCall, function showData(data){
return data.results;
});
})).then(function(res) {
//push your movies here...the result of your apiCall is inside the res variable
});
First off I thought I'd get this problem solved after this great thread: nodeJs callbacks simple example
However, I am still unsure of how to proceed. Like the title hints at: I need a callback given to a callback who already has node arguments being passed to it
Code:
(function()
var reqs = {
http: require('http'),
path: require('path'),
fs: require('fs')
};
reqs.http.createServer(function (request, response) {
response.writeHead(200, {
'Content-Type': 'text/plain'
});
response.end('Hello HTTP!');
}).listen(8080);
var printCount = function(count) {
console.log(count);
};
var callCount = function(err, list, callback) {
var count = 0;
if(err) throw err;
// console.log(err);
for (var i = 0; i < list.length; i++) {
// console.log(reqs.path.extname(list[i]));
if(reqs.path.extname(list[i]) === ".png" || reqs.path.extname(list[i]) === ".jpg")
{
count++;
console.log(count);
}
}
callback(count);
};
//count images from executing directory
var countImages = function(dirName) {
var imageCount = reqs.fs.readdir(dirName, callCount(null, null, printCount));
};
countImages(__dirname);
})();
I think the key line here is
var imageCount = reqs.fs.readdir(dirName, callCount(null, null, printCount));
I'm passing the printCount function to the same function that is called back after fs.readdir asynchronously executes but it seems that me passing null to its first two arguments is overriding Node functionality that passes the callback err and list automatically. How can I get around this? I simply want to count the images in the executing directory and be able to store that value in my main function.
Pretty new to event style programming. Any extra reading suggestions are welcome. There is tons of content out there but I really want to get this up and running for a meeting this weekend. Thanks guys!
you can't quite do what you are doing, you are doing callCount(null, null, printCount) which executes the function. But you need to pass a function as a callback. What you want is something like the following, which captures the call back you want and returns a function you can pass as a callback to your api call
var callCount = function(callback) {
return function(err, list) {
var count = 0;
if(err) throw err;
// console.log(err);
for (var i = 0; i < list.length; i++) {
// console.log(reqs.path.extname(list[i]));
if(reqs.path.extname(list[i]) === ".png" || reqs.path.extname(list[i]) === ".jpg")
{
count++;
console.log(count);
}
}
callback(count);
}
}
and then
reqs.fs.readdir(dirName, callCount(printCount));
In my Express route, I am trying to return a list of elements that I am grabbing from MongoDB using Mongoose. I'm basically iterating through an array of items, and making MongoDB calls to get the parameter objects that each item has. However, I'm having trouble making sure that I get all the parameters before I send the response. I've tried using promises, other async library functions, etc, but none of them have seemed to work.
The current iteration of the code looks like this (I have tried a lot of different things):
exports.findAll = function(req, res){
Flow.find({}, function(err, items) {
console.log(items);
var payload = {}
var params = [];
for (var i=0; i < items.length; i++) {
var count2 = 0;
async.whilst(
function() {
return ((items[i]) && (count2 < items[i].params.length));
},
function(callback) {
Parameter.findById(items[i].params[count2], function(err, out) {
params.push(out);
count2++;
callback();
});
},
function(err) {
console.log(params);
var payload = {
"flows": items,
"params": params
};
res.send(payload);
console.log('success: flows found');
}
);
}
This code sends a payload with params not being completely full.
What would be a good way to deal with this? Honestly I just want these database calls to be synchronous, but I just can't figure out how to make this work.
This doesn't really seem necessary as you can actually use the $in operator with all the results from your first query:
Flow.find({},function(err,items) {
var ids = [];
// blocking? yes, but should be minor - do better if there are problems
for ( var i=0; i < items.length; i++ ) {
for ( var n=0; n < items[i].params.length; n++ ) {
ids.push( items[i].params[n] );
}
}
Parameter.find({ "_id": { "$in": ids } },function(err,params) {
res.send({ "flows": items, "params": params });
});
});
So there should be no reason to execute multiple queries inside an async loop, or loops as your code seems to be missing as the direct cause of the problem there.
I have an array of Facebook users (userList) and I want to store the number of mutual friends for each user in the array as a property (mfCount). I have checked that I am getting the correct number of mutual friends if I put in an individual user, but I'm not sure why I can't add this value to each user in the array?
function getfriends() {
FB.api('/me/friends', function(response) {
userList = userList.concat(response.data);
userCount = response.data.length;
for( i=0; i<response.data.length; i++) {
userId = response.data[i].id;
FB.api('/me/mutualfriends/'+userId+'/', function(response) {
userList[i].mfCount = response.data.length;
userCount--;
if(userCount === 0) { display_results();}
});
}
});
}
Have a look at the implementation below.
I've broken it out into multiple functions to separate each step.
When you're dealing with loops and callbacks, it becomes very important to keep track of what scope your anonymous functions are being defined in.
You can theoretically do it all in a one-liner like you were writing...
...but it gets very, very confusing as you go further and further into nested-callbacks.
One solution would be to make every variable inside each function 100% global, so that only i needs to have an enclosed reference. That's not really pretty, though.
Look through each function and take note of what parameters are going into the functions each step calls (or closures for callbacks). They're all needed (whether you separate them this way, or through closures in a one-liner or whatever).
The following worked just fine for me, inside of the Facebook developer sandbox (first time using the API).
The logs were for my benefit to see how the data was coming out, and to keep a basic stack-trace.
var userList = [],
userCount = 0;
function getfriends () {
//console.log("getFriends");
var url = "/me/friends";
FB.api(url, function (response) {
if (response.error && response.error.message) { return false; }
userList = userList.concat(response.data);
userCount = response.data.length;
compareAllFriends();
});
}
function compareAllFriends () {
//console.log("compareAllFriends");
var i = 0, l = userCount, userID;
for (; i < l; i += 1) {
userID = userList[i].id;
compareFriendsWith (i, userID);
}
}
function compareFriendsWith (i, id) {
//console.log("compareFriendsWith", i, id);
var path = "/me/mutualfriends/",
url = path + id + "/";
FB.api(url, (function (i) {
return function (response) {
//console.log(i, response);
var numFriends = (response.data) ? response.data.length : 0;
setMutualFriends(i, numFriends);
userCount -= 1;
//console.log(userCount);
if (userCount === 0) {
display_results();
//console.log("DISPLAYING");
}
};
}(i)));
}
function setMutualFriends (i, friendcount) { userList[i].mfCount = friendcount; }