NodeJS x-ray web-scraper:multiple urls in a loop callback - javascript

im using x-ray, which is great but lack of tutorial. anyway, I use an array of urls named urls. In the loop ,each url fetched and return result through callback. in the callback function i need to know what was the url which was parsed. How can i know which url is returning if the callback returns only err and results? (is it really an x-ray question or js)
xrayRamiLevy = function(){
var index = 0;
for (index; index < urls.length; index++){
x(urls[index].url, '.product_item',
[{
title : '.prodDescDiv h3',
description : '.prodBrand',
imageUrl : '.image_icons_zone .image img#src',
onclick : '.image_icons_zone .image a #onclick',
}]
)
(function(err, results){
for (var i = 0; i < results.length; i++){
var s = results[i].onclick.substr(0, results[i].onclick.lastIndexOf("'"));
s = s.slice(s.lastIndexOf("'") + 1);
results[i].catalogueNumber = s;
delete results[i].onclick;
if (results[i].description !== undefined && results[i].description.length > 0)
s = results[i].description.replace(/\s+/g, ' ').trim();
results[i].description = s;
if (urls[index].category !== undefined && urls[index].category.length > 0)
results[i].categoriesIds = urls[index].category;
if (urls[index].subcategory !== undefined && urls[index].subcategory.length > 0)
results[i].subcategoriesIds = urls[index].subcategory;
}
fs.writeFile("./results.json", JSON.stringify(results, null, '\t'));
});
}
}

See example of getting the url using JS closure. Note how the URL from the array is available in the fn callback.
var Xray = require('x-ray');
var util = require('util');
var x = Xray();
var sitesToHandle = ['https://dribbble.com?x=1', 'https://dribbble.com?x=2'];
sitesToHandle.forEach((urlToHandle) => {
x(urlToHandle, 'li.group', [{
title: '.dribbble-img strong',
image: '.dribbble-img [data-src]#data-src',
}]).(function (err, results) {
console.log(`let's now handle the result of ${urlToHandle}, the results are ${util.inspect(results)}`);
});
});
p.s. side note, when handling the returned errors, you might take a look at an error handling guide that I just wrote here

I didn't fully get your example, HOWEVER:
urls.forEach(function(url, index){
//whatever you need to do to prep your call to x
var callback = x(url, '.product_item', ...);
callback(wrappedCallback(url));
}
function wrappedCallback(url) {
return function(err, results){
// url is defined here
}
};

Related

MongoDB Scheduled Trigger Save Error -- What to Return?

This problem is very annoying. So, I am making a scheduled trigger run every 24 hours. It simply gets items from one collection does some data processing then appends information to another collection. The functioning code works even when the function runs. But it will not let me save because there are "runtime" errors? Even though it was executed perfectly and returned.
Console Error
> result (JavaScript):
EJSON.parse('{"$undefined":true}')
I suppose this has something to do with returning. but when I return null I get this:
> result:
null
> result (JavaScript):
EJSON.parse('null')
when trying to save I get this at the top of the page:
runtime error during function validation
Function Code:
exports = async function() {
const usersCol = context.services.get("SchoologyDashCluster").db("SchoologyDashApp").collection("users");
const gradesCol = context.services.get("SchoologyDashCluster").db("SchoologyDashApp").collection("grades");
var usersCusor = await usersCol.find( ).toArray();
var gradesCusor = await gradesCol.find( ).toArray();
let insert = [];
for (let i = 0; i < usersCusor.length; i++) {
var user = usersCusor[i];
var userSavedGrades = gradesCusor[i].grades
var currentGrades = await getGrades(user.schoologyUID, user.consumerKey, user.secretKey);
var lastGraded = NaN;
let index = gradesCusor[i].grades.length - 1;
while (true) {
if (gradesCusor[i].grades[index].changed == 1) {
lastGraded = index;
break
}
index = index - 1;
}
console.log(lastGraded)
if (userSavedGrades[lastGraded].grades.ga == currentGrades.ga){
currentGrades = { changed : 0, time: new Date().getTime()};
} else {
currentGrades = {changed : 1, grades: currentGrades, time : new Date().getTime()};
}
gradesCol.updateOne(
{"user" : user._id},
{"$push" : {"grades" : currentGrades}}
)
}
// return usersCol.find( );
return null;
};
The answer was simple and now I feel ignorant. Instinctual I put the module imports at the top of the document. However this is incorrect and they need to be placed in the exports function, like so:
exports = function (x,y,z) {
const http = context.http;
return;
}

Undefined errors using Node.js, Mongoose, and Discord.js [Cannot read property of undefined]

I've been scouring similar problems but haven't seem to have found a solution that quite works on my end. So I'm working on a Discord bot that takes data from a MongoDB database and displays said data in the form of a discord embedded message using Mongoose. For the most part, everything is working fine, however one little section of my code is giving me trouble.
So I need to import an array of both all available users and the "time" data of each of those users. Here is the block of code I use to import said data:
for (i = 0;i < totalObj; i++){
timeArray[i] = await getData('time', i);
userArray[i] = await getData('user', i);
}
Now this for loop references a function I made called getData which obtains the data from MongoDB by this method:
async function getData(field, value){
var data;
await stats.find({}, function(err, result){
if(err){
result.send(err);
}else{
data = result[value];
}
});
if(field == "user"){
return data.user;
}else if (field == "time"){
return data.time;
}else{
return 0;
}
So that for loop is where my errors currently lie. When I try to run this code and display my data through a discord message, I get this error and the message does not get sent:
(node:13936) UnhandledPromiseRejectionWarning: TypeError: Cannot read property 'time' of undefined
Now the strange thing is, this error does not happen every time. If I continue calling the command that triggers this code from my discord server, it's almost like a 50/50 shot if the command actually shows the message or instead gives this error. It is very inconsistent.
This error is confounding me, as the undefined part does not make sense to me. The objects that are being searched for in the mongoDB collection are definitely defined, and the for loop never exceeds the number of objects present. My only conclusion is that I'm doing something wrong with my asynchronous function design. I have tried altering code to use the getData function less often, or to not use awaits or asynchronous design at all, however this leaves my final discord message with several undefined variables and an eventual crash.
If anyone has any advice or suggestions, that would be very much appreciated. Just for reference, here is the full function that receives the data, sorts it, and prepares a string to be displayed on the discord server (though the error only seems to occur in the first for loop):
async function buildString(){
var string = "";
var totalObj;
var timeArray = [];
var userArray = [];
var stopSort = false;
await stats.find({}, function(err, result){
if(err){
result.send(err);
}else{
totalObj = result.length;
}
});
for (i = 0;i < totalObj; i++){
timeArray[i] = await getData('time', i);
userArray[i] = await getData('user', i);
}
while(!stopSort){
var keepSorting = false;
for(i = 0; i < totalObj ; i++){
var target = await convertTime(timeArray[i]);
for(j = i + 1 ; j < totalObj ; j++){
var comparison = await convertTime(timeArray[j]);
if(target > comparison){
//Switch target time with comparison time so that the lower time is up front
var temp = timeArray[i];
timeArray[i] = timeArray[j];
timeArray[j] = temp;
//Then switch the users around so that the user always corresponds with their time
var userTemp = userArray[i];
userArray[i] = userArray[j];
userArray[j] = userTemp;
//The loop will continue if even a single switch is made
keepSorting = true;
}
}
}
if(!keepSorting){
stopSort = true;
}
}
//String building starts here
var placeArray = [':first_place: **1st', ':second_place: **2nd', ':third_place: **3rd', '**4th', '**5th', '**6th', '**7th', '**8th', '**9th', '**10th'];
for(i = 0; i < totalObj; i++){
string = await string.concat(placeArray[i] + ": " + userArray[i] + "** - " + timeArray[i] + " \n\n");
console.log('butt');
}
console.log("This String:" + string);
return string;
}
I think problem is you are trying to await function with callback, it will not work => access to data.time may run before data = result[value]. If you need await callback, you can use custom Promise (or use util.promisify, more info here)
Promise:
function findStats(options) {
return new Promise((resolve, reject) => {
return stats.find(options, function (err, result) {
if (err) {
return reject(err)
}
return resolve(result)
})
})
}
utils.promisify
const util = require('util');
const findStats = util.promisify(stats.find);
Now you can use await in your function
async function getData(field, value) {
try {
const result = await findStats({})
const data = result.value
if (field === 'user') {
return data.user
}
if (field === 'time') {
return data.time
}
return 0
} catch (error) {
// here process error the way you like
// or remove try-catch block and sanitize error in your wrap function
}
}

NodeJS + TinyURL - adding items to a list

Sorry if this is a stupid question, but how would I go about adding items into a list? So what I've got is a loop that basically runs through and tries to convert all the urls to tinyurls from a web scraper . It still produces an empty list for images_short. I'm not very familiar with nodejs's syntax. Here's a snippet of code, I've put some data in the images_long list:
const TinyURL = require('tinyurl');
var images_long = ['https://hypebeast.imgix.net/http%3A%2F%2Fhypebeast.com%2Fimage%2F2017%2F06%2Fadidas-skateboarding-lucas-premiere-adv-primeknit-khaki-0.jpg?fit=max&fm=pjpg&h=344&ixlib=php-1.1.0&q=90&w=516&s=728297932403d74d2ac1afa5ecdfa97d', 'https://hypebeast.imgix.net/http%3A%2F%2Fhypebeast.com%2Fimage%2F2017%2F06%2Fadidas-nmd-r1-stlt-triple-black-first-look-0.jpg?fit=max&fm=pjpg&h=344&ixlib=php-1.1.0&q=90&w=516&s=918752eba81826e4398950efc69a5141'];
var images_short = [];
for (i = 0; i < 2; i++) {
TinyURL.shorten(images_long[i], function(res) {
images_short.push(res[i]);
});
}
I still get an empty list when I changed images_short.push(res[i]); to images_short.push(res);
res is a string, so just images_short.push(res); will do the trick. Also, you should iterate with respect to the length of the variable you're indexing, and you should var your indexing variable (i):
const TinyURL = require('tinyurl');
var images_long = [
'https://hypebeast.imgix.net/http%3A%2F%2Fhypebeast.com%2Fimage%2F2017%2F06%2Fadidas-skateboarding-lucas-premiere-adv-primeknit-khaki-0.jpg?fit=max&fm=pjpg&h=344&ixlib=php-1.1.0&q=90&w=516&s=728297932403d74d2ac1afa5ecdfa97d',
'https://hypebeast.imgix.net/http%3A%2F%2Fhypebeast.com%2Fimage%2F2017%2F06%2Fadidas-nmd-r1-stlt-triple-black-first-look-0.jpg?fit=max&fm=pjpg&h=344&ixlib=php-1.1.0&q=90&w=516&s=918752eba81826e4398950efc69a5141'];
var images_short = [];
for (var i = 0; i < images_long.length; i++) {
TinyURL.shorten(images_long[i], function(res) {
images_short.push(res);
});
}
The tinyurl library is async.
Is we use native map, the resulting callback wouldn't be returned if we try to console.log(images_short) until all the links in the array have been shortened.
We can however, use async and specificically use async.map to return the results like the example below.
const TinyURL = require('tinyurl');
const async = require('async');
var images_long = [
'https://hypebeast.imgix.net/http%3A%2F%2Fhypebeast.com%2Fimage%2F2017%2F06%2Fadidas-skateboarding-lucas-premiere-adv-primeknit-khaki-0.jpg?fit=max&fm=pjpg&h=344&ixlib=php-1.1.0&q=90&w=516&s=728297932403d74d2ac1afa5ecdfa97d',
'https://hypebeast.imgix.net/http%3A%2F%2Fhypebeast.com%2Fimage%2F2017%2F06%2Fadidas-nmd-r1-stlt-triple-black-first-look-0.jpg?fit=max&fm=pjpg&h=344&ixlib=php-1.1.0&q=90&w=516&s=918752eba81826e4398950efc69a5141'];
function shorten(item, cb) {
TinyURL.shorten(item, function(res) {
cb(null, res);
});
}
async.map(images_long, shorten, (err, results) => {
console.log(results);
});
we can assign images_short if you want to keep consistency.

Giving a provided Node JS callback my own custom Callback

First off I thought I'd get this problem solved after this great thread: nodeJs callbacks simple example
However, I am still unsure of how to proceed. Like the title hints at: I need a callback given to a callback who already has node arguments being passed to it
Code:
(function()
var reqs = {
http: require('http'),
path: require('path'),
fs: require('fs')
};
reqs.http.createServer(function (request, response) {
response.writeHead(200, {
'Content-Type': 'text/plain'
});
response.end('Hello HTTP!');
}).listen(8080);
var printCount = function(count) {
console.log(count);
};
var callCount = function(err, list, callback) {
var count = 0;
if(err) throw err;
// console.log(err);
for (var i = 0; i < list.length; i++) {
// console.log(reqs.path.extname(list[i]));
if(reqs.path.extname(list[i]) === ".png" || reqs.path.extname(list[i]) === ".jpg")
{
count++;
console.log(count);
}
}
callback(count);
};
//count images from executing directory
var countImages = function(dirName) {
var imageCount = reqs.fs.readdir(dirName, callCount(null, null, printCount));
};
countImages(__dirname);
})();
I think the key line here is
var imageCount = reqs.fs.readdir(dirName, callCount(null, null, printCount));
I'm passing the printCount function to the same function that is called back after fs.readdir asynchronously executes but it seems that me passing null to its first two arguments is overriding Node functionality that passes the callback err and list automatically. How can I get around this? I simply want to count the images in the executing directory and be able to store that value in my main function.
Pretty new to event style programming. Any extra reading suggestions are welcome. There is tons of content out there but I really want to get this up and running for a meeting this weekend. Thanks guys!
you can't quite do what you are doing, you are doing callCount(null, null, printCount) which executes the function. But you need to pass a function as a callback. What you want is something like the following, which captures the call back you want and returns a function you can pass as a callback to your api call
var callCount = function(callback) {
return function(err, list) {
var count = 0;
if(err) throw err;
// console.log(err);
for (var i = 0; i < list.length; i++) {
// console.log(reqs.path.extname(list[i]));
if(reqs.path.extname(list[i]) === ".png" || reqs.path.extname(list[i]) === ".jpg")
{
count++;
console.log(count);
}
}
callback(count);
}
}
and then
reqs.fs.readdir(dirName, callCount(printCount));

How To handle result set of websql in html 5?

I m creating mobile web application using html5 and javascript.I m having two javascript files. AttributesDatabase.js and AttributeView.js.From AttributeView.js i m calling one function from AttributeDatabase.js in that i m executing one select query.Now the query result should go to AtttributeView.js.But the Websql transaction is asynchronous call that is what it is not returning proper result.Is there any way to handle the websql result.
Please help if any way there?
Edited
AttributeView.js
var AttributeDAOObj = new AttributeDAO();
AttributeDAOObj.GetAttributeList();
alert(AttributeDAOObj.GetAttributeList()); //This alert is coming as undefined.
AttributeDAO.js
this.GetAttributeList = function () {
var baseDAOObj = new BaseDAO();
var query = "SELECT AttributeName FROM LOGS";
// this.Successcalbackfromsrc = this.myInstance.Successcalback;
var parm = { 'query': query, 'Successcalback': this.myInstance.Successcalback };
baseDAOObj.executeSql(parm);
}
//To Create database and execute sql queries.
function BaseDAO() {
this.myInstance = this;
//Creating database
this.GetMobileWebDB = function () {
if (dbName == null) {
var dbName = 'ABC';
}
var objMobileWebDB = window.openDatabase(dbName, "1.0", dbName, 5 * 1024 * 1024);
return objMobileWebDB;
}
//Executing queries and getting result
this.executeSql = function (query) {
var objMobileWebDB = this.myInstance.GetMobileWebDB();
objMobileWebDB.transaction(function (transaction) {
//In this transaction i m returning the result.The result value is coming.
transaction.executeSql(query, [], function (transaction, result) { return result; }, this.Errorclback);
});
}
}
The problem is in you succes call back (like in the comment to your question, stated by DCoder)
function (transaction, result) { return result; }
this is returning where to?
So this is how to do it (or at least one way)
you can do for example:
function (transaction,result){
console.log("yes, I have some result, but this doesn't say anything, empty result gives also a result");
// so check if there is a result:
if (result != null && result.rows != null) {
if (result.rows.length == 0) {
// do something if there is no result
}else{
for ( var i = 0; i < result.rows.length; i++) {
var row = result.rows.item(i);
var id = result.rows.item(i).id; //supposing there is an id in your result
console.log('Yeah! row id = '+id);
}
}
}else{
// do something if there is no result
}
};
note the code above can be compacter, but this is how to understand it better.
another way is to put this function is a seperate piece of code, so you keep the sql statement more compact and readable. Like you call you error callback this can be in your function (with this. in front of it) or a completely seperate function.

Categories