Retrieve data with request in JavaScript - javascript

I am using the request library in javascript.
If I use
/**
 * Sends the YQL statement `q` to the Yahoo public YQL endpoint using `request`.
 *
 * The function body has no `return` statement of its own, so a caller always
 * receives `undefined`; the parsed body is only returned from the inner
 * callback.
 *
 * @param {string} q - YQL statement to execute.
 */
function executeYQL(q) {
const uri = 'http://query.yahooapis.com/v1/public/yql'
// Query parameters; `q` is percent-encoded here by hand.
const qs = {
q: encodeURIComponent(q),
format: 'json',
env: 'http://datatables.org/alltables.env'
};
// NOTE(review): the parameter object is handed to `request` as its second
// argument directly, not nested under a `qs` option — presumably the reason
// the parameters never reach the URL; confirm against the request docs.
request(uri, qs, (err, res, body) => {
if (!err && res.statusCode === 200) {
// This value is returned to the code that invoked the callback, not to the
// caller of executeYQL.
return JSON.parse(body);
} else {
// NOTE(review): this branch is also taken when `err` is set; confirm that
// `res` is defined in that case before reading `res.statusCode`.
console.log(res.statusCode);
console.log(err);
}
});
};
/**
 * Route handler: runs a fixed historical-data YQL query for "YHOO" and sends
 * the result as JSON.
 *
 * @param {object} req - incoming request (unused here).
 * @param {object} res - response object; `res.json` is called exactly once.
 */
exports.getStocks = (req, res) => {
const q = 'select * from yahoo.finance.historicaldata where symbol = "YHOO" AND startDate = "2009-09-11" and endDate = "2010-03-10"';
// `json` is whatever executeYQL returns synchronously; nothing here waits for
// the HTTP request to complete before the response is sent.
const json = executeYQL(q);
res.json(json);
};
it results in status code 400, but if I use
const uri = 'http://query.yahooapis.com/v1/public/yql?q=' + encodeURIComponent(q) + '&format=json&env=http://datatables.org/alltables.env'
it retrieves the data correctly; however, the variable json inside getStocks() is undefined.
I don't know why it's not populated. Do I have to use a Promise? I guess it's because the call is asynchronous, so res.json(json) runs before executeYQL(q) has finished?
How do I make my code in each line wait until the code previous lines are executed?

Related

It doesn't show me the data in the terminal when i send the post request twice

I am trying to create a middleware that receives form-data and returns the fieldname, contentType and value. When I send the first POST request, the data is shown in the terminal, but if I send the same request again, the data is not shown in the terminal.
And if I toggle the image, the data does show up in the terminal.
This is my code:
server:
const express = require("express");
const Upes = require("../upes");

const app = express();
const start = new Upes();

// The middleware is bound to its instance so `this` inside setup() stays valid
// when Express invokes it as a plain function.
const parseUpload = start.setup.bind(start);

// Final handler: acknowledge the POST once the middleware has called next().
const acknowledge = (req, res) => {
  res.send("all right");
};

app.post("/", parseUpload, acknowledge);

// Log once the HTTP server is accepting connections on port 3000.
const announce = () => {
  console.log("The server is active");
};

app.listen(3000, announce);
the index of my middleware:
const getData = require("./utils/getData");
const parseContentType = require("./utils/parseContentType");
/**
 * Holder for the upload-parsing middleware.
 */
class Upes {
  /**
   * Express-style middleware: validates the request's content type, starts
   * reading the body through getData (which logs the parsed fields), and then
   * hands control to the next handler.
   *
   * @param {object} req - incoming request; `req.headers["content-type"]` is read.
   * @param {object} res - response object (unused here).
   * @param {Function} next - continuation, called once at the end.
   * @throws {Error} when the content type cannot be parsed or its subtype is
   *   not one of the supported ones.
   */
  setup(req, res, next) {
    const parsed = parseContentType(req.headers["content-type"]);
    if (!parsed) {
      throw new Error("Malformed content-type");
    }

    const SUBTYPES = ["form-data", "x-www-form-urlencoded"];
    const isSupported = SUBTYPES.some((subtype) => subtype === parsed.subtype);
    if (!isSupported) {
      const message =
        "The subtypes does not match the following subtypes: " + SUBTYPES;
      throw new Error(message);
    }

    // The parsed fields are only printed; they are not attached to the request.
    const printFields = (data) => {
      console.log(data);
    };
    getData(req, parsed.params.boundary, printFields);

    // Control moves on immediately, without waiting for the body to be read.
    next();
  }
}
module.exports = Upes;
The function that receive the data and processes it:
/**
 * Reads the whole request body, splits it into multipart blocks and reports
 * one descriptor per block.
 *
 * The raw chunks are collected as Buffers and decoded once at the end, so a
 * multi-byte character that happens to be split across two chunks is decoded
 * correctly (decoding each chunk separately would corrupt it).
 *
 * @param {object} req - readable request stream emitting "data" and "end".
 * @param {string} boundary - multipart boundary, forwarded to getBlock.
 * @param {(data: Array<{fieldname: string, contentType: string, value: string}>) => void} callback
 *   Called once, after "end", with the parsed descriptors.
 */
function getData(req, boundary, callback) {
  const pieces = [];

  req.on("data", (piece) => {
    // Chunks are strings instead of Buffers when an encoding was set on req.
    pieces.push(Buffer.isBuffer(piece) ? piece : Buffer.from(piece));
  });

  req.on("end", () => {
    const body = Buffer.concat(pieces).toString();

    const data = getBlock(body, boundary).map((block) => {
      // Everything before the first blank line is the part's header section.
      const [head] = block.split("\r\n\r\n");
      const params = head.split(";");
      const fieldname = params[1].split("=")[1].replaceAll('"', "");

      // A third ";"-separated segment carries the part's own Content-Type
      // line; parts without it fall back to the default label.
      const contentType =
        params.length === 3
          ? params[2].split(":")[1].replace(" ", "")
          : "text-plain";

      return {
        fieldname,
        contentType,
        value: "", // the part's payload is intentionally not reported yet
      };
    });

    callback(data);
  });
}
/**
 * Splits a multipart body into its parts.
 *
 * Each delimiter line in the body is the boundary prefixed with "--". The
 * text before the first delimiter and the text after the closing delimiter
 * are discarded. Hyphens inside the parts are left untouched, so field names,
 * values and MIME types such as "x-pem-file" survive intact.
 *
 * @param {string} body - decoded request body.
 * @param {string} boundary - boundary parameter from the Content-Type header.
 * @returns {string[]} the raw parts (headers + payload); empty when no
 *   boundary is available.
 */
function getBlock(body, boundary) {
  if (!boundary) {
    // e.g. urlencoded bodies carry no boundary parameter at all.
    return [];
  }
  return body.split(`--${boundary}`).slice(1, -1);
}
module.exports = getData;
Send the same request 20 times
I don't know what happened; can someone please help me?

How to use nock to intercept requests regardless of body

I'm trying to use nock in my tests to intercept the request calls i'm making from the native https module in Node.js. I'm using Promise.all to make two requests to the external server. I want my tests to intercept the calls, and check some of the form fields to make sure they're filled in as i want.
I have my class setup below (kept the most relevant parts of code in):
const archiver = require('archiver');
const { generateKeyPairSync } = require('crypto');
const FormData = require('form-data');
const fs = require('fs');
const https = require('https');
/**
 * Generates a key pair, uploads both keys to the file storage API and then
 * zips them. `_generateKeys` and `_zipKeys` are called but not defined in
 * this excerpt.
 */
class Platform {
// Credentials for the file storage API are read from the environment.
constructor() {
this.FILESTORE_USERNAME = process.env.FILESTORE_USERNAME;
this.FILESTORE_PASSWORD = process.env.FILESTORE_PASSWORD;
}
/**
 * Uploads the public and private key in parallel and, once both uploads have
 * succeeded, zips the keys.
 *
 * @param {string} serviceName - forwarded to `_zipKeys`.
 * @returns {Promise} settles with whatever `_zipKeys` returns; rejects with
 *   the first upload error.
 */
store(serviceName) {
const { publicKey, privateKey } = this._generateKeys();
return Promise.all([this._postKey(publicKey), this._postKey(privateKey)])
.then(() => {
return this._zipKeys(publicKey, privateKey, serviceName);
})
// Re-throws unchanged, so the rejection reaches the caller as-is.
.catch((err) => {
throw err;
});
}
/**
 * POSTs one key to the file storage API as multipart form data.
 *
 * @param {string} key - key text; it is treated as the public key when it
 *   contains "PUBLIC", otherwise as the private key.
 * @returns {Promise} resolves with the parsed JSON response body, or with
 *   `undefined` when the body is empty; rejects on request/response errors
 *   or when the status code is outside 200-299.
 */
_postKey(key) {
const options = this._getOptions();
const keyName = (key.search(/(PUBLIC)/) !== -1) ? 'publicKey' : 'privateKey';
const form = new FormData();
form.append('file', key);
form.append('Name', keyName);
form.append('MimeMajor', 'application');
form.append('MimeMinor', 'x-pem-file');
// Only the public key gets a file extension.
form.append('Extension', (keyName == 'publicKey') ? 'pub' : '');
form.append('FileClass', 'MFS::File');
// The multipart headers come from the form; Basic auth is added on top.
options.headers = form.getHeaders();
options.headers.Authorization = 'Basic ' + Buffer.from(this.FILESTORE_USERNAME + ':' + this.FILESTORE_PASSWORD).toString('base64');
return new Promise((resolve, reject) => {
let post = https.request(options, (res) => {
let data = '';
if (res.statusCode < 200 || res.statusCode > 299) {
// The response body is not read on this path.
reject(new Error('File Storage API returned a status code outside of acceptable range: ' + res.statusCode));
} else {
res.setEncoding('utf8');
// Accumulate the response body as text.
res.on('data', (chunk) => {
data += chunk;
});
res.on('error', (err) => {
reject(err);
});
res.on('end', () => {
if (data) {
// NOTE(review): JSON.parse can throw here on a non-JSON body; that throw
// happens inside the event handler, not inside the promise executor.
resolve(JSON.parse(data));
} else {
resolve();
}
});
}
});
post.on('error', (err) => {
reject(err);
});
// NOTE(review): end() is called right after pipe(); confirm the form has been
// fully written to the request by then.
form.pipe(post);
post.end();
});
}
// Returns a fresh options object for every request, so callers may mutate it.
_getOptions() {
return {
hostname: 'api.example.com',
path: '/media/files/',
method: 'POST',
};
}
}
module.exports = Platform;
And then, my testing code looks like the below. I'm using mocha, sinon, chai, sinon-chai and nock.
const Platform = require('/usr/src/app/api/Services/Platform');
const crypto = require('crypto');
const fs = require('fs');
const nock = require('nock');
const yauzl = require('yauzl');
// Mocha suite for Platform; outgoing HTTPS calls are intercepted with nock.
describe('Platform', function() {
let platform;
// Every test gets a fresh Platform instance.
beforeEach(() => {
platform = new Platform();
});
// Remove files left in the data directory and drop all nock interceptors.
afterEach(() => {
const list = fs.readdirSync('/usr/src/app/api/Services/data/');
list.forEach((file) => {
// The unlink is asynchronous; an error is thrown from its callback.
fs.unlink('/usr/src/app/api/Services/data/' + file, (err) => {
if (err) throw err;
});
});
nock.cleanAll();
});
after(() => {
nock.restore();
});
describe('store', function() {
it('should post each generated key to an external storage place', async function() {
this.timeout(5000);
// const stub = sinon.stub(platform, '_postKey').resolves();
// persist() keeps the interceptor alive for both key uploads.
const scope = nock('https://api.example.com')
.persist()
// The second argument to post() is a function that receives the request
// body. As written it has no return statement, so it yields undefined.
// NOTE(review): confirm in the nock docs what a non-true result means for
// request matching.
.post('/media/files/', (body) => {
// console.log(body);
})
.reply(200);
let serviceName = 'test';
// A rejection from store() is turned into a failing assertion; `actual` is
// then whatever the catch handler returns.
let actual = await platform.store(serviceName)
.catch((err) => {
(() => { throw err; }).should.not.throw();
});
console.log(scope);
// expect(stub.callCount).to.equal(2);
expect(actual).to.be.a('string');
expect(actual).to.include(serviceName + '.zip');
// stub.reset();
});
});
});
The problem I am coming across is this error that is thrown when running my tests:
AssertionError: expected [Function] to not throw an error but 'Error:
Nock: No match for request {\n "method": "POST",\n "url":
"https://api.example.com/media/files/",\n "headers": {\n
"content-type": "multipart/form-data;
boundary=--------------------------363749230271182821931703",\n
"authorization": "Basic abcdef1224u38454857483hfjdhjgtuyrwyt="\n },\n
"body":
"----------------------------363749230271182821931703\r\nContent-Disposition: form-data; name=\"file\"\r\n\r\n-----BEGIN PUBLIC
KEY-----\nMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAq+QnVOYVjbrHIlAEsEoF\nZ4sTvqiB3sJGwecNhmgrUp9U8oqgoB50aW6VMsL71ATRyq9b3vMQKpjbU3R2RcOF\na6mlaBtBjxDGu2nEpGX++mtPCdD9HV7idvWgJ3XS0vGaCM//8ukY+VLBc1IB8CHC\nVj+8YOD5Y9TbdpwXR+0zCaiHwwd8MHIo1kBmQulIL7Avtjh55OmQZZtjO525lbqa\nWUZ24quDp38he2GjLDeTzHm9z1RjYJG6hS+Ui0s2xRUs6VAr7KFtiJmmjxPS9/vZ\nwQyFcz/R7AJKoEH8p7NE7nn/onbybJy+SWRxjXVH8afHkVoC65BiNoMiEzk1rIsx\ns92woHnq227JzYwFYcLD0W+TYjtGCB8+ks+QRIiV0pFJ3ja5VFIxjn9MxLntWcf2\nhsiYrmfJlqmpW1DMfZrtt41cJUFQwt7CpN72aix7btmd/q0syh6VVlQEHq/0nDky\nItv7dqyqZc9NNOMqK9/kXWhbq5cwS21mm+kTGas5KSdeIR0LH7uVtivB+LKum14e\nRDGascZcXZIVTbOeCxA6BD7LyaJPzXmlMy4spXlhGoDYyVRhpvv2K03Mg7ybiB4X\nEL1oJtiCFkRX5LtRJv0PCRJjaa3UvfyIuz8bHK4ANxIZqcwZwER+g02gw8iqNfMa\nDWXpfMJUU8TQuLGWZQaGJc8CAwEAAQ==\n-----END
PUBLIC
KEY-----\n\r\n----------------------------363749230271182821931703\r\nContent-Disposition: form-data;
name=\"Name\"\r\n\r\npublicKey\r\n----------------------------363749230271182821931703\r\nContent-Disposition: form-data;
name=\"MimeMajor\"\r\n\r\napplication\r\n----------------------------363749230271182821931703\r\nContent-Disposition: form-data;
name=\"MimeMinor\"\r\n\r\nx-pem-file\r\n----------------------------363749230271182821931703\r\nContent-Disposition: form-data;
name=\"Extension\"\r\n\r\npub\r\n----------------------------363749230271182821931703\r\nContent-Disposition: form-data;
name=\"FileClass\"\r\n\r\nMFS::File\r\n----------------------------363749230271182821931703--\r\n"\n}'
was thrown
I take it it's because nock expects me to fake out the body for the request to get a correct match? Is there a way of just looking for requests made to that address, regardless of the body, so I can do my own tests or whatever.
When the post method of a Nock Scope is passed a second argument, it is used to match against the body of the request.
Docs for specifying the request body
In your test, you're passing a function as the second argument, but not returning true so Nock is not considering it a match.
From the docs:
Function: nock will evaluate the function providing the request body
object as first argument. Return true if it should be considered a
match
Since your goal is to assert form fields on the request, your best approach would be to leave the function there, do your assertions where the // console.log(body); line is, but add return true; to the end of the function.
You could also return true or false depending on if your form fields match your assertions, but in my experience it makes the error output from the test convoluted. My preference is to use standard chai expect() calls and let the assertions bubble errors before Nock continues with request matching.

Nested HTTP requests in Firebase cloud function

I'm using an HTTP-triggered Firebase cloud function to make an HTTP request. I get back an array of results (events from Meetup.com), and I push each result to the Firebase realtime database. But for each result, I also need to make another HTTP request for one additional piece of information (the category of the group hosting the event) to fold into the data I'm pushing to the database for that event. Those nested requests cause the cloud function to crash with an error that I can't make sense of.
const functions = require("firebase-functions");
const admin = require("firebase-admin");
admin.initializeApp();
const request = require('request');
/**
 * HTTP-triggered function: requests open events and, for each event that has
 * name, description and group.urlname, issues a second request for the
 * group before pushing an event record to "/foo".
 */
exports.foo = functions.https.onRequest(
(req, res) => {
var ref = admin.database().ref("/foo");
var options = {
url: "https://api.meetup.com/2/open_events?sign=true&photo-host=public&lat=39.747988&lon=-104.994945&page=20&key=****",
json: true
};
return request(
options,
(error, response, body) => {
if (error) {
console.log(JSON.stringify(error));
return res.status(500).end();
}
if ("results" in body) {
for (var i = 0; i < body.results.length; i++) {
var result = body.results[i];
// Only events carrying all the fields used below are processed.
if ("name" in result &&
"description" in result &&
"group" in result &&
"urlname" in result.group
) {
var groupOptions = {
url: "https://api.meetup.com/" + result.group.urlname + "?sign=true&photo-host=public&key=****",
json: true
};
// categoryResult receives the synchronous return value of request(...),
// not the value returned from the callback below; the callback's return
// values go back to whoever invokes it.
var categoryResult = request(
groupOptions,
(groupError, groupResponse, groupBody) => {
if (groupError) {
// NOTE(review): this logs the outer `error`, not `groupError`.
console.log(JSON.stringify(error));
return null;
}
if ("category" in groupBody &&
"name" in groupBody.category
) {
return groupBody.category.name;
}
return null;
}
);
if (categoryResult) {
// `category` is therefore the object returned by request(...), which is
// what ends up being pushed to the database.
var event = {
name: result.name,
description: result.description,
category: categoryResult
};
ref.push(event);
}
}
}
// Sent right after the loop; nothing here waits for the group requests or
// for the push() calls.
return res.status(200).send("processed events");
} else {
return res.status(500).end();
}
}
);
}
);
The function crashes, log says:
Error: Reference.push failed: first argument contains a function in property 'foo.category.domain._events.error' with contents = function (err) {
if (functionExecutionFinished) {
logDebug('Ignoring exception from a finished function');
} else {
functionExecutionFinished = true;
logAndSendError(err, res);
}
}
at validateFirebaseData (/user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1436:15)
at /user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1479:13
at Object.forEach (/user_code/node_modules/firebase-admin/node_modules/#firebase/util/dist/index.node.cjs.js:837:13)
at validateFirebaseData (/user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1462:14)
at /user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1479:13
at Object.forEach (/user_code/node_modules/firebase-admin/node_modules/#firebase/util/dist/index.node.cjs.js:837:13)
at validateFirebaseData (/user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1462:14)
at /user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1479:13
at Object.forEach (/user_code/node_modules/firebase-admin/node_modules/#firebase/util/dist/index.node.cjs.js:837:13)
at validateFirebaseData (/user_code/node_modules/firebase-admin/node_modules/#firebase/database/dist/index.node.cjs.js:1462:14)
If I leave out the bit for getting the group category, the rest of the code works fine (just writing the name and description for each event to the database, no nested requests). So what's the right way to do this?
I suspect this issue is due to the callbacks. When you use firebase functions, the exported function should wait on everything to execute or return a promise that resolves once everything completes executing. In this case, the exported function will return before the rest of the execution completes.
Here's a start of something more promise based -
const functions = require("firebase-functions");
const admin = require("firebase-admin");
admin.initializeApp();
const request = require("request-promise-native");
exports.foo = functions.https.onRequest(async (req, res) => {
const ref = admin.database().ref("/foo");
try {
const reqEventOptions = {
url:
"https://api.meetup.com/2/open_events?sign=true&photo-host=public&lat=39.747988&lon=-104.994945&page=20&key=xxxxxx",
json: true
};
const bodyEventRequest = await request(reqEventOptions);
if (!bodyEventRequest.results) {
return res.status(200).end();
}
await Promise.all(
bodyEventRequest.results.map(async result => {
if (
result.name &&
result.description &&
result.group &&
result.group.urlname
) {
const event = {
name: result.name,
description: result.description
};
// get group information
const groupOptions = {
url:
"https://api.meetup.com/" +
result.group.urlname +
"?sign=true&photo-host=public&key=xxxxxx",
json: true
};
const categoryResultResponse = await request(groupOptions);
if (
categoryResultResponse.category &&
categoryResultResponse.category.name
) {
event.category = categoryResultResponse.category.name;
}
// save to the databse
return ref.push(event);
}
})
);
return res.status(200).send("processed events");
} catch (error) {
console.error(error.message);
}
});
A quick overview of the changes -
Use await and async calls to wait for things to complete vs. being triggered in a callback (async and await are generally much easier to read than promises with .then functions as the execution order is the order of the code)
Used request-promise-native which supports promises / await (i.e. the await means wait until the promise returns so we need something that returns a promise)
Used const and let vs. var for variables; this improves the scope of variables
Instead of doing checks like if (isGood) { do good things }, use if (isBad) { return some error } and then do the good things. This makes the code easier to read and prevents lots of nested ifs where you don't know where they end
Use a Promise.all() so retrieving the categories for each event is done in parallel
There are two main changes you should implement in your code:
Since request does not return a promise you need to use an interface wrapper for request, like request-promise in order to correctly chain the different asynchronous events (See Doug's comment to your question)
Since you will then call several times (in parallel) the different endpoints with request-promise you need to use Promise.all() in order to wait all the promises resolve before sending back the response. This is also the case for the different calls to the Firebase push() method.
Therefore, modifying your code along the following lines should work.
I'll leave it to you to adapt the code so that you get the values of name and description used to construct the event object. The items in the results array are in exactly the same order as the promises in the promises array. Knowing that, you should be able to get the values of name and description within results.forEach(groupBody => {}), e.g. by saving these values in a global array.
const functions = require('firebase-functions');
const admin = require('firebase-admin');
admin.initializeApp();
var rp = require('request-promise');
exports.foo = functions.https.onRequest((req, res) => {
var ref = admin.database().ref('/foo');
var options = {
url:
'https://api.meetup.com/2/open_events?sign=true&photo-host=public&lat=39.747988&lon=-104.994945&page=20&key=****',
json: true
};
rp(options)
.then(body => {
if ('results' in body) {
const promises = [];
for (var i = 0; i < body.results.length; i++) {
var result = body.results[i];
if (
'name' in result &&
'description' in result &&
'group' in result &&
'urlname' in result.group
) {
var groupOptions = {
url:
'https://api.meetup.com/' +
result.group.urlname +
'?sign=true&photo-host=public&key=****',
json: true
};
promises.push(rp(groupOptions));
}
}
return Promise.all(promises);
} else {
throw new Error('err xxxx');
}
})
.then(results => {
const promises = [];
results.forEach(groupBody => {
if ('category' in groupBody && 'name' in groupBody.category) {
var event = {
name: '....',
description: '...',
category: groupBody.category.name
};
promises.push(ref.push(event));
} else {
throw new Error('err xxxx');
}
});
return Promise.all(promises);
})
.then(() => {
res.send('processed events');
})
.catch(error => {
res.status(500).send(error);
});
});
I made some changes and got it working with Node 8. I added this to my package.json:
"engines": {
"node": "8"
}
And this is what the code looks like now, based on R. Wright's answer and some Firebase cloud function sample code.
const functions = require("firebase-functions");
const admin = require("firebase-admin");
admin.initializeApp();
const request = require("request-promise-native");
exports.foo = functions.https.onRequest(
async (req, res) => {
var ref = admin.database().ref("/foo");
var options = {
url: "https://api.meetup.com/2/open_events?sign=true&photo-host=public&lat=39.747988&lon=-104.994945&page=20&key=****",
json: true
};
await request(
options,
async (error, response, body) => {
if (error) {
console.error(JSON.stringify(error));
res.status(500).end();
} else if ("results" in body) {
for (var i = 0; i < body.results.length; i++) {
var result = body.results[i];
if ("name" in result &&
"description" in result &&
"group" in result &&
"urlname" in result.group
) {
var groupOptions = {
url: "https://api.meetup.com/" + result.group.urlname + "?sign=true&photo-host=public&key=****",
json: true
};
var groupBody = await request(groupOptions);
if ("category" in groupBody && "name" in groupBody.category) {
var event = {
name: result.name,
description: result.description,
category: groupBody.category.name
};
await ref.push(event);
}
}
}
res.status(200).send("processed events");
}
}
);
}
);

Node.js server waterfall error TypeError: Cannot read property 'Symbol(Symbol.toStringTag)' of undefined

I am writing a server with two functions, one using the output of another function. When the server runs, it gives an error:
TypeError: Cannot read property 'Symbol(Symbol.toStringTag)' of undefined
at isAsync (/Users/charles/Documents/Router/node_modules/async/dist/async.js:228:32)
at wrapAsync (/Users/charles/Documents/Router/node_modules/async/dist/async.js:232:12)
at nextTask (/Users/charles/Documents/Router/node_modules/async/dist/async.js:5308:20)
at Object.waterfall (/Users/charles/Documents/Router/node_modules/async/dist/async.js:5320:5)
at /Users/charles/Documents/Router/routes/yelp.js:46:15
at /Users/charles/Documents/Router/node_modules/mongojs/lib/cursor.js:59:24
at handleCallback (/Users/charles/Documents/Router/node_modules/mongojs/node_modules/mongodb/lib/utils.js:120:56)
at /Users/charles/Documents/Router/node_modules/mongojs/node_modules/mongodb/lib/cursor.js:683:5
at handleCallback (/Users/charles/Documents/Router/node_modules/mongojs/node_modules/mongodb-core/lib/cursor.js:171:5)
at setCursorDeadAndNotified (/Users/charles/Documents/Router/node_modules/mongojs/node_modules/mongodb-core/lib/cursor.js:505:3)
The code is
const express = require('express');
const router = express.Router();
const request = require('request-promise-lite');
const async = require('async');
/**
 * GET /yelp: reads the most recent search input from the database, then runs
 * the Yelp search followed by the Google image search and returns the
 * restaurants as JSON.
 */
router.get('/yelp', function(req, res, next) {
// Projection: only `term` and `location` are fetched.
db.input.find({}, {
term: 1,
location: 1,
_id: 0
})
// Newest document first, limited to one.
.limit(1).sort({
$natural: -1
}, function(err, input) {
if (err) {
// No return here, so execution continues below after the error was sent.
res.send(err)
}
console.log(input);
// `yelpSearch(input[0])` is a call, so the function runs right here (without
// a callback argument) and its return value, not the function itself,
// becomes the first entry of the task array.
async.waterfall([yelpSearch(input[0]), googleSearch],
function sendJson(err, restaurants) {
console.log("waterfall starting");
// res.json below runs even when the error was already sent.
if (err) res.send(err);
res.json(restaurants);
})
})
});
// Yelp API call
/**
 * Searches Yelp and reports the matching businesses through a Node-style
 * callback.
 *
 * @param {object} input - search parameters passed to `client.search`.
 * @param {(err: Error|null, businesses?: Array) => void} cb - called exactly
 *   once: with `(null, businesses)` on success or `(err)` on failure, so a
 *   surrounding waterfall never stalls on a failed search.
 */
const yelpSearch = function(input, cb) {
  const client = yelp.client(apiKey);
  client.search(input)
    .then((response) => {
      const businesses = response.jsonBody.businesses;
      console.log(businesses);
      return businesses;
    })
    // Two-argument then(): a throw inside cb cannot re-enter the error branch
    // and invoke cb a second time.
    .then(
      (businesses) => cb(null, businesses),
      (e) => {
        console.log(e);
        cb(e);
      }
    );
};
// Google API call
/**
 * Looks up image links for every restaurant via the Google Custom Search API
 * and stores them on the restaurant as `imageURLs`.
 *
 * All searches run in parallel; `cb` is only invoked after every search has
 * finished, so the restaurants passed on already carry their image links.
 *
 * @param {Array<object>} restaurants - restaurants to enrich (mutated in place).
 * @param {(err: Error|null, restaurants?: Array<object>) => void} cb - called
 *   once with `(null, restaurants)` or with the first error.
 */
const googleSearch = function(restaurants, cb) {
  console.log("google starts");

  const searches = restaurants.map(function(restaurant) {
    // URLSearchParams takes care of the "&"/"=" separators and of escaping
    // names that contain spaces, "&" and similar characters.
    // NOTE(review): `count` and `safe=medium` are carried over unchanged —
    // confirm both against the Custom Search API reference.
    const params = new URLSearchParams({
      key: google_apiKey,
      q: [restaurant.name, restaurant.city, restaurant.state]
        .filter(Boolean)
        .join(" "),
      searchType: "image",
      cx: cseID,
      count: "5",
      safe: "medium"
    });
    const googleURL =
      "https://www.googleapis.com/customsearch/v1?" + params.toString();

    return request.get(googleURL, {
      json: true,
      headers: {
        'User-Agent': 'thaorell'
      }
    }).then(function(response) {
      // A search without hits has no `items` array.
      const imageURLs = (response.items || []).map(function(item) {
        return item.link;
      });
      restaurant.imageURLs = imageURLs;
      console.log(imageURLs);
    });
  });

  Promise.all(searches).then(
    function() {
      cb(null, restaurants);
    },
    function(err) {
      cb(err);
    }
  );
};
Does anyone have any experience in this? The error is on the line with: async.waterfall([yelpSearch(input[0]), googleSearch]. I am using Yelp API to search for restaurants then for each restaurant, I would like to fetch the Google Search for images of that restaurant.
I guess, you are passing arguments to the first function in the waterfall wrongly, it should be:
// Every entry in the task array is a function reference; none is called here.
async.waterfall([
// NOTE(review): per the async docs, constant() should yield a task that hands
// input[0] to the next task — confirm.
async.constant(input[0]),
yelpSearch,
googleSearch
], function sendJson(err, restaurants) {
// ...
});
Both yelpSearch and googleSearch are consts; they don't get hoisted. So, at the time you call async.waterfall([yelpSearch(input[0]), googleSearch], ...), the functions do not exist yet, so it fails. Declare them above router.get instead (or change them to hoisted function declarations).

NodeJS multiple requests

I am writing a web scraper that makes multiple requests based on a list that looks like this
1. Category1
1a. categoryItem1
1b. categoryItem2
2. Category2
2a. categoryItem1
2b. categoryItem2
2c. categoryItem3
3. Category3
3a. categoryItem1
Both Category and categoryItem are links. Only 1 Category can be expanded at a time. The amount of Categories and categoryItems can change so I don't know the exact amount before hand.
I am gathering the data on each categoryItem page to be saved in a json that looks like this
{
"Category1": [
{
"categoryItem1": {
// Details saved here
}
},
{
"categoryItem2": {
// Details saved here
}
}
],
"Category2": [
{
"categoryItem1": {
// Details saved here
}
},
{
"categoryItem2": {
// Details saved here
}
},
{
"categoryItem3": {
// Details saved here
}
}
],
"Category3": [
{
"categoryItem1": {
// Details saved here
}
}
]
}
The only thing left for me is to figure out how to make this act synchronous
Get the opening page
Open each Category list
Open each categoryItem details page
THIS was the web scraper tutorial that I followed, if you would like to know. Due to async calls I don't know when the very last page is parsed, so here is the structure of the script
server.js
var express = require('express');
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var app = express();
/**
 * GET /scrape: walks three levels of pages (list -> category -> item) with
 * nested request.get calls, collecting results into globalJSON, then writes
 * output.json and sends the collected object back.
 */
app.get('/scrape', function (req, res) {
// Assigned without var/let/const, so both names are not local to this handler.
globalJSON = {};
baseUrl = 'http://...';
// 1.) open page with list
request.get(baseUrl, function (error, response, html) {
if (!error) {
var $ = cheerio.load(html);
// select the list
// filter() is used only to visit each match; its callback returns nothing.
$('#categoryListSelector').filter(function () {
var data = $(this);
var listItem = data.find('#listItemSelector');
var expansionLink = listItem.find('a').attr('href'); //
var category = listItem.find('font').text();
// Save category to global json
globalJSON[category] = [];
// 2.) Expand the list by opening expansionLink
request.get(baseUrl + expansionLink, function (error, response, html) {
if (!error) {
var $ = cheerio.load(html);
// Select the sub items of each list item
$('#subItem selector').filter(function () {
var data = $(this);
var categoryItemPageLinkElement = data.find('a');
var categoryItemName = categoryItemPageLinkElement.text();
var categoryItemLink = $(categoryItemPageLinkElement).attr('href');
// typeof never yields "undefinded", so the first comparison is always true;
// the `!= null` comparison is what excludes a missing href.
if (typeof categoryItemLink != "undefinded" && categoryItemLink != null && categoryItemLink != "") {
// Both names are assigned without a declaration, so every iteration
// overwrites the same two variables.
categoryItemObject = {}; // { categoryItemName: categoryItemDetails }
categoryItemDetails = {};
// 3.) Open the categoryItem page to start gathering data
request.get(baseUrl + categoryItemLink, function (error, response, html) {
if (!error) {
var $ = cheerio.load(html);
// GATHER and save data here
// Done gathering data save to global json
categoryItemObject[categoryItemName] = categoryItemDetails;
globalJSON[category].push(categoryItemObject);
}
});
}
});
}
});
});
// These two statements run directly after the filter() call above returns;
// nothing here waits for the nested request.get callbacks.
fs.writeFile('output.json', JSON.stringify(globalJSON, null, 4), function (err) {
// Logged regardless of `err`.
console.log('File successfully written!');
});
res.send(globalJSON);
}//END if(!error)
});
})//END app.get()
app.listen('8081')
console.log('Magic happens on port 8081');
exports = module.exports = app;
Update
I did get my issue solved with some help from the feller below, and this is what I came up with. Now, there might be a better way, feel free to let me know.
Basic Layout
// Outline: stage 1 requests every category page in parallel; once all have
// answered, stage 2 requests every collected item page in parallel.
Promise.all(categoriesArr.map(categoryObj => new Promise((resolve, reject)=>{
request.get(baseUrl + categoryObj.categoryItemLink, (error, response, html)=>{
if(error){
return reject(error);
}
//build an array of ALL the categoryItemLinks
// NOTE(review): `res` is not a parameter of this callback (those are error,
// response, html), and a promise's resolve() only uses its first argument.
return resolve(res, html);
});
}))).then(function(statesArray) {
// The inner Promise.all is not returned, so the outer chain does not wait
// for it and the outer catch does not see its rejections.
Promise.all(allCategoryItems.map(categoryItemObject => new Promise((resolve, reject)=>{
request.get(baseUrl + categoryItemObject.categoryItemPageLink, (error, response, html)=>{
if(error){
return reject(error);
}
// Gather Data and put into dataJson
return resolve(response, html);
});
}))).then(function(data) {
// Do finishing stuff
}).catch(/*error*/);
}).catch(/*error*/);
server.js
var express = require('express');
var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
var app = express();
/**
 * GET /scrape: collects the category links from the start page, loads every
 * category page in parallel to collect the item links, loads every item page
 * in parallel, and finally answers with the gathered data.
 *
 * All state is local to one request, so concurrent scrapes cannot overwrite
 * each other's results. Any failed page load ends in a 500 response.
 */
app.get('/scrape', function (req, res) {
  const baseUrl = 'http://www.blahblah.org';
  const dataJson = {}; // Holds all the gathered data for this request

  // Promise wrapper around request.get that resolves with the page's HTML.
  const fetchHtml = (url) => new Promise((resolve, reject) => {
    request.get(url, (error, response, html) => {
      if (error) {
        return reject(error);
      }
      return resolve(html);
    });
  });

  // Returns the href of the first link inside every element matching
  // `selector`, skipping elements without a usable href.
  const collectLinks = (html, selector) => {
    const $ = cheerio.load(html);
    const links = [];
    $(selector).each(function () {
      const href = $(this).find('a').attr('href');
      if (href != null && href !== '') {
        links.push(href);
      }
    });
    return links;
  };

  fetchHtml(baseUrl)
    .then((html) => {
      const categoryLinks = collectLinks(html, '#categorySelector');
      return Promise.all(categoryLinks.map((link) => fetchHtml(baseUrl + link)));
    })
    .then((categoryPages) => {
      // Flatten the per-category link lists into one list of item pages.
      const itemLinks = categoryPages.reduce(
        (all, html) => all.concat(collectLinks(html, '#categoryItemSelector')),
        []
      );
      return Promise.all(itemLinks.map((link) => fetchHtml(baseUrl + link)));
    })
    .then((itemPages) => {
      itemPages.forEach((html) => {
        var $ = cheerio.load(html);
        // Gather Data and put into dataJson
      });
      // Do finishing stuff
      res.json(dataJson);
    })
    .catch((error) => {
      console.error(error);
      res.status(500).send('Scrape failed');
    });
})//END app.get()
app.listen('8081')
console.log('Magic happens on port 8081');
exports = module.exports = app;
You can use Promise.all(), so something like:
// One promise per URL: each rejects with the request error or resolves once
// the response has arrived; Promise.all waits for all of them.
Promise.all(urls.map(url => new Promise((resolve, reject)=>{
request.get(url, (err, res, html)=>{
if(err){
return reject(err);
}
// NOTE(review): resolve() only uses its first argument, so the promise
// settles with `res`; `html` is not passed on.
return resolve(res, html);
});
}))).then(/*success*/).catch(/*error*/);
In that code, the .then() executes after all requests have come back with a response.

Categories