Highland consume and toArray method together - javascript

Hi people and happy holidays!
I'm trying to consume a stream of CSV rows with Highland. To do some special processing and avoid passing the headers down the stream, I'm calling .consume() and then I want the outcome in an array. The problem is that the callback of .toArray() is never called. I'm just curious about this, as I've already switched my solution to .map().toArray(), but I still think .consume() would be a more elegant solution. ;) This is the piece of code that reads from a CSV file and processes the rows:
const fs = require('fs');
const _ = require('highland');
const dataStream = fs.createReadStream('file.csv', 'utf8');
_(dataStream)
.split()
.consume((err, row, push, next) => {
console.log('consuming a row', row); // <-- this shows data
if (err) {
push(err);
return next();
}
// This condition is needed as .consume() passes an extra {} at the end of the stream <-- dunno why this happens!!
if (typeof row !== 'string') {
return next();
}
const cells = row.split(',');
if (cells[0] === 'CODE') { // <-- header row
const { columnNames, columnIndexes } = processHeader(cells); // <-- not relevant, works okay
return next();
}
console.log('processing content here', processRow(cells)); // <-- not relevant, works okay
push(null, processRow(cells));
return next();
})
.toArray(rows => console.log('ROWS: ', rows)); // <-- I don't get anything here
Thanks a lot!

Both consume and toArray consume a stream, and you can only consume a stream once. If you try to consume it multiple times, the "stream" will be empty.
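For reference, the extra {} that the question's comment puzzles over is Highland's end-of-stream marker, _.nil. A consume handler has to forward that marker (and not call next() after it), otherwise the downstream stream never ends and the .toArray() callback never fires. A minimal sketch of that pattern, reusing processRow from the question:
const fs = require('fs');
const _ = require('highland');
_(fs.createReadStream('file.csv', 'utf8'))
.split()
.consume((err, row, push, next) => {
if (err) {
push(err);
return next();
}
if (row === _.nil) {
// Forward the end-of-stream marker so downstream .toArray() can complete
return push(null, _.nil);
}
const cells = row.split(',');
if (cells[0] === 'CODE') {
return next(); // drop the header row
}
push(null, processRow(cells)); // processRow as defined in the question
return next();
})
.toArray(rows => console.log('ROWS: ', rows));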


Is there a way to request more than one url in a get method in javascript?

I'm new to JavaScript and I'm having trouble getting my code to work. My GET handlers need to get from one URL and then get from another, and I continuously get "headers have already been sent" errors.
app.get('/api/movies/categories/:category',(req,res, next)=>{
// returns an array of all the movies that fit this year
// FUTURE IDEA: Check that it has also won an Oscar!
var inner=0;
var category = req.params.categories;
for (i=0; i < csvarray.length; i++){
if (csvarray[i]['categories'] == category){
var prevlength = list2.length;
next();
if(prevlength != list2.length){
list2[o--] = csvarray[i]['categories'];
}
}
}
if(list2.length == 0){
res.status(404).send(`The Movie category ${req.params} was not found`);
}
res.send(list2); //if you find the year then send it to the user
});
app.get('/api/movies/winners/:winner',(req, next)=>{
var Trues = req.params.winner;
if(csvarray[o]['winners'] == Trues){
list2[o++];
return;
}
});
list2 and o are constants so that both get functions can use them. If there is a way to have a GET handler take both the req and a URL, that would be perfect, but so far I cannot find any way to do it.
Get in the habit of using return whenever you send a response. That way it only ever sends one response.
The "headers have already been sent" error is showing because you are calling res.send() twice, and both calls run during the same invocation of the API handler.
In order to fix it, just use:
return res.status(404).send(...
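Applied to the handler in the question, the pattern would look something like this (a sketch; note the original template string used ${req.params}, which would print the whole params object, so ${req.params.category} is assumed here):
if (list2.length == 0) {
// return makes the handler exit here, so res.send(list2) below can never run as well
return res.status(404).send(`The Movie category ${req.params.category} was not found`);
}
res.send(list2);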
const async = require('async');
const request = require('request');
function httpGet(url, callback) {
const options = {
url : url,
json : true
};
request(options,
function(err, res, body) {
callback(err, body);
}
);
}
Use Promise.all, or do it manually with the code below:
const urls= [
"http://localhost:3010/alm/build_tool",
"http://localhost:3010/alm/development_tool",
"http://localhost:3010/alm/project_architecture"
];
async.map(urls, httpGet, function (err, res){
if (err) return console.log(err);
console.log(res);
});
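For the Promise.all route, one minimal sketch (assuming the httpGet helper and urls array above) promisifies the same callback-style helper with Node's util module:
const { promisify } = require('util');
const httpGetAsync = promisify(httpGet); // httpGet already follows the (err, result) callback convention
Promise.all(urls.map(url => httpGetAsync(url)))
.then(bodies => console.log(bodies)) // one parsed body per URL, in the same order as urls
.catch(err => console.log(err));
Note the explicit url => httpGetAsync(url) arrow: passing httpGetAsync directly to .map() would also forward the index and array arguments into the promisified call.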

How to parse JSON data in chunks in Node.js

I'm creating a REST API that gets raw data from the internet, applies a regex to it, and returns it in JSON format.
This is my function for getting the data as JSON.
First I'm using got() to fetch the raw body, then I apply ANIME_LIST_REGEX.exec() to filter it with the regular expression and extract the part that is in JSON format.
async function getAnimeList(url) {
const {body} = await got(url);
let ANIME_LIST_DATA = ANIME_LIST_REGEX.exec(body)
if (!ANIME_LIST_DATA) {
return null;
}
return {
animeList: ANIME_LIST_DATA[1]
};
}
In this endpoint I'm retrieving the data from the first function, parsing the JSON, then returning it as the response.
app.get('/anime-list', async (req, res, next) => {
const appData = await getAnimeList(URL_BASE_ANIME_LIST);
var listJson = JSON5.parse(appData.animeList)
res.json(listJson)
})
The issue is that the returned array is pretty big (5,000 JS objects), so the request takes a long time to return and display the array.
What I want to do is return a chunk of that array every time I call the function or hit the endpoint.
I tried several methods, but none of them made sense.
Does anyone have an idea?
You can cut a small chunk out of the large array with Array.prototype.splice(). To determine the range of the chunk, you can pass query parameters to the endpoint.
app.get("/anime-list", async (req, res, next) => {
const appData = await getAnimeList(URL_BASE_ANIME_LIST)
var listJson = JSON5.parse(appData.animeList)
const from = req.query.from || 0
const count = req.query.count || 100
res.json(listJson.splice(from, count))
})
However, as others mention, calling getAnimeList() per request will cause another performance problem. I highly suggest you refactor the function to cache the result.
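A minimal in-memory cache along those lines could look like this (a sketch; the TTL value, the names, and the switch to slice() are assumptions, not from the original code):
let cachedList = null;
let cachedAt = 0;
const TTL_MS = 5 * 60 * 1000; // illustrative: refetch at most every five minutes
async function getAnimeListCached(url) {
if (!cachedList || Date.now() - cachedAt > TTL_MS) {
const appData = await getAnimeList(url);
cachedList = JSON5.parse(appData.animeList);
cachedAt = Date.now();
}
return cachedList;
}
app.get("/anime-list", async (req, res, next) => {
const listJson = await getAnimeListCached(URL_BASE_ANIME_LIST);
const from = Number(req.query.from) || 0;
const count = Number(req.query.count) || 100;
// slice() rather than splice(), so repeated requests do not mutate the cached array
res.json(listJson.slice(from, from + count));
})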

How can I update my dictionary with nested HTTP request?

I'm gonna try to explain this as clearly as I can, but it's very confusing to me so bear with me.
For this project, I'm using Node.js with the modules Axios and Cheerio.
I am trying to fetch HTML data from a webshop (similar to Amazon/eBay) and store the product information in a dictionary. I managed to store most things (title, price, image), but the product description is on a different page. To request that page, I'm using the URL I got from the first request, so the two requests are nested.
This first part is done with the following request:
let request = axios.get(url)
.then(res => {
// This gets the HTML for every product
getProducts(res.data);
console.log("Got products in HTML");
})
.then(res => {
// This parses the product HTML into a dictionary of product items
parseProducts(productsHTML);
console.log("Generated dictionary with all the products");
})
.then(res => {
// This loops through the products to fetch and add the description
updateProducts(products);
})
.catch(e => {
console.log(e);
})
I'll also provide the way I'm creating product objects, as it might clarify the function where I think the problem occurs.
function parseProducts(html) {
for (item in productsHTML) {
// Store the data from the first request
const $ = cheerio.load(productsHTML[item]);
let product = {};
let mpUrl = $("a").attr("href");
product["title"] = $("a").attr("title");
product["mpUrl"] = mpUrl;
product["imgUrl"] = $("img").attr("src");
let priceText = $("span.subtext").text().split("\xa0")[1].replace(",", ".");
product["price"] = parseFloat(priceText);
products.push(product);
}
}
The problem resides in the updateProducts function. If I console.log the dictionary afterwards, the description is not added. I think this is because the console will log before the description gets added. This is the update function:
function updateProducts(prodDict) {
for (i in prodDict) {
let request2 = axios.get(prodDict[i]["mpUrl"])
.then(res => {
const $ = cheerio.load(res.data);
description = $("div.description p").text();
prodDict[i]["descr"] = description;
// If I console.log the product here, the description is included
})
}
// If I console.log the product here, the description is NOT included
}
I don't know what to try anymore, I guess it can be solved with something like async/await or putting timeouts on the code. Can someone please help me with updating the products properly, and adding the product descriptions? Thank you SO much in advance.
To refactor this with async/await one would do:
async function fetchAndUpdateProducts() {
try {
const response = await axios.get(url);
getProducts(response.data);
console.log("Got products in HTML");
parseProducts(productsHTML);
console.log("Generated dictionary with all the products");
await updateProducts(products);
} catch(e) {
console.log(e);
}
}
fetchAndUpdateProducts().then(() => console.log('Done'));
and
async function updateProducts(prodDict) {
for (const i in prodDict) {
const response = await axios.get(prodDict[i]["mpUrl"]);
const $ = cheerio.load(response.data);
const description = $("div.description p").text();
prodDict[i]["descr"] = description;
}
}
This way, the call to fetchAndUpdateProducts does not conclude until the promise returned by updateProducts has resolved.
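If the sequential awaits turn out to be too slow, the per-product requests can also run in parallel; a sketch, assuming prodDict is the products array built by parseProducts:
async function updateProducts(prodDict) {
// Fire all description requests at once and wait for every one to finish
await Promise.all(prodDict.map(async (product) => {
const response = await axios.get(product.mpUrl);
const $ = cheerio.load(response.data);
product.descr = $("div.description p").text();
}));
}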

How to print promised data to a webpage?

I am trying to bring an array of strings from a database to a dropdown menu on a website I have created. I have everything working properly except for the final transfer of the data from the retrieval method to the website. Right now the data is in the form of a Promise, and I cannot for the life of me figure out how to get it to print out on my webpage. Right now I'm just sending it to localhost:3000; I'm not at the point of putting it into the dropdown yet. How would I do this?
I've found very little on this issue online and thus have mainly been trying hack fixes that haven't really worked (tacking on the resolve() method, the all() method); both of those resulted in syntax errors. All variable names/SQL queries have been changed, btw. My latest attempt is below:
//code that sends the names to the webpage
app.get('/formfetch', function(req, res) {
const data = async () => {
let rawDat = await dbFormFetch.getNames();
return rawDat;
};
const hNs = data();
hNs.then((names) => {
if (names === null) {
res.end("Error: Names list came through as null.");
} else if (names.length > 0) {
resolve(names);
for (var i = 0; i < names.length; i++) {
res.end(names[i]);
}
res.status('200');
}
})
.catch((err) => {
res.status('404').json(err)
console.log("conversion of promise failed")
})
});
//the getNames() method (in a different file)
async function getNames() {
console.log("trying to get Names");
let query = `select NAME from NAMESTAB`;
console.log("query: " + query);
const binds = {};
const result = await database.simpleExecute(query, binds);
var results = [];
console.log("for loop in formfetch.js: ");
for (var i = 0; i < result.rows.length; i++) {
results[i] = i + ": " + result.rows[i].NAME+ ' \n';
}
return results;
}
The res.send method from the app.get function prints out "Made it to the Web server:" on my localhost. I checked the console, and I didn't see anything hidden in the html or something like that.
Note: all of the data that should be in the promise is in the code (I can print it to console at any point in the code), but when I put it on the website it won't print.
So, big surprise here: I was doing it all wrong. Lesson of the day: read up on Promises and how they work before running and gunning your way through some async code. It's not as intuitive as you would hope.
// I only had made changes to the first of the two methods.
app.get('/formfetch', function(req, res) {
async function data() {
let rawDat = await dbFormFetch.getNames();
return rawDat;
}
data().then((Names) => {
if (Names === undefined) {
res.end("Error: Names list came through as null.");
} else if (Names.length > 0) {
res.setHeader('Content-Type', 'application/json');
res.status(200).json({ "names": Names });
}
})
.catch((err) => {
res.status(404).send("name retrieval failed in server.js module")
console.log(err)
console.log("conversion of promise failed")
})
});
When you use res.end(), it finishes the response and renders the headers immutable from that point on, so it was the wrong thing to use here. Instead, I used the setHeader() method to tell the website what kind of content I'm sending, and then filled in the body by chaining the .json() method onto the status() response I sent. I've never worked with promises before and I'm fairly new to Node.js, so this was a bit of a learning curve, but hopefully this helps people who are where I was yesterday. If you're new to promises, see this article and this article before you try to use this tool; you'll save yourself hours of debugging and error tracing.
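To then get those names into the dropdown on the page, the browser can consume that JSON endpoint; a minimal client-side sketch (the select element's id is hypothetical, not from the original post):
// Browser-side: fill <select id="names"></select> from the /formfetch endpoint above
fetch('/formfetch')
.then(res => res.json())
.then(({ names }) => {
const select = document.getElementById('names'); // hypothetical element id
for (const name of names) {
const option = document.createElement('option');
option.textContent = name;
select.appendChild(option);
}
})
.catch(err => console.log(err));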

Express.js GET request not returning data on first call

I'm building an application using Node/Express/MongoDB (first time with all these) that will let me pull data from the DB and display it on an Express page. This is what the GET request looks like:
var str = "";
app.route('/view-reports').get(function(req, res) {
var cursor = collections.find({});
cursor.each(function(err, item) {
if (item != null) {
console.log(str);
str = str + "Substance: " + item.substance + "<br>";
}
if (err) {
console.log(err);
}
});
console.log(str);
res.send(str);
str = "";
});
I would expect this to return something like this:
Substance: a
Substance: b
Substance: c
However, the initial request does not return anything at all. The second request will return the above. If I enclose res.send(str) in an if conditional it simply will not load until a second request is made.
cursor.each() is asynchronous. That means it runs some time LATER, after your res.send(str), and thus you get the previous version of str. You need to collect all the data first and send your response only once you have all of it.
If you want all the data, then you could use promises and .toArray() like this:
app.route('/view-reports').get(function(req, res) {
collections.find({}).toArray().then(data => {
let result = data.map(item => {
return "Substance: " + item.substance + "<br>";
}).join("");
res.send(result);
}).catch(err => {
// database error
console.log(err);
res.sendStatus(500);
});
});
Note: This also wisely gets rid of the str variable which was outside the scope of the request and thus could easily lead to a concurrency bug when multiple requests were in flight at the same time (from different users).
Create a router specifically for substances and use it in app. Instead of <br> breaks you could build a <ul>; also, that processing should happen on the front end. Separate your concerns: the server shouldn't have to worry about any rendering. One purpose per process.
Routers can be created per resource: a router for substances, for cats, for dogs. Each individual router has its own GET, POST, DELETE, and PUT handlers that let you work with that resource. app can use all the routers at once.
app.use(catRouter);
app.use(mooseRouter);
app.use(platypusRouter);
const { Router } = require('express');
const createError = require('http-errors');
let substanceRouter = new Router();
function buildElement(arr)
{
let start = '';
arr.forEach(val => {
if(!val) return;
start += `Substance : ${val.substance}<br>`;
});
return start;
}
substanceRouter.get('/endpoint/whatever', function(req, res, next) {
collections.find({}).toArray()
.then(results => {
if(!results) throw new Error('Resource Not Found');
let output = buildElement(results);
res.json(output);
next();
})
.catch(err => next(createError(404, err.message)));
})
app.use(substanceRouter);
Alternately we can write :
let output = results
.filter(sub => !!sub)
.map(sub => `Substance : ${sub.substance}`)
.join('<br>');
res.json(output);
But be advised this adds memory overhead: it generates a completely new array to house the results, consuming at worst O(n) extra memory.
