I have the following implementation where everything works but this line:
lineNumber: line.lineNumber
this line returns undefined. I'm adding a fuller fragment of the code below. My question is: does readline provide a standard way to get the line number somehow? Or do I have to implement my own counter to keep track of it? That would be simple, but I'd rather not if there's a standard way of doing it.
/**
 * Search for occurrences of the specified pattern in the received list of files.
 * @param filesToSearch - the list of files to search for the pattern
 * @returns {Promise} - resolves with the information about the encountered matches for the pattern specified.
 */
const findPattern = (filesToSearch) => {
  console.log(filesToSearch);
  return new Promise((resolve, reject) => {
    var results = [];
    // iterate over the files
    for (let theFile of filesToSearch) {
      let dataStream = fs.createReadStream(theFile);
      let lineReader = readLine.createInterface({
        input: dataStream
      });
      let count = 0; // this would do the trick but I'd rather use a standard approach if there's one
      // iterates over each line of the current file
      lineReader.on('line', (line) => {
        count++;
        if (line.indexOf(searchPattern) > 0) {
          let currLine = line.toString();
          currLine = currLine.replace(/{/g, ''); // clean up { from the string if present
          results.push({
            fileName: theFile,
            value: currLine,
            lineNumber: line.lineNumber // HERE: this results in undefined
            //lineNumber: count // this does the trick but I'd rather use a standard approach.
          });
        }
      });
      // resolve the promise once the file scan is finished.
      lineReader.on('close', () => resolve(results));
    }
  });
};
Unfortunately there isn't a way to find the line number using the readline node module; however, using ES6 it isn't difficult to roll your own counter in one line of code.
const line_counter = ((i = 0) => () => ++i)();
When we create the callback function we simply default the second parameter to the line_counter function, and we can act as though the line and the line number are both being passed when a line event occurs.
rl.on("line", (line, lineno = line_counter()) => {
console.log(lineno); //1...2...3...10...100...etc
});
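Applied to the findPattern code from the question, a minimal sketch might look like this (a fresh counter is created for each file inside the for loop; searchPattern and results are assumed to be in scope as in the question):

for (let theFile of filesToSearch) {
  const line_counter = ((i = 0) => () => ++i)(); // fresh counter per file
  let lineReader = readLine.createInterface({
    input: fs.createReadStream(theFile)
  });
  lineReader.on('line', (line, lineno = line_counter()) => {
    if (line.indexOf(searchPattern) > 0) {
      results.push({
        fileName: theFile,
        value: line.replace(/{/g, ''),
        lineNumber: lineno
      });
    }
  });
}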
Simply using a variable increment together with foo(data, ++i) will always pass the current count to the function. Note that 'data' events deliver chunks rather than whole lines, so strictly this counts chunks; a line-splitting variant is sketched below.
let i = 0
const stream = fs.createReadStream(yourFileName)
stream.on("data", (data) => foo(data, ++i))

const foo = (data, line) => {
  console.log("Data: ", data)
  console.log("Line number:", line)
}
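If you do need real line numbers from raw 'data' events, one sketch (assuming UTF-8 text with \n line endings, and foo as the consumer from above) is to buffer and split each chunk:

let i = 0;
let leftover = '';
stream.on('data', (data) => {
  const parts = (leftover + data).split('\n');
  leftover = parts.pop(); // the last piece may be an incomplete line
  for (const line of parts) foo(line, ++i);
});
stream.on('end', () => {
  if (leftover) foo(leftover, ++i); // final line with no trailing newline
});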
You need to include the lineno param if you are using the node linereader module (as opposed to the built-in readline):
lineReader.on('line', function (lineno, line) {
  if (line.indexOf(searchPattern) > 0) {
    let currLine = line.toString();
    currLine = currLine.replace(/{/g, ''); // clean up { from the string if present
    results.push({
      fileName: theFile,
      value: currLine,
      lineNumber: lineno
    });
  }
});
I'm trying to read through a file using a stream. The file is a custom file format that has some number of sections, each being 66 bytes. I have to check the value of the first 32 bytes against a given 32 bytes, and if they match, return the remaining 34 bytes. Currently I'm doing the following:
const fs = require('fs')

/**
 * @param {string} known
 */
function check(known) {
  let stream = fs.createReadStream('path/to/file', { highWaterMark: 66 });
  let out = '';
  stream.on('data', (chunk) => {
    let temp = '';
    for (let i = 0; i < 32; i++) {
      temp += String.fromCharCode(chunk.at(i));
    }
    if (temp === known) {
      for (let i = 32; i < 66; i++) {
        out += String.fromCharCode(chunk.at(i));
      }
    }
  });
  return out;
}
However (from checking with console.log), I know that the function in stream.on runs after check "finishes", so regardless of whether the given string is found, the returned value will always be the same. How do I fix this? Thanks in advance.
Streams are non-blocking and asynchronous. They call their event handlers some time in the future, after your check() function has returned. That is how they work.
As such, you cannot directly return your result from the function because the function returns before the result is known. Instead, you have to return the result using an asynchronous mechanism such as an event, a callback or a promise (I would prefer a promise in this case). See How to return the response from an asynchronous call for more detail on returning your asynchronously retrieved value.
But, using a stream to read a file that is structured in 66 byte chunks is the hard way to write this code because data blocks are going to be randomly sized and are not guaranteed to line up with your 66 byte blocks. It would be simpler to use const fileHandle = await fs.promises.open(...) and fileHandle.read(...) to read a 66 byte chunk from the beginning of the file, check the 32 bytes you want to check and then communicate back the remaining 34 bytes using a promise.
Here's one such way to write it using handle.read() instead:
const fsp = require('fs').promises;

async function check(known) {
  const kReadLen = 66;
  const kHeaderLen = 32;
  let handle = await fsp.open('path/to/file');
  try {
    let buffer = Buffer.alloc(kReadLen, 0);
    let { bytesRead } = await handle.read(buffer, 0, kReadLen, 0);
    if (bytesRead !== kReadLen) {
      throw new Error("Insufficient data in the file");
    }
    const headerStr = buffer.toString('utf8', 0, kHeaderLen);
    if (headerStr === known) {
      return buffer.toString('utf8', kHeaderLen, kReadLen);
    } else {
      return "";
    }
  } finally {
    await handle.close();
  }
}
Sample Usage:
check(testKnown).then(result => {
  console.log(result);
}).catch(err => {
  console.log(err);
});
Consider this scenario:
I have 2 CSV files, each of which is sorted and contains the id field.
I need to join the rows using the id field. Because the files are already sorted by the id, I wanted to perform a merge join (https://en.wikipedia.org/wiki/Sort-merge_join).
For that I need a way to load some portion of both files, process it, and iteratively load more again from one or both files.
(The files are big and would not fit into memory, so only a streaming approach will work.)
The problem is the Node API: what should I use? readline will not work because of https://github.com/nodejs/node/issues/33463. Any other ideas?
I had to do something quite similar recently and decided to use the node-line-reader module, which has a simpler interface than the built-in readline. I then created a little recursive function that determines which file to read from next by comparing the id of the current CSV entry from each provided file. After that, the corresponding line gets written out to the target file, and the method is called again until all lines of all files are processed. Here's the whole class I ended up with:
const fs = require('fs');
const LineReader = require('node-line-reader').LineReader;

class OrderedCsvFileMerger {
  constructor(files, targetFile) {
    this.lineBuffer = [];
    this.initReaders(files);
    this.initWriter(targetFile);
  }

  initReaders(files) {
    this.readers = files.map(file => new LineReader(file));
  }

  initWriter(targetFile) {
    this.writer = fs.createWriteStream(targetFile);
  }

  async mergeFiles() {
    // initially read the first line from all files
    for (const reader of this.readers) {
      this.lineBuffer.push(await this.nextLine(reader));
    }
    return this.merge();
  }

  async nextLine(reader) {
    return new Promise((resolve, reject) => {
      reader.nextLine(function (err, line) {
        if (err) reject(err);
        resolve(line);
      });
    });
  }

  async merge() {
    if (this.allLinesProcessed()) {
      return;
    }
    let currentBufferIndex = -1;
    let minRowId = Number.MAX_VALUE;
    for (let i = 0; i < this.lineBuffer.length; i++) {
      // implement parsing logic if your lines do not start with an integer id
      const currentRowId = parseInt(this.lineBuffer[i]);
      if (currentRowId < minRowId) {
        minRowId = currentRowId;
        currentBufferIndex = i;
      }
    }
    const line = this.lineBuffer[currentBufferIndex];
    this.writer.write(line + "\n");
    this.lineBuffer[currentBufferIndex] = await this.nextLine(this.readers[currentBufferIndex]);
    return this.merge();
  }

  allLinesProcessed() {
    return this.lineBuffer.every(l => !l);
  }
}
(async () => {
  const input = ['./path/to/csv1.csv', './path/to/csv2.csv'];
  const target = './path/to/target.csv';
  const merger = new OrderedCsvFileMerger(input, target);
  await merger.mergeFiles();
  console.log("Files were merged successfully!");
})().catch(err => {
  console.log(err);
});
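As a side note: if the readline issue linked in the question does not affect your Node version, readline's pull-based async iterator (the same for await pattern used in a later answer below) offers another way to read one line at a time on demand. A minimal sketch of a per-file "give me the next line" reader built on it (names are illustrative):

const fs = require('fs');
const readline = require('readline');

// Pull-based line source: each next() call yields one line on demand.
function lineIterator(path) {
  const rl = readline.createInterface({ input: fs.createReadStream(path) });
  return rl[Symbol.asyncIterator]();
}

(async () => {
  const it = lineIterator('./path/to/csv1.csv');
  const { value: firstLine, done } = await it.next();
  console.log(done ? '(empty file)' : firstLine);
})();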
I'm banging my head against async promise recursion. I have a bunch of promises that resolve when async data is downloaded (combined by Promise.all). But sometimes, in the data that I just downloaded, there is a link to other data that must be downloaded (recursion). The best explanation is showing the code, I guess; comments are in the code.
(I have tried various combinations to no avail.)
var urls = ['http://czyprzy.vdl.pl/file1.txt', 'http://czyprzy.vdl.pl/file2.txt', 'http://czyprzy.vdl.pl/file3.txt'];
var urlsPromise = [];
var secondPart = [];
var thirdPart = [];

function urlContent(url, number) {
  return new Promise(function (resolve) {
    var dl = request(url, function (err, resp, content) {
      if (err || resp.statusCode >= 400) {
        return resolve({number : number, url : url, error : 'err'});
      }
      if (!err && resp.statusCode == 200) {
        // if there is 'file' inside the content, we need to (would like to :) download this new file by recursion
        if (content.indexOf('file') !== -1) {
          content = content.slice(content.indexOf('file') + 4);
          content = +content; // (number to pass later on, so we know what file we are working on)
          url = 'http://czyprzy.vdl.pl/file' + content + '.txt'; // (we build the new address)
          //urlsPromise.push(urlContent(url, content)); // this will perform AFTER Promise.all(urlsPromise), so we simply can't do recursion (like that) here
          // If we use another promise array that resolved items are pushed to, everything will
          // work just fine - but only the first time. Then we would need to add another
          // (thirdPart) array and use another Promise.all(thirdPart)... and so on and so on...
          // The problem is I don't know how many files there will be, so I have no idea how many
          // 'parts' for Promise.all I need to create. Some kind of asynchronous loop/recursion
          // would save me here, but I don't know how to do that properly so the code runs in the
          // proper order.
          secondPart.push(urlContent(url, content));
        }
        return resolve({number : number, url : url}); // this goes to the 'urlsPromise' array
      }
    });
  });
}

if (urls.length !== 0) {
  for (var i = 0; i < urls.length; i++) {
    urlsPromise.push(urlContent(urls[i], i + 1));
  }
}
Promise.all(urlsPromise).then(function(urlsPromise) {
  console.log('=======================================');
  console.log('urlsPromise:\n');
  console.log(urlsPromise); // some code/calculations here
}).then(function() {
  return Promise.all(secondPart).then(function(secondPart) {
    console.log('=======================================');
    console.log('secondPart:\n');
    console.log(secondPart); // some code/calculations here
    secondPart.forEach(function(item) {
      thirdPart.push(urlContent(item.url, item.number + 3));
    });
  });
}).then(function() {
  return Promise.all(thirdPart).then(function(thirdPart) {
    console.log('=======================================');
    console.log('thirdPart:\n');
    console.log(thirdPart); // some code/calculations here
  });
}).then(function() {
  console.log();
  console.log('and so on and so on...');
});
//// files LINKING (those files do exist on live server - just for testing purposes):
// file1->file4->file7->file10 /-/ file1 content: file4 /-/ file4 content: file7 /-/ file7 content: file10
// file2->file5->file8->file11 /-/ file2 content: file5 /-/ file5 content: file8 /-/ file8 content: file11
// file3->file6->file9->file12 /-/ file3 content: file6 /-/ file6 content: file9 /-/ file9 content: file12
//// the console.log output looks like this:
// =======================================
// urlsPromise:
// [ { number: 1, url: 'http://czyprzy.vdl.pl/file4.txt' },
// { number: 2, url: 'http://czyprzy.vdl.pl/file5.txt' },
// { number: 3, url: 'http://czyprzy.vdl.pl/file6.txt' } ]
// =======================================
// secondPart:
// [ { number: 4, url: 'http://czyprzy.vdl.pl/file7.txt' },
// { number: 5, url: 'http://czyprzy.vdl.pl/file8.txt' },
// { number: 6, url: 'http://czyprzy.vdl.pl/file9.txt' } ]
// =======================================
// thirdPart:
// [ { number: 7, url: 'http://czyprzy.vdl.pl/file10.txt' },
// { number: 8, url: 'http://czyprzy.vdl.pl/file11.txt' },
// { number: 9, url: 'http://czyprzy.vdl.pl/file12.txt' } ]
// and so on and so on...
The await keyword can massively simplify this. You won't need to use a self-recursive function. This demo fakes the server call with a randomly sized array.
https://jsfiddle.net/mvwahq19/1/
// setup: create a list with a random number of options.
var sourceList = [];
var numItems = 10 + Math.floor(Math.random() * 20);
for (var i = 0; i < numItems; i++) {
  sourceList.push(i);
}
sourceList.push(100);
var currentIndex = 0;

// a function which returns a promise. Imagine it is asking a server.
function getNextItem() {
  var item = sourceList[currentIndex];
  currentIndex++;
  return new Promise(function(resolve) {
    setTimeout(function() {
      resolve(item);
    }, 100);
  });
}

async function poll() {
  var collection = [];
  var done = false;
  while (!done) {
    var item = await getNextItem();
    collection.push(item);
    console.log("Got another item", item);
    if (item >= 100) {
      done = true;
    }
  }
  console.log("Got all items", collection);
}

poll();
You can write a normal loop (a while loop here) whose contents use await.
This answer was provided thanks to trincot - https://stackoverflow.com/users/5459839/trincot
When I asked him this question directly, he supported me with his time and knowledge and gave this excellent answer.
CODE:
//// files LINKING (those files do exist on live server - just for testing purposes):
// file1->file4(AND file101)->file7->file10 /-/ file1 content: file4 /-/ file4 content: file7 /-/ file7 content: file10 /-/ file10 content: EMPTY /-/ file101 content: EMPTY
// file2->file5(AND file102)->file8->file11 /-/ file2 content: file5 /-/ file5 content: file8 /-/ file8 content: file11 /-/ file11 content: EMPTY /-/ file102 content: EMPTY
// file3->file6(AND file103)->file9->file12 /-/ file3 content: file6 /-/ file6 content: file9 /-/ file9 content: file12 /-/ file12 content: EMPTY /-/ file103 content: EMPTY
var urls = ['http://czyprzy.vdl.pl/file1.txt', 'http://czyprzy.vdl.pl/file2.txt', 'http://czyprzy.vdl.pl/file3.txt'];
var urlsPromise = [];

function requestPromise(url) {
  return new Promise(function(resolve, reject) {
    request(url, function (err, resp, content) {
      if (err || resp.statusCode != 200) reject(err || resp.statusCode);
      else resolve(content);
    });
  });
}

async function urlContent(url, number) {
  var arr = [];
  let content = await requestPromise(url);
  while (content.indexOf(';') !== -1) {
    var semiColon = content.indexOf(';');
    var fileLink = content.slice(content.indexOf('file'), semiColon + 1);
    content = content.replace(fileLink, ''); // we need to remove the file link so we won't iterate over it again; we add only new links to the array
    var fileLinkNumber = fileLink.replace('file', '');
    fileLinkNumber = fileLinkNumber.replace(';', '');
    fileLinkNumber = +fileLinkNumber;
    url = 'http://czyprzy.vdl.pl/file' + fileLinkNumber + '.txt'; // we build the new address
    arr.push({url, fileLinkNumber});
  }
  if (content.indexOf('file') !== -1) {
    var fileLinkNumber = content.slice(content.indexOf('file') + 4);
    fileLinkNumber = +fileLinkNumber;
    url = 'http://czyprzy.vdl.pl/file' + fileLinkNumber + '.txt';
    arr.push({url, fileLinkNumber});
  }
  var newArr = arr.map(function(item) {
    return urlContent(item.url, item.fileLinkNumber); // the return IS important here
  });
  return [].concat(arr, ...await Promise.all(newArr));
}

async function doing() {
  let urlsPromise = [];
  for (let i = 0; i < urls.length; i++) {
    urlsPromise.push(urlContent(urls[i], i + 1));
  }
  let results = [].concat(...await Promise.all(urlsPromise)); // flatten the array of arrays
  console.log(results);
}
//// this is only to show Promise.all chaining - so you can do an async loop, and then wait for some other async data - in a proper chain.
var test_a = ['http://czyprzy.vdl.pl/css/1.css', 'http://czyprzy.vdl.pl/css/2.css', 'http://czyprzy.vdl.pl/css/cssa/1a.css', 'http://czyprzy.vdl.pl/css/cssa/2a.css'];
var promisesTest_a = [];

function requestStyle(url) {
  return new Promise(function(resolve, reject) {
    request(url, function(error, response, content) {
      if (response.statusCode === 200 && !error) {
        resolve(content);
      } else {
        reject(error);
      }
    });
  });
}

for (var i = 0; i < test_a.length; i++) {
  promisesTest_a.push(requestStyle(test_a[i]));
}

Promise.all(promisesTest_a).then(function(promisesTest_a) {
  console.log(promisesTest_a);
}).then(function() {
  console.log('\nNow we start with #imports...\n');
}).then(function() {
  return doing();
}).then(function() {
  console.log('ALL DONE!');
});
COMMENT:
First, an explanation of what [...] does - the spread syntax (just in case you don't know it).
var arr = [];
var array1 = ['one', 'two', 'three'];
var array2 = [['four', 'five', ['six', 'seven']], 'eight', 'nine', 'ten'];

arr = array1.concat(array2);
console.log(arr); // it does not flatten the arrays - it just concatenates them (joins them together)
console.log('---');

// however
arr = array1.concat(...array2);
console.log(arr); // notice the [...] - as you can see it flattens the array by one level: 'four' and 'five' are pulled out of their array - think of it as moving up a level :) remember that it pulls up the WHOLE nested array, so 'six' and 'seven' are now 1 level deep (up from 2 levels deep, but still inside another array).
console.log('---');

// so
arr = [].concat(...arr);
console.log(arr); // hurray, our array is flat (a single array without nested elements)
console.log();
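For reference, with the arrays as defined above, the three logs print:

// 1st: [ 'one', 'two', 'three', [ 'four', 'five', [ 'six', 'seven' ] ], 'eight', 'nine', 'ten' ]
// 2nd: [ 'one', 'two', 'three', 'four', 'five', [ 'six', 'seven' ], 'eight', 'nine', 'ten' ]
// 3rd: [ 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten' ]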
All files (links) that are ready to be downloaded (the 3 starting ones in the urls array) are downloaded almost immediately (a synchronous loop over the array that contains them - one after the other, but very fast, right away, because we simply iterate over them synchronously).
Then, when we have their contents (because we await until the content is downloaded - so we have the resolved promise data here), we start to look for info about other possible urls (files) related to the one we already got, in order to download them (via async recursion).
When we have found all the info about possible additional urls/files, we push it to the data array (named arr in our code) and download them (thanks to the mutation of url).
We download them by returning the async urlContent function, which needs to await the requestPromise promise (so we have the resolved/rejected data in urlContent and, if needed, can mutate it - build the proper url to get the next file/content).
And so on, until we have "iterated over" (downloaded) all the files. Every time urlContent is called, it returns an array of promises (the newArr variable) that are initially pending. When we await Promise.all(newArr), execution only resumes at that spot once ALL those promises have been resolved. And so, at that moment, we have the values for each of these promises. Each of these is an array. We use one big concat to knit all those arrays together into one big array, also including the elements of arr (we need to remember that there can be more than 1 file to download from a file we have already downloaded - that is why we store values in the data array - named arr in the code - which stores the requestPromise function's resolved/rejected values). This "big" array is the value with which a promise is resolved. Recall that this promise is the one that was already returned by the current function invocation, at the time the first await was executed.
This is the important part: urlContent returns a single promise, and that (returned) promise is resolved with an array as its value. Note that an async function returns its promise to the caller immediately, when the first await is encountered. The return statement in an async function determines the value with which that returned promise is resolved. In our case that is an array.
So urlContent on every call returns a promise whose resolved value is an array (built with the [...] spread syntax - it returns a promise that eventually resolves to an array). These are collected by our async doing function (because 3 urls were fired at the start - each one has its own urlContent path), which collects (awaits!) all those arrays from Promise.all(urlsPromise), and when they are resolved (we await for them to be resolved and passed through by Promise.all) it 'returns' the data (the results variable). To be precise, doing returns a promise (because it is async). But by the way we call doing, we show we are not interested in what this promise resolves to; in fact, since doing does not have a return statement, that promise resolves to undefined (!). Anyway, we don't use it - we merely output the results to the console.
One thing that can be confusing with async functions is that the return statement is not executed when the function returns (what's in a name, right!? ;). The function has already returned when it executed the first await. When it eventually executes the return statement, it does not really return a value; it resolves "its own" promise, the one it had returned earlier. If we really wanted to separate output from logic, we should not do console.log(results) there, but do return results, and then, where we call doing, we could do doing().then(console.log); now we do use the promise returned by doing!
I would reserve the verb "to return" for what the caller of a function gets back from it synchronously.
I would use "to resolve" for the action that sets a promise to a resolved state with a value, a value that can be accessed with await or .then().
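To make that return-versus-resolve distinction concrete, here is a minimal sketch (the function and values are made up for illustration):

async function f() {
  // by the time this line resumes, f() has ALREADY returned its promise to the caller
  await new Promise(resolve => setTimeout(resolve, 100));
  return 42; // this does not "return" to the caller; it resolves the promise f() returned earlier
}

f().then(value => console.log(value)); // logs 42 about 100 ms later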
I'm just starting to play around with Puppeteer (headless Chrome) and Node.js. I'm scraping some test sites, and things work great when all the values are present, but if a value is missing I get an error like:
Cannot read property 'src' of null (so in the code below, the first two passes might have all values, but on the third pass there is no picture, so it just errors out).
Before, I was using if (!picture) continue; but I think it's not working now because of the for loop.
Any help would be greatly appreciated, thanks!
for (let i = 1; i <= 3; i++) {
  //...Getting to correct page and scraping it three times
  const result = await page.evaluate(() => {
    let title = document.querySelector('h1').innerText;
    let article = document.querySelector('.c-entry-content').innerText;
    let picture = document.querySelector('.c-picture img').src;
    if (!document.querySelector('.c-picture img').src) {
      let picture = 'No Link';
    } // throws error
    let source = "The Verge";
    let categories = "Tech";
    if (!picture)
      continue; // throws error
    return {
      title,
      article,
      picture,
      source,
      categories
    }
  });
}
let picture = document.querySelector('.c-picture img').src;
if (!document.querySelector('.c-picture img').src) {
  let picture = 'No Link';
} // throws error
If there is no picture, then document.querySelector() returns null, which does not have a src property. You need to check that your query found an element before trying to read the src property.
Moving the null-check to the top of the function has the added benefit of saving unnecessary calculations when you are just going to bail out anyway.
async function scrape3() {
  // ...
  for (let i = 1; i <= 3; i++) {
    //...Getting to correct page and scraping it three times
    const result = await page.evaluate(() => {
      const pictureElement = document.querySelector('.c-picture img');
      if (!pictureElement) return null;
      const picture = pictureElement.src;
      const title = document.querySelector('h1').innerText;
      const article = document.querySelector('.c-entry-content').innerText;
      const source = "The Verge";
      const categories = "Tech";
      return {
        title,
        article,
        picture,
        source,
        categories
      }
    });
    if (!result) continue;
    // ... do stuff with result
  }
}
Answering comment question: "Is there a way just to skip anything blank, and return the rest?"
Yes. You just need to check the existence of each element that could be missing before trying to read a property off of it. In this case we can omit the early return since you're always interested in all the results.
async function scrape3() {
  // ...
  for (let i = 1; i <= 3; i++) {
    const result = await page.evaluate(() => {
      const img = document.querySelector('.c-picture img');
      const h1 = document.querySelector('h1');
      const content = document.querySelector('.c-entry-content');
      const picture = img ? img.src : '';
      const title = h1 ? h1.innerText : '';
      const article = content ? content.innerText : '';
      const source = "The Verge";
      const categories = "Tech";
      return {
        title,
        article,
        picture,
        source,
        categories
      }
    });
    // ...
  }
}
Further thoughts
Since I'm still on this question, let me take this one step further, and refactor it a bit with some higher level techniques you might be interested in. Not sure if this is exactly what you are after, but it should give you some ideas about writing more maintainable code.
// Generic reusable helper to return an object property
// if the object exists and has that property, else a default value
//
// This is a curried function accepting one argument at a
// time and capturing each parameter in a closure.
//
// (Two fixes to make this runnable: "default" is a reserved word, so
// the parameter is named defaultValue; and DOM properties such as
// innerText and src are inherited rather than own properties, so we
// test with the "in" operator instead of hasOwnProperty.)
//
const maybeGetProp = defaultValue => key => object =>
  (object && key in object) ? object[key] : defaultValue

// Pass in empty string as the default value
//
const getPropOrEmptyString = maybeGetProp('')

// Apply the second parameter, the property name, making 2
// slightly different functions which have a default value
// and a property name pre-loaded. Both functions only need
// an object passed in to return either the property if it
// exists or an empty string.
//
const maybeText = getPropOrEmptyString('innerText')
const maybeSrc = getPropOrEmptyString('src')

async function scrape3() {
  // ...
  // The _ parameter name is acknowledging that we expect
  // an argument passed in but saying we plan to ignore it.
  //
  const evaluate = _ => page.evaluate(() => {
    // NOTE: page.evaluate serializes this callback and runs it in the
    // browser, so in real Puppeteer code the helpers above would have
    // to be defined inside this callback (or passed in); they cannot
    // be closed over from the Node.js scope.
    //
    // Attempt to retrieve the desired elements
    //
    const img = document.querySelector('.c-picture img');
    const h1 = document.querySelector('h1')
    const content = document.querySelector('.c-entry-content')

    // Return the results, with empty string in
    // place of any missing properties.
    //
    return {
      title: maybeText(h1),
      article: maybeText(content),
      picture: maybeSrc(img),
      source: 'The Verge',
      categories: 'Tech'
    }
  })

  // Start with an empty array of length 3
  //
  const evaluations = Array(3).fill()
    // Then map over that array, ignoring the undefined
    // input, and return a promise for a page evaluation
    //
    .map(evaluate)

  // All 3 scrapes are occurring concurrently. We'll
  // wait for all of them to finish.
  //
  const results = await Promise.all(evaluations)

  // Now we have an array of results, so we can
  // continue using array methods to iterate over them
  // or otherwise manipulate or transform them
  //
  results
    .filter(result => result.title && result.picture)
    .forEach(result => {
      //
      // Do something with each result
      //
    })
}
Try-catch worked for me:
let name = '';
try {
  // $eval needs a page function as its second argument; adjust the
  // selector and property to your case
  name = await page.$eval('element', el => el.innerText);
} catch (error) {
  name = '';
}
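An alternative sketch that avoids the exception path entirely, using the question's selector and fallback (page.$ resolves to null when nothing matches):

const img = await page.$('.c-picture img'); // null when the element is missing
const picture = img ? await page.evaluate(el => el.src, img) : 'No Link';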
I have a file with 65,000,000 lines that is about 2gb in size.
I want to read this file N lines at a time, perform a db insert operation, and then read the next N, with N being, say, 1000 in this case. Insert order doesn't matter, so synchronous is fine.
What's the best way of doing this? I've only found ways to either load 1 line at a time, or methods that read the whole file into memory. Sample code below that I've been using to read the file one line at a time:
var LineByLineReader = require('line-by-line'); // the npm line-by-line module

var singleFileParser = (file, insertIntoDB) => {
  var lr = new LineByLineReader(file);

  lr.on('error', function(err) {
    // 'err' contains error object
    console.error(err);
    console.error("Error reading file!");
  });

  lr.on('line', function(line) {
    insertIntoDB(line);
    // 'line' contains the current line without the trailing newline character.
  });

  lr.on('end', function() {
    // All lines are read, file is closed now.
  });
};
Lines can only be parsed one at a time by someone. So, if you want 10 at once, then you just collect them one at a time until you have collected 10 and then process the 10.
I did not think Jarek's code quite worked right so here's a different version that collects 10 lines into an array and then calls dbInsert():
var tenLines = [];

lr.on('line', function(line) {
  tenLines.push(line);
  if (tenLines.length === 10) {
    lr.pause();
    dbInsert(<yourSQL>, function(error, returnVal) {
      if (error) {
        // some sort of error handling here
      }
      tenLines = [];
      lr.resume();
    });
  }
});

// process last set of lines in the tenLines buffer (if any)
lr.on('end', function() {
  if (tenLines.length !== 0) {
    // process last set of lines
    dbInsert(...);
  }
});
Jarek's version seems to call dbInsert() on every line event rather than only on every 10th line event, and it did not process any leftover lines at the end of the file if they weren't a perfect multiple of 10 lines long.
Something like this should do
var cnt = 0;
var tenLines = [];

lr.on('line', function(line) {
  tenLines.push(line);
  if (++cnt >= 10) {
    lr.pause();
    // prepare your SQL statements from tenLines
    dbInsert(<yourSQL>, function(error, returnVal) {
      cnt = 0;
      tenLines = [];
      lr.resume();
    });
  }
});
This is my solution inside an async function:
let multipleLines = [];
const filepath = '<file>';
const numberLines = 50;

const lineReader = require('readline').createInterface({
  input: require('fs').createReadStream(filepath)
});

// process lines in batches of numberLines
for await (const line of lineReader) {
  multipleLines.push(line);
  if (multipleLines.length === numberLines) {
    await dbInsert(); // dbInsert is assumed to read from multipleLines
    multipleLines = [];
  }
}

// process the last set of lines (if any)
if (multipleLines.length !== 0) {
  await dbInsert();
}
This is my solution using built-in modules rather than NPM's line-by-line package:
var lineReader = require('readline').createInterface({
  input: require('fs').createReadStream(fileToRead)
});

let lines = [];

lineReader.on('line', function (line) {
  lines.push(line.split(","));
  if (lines.length == 10) {
    makeCall();
    lines = [];
  }
});

lineReader.on('close', function () {
  if (lines.length !== 0) {
    makeCall();
    lines = [];
  }
});

function makeCall() {
  // Do something with lines
}
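One caveat with this approach: the 'line' handler doesn't wait for makeCall() to finish, and lines is cleared right after the call returns, so makeCall must consume the batch synchronously. A sketch of one way to apply backpressure, assuming makeCall is changed to take the batch and return a promise (note that readline may still emit a few already-buffered 'line' events after pause()):

lineReader.on('line', function (line) {
  lines.push(line.split(","));
  if (lines.length === 10) {
    lineReader.pause(); // stop (most) further 'line' events while we work
    makeCall(lines.splice(0)).then(() => lineReader.resume());
  }
});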