Storing pdf text items into an array in javascript - javascript

I have read elsewhere (Reading PDF file using javascript) how to read the text in a PDF file and show it in the console. This is done using the following code:
// Parse sample.pdf and print the text of every item that carries a text property.
var PdfReader = require("pdfreader").PdfReader;
new PdfReader().parseFileItems("sample.pdf", function(err, item){
// err is not inspected here; items without a text property are skipped.
if (item && item.text)
console.log(item.text);
});
My question is, instead of showing the texts in the console using console.log, how do I store them in an array, for use at a later stage of the script?

Initialize an array above the parse function, then push the items to the array:
var PdfReader = require("pdfreader").PdfReader;
// Collects the text of every text item, in the order the parser reports them.
const arr = [];
new PdfReader().parseFileItems("sample.pdf", function(err, item){
  if (err) {
    // Report parse failures instead of silently ignoring them.
    console.error(err);
    return;
  }
  if (!item) {
    // The parser calls back without an item once the whole file has been read.
    // Only at this point is arr complete, so this is where it can be used;
    // code placed after parseFileItems() would run before any item arrives.
    console.log(arr);
    return;
  }
  if (item.text) {
    arr.push(item.text);
  }
});

const { PdfReader } = require("pdfreader");
var arr = [];

// Callback for each parser event: log errors, print the collected
// texts when no item is passed, otherwise store the item's text.
const onItem = (err, item) => {
  if (err) {
    console.error("error:", err);
    return;
  }
  if (!item) {
    console.log(arr);
    return;
  }
  if (item.text) {
    arr.push(item.text);
  }
};

new PdfReader().parseFileItems("test/sample.pdf", onItem);
I modified code from official example: https://github.com/adrienjoly/npm-pdfreader#raw-pdf-reading

Related

Replace different strings in file from an object in nodejs

I have a file where I'm trying to replace multiple strings with other strings in an object.
Currently I have the following:
// Read myfile.txt, rewrite the value of each config.<key> assignment from obj,
// then write the result back to the same file.
fs.readFile('myfile.txt', 'utf8', function(err, data) {
let formatted
for (var key in obj){
// Matches whatever follows "config.<key> = " up to the end of that line.
let re = new RegExp('(?<=config.' + key + ' = ).*(?=)', 'g');
// NOTE(review): every pass replaces on the original `data`, not on `formatted`,
// so the result of the previous iteration is discarded.
formatted = data.replace(re, `'${obj[key]}'`)
}
// Single write, after the loop has finished.
fs.writeFile('myfile.txt', formatted, 'utf8', function(err) {
if (err) return console.log(err);
})
})
This works however writeFile does overwrite the entire file each time so only one string ends up getting changed and saved at the end of the loop instead of having all of them. Is there a way where I can add all the changes in at once?
I have tried using replace and doing something like from other answers I've seen.
// Attempt: join all keys into one alternation so a single replace() covers every setting.
let regexStr = Object.keys(obj).join("|")
// NOTE(review): the alternation is not wrapped in its own group, so each "|" splits the
// whole lookbehind into separate alternatives rather than just separating the key names.
let re = new RegExp(`(?<=config.${regexStr}+[ ]=).*(?=)`, 'g')
// The matched text (what follows the lookbehind) is used as the lookup key into obj.
let format = data.replace(re, match => obj[match]);
This doesn't seem to work unless I use regexStr and not re. However, I need that specific regex that is shown in re.
I've also tried
// NOTE(review): the replacer function returns nothing, so each match is replaced
// by the string "undefined".
let result = data.replace(re, function (match, key, value){
obj[key] = value || key
})
But that just results in undefined.
Is there a way to tweak that replace to get it right? Or perhaps to read the file, loop through the whole thing, and write it all at once with the changes?
Try this: it applies every replacement to the file's content in memory and then writes the updated content back in a single write.
/**
 * Replace the value of every `config.<key> = ...` assignment in `text` with the
 * matching entry of `values`, wrapped in single quotes.
 *
 * @param {string} text - File content to rewrite.
 * @param {Object} values - Map of config key to its new value (own properties only).
 * @returns {string} The content with all replacements applied.
 */
function applyConfigValues(text, values) {
  let updated = text;
  for (const [key, value] of Object.entries(values)) {
    // Escape the key so regex metacharacters in it are matched literally.
    const escapedKey = key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // The dot after "config" is escaped too, otherwise it matches any character.
    const re = new RegExp(`(?<=config\\.${escapedKey} = ).*`, 'g');
    // A replacer function keeps "$&", "$1", ... inside the value from being
    // interpreted as replacement patterns.
    updated = updated.replace(re, () => `'${value}'`);
  }
  return updated;
}

// Read once, apply all replacements to the same string, write once.
fs.readFile('myfile.txt', 'utf8', (readErr, data) => {
  if (readErr) {
    return console.log(readErr);
  }
  const updated = applyConfigValues(data, obj);
  fs.writeFile('myfile.txt', updated, 'utf8', (writeErr) => {
    if (writeErr) {
      return console.log(writeErr);
    }
    console.log('File was writen');
  });
});

Asynchronously write data to GCS inside of a Promise

I'm trying to find a way to write json data to a file in a Google Cloud Storage bucket, inside of a promise.
What I'm finding is that if I try and .push() the values to an array one by one and then return that, it only gives me the first 3 results from the array (whereas console.log will return everything).
And if I try and write something within the local scope, it only returns the last value from the array (and overwrites all the previous values rather than appending them).
So essentially my question is: is there any way to write a promise or similar that will wait for all the looped through values to be gathered up, and once that's done return those values to a function that will then upload it all to GCS?
Or is there a way in which I can write these values to the .json file in GCS asynchronously, at the same time as the data is being scraped?
// Fetch every URL, scrape title/description/robots from each response and save
// the result as a JSON line to a Google Cloud Storage file.
const urls = [/* 20+ URLs go here... */];
let promises = [];
// Build array of Promises
urls.map(function(url) {
promises.push(axios.get(url));
});
// Map through the array of promises and get the response results
axios.all(promises).then((results) => {
results.map((res) => {
try {
// Scrape the data
const $ = new JSDOM(res.data);
const data = {};
// Each field falls back to '' when the element is not present in the document.
data.title = ($.window.document.querySelector('head > title') !== null ? $.window.document.querySelector('head > title').text : '');
data.description = ($.window.document.querySelector("meta[name='description']") !== null ? $.window.document.querySelector('meta[name="description"]').content : '');
data.robots = ($.window.document.querySelector("meta[name='robots']") !== null ? $.window.document.querySelector("meta[name='robots']").content : '');
// One JSON document per line.
const value = JSON.stringify(data) + '\n';
// Tried array.push(value) here but doesn't return all the values?
// Any way to return all the values and then bulk upload them to GCS outside of this code block?
// NOTE(review): every iteration saves to the same bucketName/filename, so each
// save targets the same object as the previous one.
const file = storage.bucket(bucketName).file(filename);
file.save(value, function(err) {
// NOTE(review): a save error is silently ignored here.
if (!err) {
// file written
}
})
} catch(e) {
console.log(e);
}
})
})
Sorry for the poor explanation, essentially I can't push all the values to an array and then upload that, and if I try to upload the values one by one I only get the last value in the looped through array.
Note: I'm not trying to save the data to a .json file locally with fs.writeFile() and then upload to GCS but send the JSON data directly to GCS without the step in between.
If I understood correctly what you need, this should work:
// Wait for every response, scrape each one into a JSON line, then upload all
// lines to GCS in a single save. Saving once avoids the per-result saves to the
// same file overwriting each other.
axios.all(promises).then((results) => {
  // Returns the named property of the first element matching `selector`,
  // or '' when no such element exists.
  const readProp = (document, selector, prop) => {
    const element = document.querySelector(selector);
    return element !== null ? element[prop] : '';
  };

  const lines = [];
  results.forEach((res) => {
    try {
      // Scrape the data
      const document = new JSDOM(res.data).window.document;
      const data = {};
      data.title = readProp(document, 'head > title', 'text');
      data.description = readProp(document, "meta[name='description']", 'content');
      data.robots = readProp(document, "meta[name='robots']", 'content');
      lines.push(JSON.stringify(data) + '\n');
    } catch (e) {
      // A page that cannot be scraped is logged and skipped; the others are still uploaded.
      console.log(e);
    }
  });

  // Adapt the callback-style save() to a promise so the chain settles only
  // after the upload has finished, and rejects with the real error.
  return new Promise((resolve, reject) => {
    const file = storage.bucket(bucketName).file(filename);
    file.save(lines.join(''), function(err) {
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });
}).catch((err) => {
  // Surface request or upload failures instead of leaving the rejection unhandled.
  console.log(err);
});

JavaScript 'undefined' when assigning var to item in string array

I'm building a website using Airtable API to scrape data from an Airtable spreadsheet using JS.
I'm running into a fairly simple(?) problem using JavaScript.
I'm putting data from the spreadsheet into a string array names. console.log(names[0]) prints out the string value correctly, but when I try to do something like:
// NOTE(review): presumably this runs before the asynchronous fetch has filled
// `names`, which would explain the undefined value — confirm where it is placed.
var test = names[0];
console.log(test),
It prints out undefined
Why does this happen?
My Code:
// Arrays filled from the 'Members' table; both are pushed to once per record,
// so names[i] and locations[i] belong to the same record.
var names = [];
var locations = [];
//ACCESS AIRTABLE API (GET DATA FROM AIRTABLE SPREADSHEET)
var Airtable = require('airtable');
// NOTE(review): the API key is hard-coded in source; it should presumably be loaded
// from configuration or the environment, and this key rotated.
var base = new Airtable({apiKey: 'keyQ7YUX3SUECVL4C'}).base('appSmUKDnFdEAT1YF');
base('Members').select({
view: "Grid view"
}).eachPage(function page(records, fetchNextPage) {
//Fill arrays with data from spreadsheet
records.forEach(function(record) {
names.push(record.get('parsedName'));
locations.push(record.get('parsedLocation'));
});
// Ask for the next page of records.
fetchNextPage();
}, function done(err) {
// NOTE(review): presumably called once paging has finished or failed, which would
// make this the place where names/locations are complete — confirm against the Airtable docs.
if (err) { console.error(err); return; }
});

Convert SSID list to JSON/Array

I'm trying to make it possible to choose different SSID's to switch the Wlan you are connected to from Browser.
var sys = require('sys');
var exec = require('child_process').exec;
// GET <prefix>/wlan: run a wireless scan and send back the lines containing ESSID.
app.get(prefix + '/wlan', function(req, res){
  exec("iwlist wlan0 scan | grep ESSID", function(error, stdout, stderr){
    if (error !== null) {
      console.log('Exec error ' + error);
      // Always answer the request; without a response the client would wait
      // indefinitely whenever the command fails.
      res.status(500).send('Exec error ' + error);
      return;
    }
    res.send(stdout);
  });
});
This is my code so far to get an SSID list.
The output looks like this:
ESSID:"WLAN-GUEST" ESSID:"WLAN1" ESSID:"WLAN-GUEST" ESSID:"WLAN1" ESSID:"WLAN2"
I have no idea why two ESSID's are listed twice but my main question is, how can I parse this to JSON or how can I access each entry like an array (wlanlist[0])?
Edit:
I tried to stdout.replace(" ",", "); and JSON.parse but as it's async it's sent without changes. (Not sure if that would work as sync)
Edit2: Trying to access the data like that:
// Once the DOM is ready, request the SSID list and fill the element with id "wlanoptions".
$(document).ready(function() {
$.get(prefix + '/wlan', function(wlanlist){
// Placeholder entry with an empty value.
document.getElementById("wlanoptions").options[0] = new Option("Select your WLAN:","");
// NOTE(review): if the server sends the raw stdout string, wlanlist[0] is a single
// character rather than an SSID; this only works once the response is an array.
document.getElementById("wlanoptions").options[1] = new Option(wlanlist[0],wlanlist[0])
});
});
Final Result:
// Split the scan output on the "ESSID:" marker and send the pieces as an array.
// Element 0 is whatever precedes the first marker.
var wlanlistarray = stdout.split("ESSID:");
res.send(wlanlistarray);
In addition:
//extract ssid and remove quotes
var wlanlist = new Array;
var step1 = stdout.split("ESSID:");
for(i = 1; i < step1.length; i++){
var arr = new Array;
arr = step1[i].split('"');
//if exists in array -> continue; else create new entry in wlanlist
if(wlanlist.indexOf(arr[1]) === -1){wlanlist.push(arr[1]);}
else{continue;}
}
res.send(wlanlist);
This should return an array of SSIDs:
// Element 0 is the text before the first "ESSID:"; the SSID entries start at index 1.
stdout.split("ESSID:")
Now clean up the " and you are all done.

Node JS Loop Through Array Before Creating Property

I have a JSON input which contains data linking it to a secondary model (Users). I need to loop through listingData.Agents to get each agent's ID and then look up that ID to get the user. I push the user's id to an array, but because the lookups are asynchronous the array is still empty when the create property function runs. How can I collect the data into the array first and only run the create once all the data is in place?
Thanks.
// Request handler: looks up a user for each agent in the request body, stores the
// user ids on the body, and creates a Property from it.
exports.createProperty = function(req,res,next) {
var listingData = req.body;
listingData.User = [];
_.forEach( listingData.Agents , function(n, key) {
// findOne delivers its result through a callback (presumably asynchronously),
// so the push below happens after this loop has already moved on.
User.findOne({ agentId : n.AgentId},function(err,user) {
// NOTE(review): err is ignored and user is dereferenced without a null check.
listingData.User.push(user._id);
});
});
// NOTE(review): nothing waits for the findOne callbacks above before create is called.
Property.create(listingData, function(err,property) {
if (err) {
res.status(400);
return res.send({reason:err.toString()});
}
// NOTE(review): sends req.property rather than the `property` passed to this callback — confirm intent.
res.send(req.property);
})}
If you don't mind introducing a new library into your code, node-async could solve your problem.
Using node-async, your code would be:
var async = require('node-async')
exports.createProperty = function(req,res,next) {
var listingData = req.body;
listingData.User = [];
async.each(listingData.User,
function(n, key) {
User.findOne({ agentId : n.AgentId},function(err,user) {
listingData.User.push(user._id);
});
},
function (asyncErr){
//handle asyncErr first
Property.create(listingData, function(err,property) {
if (err) {
res.status(400);
return res.send({reason:err.toString()});
}
res.send(req.property);
});
});

Categories