Read PDF one page at a time - pdf.js - javascript

I am trying to parse a PDF with more than 300 pages using the pdf-parse npm package, but my application crashes while parsing it.
Is there a way to parse one page at a time?
Below is the code I have tried.
function render_page(pageData) {
    // check the docs: https://mozilla.github.io/pdf.js/
    let render_options = {
        // replaces all occurrences of whitespace with standard spaces (0x20); the default value is `false`
        normalizeWhitespace: false,
        // do not attempt to combine same-line TextItems; the default value is `false`
        disableCombineTextItems: false
    }
    return pageData.getTextContent(render_options)
        .then(function (textContent) {
            return textContent.items.map(function (s) {
                return s.str
            }).join(''); // page text
        })
}

let dataBuffer = fs.readFileSync('male.pdf');

const options = {
    // internal page parser callback
    // you can set this option if you need another format besides raw text
    pagerender: render_page,
    // max page number to parse
    max: 4,
    // check https://mozilla.github.io/pdf.js/getting_started/
    version: 'v1.10.100'
}

pdf(dataBuffer, options).then(function (data) {
    res.send(data)
})
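For reference, here is a minimal sketch (assuming pdfjs-dist is installed; this uses pdf.js directly, not the pdf-parse API) of reading the text one page at a time, so only a single page's text content is held in memory at once. The require path varies between pdfjs-dist versions.

const fs = require('fs');
// the exact require path differs between pdfjs-dist versions
const pdfjsLib = require('pdfjs-dist/legacy/build/pdf.js');

async function parsePageByPage(filePath) {
    const data = new Uint8Array(fs.readFileSync(filePath));
    const doc = await pdfjsLib.getDocument({ data }).promise;
    for (let i = 1; i <= doc.numPages; i++) {
        const page = await doc.getPage(i);
        const textContent = await page.getTextContent();
        const text = textContent.items.map(item => item.str).join('');
        console.log(`--- page ${i} ---`);
        console.log(text);
        page.cleanup(); // release the page's resources before moving on
    }
}

parsePageByPage('male.pdf').catch(console.error);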

Related

Set cookies using js-cookies to array

Newbie here regarding JavaScript. I am following this thread to set cookies to an array by clicking a button: Product compare session. It works, but the problem is that when I reload or open a new page and click the button again, the cookie doesn't add a new value; it replaces all the cookies that were set on the previous page. Here is the script.
cookie_data_load = Cookies.get('compare_data');
$('.view__compare').attr("href", "https://shop.local/compare/?id=" + cookie_data_load);

var fieldArray = [];

$(".product__actions-item--compare").click(function () {
    fieldArray.push($(this).data("compare"));
    var unique = fieldArray.filter(function (itm, i) {
        return i == fieldArray.indexOf(itm);
    });
    var str = unique.join('-');
    Cookies.set('compare_data', str, { expires: 7, path: '/' });
    cookie_data = Cookies.get('compare_data');
    console.log(str);
    console.log(unique);
    alert(unique);
    $('.view__compare').attr("href", "https://shop.local/compare/?id=" + cookie_data);
    return false;
});
My second question is: how do I limit the number of values stored in the cookie array by the code above? Many thanks.
I have read the js-cookies GitHub page but can't understand a single thing.
*** Updated code from https://stackoverflow.com/users/8422082/uladzimir:
var fieldArray = (Cookies.get('compare_data') || '').split('-');

$(".product__actions-item--compare").click(function () {
    if (fieldArray.length >= 3) {
        alert("unfortunately limit exceeded :(");
    } else {
        fieldArray.push($(this).data("compare"));
        var unique = fieldArray.filter(function (itm, i) {
            return i == fieldArray.indexOf(itm);
        });
        var str = unique.join('-');
        Cookies.set("compare_data", str, { expires: 7, path: "/" });
        cookie_data = Cookies.get("compare_data");
        console.log(str);
        console.log(unique);
        alert(unique);
        $(".view__compare").attr(
            "href",
            "https://shop.local/compare/?id=" + cookie_data
        );
        return false;
    }
});
Ivan, whenever you reload a page, the array "fieldArray" is ALWAYS empty (even though there is data in the "compare_data" cookie from the previous browser session).
What you have to do is initialize "fieldArray" with its initial value taken from the cookie:
var fieldArray = (Cookies.get('compare_data') || '').split('-')
A cookie stores string data with a maximum size of 4KB. Moreover, a cookie has no idea whether it stores a serialized array, an object, or anything else; it just keeps a string of text, and that's it. So (as far as I know) there is no way to limit the array length through cookie settings.
So, the only workaround here is to do this length check programmatically, like the following:
$('.product__actions-item--compare').click(function () {
    if (fieldArray.length >= 3) {
        alert('unfortunately limit exceeded :(');
    } else {
        // do your actions
    }
});
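As an aside, since a cookie only ever holds a string, the array could just as well be stored as JSON instead of a '-' joined string. A minimal sketch with js-cookie (the cookie name and helper function here are hypothetical):

// read the previously stored array (or start empty), then write it back as a JSON string
var items = JSON.parse(Cookies.get('compare_data_json') || '[]');

function addCompareItem(id) {
    if (items.indexOf(id) === -1) {
        items.push(id);
    }
    Cookies.set('compare_data_json', JSON.stringify(items), { expires: 7, path: '/' });
}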

Copy a file/object and change the name

I'm building an API for a gallery, so I created a method that lets you copy an image from the database.
Now, I want to add a number at the end of the copied image's name. For example:
- original image name: image
- first copy name: image(1)
- second copy name: image(2)
How can I add the number to the name of the copied image automatically?
'use strict'

let imageObject = require('../models/image-object');
let fs = require('fs');
let path = require('path');

let gallery_controllers = {
    copyImage: function (req, res) {
        // get the id param of the image to copy
        let imageId = req.params.id;
        if (imageId == null) return res.status(404).send({ message: "no ID defined" });
        // look up the requested image in the database
        imageObject.findById(imageId, (err, image) => {
            if (err) return res.status(500).send({ message: 'err to response data' });
            if (!image) return res.status(404).send({ message: 'image not found' });
            if (image) {
                // set up a new model object
                let imageCopied = new imageObject();
                imageCopied.name = image.name;
                imageCopied.image = image.image;
                // save the copied image to the database
                imageCopied.save((err, image_copied) => {
                    if (err) return res.status(500).send({ message: "error 500" });
                    if (!image_copied) return res.status(404).send({ message: "error 404" });
                    return res.status(200).send({
                        image: image_copied
                    })
                })
            }
        })
    },
}
Here's a function that looks in the directory passed to it for files named file(nnn), where nnn is some sequence of digits, and returns the full path of the next one in the sequence (the one after the highest number that already exists).
This function pre-creates a placeholder file by that name to avoid concurrency issues with multiple asynchronous operations calling this function and potentially conflicting (if it only returned the filename without creating the file). To further handle conflicts, it creates the placeholder file in a mode that fails if it already exists (so only one invocation of this function will ever create that particular file) and it automatically retries to find a new number if it gets a conflict (e.g. someone else created the next available file before we got to it). All of this logic is to avoid the subtleties of possible race conditions in creating the next filename in the sequence.
Once the caller has a unique filename that this resolves to, then it is expected that they will overwrite the placeholder contents with their own contents.
// pass directory to look in
// pass base file name so it will look for next in sequence as in "file(3)"
// returns the full path of the unique placeholder file it has created
// the caller is then responsible for that file
// calling multiple times will create a new placeholder file each time
const fs = require('fs');
const path = require('path');

async function findNextName(dir, base) {
    let cntr = 0;
    const cntr_max = 5;
    const regex = new RegExp(`^${base}\\((\\d+)\\)$`);

    async function run() {
        const files = await fs.promises.readdir(dir);
        let highest = 0;
        for (let f of files) {
            let matches = f.match(regex);
            if (matches) {
                let num = parseInt(matches[1]);
                if (num > highest) {
                    highest = num;
                }
            }
        }
        let name = `${base}(${highest + 1})`;
        // create placeholder file with this name to avoid concurrency issues
        // of another request also trying to use the same next file
        try {
            // write to file, fail if the file exists due to a concurrency issue
            const fullPath = path.resolve(path.join(dir, name));
            await fs.promises.writeFile(fullPath, "placeholder", { flag: "wx" });
            return fullPath;
        } catch (e) {
            // if this fails because of a potential concurrency issue, then try again
            // up to cntr_max times to avoid looping forever on a persistent error
            if (++cntr < cntr_max) {
                return run();
            } else {
                throw e;
            }
        }
    }
    return run();
}
You could call it like this:
findNextName(".", "file").then(filename => {
    console.log(filename);
}).catch(err => {
    console.log(err);
});
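As a hedged sketch of the "overwrite the placeholder" step mentioned above, the caller could copy the source file's contents over the reserved path (the file names here are only examples):

async function copyWithNextName(srcPath, dir, base) {
    // reserve the next free name, e.g. "image(3)", then replace the placeholder contents
    const destPath = await findNextName(dir, base);
    await fs.promises.copyFile(srcPath, destPath);
    return destPath;
}

copyWithNextName('./image.png', '.', 'image')
    .then(dest => console.log('copied to', dest))
    .catch(console.error);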

How to increase browser localstorage size [duplicate]

I've written a web app that stores images in localStorage until you hit save (so it works offline if the signal is poor).
When the localStorage reaches 5MB Google Chrome produces an error in the javascript console log:
Uncaught Error: QUOTA_EXCEEDED_ERR: DOM Exception 22
How do I increase the size of the localStorage quota on Google Chrome?
5MB is a hard limit, and that is stupid. IndexedDB gives you ~50MB, which is more reasonable. To make it easier to use, try Dexie.js: https://github.com/dfahlander/Dexie.js
Update:
Dexie.js was actually still overkill for my simple key-value purposes, so I wrote this much simpler script: https://github.com/DVLP/localStorageDB
With this you have 50MB and can get and set values like this:
// Setting values
ldb.set('nameGoesHere', 'value goes here');
// Getting values - callback is required because the data is being retrieved asynchronously:
ldb.get('nameGoesHere', function (value) {
    console.log('And the value is', value);
});
Copy/paste the line below so ldb.set() and ldb.get() from the example above will become available.
!function(){function e(t,o){return n?void(n.transaction("s").objectStore("s").get(t).onsuccess=function(e){var t=e.target.result&&e.target.result.v||null;o(t)}):void setTimeout(function(){e(t,o)},100)}var t=window.indexedDB||window.mozIndexedDB||window.webkitIndexedDB||window.msIndexedDB;if(!t)return void console.error("indexDB not supported");var n,o={k:"",v:""},r=t.open("d2",1);r.onsuccess=function(e){n=this.result},r.onerror=function(e){console.error("indexedDB request error"),console.log(e)},r.onupgradeneeded=function(e){n=null;var t=e.target.result.createObjectStore("s",{keyPath:"k"});t.transaction.oncomplete=function(e){n=e.target.db}},window.ldb={get:e,set:function(e,t){o.k=e,o.v=t,n.transaction("s","readwrite").objectStore("s").put(o)}}}();
You can't, it's hard-wired at 5MB. This is a design decision by the Chrome developers.
In Chrome, the Web SQL db and cache manifest also have low limits by default, but if you package the app for the Chrome App Store you can increase them.
See also Managing HTML5 Offline Storage - Google Chrome.
The quota is for the user to set: how much space they wish to allow each website.
Since the purpose is to restrict web pages, the pages themselves cannot change the restriction.
If storage is low, you can prompt the user to increase it.
To find out whether storage is low, you could probe the localStorage size by saving an object and then deleting it.
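A rough sketch of that probing idea (the key name and probe size are arbitrary): try to write a throwaway value and treat a quota error as "storage is nearly full".

function isLocalStorageNearlyFull(probeSizeKb) {
    const probe = 'x'.repeat((probeSizeKb || 100) * 1024);
    try {
        localStorage.setItem('__storage_probe__', probe);
        localStorage.removeItem('__storage_probe__');
        return false; // the probe fit, so there is still room
    } catch (e) {
        return true;  // quota exceeded (or storage unavailable)
    }
}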
You can't, but if you save JSON in your localStorage you can use a library to compress the data, like https://github.com/k-yak/JJLC (demo: http://k-yak.github.io/JJLC/).
Here you can test your program; you should also handle the cases where the quota is exceeded.
https://stackoverflow.com/a/5664344/2630686 The answer above is amazing. I applied it in my project and implemented a full solution for requesting all kinds of resources.
// First, include the ldb code from the answer referenced above.
export function get_file({ url, d3, name, enable_request = false }) {
    if (name === undefined) { // derive the saved-data name from the url if none is given
        name = url.split('?')[0].split('/').at(-1).split('.')[0];
    }
    const html_name = location.href.split('/').at(-1).split('.')[0]
    name = `${html_name}_${name}`
    let ret = null;
    const is_outer = is_outer_net(url); // check for an external url by whether it starts with http or //
    // try to read the data locally; resolve to null if not found
    if (is_outer && !enable_request) {
        if (localStorage[name]) {
            ret = new Promise(resolve => resolve(JSON.parse(localStorage[name])));
        } else {
            ret = new Promise(r => {
                ldb.get(name, function (value) {
                    r(value)
                })
            });
        }
    } else {
        ret = new Promise(r => r(null))
    }
    ret.then(data => {
        if (data) {
            return data
        } else {
            const method = url.split('.').at(-1)
            // d3 methods supported
            if (d3 && d3[method]) {
                ret = d3[method](url)
            } else {
                if (url.startsWith('~/')) { // local file access supported; you need a local service that returns local file data for the requested address
                    url = `http://localhost:8010/get_file?address=${url}`
                }
                ret = fetch(url).then(data => {
                    // parse data according to the requested data type
                    if (url.endsWith('txt')) {
                        return data.text()
                    } else {
                        return data.json()
                    }
                })
            }
            ret = ret.then(da => {
                data = da
                if (is_outer) { // save data to localStorage first
                    localStorage[name] = JSON.stringify(data);
                }
            }).catch(e => { // save to ldb if the 5MB quota is exceeded
                ldb.set(name, data);
            }).finally(_ => {
                return data;
            });
        }
    })
    return ret;
}

First property of objects parsed by csv-parse is inaccessible

I am parsing a CSV file with the following contents using csv-parse:
userID,sysID
20,50
30,71
However, on the objects returned it isn't possible to access the property created from the first column userID.
Here is my code:
async function main() {
    let systemIDs = await getSystemIds('./systems.csv');
    console.log(`Scanning data for ${systemIDs.length} systems..`);
    console.log(systemIDs[0]);
    console.log(systemIDs[0].userID); // This prints undefined
    console.log(systemIDs[0].sysID);  // This prints the correct value
}

async function getSystemIds(path) {
    let ids = [];
    await new Promise((resolve, reject) => {
        const csvParser = csvParse({ columns: true, skip_empty_lines: true });
        FS.createReadStream(path)
            .pipe(csvParser)
            .on('readable', () => {
                let record;
                while (record = csvParser.read()) {
                    ids.push(record);
                }
            })
            .on('finish', () => {
                resolve();
            });
    });
    return ids;
}
Output -
Scanning data for 2 systems..
{ 'userID': '20', sysID: '50' }
undefined // <== The Problem
50
I notice the first column key, userID, has single quotes around it in the console output whereas sysID doesn't, but I don't know what is causing them.
Figured it out myself in the end: I needed the bom option. The documentation states it should be set to true for UTF-8 files, but it defaults to false.
By default, Excel generates CSV files with a BOM as the first character, and the parser picks it up as part of the first header (and key name).
With the bom option set to true, the parser can handle CSV files generated by Excel or other programs.
const csvParser = csvParse({
    columns: true,
    skip_empty_lines: true,
    bom: true
});
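This also explains the quotes around 'userID' in the console output: without bom: true, the invisible BOM character stays glued to the first header, so the actual key is '\uFEFFuserID', which is not a plain identifier and therefore gets quoted. A small illustration:

// what the parser effectively produced without the bom option
const record = { '\uFEFFuserID': '20', sysID: '50' };
console.log(record.userID);          // undefined
console.log(record['\uFEFFuserID']); // '20'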

Node.js transform stream only passes through data as-is

I'm trying to create a little Node.js app that reads Markdown text and converts it to HTML. To achieve this, I wanted to create a transform stream that gets one character at a time, figures out its meaning, and returns the HTML version of it.
So, for example, if I pass the transform stream a *, it should return <b> (or </b>).
However, the Transform stream does not actually transform the data: whatever I push to it comes back exactly as I pushed it, and when I put a console.log statement into the transform method of the stream, I saw no output, as if the method isn't even called.
Here's the file with the Stream:
module.exports = function returnStream() {
    const Transform = require('stream').Transform;

    let trckr = {
        bold: false
    };

    const compiler = new Transform({
        transform(chunk, encoding, done) {
            const input = chunk.toString();
            console.log(input); // No output
            let output;
            switch (input) {
                case '*':
                    if (trckr.bold === true) {
                        output = '</b>';
                        trckr.bold = false;
                    } else {
                        output = '<b>';
                        trckr.bold = true;
                    }
                    break;
                default:
                    output = input;
            }
            done(null, output);
        }
    });

    return compiler;
};
Example file that uses the Stream:
const transformS = require('./index.js')();

transformS.on('data', data => {
    console.log(data.toString());
});

transformS.push('*');
Thanks!
done(null, output) and transformS.push() are performing the exact same function: they push data to the readable (the output) side of the Transform stream. What you need to do instead of calling transformS.push() is to write to the writable (the input) side of the Transform stream with transformS.write('*').
I should also point out that you should not make assumptions about the contents of chunk in your transform function. It could be a single character or a bunch of characters (in which case input may never equal '*').
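For example, the test file reworked along those lines, feeding the writable side so transform() actually runs:

const transformS = require('./index.js')();

transformS.on('data', data => {
    console.log(data.toString());
});

// write() goes through transform(); push() bypasses it
transformS.write('*');    // logs: <b>
transformS.write('bold'); // logs: bold
transformS.write('*');    // logs: </b>
transformS.end();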
