How to read first line of a file in JavaScript - javascript

In JavaScript, the FileReader object doesn't seem to support reading just the first line of a file (up to the line feed, '\n'). To save memory, I don't want to read in the whole file.
Is there a way to do it?
My code (note that readLine() function does not exists):
// Question code: opens a hidden file picker and tries to pass the first line of the
// chosen file to `callback`. It does not work as written (see the notes below).
// `options.extension` is used as the picker's `accept` filter.
self.loadFirstLineFromFile = function (options, callback) {
var hiddenElement = document.createElement('input');
hiddenElement.id = 'hidden-tsv-file-loader';
hiddenElement.type = 'file';
hiddenElement.accept = options.extension;
hiddenElement.style.display = 'none';
// Runs once the user has picked a file.
hiddenElement.addEventListener('change', function (event) {
var file = event.target.files[0];
// NOTE: the FileReader constructor takes no arguments, so `file` is ignored here.
var reader = new FileReader(file);
var firstLine;
// NOTE: FileReader has no readLine() method; this is the missing piece the question asks about.
firstLine = reader.readLine();
callback(firstLine);
});
// Attach the hidden input to the page and open the file picker.
document.body.appendChild(hiddenElement);
hiddenElement.click();
};

There's nothing builtin for that, but it's simple to implement:
var file = event.target.files[0];
// Read only the head of the file instead of loading all of it.
var sliced = file.slice(0, 2048); // Pick a size that you're ok with
// NOTE: `await` keyword requires transpiling (Babel) for IE11,
// and to be inside an async function. An alternative is:
// sliced.text().then(function(text) { console.log(text.split(/\r?\n/, 1)[0]); });
var text = await sliced.text();
// Keep only what precedes the first line break (LF or CRLF). If the first line is
// longer than the slice, this is just the leading part of that line.
var firstLine = text.split(/\r?\n/, 1)[0];
console.log(firstLine);

Here's an interface that reads the data from the Blob decoded as text and chunked by a delimiter:
/**
 * Lazily yields the lines of a Blob/File without loading it all into memory.
 *
 * @param {Blob} blob - source to read; consumed through `blob.stream()`.
 * @param {string} [encoding='utf-8'] - label passed to `TextDecoder`.
 * @param {string|RegExp} [delimiter=/\r?\n/g] - separator handed to `String.prototype.split`.
 * @yields {string} each delimited piece; the text after the last delimiter
 *   (possibly the empty string) is yielded last.
 */
async function* readLines (blob, encoding = 'utf-8', delimiter = /\r?\n/g) {
  const reader = blob.stream().getReader();
  const decoder = new TextDecoder(encoding);
  try {
    let text = '';
    while (true) {
      const { value, done } = await reader.read();
      if (done) break;
      // stream: true keeps a multi-byte character that straddles two chunks intact.
      text += decoder.decode(value, { stream: true });
      const lines = text.split(delimiter);
      // The last piece may be an unfinished line; carry it over to the next chunk.
      text = lines.pop();
      yield* lines;
    }
    // Flush the decoder so bytes still buffered at end of input are not silently dropped.
    text += decoder.decode();
    yield text;
  } finally {
    // Runs on normal completion and when the consumer stops early (break/return).
    // Awaited so a cancel failure is not left as an unhandled rejection.
    await reader.cancel();
  }
}
We can use this to read a single line and discard the rest without reading the entire file:
// Hand only the first line of the picked file to `callback`, then stop reading.
hiddenElement.addEventListener('change', async function (event) {
  const [file] = event.target.files;
  const lines = readLines(file, 'utf-8', '\n');
  for await (const firstLine of lines) {
    callback(firstLine);
    // Leaving the loop closes the async iterator, which triggers reader.cancel().
    break;
  }
});

Since I use JavaScript with Knockout, I refactored Patrick's solution into this:
// Opens a hidden file picker and streams the chosen file to `callback` chunk by chunk.
// callback(args, file) receives { chunk, done, shouldStop }; shouldStop starts as true,
// so the callback must set args.shouldStop = false to request the next chunk.
self.loadStream = function (options, callback) {
  var picker = document.createElement('input');
  picker.id = 'hidden-tsv-file-loader';
  picker.type = 'file';
  picker.accept = options.extension;
  picker.style.display = 'none';

  picker.addEventListener('change', function (event) {
    var file = event.target.files[0];
    var reader = file.stream().getReader();
    var decoder = new TextDecoder('utf-8');

    function pump() {
      reader.read().then(function (result) {
        // No value means the stream is exhausted: report a final, empty chunk.
        if (!result.value) {
          callback({ chunk: '', done: true, shouldStop: true }, file);
          return;
        }
        var args = {
          chunk: decoder.decode(result.value, { stream: true }),
          done: result.done,
          shouldStop: true
        };
        callback(args, file);
        // Continue only if the stream has more data and the callback asked for it.
        var keepGoing = !(result.done || args.shouldStop);
        if (keepGoing) {
          pump();
        }
      });
    }

    pump();
    picker.remove();
  });

  document.body.appendChild(picker);
  picker.click();
};

Related

Why is writing the same data to IndexedDB a second time consistently slower?

I stored some JPEG files (exactly 350, same files, same size; total: 336.14 MB) as Blobs in IndexedDB. It took around 1 second to complete the transaction. Then I read all the data from IndexedDB into an array and stored it to IndexedDB again. This time it takes around 15 seconds. I observed this as consistent behavior. Is anything wrong here? I used performance.now() to get the time difference.
Files: 350,
Size of each: 937 KB,
Browser: Chrome and Chromium Edge
//Open
// Open (or create) the database; the object store is created on first use / version bump.
var dbOpen = indexedDB.open(INDEXED_DB_NAME, INDEXED_DB_VERSION);
dbOpen.onupgradeneeded = function (e) {
  console.log("onupgradeneeded");
  // autoIncrement: keys are generated by the store, values are stored as given.
  var store = e.currentTarget.result.createObjectStore(
    IMAGE_DATA_STORE, { autoIncrement: true });
};
dbOpen.onsuccess = function (e) {
  image_data_db = dbOpen.result;
  console.log("indexed DB opened");
};
//Initial Write
// Wrap every selected File in a Blob of the same MIME type.
var inputFiles = document.getElementById('inputFiles');
for (var i = 0; i < inputFiles.files.length; i++) {
  let file = inputFiles.files[i];
  var b = new Blob([file], { type: file.type });
  fileblobs.push(b);
}
StoreIdb(fileblobs); // < First write
//StoreIdb()
// Body of the store routine: add every blob inside one readwrite transaction and
// measure the time until the transaction completes.
t0 = performance.now(); // was `performace.now()`, which throws a ReferenceError
var trx = image_data_db.transaction(IMAGE_DATA_STORE, 'readwrite');
var imagestore = trx.objectStore(IMAGE_DATA_STORE);
for (i = 0; i < fileblobs.length; i++) {
  request = imagestore.add(fileblobs[i]);
  request.onsuccess = function (e) {
    console.log('added');
  };
  request.onerror = function (e) {
    console.error("Request Error", this.error);
  };
}
trx.onabort = function (e) {
  console.error("Exception:", this.error, this.error.name);
};
trx.oncomplete = function (e) {
  console.log('completed');
  t1 = performance.now();
  timetaken = t1 - t0;
}
//Read
// Walk the whole store with a cursor and collect the stored values.
var objectStore = image_data_db.transaction(IMAGE_DATA_STORE).objectStore(IMAGE_DATA_STORE);
objectStore.openCursor().onsuccess = function (e) {
  var cursor = e.target.result;
  if (cursor) {
    // NOTE(review): the write above stores the Blob itself (imagestore.add(fileblobs[i])),
    // so `cursor.value.blob` looks like it would be undefined; confirm the stored shape.
    blobArray.push(cursor.value.blob);
    cursor.continue();
  }
  else
  {
    // completed
  }
}
// blobArray will be used for second time << Second Write
I figured it out. The first time, it was storing Blobs backed by the File instances.
I changed these file-backed Blobs to Blobs built from an ArrayBuffer, to ensure the data type is the same in both cases. Now both writes take the same time.
// Copy every selected file into an in-memory Blob; once all of them have been
// converted, hand the collected blobs to `callback`.
for (var i = 0; i < inputFiles.files.length; i += 1) {
  const current = inputFiles.files[i];
  current.arrayBuffer().then(function (buffer) {
    const bytes = new Uint8Array(buffer);
    blobs.push(new Blob([bytes], { type: current.type }));
    // Not the last conversion to finish yet: nothing more to do.
    if (blobs.length != inputFiles.files.length) {
      return;
    }
    callback(blobs);
  });
}

js can I read a portion of a file with FileReader? [duplicate]

I have long file I need to parse. Because it's very long I need to do it chunk by chunk. I tried this:
// Question code: tries to read `file` in 2000-byte pieces and log each piece as text.
// Known defect (the subject of the question): Blob.slice(start, end) takes an END
// offset, but `chunkSize + 1` is passed as if it were a length, so once `start`
// exceeds 2001 every slice is empty.
function parseFile(file){
var chunkSize = 2000;
// NOTE(review): using `size - 1` as the loop bound looks like it can skip a final 1-byte chunk; confirm.
var fileSize = (file.size - 1);
// onload handler shared by every reader: logs the decoded text of one slice.
var foo = function(e){
console.log(e.target.result);
};
for(var i =0; i < fileSize; i += chunkSize)
{
// The IIFE gives each iteration its own reader and start offset.
(function( fil, start ) {
var reader = new FileReader();
var blob = fil.slice(start, chunkSize + 1);
reader.onload = foo;
reader.readAsText(blob);
})( file, i );
}
}
After running it I see only the first chunk in the console. If I change 'console.log' to jquery append to some div I see only first chunk in that div. What about other chunks? How to make it work?
The FileReader API is asynchronous, so you should handle it with callbacks. A plain for loop won't do the trick, since it doesn't wait for each read to complete before starting the next chunk.
Here's a working approach.
/**
 * Reads `file` sequentially in 64 KiB chunks and passes each chunk to `callback`
 * as decoded UTF-8 text.
 *
 * The read position is advanced by the number of BYTES consumed (not by the length
 * of the decoded string), and one streaming TextDecoder is shared across chunks so a
 * multi-byte character split by a chunk boundary is still decoded correctly.
 *
 * @param {Blob} file - File/Blob to read.
 * @param {function(string): void} callback - called once per chunk, in file order.
 */
function parseFile(file, callback) {
  var fileSize = file.size;
  var chunkSize = 64 * 1024; // bytes
  var offset = 0;
  var decoder = new TextDecoder('utf-8');

  var readNextChunk = function () {
    var blob = file.slice(offset, offset + chunkSize);
    var reader = new FileReader();
    reader.onerror = function (evt) {
      console.log("Read error: " + evt.target.error);
    };
    reader.onload = function (evt) {
      if (evt.target.error != null) {
        console.log("Read error: " + evt.target.error);
        return;
      }
      offset += blob.size;
      var isLast = offset >= fileSize;
      // On the last chunk the decoder is flushed (stream: false) so nothing stays buffered.
      callback(decoder.decode(evt.target.result, { stream: !isLast }));
      if (isLast) {
        console.log("Done reading file");
        return;
      }
      // on to the next chunk
      readNextChunk();
    };
    reader.readAsArrayBuffer(blob);
  };

  // now let's start the read with the first block
  readNextChunk();
}
You can take advantage of Response (part of fetch) to convert most things to anything else (blob, text, JSON) and also get a ReadableStream that can help you read the blob in chunks 👍
// Sink that prints every decoded text chunk it receives.
var dest = new WritableStream({
  write: function (str) {
    console.log(str);
  }
});
var blob = new Blob(['bloby']);
// Use Blob#stream when available, otherwise obtain a stream through Response.
(blob.stream ? blob.stream() : new Response(blob).body)
  // Decode the binary-encoded response to string
  .pipeThrough(new TextDecoderStream())
  .pipeTo(dest)
  .then(function () {
    console.log('done');
  });
Old answer (WritableStreams pipeTo and pipeThrough was not implemented before)
I came up with an interesting idea that is probably very fast, since it converts the blob to a ReadableByteStreamReader. It is probably much easier too, since you don't need to handle things like chunk size and offset and then do it all recursively in a loop.
/**
 * Logs every binary chunk of `blob`, read through a Response body stream.
 * @param {Blob} blob
 * @returns {Promise<undefined>} settles once the stream is exhausted.
 */
function streamBlob(blob) {
  const reader = new Response(blob).body.getReader();
  const drain = async () => {
    for (;;) {
      const chunk = await reader.read();
      if (chunk.done) return;
      // uint8array chunk (use TextDecoder to read as text)
      console.log(chunk.value);
    }
  };
  return drain();
}
streamBlob(new Blob(['bloby'])).then(function () {
  console.log('done');
});
The second argument of slice is actually the end byte. Your code should look something like:
/**
 * Reads `file` in 2000-byte slices and logs the text of each slice.
 *
 * The loop is bounded by the real file size. The earlier `file.size - 1` bound
 * skipped the final byte whenever the size was one more than a multiple of the
 * chunk size (e.g. 2001 bytes).
 *
 * All reads are started at once; each logs from its own onload callback.
 *
 * @param {Blob} file - File/Blob to read.
 */
function parseFile(file) {
  var chunkSize = 2000;
  var fileSize = file.size;
  var foo = function (e) {
    console.log(e.target.result);
  };
  for (var start = 0; start < fileSize; start += chunkSize) {
    var reader = new FileReader();
    reader.onload = foo;
    // The second argument of slice() is the END byte offset, not a length.
    reader.readAsText(file.slice(start, start + chunkSize));
  }
}
Or you can use this BlobReader for easier interface:
// Usage of the BlobReader helper mentioned above (external library, not defined here):
// reads `blob` as text and passes the result to the callback.
BlobReader(blob)
.readText(function (text) {
console.log('The text in the blob is', text);
});
More information:
README.md
Docs
Revamped #alediaferia answer in a class (typescript version here) and returning the result in a promise. Brave coders would even have wrapped it into an async iterator…
/**
 * Reads a File/Blob sequentially, block by block, as UTF-8 text.
 *
 * The read position is tracked in BYTES (the size of the slice that was read), not
 * in decoded characters, and a streaming TextDecoder is kept between blocks so a
 * multi-byte character split across two blocks is decoded correctly.
 */
class FileStreamer {
  /** @param {Blob} file - the File/Blob to read. */
  constructor(file) {
    this.file = file;
    this.offset = 0;
    this.defaultChunkSize = 64 * 1024; // bytes
    this.rewind();
  }

  /** Restart reading from the beginning of the file. */
  rewind() {
    this.offset = 0;
    // Fresh decoder: drops any partial character buffered from a previous pass.
    this.decoder = new TextDecoder('utf-8');
  }

  /** @returns {boolean} true once every byte of the file has been read. */
  isEndOfFile() {
    return this.offset >= this.getFileSize();
  }

  /**
   * Read the next block and advance the read position.
   * @param {number} [length=this.defaultChunkSize] - block size in bytes.
   * @returns {Promise<string>} decoded text of the block; rejects with the
   *   FileReader error if the read fails.
   */
  readBlockAsText(length = this.defaultChunkSize) {
    const fileReader = new FileReader();
    const blob = this.file.slice(this.offset, this.offset + length);
    return new Promise((resolve, reject) => {
      fileReader.onloadend = (event) => {
        const target = event.target;
        if (target.error != null) {
          reject(target.error);
          return;
        }
        this.offset += blob.size;
        // Flush the decoder (stream: false) when this was the final block.
        const text = this.decoder.decode(target.result, { stream: !this.isEndOfFile() });
        this.testEndOfFile();
        resolve(text);
      };
      fileReader.readAsArrayBuffer(blob);
    });
  }

  /** Log a message once the end of the file has been reached. */
  testEndOfFile() {
    if (this.isEndOfFile()) {
      console.log('Done reading file');
    }
  }

  /** @returns {number} size of the file in bytes. */
  getFileSize() {
    return this.file.size;
  }
}
Example printing a whole file in the console (within an async context)
// Print the file block by block until the streamer reports end of file.
const fileStreamer = new FileStreamer(aFile);
for (;;) {
  if (fileStreamer.isEndOfFile()) {
    break;
  }
  const data = await fileStreamer.readBlockAsText();
  console.log(data);
}
Parsing the large file into small chunk by using the simple method:
// Split a large file into fixed-size slices and log each slice.
var parseFile = function (file) {
  var CHUNK_BYTES = 16 * 1024 * 1024; // 16MB Chunk size
  var chunkCount = Math.ceil(file.size / CHUNK_BYTES);
  for (var chunkNo = 1; chunkNo <= chunkCount; chunkNo += 1) {
    var start = (chunkNo - 1) * CHUNK_BYTES;
    var part = file.slice(start, start + CHUNK_BYTES);
    console.log('Current chunk number is ', chunkNo);
    console.log('Current chunk data', part);
  }
};

How to decode a base64 string properly in javascript

I tried to convert a base64 string, generated from a PDF file using FileReader.readAsDataURL(), back to its original format. In Node.js I did it like this, and it was able to regenerate the PDF in its initial state.
// Decode the base64 payload of a data URL and write it out as a PDF (Node.js).
const fileBuffer = "data:application/pdf;base64,JVBERi0xLjQKJSDi48/..........."
// Everything after ";base64," is the encoded file content.
const base64file = fileBuffer.split(';base64,').pop();
// writeFileSync is synchronous and takes no callback: it throws on failure,
// so success/failure is reported with try/catch.
try {
  fs.writeFileSync('download.pdf', base64file, { encoding: 'base64' });
  console.log("file created");
} catch (err) {
  console.log(err);
}
But when I tried to do it in HTML + JavaScript in this way, the PDF was empty (there was no text in it):
// Question code: browser attempt to rebuild the PDF from the data URL.
let stringval = "data:application/pdf;base64,JVBERi0xLjQKJSDi48/..........."
// Keep only the base64 payload after ";base64,".
let encodedString = stringval.split(';base64,').pop();
// atob() yields a "binary string": one character per decoded byte.
let data = atob(encodedString);
// NOTE: this is the step the question is about. Blob stores string parts as UTF-8
// text, so characters above 0x7F become multi-byte sequences and the PDF bytes are
// altered (the answer below converts to a Uint8Array first).
let blob = new Blob([data]);
// //if you need a literal File object
let file = new File([blob], "filename");
link.href = URL.createObjectURL(file);
link.download = 'filename';
I was Capturing file and converting to base64 string in this way:
// Reads the selected file as a data URL and stores the resulting string in App.buffer2.
// NOTE(review): `event` is not a parameter; this presumably relies on the browser's
// global `window.event`. Confirm how the handler is wired.
captureFile: function () {
event.preventDefault();
const file = event.target.files[0];
// Show the chosen file name in the #labelinput1 element.
$("#labelinput1").html(file.name);
const reader = new window.FileReader();
reader.readAsDataURL(file);
reader.onloadend = () => {
var x = reader.result.toString();
App.buffer2 = x;
// NOTE(review): this logs App.buffer, not the App.buffer2 assigned just above; confirm which is intended.
console.log("buffer", App.buffer);
};}
Then after clicking a button , I added the buffer to IPFS node
addfile: async function () {
if (App.buffer2 === null) return;
App.node = await window.Ipfs.create()
App.node.add(App.buffer2, function (errx, resipfs) {
if (errx === null) {
console.log(resipfs[0].hash);
App.buffer2 = null;
return App.showInfo(resipfs[0].hash);
}
else {
return App.showError(errx.message.toString() + errx.stack.toString());
}
});
}
using the IPFS HASH i can get back the base64 encoded string , I retrieved this string in this way:
// Question stub: fetches the content stored under `hashtext` from the IPFS node and
// converts it to a string (the base64 data URL that was added earlier).
// `hashtext` is not defined in this snippet; it must come from the surrounding code.
ipfsfiledownload: async function () {
var filebuffer = await App.node.cat(hashtext);
var stringval = filebuffer.toString();
//convert this string to main file
}
I used Truffle Petshop and write those functions in top of it. Here is a IPFS hash QmfSefUiwjV44hpfnHyUngGATyHm9M4vN3PzF1mpe59Nn1. you can try out this Hash value in nodejs with this code
const IPFS = require('ipfs');
const fs = require('fs');

// Fetch the stored data URL from IPFS, decode its base64 payload and write it out
// as download.pdf.
const main = async () => {
  const node = await IPFS.create();
  const fileBuffer = (
    await node.cat('QmfSefUiwjV44hpfnHyUngGATyHm9M4vN3PzF1mpe59Nn1')
  ).toString();
  // Everything after ";base64," is the encoded file content.
  const base64file = fileBuffer.split(';base64,').pop();
  // writeFileSync is synchronous and takes no callback: it throws on failure,
  // so success/failure is reported with try/catch.
  try {
    fs.writeFileSync('download.pdf', base64file, { encoding: 'base64' });
    console.log("file created");
  } catch (err) {
    console.log(err);
  }
};

// Report failures from node creation / cat instead of leaving the promise unhandled.
main().catch((err) => {
  console.log(err);
});
You can find the full code here.
What I am doing wrong and how to solve it?
After decoding the base64 string using atob(), I converted the result to a Uint8Array and then created the Blob and File from it. It seems to work now.
Here is the full code :
ipfsfiledownload: async function () {
var hashtext = document.getElementById("id_ipfshash").value //getting the IPFS HASH
var link = document.getElementById("downloadLink");
if (hashtext === null) return
var filebuffer = await App.node.cat(hashtext); //getting the base64 string from IPFS
var stringval = filebuffer.toString();
console.log(stringval);
let encodedString = stringval.split(',')[1]; //getting the base64 hash
let mimetype = stringval.split(',')[0].split(':')[1].split(';')[0]; //getting the mime type
let data = atob(encodedString); //ascii to binary
var ab = new ArrayBuffer(data.length);
var ia = new Uint8Array(ab);
//converting to Uint8Array
for(var i = 0;i<data.length;i++){
ia[i] = data.charCodeAt(i);
}
let blob = new Blob([ia],{ "type": mimetype});
let filename = 'filename.' + App.getExtension(mimetype);
let file = new File([blob], filename);
link.href = window.URL.createObjectURL(file);
link.download = filename;
link.click();
}

Map function returns an empty array

I was trying to convert a blob to base64, and I found my way around, but while waiting for the result of the function displayBase64String, the map function in submitOffre returns an empty array even though console.log prints some data.
I'll appreciate any solution
here is my code.
// Question code: converts the images in `saleData` to base64 and extracts the `file`
// field of every result.
submitOffre = (saleData) => {
debugger ;
// displayBase64String returns its array immediately; the entries are pushed later,
// from FileReader callbacks, so the array is still empty when map() runs below.
var result = base64Service.displayBase64String(saleData);
console.log("========", result);
const rs = result.map(value => value.file); // Doesn't work.
console.log(rs); // rs is empty
}
// Question code: helper that converts Blobs to base64 strings with FileReader.
class Base64Service {
// Reads `blob` as a data URL and passes the base64 part (after the comma) to `callback`.
// The callback runs later, when the reader's onload fires.
blobToBase64 = (blob, callback) => {
var reader = new FileReader();
var data = '';
reader.onload = function () {
var dataUrl = reader.result;
var base64 = dataUrl.split(',')[1];
callback(base64);
};
reader.readAsDataURL(blob);
}
// Starts a conversion for every entry of formProps.imageToUpload and returns `result`.
// NOTE: `result` is returned before any blobToBase64 callback has run, so callers
// receive an array that is filled in only afterwards (the issue the question is about).
displayBase64String(formProps) {
const result = [];
const outbut = Object.entries(formProps.imageToUpload).map(([key, value]) => {
this.blobToBase64(value, (data) => {
result.push({ "file": `data:${value.type};base64,${data}` })
})
});
return result;
};
}
// A single shared instance is exported.
export default new Base64Service();
Something like that might help:
I've modified your code a bit, just to show you the basic pattern.
If you're doing more than 1 image at a time, you will need to use Promise.all, to keep track of more than 1 promise at once.
// Answer sketch: wait for the conversion to finish before using its result.
submitOffre = async (saleData) => { // SEE THE async KEYWORD
debugger ;
var result = await blobToBase64(saleData); // SEE THE await KEYWORD
console.log("========", result);
// NOTE(review): blobToBase64 resolves with a single base64 string, which has no
// .map(); this line is left over from the question and needs adapting (for several
// images, collect the promises with Promise.all and map over that array).
const rs = result.map(value => value.file);
console.log(rs);
}
I'll treat as if you were converting only 1 image.
/**
 * Converts a Blob to its base64 representation.
 *
 * @param {Blob} blob - data to encode.
 * @param {function(string): void} [callback] - optional; also receives the base64 string.
 * @returns {Promise<string>} resolves with the base64 payload of the blob's data URL
 *   (the part after the comma); rejects with the FileReader error if reading fails.
 */
blobToBase64 = (blob, callback) => new Promise((resolve, reject) => {
  const reader = new FileReader();
  reader.onload = () => {
    const base64 = reader.result.split(',')[1];
    // The callback is optional: awaiting the promise alone must also work.
    if (typeof callback === 'function') {
      callback(base64);
    }
    resolve(base64); // NOTE THE resolve() FUNCTION TO RETURN SOME VALUE TO THE await
  };
  // Without this the promise would stay pending forever when the read fails.
  reader.onerror = () => reject(reader.error);
  reader.readAsDataURL(blob);
});

javascript FileReader - parsing long file in chunks

I have long file I need to parse. Because it's very long I need to do it chunk by chunk. I tried this:
// Question code: tries to read `file` in 2000-byte pieces and log each piece as text.
// Known defect (the subject of the question): Blob.slice(start, end) takes an END
// offset, but `chunkSize + 1` is passed as if it were a length, so once `start`
// exceeds 2001 every slice is empty.
function parseFile(file){
var chunkSize = 2000;
// NOTE(review): using `size - 1` as the loop bound looks like it can skip a final 1-byte chunk; confirm.
var fileSize = (file.size - 1);
// onload handler shared by every reader: logs the decoded text of one slice.
var foo = function(e){
console.log(e.target.result);
};
for(var i =0; i < fileSize; i += chunkSize)
{
// The IIFE gives each iteration its own reader and start offset.
(function( fil, start ) {
var reader = new FileReader();
var blob = fil.slice(start, chunkSize + 1);
reader.onload = foo;
reader.readAsText(blob);
})( file, i );
}
}
After running it I see only the first chunk in the console. If I change 'console.log' to jquery append to some div I see only first chunk in that div. What about other chunks? How to make it work?
The FileReader API is asynchronous, so you should handle it with callbacks. A plain for loop won't do the trick, since it doesn't wait for each read to complete before starting the next chunk.
Here's a working approach.
/**
 * Reads `file` sequentially in 64 KiB chunks and passes each chunk to `callback`
 * as decoded UTF-8 text.
 *
 * The read position is advanced by the number of BYTES consumed (not by the length
 * of the decoded string), and one streaming TextDecoder is shared across chunks so a
 * multi-byte character split by a chunk boundary is still decoded correctly.
 *
 * @param {Blob} file - File/Blob to read.
 * @param {function(string): void} callback - called once per chunk, in file order.
 */
function parseFile(file, callback) {
  var fileSize = file.size;
  var chunkSize = 64 * 1024; // bytes
  var offset = 0;
  var decoder = new TextDecoder('utf-8');

  var readNextChunk = function () {
    var blob = file.slice(offset, offset + chunkSize);
    var reader = new FileReader();
    reader.onerror = function (evt) {
      console.log("Read error: " + evt.target.error);
    };
    reader.onload = function (evt) {
      if (evt.target.error != null) {
        console.log("Read error: " + evt.target.error);
        return;
      }
      offset += blob.size;
      var isLast = offset >= fileSize;
      // On the last chunk the decoder is flushed (stream: false) so nothing stays buffered.
      callback(decoder.decode(evt.target.result, { stream: !isLast }));
      if (isLast) {
        console.log("Done reading file");
        return;
      }
      // on to the next chunk
      readNextChunk();
    };
    reader.readAsArrayBuffer(blob);
  };

  // now let's start the read with the first block
  readNextChunk();
}
You can take advantage of Response (part of fetch) to convert most things to anything else (blob, text, JSON) and also get a ReadableStream that can help you read the blob in chunks 👍
// Sink that prints every decoded text chunk it receives.
var dest = new WritableStream({
  write: function (str) {
    console.log(str);
  }
});
var blob = new Blob(['bloby']);
// Use Blob#stream when available, otherwise obtain a stream through Response.
(blob.stream ? blob.stream() : new Response(blob).body)
  // Decode the binary-encoded response to string
  .pipeThrough(new TextDecoderStream())
  .pipeTo(dest)
  .then(function () {
    console.log('done');
  });
Old answer (WritableStreams pipeTo and pipeThrough was not implemented before)
I came up with an interesting idea that is probably very fast, since it converts the blob to a ReadableByteStreamReader. It is probably much easier too, since you don't need to handle things like chunk size and offset and then do it all recursively in a loop.
/**
 * Logs every binary chunk of `blob`, read through a Response body stream.
 * @param {Blob} blob
 * @returns {Promise<undefined>} settles once the stream is exhausted.
 */
function streamBlob(blob) {
  const reader = new Response(blob).body.getReader();
  const drain = async () => {
    for (;;) {
      const chunk = await reader.read();
      if (chunk.done) return;
      // uint8array chunk (use TextDecoder to read as text)
      console.log(chunk.value);
    }
  };
  return drain();
}
streamBlob(new Blob(['bloby'])).then(function () {
  console.log('done');
});
The second argument of slice is actually the end byte. Your code should look something like:
/**
 * Reads `file` in 2000-byte slices and logs the text of each slice.
 *
 * The loop is bounded by the real file size. The earlier `file.size - 1` bound
 * skipped the final byte whenever the size was one more than a multiple of the
 * chunk size (e.g. 2001 bytes).
 *
 * All reads are started at once; each logs from its own onload callback.
 *
 * @param {Blob} file - File/Blob to read.
 */
function parseFile(file) {
  var chunkSize = 2000;
  var fileSize = file.size;
  var foo = function (e) {
    console.log(e.target.result);
  };
  for (var start = 0; start < fileSize; start += chunkSize) {
    var reader = new FileReader();
    reader.onload = foo;
    // The second argument of slice() is the END byte offset, not a length.
    reader.readAsText(file.slice(start, start + chunkSize));
  }
}
Or you can use this BlobReader for easier interface:
// Usage of the BlobReader helper mentioned above (external library, not defined here):
// reads `blob` as text and passes the result to the callback.
BlobReader(blob)
.readText(function (text) {
console.log('The text in the blob is', text);
});
More information:
README.md
Docs
Revamped #alediaferia answer in a class (typescript version here) and returning the result in a promise. Brave coders would even have wrapped it into an async iterator…
/**
 * Reads a File/Blob sequentially, block by block, as UTF-8 text.
 *
 * The read position is tracked in BYTES (the size of the slice that was read), not
 * in decoded characters, and a streaming TextDecoder is kept between blocks so a
 * multi-byte character split across two blocks is decoded correctly.
 */
class FileStreamer {
  /** @param {Blob} file - the File/Blob to read. */
  constructor(file) {
    this.file = file;
    this.offset = 0;
    this.defaultChunkSize = 64 * 1024; // bytes
    this.rewind();
  }

  /** Restart reading from the beginning of the file. */
  rewind() {
    this.offset = 0;
    // Fresh decoder: drops any partial character buffered from a previous pass.
    this.decoder = new TextDecoder('utf-8');
  }

  /** @returns {boolean} true once every byte of the file has been read. */
  isEndOfFile() {
    return this.offset >= this.getFileSize();
  }

  /**
   * Read the next block and advance the read position.
   * @param {number} [length=this.defaultChunkSize] - block size in bytes.
   * @returns {Promise<string>} decoded text of the block; rejects with the
   *   FileReader error if the read fails.
   */
  readBlockAsText(length = this.defaultChunkSize) {
    const fileReader = new FileReader();
    const blob = this.file.slice(this.offset, this.offset + length);
    return new Promise((resolve, reject) => {
      fileReader.onloadend = (event) => {
        const target = event.target;
        if (target.error != null) {
          reject(target.error);
          return;
        }
        this.offset += blob.size;
        // Flush the decoder (stream: false) when this was the final block.
        const text = this.decoder.decode(target.result, { stream: !this.isEndOfFile() });
        this.testEndOfFile();
        resolve(text);
      };
      fileReader.readAsArrayBuffer(blob);
    });
  }

  /** Log a message once the end of the file has been reached. */
  testEndOfFile() {
    if (this.isEndOfFile()) {
      console.log('Done reading file');
    }
  }

  /** @returns {number} size of the file in bytes. */
  getFileSize() {
    return this.file.size;
  }
}
Example printing a whole file in the console (within an async context)
// Print the file block by block until the streamer reports end of file.
const fileStreamer = new FileStreamer(aFile);
for (;;) {
  if (fileStreamer.isEndOfFile()) {
    break;
  }
  const data = await fileStreamer.readBlockAsText();
  console.log(data);
}
Parsing the large file into small chunk by using the simple method:
// Split a large file into fixed-size slices and log each slice.
var parseFile = function (file) {
  var CHUNK_BYTES = 16 * 1024 * 1024; // 16MB Chunk size
  var chunkCount = Math.ceil(file.size / CHUNK_BYTES);
  for (var chunkNo = 1; chunkNo <= chunkCount; chunkNo += 1) {
    var start = (chunkNo - 1) * CHUNK_BYTES;
    var part = file.slice(start, start + CHUNK_BYTES);
    console.log('Current chunk number is ', chunkNo);
    console.log('Current chunk data', part);
  }
};

Categories