resumable downloader using fetch streams - javascript

I'm trying to create a resumable file downloader that runs entirely on the client side. The server is beyond my control, and on an AJAX request I receive the file, which is a very large binary data file (~100 MB).
After long research I have understood that I cannot use XHR to stream the response and I cannot read chunks of the file before it is completely cached... So I looked some more and found the Fetch API, which is quite new, but I cannot find any proper documentation or tutorials. I would very much appreciate it if someone could illustrate a simple example of fetching some URL and reading the stream chunk by chunk.

Here's an example from this blog post:
fetch('/big-data.csv').then(function(response) {
    var reader = response.body.getReader();
    var partialCell = '';
    var returnNextCell = false;
    var returnCellAfter = "Jake";
    var decoder = new TextDecoder();

    function search() {
        return reader.read().then(function(result) {
            partialCell += decoder.decode(result.value || new Uint8Array(), {
                stream: !result.done
            });

            // Split what we have into CSV 'cells'
            var cellBoundry = /(?:,|\r\n)/;
            var completeCells = partialCell.split(cellBoundry);

            if (!result.done) {
                // Last cell is likely incomplete
                // Keep hold of it for next time
                partialCell = completeCells[completeCells.length - 1];
                // Remove it from our complete cells
                completeCells = completeCells.slice(0, -1);
            }

            for (var cell of completeCells) {
                cell = cell.trim();

                if (returnNextCell) {
                    reader.cancel("No more reading needed.");
                    return cell;
                }

                if (cell === returnCellAfter) {
                    returnNextCell = true;
                }
            }

            if (result.done) {
                throw Error("Could not find value after " + returnCellAfter);
            }

            return search();
        });
    }

    return search();
}).then(function(result) {
    console.log("Got the result! It's '" + result + "'");
}).catch(function(err) {
    console.log(err.message);
});
Note that streaming responses aren't supported in all browsers yet; check the compatibility table on MDN.
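To make the download actually resumable, one option is to track how many bytes have been received and re-issue the fetch with a Range header after a failure. This is only a minimal sketch, and it assumes the server honors HTTP Range requests (replying with 206 Partial Content):

// Minimal sketch of a resumable download over fetch streams.
// Assumes the server supports HTTP Range requests.
async function downloadResumable(url) {
    var chunks = [];   // received Uint8Array chunks
    var received = 0;  // total bytes received so far

    while (true) {
        var headers = received > 0 ? { 'Range': 'bytes=' + received + '-' } : {};
        var response = await fetch(url, { headers: headers });
        var reader = response.body.getReader();

        try {
            while (true) {
                var result = await reader.read();
                if (result.done) {
                    return new Blob(chunks); // everything has arrived
                }
                chunks.push(result.value);
                received += result.value.byteLength;
            }
        } catch (e) {
            // Network dropped mid-stream: loop around and resume from `received`
            console.log("Connection lost, resuming from byte " + received);
        }
    }
}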

Related

Get WebAssembly instantiateStreaming progress

WebAssembly.instantiateStreaming is the fastest way to download and instantiate a .wasm module; however, for large .wasm files it can still take a long time. Simply displaying a spinner does not provide enough user feedback in this case.
Is there a way to use the WebAssembly.instantiateStreaming API and get some form of progress event so that an ETA can be displayed to the user? Ideally I would like to be able to display a percentage progress bar / estimated-time-left indicator so users know how long they will have to wait.
Building off the answer here.
To get the progress of WebAssembly.instantiateStreaming / WebAssembly.compileStreaming, create a new fetch Response with a custom ReadableStream which implements its own controller.
Example:
// Get your normal fetch response
var response = await fetch('https://www.example.com/example.wasm');

// Note - If you are compressing your .wasm file the Content-Length will be incorrect
// One workaround is to use a custom http header to manually specify the uncompressed size
var contentLength = response.headers.get('Content-Length');
var total = parseInt(contentLength, 10);
var loaded = 0;

function progressHandler(bytesLoaded, totalBytes) {
    // Do what you want with this info...
}

var res = new Response(new ReadableStream({
    async start(controller) {
        var reader = response.body.getReader();
        for (;;) {
            var {done, value} = await reader.read();
            if (done) {
                progressHandler(total, total);
                break;
            }
            loaded += value.byteLength;
            progressHandler(loaded, total);
            controller.enqueue(value);
        }
        controller.close();
    }
}), {
    "status": response.status,
    "statusText": response.statusText
});

// Make sure to copy the headers!
// Wasm is very picky with its headers and it will fail to compile if they are not
// specified correctly.
for (var pair of response.headers.entries()) {
    res.headers.set(pair[0], pair[1]);
}

// The response (res) can now be passed to any of the streaming methods as normal
var promise = WebAssembly.instantiateStreaming(res);
Building off of various other SO answers, here is what I ended up with.
My solution also has a decent fallback for Firefox, which doesn't yet have proper stream support. I opted for falling back to a good old XHR and WebAssembly.instantiate there, as I really do want to show a loading bar, even if it means a slightly slower startup just on FF.
async function fetchWithProgress(path, progress) {
    const response = await fetch(path);

    // May be incorrect if compressed
    const contentLength = response.headers.get("Content-Length");
    const total = parseInt(contentLength, 10);

    let bytesLoaded = 0;
    const ts = new TransformStream({
        transform(chunk, ctrl) {
            bytesLoaded += chunk.byteLength;
            progress(bytesLoaded / total);
            ctrl.enqueue(chunk);
        }
    });

    return new Response(response.body.pipeThrough(ts), response);
}

async function initWasmWithProgress(wasmFile, importObject, progress) {
    if (typeof TransformStream === "function" && ReadableStream.prototype.pipeThrough) {
        let done = false;
        const response = await fetchWithProgress(wasmFile, function() {
            if (!done) {
                progress.apply(null, arguments);
            }
        });
        await WebAssembly.instantiateStreaming(response, importObject);
        done = true;
        progress(1);
    } else {
        // XHR fallback, this is slower and doesn't use WebAssembly.instantiateStreaming,
        // but it's only happening on Firefox, and we can probably live with the game
        // starting slightly slower there...
        const xhr = new XMLHttpRequest();
        await new Promise(function(resolve, reject) {
            xhr.open("GET", wasmFile);
            xhr.responseType = "arraybuffer";
            xhr.onload = resolve;
            xhr.onerror = reject;
            xhr.onprogress = e => progress(e.loaded / e.total);
            xhr.send();
        });
        await WebAssembly.instantiate(xhr.response, importObject);
        progress(1);
    }
}

const wasmFile = "./wasm.wasm";
await initWasmWithProgress(wasmFile, importObject, p => console.log(`progress: ${p * 100}%`));
console.log("Initialized wasm");

Javascript Await Changes Local Variables? [closed]

Anyone able to explain what I'm doing wrong with my use of asynchronous functions in Javascript?
Basically, I must use an asynchronous function in my Node.js code to grab an open port for me to use. There is a local variable that is set outside of the asynchronous call, which I can access and use just fine until I await the asynchronous function's return. After that, the local variable is undefined.
(async () => {
    console.log("CHECK AFTER ASYNC1: " + csvFilePath);

    // First, grab a valid open port
    var port;
    while (!port || portsInProcess.indexOf(port) >= 0) {
        console.log("CHECK AFTER ASYNC2: " + csvFilePath);
        port = await getPort();
        console.log(port);
    }
    console.log("CHECK AFTER ASYNC3: " + csvFilePath);

    portsInProcess.push(port);
    // ... more code below...
Checks #1 and 2 are fine for the csvFilePath variable, but check #3 shows that it's undefined. The port number, however, is fine. This leads me to believe that there's some weirdness with asynchronous function calls in Javascript that ONLY affects local variables; the global variables I use further down are just fine. Unfortunately here, I cannot make the csvFilePath variable global since that will introduce race conditions on that variable too (which I'm preventing elsewhere; the while loop is to help prevent race conditions on the port number, which is basically unused in my simple tests on localhost).
Just in case it's helpful, here's the output I'm getting:
CHECK AFTER ASYNC1: data/text/crescent_topics.csv
CHECK AFTER ASYNC2: data/text/crescent_topics.csv
58562
CHECK AFTER ASYNC3: null
It might also be worth mentioning that the first few lines of code that dynamically grab an open port are really the only lines I added. The code I had before, which used a fixed port number, worked just fine (including this csvFilePath variable remaining stable).
My understanding of the await functionality was that it makes the asynchronous function act more or less synchronously, which is what seems to be happening here; the code I have farther down that uses the port number is not running until after the port number is set. (But even if that wasn't the case, why is the csvFilePath variable being unset since I'm not altering it or using it in any way here?)
EDIT: Here's some more code to provide additional context
var spawn = require('child_process').spawn;
var fs = require("fs");
var async = require('async');
var zmq = require('zmq');
var readline = require('readline');
const getPort = require('get-port');
/* Export the Nebula class */
module.exports = Nebula;
/* Location of the data for the Crescent dataset */
var textDataPath = "data/text/";
var crescentRawDataPath = textDataPath + "crescent_raw";
var crescentTFIDF = textDataPath + "crescent tfidf.csv";
var crescentTopicModel = textDataPath + "crescent_topics.csv";
/* Location of the data for the UK Health dataset */
var ukHealthRawDataPath = textDataPath + "uk_health_raw";
var ukHealthTFIDF = textDataPath + "uk_health.csv";
/* Map CSV files for text data to raw text location */
var textRawDataMappings = {};
textRawDataMappings[crescentTFIDF] = crescentRawDataPath;
textRawDataMappings[crescentTopicModel] = crescentRawDataPath;
textRawDataMappings[ukHealthTFIDF] = ukHealthRawDataPath;
textRawDataMappings[textDataPath + "uk_health_sm.csv"] = ukHealthRawDataPath;
/* The pipelines available to use */
var flatTextUIs = ["cosmos", "composite", "sirius", "centaurus"];
var pipelines = {
    andromeda: {
        file: "pipelines/andromeda.py",
        defaultData: "data/highD/Animal_Data_study.csv"
    },
    cosmos: {
        file: "pipelines/cosmos.py",
        defaultData: textDataPath + "crescent tfidf.csv"
    },
    sirius: {
        file: "pipelines/sirius.py",
        defaultData: "data/highD/Animal_Data_paper.csv"
    },
    centaurus: {
        file: "pipelines/centaurus.py",
        defaultData: "data/highD/Animal_Data_paper.csv"
    },
    twitter: {
        file: "pipelines/twitter.py",
    },
    composite: {
        file: "pipelines/composite.py",
        defaultData: textDataPath + "crescent tfidf.csv"
    },
    elasticsearch: {
        file: "pipelines/espipeline.py",
        args: []
    }
};
/* The locations of the different types of datasets on the server */
var textDataFolder = "data/text/";
var highDDataFolder = "data/highD/";
var customCSVFolder = "data/customCSV/";
var sirius_prototype = 2;
// An array to track the ports being processed to eliminate race conditions
// as much as possible
var portsInProcess = [];
var nextSessionNumber = 0;
var usedSessionNumbers = [];
/* Nebula class constructor */
function Nebula(io, pipelineAddr) {
    /* This allows you to use "Nebula(obj)" as well as "new Nebula(obj)" */
    if (!(this instanceof Nebula)) {
        return new Nebula(io);
    }

    /* The group of rooms currently active, each with a string identifier
     * Each room represents an instance of a visualization that can be shared
     * among clients.
     */
    this.rooms = {};
    this.io = io;

    /* For proper use in callback functions */
    var self = this;

    /* Accept new WebSocket clients */
    io.on('connection', function(socket) {
        // Skipped some irrelevant Socket.io callbacks

        **// Use the csvFilePath to store the name of a user-defined CSV file
        var csvFilePath = null;**

        /* Helper function to tell the client that the CSV file is now ready for them
         * to use. They are also sent a copy of the data
         */
        var csvFileReady = function(csvFilePath) {
            // Let the client know that the CSV file is now ready to be used on
            // the server
            socket.emit("csvDataReady");

            // Prepare to parse the CSV file
            var csvData = [];
            const rl = readline.createInterface({
                input: fs.createReadStream(csvFilePath),
                crlfDelay: Infinity
            });

            // Print any error messages we encounter
            rl.on('error', function (err) {
                console.log("Error while parsing CSV file: " + csvFilePath);
                console.log(err);
            });

            // Read each line of the CSV file one at a time and parse it
            var columnHeaders = [];
            var firstColumnName;
            rl.on('line', function (data) {
                var dataColumns = data.split(",");

                // If we haven't saved any column names yet, do so first
                if (columnHeaders.length == 0) {
                    columnHeaders = dataColumns;
                    firstColumnName = columnHeaders[0];
                }
                // Process each individual line of data in the CSV file
                else {
                    var dataObj = {};
                    var i;
                    for (i = 0; i < dataColumns.length; i++) {
                        var key = columnHeaders[i];
                        var value = dataColumns[i];
                        dataObj[key] = value;
                    }
                    csvData.push(dataObj);
                }
            });

            // All lines are read, file is closed now.
            rl.on('close', function () {
                // On certain OSs, like Windows, an extra, blank line may be read
                // Check for this and remove it if it exists
                var lastObservation = csvData[csvData.length - 1];
                var lastObservationKeys = Object.keys(lastObservation);
                if (lastObservationKeys.length == 1 && lastObservation[lastObservationKeys[0]] == "") {
                    csvData.pop();
                }

                // Provide the CSV data to the client
                socket.emit("csvDataReadComplete", csvData, firstColumnName);
            });
        };
        **/* Allows the client to specify a CSV file already on the server to use */
        socket.on("setCSV", function(csvName) {
            console.log("setCSV CALLED");
            csvFilePath = "data/" + csvName;
            csvFileReady(csvFilePath);
            console.log("CSV FILE SET: " + csvFilePath);
        });**

        // Skipped some more irrelevant callbacks

        /* Allows a client to join a room. If the room doesn't exist yet,
         * initiate it and send the new room to the client. Otherwise, send
         * the client the current state of the room.
         */
        socket.on('join', function(roomName, user, pipeline, args) {
            console.log("Join called for " + pipeline + " pipeline; room " + roomName);
            socket.roomName = roomName;
            socket.user = user;
            socket.join(roomName);
            console.log("CSV FILE PATH: " + csvFilePath);

            var pipelineArgsCopy = [];

            if (!self.rooms[roomName]) {
                var room = {};
                room.name = roomName;
                room.count = 1;
                room.points = new Map();
                room.similarity_weights = new Map();

                if (pipeline == "sirius" || pipeline == "centaurus") {
                    room.attribute_points = new Map();
                    room.attribute_similarity_weights = new Map();
                    room.observation_data = [];
                    room.attribute_data = [];
                }

                /* Create a pipeline client for this room */
                console.log("CHECK BEFORE ASYNC: " + csvFilePath);

                **// Here's the code snippet I provided above**
                **(async () => {
                    console.log("CHECK AFTER ASYNC1: " + csvFilePath);

                    // First, grab a valid open port
                    var port;
                    while (!port || portsInProcess.indexOf(port) >= 0) {
                        console.log("CHECK AFTER ASYNC2: " + csvFilePath);
                        port = await getPort();
                        console.log(port);
                    }
                    console.log("CHECK AFTER ASYNC3: " + csvFilePath);**

                    portsInProcess.push(port);
                    console.log("CHECK AFTER ASYNC4: " + csvFilePath);

                    if (!pipelineAddr) {
                        var pythonArgs = ["-u"];
                        if (pipeline in pipelines) {
                            // A CSV file path should have already been set. This
                            // file path should be used to indicate where to find
                            // the desired file
                            console.log("LAST CHECK: " + csvFilePath);
                            if (!csvFilePath) {
                                csvFilePath = pipelines[pipeline].defaultData;
                            }
                            console.log("FINAL CSV FILE: " + csvFilePath);
                            pipelineArgsCopy.push(csvFilePath);

                            // If the UI supports reading flat text files, tell the
                            // pipeline where to find the files
                            if (flatTextUIs.indexOf(pipeline) >= 0) {
                                pipelineArgsCopy.push(textRawDataMappings[csvFilePath]);
                            }

                            // Set the remaining pipeline args
                            pythonArgs.push(pipelines[pipeline].file);
                            pythonArgs.push(port.toString());
                            if (pipeline != "twitter" && pipeline != "elasticsearch") {
                                pythonArgs = pythonArgs.concat(pipelineArgsCopy);
                            }
                        }
                        else {
                            pythonArgs.push(pipelines.cosmos.file);
                            pythonArgs.push(port.toString());
                            pythonArgs.push(pipelines.cosmos.defaultData);
                            pythonArgs.push(crescentRawDataPath);
                        }

                        // used in case of CosmosRadar
                        for (var key in args) {
                            if (args.hasOwnProperty(key)) {
                                pythonArgs.push("--" + key);
                                pythonArgs.push(args[key]);
                            }
                        }

                        // Dynamically determine which distance function should be
                        // used
                        if (pythonArgs.indexOf("--dist_func") < 0) {
                            if (pipeline === "twitter" || pipeline === "elasticsearch" ||
                                    csvFilePath.startsWith(textDataPath)) {
                                pythonArgs.push("--dist_func", "cosine");
                            }
                            else {
                                pythonArgs.push("--dist_func", "euclidean");
                            }
                        }

                        console.log(pythonArgs);
                        console.log("");

                        var pipelineInstance = spawn("python2.7", pythonArgs, {stdout: "inherit"});

                        pipelineInstance.on("error", function(err) {
                            console.log("python2.7.exe not found. Trying python.exe");
                            pipelineInstance = spawn("python", pythonArgs, {stdout: "inherit"});

                            pipelineInstance.stdout.on("data", function(data) {
                                console.log("Pipeline: " + data.toString());
                            });
                            pipelineInstance.stderr.on("data", function(data) {
                                console.log("Pipeline error: " + data.toString());
                            });
                        });

                        /* Data received by node app from python process,
                         * output this data to output stream (on 'data'),
                         * we want to convert that received data into a string and
                         * append it to the overall data String
                         */
                        pipelineInstance.stdout.on("data", function(data) {
                            console.log("Pipeline STDOUT: " + data.toString());
                        });
                        pipelineInstance.stderr.on("data", function(data) {
                            console.log("Pipeline error: " + data.toString());
                        });

                        room.pipelineInstance = pipelineInstance;
                    }

                    /* Connect to the pipeline */
                    pipelineAddr = pipelineAddr || "tcp://127.0.0.1:" + port.toString();

                    room.pipelineSocket = zmq.socket('pair');
                    room.pipelineSocket.connect(pipelineAddr);
                    pipelineAddr = null;
                    portsInProcess.splice(portsInProcess.indexOf(port), 1);

                    /* Listens for messages from the pipeline */
                    room.pipelineSocket.on('message', function (msg) {
                        self.handleMessage(room, msg);
                    });

                    self.rooms[roomName] = socket.room = room;
                    invoke(room.pipelineSocket, "reset");
                })();
            }
            else {
                socket.room = self.rooms[roomName];
                socket.room.count += 1;
                if (pipeline == "sirius" || pipeline == "centaurus") {
                    socket.emit('update', sendRoom(socket.room, true), true);
                    socket.emit('update', sendRoom(socket.room, false), false);
                }
                else {
                    socket.emit('update', sendRoom(socket.room));
                }
            }

            // Reset the csvFilePath to null for future UIs...
            // I don't think this is actually necessary since
            // csvFilePath is local to the "connections" message,
            // which is called for every individual room
            csvFilePath = null;
        });

        // Skipped the rest of the code; it's irrelevant
    });
}
Full printouts:
setCSV CALLED
CSV FILE SET: data/text/crescent_topics.csv
Join called for sirius pipeline; room sirius0
CSV FILE PATH: data/text/crescent_topics.csv
CHECK BEFORE ASYNC: data/text/crescent_topics.csv
CHECK AFTER ASYNC1: data/text/crescent_topics.csv
CHECK AFTER ASYNC2: data/text/crescent_topics.csv
58562
CHECK AFTER ASYNC3: null
CHECK AFTER ASYNC4: null
LAST CHECK: null
FINAL CSV FILE: data/highD/Animal_Data_paper.csv
[ '-u',
'pipelines/sirius.py',
'58562',
'data/highD/Animal_Data_paper.csv',
undefined,
'--dist_func',
'euclidean' ]
Since bolding of code doesn't work, just search for the "**" to find the relevant pieces I've marked.
TL;DR There's a lot of communication happening between the client and server to establish an individualized communication that is directly linked to a specific dataset. The user has the ability to upload a custom CSV file to the system, but the code I'm working with right now is just trying to select an existing CSV file on the server, so I omitted the callbacks for the custom CSV file. Once the file has been selected, the client asks to "join" a room/session. The case I'm working with right now assumes that this is a new room/session as opposed to trying to do some shared room/session with another client. (Yes, I know, the code is messy for sharing rooms/sessions, but it works for the most part for now and is not my main concern.) Again, all this code worked just fine before the asynchronous code was added (and using a static port variable), so I don't know what changed so much by adding it.
Now that you've included the whole code context, we can see that the code after your async IIFE is what is causing the problem.
An async function returns a promise as soon as it hits an await. And, while that await is waiting for its asynchronous operation, the code following the call to the async function runs. In your case, you're essentially doing this:
var csvFilePath = someGoodValue;

(async () => {
    port = await getPort();
    console.log(csvFilePath); // this will be null
})();

csvFilePath = null; // this runs as soon as the above code hits the await
So, as soon as you hit your first await, the async function returns a promise and the code following it continues to run, hitting the line of code that resets your csvFilePath.
There are probably cleaner ways to restructure your code, but a simple thing you could do is this:
var csvFilePath = someGoodValue;

(async () => {
    port = await getPort();
    console.log(csvFilePath); // still set here; the reset below only runs in .finally()
})().finally(() => {
    csvFilePath = null;
});
Note: .finally() is supported in node v10+. If you're using an older version, you can reset the path in both .then() and .catch().
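For older Node versions, the same idea written with .then() and .catch() might look like this (just a sketch against the snippet above):

var csvFilePath = someGoodValue;

(async () => {
    port = await getPort();
    console.log(csvFilePath); // still set here
})().then(() => {
    csvFilePath = null;
}).catch((err) => {
    console.log(err);
    csvFilePath = null; // reset even if the async work failed
});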
Or, as your comment says, maybe you can just remove the resetting of the csvFilePath entirely.
I realized after some silly tests I tried that I'm resetting csvFilePath to null outside the asynchronous call, which is what is causing the error... Oops!

How can I open multiple WebSocket streams

I am trying to stream data from the Binance WebSocket API, I have it working for one symbol at a time.
if ("WebSocket" in window) {
//open websocket
var symbols = getSymbol();
//console.log(symbols);
symbols.forEach(function(entry) {
console.log(entry);
})
var ws = new WebSocket("wss://stream.binance.com:9443/ws/btcusdt#miniTicker")
ws.onopen = function() {
console.log("Binance connected...");
};
ws.onmessage = function(evt) {
var r_msg = evt.data;
var jr_msg = JSON.parse(r_msg);
}
ws.onclose = function() {
console.log("Binance disconnected");
}
} else {
alert("WebSocket is NOT supported");
}
The line var symbols = getSymbol(); creates an array of 431 symbols. My logic (and what I am trying to achieve) is to add the new WebSocket() inside the forEach and stream price data from all of the currency pairs.
I'm not sure if this is possible at all, or what a better solution would be, but I want to stream and display live data from the API.
Your idea of putting the new WebSocket() inside the forEach should work. However, I'm not sure you are allowed to open hundreds of web sockets from the same tab, and there could also be performance issues related to it.
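For reference, a minimal sketch of that per-symbol approach (assuming each entry in symbols is a lowercase pair name such as "btcusdt") could look like this:

var sockets = [];
symbols.forEach(function(entry) {
    var ws = new WebSocket("wss://stream.binance.com:9443/ws/" + entry.toLowerCase() + "@miniTicker");
    ws.onmessage = function(evt) {
        var msg = JSON.parse(evt.data);
        // In the miniTicker payload, msg.s is the symbol and msg.c is the close price
        console.log(msg.s + ": " + msg.c);
    };
    sockets.push(ws);
});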
According to the API documentation, it is possible to open just one web socket which will send you data from a list of streams, or even just all streams. Just construct the URLs like this:
Specific streams: wss://stream.binance.com:9443/ws/stream1/stream2/stream3
All streams: wss://stream.binance.com:9443/ws/!miniTicker@arr
Here is a code sample that takes these things into consideration. By default this code uses the URL for all streams, but it also has the code (commented out) that uses specific streams.
let streams = [
    "ethbtc@miniTicker", "bnbbtc@miniTicker", "wavesbtc@miniTicker", "bchabcbtc@miniTicker",
    "bchsvbtc@miniTicker", "xrpbtc@miniTicker", "tusdbtc@miniTicker", "eosbtc@miniTicker",
    "trxbtc@miniTicker", "ltcbtc@miniTicker", "xlmbtc@miniTicker", "bcptbtc@miniTicker",
    "adabtc@miniTicker", "zilbtc@miniTicker", "xmrbtc@miniTicker", "stratbtc@miniTicker",
    "zecbtc@miniTicker", "qkcbtc@miniTicker", "neobtc@miniTicker", "dashbtc@miniTicker", "zrxbtc@miniTicker"
];
let trackedStreams = [];

//let ws = new WebSocket("wss://stream.binance.com:9443/ws/" + streams.join('/'));
let ws = new WebSocket("wss://stream.binance.com:9443/ws/!miniTicker@arr");

ws.onopen = function() {
    console.log("Binance connected...");
};
ws.onmessage = function(evt) {
    try {
        let msgs = JSON.parse(evt.data);
        if (Array.isArray(msgs)) {
            for (let msg of msgs) {
                handleMessage(msg);
            }
        } else {
            handleMessage(msgs);
        }
    } catch (e) {
        console.log('Unknown message: ' + evt.data, e);
    }
};
ws.onclose = function() {
    console.log("Binance disconnected");
};

function handleMessage(msg) {
    const stream = msg.s;
    if (trackedStreams.indexOf(stream) === -1) {
        document.getElementById('streams').innerHTML += '<br/>' + stream + ': <span id="stream_' + stream + '"></span>';
        trackedStreams.push(stream);
        document.getElementById('totalstreams').innerText = trackedStreams.length;
    }
    document.getElementById('stream_' + stream).innerText = msg.v;
}
<span id="totalstreams"></span> streams tracked<br/>
Total traded base asset volume:<br/>
<div id="streams"></div>

Use Fetch Streams API to consume chunked data asynchronously without using recursion

I'm using the JavaScript fetch streams API to consume chunked JSON asynchronously like in this answer.
My application may be receiving up to 25 small JSON objects per second (one for each frame in a video) over the span of an hour.
When the incoming chunks are large (1000+ JSON objects per chunk), my code functions well - fast, minimal memory use - it can easily receive 1,000,000 JSON objects reliably.
When the incoming chunks are smaller (5 JSON objects per chunk), my code functions poorly - slow, lots of memory consumption. The browser dies at about 50,000 JSON objects.
After doing a lot of debugging in the Developer tools, it appears the problem lies in the recursive nature of the code.
I tried to remove the recursion, but it seems to be required because the API relies on my code returning a promise to chain?!
How do I remove this recursion, or should I use something other than fetch?
Code with recursion (works)
String.prototype.replaceAll = function(search, replacement) {
    var target = this;
    return target.replace(new RegExp(search, 'g'), replacement);
};

results = []

fetch('http://localhost:9999/').then(response => {
    const reader = response.body.getReader();
    td = new TextDecoder("utf-8");
    buffer = "";

    reader.read().then(function processText({ done, value }) {
        if (done) {
            console.log("Stream done.");
            return;
        }

        try {
            decoded = td.decode(value);
            buffer += decoded;
            if (decoded.length != 65536){
                toParse = "["+buffer.trim().replaceAll("\n",",")+"]";
                result = JSON.parse(toParse);
                results.push(...result);
                console.log("Received " + results.length.toString() + " objects")
                buffer = "";
            }
        }
        catch(e){
            // Doesn't need to be reported, because partial JSON result will be parsed next time around (from buffer).
            //console.log("EXCEPTION:"+e);
        }

        return reader.read().then(processText);
    })
});
Code without recursion (doesn't work)
String.prototype.replaceAll = function(search, replacement) {
    var target = this;
    return target.replace(new RegExp(search, 'g'), replacement);
};

results = []
finished = false

fetch('http://localhost:9999/').then(response => {
    const reader = response.body.getReader();
    td = new TextDecoder("utf-8");
    buffer = "";
    lastResultSize = -1

    while (!finished)
        if (lastResultSize < results.length)
        {
            lastResultSize = results.length;
            reader.read().then(function processText({ done, value }) {
                if (done) {
                    console.log("Stream done.");
                    finished = true;
                    return;
                }
                else
                    try {
                        decoded = td.decode(value);
                        //console.log("Received chunk " + decoded.length.toString() + " in length");
                        buffer += decoded;
                        if (decoded.length != 65536){
                            toParse = "["+buffer.trim().replaceAll("\n",",")+"]";
                            result = JSON.parse(toParse);
                            results.push(...result);
                            console.log("Received " + results.length.toString() + " objects")
                            buffer = "";
                            //console.log("Parsed chunk " + toParse.length.toString() + " in length");
                        }
                    }
                    catch(e) {
                        // Doesn't need to be reported, because partial JSON result will be parsed next time around (from buffer).
                        //console.log("EXCEPTION:"+e);
                    }
            })
        }
});
For completeness, here is the python code I'm using on the test server. Note the line containing sleep which changes chunking behavior:
import io
import urllib
import inspect
from http.server import HTTPServer, BaseHTTPRequestHandler
from time import sleep

class TestServer(BaseHTTPRequestHandler):
    def do_GET(self):
        args = urllib.parse.parse_qs(self.path[2:])
        args = {i: args[i][0] for i in args}
        response = ''
        self.send_response(200)
        self.send_header('Content-type', 'text/html')
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Transfer-Encoding', 'chunked')
        self.end_headers()
        for i in range(1000000):
            self.wfile.write(bytes(f'{{"x":{i}, "text":"fred!"}}\n', 'utf-8'))
            sleep(0.001)  # Comment this out for bigger chunks sent to the client!

def main(server_port: "Port to serve on." = 9999, server_address: "Local server name." = ''):
    httpd = HTTPServer((server_address, server_port), TestServer)
    print(f'Serving on http://{httpd.server_name}:{httpd.server_port} ...')
    httpd.serve_forever()

if __name__ == '__main__':
    main()
The part you're missing is that the function passed to .then() is always called asynchronously, i.e. with an empty stack. So there is no actual recursion here. This is also why your 'without recursion' version doesn't work.
The simple solution to this is to use async functions and the await statement. If you call read() like this:
const {value, done} = await reader.read();
...then you can call it in a loop and it will work how you would expect.
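Applied to the code in the question, the reading part might look roughly like the sketch below. Note this is only an illustration: it also swaps the chunk-length heuristic for newline-based splitting, so a JSON object that spans two chunks is still parsed correctly.

'use strict';

async function readAll(url) {
    const response = await fetch(url);
    const reader = response.body.getReader();
    const td = new TextDecoder("utf-8");
    const results = [];
    let buffer = "";

    while (true) {
        const { value, done } = await reader.read();
        if (done) {
            console.log("Stream done.");
            break;
        }

        buffer += td.decode(value, { stream: true });

        // Parse complete lines; keep any trailing partial line in the buffer
        const lines = buffer.split("\n");
        buffer = lines.pop();
        for (const line of lines) {
            if (line.trim()) {
                results.push(JSON.parse(line));
            }
        }
    }
    console.log("Received " + results.length + " objects");
    return results;
}

readAll('http://localhost:9999/');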
I don't know specifically where your memory leak is, but your use of global variables looks like a problem. I recommend you always put 'use strict'; at the top of your code so the compiler will catch these problems for you. Then use let or const whenever you declare a variable.
I recommend you use TextDecoderStream to avoid problems when a character is split between multiple chunks. You will also have issues when a JSON object is split between multiple chunks.
See Append child writable stream demo for how to do this safely (but note that you need TextDecoderStream where that demo has "TextDecoder").
Note also the use of a WritableStream in that demo. Firefox doesn't support it yet AFAIK, but WritableStream provides much easier syntax to consume chunks without having to explicitly loop or recurse. You can find the web streams polyfill here.
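As a rough illustration of that combination (assuming a browser that supports TextDecoderStream and WritableStream), the consuming side needs no explicit loop or recursion at all:

fetch('http://localhost:9999/')
    .then(response => response.body
        .pipeThrough(new TextDecoderStream())   // bytes -> text, safe across chunk boundaries
        .pipeTo(new WritableStream({
            write(textChunk) {
                // Each decoded text chunk arrives here; buffering and JSON parsing
                // of complete lines would still be done as described above.
                console.log("chunk of length " + textChunk.length);
            },
            close() {
                console.log("Stream done.");
            }
        })))
    .catch(err => console.log(err));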

How to write a file from an ArrayBuffer in JS

I am trying to write a file uploader for Meteor framework.
The principle is to split the file on the client, from an ArrayBuffer, into small chunks of 4096 bytes that are sent to the server through a Meteor.method.
The simplified code below is the part of the client that sends a chunk to the server; it is repeated until offset reaches data.byteLength:
// data is an ArrayBuffer
var total = data.byteLength;
var offset = 0;

var upload = function() {
    var length = 4096; // chunk size

    // adjust the last chunk size
    if (offset + length > total) {
        length = total - offset;
    }

    // I am using Uint8Array to create the chunk
    // because it can be passed to the Meteor.method natively
    var chunk = new Uint8Array(data, offset, length);

    if (offset < total) {
        // Send the chunk to the server and tell it what file to append to
        Meteor.call('uploadFileData', fileId, chunk, function (err, length) {
            if (!err) {
                offset += length;
                upload();
            }
        });
    }
};
upload(); // start uploading
The simplified code below is the part on the server that receives the chunk and writes it to the file system:
var fs = Npm.require('fs');
var Future = Npm.require('fibers/future');

Meteor.methods({
    uploadFileData: function(fileId, chunk) {
        var fut = new Future();
        var path = '/uploads/' + fileId;

        // I tried that with no success
        chunk = String.fromCharCode.apply(null, chunk);

        // how to write the chunk that is an Uint8Array to the disk ?
        fs.appendFile(path, chunk, 'binary', function (err) {
            if (err) {
                fut.throw(err);
            } else {
                fut.return(chunk.length);
            }
        });
        return fut.wait();
    }
});
I failed to write a valid file to the disk: the file is saved, but I cannot open it. When I view the content in a text editor, it is similar to the original file (a jpg for example) but some characters are different. I think it could be an encoding problem, since the file size is not the same, but I don't know how to fix it...
Saving the file was as easy as creating a new Buffer with the Uint8Array object:
// chunk is the Uint8Array object
fs.appendFile(path, Buffer.from(chunk), function (err) {
    if (err) {
        fut.throw(err);
    } else {
        fut.return(chunk.length);
    }
});
Building on Karl.S's answer, this worked for me, outside of any framework:
fs.appendFileSync(outfile, Buffer.from(arrayBuffer));
Just wanted to add that in newer Meteor you can avoid some callback hell with async/await. Await will also throw and push the error up to the client.
Meteor.methods({
    uploadFileData: async function(file_id, chunk) {
        var path = 'somepath/' + file_id; // be careful with this, make sure to sanitize file_id
        // fs.promises.appendFile returns a promise, so await actually waits here (Node 10+)
        await fs.promises.appendFile(path, Buffer.from(chunk));
        return chunk.length;
    }
});
