filehandle.read() generating null characters in destination file - javascript

I am trying to read "src.txt" and copy it to "des.txt". I got it working two ways, and both produce pretty much the same result.
When I try the code below, I get weird characters at the end of the des.txt file (all the data from "src.txt" is copied successfully):
// Producing weird characters in the des.txt file at the end
let readhandle = await fs.open("src.txt", "r");
let writehandle = await fs.open("des.txt", "w");
let bytesread = -1;
while (bytesread != 0) {
  let data = await readhandle.read(); // default buffer size = 16384
  bytesread = data.bytesRead;
  writehandle.write(data.buffer);
}
On the other hand, if I try this, I get no extra weird characters:
let readhandle = await fs.open("src.txt", "r");
let writehandle = await fs.open("des.txt", "w");
let buffer = Buffer.alloc(16384);
let stats = await readhandle.stat();
let itrs = stats.size / 16384;
for (let i = 0; i < itrs; i++) {
  let data = await readhandle.read(buffer, 0, 16384, null);
  writehandle.write(data.buffer);
}
Why am I getting weird characters at the end of des.txt with the first method but not with the second?
I tried using buffers of different sizes in the second method, but the result is the same.
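A note on the likely cause: with no arguments, filehandle.read() fills and returns a full 16384-byte buffer even on the last read, when only part of it holds file data; the untouched remainder is zero bytes, which show up as NUL characters at the end. A minimal sketch of a fix (not from the original post) is to write only the bytesRead portion:
let readhandle = await fs.open("src.txt", "r");
let writehandle = await fs.open("des.txt", "w");
let bytesread = -1;
while (bytesread != 0) {
  let data = await readhandle.read();
  bytesread = data.bytesRead;
  // subarray(0, bytesRead) trims the unused tail of the buffer
  await writehandle.write(data.buffer.subarray(0, data.bytesRead));
}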

Related

Count the number of items in an array in Node.js

I have a big file (almost 2 GB), and the text includes many countries, sometimes twice or more. I need to create a script that writes to stdout all the countries from the file and also shows how many times each country name was used. For example, if Germany appears five times in the file, the code should show us: Germany: 5 (something like that).
const fs = require("fs");
const readline = require("readline");
const stream = require("stream");
const filename = process.argv[2];
const instream = fs.createReadStream(filename);
const outstream = new stream();
outstream.readable = true;
outstream.writable = true;
const rl = readline.createInterface({
  input: instream,
  output: outstream,
  terminal: false,
});
rl.on("line", function (line) {
const [country] = line.split(",", 1);
Str = country;
var obj = new Object();
for (var i = 0; i < Str.length; i++) {
if (obj[Str] != null) {
obj[Str] += 1;
} else {
obj[Str] = 1;
}
}
console.log(obj);
});
I wrote this, but it shows the number of letters in the word.
Thank you (the link to download the file is in the comments).
Here is a part of the text:
united states,2001,dinner-and-a-murder-mystery-games,retail,linkedin.com/company/dinner-and-a-murder-mystery-games,"",dinner and a murder mystery games,tennessee,1-10,dinnerandamurder.com
netherlands,2013,jennifer-campbell,management consulting,linkedin.com/company/jennifer-campbell,houten,jennifer campbell,utrecht,1-10,jennifercampbell.com
united states,"",imtec-corp,marketing and advertising,linkedin.com/company/imtec-corp,ardmore,imtec corp
italy,1977,bo.ma-s.r.l.,research,linkedin.com/company/bo.ma-s.r.l.
Your problem is probably that you have a variable "country" that contains the country as a string; you then store it in Str and do:
for (var i = 0; i < Str.length; i++) {
which loops over every character in the string.
Also, you need to define "obj" outside of the callback, otherwise it gets recreated for every line.
Just try:
var obj = {};
rl.on("line", function (line) {
  const [country] = line.split(",", 1);
  if (obj[country]) {
    obj[country]++;
  } else {
    obj[country] = 1;
  }
  console.log(obj);
});
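If you want one summary at the end instead of logging the whole object for every line, a small extension (a sketch using readline's close event) could be:
rl.on("close", function () {
  // print each country once, in the "Germany: 5" format the question asks for
  for (var country in obj) {
    console.log(country + ": " + obj[country]);
  }
});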

Node/Puppeteer/Chrome DevTools: shell says a variable's property is undefined even though it's defined. Why?

I'm trying to use Puppeteer/Node to extract only the used CSS and JS into separate .css and .js files, but PowerShell keeps returning the following error, and I cannot figure out why: the variable should clearly have a length, since the script should have fed some data to it. Perhaps one of the preceding steps is causing this? I'm not sure what the problem is.
The shell returns:
css_total_bytes += entry.text.length;
^
TypeError: Cannot read property 'length' of undefined
const puppeteer = require('puppeteer');
// Include to be able to export files w/ node
const fs = require('fs');
const iPhone = puppeteer.devices['iPhone 6'];
(async () => {
  const browser = await puppeteer.launch();
  const page = await browser.newPage();
  await page.emulate(iPhone);
  // Begin collecting CSS coverage data
  await Promise.all([
    page.coverage.startCSSCoverage(),
    page.coverage.startJSCoverage()
  ]);
  // Visit desired page
  await page.goto('http://127.0.0.1:5500/index.html');
  // Stop collection and retrieve the coverage iterator
  const [cssCoverage, jsCoverage] = await Promise.all([
    page.coverage.stopCSSCoverage(),
    page.coverage.stopJSCoverage()
  ]);
  // Investigate CSS Coverage and Extract Used CSS
  var css_coverage = [...cssCoverage];
  var css_used_bytes = 0;
  var css_total_bytes = 0;
  var covered_css = "";
  for (var entry in css_coverage[0]) {
    css_total_bytes += entry.text.length;
    console.log(`Total Bytes for ${entry.url}: ${entry.text.length}`);
    for (var range in entry.ranges) {
      css_used_bytes += range.end - range.start - 1;
      covered_css += entry.text.slice(range.start, range.end) + "\n";
    }
  }
  // Investigate JS Coverage and Extract Used JS
  var js_coverage = [...jsCoverage];
  var js_used_bytes = 0;
  var js_total_bytes = 0;
  var js_css = "";
  for (var entry in js_coverage[0]) {
    js_total_bytes += entry.text.length;
    console.log(`Total Bytes for ${entry.url}: ${entry.text.length}`);
    for (var range in entry.ranges) {
      js_used_bytes += range.end - range.start - 1;
      js_css += entry.text.slice(range.start, range.end) + "\n";
    }
  }
  console.log(`Total Bytes of CSS: ${css_total_bytes}`);
  console.log(`Used Bytes of CSS: ${css_used_bytes}`);
  fs.writeFile("./exported_css.css", covered_css, function (err) {
    if (err) {
      return console.log(err);
    }
    console.log("The CSS file was saved!");
  });
  console.log(`Total Bytes of JS: ${js_total_bytes}`);
  console.log(`Used Bytes of JS: ${js_used_bytes}`);
  fs.writeFile("./exported_js.js", covered_js, function (err) {
    if (err) {
      return console.log(err);
    }
    console.log("The JS file was saved!");
  });
  await browser.close();
})();
If I understand the docs correctly, these are some issues:
var css_coverage = [...cssCoverage]; — this seems redundant, as cssCoverage is already an array.
for (var entry in css_coverage[0]) {:
css_coverage[0] is a single coverage entry; you need not iterate over its contents.
A for...in loop iterates over object properties; you need a for...of loop to iterate over array entries (the same applies to the ranges iteration).
So try something like this (and the same for the JS coverage):
// Investigate CSS Coverage and Extract Used CSS
var css_used_bytes = 0;
var css_total_bytes = 0;
var covered_css = "";
for (var entry of cssCoverage) {
  css_total_bytes += entry.text.length;
  console.log(`Total Bytes for ${entry.url}: ${entry.text.length}`);
  for (var range of entry.ranges) {
    css_used_bytes += range.end - range.start - 1;
    covered_css += entry.text.slice(range.start, range.end) + "\n";
  }
}
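One more thing, unrelated to the loops: the question's code accumulates the used JS into js_css but then writes an undefined covered_js to exported_js.js, so the two names need to agree, e.g.:
// use one name consistently for the JS accumulator
var covered_js = "";
// ... accumulate entry.text slices into covered_js in the JS loop ...
fs.writeFile("./exported_js.js", covered_js, function (err) {
  if (err) return console.log(err);
  console.log("The JS file was saved!");
});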

How to chunk a string using JavaScript

I have a string which is more than 32 KB, and it needs to be chunked, with every chunk having a size limit of 32 KB. Is this possible using JavaScript? I can only find code for cutting or splitting the string, which I think is not related to my task.
const stringChop = function (str, size) {
  if (str == null)
    return [];
  str = String(str);
  return size > 0 ? str.match(new RegExp('.{1,' + size + '}', 'g')) : [str];
};
I also have code to check the byte size:
const byteSize = str => new Blob([str]).size;
const result = byteSize("sample");
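As an aside on that check: str.length counts UTF-16 code units, not bytes, so the two can differ for non-ASCII text. In Node, Buffer.byteLength gives the UTF-8 byte size without allocating a Blob (a small sketch, not from the original question):
// byte size of the UTF-8 encoding of a string in Node
const byteSizeNode = str => Buffer.byteLength(str, "utf8");
console.log(byteSizeNode("héllo")); // 6, even though "héllo".length is 5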
You really don't want to "spend time" splitting large strings in Node.
If you have to use vanilla JavaScript
This is entirely possible with JavaScript (and you're pretty close), though it is more elegant without regular expressions, using a generator:
function* chunk(str, size = 3) {
  for (let i = 0; i < str.length; i += size) yield str.slice(i, i + size);
}
[...chunk('hello world')]; // ["hel", "lo ", "wor", "ld"]
If you can use Node.js
I'd read the file you want to split with a createReadStream and then write it out to a new file whenever the size limit is reached. This is much more effective, since you don't create many small strings or keep all the data in memory:
const fs = require('fs');
const util = require('util');
(async () => {
  let currentFileIndex = 0, currentBytes = 0;
  let currentFile = fs.createWriteStream(`${currentFileIndex}.csv`);
  for await (const chunk of fs.createReadStream('input.csv')) {
    currentBytes += chunk.length;
    if (currentBytes > 32000) { // or whatever limit you want
      currentFile.end(); // probably wait for the callback here
      currentBytes = 0;
      currentFile = fs.createWriteStream(`${++currentFileIndex}.csv`);
    }
    await util.promisify(cb => currentFile.write(chunk, cb))();
  }
  currentFile.end(); // close the last file
})();

How to Directly Instantiate WebAssembly Module in JavaScript

The examples I've seen show essentially this:
fetch('simple.wasm').then(response =>
response.arrayBuffer()
).then(bytes =>
WebAssembly.instantiate(bytes, {})
).then(result =>
result.instance.exports...
)
But I would like to do it without making that extra HTTP request. I'm wondering if the only way is this (or some variation of it, which would be helpful to know):
var binary = '...mywasmbinary...';
var buffer = new ArrayBuffer(binary.length);
var view = new DataView(buffer);
for (var i = 0, n = binary.length; i < n; i++) {
  var x = binary.charCodeAt(i); // byte value of each character
  view.setInt8(i, x); // DataView offsets are in bytes, so use i, not i * 8
}
I'm wondering if I have to worry about endianness or anything like that.
Or perhaps doing something with URL and blobs might be better; I'm not sure.
Yes, you are correct: in order to inline wasm modules and avoid the HTTP request, you'll have to perform some sort of encoding. I'd recommend Base64-encoded strings, as they are the most compact form.
You can encode as follows:
const readFileSync = require('fs').readFileSync;
const wasmCode = readFileSync(id); // id is the path to your .wasm file
const encoded = Buffer.from(wasmCode, 'binary').toString('base64');
You can then load the module as follows:
var encoded = "... contents of encoded from above ...";
function asciiToBinary(str) {
  if (typeof atob === 'function') {
    // this works in the browser
    return atob(str);
  } else {
    // this works in node; Buffer.from replaces the deprecated new Buffer()
    return Buffer.from(str, 'base64').toString('binary');
  }
}
function decode(encoded) {
  var binaryString = asciiToBinary(encoded);
  var bytes = new Uint8Array(binaryString.length);
  for (var i = 0; i < binaryString.length; i++) {
    bytes[i] = binaryString.charCodeAt(i);
  }
  return bytes.buffer;
}
var modulePromise = WebAssembly.instantiate(decode(encoded), {}); // resolves to { module, instance }
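Since WebAssembly.instantiate returns a Promise, you'd consume the result along these lines (a sketch; add is a hypothetical export name, use whatever your module actually exports):
modulePromise.then(result => {
  // result is { module, instance }; call a hypothetical exported function
  console.log(result.instance.exports.add(1, 2));
});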

Convert large array of integers to unicode string and then back to array of integers in node.js

I have some data represented as an array of integers, which can have up to 200,000 elements. The integer values can vary from 0 to 200,000.
To emulate this data (for debugging purposes) I can do the following:
let data = [];
let len = 200000;
for (let i = 0; i < len; i++) {
  data[i] = i;
}
To convert this array of integers to a Unicode string, I do this:
let dataAsText = data.map((e) => {
  return String.fromCodePoint(e);
}).join('');
When I convert back to an array of integers, the array turns out to be longer:
let dataBack = dataAsText.split('').map((e) => {
  return e.codePointAt(e);
});
console.log(dataBack.length);
How can this be? What is wrong?
Extra information:
I use codePointAt/fromCodePoint because they can deal with all Unicode values (up to 21 bits), while charCodeAt/fromCharCode fail.
Using, for example, .join('123') and .split('123') makes dataBack the same length as data, but this isn't an elegant solution, because the string dataAsText becomes unnecessarily large.
If len is less than or equal to 65536 (which is 2^16, the 16-bit max value), then everything works fine. Isn't that strange?
EDIT:
I use code points because I need to convert the data to Unicode text so that the data stays short.
More about codePoint vs charCode, with an example:
If we convert 150000 to a character and then back to an integer with codePoint:
console.log(String.fromCodePoint(150000).codePointAt(0));
this gives us 150000, which is correct. Doing the same with charCode fails and prints 18928 (not 150000):
console.log(String.fromCharCode(150000).charCodeAt(0));
That's because higher code point values yield two UTF-16 code units (a surrogate pair), as can be seen in this snippet:
var s = String.fromCodePoint(0x2F804);
console.log(s); // Shows one character
console.log('length = ', s.length); // 2, because the encoding is \uD87E\uDC04
var i = s.codePointAt(0);
console.log('CodePoint value at 0: ', i); // correct
var i = s.codePointAt(1); // Should not do this: it starts in the middle of a surrogate pair!
console.log('CodePoint value at 1: ', i); // misleading
In your code, things go wrong when you do split: there, the code units making up the string are all split apart, discarding the fact that some pairs are intended to combine into a single character.
You can use the ES6 solution to this, where the spread syntax takes surrogate pairs into account:
let dataBack = [...dataAsText].map((e, i) => {
  // etc.
Now your counts will be the same.
Example:
// (Only 20 instead of 200000)
let data = [];
for (let i = 199980; i < 200000; i++) {
  data.push(i);
}
let dataAsText = data.map(e => String.fromCodePoint(e)).join("");
console.log("String length: " + dataAsText.length);
let dataBack = [...dataAsText].map(e => e.codePointAt(0));
console.log(dataBack);
Surrogates
Be aware that in the range 0 ... 65535 there are ranges reserved for so-called surrogates, which only represent a character when combined with another value. You should not iterate over those expecting each value to represent a character on its own, so in your original code this is another source of error.
To fix this, you should really skip over those values:
for (let i = 0; i < len; i++) {
  if (i < 0xd800 || i > 0xdfff) data.push(i);
}
In fact, there are many other code points that do not represent a character.
I have a feeling split doesn't work with Unicode values; a quick test above 65536 shows that strings become double the length after splitting.
Perhaps look at this post and its answers, as they ask a similar question.
I don't think you want codePointAt (or charCodeAt) at all. To convert a number to a string, just use String; to have a single delimited string with all the values, use a delimiter (like ,); to convert it back to a number, use the appropriate one of Number, the unary +, parseInt, or parseFloat (in your case, Number or + probably):
// Only 20 instead of 200000
let data = [];
for (let i = 199980; i < 200000; i++) {
  data.push(i);
}
let dataAsText = data.join(",");
console.log(dataAsText);
let dataBack = dataAsText.split(",").map(Number);
console.log(dataBack);
If your goal with codePointAt is to keep the dataAsText string short, then you can do that, but you can't use split to recreate the array, because JavaScript strings are (effectively) UTF-16 and split("") splits at each 16-bit code unit rather than keeping code points together.
A delimiter would help there too:
// Again, only 20 instead of 200000
let data = [];
for (let i = 199980; i < 200000; i++) {
  data.push(i);
}
let dataAsText = data.map(e => String.fromCodePoint(e)).join(",");
console.log("String length: " + dataAsText.length);
let dataBack = dataAsText.split(",").map(e => e.codePointAt(0));
console.log(dataBack);
If you're looking for a way to encode a list of integers so that you can safely transmit it over a network, Node Buffers with base64 encoding might be a better option:
let data = [];
for (let i = 0; i < 200000; i++) {
  data.push(i);
}
// encoding
var ta = new Int32Array(data);
var buf = Buffer.from(ta.buffer);
var encoded = buf.toString('base64');
// decoding
var buf = Buffer.from(encoded, 'base64');
var ta = new Uint32Array(buf.buffer, buf.byteOffset, buf.byteLength >> 2);
var decoded = Array.from(ta);
// same?
console.log(decoded.join() == data.join());
Your original approach won't work because not every integer has a corresponding code point in unicode.
UPD: if you don't need the data to be binary-safe, there's no need for base64; just store the buffer as is:
// saving
var ta = new Int32Array(data);
fs.writeFileSync('whatever', Buffer.from(ta.buffer));
// loading
var buf = fs.readFileSync('whatever');
var loadedData = Array.from(new Uint32Array(buf.buffer, buf.byteOffset, buf.byteLength >> 2));
// same?
console.log(loadedData.join() == data.join());
