Hash large files with crypto.subtle.digest("SHA-256", buffer) - javascript

i have developed a web application where a user can select multiple files via a input field. Then the sha-256 checksums are calculated by the following code. The code (taken from developer.mozilla.org) only works for small files. What do I have to change to handle large files (e.g. 1GB+) too?
function sha256(buffer){
return crypto.subtle.digest("SHA-256", buffer).then(function (hash) {
return hex(hash);
});
}
function hex(buffer) {
var hexCodes = [];
var view = new DataView(buffer);
for (var i = 0; i < view.byteLength; i += 4) {
// Using getUint32 reduces the number of iterations needed (we process 4 bytes each time)
var value = view.getUint32(i)
// toString(16) will give the hex representation of the number without padding
var stringValue = value.toString(16)
// We use concatenation and slice for padding
var padding = '00000000'
var paddedValue = (padding + stringValue).slice(-padding.length)
hexCodes.push(paddedValue);
}
// Join all the hex strings into one
return hexCodes.join("");
}

Related

Looping through an array that has binary numbers and writing true if the number is 1 or false if the number is 0

I am trying to make a webpage that makes an encrypted letter by first parsing a single character in ascii then parsing the ascii into binary then putting the binary into an array. After putting it into an array I have to loop through the array and write true for "1" or false for "0". Then I have to output to the page. an example of what the output would look like if you put in the letter "a" would be "false,true,true,false,false,false,false,true"
Update: I have added the "loop" in order to make sense of my problem
$(document).ready(function()
{
var output = document.getElementById("output");
var strQuestion = "Enter ONE character, matey!";
var strStandard = "J";
var chrCharacter = "";
var chrLength = 0;
var array = [];
var arrayLength = 0;
while (chrLength != 1)
{
chrCharacter = prompt(strQuestion, strStandard);
chrLength = chrCharacter.length;
}
intAscii = parseAscii(chrCharacter);
strBin = parseBin(intAscii);
array = strBin.split("");
for (i = 0; i < arrayLength; i++ )
{
if (array[i] = 0)
{
array[i] = false;
}
else if (array[i] = 1)
{
array[i] = true;
}
}
output.innerHTML = array;
}); //end document.ready
/*****
Purpose: Converts a character into ascii
Parameters: single character / letter
Return: integer representing an ascii value
*****/
function parseAscii(chrCharacter)
{
intAscii = chrCharacter.charCodeAt(0);
return intAscii;
}
/*****
Purpose: Takes the ascii code and turns it into binary
Parameters: single integer representing an ascii value
Return: binary, base 2 representation of the number passed to this function
*****/
function parseBin(intAscii)
{
strBin = parseInt(intAscii, 10).toString(2);
if(strBin.length < 8)
{
var intPlaceHolders = 8 - strBin.length;
for(var i = 0; i < intPlaceHolders; i++)
{
strBin = "0" + strBin;
}
}
return strBin;
}
I would convert the array with binaries to an array with boolean values wich you can joint together to a string that can be shown on the webpage.
array = [1,1,0,0,1]
// This will map over the items and perform an type conversion
var booleanArray = array.map(Boolean)
// Join all the items together as a string
Var booleanString = booleanArray.join(", ")
output.innerHTML = booleanString
`
I didn't test it, but it should work if I didn't make any typo's.
Btw, I dont think that this is what they ment with looping. But it's definitely a way to get the job done.
If I understand your question correctly, you can convert your array of ones and zeros (binary) to values of ture and false using the map function and using innerHTML to add the output to the DOM:
See example below:
// Populate myBinaryArray using your ascii method to get the follow:
let myBinaryArray = [1, 0, 0, 1, 1, 0, 1];
document.body.innerHTML += myBinaryArray.map(bit => !(!bit));

Convert large array of integers to unicode string and then back to array of integers in node.js

I have some data which is represented as an array of integers and can be up to 200 000 elements. The integer value can vary from 0 to 200 000.
To emulate this data (for debugging purposes) I can do the following:
let data = [];
let len = 200000
for (let i = 0; i < len; i++) {
data[i] = i;
}
To convert this array of integers as an unicode string I perform this:
let dataAsText = data.map((e) => {
return String.fromCodePoint(e);
}).join('');
When I want to convert back to an array of integers the array appears to be longer:
let dataBack = dataAsText.split('').map((e) => {
return e.codePointAt(e);
});
console.log(dataBack.length);
How does it come ? What is wrong ?
Extra information:
I use codePointAt/fromCodePoint because it can deal with all unicode values (up to 21 bits) while charCodeAt/fromCharCode fails.
Using, for example, .join('123') and .split('123') will make that the variable dataBack is the same length as data. But this isn't an elegant solution because the size of the string dataAsText will unnecessarily be very large.
If let len is equal or less to 65536 (which is 2^16 or 16 bits max value) then everything works fine. Which is strange ?
EDIT:
I use codePoint because I need to convert the data as unicode text so that the data is short.
More about codePoint vs charCode with an example:
If we convert 150000 to a character then back to an integer with codePoint:
console.log(String.fromCodePoint("150000").codePointAt(0));
this gives us 150000 which is correct. Doing the same with charCode fails and prints 18928 (and not 150000):
console.log(String.fromCharCode("150000").charCodeAt(0));
That's because higher code point values will yield 2 words, as can be seen in this snippet:
var s = String.fromCodePoint(0x2F804)
console.log(s); // Shows one character
console.log('length = ', s.length); // 2, because encoding is \uD87E\uDC04
var i = s.codePointAt(0);
console.log('CodePoint value at 0: ', i); // correct
var i = s.codePointAt(1); // Should not do this, it starts in the middle of a sequence!
console.log('CodePoint value at 1: ', i); // misleading
In your code things go wrong when you do split, as there the words making up the string are all split, discarding the fact that some pairs are intended to combine into a single character.
You can use the ES6 solution to this, where the spread syntax takes this into account:
let dataBack = [...dataAsText].map((e, i) => {
// etc.
Now your counts will be the same.
Example:
// (Only 20 instead of 200000)
let data = [];
for (let i = 199980; i < 200000; i++) {
data.push(i);
}
let dataAsText = data.map(e => String.fromCodePoint(e)).join("");
console.log("String length: " + dataAsText.length);
let dataBack = [...dataAsText].map(e => e.codePointAt(0));
console.log(dataBack);
Surrogates
Be aware that in the range 0 ... 65535 there are ranges reserved for so-called surrogates, which only represent a character when combined with another value. You should not iterate over those expecting that these values represent a character on their own. So in your original code, this will be another source for error.
To fix this, you should really skip over those values:
for (let i = 0; i < len; i++) {
if (i < 0xd800 || i > 0xdfff) data.push(i);
}
In fact, there are many other code points that do not represent a character.
I have a feeling split doesn't work with unicode values, a quick test above 65536 shows that they become double the length after splitting
Perhaps look at this post and answers, as they ask a similar question
I don't think you want charPointAt (or charCodeAt) at all. To convert a number to a string, just use String; to have a single delimited string with all the values, use a delimiter (like ,); to convert it back to a number, use the appropriate one of Number, the unary +, parseInt, or parseFloat (in your case, Number or + probably):
// Only 20 instead of 200000
let data = [];
for (let i = 199980; i < 200000; i++) {
data.push(i);
}
let dataAsText = data.join(",");
console.log(dataAsText);
let dataBack = dataAsText.split(",").map(Number);
console.log(dataBack);
If your goal with codePointAt is to keep the dataAsText string short, then you can do that, but you can't use split to recreate the array because JavaScript strings are UTF-16 (effectively) and split("") will split at each 16-bit code unit rather than keeping code points together.
A delimiter would help there too:
// Again, only 20 instead of 200000
let data = [];
for (let i = 199980; i < 200000; i++) {
data.push(i);
}
let dataAsText = data.map(e => String.fromCodePoint(e)).join(",");
console.log("String length: " + dataAsText.length);
let dataBack = dataAsText.split(",").map(e => e.codePointAt(0));
console.log(dataBack);
If you're looking for a way to encode a list of integers so that you can safely transmit it over a network, node Buffers with base64 encoding might be a better option:
let data = [];
for (let i = 0; i < 200000; i++) {
data.push(i);
}
// encoding
var ta = new Int32Array(data);
var buf = Buffer.from(ta.buffer);
var encoded = buf.toString('base64');
// decoding
var buf = Buffer.from(encoded, 'base64');
var ta = new Uint32Array(buf.buffer, buf.byteOffset, buf.byteLength >> 2);
var decoded = Array.from(ta);
// same?
console.log(decoded.join() == data.join())
Your original approach won't work because not every integer has a corresponding code point in unicode.
UPD: if you don't need the data to be binary-safe, no need for base64, just store the buffer as is:
// saving
var ta = new Int32Array(data);
fs.writeFileSync('whatever', Buffer.from(ta.buffer));
// loading
var buf = fs.readFileSync('whatever');
var loadedData = Array.from(new Uint32Array(buf.buffer, buf.byteOffset, buf.byteLength >> 2));
// same?
console.log(loadedData.join() == data.join())

Knapsack variant in JavaScript

I have tried to implement this knapsack problem solution algorithm in JavaScript, but the solutions s_opt I get has a total weight greater than the L_max.
What am I doing wrong?
I suspect it could be something related to Closures in recursion.
/*
GENERAL:
Assume we have a knapsack and we want to bring as much stuff as possible.
Of each thing we have several variants to choose from. Each of these variants have
different value and takes different amount of space.
DEFINITIONS:
L_max = integer, size of the knapsack for the entire problem having N items
l = matrix, having the elements l[i-1][j-1] representing the space taken
by variant j of item i (-1 since indexing the matrices has index starting on zero, i.e. item i is stored at position i-1)
p = matrix, having the elements p[i-1][j-1] representing the value given by
by variant j of item i
n = total number of items (used in a sub-problem)
N = total number of items (used in the full problem, N >= n)
s_opt = vector having the optimal combination of variant selections s_i, i.e. s_opt = arg max p_sum
*/
function knapsack(L_max,l,p) {
// constructing (initializing) - they are private members
var self = this; // in order for private functions to be able read variables
this.N = l.length;
var DCached = []; // this is only used by a private function so it doesnt need to made public using this.*
this.s_opt = [];
this.p_mean = null;
this.L_max = L_max;
// define public optimization function for the entire problem
// when this is completed the user can read
// s_opt to get the solution and
// p_mean to know the quality of the solution
this.optimize = function() {
self.p_mean = D(self.N,self.L_max) / Math.max(1,self.N);
}
// define private sub-problem optimization function
var D = function(n,r) {
if (r<0)
return -Infinity;
if (n==0)
return 0;
if(DCached[n-1] != null) {
if(DCached[n-1][r-1] != null) {
return DCached[n-1][r-1];
}
}
var p_max = -Infinity;
var p_sum;
var J = l[n-1].length;
for(var j = 0; j < J; j++) {
p_sum = p[n-1][j] + D( n-1 , r - l[n-1][j] );
if(p_sum>p_max) {
p_max = p_sum;
self.s_opt[n-1] = j;
}
}
DCached[n-1] = [];
DCached[n-1][r-1] = p_max;
return p_max;
}
}
The client using this knapsack solver does the following:
var knapsackSolution = new knapsack(5,l,p);
knapsackSolution.optimize();
// now the client can access knapsackSolution.s_opt containing the solution.
I found a solution. When solving a sub-problem D(n,r) the code in the question returned the optimized value, but it didn't really manage the array s_opt in a proper way. In the modified solution, pasted below, I fixed this. Instead of only returning the optimized value of the knapsack also an array of chosen variants (e.g. the arg of the max) are returned. The cache is also modified to manage these two parts of the solution (both max value and arg max value).
The code below also contains an additional feature addition. The user can now also pass a value maxComputingComplexity controlling the computational size of the problem in some kind of heuristic manner.
/*
GENERAL:
Assume we have a knapsack and we want to bring as much stuff as possible.
Of each thing we have several variants to choose from. Each of these variants have
different value and takes different amount of space.
The quantity of each variant is one.
DEFINITIONS:
L_max = integer, size of the knapsack, e.g. max number of letters, for the entire problem having N items
l = matrix, having the elements l[i-1][j-1] representing the space taken
by variant j of item i (-1 since indexing the matrices has index starting on zero, i.e. item i is stored at position i-1)
p = matrix, having the elements p[i-1][j-1] representing the value given by
by variant j of item i
maxComputingComplexity = value limiting the product L_max*self.N*M_max in order to make the optimization
complete in limited amount of time. It has a serious implication, since it may cut the list of alternatives
so that only the first alternatives are used in the computation, meaning that the input should be well
ordered
n = total number of items (used in a sub-problem)
N = total number of items (used in the full problem, N >= n)
M_i = number of variants of item i
s_i = which variant is chosen to pack of item i
s = vector of elements s_i representing a possible solution
r = maximum total space in the knapsack, i.e. sum(l[i][s_i]) <= r
p_sum = sum of the values of the selected variants, i.e. sum(p[i][s_i]
s_opt = vector having the optimal combination of variant selections s_i, i.e. s_opt = arg max p_sum
In order to solve this, let us see p_sum as a function
D(n,r) = p_sum (just seeing it as a function of the sub-problem n combined with the maximum total space r)
RESULT:
*/
function knapsack(L_max,l,p,maxComputingComplexity) {
// constructing (initializing) - they are private members
var self = this; // in order for private functions to be able read variables
this.N = l.length;
var DCached = []; // this is only used by a private function so it doesnt need to made public using this.*
//this.s_opt = [];
//this.p_mean = null;
this.L_max = L_max;
this.maxComputingComplexity = maxComputingComplexity;
//console.log("knapsack: Creating knapsack. N=" + N + ". L_max=" + L_max + ".");
// object to store the solution (both big problem and sub-problems)
function result(p_max,s_opt) {
this.p_max = p_max; //max value
this.s_opt = s_opt; //arg max value
}
// define public optimization function for the entire problem
// when this is completed the user can read
// s_opt to get the solution and
// p_mean to know the quality of the solution
// computing complexity O(L_max*self.N*M_max),
// think O=L_max*N*M_max => M_max=O/L_max/N => 3=x/140/20 => x=3*140*20 => x=8400
this.optimize = function() {
var M_max = Math.max(maxComputingComplexity / (L_max*self.N),2); //totally useless if not at least two
console.log("optimize: Setting M_max =" + M_max);
return D(self.N,self.L_max,M_max);
//self.p_mean = mainResult.D / Math.max(1,self.N);
// console.log...
}
// Define private sub-problem optimization function.
// The function reads to "global" variables, p and l
// and as arguments it takes
// n delimiting the which sub-set of items to be able to include (from p and l)
// r setting the max space that this sub-set of items may take
// Based on these arguments the function optimizes D
// and returns
// D the max value that can be obtained by combining the things
// s_opt the selection (array of length n) of things optimizing D
var D = function(n,r,M_max) {
// Start by checking whether the value is already cached...
if(DCached[n-1] != null) {
if(DCached[n-1][r-1] != null) {
//console.log("knapsack.D: n=" + n + " r=" + r + " returning from cache.");
return DCached[n-1][r-1];
}
}
var D_result = new result(-Infinity, []); // here we will manage the result
//D_result.s_opt[n-1] = 0; // just put something there to start with
if (r<0) {
//D_result.p_max = -Infinity;
return D_result;
}
if (n==0) {
D_result.p_max = 0;
return D_result;
}
var p_sum;
//self.s_opt[n] = 0; not needed
var J = Math.min(l[n-1].length,M_max);
var D_minusOneResult; //storing the result when optimizing all previous items given a max length
for(var j = 0; j < J; j++) {
D_minusOneResult = D( n-1 , r - l[n-1][j] , M_max)
p_sum = p[n-1][j] + D_minusOneResult.p_max;
if(p_sum > D_result.p_max) {
D_result.p_max = p_sum;
D_result.s_opt = D_minusOneResult.s_opt;
D_result.s_opt[n-1] = j;
}
}
DCached[n-1] = [];
DCached[n-1][r-1] = D_result;
//console.log("knapsack.D: n=" + n + " r=" + r + " p_max= "+ p_max);
return D_result;
}
}

MongoDB ObjectId parsing in JavaScript

Read the link:
http://docs.mongodb.org/manual/reference/object-id/
The link says that the ObjectId will have Time, Machine, Process Id & Counter values.
Then, how to parse a ObjectId in JavaScript and get those details?
In node we can make use of buffers to grab integers from a hex string.
.findOne(cond, function(err, doc){
// create a 12 byte buffer by parsing the id
var ctr = 0;
var b = new Buffer(doc._id.str, 'hex');
// read first 4 bytes as an integer
var epoch = b.readUInt32BE(0);
ctr += 4;
// node doesn't have a utility for 'read 3 bytes' so hack it
var machine = new Buffer([0, b[ctr], b[ctr+1], b[ctr+2]]).readUInt32BE(0);
ctr += 3;
// read the 2 byte process
var process = b.readUInt16BE(ctr);
ctr += 2;
// another 3 byte one
var counter = new Buffer([0, b[ctr], b[ctr+1], b[ctr+2]]).readUInt32BE(0);
});
For driver version <2.2 change doc._id.str to doc._id.toHexString().
The potentially simpler technique is to just use parseInt and slice. Because hex digits are half of a byte our offsets are twice as high.
var id = doc._id.str, ctr = 0;
var epoch = parseInt(id.slice(ctr, (ctr+=8)), 16);
var machine = parseInt(id.slice(ctr, (ctr+=6)), 16);
var process = parseInt(id.slice(ctr, (ctr+=4)), 16);
var counter = parseInt(id.slice(ctr, (ctr+=6)), 16);

Address to WCHAR_T to pass to ReadProcessMemory

I'm having trouble passing a WCHAR_T to ReadProcessMemory
This is how to succesfully pass a pointers address to ReadProcessMemory, I can do it with structures:
remote_tbb = ralloc_alloc(struct_TBButton.size);
var rez = SendMessage(hToolbar, TB_GETBUTTON, i, ctypes.voidptr_t(remote_tbb));
if (!rez) { throw new Error('Failed on SendMessage of TB_GETBUTTON') }
var local_tbb = new struct_TBButton();
var retRead = ralloc_read(remote_tbb, local_tbb.address());
var freed = ralloc_free(remote_tbb);
But now I need to do with WCHAR_T, so this is what I have:
var chars = SendMessage(hToolbar, TB_GETBUTTONTEXTW, local_tbb.idCommand, ctypes.voidptr_t(0));
console.log('chars=', chars, chars.toString(), uneval(chars));
if (chars && parseInt(chars.toString()) > 0) {
var remote_buf = ralloc_alloc(parseInt(chars.toString()));
var charsRe = SendMessage(hToolbar, TB_GETBUTTONTEXTW, local_tbb.idCommand, ctypes.voidptr_t(remote_buf));
console.log('charsRe=', charsRe);
var local_buf = ctypes.jschar; //WCHAR_T
var retRead = ralloc_read(remote_buf, local_buf.address()); ///PROBLEM LINE
console.log('retRead=', retRead);
var freed = ralloc_free(remote_buf);
console.log('freed=', freed);
console.log('Button Text = ', local_buf, local_buf.toString());
} else {
console.log('Button Text = NONE');
}
So my problem is on line:
var retRead = ralloc_read(remote_buf, local_buf.address());`
and it is specifically on the local_buf.address()
Errors in my experimenting that get thrown are:
expected type pointer, got ctypes.jschar
local_buf.address is not a function
So how to pass WCHAR_T as reference?
Edit:
Here is my ralloc_read implemetnation:
function ralloc_read(remote_address, local_buffer) {
var found_addr;
for (var i = 0; i < buffers.length; i++) {
if (buffers[i][0] == remote_address) {
found_addr = buffers[i]
break;
}
}
if (!found_addr) {
return null;
}
/*using the found remote address(found_addr[0]),
*i read size bytes (found_addr[1]) into my local_buffer*/
//console.info('found_addr[0]', found_addr[0].toString());
var rez = ReadProcessMemory(proc, found_addr[0], local_buffer, found_addr[1], 0);
return rez;
}
If ralloc_read calls ReadProcessMemory, then you'll need to allocate a jschar array that will receive the result.
var local_buf = ctypes.jschar.array()(chars);
ralloc_read(remote_buf, local_buf.address());
var str = local_buf.readString();
Edit However, the allocation call is wrong:
ralloc_alloc(parseInt(chars.toString()));
This will allocate chars bytes, e.g. chars = 11, 11 bytes.
A wchar_t/jschar however is not 1 byte but 2 bytes.
ctypes.jschar.size
// 2
So you'll actually need to allocate a remote memory buffer that is larger:
ralloc_alloc(parseInt(chars.toString()) * ctypes.jschar.size);
// That would be ralloc_alloc(count * sizeof(wchar_t*)) in C/C++
The local_buf stuff is correct, though as js-ctypes arrays will automatically calculate the required storage if it knows the size of the array element type, so a ctypes.jschar.array()(11) buffer will actually have 11 elements of size 2 bytes, i.e. 11 items * 2 bytes/item == 22 bytes.

Categories