JavaScript CRC32 - javascript

JavaScript CRC32 - javascript

I'm looking for a modern JavaScript implementation of CRC32.
This implementation, which may have originated from here, and is now here, there and everywhere, is unacceptable because it's slow (500ms/MB), and depends on over 2KB of space delimited table, accessed using substr. Yuck!
There appear to be a few variations of CRC32, so I need one that matches this output:
mysql> SELECT CRC32('abcde');
> 2240272485
Function doesn't actually need to accept a string however, since I'm working with byte arrays.

Update
I added a helper function to create the CRCTable instead of having this enormous literal in the code. It could also be used to create the table once and save it in an object or variable and have the crc32 function use that (or as W3C's example, check for the existence and create if necessary). I also updated the jsPerf to compare using a CRCtable with the literal string, literal array, saved window variable and dynamic pointer (the example shown here).
/**
 * Builds the 256-entry lookup table for the reflected CRC-32 polynomial
 * 0xEDB88320.
 *
 * @returns {number[]} 256 entries; each is a signed 32-bit integer because
 *   it is produced by the `^` / `>>>` operators.
 */
var makeCRCTable = function(){
    var table = [];
    for (var byteValue = 0; byteValue < 256; byteValue++) {
        var remainder = byteValue;
        var bitsLeft = 8;
        // Process the byte one bit at a time, least significant bit first.
        while (bitsLeft-- > 0) {
            if (remainder & 1) {
                remainder = 0xEDB88320 ^ (remainder >>> 1);
            } else {
                remainder = remainder >>> 1;
            }
        }
        table.push(remainder);
    }
    return table;
};
/**
 * Computes the CRC-32 of a string, one UTF-16 code unit per step (only the
 * low 8 bits of each code unit are used, so non-ASCII text is NOT UTF-8
 * encoded first).
 *
 * The lookup table is created on first use by makeCRCTable() and cached as
 * `crcTable` on the global object: `window` in a browser page, `globalThis`
 * where `window` does not exist (Node, workers), so the function no longer
 * throws a ReferenceError outside the browser main thread.
 *
 * @param {string} str - Input text.
 * @returns {number} Unsigned 32-bit CRC, e.g. 2240272485 for 'abcde'.
 */
var crc32 = function(str) {
    var root = typeof window !== 'undefined' ? window : globalThis;
    var crcTable = root.crcTable || (root.crcTable = makeCRCTable());
    var crc = 0 ^ (-1); // start with all 32 bits set
    for (var i = 0; i < str.length; i++ ) {
        crc = (crc >>> 8) ^ crcTable[(crc ^ str.charCodeAt(i)) & 0xFF];
    }
    // Final bit inversion, then force an unsigned result.
    return (crc ^ (-1)) >>> 0;
};
Here's a link to the performance difference: http://jsperf.com/js-crc32
Well, here's my amateur shot at this. I figured reading off an array is faster than substringing it.
Warning though, I forwent the use of the Utf8Encode function in these examples to simplify the tests. After all, these are just examples and pretty rough ones at that.

The code you link to takes a substring and parses the result as a number for each character. That's very inefficient.
I was able to make it 20 x faster with a simple optimization.
// CRC-32 (polynomial 0xEDB88320) lookup table as 256 space-separated
// 8-digit hex words. It is assembled from rows because a JavaScript string
// literal may not contain a raw line break.
var a_table = [
    "00000000 77073096 EE0E612C 990951BA 076DC419 706AF48F E963A535 9E6495A3",
    "0EDB8832 79DCB8A4 E0D5E91E 97D2D988 09B64C2B 7EB17CBD E7B82D07 90BF1D91",
    "1DB71064 6AB020F2 F3B97148 84BE41DE 1ADAD47D 6DDDE4EB F4D4B551 83D385C7",
    "136C9856 646BA8C0 FD62F97A 8A65C9EC 14015C4F 63066CD9 FA0F3D63 8D080DF5",
    "3B6E20C8 4C69105E D56041E4 A2677172 3C03E4D1 4B04D447 D20D85FD A50AB56B",
    "35B5A8FA 42B2986C DBBBC9D6 ACBCF940 32D86CE3 45DF5C75 DCD60DCF ABD13D59",
    "26D930AC 51DE003A C8D75180 BFD06116 21B4F4B5 56B3C423 CFBA9599 B8BDA50F",
    "2802B89E 5F058808 C60CD9B2 B10BE924 2F6F7C87 58684C11 C1611DAB B6662D3D",
    "76DC4190 01DB7106 98D220BC EFD5102A 71B18589 06B6B51F 9FBFE4A5 E8B8D433",
    "7807C9A2 0F00F934 9609A88E E10E9818 7F6A0DBB 086D3D2D 91646C97 E6635C01",
    "6B6B51F4 1C6C6162 856530D8 F262004E 6C0695ED 1B01A57B 8208F4C1 F50FC457",
    "65B0D9C6 12B7E950 8BBEB8EA FCB9887C 62DD1DDF 15DA2D49 8CD37CF3 FBD44C65",
    "4DB26158 3AB551CE A3BC0074 D4BB30E2 4ADFA541 3DD895D7 A4D1C46D D3D6F4FB",
    "4369E96A 346ED9FC AD678846 DA60B8D0 44042D73 33031DE5 AA0A4C5F DD0D7CC9",
    "5005713C 270241AA BE0B1010 C90C2086 5768B525 206F85B3 B966D409 CE61E49F",
    "5EDEF90E 29D9C998 B0D09822 C7D7A8B4 59B33D17 2EB40D81 B7BD5C3B C0BA6CAD",
    "EDB88320 9ABFB3B6 03B6E20C 74B1D29A EAD54739 9DD277AF 04DB2615 73DC1683",
    "E3630B12 94643B84 0D6D6A3E 7A6A5AA8 E40ECF0B 9309FF9D 0A00AE27 7D079EB1",
    "F00F9344 8708A3D2 1E01F268 6906C2FE F762575D 806567CB 196C3671 6E6B06E7",
    "FED41B76 89D32BE0 10DA7A5A 67DD4ACC F9B9DF6F 8EBEEFF9 17B7BE43 60B08ED5",
    "D6D6A3E8 A1D1937E 38D8C2C4 4FDFF252 D1BB67F1 A6BC5767 3FB506DD 48B2364B",
    "D80D2BDA AF0A1B4C 36034AF6 41047A60 DF60EFC3 A867DF55 316E8EEF 4669BE79",
    "CB61B38C BC66831A 256FD2A0 5268E236 CC0C7795 BB0B4703 220216B9 5505262F",
    "C5BA3BBE B2BD0B28 2BB45A92 5CB36A04 C2D7FFA7 B5D0CF31 2CD99E8B 5BDEAE1D",
    "9B64C2B0 EC63F226 756AA39C 026D930A 9C0906A9 EB0E363F 72076785 05005713",
    "95BF4A82 E2B87A14 7BB12BAE 0CB61B38 92D28E9B E5D5BE0D 7CDCEFB7 0BDBDF21",
    "86D3D2D4 F1D4E242 68DDB3F8 1FDA836E 81BE16CD F6B9265B 6FB077E1 18B74777",
    "88085AE6 FF0F6A70 66063BCA 11010B5C 8F659EFF F862AE69 616BFFD3 166CCF45",
    "A00AE278 D70DD2EE 4E048354 3903B3C2 A7672661 D06016F7 4969474D 3E6E77DB",
    "AED16A4A D9D65ADC 40DF0B66 37D83BF0 A9BCAE53 DEBB9EC5 47B2CF7F 30B5FFE9",
    "BDBDF21C CABAC28A 53B39330 24B4A3A6 BAD03605 CDD70693 54DE5729 23D967BF",
    "B3667A2E C4614AB8 5D681B02 2A6F2B94 B40BBE37 C30C8EA1 5A05DF1B 2D02EF8D"
].join(' ');
// Parse the hex words once, up front, so the hot loop only does array reads.
var b_table = a_table.split(' ').map(function(s){ return parseInt(s,16) });
/**
 * CRC-32 of a string using the pre-parsed numeric table `b_table`.
 * Only the low 8 bits of each UTF-16 code unit take part in the checksum.
 *
 * @param {string} str - Input text.
 * @returns {number} Unsigned 32-bit CRC.
 */
function b_crc32 (str) {
    var remainder = -1; // all 32 bits set
    var length = str.length;
    var pos = 0;
    while (pos < length) {
        var slot = (remainder ^ str.charCodeAt(pos)) & 0xFF;
        remainder = b_table[slot] ^ (remainder >>> 8);
        pos++;
    }
    // Invert every bit and return the value as unsigned.
    return ~remainder >>> 0;
};
Performances comparison
JsBin to check it gives the same result

For ready-to-useness, here is a minified version of Alex's answer:
/**
 * Self-contained CRC-32 (polynomial 0xEDB88320 = 3988292384) of a string.
 *
 * The 256-entry lookup table is built once, when this statement runs, and
 * kept in the closure; the original one-liner rebuilt it on every call.
 *
 * @param {string} r - Input text; the low 8 bits of each UTF-16 code unit
 *   are fed to the checksum.
 * @returns {number} Unsigned 32-bit CRC.
 */
var crc32 = (function () {
    var table = [];
    for (var n = 0; n < 256; n++) {
        var entry = n;
        for (var bit = 0; bit < 8; bit++) {
            entry = 1 & entry ? 3988292384 ^ entry >>> 1 : entry >>> 1;
        }
        table[n] = entry;
    }
    return function (r) {
        var crc = -1;
        for (var i = 0; i < r.length; i++) {
            crc = crc >>> 8 ^ table[255 & (crc ^ r.charCodeAt(i))];
        }
        return (-1 ^ crc) >>> 0;
    };
})();
console.log(crc32('abc'));
console.log(crc32('abc').toString(16).toUpperCase()); // hex

I needed this ASAP so I wrote my own. Hope someone finds it useful.
196 bytes (After closure compiler). 16ms/MB
Edit: With improvements from everyone’s input.
/**
 * crc32b over an indexable sequence of byte values.
 *
 * Example input : [97, 98, 99, 100, 101] (Uint8Array)
 * Example output: 2240272485 (unsigned 32-bit)
 *
 * The table index is `(crc & 255) ^ data[i]`, so every element of `data`
 * is expected to already be in the range 0..255.
 */
var crc32 = (function()
{
    var POLYNOMIAL = 3988292384; // 0xEDB88320, reflected CRC-32 polynomial
    var lut = new Uint32Array(256);

    // Pre-generate the lookup table in reflected bit order, so no separate
    // bit-reversal step is needed.
    for (var value = 0; value < 256; value++)
    {
        var rem = value;
        for (var bit = 0; bit < 8; bit++)
        {
            if (rem & 1)
            {
                rem = POLYNOMIAL ^ (rem >>> 1);
            }
            else
            {
                rem = rem >>> 1;
            }
        }
        lut[value] = rem;
    }

    return function( data )
    {
        var crc = -1; // all bits set (0xffffffff)
        var count = data.length;
        var pos = 0;
        while (pos < count)
        {
            var slot = (crc & 255) ^ data[pos];
            crc = (crc >>> 8) ^ lut[slot];
            pos++;
        }
        return ~crc >>> 0; // binary NOT, reported as unsigned
    };
})();

Based on #Denys Séguret's answer, I put together a crc32-adler implementation for a project of mine. It's a bit verbose, but is quite fast:
crc.js:
/* jslint node: true */
'use strict';
// Lookup table for the reflected CRC-32 polynomial 0xedb88320, one signed
// 32-bit entry per possible byte value. Generated at load time; the values
// are the same 256 words as the equivalent hard-coded literal.
const CRC32_TABLE = Int32Array.from({ length: 256 }, (_, byteValue) => {
  let entry = byteValue;
  for (let bit = 0; bit < 8; bit++) {
    entry = (entry & 1) ? (0xedb88320 ^ (entry >>> 1)) : (entry >>> 1);
  }
  return entry;
});
exports.CRC32 = class CRC32 {
constructor() {
this.crc = -1;
}
update(input) {
input = Buffer.isBuffer(input) ? input : Buffer.from(input, 'binary');
return input.length > 10240 ? this.update_8(input) : this.update_4(input);
}
update_4(input) {
const len = input.length - 3;
let i = 0;
for(i = 0; i < len;) {
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++]) & 0xff ];
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++]) & 0xff ];
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++]) & 0xff ];
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++]) & 0xff ];
}
while(i < len + 3) {
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++] ) & 0xff ];
}
}
update_8(input) {
const len = input.length - 7;
let i = 0;
for(i = 0; i < len;) {
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++]) & 0xff ];
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++]) & 0xff ];
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++]) & 0xff ];
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++]) & 0xff ];
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++]) & 0xff ];
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++]) & 0xff ];
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++]) & 0xff ];
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++]) & 0xff ];
}
while(i < len + 7) {
this.crc = (this.crc >>> 8) ^ CRC32_TABLE[ (this.crc ^ input[i++] ) & 0xff ];
}
}
finalize() {
return (this.crc ^ (-1)) >>> 0;
}
};
You can then use this for streams and the like where data may come in chunks. Perform update on each chunk then finalize to get the result.
BTW: I found this to be a good source for checksum validation in addition to the built in *nix crc32 command.

Dystroy's version is optimized, but Alex's is still faster. Note that building the string from the document makes for an unfair benchmark: the document grows as the testing progresses, and since Alex's test runs last, it has the most data to process.
Table setup optimization is great but has debatable effect on the use time execution.
http://jsperf.com/alex-variations

Related

Is it possible to reverse final single number output of multiple XOR and bits shifting operations

I have a code written in JS of parsing x,y,z into 1 number.
Is it possible to somehow revert the operation and get x,y,z back by only knowing the final number and the operations made on it?
I have hardcoded x,y,z in the Revert function in order to test the reverse process, and it works. But what I need is to get x,y,z back from the parsedOutput alone.
/**
 * Packs the hard-coded values x=25, y=8, z=110 into one number by XOR-ing
 * (x << 9), (y << 5) and z into an accumulator that starts at 0, logging
 * the accumulator after every step.
 *
 * @returns {number} The packed value (13166 for the built-in inputs).
 */
let ParseWithXor = () => {
    const [x, y, z] = [25, 8, 110];
    let finalOutput = 0;

    finalOutput ^= x << 9;
    console.log(` finalOutput ^ (${x} << 9) = ${finalOutput}`);

    finalOutput ^= y << 5;
    console.log(` finalOutput ^ (${y} << 5) = ${finalOutput}`);

    finalOutput ^= z;
    console.log(`finalOutput ^ ${z} = ${finalOutput}`);

    return finalOutput;
};
/**
 * Undoes the XOR steps for the hard-coded values z=110, y=8, x=25 and logs
 * the value before the first step and after each one. Returns nothing.
 *
 * @param {number} parsedOutput - A packed value such as 13166.
 */
let Revert = (parsedOutput) => {
    // The same three terms that were XOR-ed in, removed last-applied first.
    const terms = [110, 8 << 5, 25 << 9];
    console.log(parsedOutput);
    for (const term of terms) {
        parsedOutput ^= term;
        console.log(parsedOutput);
    }
};
// Pack the hard-coded x, y, z, logging each intermediate value.
ParseWithXor();
console.log("-------------------------------------");
// Undo the three XOR steps starting from the packed value 13166.
Revert(13166);
finalOutput ^ (25 << 9) = 12800
finalOutput ^ (8 << 5) = 13056
finalOutput ^ 110 = 13166
--------------------------------------
13166
13056
12800
0

if you xor an integer with integer twice you will get original number
(a ^ b) ^ b = a
it does not matter the order of xor operations
a ^ b ^ c = b ^ a ^ c
so if you have
a ^ c0 ^ c1 ^ c2 = b
then
a = b ^ c0 ^ c1 ^ c2
so the answer is yes you just xor back in reverse order to obtain the sub results... or in any order if you want just original value.
so you have:
w0 = 0;
w1 = w0 ^ (x << 9);
w2 = w1 ^ (y << 5);
w3 = w2 ^ (z );
I would reverse it like this:
w3 = ...;
// x,y,z from w(i) | w(i) from x,y,z
// ---------------------------------------
z = (w3 ^ w2); | w2 = w3 ^ (z );
y = (w2 ^ w1) >> 5; | w1 = w2 ^ (y << 5);
x = (w1 ^ w0) >> 9; | w0 = w1 ^ (x << 9);
// x,y,z from w3,w0 but x,y,z must not overlap bits
// z = <0,31>
// y = <0,15>
// x = <0,(max/512)-1>
// ----------------------------------------------
w = w0 ^ w3;
z = w & 31; w >>= 5;
y = w & 15; w >>= 4;
x = w;

How to do popcount or count bits on arbitrarily long bit sequence in JavaScript

This looks like it works well on integers that are within the max integer size in JavaScript:
/**
 * Counts the set bits of a number by summing bitCount32() over successive
 * 32-bit words, lowest word first.
 *
 * After each word the value is divided by 2^32 and truncated toward zero.
 * Without the truncation the quotient is a fraction that only reaches 0 by
 * floating-point underflow, costing dozens of useless iterations, and the
 * loop never ends for NaN or +/-Infinity.
 *
 * @param {number} n - Value to inspect; coerced with Number().
 * @returns {number} Number of 1 bits.
 * @throws {RangeError} If `n` is NaN or infinite.
 */
function bitCount (n) {
  var value = Number(n);
  if (!Number.isFinite(value)) {
    throw new RangeError('bitCount expects a finite number');
  }
  var bits = 0;
  while (value !== 0) {
    // `| 0` keeps the low 32 bits of the integer part.
    bits += bitCount32(value | 0);
    value = Math.trunc(value / 0x100000000);
  }
  return bits;
}
/**
 * Population count of a 32-bit integer using the parallel (SWAR) method:
 * sum adjacent bits into 2-bit fields, then 4-bit fields, then bytes, and
 * finally add the four byte sums with one multiplication.
 *
 * @param {number} n - Value whose low 32 bits are counted.
 * @returns {number} Number of 1 bits (0..32).
 */
function bitCount32 (n) {
  var pairs = n - ((n >> 1) & 0x55555555);
  var nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
  var bytes = (nibbles + (nibbles >> 4)) & 0xF0F0F0F;
  // The multiply accumulates all four byte counts in the top byte.
  return (bytes * 0x1010101) >> 24;
}
I'm wondering though how to count bits generally, on any sized bit stream, efficiently, ideally without converting to string.

The comment with the lookup table, roughly:
// lookup[b] is the number of 1 bits in the byte value b.
var lookup = new Uint8Array(256);
for (var i = 0; i < 256; i++) {
  var c = 0;
  var j = i;
  while (j) {
    c += j & 1;
    j >>= 1;
  }
  lookup[i] = c;
}

/**
 * Counts the set bits in a sequence of bytes.
 * `arr` is passed to `new Uint8Array(arr)`, so anything that constructor
 * accepts can be given.
 */
function count(arr) {
  var bytes = new Uint8Array(arr);
  var total = 0;
  for (var k = 0; k < bytes.length; k++) {
    total += lookup[bytes[k]];
  }
  return total;
}
console.log(count(new Uint8Array([0x12,0x34,0x56,0x78,0x9A,0xBC,0xDE,0xF])));
// per-nibble counts: 11 21 22 31 22 32 33 4 = 32
Of course the table-generation could use your magic too.

If you know your buffer will have length divisible by 4,
// View the data as 32-bit words.
// NOTE(review): assumes `buffer` is an ArrayBuffer whose byte length is a multiple of 4 — confirm at the call site.
let array32 = new Uint32Array(buffer);
// Add up the per-word results of bitCount32, starting from 0.
let numBits = array32.reduce((a, e) => a + bitCount32(e), 0);
Otherwise probably tevemadar's suggestion is better, use Uint8Array and count bits in a byte, not in a dword.

De-interlace bytes

Given an interlaced bit sequence of:
ABABABABABABABAB
What javascript bitwise operation can I use to convert it to be in the sequence:
AAAAAAAABBBBBBBB

That's known as an unshuffle (see also Hacker's Delight 7.2, shuffling bits).
The algorithm given in Hacker's Delight is:
// Unshuffle (de-interleave) the bits of a 32-bit value x in four steps.
// Each step: t marks, under the mask, the positions where x differs from x
// shifted right by d; XOR-ing t and (t << d) back into x exchanges each
// masked bit with the bit d positions above it.
// d = 1: exchange single bits.
t = (x ^ (x >> 1)) & 0x22222222; x = x ^ t ^ (t << 1);
// d = 2: exchange 2-bit groups.
t = (x ^ (x >> 2)) & 0x0C0C0C0C; x = x ^ t ^ (t << 2);
// d = 4: exchange 4-bit groups.
t = (x ^ (x >> 4)) & 0x00F000F0; x = x ^ t ^ (t << 4);
// d = 8: exchange whole bytes.
t = (x ^ (x >> 8)) & 0x0000FF00; x = x ^ t ^ (t << 8);
Those right shifts can be either logical or arithmetic; the AND with the mask ensures that bits affected by that difference do not appear in t anyway.
This is for 32bit numbers, for 16 bit numbers you can chop off the left half of every mask and skip the last step.
This is a sequence of delta swaps, see The Art of Computer Programming volume 4A, Bitwise tricks and techniques, bitswapping.

Check out this algorithm, if it's good for you:
/**
 * Separates the two interleaved 8-bit streams of a 16-bit value.
 *
 * Input bits 0, 2, ..., 14 form the first stream and bits 1, 3, ..., 15 the
 * second. Bits are appended to the result most-significant first, so input
 * bit 0 ends up in result bit 15 and input bit 15 in result bit 0: the
 * high byte of the result holds the first stream, the low byte the second.
 *
 * Fixes over the previous version: the mask for the second stream is now
 * advanced on every iteration (it used to test bit 1 eight times), and the
 * leftover debugging console.log was removed.
 *
 * @param {number} input - 16-bit interleaved value.
 * @returns {number} 16-bit de-interleaved value.
 */
function deinterlace(input) {
    var result = 0;
    var mask;
    var i;
    // First stream: bits 0, 2, 4, ..., 14.
    for (i = 0, mask = 1; i < 8; i++, mask <<= 2) {
        result = (result << 1) | (input & mask ? 1 : 0);
    }
    // Second stream: bits 1, 3, 5, ..., 15.
    for (i = 0, mask = 2; i < 8; i++, mask <<= 2) {
        result = (result << 1) | (input & mask ? 1 : 0);
    }
    return result;
}
Working fiddle.

Compressing a blob in javascript

I need to send a blob to the server with ajax, but it can end up getting somewhat large, and I'd like to decrease upload time. I've tried jszip already, but that just gave me an empty file inside the zip. I've also tried btoa(), but it turns out that the encoded value just ends up being [object Blob] instead of the actual blob data. What are my options for compressing blobs?
Here was the code I used for jszip:
// Attempt to zip the recording with JSZip.
var zip = new JSZip();
// NOTE(review): the surrounding description reports that this yields a 0-byte "recording.wav";
// presumably this JSZip version does not accept a Blob here — confirm against the JSZip docs.
zip.file("recording.wav", blobFile);
// Build the archive; the result is later appended to a FormData object.
var content = zip.generate();
I then appended "content" to a FormData object and sent it to the server. On the server side, I decoded the POST data (from base64). The zip file opened just fine, but recording.wav was a 0 length file.
Additionally, I've tried using the LZW implementation found here. This was the additional code I used to compress it:
// Attempt to compress the blob with an LZW implementation.
// `compressed` is only assigned once the asynchronous read below has finished.
var compressed;
var reader = new FileReader();
reader.onload = function(event){
compressed = LZW.compress(event.target.result);
};
// NOTE(review): readAsText decodes the blob as text, which is presumably lossy for binary
// audio data — verify whether this is why decompression fails.
reader.readAsText(blobFile);
However, decompressing it returns null.

Caveat: compressing things like audio files would be better done using an algorithm meant specifically for that type of data, perhaps something lossy. However, knowing how hard it was to find a reasonable lossless implementation as provided below, I'm very concerned that it will be hard to find a good implementation in Javascript for that type of data specifically that meets your needs.
In any case, I've had this general need for compression/decompression in Javascript as well, and I needed the same algorithm to work both client (browser) and server-side (node.js) and I needed it to work on very large files. I had checked out jszip and I also tried that LZW algorithm among at least five or six others none of which satisfied the requirements. I can't remember what the issue was with each specifically, but suffice to say it is surprisingly hard to find a good and FAST compressor/decompressor in javascript that works both server and client side and handles large files! I tried at least a dozen different implementations of various compression algorithms, and finally settled with this one - it hasn't failed me yet!
UPDATE
This is the original source:
https://code.google.com/p/jslzjb/source/browse/trunk/Iuppiter.js?r=2
By someone named Bear - thanks Bear, whoever you are, you're the best.
It is LZJB: http://en.wikipedia.org/wiki/LZJB
UPDATE 2
Corrected a problem with missing semicolon - should not give the object not a function error any longer.
This implementation stops working on data less than about 80 characters in length. So I updated the example to reflect that.
Realized the base64 encode/decode methods are in fact exposed on the object passed in for this version, so...
Currently seeing what we can do about specific blob types - what for example the best approach would be for a image versus audio etc as that would be useful for JS folks in general... will update here with what is found.
UPDATE 3
There is a much better wrapper around the original Iuppiter source from Bear than the one I posted below. It is written by cscott and on github here: https://github.com/cscott/lzjb
I'll be switching to this one, as it does streams as well.
Below is an example in Node.js of its use with a wav file. But before copying the example, let me give you the terrible news first, at least for this one wav file that I tried:
63128 Jun 19 14:09 beep-1.wav
63128 Jun 19 17:47 beep-2.wav
89997 Jun 19 17:47 beep-2.wav.compressed
So it successfully regenerated the wav (and it played). However, the compressed one appears to be larger than the original. Well shoot. In any case, might be good to try on your data, you never know, you might get lucky. Here's the code I used:
// Round-trips a WAV file through lzjb: base64-encode, compress, write the
// compressed bytes, decompress, base64-decode and write the copy.
// Uses Buffer.from() in place of the deprecated `new Buffer()` constructor
// and no longer ignores read/write errors.
var fs = require('fs');
var lzjb = require('lzjb');
fs.readFile('beep-1.wav', function(err, wav){
    // Without the file there is nothing to compress.
    if (err) throw err;
    // base 64 first
    var encoded = wav.toString('base64');
    // then utf8 - you don't want to go utf-8 directly
    var data = Buffer.from(encoded, 'utf8');
    // now compress
    var compressed = lzjb.compressFile(data, null, 9);
    // the next two statements are unnecessary, but to see what kind of
    // size is written to disk to compare with the original binary file
    var compressedBuffer = Buffer.from(compressed, 'binary');
    fs.writeFile('beep-2.wav.compressed', compressedBuffer, 'binary', function(err) {
        if (err) console.error(err);
    });
    // decompress
    var uncompressed = lzjb.decompressFile(compressed);
    // decode from utf8 back to base64
    var encoded2 = Buffer.from(uncompressed).toString('utf8');
    // decode back to binary original from base64
    var decoded = Buffer.from(encoded2, 'base64');
    // write it out, make sure it is identical
    fs.writeFile('beep-2.wav', decoded, function(err) {
        if (err) console.error(err);
    });
});
At the end of the day, I think its going to be too difficult to achieve any level of compression on most forms of binary data that isn't clobbered by the resulting base64 encoding. The days of control characters for terminals still haunt us to this day. You could try upping to a different base, but that has its risks and issues as well.
See this for example:
What is the most efficient binary to text encoding?
And this:
Why don't people use base128?
One thing though, definitely before you accept the answer, please please try it out on your blob, I've mainly used it for compressing utf-8, and I'd like to be sure it works on your specific data.
In any case, here it is!
/**
$Id: Iuppiter.js 3026 2010-06-23 10:03:13Z Bear $
Copyright (c) 2010 Nuwa Information Co., Ltd, and individual contributors.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of Nuwa Information nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
$Author: Bear $
$Date: 2010-06-23 18:03:13 +0800 (星期三, 23 六月 2010) $
$Revision: 3026 $
*/
/**
 * LZJB compressor/decompressor plus Base64 helpers (adapted from
 * Iuppiter.js).
 *
 * Changes from the previous version:
 *  - every variable is declared; the old code assigned to undeclared names
 *    (iS, eLen, NBBY, MATCH_BITS, ...), which creates globals in sloppy
 *    mode and throws a ReferenceError in strict mode / ES modules;
 *  - Base64 uses real integer division (Math.floor), so "=" padding is
 *    produced and consumed correctly;
 *  - "=" and "\r" are compared by character code, since the data is an
 *    array of byte values;
 *  - the low offset byte written by compress() is masked to 8 bits, so the
 *    output holds byte values only.
 */
var fastcompressor = {};
(function (k) {
    /**
     * UTF-8 encodes a string, one UTF-16 code unit at a time.
     * charCodeAt() never exceeds 65535, so at most three bytes are emitted
     * per unit (a surrogate pair becomes two 3-byte sequences).
     *
     * @param {string} c - Text to encode.
     * @returns {number[]} Byte values.
     */
    k.toByteArray = function (c) {
        var h = [],
            b, a;
        for (b = 0; b < c.length; b++) {
            a = c.charCodeAt(b);
            if (127 >= a) {
                h.push(a);
            } else if (2047 >= a) {
                h.push(a >> 6 | 192);
                h.push(a & 63 | 128);
            } else {
                h.push(a >> 12 | 224);
                h.push(a >> 6 & 63 | 128);
                h.push(a & 63 | 128);
            }
        }
        return h;
    };
    k.Base64 = {
        // Standard and URL-safe alphabets.
        CA: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
        CAS: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
        // Reverse tables: character code -> 6-bit value, -1 if not in the alphabet.
        IA: Array(256),
        IAS: Array(256),
        /** Fills the reverse lookup tables. */
        init: function () {
            var c, iS;
            for (c = 0; 256 > c; c++) k.Base64.IA[c] = -1, k.Base64.IAS[c] = -1;
            c = 0;
            for (iS = k.Base64.CA.length; c < iS; c++) k.Base64.IA[k.Base64.CA.charCodeAt(c)] = c, k.Base64.IAS[k.Base64.CAS.charCodeAt(c)] = c;
            // 61 is "="; padding decodes as zero bits.
            k.Base64.IA[61] = k.Base64.IAS[61] = 0;
        },
        /**
         * Base64-encodes a byte array or a string (strings are UTF-8
         * encoded first).
         *
         * @param {number[]|string} c - Data to encode.
         * @param {boolean} [h] - Truthy selects the URL-safe alphabet.
         * @returns {string} Encoded text, padded with "=".
         */
        encode: function (c, h) {
            var b, a, d, e, m, g, f, l, j;
            b = h ? k.Base64.CAS : k.Base64.CA;
            d = c.constructor == Array ? c : k.toByteArray(c);
            e = d.length;
            // m: number of input bytes that make up complete 3-byte groups.
            m = 3 * Math.floor(e / 3);
            // g: output length; the shift truncates the fractional quotient.
            g = (e - 1) / 3 + 1 << 2;
            a = Array(g);
            for (l = f = 0; f < m;) j = (d[f++] & 255) << 16 | (d[f++] & 255) << 8 | d[f++] & 255, a[l++] = b.charAt(j >> 18 & 63), a[l++] = b.charAt(j >> 12 & 63), a[l++] = b.charAt(j >> 6 & 63), a[l++] = b.charAt(j & 63);
            // f: leftover bytes (0, 1 or 2) that need padding.
            f = e - m;
            0 < f && (j = (d[m] & 255) << 10 | (2 == f ? (d[e - 1] & 255) << 2 : 0), a[g - 4] = b.charAt(j >> 12), a[g - 3] = b.charAt(j >> 6 & 63), a[g - 2] = 2 == f ? b.charAt(j & 63) : "=", a[g - 1] = "=");
            return a.join("");
        },
        /**
         * Decodes Base64 given as a string or as an array of character
         * codes.
         *
         * @param {number[]|string} c - Encoded data.
         * @param {boolean} [h] - Truthy selects the URL-safe alphabet.
         * @returns {number[]|string} A byte array when an array was passed
         *   in; otherwise a string with one character per decoded byte.
         */
        decode: function (c, h) {
            var b, a, d, e, m, g, f, l, j, p, q, n, eLen;
            b = h ? k.Base64.IAS : k.Base64.IA;
            c.constructor == Array ? (d = c, m = !0) : (d = k.toByteArray(c), m = !1);
            e = d.length;
            g = 0;
            // Skip characters outside the alphabet at both ends.
            for (f = e - 1; g < f && 0 > b[d[g]];) g++;
            for (; 0 < f && 0 > b[d[f]];) f--;
            // l: number of trailing "=" padding characters (61 is "=").
            l = 61 === d[f] ? 61 === d[f - 1] ? 2 : 1 : 0;
            a = f - g + 1;
            // j: number of line-separator characters when the input is
            // wrapped with CR LF after 76 characters (13 is "\r").
            j = 76 < e ? (13 === d[76] ? a / 78 : 0) << 1 : 0;
            e = (6 * (a - j) >> 3) - l;
            a = Array(e);
            q = p = 0;
            for (eLen = 3 * Math.floor(e / 3); p < eLen;) n = b[d[g++]] << 18 | b[d[g++]] << 12 | b[d[g++]] << 6 | b[d[g++]], a[p++] = n >> 16 & 255, a[p++] = n >> 8 & 255, a[p++] = n & 255, 0 < j && 19 == ++q && (g += 2, q = 0);
            if (p < e) {
                // Decode the final, incomplete group.
                for (j = n = 0; g <= f - l; j++) n |= b[d[g++]] << 18 - 6 * j;
                for (b = 16; p < e; b -= 8) a[p++] = n >> b & 255;
            }
            if (m) return a;
            for (n = 0; n < a.length; n++) a[n] = String.fromCharCode(a[n]);
            return a.join("");
        }
    };
    k.Base64.init();

    // LZJB parameters, private to this module.
    var NBBY = 8,                                         // bits per byte
        MATCH_BITS = 6,                                   // bits used for the match length
        MATCH_MIN = 3,                                    // shortest encodable match
        MATCH_MAX = (1 << MATCH_BITS) + (MATCH_MIN - 1),  // longest encodable match
        OFFSET_MASK = (1 << 16 - MATCH_BITS) - 1,         // 10-bit back-reference offset
        LEMPEL_SIZE = 256;                                // hash table size

    /**
     * LZJB-compresses a byte array or a string (strings are UTF-8 encoded
     * first).
     *
     * Output layout: a control byte, then up to eight items. A set control
     * bit marks a two-byte match (length and offset); a clear bit marks a
     * literal byte.
     *
     * NOTE: when the output is not getting smaller the function gives up
     * and returns a plain copy of the input, which decompress() cannot tell
     * apart from compressed data. Short inputs (under roughly 80 characters,
     * according to the author) hit this case.
     *
     * @param {number[]|string} c - Data to compress.
     * @returns {number[]} Compressed byte values.
     */
    k.compress = function (c) {
        var h = [],               // output
            b, a = 0,             // b: input length, a: read position
            d = 0,                // write position
            e, m, g = 1 << NBBY - 1, // e: hash / match start, m: control byte index, g: control bit
            f, l, j = Array(LEMPEL_SIZE); // f: match length, l: offset, j: hash -> last position
        for (b = 0; b < LEMPEL_SIZE; b++) j[b] = 3435973836;
        c = c.constructor == Array ? c : k.toByteArray(c);
        for (b = c.length; a < b;) {
            if ((g <<= 1) == 1 << NBBY) {
                // A new control byte is due.
                if (d >= b - 1 - 2 * NBBY) {
                    // Not compressing: return the input unchanged.
                    f = b;
                    for (d = a = 0; f; f--) h[d++] = c[a++];
                    break;
                }
                g = 1;
                m = d;
                h[d++] = 0;
            }
            if (a > b - MATCH_MAX) {
                // Too close to the end for a full-length match: literal.
                h[d++] = c[a++];
                continue;
            }
            e = (c[a] + 13 ^ c[a + 1] - 13 ^ c[a + 2]) & LEMPEL_SIZE - 1;
            l = a - j[e] & OFFSET_MASK;
            j[e] = a;
            e = a - l;
            if (0 <= e && e != a && c[a] == c[e] && c[a + 1] == c[e + 1] && c[a + 2] == c[e + 2]) {
                h[m] |= g;
                for (f = MATCH_MIN; f < MATCH_MAX && c[a + f] == c[e + f]; f++);
                h[d++] = f - MATCH_MIN << NBBY - MATCH_BITS | l >> NBBY;
                // Low 8 bits of the offset; the top 2 bits are in the previous byte.
                h[d++] = l & 255;
                a += f;
            } else {
                h[d++] = c[a++];
            }
        }
        return h;
    };

    /**
     * Reverses compress().
     *
     * @param {number[]|string} c - Compressed data.
     * @param {boolean} [h] - Truthy returns the byte array; otherwise a
     *   string with one character per byte is returned.
     * @returns {number[]|string} Decompressed data.
     */
    k.decompress = function (c, h) {
        var b, a = [],            // b: input, a: output
            d, e = 0,             // d: input length, e: read position
            m = 0,                // write position
            g, f, l = 1 << NBBY - 1, // g: copy source, f: control byte, l: control bit
            j;                    // match length
        b = c.constructor == Array ? c : k.toByteArray(c);
        for (d = b.length; e < d;) {
            if ((l <<= 1) == 1 << NBBY) l = 1, f = b[e++];
            if (f & l) {
                j = (b[e] >> NBBY - MATCH_BITS) + MATCH_MIN;
                g = (b[e] << NBBY | b[e + 1]) & OFFSET_MASK;
                e += 2;
                g = m - g;
                // An offset reaching before the start of the output is invalid.
                if (g < 0) break;
                for (; 0 <= --j;) a[m++] = a[g++];
            } else {
                a[m++] = b[e++];
            }
        }
        if (!("undefined" == typeof h ? 0 : h)) {
            for (b = 0; b < m; b++) a[b] = String.fromCharCode(a[b]);
            a = a.join("");
        }
        return a;
    };
})(fastcompressor);
And if memory serves... here's how you use it:
// Compress a 160-character string, then decompress it again.
var compressed = fastcompressor.compress("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"); // data less than this length poses issues.
// No second argument is passed, so decompress() returns a string rather than a byte array.
var decompressed = fastcompressor.decompress(compressed);
Rgds....Hoonto/Matt
Also, what I've posted is minified but beautified, and very slightly adapted for ease-of-use. Check the link in the update above for the original stuff.

JS Zip will work fine just correct your syntax..
/**
 * Builds a zip archive with JSZip and navigates the page to it as a base64
 * data: URL.
 *
 * `content` is now a local variable; it used to be assigned without a
 * declaration, which creates a global in sloppy mode and throws a
 * ReferenceError in strict mode.
 *
 * NOTE(review): `zip.add` and `blobfile` are kept exactly as posted —
 * confirm the method name against the JSZip version in use.
 */
function create_zip() {
    var zip = new JSZip();
    zip.add("recording.wav", blobfile); // the data must be supplied as raw bytes (or a stream), not as a Blob object
    zip.add("hello2.txt", "Hello Second World\n"); // optional second file
    var content = zip.generate();
    location.href="data:application/zip;base64," + content;
}
you can add multiple files too..
Just zip.file will become zip.add
and then zip.generate() will do the rest.. as you have done,
or refer old Post its last part of JavaScript, and NativeBridge will be helpful if you can utilize, in this post user records using Objective C that you can ignore, but sends this object using JavaScript and Socket that you can/may utilize..
I hope this will do ... :)

How to decode byte of array data using javascript [duplicate]

How do I convert a byte array into a string?
I have found these functions that do the reverse:
/**
 * Converts an ASCII string into an array of 8-character bit strings, one
 * per character (via dec2Bin).
 * A character with a code of 128 or above triggers an alert() and gets
 * the placeholder -1 in its slot.
 */
function string2Bin(s) {
    var bits = [];
    for (var pos = 0, len = s.length; pos < len; pos++) {
        var code = s.charCodeAt(pos);
        if (!(code < 128)) {
            alert(s.charAt(pos) + ' is NOT an ASCII character');
            bits[pos] = -1;
            continue;
        }
        bits[pos] = dec2Bin(code);
    }
    return bits;
}
/**
 * Returns the lowest 8 binary digits of `d` as a string, most significant
 * digit first (e.g. 65 -> "01000001").
 */
function dec2Bin(d) {
    var digits = [];
    var remaining = d;
    for (var n = 0; n < 8; n++) {
        // Peel off the lowest digit and put it in front of the others.
        digits.unshift(remaining % 2);
        remaining = Math.floor(remaining / 2);
    }
    return digits.join('');
}
But how do I get the functions working the other way?
Thanks.
Shao

You need to parse each octet back to number, and use that value to get a character, something like this:
/**
 * Turns an array of bit strings into text: every entry is parsed as a
 * base-2 number and converted to the character with that code.
 */
function bin2String(array) {
  var chars = [];
  var total = array.length;
  for (var pos = 0; pos < total; pos++) {
    var code = parseInt(array[pos], 2);
    chars.push(String.fromCharCode(code));
  }
  return chars.join("");
}
bin2String(["01100110", "01101111", "01101111"]); // "foo"
// Using your string2Bin function to test:
bin2String(string2Bin("hello world")) === "hello world";
Edit: Yes, your current string2Bin can be written more shortly:
/**
 * Maps every UTF-16 code unit of `str` to its binary representation
 * (without leading zeros).
 */
function string2Bin(str) {
  var bits = [];
  var pos = 0;
  while (pos < str.length) {
    bits[pos] = str.charCodeAt(pos).toString(2);
    pos++;
  }
  return bits;
}
But by looking at the documentation you linked, I think that the setBytesParameter method expects that the blob array contains the decimal numbers, not a bit string, so you could write something like this:
/**
 * Returns the UTF-16 code unit values of `str` as an array of numbers.
 */
function string2Bin(str) {
  var codes = [];
  var pos = 0;
  while (pos < str.length) {
    codes[pos] = str.charCodeAt(pos);
    pos++;
  }
  return codes;
}
/**
 * Builds a string from an array (or array-like) of character codes.
 *
 * String.fromCharCode is applied to slices of at most 0x8000 elements:
 * passing a very large array to a single apply() call exceeds the engine's
 * argument limit and throws a RangeError.
 *
 * @param {ArrayLike<number>} array - Character codes.
 * @returns {string} The corresponding string ('' for an empty array).
 */
function bin2String(array) {
  var CHUNK_SIZE = 0x8000;
  if (array.length <= CHUNK_SIZE) {
    return String.fromCharCode.apply(String, array);
  }
  var parts = [];
  for (var start = 0; start < array.length; start += CHUNK_SIZE) {
    var chunk = Array.prototype.slice.call(array, start, start + CHUNK_SIZE);
    parts.push(String.fromCharCode.apply(String, chunk));
  }
  return parts.join('');
}
string2Bin('foo'); // [102, 111, 111]
bin2String(string2Bin('foo')) === 'foo'; // true

ES6 update
Now, string 'foo' also equals
String.fromCharCode(...[102, 111, 111])
Original answer
Simply apply your byte array to String.fromCharCode. For example
String.fromCharCode.apply(null, [102, 111, 111])
equals 'foo'.
MDN docs here.
Caveat: works for arrays shorter than 65535 - MDN docs here.

Try the new Text Encoding API:
// create an array view of some valid bytes
let bytesView = new Uint8Array([104, 101, 108, 108, 111]);
console.log(bytesView);
// convert bytes to string
// no encoding label is passed here, so the decoder uses its default (utf-8); these bytes are plain ASCII, which utf-8 decodes unchanged.
let str = new TextDecoder().decode(bytesView);
console.log(str);
// convert string to bytes
// the encoder produces utf-8 bytes, which for ASCII-only text are the same values as the character codes.
let bytes2 = new TextEncoder().encode(str);
// look, they're the same!
console.log(bytes2);
console.log(bytesView);

This should work:
String.fromCharCode(...array);
Or
String.fromCodePoint(...array)

That string2Bin can be written even more succinctly, and without any loops, to boot!
/**
 * Splits `str` into single UTF-16 code units and returns their numeric
 * values.
 */
function string2Bin ( str ) {
    var units = str.split("");
    var codes = [];
    for ( var pos = 0; pos < units.length; pos++ ) {
        codes.push( units[pos].charCodeAt( 0 ) );
    }
    return codes;
}

String to byte array: "FooBar".split('').map(c => c.charCodeAt(0));
Byte array to string: [102, 111, 111, 98, 97, 114].map(c => String.fromCharCode(c)).join('');

I think this would be more efficient:
/**
 * Converts an array of bit strings ("01100110", ...) into text.
 * Each entry is parsed as base 2 and stored in a Uint8Array; the bytes are
 * then turned into characters in slices, because String.fromCharCode
 * cannot be called with an unlimited number of arguments.
 */
function toBinString (arr) {
    var parsed = arr.map(function (bits) { return parseInt(bits, 2); });
    var bytes = new Uint8Array(parsed);
    var sliceLength = 0xffff;
    var pieces = [];
    for (var start = 0; start < bytes.length; start += sliceLength) {
        var slice = bytes.subarray(start, start + sliceLength);
        pieces.push(String.fromCharCode.apply(null, slice));
    }
    return pieces.join('');
}

Even if I'm a bit late, I thought it would be interesting for future users to share some one-liners implementations I did using ES6.
One thing that I consider important, depending on your environment and/or what you will do with the data, is to preserve the full byte value. For example, (5).toString(2) will give you 101, but the complete binary representation is in reality 00000101, and that's why you might need to create a leftPad implementation to fill the string byte with leading zeros. But you may not need it at all, as other answers have demonstrated.
If you run the code snippet below, you'll see the first output being the conversion of the abc string to a byte array and, right after that, the re-transformation of said array to its corresponding string.
// Convert an array of binary-digit strings (e.g. '01100001') back to text:
// each entry is parsed as base 2 and mapped to the character with that code.
const binArrayToString = array => array.map(byte => String.fromCharCode(parseInt(byte, 2))).join('');
// Left-pad a binary string with zeros up to 8 characters. Strings that are
// already 8 characters or longer are returned unchanged.
const leftPad = str => str.padStart(8, '0');
// For each UTF-16 code unit of the string, take its numeric code, render it
// in base 2 and pad it so every entry is at least 8 bits wide.
const stringToBinArray = str => str.split('').map(c => leftPad(c.charCodeAt(0).toString(2)));
const array = stringToBinArray('abc');
console.log(array);
console.log(binArrayToString(array));

If your array is encoded in UTF-8 and you can't use the TextDecoder API because it is not supported on IE:
You can use the FastestSmallestTextEncoderDecoder polyfill recommended by the Mozilla Developer Network website;
You can use this function also provided at the MDN website:
/**
 * Decodes an array of UTF-8 byte values into a JavaScript string.
 *
 * The lead byte's numeric range selects the sequence length (1 to 6 bytes).
 * A multi-byte form is only taken when enough bytes remain in the input;
 * otherwise the byte falls through and is emitted as a character of its own.
 * Continuation bytes are not validated.
 *
 * Code points are emitted with String.fromCodePoint so that values above
 * U+FFFF (four-byte sequences such as emoji) become proper surrogate pairs
 * instead of being truncated to 16 bits. Values that cannot be represented
 * (negative, or above U+10FFFF as produced by 5/6-byte forms) become U+FFFD.
 *
 * @param {ArrayLike<number>} aBytes - byte values (plain array or typed array)
 * @returns {string} the decoded text
 */
function utf8ArrayToString(aBytes) {
  const nLen = aBytes.length;
  let sView = "";
  for (let nIdx = 0; nIdx < nLen; nIdx++) {
    const nPart = aBytes[nIdx];
    let nCodePoint;
    if (nPart > 251 && nPart < 254 && nIdx + 5 < nLen) {
      /* six bytes */
      /* multiply instead of (nPart - 252 << 30) to stay out of 32-bit shift arithmetic */
      nCodePoint =
        (nPart - 252) * 1073741824 +
        ((aBytes[++nIdx] - 128) << 24) +
        ((aBytes[++nIdx] - 128) << 18) +
        ((aBytes[++nIdx] - 128) << 12) +
        ((aBytes[++nIdx] - 128) << 6) +
        aBytes[++nIdx] - 128;
    } else if (nPart > 247 && nPart < 252 && nIdx + 4 < nLen) {
      /* five bytes */
      nCodePoint =
        ((nPart - 248) << 24) +
        ((aBytes[++nIdx] - 128) << 18) +
        ((aBytes[++nIdx] - 128) << 12) +
        ((aBytes[++nIdx] - 128) << 6) +
        aBytes[++nIdx] - 128;
    } else if (nPart > 239 && nPart < 248 && nIdx + 3 < nLen) {
      /* four bytes */
      nCodePoint =
        ((nPart - 240) << 18) +
        ((aBytes[++nIdx] - 128) << 12) +
        ((aBytes[++nIdx] - 128) << 6) +
        aBytes[++nIdx] - 128;
    } else if (nPart > 223 && nPart < 240 && nIdx + 2 < nLen) {
      /* three bytes */
      nCodePoint =
        ((nPart - 224) << 12) +
        ((aBytes[++nIdx] - 128) << 6) +
        aBytes[++nIdx] - 128;
    } else if (nPart > 191 && nPart < 224 && nIdx + 1 < nLen) {
      /* two bytes */
      nCodePoint = ((nPart - 192) << 6) + aBytes[++nIdx] - 128;
    } else {
      /* one byte (also the fallback for truncated sequences) */
      nCodePoint = nPart;
    }
    // String.fromCodePoint throws RangeError outside 0..0x10FFFF, so guard it.
    const isEncodable =
      Number.isInteger(nCodePoint) && nCodePoint >= 0 && nCodePoint <= 0x10ffff;
    sView += isEncodable ? String.fromCodePoint(nCodePoint) : "\uFFFD";
  }
  return sView;
}
let str = utf8ArrayToString([50,72,226,130,130,32,43,32,79,226,130,130,32,226,135,140,32,50,72,226,130,130,79]);
// Must show 2H₂ + O₂ ⇌ 2H₂O
console.log(str);

If you are using node.js you can do this:
yourByteArray.toString('base64');

Too late to answer but if your input is in form of ASCII bytes, then you could try this solution:
/**
 * Joins a list of character codes into a single string.
 *
 * @param {{forEach: Function}} rArr - collection of char codes exposing forEach
 * @returns {string} the characters for those codes, in iteration order
 */
function convertArrToString(rArr) {
  let text = "";
  // Append the character for each code as it is visited.
  rArr.forEach((code) => {
    text += String.fromCharCode(code);
  });
  return text;
}
/**
 * Interprets a list of character codes as hexadecimal text and parses it.
 *
 * @param {{forEach: Function}} rArr - char codes, passed to convertArrToString
 * @returns {number} the parsed base-16 value (NaN when parseInt finds no digits)
 */
function convertArrToHexNumber(rArr) {
  const hexText = convertArrToString(rArr);
  return parseInt(hexText, 16);
}

I had some decrypted byte arrays with padding characters and other stuff I didn't need, so I did this (probably not perfect, but it works for my limited use)
// Build a string from the char codes in `res`, then keep only the characters
// whose code lies in the range 32..127 (everything else is dropped).
var junk = String.fromCharCode.apply(null, res)
  .split('')
  .filter((ch) => {
    const code = ch.charCodeAt(0);
    return code >= 32 && code <= 127;
  })
  .join('');

> const stringToBin = (str) => [...str].map(item=>item.charCodeAt())
> undefined
> stringToBin('hello')
> (5) [104, 101, 108, 108, 111]
> const binToString = (array) => String.fromCharCode(...array)
> undefined
> binToString(stringToBin('hello'))
> 'hello'

What you are looking for is String.fromCharCode
What you want to do is loop through the array of bytes (represented as integers), create the string equivalent and add it to the result:
/**
 * Converts an iterable of character codes into a string.
 *
 * @param {Iterable<number>} array - char codes to convert
 * @returns {string} one character per code, in iteration order
 */
function bin2String(array) {
  // Materialise the iterable, map each code to its character, then glue.
  const chars = [...array].map((code) => String.fromCharCode(code));
  return chars.join("");
}
console.log(bin2String([116, 104, 101, 32, 114, 101, 115, 117, 108, 116]));
You can also use Array.prototype.map to convert the array of bytes into an array of one-character strings, then join them all.
/**
 * Converts a sequence of byte values into a string, one character per byte.
 * (Despite the name, the direction is bytes -> string.)
 *
 * @param {Iterable<number>|ArrayLike<number>} array - byte values; plain
 *   arrays and typed arrays such as Uint8Array are both accepted
 * @returns {string} the characters for those byte values
 */
function string2Bin(array) {
  // Array.from always produces a plain Array. Calling .map directly on a
  // typed array would return another typed array and coerce every character
  // back to a number, yielding "000..." instead of text.
  return Array.from(array, (byte) => String.fromCharCode(byte)).join("");
}
console.log(string2Bin([116, 104, 101, 32, 114, 101, 115, 117, 108, 116]));

UPDATE
#rosberg-linhares posted best solution so far to handle UTF8.
Didn't find any solution that would work with UTF-8 characters. String.fromCharCode is good until you meet 2 byte character.
For example word Hüser can come over the wire in form of arraybuffer as [0x48,0xc3,0xbc,0x73,0x65,0x72] (e.g. through websocket connection)
But if you go through it with String.fromCharCode you will have HÃ¼ser as each byte will be converted to a char separately, and letter ü is encoded in two bytes.
Solution
Currently I'm using following solution:
/**
 * Left-pads a hex string with a single '0' when it is shorter than two
 * characters, so that every byte renders as two hex digits.
 *
 * @param {string} n - hexadecimal text for one byte
 * @returns {string} `n`, zero-padded to at least two characters
 */
function pad(n) {
  return n.length < 2 ? '0' + n : n;
}
/**
 * Decodes UTF-8 bytes into a string by percent-encoding every byte and
 * letting decodeURIComponent perform the UTF-8 decoding.
 *
 * @param {Iterable<number>|ArrayLike<number>} data - byte values; plain
 *   arrays and typed arrays such as Uint8Array are both accepted
 * @returns {string} the decoded text
 * @throws {URIError} when the bytes are not valid UTF-8 (raised by
 *   decodeURIComponent)
 */
function decodeUtf8(data) {
  // Array.from yields a plain Array of "%xx" strings even for typed arrays;
  // Uint8Array.prototype.map would coerce those strings back into numbers.
  const escaped = Array.from(data, (byte) => '%' + pad(byte.toString(16))).join('');
  return decodeURIComponent(escaped);
}

The simplest solution I've found is:
var text = atob(byteArray);

We Keep Coding

JavaScript is the programming language of the Web.

JavaScript CRC32 - javascript

Related

Is it possible to reverse final single number output of multiple XOR and bits shifting operations

How to do popcount or count bits on arbitrarily long bit sequence in JavaScript

De-interlace bytes

Compressing a blob in javascript

How to decode byte of array data using javascript [duplicate]

Categories

Resources