Javascript Blob object saved as file contains extra bytes - javascript

I have a program which reads an array of bytes. Those bytes are supposed to be ISO-8859-2 decimal codes of characters. My test array has two elements: 103 which is letter g and 179 which is letter ł (l with tail). I then create a Blob object from it and check its content using two methods:
FileReader
objectURL
The first method gives correct results but the second method gives an extra character in the saved blob file.
Here is the code:
// Byte values meant as ISO-8859-2 codes: 103 is "g", 179 is "ł".
var bytes = [103, 179];
// Each value becomes one UTF-16 code unit of the string.
var str = String.fromCharCode(bytes[0]) + String.fromCharCode(bytes[1]);
for (var pos = 0; pos <= 2; pos++) {
  console.log(str.charCodeAt(pos)); //103, then 179, then NaN
}
var blob = new Blob([str]);
console.log(blob.size); //3
//Checking Blob contents using first method - FileReader
var reader = new FileReader();
reader.addEventListener("loadend", function() {
  // Inside the listener the reader already holds the decoded text.
  var text = reader.result;
  console.log(text); //g³
  for (var k = 0; k <= 2; k++) {
    console.log(text.charCodeAt(k)); //103, then 179, then NaN
  }
});
reader.readAsText(blob);
//Checking Blob contents using second method - objectURL
var link = $('<a>', {
  href: URL.createObjectURL(blob),
  title: 'Download',
  text: 'Download the blob'
});
link.appendTo('#my');
In order to use the second method I created a fiddle. In the fiddle, when you click the "Download" link and save and then open the file in a binary editor, it consists of the following bytes: 103, 194, 179.
My question is, where does the 194 come from and how to create a blob file (using the createobjectURL method) containing only bytes given in the original array ([103, 179] in this case).

The extra 194 comes from an encoding issue :
179 is the Unicode code point of "SUPERSCRIPT THREE", so the string str will contain "g³". When the blob is created, this string is encoded as UTF-8: 0x67 for g and 0xC2 0xB3 for ³ (194, 179 in decimal), which takes 3 bytes. Of course, if you use a FileReader, the bytes are decoded as UTF-8 again and you get back the 2 characters "g³".
To avoid that situation (and if you don't want to put everything in utf8), you can use a typed array to construct the blob :
// Wrap the byte values in a typed array so the Blob stores them verbatim
// instead of UTF-8-encoding a string.
var blob = new Blob([new Uint8Array(bytes)]);
That way, you will keep exactly the bytes you want.

Related

How to create a PDF file from any Base64 string?

I want to pass any Base64 string to a function and get a PDF from it. I tried it the following way; it downloads a PDF, but opening it gives the error
"Failed to load PDF document."
This is the way I tried,
// Fragment of a class: the statements below use `this.` and the helper further
// down is written in method-shorthand form, so both belong inside a class body
// that is not shown here.
let data = "SGVsbG8gd29ybGQ=" // Base64 of the plain text "Hello world" (not a PDF document)
// First decode: the helper below returns the decoded bytes as a Uint8Array.
var bufferArray = this.base64ToArrayBuffer(data);
// Second decode of the same input; it repeats the work the helper already did.
var binary_string = window.atob(data)
var len = bufferArray.length;
var bytes = new Uint8Array(len);
// Copy each decoded character code into the byte array (same content as bufferArray).
for (var i = 0; i < len; i++) {
bytes[i] = binary_string.charCodeAt(i);
}
// The type only labels the blob as PDF; the bytes are still just the decoded text.
let blob = new Blob([bytes.buffer], { type: 'application/pdf' })
var url = URL.createObjectURL(blob);
window.open(url);
// Decode a Base64 string into its raw bytes.
// Despite the name, the value returned is a Uint8Array, not an ArrayBuffer.
base64ToArrayBuffer(data) {
var bString = window.atob(data);
var bLength = bString.length;
var bytes = new Uint8Array(bLength);
// atob yields one character per byte, so each char code is the byte value.
for (var i = 0; i < bLength; i++) {
var ascii = bString.charCodeAt(i);
bytes[i] = ascii;
}
return bytes;
};
Base64 is not pdf so hello.b64 will never morph into hello.pdf
It needs a pdf header page and trailer in decimal bytes, those cannot be easily added as base64 object wrapping as too many variables.
The text/pdf needs careful script as text to wrap around the hello text see hello example https://stackoverflow.com/a/70748286/10802527
So as Base64 for example
JVBERi0xLjIgDQo5IDAgb2JqDQo8PA0KPj4NCnN0cmVhbQ0KQlQvIDMyIFRmKCAgSGVsbG8gV29ybGQgICApJyBFVA0KZW5kc3RyZWFtDQplbmRvYmoNCjQgMCBvYmoNCjw8DQovVHlwZSAvUGFnZQ0KL1BhcmVudCA1IDAgUg0KL0NvbnRlbnRzIDkgMCBSDQo+Pg0KZW5kb2JqDQo1IDAgb2JqDQo8PA0KL0tpZHMgWzQgMCBSIF0NCi9Db3VudCAxDQovVHlwZSAvUGFnZXMNCi9NZWRpYUJveCBbIDAgMCAyNTAgNTAgXQ0KPj4NCmVuZG9iag0KMyAwIG9iag0KPDwNCi9QYWdlcyA1IDAgUg0KL1R5cGUgL0NhdGFsb2cNCj4+DQplbmRvYmoNCnRyYWlsZXINCjw8DQovUm9vdCAzIDAgUg0KPj4NCiUlRU9G
<iframe type="application/pdf" width="95%" height=150 src="data:application/pdf;base64,JVBERi0xLjIgDQo5IDAgb2JqDQo8PA0KPj4NCnN0cmVhbQ0KQlQvIDMyIFRmKCAgSGVsbG8gV29ybGQgICApJyBFVA0KZW5kc3RyZWFtDQplbmRvYmoNCjQgMCBvYmoNCjw8DQovVHlwZSAvUGFnZQ0KL1BhcmVudCA1IDAgUg0KL0NvbnRlbnRzIDkgMCBSDQo+Pg0KZW5kb2JqDQo1IDAgb2JqDQo8PA0KL0tpZHMgWzQgMCBSIF0NCi9Db3VudCAxDQovVHlwZSAvUGFnZXMNCi9NZWRpYUJveCBbIDAgMCAyNTAgNTAgXQ0KPj4NCmVuZG9iag0KMyAwIG9iag0KPDwNCi9QYWdlcyA1IDAgUg0KL1R5cGUgL0NhdGFsb2cNCj4+DQplbmRvYmoNCnRyYWlsZXINCjw8DQovUm9vdCAzIDAgUg0KPj4NCiUlRU9G">frame</iframe>
Try above but may be blocked by security it will look like this for some users but not ALL !
In the comments you asked how the text could be manipulated in JavaScript. My stock answer is that JavaScript cannot generally be used easily to build a PDF or edit Base64 content. However, if you have prepared placeholders, the content can be changed by find and replace. This must be done with care, as the total file length should never change.
As an example take the above as a prior template and switch the content to.
JVBERi0xLjIgDQo5IDAgb2JqDQo8PA0KPj4NCnN0cmVhbQ0KQlQvIDMyIFRmKCAgRmFyZS10aGVlLXdlbGwpJyBFVA0KZW5kc3RyZWFtDQplbmRvYmoNCjQgMCBvYmoNCjw8DQovVHlwZSAvUGFnZQ0KL1BhcmVudCA1IDAgUg0KL0NvbnRlbnRzIDkgMCBSDQo+Pg0KZW5kb2JqDQo1IDAgb2JqDQo8PA0KL0tpZHMgWzQgMCBSIF0NCi9Db3VudCAxDQovVHlwZSAvUGFnZXMNCi9NZWRpYUJveCBbIDAgMCAyNTAgNTAgXQ0KPj4NCmVuZG9iag0KMyAwIG9iag0KPDwNCi9QYWdlcyA1IDAgUg0KL1R5cGUgL0NhdGFsb2cNCj4+DQplbmRvYmoNCnRyYWlsZXINCjw8DQovUm9vdCAzIDAgUg0KPj4NCiUlRU9G
So by find and replace SGVsbG8gV29ybGQgICAp with RmFyZS10aGVlLXdlbGwp we get a text change:- (it is important the string length is a multiple of 4 and the length is the same)
<iframe type="application/pdf" width="95%" height=150 src="data:application/pdf;base64,JVBERi0xLjIgDQo5IDAgb2JqDQo8PA0KPj4NCnN0cmVhbQ0KQlQvIDMyIFRmKCAgRmFyZS10aGVlLXdlbGwpJyBFVA0KZW5kc3RyZWFtDQplbmRvYmoNCjQgMCBvYmoNCjw8DQovVHlwZSAvUGFnZQ0KL1BhcmVudCA1IDAgUg0KL0NvbnRlbnRzIDkgMCBSDQo+Pg0KZW5kb2JqDQo1IDAgb2JqDQo8PA0KL0tpZHMgWzQgMCBSIF0NCi9Db3VudCAxDQovVHlwZSAvUGFnZXMNCi9NZWRpYUJveCBbIDAgMCAyNTAgNTAgXQ0KPj4NCmVuZG9iag0KMyAwIG9iag0KPDwNCi9QYWdlcyA1IDAgUg0KL1R5cGUgL0NhdGFsb2cNCj4+DQplbmRvYmoNCnRyYWlsZXINCjw8DQovUm9vdCAzIDAgUg0KPj4NCiUlRU9G">frame</iframe>
and the result be
There are strict rules to be followed when using this method:-
Hello World ) is the template, note the inclusion of white space before the ) limit thus
Fare-thee-well) is as far as substitution is allowed in this case
so source field must be pre-planned to be big enough for largest replacement and is based on a plain text length of multiples of 3 (matches base64 blocks of 4)

How to edit a byte at a given index in a "ArrayBuffer"

I cannot find a way to change a value in an ArrayBuffer.
I am able to print the content of an ArrayBuffer by using a TextDecoder. When printing the buffer, there is a string. I want to change one char of the string by editing one byte in the buffer. I was able to access one byte of the buffer by converting it to an Int8Array, but I am not sure how to convert it back to an ArrayBuffer after editing the byte.
// Decode the buffer as UTF-8 to show its current text.
var decoder = new TextDecoder("utf-8");
console.log('ArrayBuffer string content : ', decoder.decode(data));
// Overwrite bytes 56 and 57 with 48 through a typed-array view.
let view = new Int8Array(data);
for (var index = 56; index <= 57; index++) {
  view[index] = 48;
}
// Convert view again to an ArrayBuffer
//console.log('ArrayBuffer string new content : ', decoder.decode(data));
You can use the set method on your Int8Array directly :
// Decode the buffer as UTF-8 text and log it before any byte is changed.
var enc = new TextDecoder("utf-8");
console.log('ArrayBuffer string content : ', enc.decode(data));
// Assuming data is an ArrayBuffer, the Int8Array is a view over the same
// memory rather than a copy, so writes through it change data itself.
let view = new Int8Array(data);
// set(source, offset) copies the source values into the view starting at
// offset: the value 48 is written at index 56 and again at index 57.
view.set([48], 56);
view.set([48], 57);
As stated in the top answer:
// Decode the buffer as UTF-8 text and log it before any byte is changed.
var enc = new TextDecoder("utf-8");
console.log('ArrayBuffer string content : ', enc.decode(data));
// Assuming data is an ArrayBuffer, the Int8Array is a view over the same
// memory rather than a copy, so writes through it change data itself.
let view = new Int8Array(data);
// set(source, offset) copies the source values into the view starting at
// offset: the value 48 is written at index 56 and again at index 57.
view.set([48], 56);
view.set([48], 57);
It actually does change the byte value in the data variable.
I spent the last couple of hours trying to figure out how to do this myself.
Then I finally understood that when you create the new Int8Array(), you're not creating a copy of the data - you're just creating a different type of pointer to it.
It's messy, it's confusing, but it works.
Cheers!

Javascript: how to convert hex data to binary and write it into a file

I have a bunch of hex values and I have to convert them into binary data before writing them into a file.
I transformed the hex string into an array of integers, then I converted each integer to a char:
// bytes contains the integers
str = String.fromCharCode.apply(String, bytes);
now I create the blob file and download it:
var blob = new Blob([str], {type: "application/octet-stream"});
saveAs(blob, "file.bin");
but something goes wrong: if I print the length of bytes and the length of str I have the same value (512), but the file contains 684 chars, and of course it isn't how I expect it.
So I have:
512 pairs of hex values ->
512 integers ->
512 chars ->
I save the file ->
684 chars inside the file.
What am I doing wrong? I even tried to add the charset to the blob file, ie:
var blob = new Blob([str], {type: "application/octet-stream;charset=UTF-8,"});
but with no success.
EDIT:
Original HEX:
Saved file:
Thanks to Andrey I found the solution:
I have to write in binary mode, so:
// bytes is the array with the integers; the typed array copies them, one byte each
var ia = new Uint8Array(bytes);
// Handing the Blob a typed array (not a string) keeps the bytes exactly as given.
var blob = new Blob([ia], {type: "application/octet-stream"});
saveAs(blob, id + "_<?php echo $report['md5']; ?>.bin");

Concatenating hex bytes and strings in JavaScript while preserving bytes

I would like to concatenate a hex value and a string using JavaScript.
var hexValue = 0x89;
var png = "PNG";
The string "PNG" is equivalent to the concatenation of 0x50, 0x4E, and 0x47.
Concatenating hexValue and png via
var concatHex = String.fromCharCode(0x89) + String.fromCharCode(0x50)
+ String.fromCharCode(0x4E) + String.fromCharCode(0x47);
...give a result with a byte count of 5 because of the first hex value needing a control character:
C2 89 50 4E 47
I am working with raw image data where I have hexValue and png and need to concatenate them without this control character being included.
Is there a way to trim off the control character?
Given I have an array of bytes, is there a better way to concatenate them and a string while preserving the bytes?
Well, I was investigating and found that the efficient way to achieve this in JavaScript is to use typed arrays.
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Typed_arrays
http://msdn.microsoft.com/en-us/library/br212485(v=vs.94).aspx
Here i wrote a code (not tested) to perform what you want:
var png = "PNG";
var hexValue = 0x89;
// One byte for hexValue plus one byte per character of png.
// ArrayBuffer sizes are given in bytes, and its memory starts zero-filled.
var buffer = new ArrayBuffer(png.length + 1);
// Byte-wide view over the buffer. Int8Array reads 0x89 back as -119, but the
// byte stored in the buffer is still 0x89.
var int8View = new Int8Array(buffer);
// The lone byte goes first so the buffer holds 89 50 4E 47.
int8View[0] = hexValue;
for (var i = 0; i < png.length; i++) {
  // charCodeAt gives the numeric code of the character; this assumes png
  // only contains characters with codes 0-255 (one byte each).
  int8View[i + 1] = png.charCodeAt(i);
}
// buffer (or int8View) can now be handed to new Blob([...]) without any
// UTF-8 re-encoding of the bytes.
Well hope it helps.

Downloading generated binary content contains utf-8 encoded chars in disk-file

I am trying to save a generated zip file to disk from within a Chrome extension with the following code:
/**
 * Offer `file` to the user as a download by clicking a temporary hidden link.
 *
 * @param {string} nm - File name suggested to the browser for the download.
 * @param {Blob} file - Content to download; it is exposed through an object URL.
 */
function sendFile (nm, file) {
  var url = window.URL.createObjectURL(file);
  var a = document.createElement('a');
  a.href = url;
  a.download = nm; // file name
  a.style.display = 'none';
  // The link is attached to the document only for the duration of the click.
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);
  // Release the object URL so the blob is not kept alive for the lifetime of
  // the page. The release is delayed because the download may still be
  // starting when click() returns.
  setTimeout(function () {
    window.URL.revokeObjectURL(url);
  }, 40000);
}
/**
 * Generate the zip twice and hand both results to sendFile: first the default
 * output as "x.b64", then the output with base64 disabled as "x.zip".
 * The nm parameter is not used by the body.
 */
function downloadZip (nm) {
  // Fall back to the prefixed URL object where that is what exists.
  window.URL = window.webkitURL || window.URL;

  var base64File = new Blob ([zip.generate()], {type:'application/base64'});
  sendFile ("x.b64", base64File);

  // NOTE(review): presumably this second result is a binary string; it is
  // passed to the Blob as-is, exactly like the first one.
  var rawFile = new Blob ([zip.generate({base64:false})], {type:'application/binary'});
  sendFile ("x.zip", rawFile);
}
Currently this saves the contents of my zip in two versions, the first one is base64 encoded, and when I decode it with base64 -d the resulting zip is ok.
The second version should just save the raw data (the zip file), but this raw data arrives UTF-8 encoded on my disk (each value >= 0x80 is prepended with 0xc2). So how do I get rid of this UTF-8 encoding? I tried various type strings like application/zip, or omitting the type info completely, but it always arrives with UTF-8 encoding. I am also curious how to make the browser store/convert base64 data (the first case) by itself, so that it arrives as decoded binary data on my disk... I'm using Chrome Version 23.0.1271.95 m
PS: The second content I analysed with a hexdump-utility inside the browser: it does not contain utf-8 encodings (or my hexdump calls something which does implicit conversion). For completeness (sorry, its just transposed from c, so it might not be that cool js-code), I append it here:
/**
 * Format the low `bytes` bytes of `val` as lowercase hex, two digits per
 * byte, most significant byte first.
 *
 * @param {number} bytes - How many bytes to render (output has 2 * bytes digits).
 * @param {number} val - Integer to format; bitwise operators treat it as a
 *   32-bit value, so bytes beyond the fourth come out as "00".
 * @returns {string} The zero-padded hex digits.
 */
function hex (bytes, val) {
  var ret = "";
  var rest = val;
  for (var i = 0; i < bytes; i++) {
    // Format only the lowest byte; formatting the whole remaining value would
    // repeat the high digits in every iteration.
    var tmp = (rest & 0xff).toString(16);
    if (tmp.length < 2) {
      tmp = "0" + tmp;
    }
    // Lower bytes are produced first, so each new pair goes in front.
    ret = tmp + ret;
    rest >>>= 8;
  }
  return ret;
}
/**
 * Log a hex dump of the first `len` characters of `buf`, 16 per line.
 * Each line holds a 2-byte hex offset, the character codes in hex (with an
 * extra gap after the eighth), then " |" and the printable characters.
 *
 * @param {string} buf - Data to dump; read with charCodeAt, one byte per character.
 * @param {number} len - Number of characters of buf to dump.
 */
function hexdump (buf, len) {
  for (var p = 0; p < len; p += 16) {
    // Declared locally: assigning to an undeclared name would create a
    // global, and throws in strict or module code.
    var line = hex(2, p);
    var i;
    for (i = 0; i < 16; i++) {
      if (i === 8) {
        line += " ";
      }
      if (p + i < len) {
        line += " " + hex(1, buf.charCodeAt(p + i));
      } else {
        // Same width as a rendered byte (" xx") so the text column lines up
        // on a short final line.
        line += "   ";
      }
    }
    line += " |";
    for (i = 0; i < 16 && p + i < len; i++) {
      var cc = buf.charCodeAt(p + i);
      // Printable ASCII only; 0x7c ("|") is shown as "." because it is the
      // column delimiter, and 127 (DEL) is not printable.
      var printable = cc >= 32 && cc < 127 && cc !== 0x7c;
      line += printable ? String.fromCharCode(cc) : '.';
    }
    console.log (line);
  }
}
From working draft:
If element is a DOMString, run the following substeps:
Let s be the result of converting element to a sequence of Unicode characters [Unicode] using the algorithm for doing so in WebIDL
[WebIDL].
Encode s as UTF-8 and append the resulting bytes to bytes.
So strings are always converted to UTF-8, and there is no parameter to affect this. This doesn't affect base64 strings because they only contain characters that match single byte per codepoint, with the codepoint and byte having the same value. Luckily Blob exposes lower level interface (direct bytes), so that limitation doesn't really matter.
You could do this:
//By glancing over the source I trust the string is in "binary" form,
//I.E. having only code points 0 - 255 that represent bytes
var binaryString = zip.generate({base64: false});
// One byte per character: the char code at each position becomes the byte value.
var bytes = Uint8Array.from({length: binaryString.length}, function (unused, position) {
  return binaryString.charCodeAt(position);
});
// A typed-array part is stored by the Blob as raw bytes.
var file = new Blob([bytes], {type:'application/zip'});
sendFile( "myzip.zip", file );

Categories