Downloading generated binary content contains utf-8 encoded chars in disk-file

Downloading generated binary content contains utf-8 encoded chars in disk-file - javascript

I am trying to save a generated zip-file to disk from within a chrome extension with the follwing code:
function sendFile (nm, file) {
var a = document.createElement('a');
a.href = window.URL.createObjectURL(file);
a.download = nm; // file name
a.style.display = 'none';
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
}
function downloadZip (nm) {
window.URL = window.webkitURL || window.URL;
var content;
content = zip.generate();
var file = new Blob ([content], {type:'application/base64'});
sendFile ("x.b64", file);
content = zip.generate({base64:false});
var file = new Blob ([content], {type:'application/binary'});
sendFile ("x.zip", file);
}
Currently this saves the contents of my zip in two versions, the first one is base64 encoded, and when I decode it with base64 -d the resulting zip is ok.
The second version should just save the raw data (the zip file), but this raw data arrives utf-8 encoded on my disk. (each value >= 0x80 is preprended with 0xc2). So how to get rid of this utf-8 encoding? Tried various type-strings like application/zip, or ommitting the type info completely, it just arrives always with utf-8 encoding. I am also curious how to make the browser store/convert base64-data (the first case) by itself, so that they arrive as decoded binary data on my disk... I'm using Chrome Version 23.0.1271.95 m
PS: The second content I analysed with a hexdump-utility inside the browser: it does not contain utf-8 encodings (or my hexdump calls something which does implicit conversion). For completeness (sorry, its just transposed from c, so it might not be that cool js-code), I append it here:
function hex (bytes, val) {
var ret="";
var tmp="";
for (var i=0;i<bytes;i++) {
tmp=val.toString (16);
if (tmp.length<2)
tmp="0"+tmp;
ret=tmp+ret;
val>>=8;
}
return ret;
}
function hexdump (buf, len) {
var p=0;
while (p<len) {
line=hex (2,p);
var i;
for (i=0;i<16;i++) {
if (i==8)
line +=" ";
if (p+i<len)
line+=" "+hex(1,buf.charCodeAt(p+i));
else
line+=" ";
}
line+=" |";
for (i=0;i<16;i++) {
if (p+i<len) {
var cc=buf.charCodeAt (p+i);
line+= ((cc>=32)&&(cc<=127)&&(cc!='|')?String.fromCharCode(cc):'.');
}
}
p+=16;
console.log (line);
}
}

From working draft:
If element is a DOMString, run the following substeps:
Let s be the result of converting element to a sequence of Unicode characters [Unicode] using the algorithm for doing so in WebIDL
[WebIDL].
Encode s as UTF-8 and append the resulting bytes to bytes.
So strings are always converted to UTF-8, and there is no parameter to affect this. This doesn't affect base64 strings because they only contain characters that match single byte per codepoint, with the codepoint and byte having the same value. Luckily Blob exposes lower level interface (direct bytes), so that limitation doesn't really matter.
You could do this:
var binaryString = zip.generate({base64: false}), //By glancing over the source I trust the string is in "binary" form
len = binaryString.length, //I.E. having only code points 0 - 255 that represent bytes
bytes = new Uint8Array(len);
for( var i = 0; i < len; ++i ) {
bytes[i] = binaryString.charCodeAt(i);
}
var file = new Blob([bytes], {type:'application/zip'});
sendFile( "myzip.zip", file );

Related

How to create a PDF file from any Base64 string?

I want to input any Base64 string to function and get the PDF from there. So tried this way, It download the PDF but there is a error
"Failed to load PDF document."
This is the way I tried,
let data = "SGVsbG8gd29ybGQ=" //hello world
var bufferArray = this.base64ToArrayBuffer(data);
var binary_string = window.atob(data)
var len = bufferArray.length;
var bytes = new Uint8Array(len);
for (var i = 0; i < len; i++) {
bytes[i] = binary_string.charCodeAt(i);
}
let blob = new Blob([bytes.buffer], { type: 'application/pdf' })
var url = URL.createObjectURL(blob);
window.open(url);
//convert base64 string to arraybuffer
base64ToArrayBuffer(data) {
var bString = window.atob(data);
var bLength = bString.length;
var bytes = new Uint8Array(bLength);
for (var i = 0; i < bLength; i++) {
var ascii = bString.charCodeAt(i);
bytes[i] = ascii;
}
return bytes;
};

Base64 is not pdf so hello.b64 will never morph into hello.pdf
It needs a pdf header page and trailer in decimal bytes, those cannot be easily added as base64 object wrapping as too many variables.
The text/pdf needs careful script as text to wrap around the hello text see hello example https://stackoverflow.com/a/70748286/10802527
So as Base64 for example
JVBERi0xLjIgDQo5IDAgb2JqDQo8PA0KPj4NCnN0cmVhbQ0KQlQvIDMyIFRmKCAgSGVsbG8gV29ybGQgICApJyBFVA0KZW5kc3RyZWFtDQplbmRvYmoNCjQgMCBvYmoNCjw8DQovVHlwZSAvUGFnZQ0KL1BhcmVudCA1IDAgUg0KL0NvbnRlbnRzIDkgMCBSDQo+Pg0KZW5kb2JqDQo1IDAgb2JqDQo8PA0KL0tpZHMgWzQgMCBSIF0NCi9Db3VudCAxDQovVHlwZSAvUGFnZXMNCi9NZWRpYUJveCBbIDAgMCAyNTAgNTAgXQ0KPj4NCmVuZG9iag0KMyAwIG9iag0KPDwNCi9QYWdlcyA1IDAgUg0KL1R5cGUgL0NhdGFsb2cNCj4+DQplbmRvYmoNCnRyYWlsZXINCjw8DQovUm9vdCAzIDAgUg0KPj4NCiUlRU9G
<iframe type="application/pdf" width="95%" height=150 src="data:application/pdf;base64,JVBERi0xLjIgDQo5IDAgb2JqDQo8PA0KPj4NCnN0cmVhbQ0KQlQvIDMyIFRmKCAgSGVsbG8gV29ybGQgICApJyBFVA0KZW5kc3RyZWFtDQplbmRvYmoNCjQgMCBvYmoNCjw8DQovVHlwZSAvUGFnZQ0KL1BhcmVudCA1IDAgUg0KL0NvbnRlbnRzIDkgMCBSDQo+Pg0KZW5kb2JqDQo1IDAgb2JqDQo8PA0KL0tpZHMgWzQgMCBSIF0NCi9Db3VudCAxDQovVHlwZSAvUGFnZXMNCi9NZWRpYUJveCBbIDAgMCAyNTAgNTAgXQ0KPj4NCmVuZG9iag0KMyAwIG9iag0KPDwNCi9QYWdlcyA1IDAgUg0KL1R5cGUgL0NhdGFsb2cNCj4+DQplbmRvYmoNCnRyYWlsZXINCjw8DQovUm9vdCAzIDAgUg0KPj4NCiUlRU9G">frame</iframe>
Try above but may be blocked by security it will look like this for some users but not ALL !
In comments you asked how text could be manipulated in java script, and my stock answer is java script cannot generally be easily used to build PDF or edit Base64 content. However if you have prepared placeholders it can be changed by find and replace. But must be done with care as the total file length should never be changed.
As an example take the above as a prior template and switch the content to.
JVBERi0xLjIgDQo5IDAgb2JqDQo8PA0KPj4NCnN0cmVhbQ0KQlQvIDMyIFRmKCAgRmFyZS10aGVlLXdlbGwpJyBFVA0KZW5kc3RyZWFtDQplbmRvYmoNCjQgMCBvYmoNCjw8DQovVHlwZSAvUGFnZQ0KL1BhcmVudCA1IDAgUg0KL0NvbnRlbnRzIDkgMCBSDQo+Pg0KZW5kb2JqDQo1IDAgb2JqDQo8PA0KL0tpZHMgWzQgMCBSIF0NCi9Db3VudCAxDQovVHlwZSAvUGFnZXMNCi9NZWRpYUJveCBbIDAgMCAyNTAgNTAgXQ0KPj4NCmVuZG9iag0KMyAwIG9iag0KPDwNCi9QYWdlcyA1IDAgUg0KL1R5cGUgL0NhdGFsb2cNCj4+DQplbmRvYmoNCnRyYWlsZXINCjw8DQovUm9vdCAzIDAgUg0KPj4NCiUlRU9G
So by find and replace SGVsbG8gV29ybGQgICAp with RmFyZS10aGVlLXdlbGwp we get a text change:- (it is important the string length is a multiple of 4 and the length is the same)
<iframe type="application/pdf" width="95%" height=150 src="data:application/pdf;base64,JVBERi0xLjIgDQo5IDAgb2JqDQo8PA0KPj4NCnN0cmVhbQ0KQlQvIDMyIFRmKCAgRmFyZS10aGVlLXdlbGwpJyBFVA0KZW5kc3RyZWFtDQplbmRvYmoNCjQgMCBvYmoNCjw8DQovVHlwZSAvUGFnZQ0KL1BhcmVudCA1IDAgUg0KL0NvbnRlbnRzIDkgMCBSDQo+Pg0KZW5kb2JqDQo1IDAgb2JqDQo8PA0KL0tpZHMgWzQgMCBSIF0NCi9Db3VudCAxDQovVHlwZSAvUGFnZXMNCi9NZWRpYUJveCBbIDAgMCAyNTAgNTAgXQ0KPj4NCmVuZG9iag0KMyAwIG9iag0KPDwNCi9QYWdlcyA1IDAgUg0KL1R5cGUgL0NhdGFsb2cNCj4+DQplbmRvYmoNCnRyYWlsZXINCjw8DQovUm9vdCAzIDAgUg0KPj4NCiUlRU9G">frame</iframe>
and the result be
There are strict rules to be followed when using this method:-
Hello World ) is the template, note the inclusion of white space before the ) limit thus
Fare-thee-well) is as far as substitution is allowed in this case
so source field must be pre-planned to be big enough for largest replacement and is based on a plain text length of multiples of 3 (matches base64 blocks of 4)

JAVASCRIPT decode a base64string (which is an encoded zipfile) to a zipfile and get the zipfiles content by name

the question says it all, im receiving a base64 encoded ZIPFILE from the server, which I first want to decode to a ZIPFILE in memory and then get the ZIPFILES content, which is a json-file.
I tried to use JSZIP but im totally lost in this case ... the base64 string is received with javascript by a promise.
So my question in short is: How can I convert a base64 encoded ZIPFILE to a ZIPFILE in memory to get its contents.
BASE64 -> ZIPFILE -> CONTENT
I use this complicated process to save much space on my database. And I dont want to handle this process on server-side, but on clientside with JS.
Thanks in advance!

If anyone is interested in my solution to this problem read my answer right here:
I received the data in a base64-string format, then converted the string to a blob. Then I used the blob-handle to load the zipfile with the JSZip-Library. After that I could just grab the contents of the zipfile. Code is below:
function base64ToBlob(base64) {
let binaryString = window.atob(base64);
let binaryLen = binaryString.length;
let ab = new ArrayBuffer(binaryLen);
let ia = new Uint8Array(ab);
for (let i = 0; i < binaryLen; i++) {
ia[i] = binaryString.charCodeAt(i);
}
let bb = new Blob([ab]);
bb.lastModifiedDate = new Date();
bb.name = "archive.zip";
bb.type = "zip";
return bb;
}
To get the contents of the zipfile:
let blob = base64ToBlob(resolved);
let zip = new JSZip();
zip.loadAsync(blob).then(function(zip) {
zip.file("archived.json").async("string").then(function (content) {
console.log(content);
// content is the file as a string
});
}).catch((e) => {
});
As you can see, first the blob is created from the base64-string. Then the handle is given over to the JSZip loadAsync method. After that you have to set the name of the file which you want to retrieve from the zipfile. In this case it is the file called "archived.json". Now because of the async("string") function the file (file contents) are returned as a string. To further use the extracted string, just work with the content variable.

Javascript: create UTF-16 text file?

I have some string need to be a UTF-16 text file. For example:
var s = "aosjdfkzlzkdoaslckjznx";
var file = "data:text/plain;base64," + btoa(s);
This will result a UTF-8 encoding text file. How can I get a UTF-16 text file with string s?

Related: Javascript to csv export encoding issue
This should do it:
document.getElementById('download').addEventListener('click', function(){
downloadUtf16('Hello, World', 'myFile.csv')
});
function downloadUtf16(str, filename) {
// ref: https://stackoverflow.com/q/6226189
var charCode, byteArray = [];
// BE BOM
byteArray.push(254, 255);
// LE BOM
// byteArray.push(255, 254);
for (var i = 0; i < str.length; ++i) {
charCode = str.charCodeAt(i);
// BE Bytes
byteArray.push((charCode & 0xFF00) >>> 8);
byteArray.push(charCode & 0xFF);
// LE Bytes
// byteArray.push(charCode & 0xff);
// byteArray.push(charCode / 256 >>> 0);
}
var blob = new Blob([new Uint8Array(byteArray)], {type:'text/plain;charset=UTF-16BE;'});
var blobUrl = URL.createObjectURL(blob);
// ref: https://stackoverflow.com/a/18197511
var link = document.createElement('a');
link.href = blobUrl;
link.download = filename;
if (document.createEvent) {
var event = document.createEvent('MouseEvents');
event.initEvent('click', true, true);
link.dispatchEvent(event);
} else {
link.click();
}
}
<button id="download">Download</button>

You can use a legacy polyfill of the native TextEncoder API to transform a JavaScript string into an ArrayBuffer. As you'll see in that documentation, UTF16 with either endianness is was supported. Libraries that provide UTF-16 support in a Text-Encoder-compatible way will probably appear soon, if they haven't already. Let's assume that one such library exposes a constructor called ExtendedTextEncoder.
Then you can easily create a Blob URL to allow users to download the file, without the inefficient base-64 conversion.
Something like this:
s = "aosjdfkzlzkdoaslckjznx"
var encoder = new ExtendedTextEncoder("utf-16be")
var blob = new Blob(encoder.encode(s), "text/plain")
var url = URL.createObjectURL(blob)
Now you can use url instead of your data: URL.

Javascript Blob object saved as file contains extra bytes

I have a program which reads an array of bytes. Those bytes are supposed to be ISO-8859-2 decimal codes of characters. My test array has two elements: 103 which is letter g and 179 which is letter ł (l with tail). I then create a Blob object from it and check its content using two methods:
FileReader
objectURL
The first method gives correct results but the second method gives an extra character in the saved blob file.
Here is the code:
var bytes = [103, 179];
var chr1 = String.fromCharCode(bytes[0]);
var chr2 = String.fromCharCode(bytes[1]);
var str = '';
str += chr1;
str += chr2;
console.log(str.charCodeAt(0)); //103
console.log(str.charCodeAt(1)); //179
console.log(str.charCodeAt(2)); //NaN
var blob = new Blob([str]);
console.log(blob.size); //3
//Checking Blob contents using first method - FileReader
var reader = new FileReader();
reader.addEventListener("loadend", function() {
var str1 = this.result;
console.log(str1); //g³
console.log(str1.charCodeAt(0)); //103
console.log(str1.charCodeAt(1)); //179
console.log(str1.charCodeAt(2)); //NaN
});
reader.readAsText(blob);
//Checking Blob contents using second method - objectURL
var url = URL.createObjectURL(blob);
$('<a>',{
text: 'Download the blob',
title: 'Download',
href: url
}).appendTo('#my');
In order to use the second method I created a fiddle. In the fiddle, when you click the "Download" link and save and then open the file in a binary editor, it consists of the following bytes: 103, 194, 179.
My question is, where does the 194 come from and how to create a blob file (using the createobjectURL method) containing only bytes given in the original array ([103, 179] in this case).

The extra 194 comes from an encoding issue :
179 is the unicode code point of "SUPERCRIPT THREE" so the string str will contains "g³". After creating the blob, you will get this string encoded in utf8 : 0x67 for g, 0xC2 0xB3 for ³ (194, 179 in decimal) and it takes 3 bytes. Of course, if you use a FileReader, you will get back 2 characters, "g³".
To avoid that situation (and if you don't want to put everything in utf8), you can use a typed array to construct the blob :
var u8 = new Uint8Array(bytes);
var blob = new Blob([u8]);
That way, you will keep exactly the bytes you want.

Saving binary data in a browser without it getting UTF8 encoded on download

My web app receives data in the form of a base64 encoded string, which is decodes using atob, and stores via URL.createObjectURL(). This data is then downloaded via the right-click save-as dialog. The downloaded filed always matches the source file when the source file is ascii encoded. However this isn't the case when the source file is just plain binary data. A diff of a non ascii encoded downloaded file vs its source file appears to show that the downloaded file is UTF-8 encoded. How can this problem be fixed? Please note, I'm locked into using firefox 10.

Convert the string to a Arraybuffer and it should work. If there is any way that you can get the data into an array buffer directly without passing a sting that would be the best solution.
The following code is tested in FF10, and are using the now obsolete MozBlobBuilder.
fiddle
var str="",
idx, len,
buf, view, blobbuild, blob, url,
elem;
// create a test string
for (var idx = 0; idx < 256; ++idx) {
str += String.fromCharCode(idx);
}
// create a buffer
buf = new ArrayBuffer(str.length);
view = new Uint8Array(buf);
// convert string to buffer
for (idx = 0, len = str.length; idx < len; ++idx) {
view[idx] = str.charCodeAt(idx);
}
blobbuild = new MozBlobBuilder();
blobbuild.append(buf);
blob = blobbuild.getBlob('application/octet-stream');
url = URL.createObjectURL(blob);
elem = document.createElement('a');
elem.href = url;
elem.textContent = 'Test';
document.body.appendChild(elem);

We Keep Coding

JavaScript is the programming language of the Web.

Downloading generated binary content contains utf-8 encoded chars in disk-file - javascript

Related

How to create a PDF file from any Base64 string?

JAVASCRIPT decode a base64string (which is an encoded zipfile) to a zipfile and get the zipfiles content by name

Javascript: create UTF-16 text file?

Javascript Blob object saved as file contains extra bytes

Saving binary data in a browser without it getting UTF8 encoded on download

Categories

Resources