Load PDF base64 encoded into the viewer.js of PDF.JS

Load PDF base64 encoded into the viewer.js of PDF.JS - javascript

I'm fighting with PDF.JS. All I want is to ignore to add a pdf file in the GET-Paramter, for real.. who does this today??
So my problem is, I'm trying to load a pdf file into my loaded pdf.js-file. I want to use viewer.html and the viewer.js. The file is served as base64-encoded-string. For tests I am loading the base64 code into the html to have directly access over Javascript.
What files I'm loading:
build/pdf.js
build/pdf.worker.js
web/viewer.js
No loading errors
var BASE64_MARKER = ';base64,';
var pdfAsArray = convertDataURIToBinary("data:application/pdf;base64, " + document.getElementById('pdfData').value);
pdfjsLib.getDocument(pdfAsArray).then(function (pdf) {
//var url = URL.createObjectURL(blob);
console.log(pdfjsLib);
pdf.getPage(1).then(function(page) {
// you can now use *page* here
var scale = 1.5;
var viewport = page.getViewport({ scale: scale, });
// Prepare canvas using PDF page dimensions.
var canvas = document.getElementById('viewer');
var context = canvas.getContext('2d');
canvas.height = viewport.height;
canvas.width = viewport.width;
// Render PDF page into canvas context.
var renderContext = {
canvasContext: context,
viewport: viewport,
};
page.render(renderContext);
});
//pdfjsLib.load(pdf);
})
function convertDataURIToBinary(dataURI) {
var base64Index = dataURI.indexOf(BASE64_MARKER) + BASE64_MARKER.length;
var base64 = dataURI.substring(base64Index);
var raw = window.atob(base64);
var rawLength = raw.length;
var array = new Uint8Array(new ArrayBuffer(rawLength));
for (i = 0; i < rawLength; i++) {
array[i] = raw.charCodeAt(i);
}
return array;
}
Console.log here is..
{build: "d7afb74a", version: "2.2.228", getDocument: ƒ, LoopbackPort: ƒ, PDFDataRangeTransport: ƒ, …}
The PDF comes right and I can see Javascript gives me a correct console.log. I can see it has 2 pages and more than 1MB data. So I think the code and pdf is okay.
So and now I dont't want to use a fkn canvas. (I only saw tutorials where users working with canvas and not with the viewer.html) I'm not going to work with iframes,canvas or objects. I just want that the viewer.js is taking MY pdf not any other. (example.pdf)
I want that pdf.js is loading the pdf im parsing with PHP and onload it should just appears. PHP is giving the pdf base64 encoded.
I saw the article on the docu of pdf.js: https://github.com/mozilla/pdf.js/wiki/Frequently-Asked-Questions#file
You can use raw binary data to open a PDF document: use Uint8Array instead of URL in the PDFViewerApplication.open call. If you have base64 encoded data, please decode it first -- not all browsers have atob or data URI scheme support. (The base64 conversion operation uses more memory, so we recommend delivering raw PDF data as typed array in first place.)
What a nice tipp. BUT nobody says where you have access to PDFViewerApplication. If I do this:
pdfjsLib.PDFViewerApplication.open(pdfAsArray);
I will get an error like 'open is not a function' (i tried with load() too)
Sry for my bad english, hope you understand my problem and can help me ..

Related

Converting an encoded JPEG image to Uint8Array in JavaScript

I have a python server that sends a frame of RGB data to my JS script via a websocket. The frame is encoded using simplejpeg python package.
The code looks like this:
jpeg_frame = simplejpeg.encode_jpeg(image=color_frame, quality=85,colorspace='RGB', colorsubsampling='444', fastdct=True)
jpeg_frame is passed to the websocket and sent to the JS script.
On the JS side however, I would like to decompress the image and have it in the form of a Uint8Array so that I can work with the data. The image does not have to be viewed.
The data recieved is in the form of ArrayBuffer.
This is what I have so far.
socketio.on('colorFrame',(data)=>{
var mime = 'image/jpeg';
var a = new Uint8Array(data);
var nb = a.length;
if (nb < 4)
return null;
var binary = "";
for (var i = 0; i < nb; i++)
binary += String.fromCharCode(a[i]);
var base64 = window.btoa(binary);
var image = new Image();
image.src = 'data:' + mime + ';base64,' + base64;
var canvas = document.createElement('canvas');
var ctx = canvas.getContext('2d');
ctx.drawImage(image,0,0)
image.onload = function(){
var image_data = ctx.getImageData(0,0, 960, 540);
console.log(image_data);
};
});
So far I could not yet figure out how I can decompress the image. I dont mind the inaccuracy of the lossy compression, I just want the image back to its original resolution and be able to convert it to a Uint8Array.
What is the simplest way to get JPEG decoding working in a JS scrip?

There is an excellent article "Decoding a PNG Image in JavaScript" that answers the literal question. It's a non-trivial answer. (Note: I am not the author of the article)
However
Based on your code, it looks like you might not care about actually decoding the Image yourself. If you just want to display it, take a look at this StackOverflow question

JS: how can I base64 encode a local file without XMLHttpRequest?

I'm trying to base64 encode a local file. It's next to my .js file so there's no uploading going on. Solutions like this (using XMLHttpRequest) get a cross-site scripting error.
I'm trying something like this (which doesn't work but it might help explain my problem):
var file = 'file.jpg'
var reader = new FileReader();
reader.onload = function(e) {
var res = e.target.result;
console.log(res);
};
var f = reader.readAsDataURL(file);
Anyone have any experience doing this locally?

Solutions like this (using XMLHttpRequest) get a cross-site
scripting error.
If using chrome or chromium browser, you could launch with --allow-file-access-from-files flag set to allow request of resource from local filesystem using XMLHttpRequest() or canvas.toDataURL().
You can use <img> element, <canvas> element .toDataURL() to create data URL of local image file without using XMLHttpRequest()
var file = "file.jpg";
var img = new Image;
var canvas = document.createElement("canvas");
var ctx = canvas.getContext("2d");
img.onload = function() {
canvas.width = this.naturalWidth;
canvas.height = this.naturalHeight;
ctx.drawImage(this, 0, 0);
var res = canvas.toDataURL("image/jpeg", 1); // set image `type` to `image/jpeg`
console.log(res);
}
img.src = file;
You could alternatively use XMLHttpRequest() as described at Convert local image to base64 string in Javascript.
See also How to print all the txt files inside a folder using java script .
For a details of difference of returned data URI from either approach see canvas2d toDataURL() different output on different browser
As described by #Kaiido at comment below
it will first decode it, at this stage it's still your file, then it
will paint it to the canvas (now it's just raw pixels) and finally it
will reencode it (it has nothing to do with your original file
anymore) check the dataURI strings... They're compeltely different and
even if you do the canvas operation from two different browsers,
you'll have different outputs, while FileReader will always give you
the same output, since it encode the file directly, it doesn't decode
it.

How to encrypt and decrypt image files online?

I'm working on a web application that involves loading images into a canvas object, then manipulating those images beyond recognition. I need to hide the original source image file (a jpeg) so that the user on the client side should not be able to use dev tools to see the original image.
I have tried to encode the images as a base64 and load it via a JSON data file, but even with this method, the inspector tool still shows the original image file (when it is set as the src of my javascript image object). Is there some way that I can encrypt and decrypt the image files, so that the user has no way of seeing the original image (or have it be some garbled image, for example)? Preferably I'd like to do this on the client side, as all my code is client side at the moment. Thanks in advance!
Here is my code for loading the base64 encoded image data via a JSON file:
//LOAD JSON INSTEAD?
$.getJSON( "media/masks.json", function( data ) {
console.log("media/masks.json LOADED");
//loop through data
var cnt = 0;
for (var key in data)
{
if (data.hasOwnProperty(key))
{
// here you have access to
//var id = key;
var imgData = data[key];
//create image object from data
var image = new Image();
image.src = imgData;
console.log('img src: '+ imgData);
var elementId = $scope.masks[cnt].id;
// copy the images to canvases
imagecanvas = document.createElement('CANVAS');
imagecanvas.width = image.width;
imagecanvas.height = image.height;
imagecanvas.getContext('2d').drawImage(image,0,0);
imageCanvases[elementId] = imagecanvas;
}
cnt++;
}
});
This is what I see in the Chrome dev tools Network inspector (exactly what I'm trying to avoid):

I need to hide the original source image file (a jpeg) so that the user on the client side should not be able to use dev tools to see the original image.
That's not possible. There is always a way to get at the image using developer tools. Even if there wasn't, a simple screen capture would defeat whatever measures you put in place.

Fetching images with JavascriptResponse in CefBrowser (Without downloading twice)

I'm attempting to use CefSharp (Offscreen) to get image information in a webpage. I'd like to avoid downloading the content twice (I'm aware I can pull the src string from an image tag, and then download the image again). Right now, this is my code:
using (var browser = new ChromiumWebBrowser("http://example.com"))
{
//All this does is wait for the entire frame to be loaded.
await LoadPageAsync(browser);
var res1 = await browser.EvaluateScriptAsync("document.getElementsByTagName('img')[0].src");
//res1.Result = the source of the image (A string)
var res2 = await browser.EvaluateScriptAsync("document.getElementsByTagName('img')[0]");
//This causes an APPCRASH on CefSharp.BrowserSubprocess.exe
}
The way I figure it, CefSharp is already downloading these images to render a webpage. I'd like to avoid making a second request to pull these images from the client, and pull them directly from the client. Is this possible? What are the limitations of the JavascriptResponse object, and why is it causing an APPCRASH here?
Some thoughts: I thought about base64 encoding the image and then pulling it out this way, but this would require me to generate a canvas and fill that canvas every time for each image I want, generate a base64 string, bring it to c# as a string, and then decode it back to an image. I don't know how efficient that would be, but I'm hoping there could be a better solution.

This is how I solved it:
result = await browser.EvaluateScriptAsync(#"
;(function() {
var getDataFromImg = function(img) {
var canvas = document.createElement('canvas');
var context = canvas.getContext('2d');
context.drawImage(img, 0, 0 );
var dataURL = canvas.toDataURL('image/png');
return dataURL.replace(/^data:image\/(png|jpg);base64,/, '');
}
var images = document.querySelectorAll('.image');
var finalArray = {};
for ( var i=0; i<images.length; i++ )
{
//I just filled in array. Depending on what you're grabbing, you may want to fill
//This with objects instead with text to identify each image.
finalArray.push(getDataFromDiv(images[i]));
}
return finalArray;
})()");
//Helper function for below
private static string FixBase64ForImage(string image)
{
var sbText = new StringBuilder(image, image.Length);
sbText.Replace("\r\n", string.Empty);
sbText.Replace(" ", string.Empty);
return sbText.ToString();
}
//In c# convert the data to a memory stream, and then load it from that.
var bitmapData = Convert.FromBase64String(FixBase64ForImage(image));
var streamBitmap = new MemoryStream(bitmapData);
var sourceImage = (Bitmap) Image.FromStream(streamBitmap);

Try executing this javascript...
How to get base64 encoded data from html image
CefSharp should have FileReader api.
Then you can have the EvaluateScriptAsync call return the base64 encoded image data.

Using pdf.js to display pdf from raw data

I am just getting started with pdf.js and I am trying to load a pdf file from the raw pdf data. I have seen the code:
PDFJS.getPdf('cwpdf.pdf', function getPdfHelloWorld(data) {
...
}
But I am wondering if there is any way to load a pdf from the raw pdf data instead of from the filename. Is this possible?

I put together some complete code and was able to find the problem with the solution below:
var int8View = new Uint8Array(...); //populate int8View with the raw pdf data
PDFJS.getDocument(int8View).then(function(pdf) {
}
When using this solution I ran into the problem other users have seen (#MurWade and #user94154) - the stream must have data error message. It looks like the problem is in the following line:
var int8View = new Uint8Array(...);
The array containing the data does not get properly created, since the data is not in the expected format. Therefore, this line works for some cases, but it might not work in the general case.
I've put together a complete solution, that seems to work better. It loads a PDF file, and it converts it to a raw PDF stream. This is there just for testing purposes, in a real world example, the PDF stream will probably be received in a different fashion. You can examine the stream in a debugger, and it will show as plain text. Below is the key line of the code to make this sample work. Instead converting the raw PDF stream to an array, convert it to data.
var docInitParams = { data: pdfraw };
Then proceed with loading the data. Below is the complete working sample of how to load a standard raw PDF stream and display it. I used to PDF JS hello world sample as a starting point. Please let me know in the comments if any clarification is necessary on this.
'use strict';
PDFJS.getDocument('helloworld.pdf').then(function(pdf) {
pdf.getData().then(function(arrayBuffer) {
var pdfraw = String.fromCharCode.apply(null, arrayBuffer);
var docInitParams = {
data: pdfraw
};
PDFJS.getDocument(docInitParams).then(function(pdfFromRaw) {
pdfFromRaw.getPage(1).then(function(page) {
var scale = 1.5;
var viewport = page.getViewport(scale);
var canvas = document.getElementById('the-canvas');
var context = canvas.getContext('2d');
canvas.height = viewport.height;
canvas.width = viewport.width;
var renderContext = {
canvasContext: context,
viewport: viewport
};
page.render(renderContext);
});
});
});
});

Well, since no one else has answered I will post my findings. I figured out that yes, it is possible to load a pdf file from the raw data. The way this can be done is by using a UInt8Array populated with data in place of the url to where the pdf file is stored.
Example code to do this is below:
var int8View = new Uint8Array(...); //populate int8View with the raw pdf data
PDFJS.getDocument(int8View).then(function(pdf) {
}

We Keep Coding

JavaScript is the programming language of the Web.

Load PDF base64 encoded into the viewer.js of PDF.JS - javascript

Related

Converting an encoded JPEG image to Uint8Array in JavaScript

JS: how can I base64 encode a local file without XMLHttpRequest?

How to encrypt and decrypt image files online?

Fetching images with JavascriptResponse in CefBrowser (Without downloading twice)

Using pdf.js to display pdf from raw data

Categories

Resources