Convert a PDF to thumbnail using PDF.JS - javascript

I am trying to write some code that converts the first page of a PDF to a PNG thumbnail. I have found several examples here on stackoverflow and other place on the web and it seems like a simple thing to do, but I cannot make it work.
I have a HTML file that looks like this
<!DOCTYPE html>
<html>
<head>
<title>JR PDF test</title>
</head>
<body>
<script type="text/javascript" src="/build/pdf.js"></script>
<script type="text/javascript" src="jr-pdf.js"></script>
<script type="text/javascript" src="canvas2image.js"></script>
</body>
<div id="pdf-main-container">
<a id="download-image" href="#">Download PNG</a>
</div>
</div>
</html>
<canvas id="the-canvas"></canvas>
and a the jr-pdf.js looks like this
// URL of PDF document
var url = "REGLEMENT_FOR_RALLY_2012.pdf";
// Asynchronous download PDF
PDFJS.getDocument(url)
.then(function(pdf) {
return pdf.getPage(1);
})
.then(function(page) {
// Set scale (zoom) level
var scale = 1.5;
// Get viewport (dimensions)
var viewport = page.getViewport(scale);
// Get canvas#the-canvas
var canvas = document.getElementById('the-canvas');
// Fetch canvas' 2d context
var context = canvas.getContext('2d');
// Set dimensions to Canvas
canvas.height = viewport.height;
canvas.width = viewport.width;
// Prepare object needed by render method
var renderContext = {
canvasContext: context,
viewport: viewport
};
// Render PDF page
page.render(renderContext);
// JR experiments
console.log('hej');
url = canvas.toDataURL();
downloadButton = document.getElementById('download-image');
downloadButton.setAttribute('download', 'jr.png');
downloadButton.setAttribute('href', url);
});
The first page of the PDF file is rendered correctly to the screen and a jr.png file is generated. When I look at the PNG file the header seems right, but when I try to view the file with an image viewer it is empty (actually it is shown as transparent).
So I guess these lines are wrong:
url = canvas.toDataURL();
downloadButton = document.getElementById('download-image');
downloadButton.setAttribute('download', 'jr.png');
downloadButton.setAttribute('href', url);
Any suggestions on how to make a correct PNG file?

page.render is asynchronous function -- you need to wait until it finishes painting
var renderTask = page.render(renderContext);
renderTask.promise.then(function () {
// use canvas now
var url = canvas.toDataURL();
var downloadButton = document.getElementById('download-image');
downloadButton.setAttribute('download', 'jr.png');
downloadButton.setAttribute('href', url);
});

Related

reference error: PDFJS is not defined when loading a pdf

I am trying to use pdf.js to load pdf into a web application so that I can further extract information from the pdf file live. But I am getting this error with a very minimal example.
I have tried wrapping the code in $(document).ready() as suggested in Uncaught ReferenceError: PDFJS is not defined when initialising PDF.JS
I am not able to access PDFJS in console either.
Below is the code I am using (from https://www.sitepoint.com/custom-pdf-rendering/)
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>PDF.js Learning</title>
</head>
<body>
<script type="text/javascript" src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<script type="text/javascript" src="https://mozilla.github.io/pdf.js/build/pdf.js"></script>
<script type="text/javascript" src="https://mozilla.github.io/pdf.js/build/pdf.worker.js"></script>
<script>
$(document).ready(function () {
var url = "https://github.com/mozilla/pdf.js/blob/master/web/compressed.tracemonkey-pldi-09.pdf";
// Asynchronous download PDF
PDFJS.getDocument(url)
.then(function(pdf) {
return pdf.getPage(1);
})
.then(function(page) {
// Set scale (zoom) level
var scale = 1.5;
// Get viewport (dimensions)
var viewport = page.getViewport(scale);
// Get canvas#the-canvas
var canvas = document.getElementById('the-canvas');
// Fetch canvas' 2d context
var context = canvas.getContext('2d');
// Set dimensions to Canvas
canvas.height = viewport.height;
canvas.width = viewport.width;
// Prepare object needed by render method
var renderContext = {
canvasContext: context,
viewport: viewport
};
// Render PDF page
page.render(renderContext);
});
})
</script>
<canvas id='the-canvas'></canvas>
</body>
</html>
pdfjsLib.getDocument() works. Now I just need to know why...

Display annotations/hyperlinks using pdf.js

I've managed to display a sample PDF file using mozilla's pdf.js. On the 1st page of the PDF file there is one internal link to page 2 and one external link to google.com.
My next step is getting the links in the sample PDF to work (be clickable) and I'm struggling with this one.
At the moment all I can do is get the links/annotations and print them to the console. Can anyone help me render these links to the annotations div layer?
I've looked through the pdf.js docs but I can't quite find what I'm looking for... I'm just looking to add this clickable links feature to the simple code below.
Here is a code pen of my progress so far: http://codepen.io/laurencemeah/pen/ONrJXj
var pdfFile = 'URL PATH TO PDF FILE';
var pageNum = 1;
PDFJS.getDocument(pdfFile).then(function(pdf) {
pdf.getPage(pageNum).then(function(page) {
var desiredWidth = document.getElementById('pdf-holder').getBoundingClientRect().width;
var viewport = page.getViewport(1);
var scale = desiredWidth / viewport.width;
var scaledViewport = page.getViewport(scale);
var canvas = document.getElementById('pdf-canvas');
var context = canvas.getContext('2d');
canvas.height = scaledViewport.height;
canvas.width = scaledViewport.width;
var renderContext = {
canvasContext: context,
viewport: scaledViewport
};
page.render(renderContext);
page.getAnnotations().then(function(data) {
console.log(data);
// now display them in annotation layer div
});
});
});
#pdf-holder {
width: 100%;
height: auto;
<script src="http://mozilla.github.io/pdf.js/build/pdf.js"></script>
<div id="pdf-holder">
<canvas id="pdf-canvas"></canvas>
<div id="annotation-layer"></div>
</div>

Image Reading from base64 data in javascript produces image of different size

I am trying to get a screenshot of a page using chrome extension and trying to crop the image. Upon taking the screenshot, I get a base64 dataUrl of the image.
When I try to render the image using the dataURL, the image renders larger(appears zoomed in) than the original image. When I manually give the <img> object the original size it renders fine. But when I draw this image to a canvas, the canvas takes the enlarged size.
I have been banging my head against the wall to get a solution for this problem but to no avail. Any help is greatly appreciated.
//Here is the JS code:
var image = document.getElementById("image");
var canvas = document.getElementById('myCanvas');
var context = canvas.getContext('2d');
image.onload = function() {
var sourceX = 0;
var sourceY = 0;
var sourceWidth = 150;
var sourceHeight = 80;
var destWidth = sourceWidth;
var destHeight = sourceHeight;
var destX = 0;
var destY = 0;
context.drawImage(image, sourceX, sourceY, sourceWidth, sourceHeight, destX, destY, destWidth, destHeight);
};
And here is the HTML code, I am removing the dataurl as it takes too much space, and pasting another URL.
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width">
<title>JS Bin</title>
</head>
<body>
<img height = "678" width = "1280" id = "image" src = "http://s12.postimg.org/7r9q3h2vx/ss2.jpg"/>
<canvas id="myCanvas" width="500" height="200"></canvas>
</body>
</html>
The following is the image(from the dataurl) I want to crop:
The following is the cropped image that I get. It is not the intended image because it appears enlarged(zoomed in). It is the top left section of the original image.
For further reference, here is the jsbin link: https://jsbin.com/pomayowara/edit?output
The base64 dataURL provided has information for an image of 2560 × 1356, but you rescaling the image to 1280x678. Your code is obtaining the original size of the image and is therefore being enlarged. You would need to send the canvas the original ratio of the image and not the resized dimensions of the image.
So, in case someone is facing a similar problem, this is what I did.
I was trying to crop the screenshot of a webpage using the coordinates given. As pointed out by #mike-schultz, the problem that I was facing was the image dataUrl was showing the image size to be 2560x1356 where as my browser viewport size was 1280x678. This was producing a zoomed-in image when drawn on canvas.
As per the suggestions of #mike0schultz, I doubled the sourceWidth and sourceHeight. This worked like a charm in showing the correct cropped part of the screenshot. The only problem was it was still blurry, not sharp at all. So I tried doing this:
I doubled the width and height of the canvas through javascript, then resized the canvas using CSS and finally scaled the context to (2,2). This gave me the perfect image as a result.
The code is as follows coords.width and coords.height are the intended dimensions of the image:
var c = document.createElement("canvas");
c.id = "myCanvas";
c.width = coords.width*2;
c.height = coords.height*2;
c.style.width = coords.width+"px";
c.style.height = coords.height+"px";
c.getContext("2d").scale(2,2)
var ctx = c.getContext("2d");
ctx.webkitImageSmoothingEnabled = false;
ctx.mozImageSmoothingEnabled = false;
ctx.msImageSmoothingEnabled = false;
ctx.imageSmoothingEnabled = false;
ctx.drawImage(image,
coords.left*devRes, coords.top*devRes,
coords.width*devRes, coords.height*devRes,
0,0,
coords.width, coords.height);

How to set BPG on background

Can I use a BPG image as a <div> or <body> background? background: url('bg.bpg'); doesn't work.
There is a Javascript decoder which allows .bpg images to be displayed in <img> tags, but I want to use one as a <body> background.
The Javascript BPG decoder works by converting the source .bpg file into an ImageData object, which can then be drawn to a canvas. So all you need to do is get the canvas as a PNG data string and set that as the body background image:
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>BPG background example.</title>
<!-- Decoder script from http://bellard.org/bpg/ -->
<script src="bpgdec8a.js"></script>
<script>
"use strict";
function setBackground(filename)
{
// Create a hidden canvas with the same dimensions as the image.
var canvas = document.createElement("canvas");
canvas.style.visibility = "hidden";
canvas.width = 512;
canvas.height = 512;
document.body.appendChild(canvas);
// Create a drawing context and a BPG decoder.
var ctx = canvas.getContext("2d");
var img = new BPGDecoder(ctx);
// Attempt to load the image.
img.onload = function()
{
// Once the image is ready, draw it to the canvas...
ctx.putImageData(this.imageData, 0, 0);
// ...and copy the canvas to the body background.
document.body.style.background = "url('" + canvas.toDataURL("image/png") + "')";
};
img.load(filename);
}
</script>
</head>
<body onload="setBackground('lena512color.bpg');">
<p>Hello, World!</p>
</body>
</html>

Can I get a preview for a PDF using HTML5 File API?

I can set the src of an image to preview a dropped image file, but how about a pdf?
(If not a preview thumbnail, then how about file type icons at least?)
Obviously, PDF files are not web compliant, so we have to make use of external libraries which help us deal with these type of files. A very well-known is the Mozilla PDF JS library.
Now do a preview of the first page of the pdf file.
//
// Asynchronous download PDF as an ArrayBuffer
//
PDFJS.disableWorker = true;
var pdf = document.getElementById('pdf');
pdf.onchange = function(ev) {
if (file = document.getElementById('pdf').files[0]) {
fileReader = new FileReader();
fileReader.onload = function(ev) {
// console.log(ev);
PDFJS.getDocument(fileReader.result).then(function getPdfHelloWorld(pdf) {
//
// Fetch the first page
//
// console.log(pdf)
pdf.getPage(1).then(function getPageHelloWorld(page) {
var scale = 0.5;
var viewport = page.getViewport(scale);
//
// Prepare canvas using PDF page dimensions
//
var canvas = document.getElementById('the-canvas');
var context = canvas.getContext('2d');
canvas.height = viewport.height;
canvas.width = viewport.width;
//
// Render PDF page into canvas context
//
var task = page.render({
canvasContext: context,
viewport: viewport
})
task.promise.then(function() {
// console.log(canvas.toDataURL('image/jpeg'));
});
});
}, function(error) {
// console.log(error);
});
};
fileReader.readAsArrayBuffer(file);
}
}
<script src="https://rawgithub.com/mozilla/pdf.js/gh-pages/build/pdf.js"></script>
<canvas id="the-canvas" style="border:1px solid black"></canvas>
<input id='pdf' type='file' />
The way to implement this is to get the file type when you are doing the upload of the pdf and save. when you displaying the record you can just do a check to see the file type

Categories