Save 1st page of a PDF file using JavaScript

Save 1st page of a PDF file using JavaScript - javascript

I need to extract 1st page of an uploaded PDF file(in SharePoint Online) & save it as a separate PDF file using JavaScript.
After some searching I found this. But I'm not able to understand how it works.
Please help.

As requested in the comment in a previous answer I am posting sample code to just get the first page in its original format, so not as a bitmap.
This uses a third party REST service that can PDF Convert, Merge, Split, Watermark, Secure and OCR files. As it is REST based, it supports loads of languages, JavaScript being one of them.
What follows is a self-contained HTML page that does not require any additional server side logic on your part. It allows a PDF file to be uploaded, splits up the PDF into individual pages and discards them all except for the first one. There are other ways to achieve the same using this service, but this is the easiest one that came to mind.
You need to create an account to get the API key, which you then need to insert in the code.
Quite a bit of the code below deals with the UI and pushing the generated PDF to the browser. Naturally you can shorten it significantly by taking all that code out.
<!DOCTYPE html>
<html>
<head>
<title>Muhimbi API - Split action</title>
<script type="text/javascript" src="https://ajax.googleapis.com/ajax/libs/jquery/3.1.1/jquery.min.js"></script>
<script type="text/javascript">
// ** Specify the API key associated with your subscription.
var api_key = '';
// ** For IE compatibility*
// ** IE does not support 'readAsBinaryString' function for the FileReader object. Create a substitute function using 'readAsArrayBuffer' function.
// Install the fallback only when the browser has no native readAsBinaryString.
if (FileReader.prototype.readAsBinaryString === undefined) {
// Fallback readAsBinaryString(file_content): reads file_content as an ArrayBuffer with a
// second, internal FileReader and converts it to a binary string, one character per byte.
FileReader.prototype.readAsBinaryString = function (file_content) {
var binary_string = "";
// The reader this method was called on; the internal reader's callback needs it below.
var thiswindow = this;
var reader = new FileReader();
reader.onload = function (e) {
var bytes = new Uint8Array(reader.result);
var length = bytes.byteLength;
for (var i = 0; i < length; i++) {
binary_string += String.fromCharCode(bytes[i]);
}
// The string is stored on a custom `content` property (not on `result`),
// so code relying on this fallback has to read `content`.
thiswindow.content = binary_string;
// Signal completion on the calling reader through jQuery.
// NOTE(review): presumably this ends up calling the caller's `onload` without an
// event argument, which is what the click handler's "For IE" branch checks for - confirm.
$(thiswindow).trigger('onload');
}
reader.readAsArrayBuffer(file_content);
}
}
// ** For IE compatibility*
// ** Create a Blob object from the base64 encoded string.
function CreateBlob(base64string)
{
    // ** Decode the base64 text; every character of the result holds one byte value.
    var decoded = atob(base64string);
    var total = decoded.length;
    // ** Copy the byte values straight into a typed array.
    var pdf_bytes = new Uint8Array(total);
    var position = total;
    while (position--) {
        pdf_bytes[position] = decoded.charCodeAt(position);
    }
    // ** Wrap the bytes in a Blob flagged as a PDF document.
    return new Blob([pdf_bytes], {type: "application/pdf"});
}
// ** Execute code when DOM is loaded in the browser.
$(document).ready(function ()
{
    // ** Wires up the "Split PDF" button: reads the selected file, posts it base64
    // ** encoded to the split_pdf operation and offers the first returned page as a download.

    // ** How long (ms) the temporary object URL is kept alive after the download is
    // ** triggered; it is released afterwards so the Blob can be garbage collected.
    var OBJECT_URL_RELEASE_DELAY_MS = 40000;

    //** Make sure an api key has been entered.
    if (api_key === '')
    {
        alert('Please update the sample code and enter the API Key that came with your subscription.');
    }

    // ** Show a result code and its details in the two result spans.
    function show_result(code, details)
    {
        $("#spnResultCode").text(code);
        $("#spnResultDetails").text(details);
    }

    // ** Prompt the user to save or open the generated file.
    function save_pdf(file_blob, file_name)
    {
        if (window.navigator.msSaveOrOpenBlob) {
            // ** For IE.
            window.navigator.msSaveOrOpenBlob(file_blob, file_name);
            return;
        }
        // ** For other browsers.
        // ** Create temporary hyperlink to download content.
        var object_url = window.URL.createObjectURL(file_blob);
        var download_link = window.document.createElement("a");
        download_link.href = object_url;
        download_link.download = file_name;
        document.body.appendChild(download_link);
        download_link.click();
        document.body.removeChild(download_link);
        // ** Release the URL later; revoking it right away can cancel the download.
        setTimeout(function ()
        {
            window.URL.revokeObjectURL(object_url);
        }, OBJECT_URL_RELEASE_DELAY_MS);
    }

    // ** Handle the JSON response of the split operation.
    function handle_split_response(data)
    {
        // ** Read response values, tolerating missing fields.
        var response = data || {};
        var result_code = response.result_code || "";
        var result_details = response.result_details || "";
        var processed_file_contents = response.processed_file_contents || [];
        var base_file_name = response.base_file_name || "";
        // ** Show result code and details.
        show_result(result_code, result_details);
        if (result_code !== "Success")
        {
            return;
        }
        // ** Get first item in the array. This is the first page in the PDF
        var processed_file_content = processed_file_contents[0];
        if (!processed_file_content)
        {
            show_result(result_code, 'The service did not return any pages.');
            return;
        }
        // ** Convert to Blob and hand it to the browser.
        save_pdf(CreateBlob(processed_file_content), base_file_name + ".pdf");
    }

    // ** Post the base64 encoded file to the split operation.
    function split_pdf(source_file_name, source_file_content)
    {
        // ** Fill out the data for the conversion operation. JSON.stringify escapes
        // ** quotes and backslashes in the file name, unlike string concatenation.
        var input_data = JSON.stringify({
            use_async_pattern: false,
            fail_on_error: false,
            split_parameter: 1,
            file_split_type: "ByNumberOfPages",
            // ** Always pass the name of the input file with the correct file extension.
            source_file_name: source_file_name,
            // ** Pass the content of the uploaded file, making sure it is base64 encoded.
            source_file_content: source_file_content
        });
        // ** Allow cross domain request
        jQuery.support.cors = true;
        // ** Make API Call.
        $.ajax(
        {
            type: 'POST',
            // ** Set the request header with API key and content type
            beforeSend: function (request)
            {
                request.setRequestHeader("Content-Type", 'application/json');
                request.setRequestHeader("api_key", api_key);
            },
            url: 'https://api.muhimbi.com/api/v1/operations/split_pdf',
            data: input_data,
            dataType: 'json',
            // ** Carry out the conversion
            success: handle_split_response,
            // ** jQuery passes (jqXHR, textStatus, errorThrown) to the error callback.
            error: function (jqXHR, textStatus, errorThrown)
            {
                var error_details = 'http status = ' + jqXHR.status + ', status = ' + textStatus + ', error = ' + errorThrown;
                console.log(error_details);
                // ** Show the error
                show_result("API call error.", error_details);
            }
        });
    }

    // ** Attach a click event to the Convert button.
    $('#btnConvert').click(function ()
    {
        // ** Proceed only when API Key is provided.
        if (api_key === '')
            return;
        try
        {
            // ** Get the file object from the File control.
            var source_file = document.getElementById('file_to_split').files[0];
            //** Was a file uploaded?
            if (!source_file)
            {
                alert('Select file to convert.');
                return;
            }
            // ** Get the file name from the uploaded file.
            var source_file_name = source_file.name;
            var reader = new FileReader();
            // ** Report read failures; they happen asynchronously, outside this try block.
            reader.onerror = function ()
            {
                show_result("File read error.", 'Could not read file.');
            };
            //** Read the file into base64 encoded string using FileReader object.
            reader.onload = function (reader_event)
            {
                var binary_string;
                if (!reader_event) {
                    // ** For IE (the fallback stores the data on `content`).
                    binary_string = reader.content;
                }
                else {
                    // ** For other browsers.
                    binary_string = reader_event.target.result;
                }
                // ** Convert binary to base64 encoded string.
                var source_file_content = btoa(binary_string);
                if (!source_file_content)
                {
                    // ** Show the error
                    show_result("File read error.", 'Could not read file.');
                    return;
                }
                split_pdf(source_file_name, source_file_content);
            };
            reader.readAsBinaryString(source_file);
        }
        catch (err)
        {
            console.log(err.message);
            // ** Show exception
            show_result("Exception occurred.", err.message);
        }
    });
});
</script>
</head>
<body>
<div>
<form id="convert_form">
Select file: <input type="file" id="file_to_split" />
<br /><br />
<button id="btnConvert" type="button">Split PDF</button>
<br /><br />
Result_Code: <span id="spnResultCode"></span>
<br />
Result_Details: <span id="spnResultDetails"></span>
</form>
</div>
</body>
</html>
Big fat disclaimer, I worked on this service, so consider me biased. Having said that, it works well and could potentially solve your problem.

Finally found a solution.
First converting the uploaded PDF to image using PDF.JS, done some customization in the sample code.
Then saved the 1st page image as PDF using jsPDF.
The customized download code,
// Save what is currently drawn on the canvas as a single-page PDF named page1.pdf.
// NOTE(review): __CANVAS is defined elsewhere - presumably the canvas PDF.js rendered page 1 into.
$("#download-image").on('click', function saveCanvasAsPdf() {
    // Snapshot the canvas as a data URL.
    var canvasSnapshot = __CANVAS.toDataURL();
    var pdf = new jsPDF();
    // Place the image at the origin with a size of 210 x 300 (document units).
    pdf.addImage(canvasSnapshot, 0, 0, 210, 300);
    pdf.save('page1.pdf');
});

Related

How to use microsoft cognitive api for emotion detection in faces?

I am trying to detect emotions in faces from an image uploaded. I can't seem to find any example code for emotion detection.
https://azure.microsoft.com/en-us/try/cognitive-services/my-apis/?apiSlug=face-api&country=Canada&allowContact=true
I found this
https://learn.microsoft.com/en-us/azure/cognitive-services/emotion/quickstarts/javascript
but the url endpoint doesn't work. I then tried regular face api, but even that I get resource not found.
Does anyone know what's going on?
Thanks
// FACE.listen() watches the #camera file input, uploads the chosen image to the
// Face API "detect" operation and lists the face rectangle and emotion scores.
var FACE = new function () {
    this.listen = function() {
        var camera = document.getElementById('camera');
        camera.addEventListener('change', function(e) {
            var imageFile = e.target.files[0];
            // Nothing was selected (e.g. the dialog was cancelled).
            if (!imageFile) {
                return;
            }
            var reader = new FileReader();
            //save the mime type of the file
            var fileType = imageFile.type;
            //wire up the listener for the async 'loadend' event
            reader.addEventListener('loadend', function () {
                // 'loadend' also fires after a failed read; do not upload an empty body.
                if (reader.error || !reader.result) {
                    alert("Error: could not read the selected image.");
                    return;
                }
                //get the result of the async readAsArrayBuffer call
                sendImage(reader.result, fileType);
            });
            //read the file asynchronously
            reader.readAsArrayBuffer(imageFile);
        });

        // Append one <li> per property of `values` to the list matched by `selector`.
        // The text is set with .text() so API values are never parsed as HTML.
        function appendProperties(selector, values) {
            var list = $(selector);
            for (var prop in values) {
                list.append($("<li>").text(" " + prop + ": " + values[prop]));
            }
        }

        function sendImage(fileContentArrayBuffer, fileType) {
            $.ajax({
                // NOTE: You must use the same location in your REST call as you used to obtain your subscription keys.
                // For example, if you obtained your subscription keys from westcentralus, replace "westus" in the
                // URL below with "westcentralus".
                // The URL must name an operation ("detect"); the bare /face/v1.0/ path answers
                // "resource not found". returnFaceAttributes=emotion asks for the emotion scores.
                url: "https://westcentralus.api.cognitive.microsoft.com/face/v1.0/detect?returnFaceAttributes=emotion",
                beforeSend: function(xhrObj){
                    // The body is the raw image, so it must be sent as a binary stream
                    // ("application/json" is only valid for a {"url": "..."} body).
                    xhrObj.setRequestHeader("Content-Type","application/octet-stream");
                    // NOTE: Replace the "Ocp-Apim-Subscription-Key" value with a valid subscription key.
                    xhrObj.setRequestHeader("Ocp-Apim-Subscription-Key","my key");
                },
                //don't forget this!
                processData: false,
                type: "POST",
                // Request body
                data: new Blob([fileContentArrayBuffer], { type: fileType })
            }).done(function(data) {
                alert(data);
                // The response is an array with one entry per detected face.
                if (!data || !data.length) {
                    alert("No face was detected in the image.");
                    return;
                }
                // Get face rectangle dimensions and append them to the DOM
                appendProperties('#faceRectangle', data[0].faceRectangle);
                // Get emotion confidence scores and append them to the DOM
                var faceAttributes = data[0].faceAttributes || {};
                appendProperties('#scores', faceAttributes.emotion);
            }).fail(function(err) {
                alert("Error: " + JSON.stringify(err));
            });
        }
    };
};

Assuming you have your key, the request URL for Emotion API should be
https://westus.api.cognitive.microsoft.com/emotion/v1.0/recognize?
You may also want to take a look at this website. It got similar code.
Sorry I can't use comment function as I am new here and don't have enough reputation to do so.

Could you double check with your api region? Because this error occurs when there is no resource found for given api key in a region.
And for accessing emotions you will need to pass parameters to api which will give you attributes for faces in response which contains emotions.

Dynamically creating ODT using WebODF / Javascript

Using javascript, I need to create an .odt file and populate the contents with data in javascript variables. The only thing that I have found that might work is WebODF. An example that seems similar to it is here.
When I am trying to do something similar to PDF with pdfkit (using node) I can do something like this:
// Minimal pdfkit (Node.js) example: stream a two-line PDF to `fileName`.
// `fileName` is expected to be defined by the surrounding code.
var fs = require('fs');
var PDFDocument = require('pdfkit');
var doc = new PDFDocument();
doc.pipe(fs.createWriteStream(fileName));
doc.text("Fist line");
doc.text("Second line");
// Finalize the document; until end() is called the output stream stays open
// and the file on disk is not a complete PDF.
doc.end();
Is it possible to do something similar to it using WebODF? I've found ops.OpInsertText, but I'm not sure how I can use it to actually insert text.
Again, ideally the solution is only in javascript.

If I got your question right, you want to create a new file dynamically using data in JavaScript variable.
You can refer to this answer to load a file from a JavaScript variable in the form of a byte array.
And this will get you up and running with a odt file ,which you can save to desired location.
// Callback that receives the document bytes and POSTs them, as an ODT blob,
// to myWebServiceUrl through AngularJS $http.
//   error - NOTE(review): presumably a Node-style error value that is falsy on success - confirm
//   data  - typed array holding the document bytes (its `buffer` is uploaded)
function saveByteArrayLocally(error, data) {
    // Do not attempt the upload when loading the bytes failed or produced nothing.
    if (error || !data) {
        console.error(error || "No document data to upload.");
        return;
    }
    var mime = "application/vnd.oasis.opendocument.text";
    var blob = new Blob([data.buffer], {type: mime});
    var res = $http({
        method: 'POST', url: myWebServiceUrl,
        // Content-Type is deliberately left undefined for the Blob body.
        headers: {'Content-Type': undefined},
        data: blob
    });
    // .then() works in every AngularJS release; .success() was removed in 1.6.
    res.then(function(response) {
        console.log(response.status);
    }, function(response) {
        console.error("Upload failed with status " + response.status);
    });
}
NOTE: You can use multer,express.js framework to design services as backend to save files.

This may help you. In this example I am inserting the value returned from prompt at the cursor position inside WebODF. You can similarly insert data at any other element's offset().
Pressing Ctrl+Space will show a prompt, and whatever we type there is inserted into the ODF document.
function insertBreakAtPoint(e) {
var range;
var textNode;
var offset;
var key = prompt("Enter the JSON Key", "name");
{% raw %}
var key_final = '{{address.'+key+'}}';
{% endraw %}
var caretOverlay=$('.webodf-caretOverlay').offset();
if (document.caretPositionFromPoint) {
range = document.caretPositionFromPoint(
caretOverlay.left, caretOverlay.top
);
textNode = range.offsetNode;
offset = range.offset;
} else if (document.caretRangeFromPoint) {
range = document.caretRangeFromPoint(
caretOverlay.left, caretOverlay.top
);
textNode = range.startContainer;
offset = range.startOffset;
}
#only split TEXT_NODEs
if (textNode.nodeType == 3) {
var replacement = textNode.splitText(offset);
var keynode = document.createTextNode(key_final);
textNode.parentNode.insertBefore(keynode, replacement);
}
}
// Keydown handler: Ctrl+Space (keyCode 32 with ctrlKey set) runs insertBreakAtPoint.
function KeyPress(e) {
    // Use the global window.event when the browser provides it, otherwise the argument.
    var keyEvent = e;
    if (window.event) {
        keyEvent = event;
    }
    var isSpace = keyEvent.keyCode == 32;
    if (isSpace && keyEvent.ctrlKey) {
        insertBreakAtPoint();
    }
}
// Register the shortcut for the whole document.
document.onkeydown = KeyPress;

Creating a CSV file from a Meteor.js Collection

I've written my code so far and can get a list of all the records to show up on a webpage, however I need to be able to get it as a CSV (comma separated values) file.
Right now the page shows a list like follows:
Name Address Description
Bob 1 street Journalist
Bill 2 street Fireman
etc...
Anyway I can have meteor create a CSV file for download, instead of it showing up as a webpage with all the HTML markup?

Based on How to serve a file using iron router or meteor itself?
HTML:
<template name="blah">
Download the CSV
</template>
JS:
// An example collection
// Collection used by the CSV export example.
var DummyData = new Mongo.Collection("dummyData");

// On the server, fill the collection with 100 sample records at startup,
// but only when it holds no documents yet.
if (Meteor.isServer) {
    Meteor.startup(function seedDummyData() {
        var existingCount = DummyData.find().count();
        if (existingCount !== 0) {
            return;
        }
        var rowNumber = 1;
        while (rowNumber <= 100) {
            DummyData.insert({
                Name: "Name" + rowNumber,
                Address: "Address" + rowNumber,
                Description: "Description" + rowNumber
            });
            rowNumber += 1;
        }
    });
}
// Server-side route: answers /csv with every DummyData record as a CSV download.
Router.route('/csv', {
    where: 'server',
    action: function () {
        var filename = 'meteor_dummydata.csv';
        var headers = {
            'Content-type': 'text/csv',
            'Content-Disposition': "attachment; filename=" + filename
        };

        // Render one value as a CSV field: missing values become empty, and values
        // containing a quote, comma or line break are quoted with inner quotes doubled.
        function toCsvField(value) {
            var text = (value === undefined || value === null) ? '' : String(value);
            if (/[",\r\n]/.test(text)) {
                return '"' + text.replace(/"/g, '""') + '"';
            }
            return text;
        }

        // Build the CSV body, one CRLF-terminated line per record.
        var lines = [];
        DummyData.find().forEach(function (rec) {
            var fields = [rec.Name, rec.Address, rec.Description];
            lines.push(fields.map(toCsvField).join(",") + "\r\n");
        });
        var fileData = lines.join("");

        this.response.writeHead(200, headers);
        return this.response.end(fileData);
    }
});

Plupload chunk size renaming file to Blob

I'm using Plupload in order to upload files. The configuration we have is the following:
// Plupload queue widget configuration for the #uploadData element.
// NOTE: the snippet is cut off after the `init` section; the closing `});` is not shown.
$("#uploadData").pluploadQueue({
// General settings
runtimes: 'html5,flash,silverlight,html4',
// Upload endpoint; `serviceurl` is defined elsewhere.
url: serviceurl,
// Maximum file size
max_file_size: '50mb',
// Chunk size; per the question, files above 1MB arrive with the name "blob".
chunk_size: '1mb',
max_file_count: 50,
unique_names: true,
// Resize images on clientside if we can
resize: {
width: 200,
height: 200,
quality: 90,
crop: true // crop to exact dimensions
},
// Specify what files to browse for
filters: [
{ title: "Documents Excel", extensions: "xlsx" }
],
init: {
// Start uploading as soon as files are added to the queue.
FilesAdded: function (up, files) {
up.start();
},
// When every queued file has been uploaded, show the widget's buttons and
// status elements again and re-initialise the uploader.
UploadComplete: function (up, files) {
if (up.total.uploaded == up.files.length) {
$(".plupload_buttons").css("display", "inline");
$(".plupload_upload_status").css("display", "inline");
up.init();
}
}
},
The problem I have is that when I upload a file that is bigger than 1MB, I don't receive the right name; instead I receive "blob" as the name.
For example, if the name of my file is "Test.xlsx" and the size is 2MB, I will receive, on the server side, "blob" as the name and not "Test".
Limitation: I'm not allowed to change the chunk size limit on the client.
How can I get the right name?
Thanks for your help.
code used to receive the data on the server side :
// Receives one uploaded chunk: records the upload in the session-tracked list,
// writes the chunk to disk through UploadFile and answers with a plain-text acknowledgement.
//   chunk - chunk index passed on to UploadFile (may be null)
//   name  - file name sent with the request
[System.Web.Mvc.HttpPost]
public ActionResult UploadData(int? chunk, string name)
{
    const string sessionKey = "upDataFiles";

    var postedFile = Request.Files[0];

    // Create the per-session list of uploads on first use.
    if (Session[sessionKey] == null)
    {
        Session[sessionKey] = new List<FileUploadViewModel>();
    }

    var knownUploads = (List<FileUploadViewModel>)Session[sessionKey];
    Session[sessionKey] = UpdateTempDataUpload(postedFile.FileName, name, knownUploads);

    UploadFile(chunk, name);
    return Content("chunk uploaded", "text/plain");
}
// Writes the first posted file of the current request into ~/App_Data/<fileName>.
// Chunk 0 (or a missing chunk index) creates or overwrites the file; later chunks are appended.
//   fileChunk - zero-based chunk index, or null for a non-chunked upload
//   fileName  - target file name as sent by the client
private void UploadFile(int? fileChunk, string fileName)
{
    var fileUpload = Request.Files[0];
    var uploadPath = Server.MapPath("~/App_Data");
    // The name comes from the client: keep only its file-name part so a value such as
    // "..\..\web.config" cannot escape the upload directory.
    var fullPath = Path.Combine(uploadPath, Path.GetFileName(fileName));
    var mode = (fileChunk ?? 0) == 0 ? FileMode.Create : FileMode.Append;
    using (var fs = new FileStream(fullPath, mode, FileAccess.Write, FileShare.ReadWrite))
    {
        // CopyTo keeps reading until the input is exhausted; a single Read call may
        // return fewer bytes than requested. The using block closes the stream.
        fileUpload.InputStream.CopyTo(fs);
    }
}
When i check the name in the request.File object, it's blob and not the actual name.

This issue was described in detail on GitHub. It might be a little confusing, but from what I understand:
$_FILES[ 'name' ] will obligatorily be 'blob' if you use chunks.
They seem to claim that $_REQUEST[ 'name' ] should have the real name, but the users seem to disagree.
As a workaround it is proposed to send the filename along in another form field or url parameter (using for example the BeforeUpload event to set that information). You can set multipart_params and store your information on the file object that you added before or look at a field in your html to get information at this point.

To extend the answer by nus a bit:
I used the BeforeUpload event in Javascript as described in the GitHub post and then had to make a change server-side.
Previously, to get the file name I'd used:
// Earlier approach: the name is taken from the posted file itself, which is where
// "blob" was being returned.
var file = Request.Files[f];
var fileName = file.FileName;
However, this had been where 'blob' was being returned. Instead, I made a change so while I still used Request.Files[f] for the file details, I derived the file name like so:
// Request.Files[f] is still used for the file details ...
var file = Request.Files[f];
// ... but the file name is read from the request's "name" parameter instead of file.FileName.
var fileName = Request.Params["name"];

Use BeforeUpload event under init as below:
// Fragment of the uploader options: the `init` section with a BeforeUpload handler.
init : {
// As written, the handler only logs the file through log(); it does not add
// the file name to the request by itself.
BeforeUpload: function(up, file) {
log('[BeforeUpload]', 'File: ', file);
}
}
Also add log function:
// Appends one line to the #log element describing all arguments.
// String arguments are written as they are; any other argument is expanded into
// "key=value" pairs (function-valued properties are skipped). For plupload.File
// objects the numeric `status` is shown by name.
function log() {
    var str = "";
    plupload.each(arguments, function(arg) {
        var row = "";
        if (typeof(arg) != "string") {
            plupload.each(arg, function(value, key) {
                var shown = value;
                // Convert the status of File objects to human readable form. Only the
                // `status` property is translated: other numeric properties can hold
                // the same numbers as the status constants.
                if (arg instanceof plupload.File && key === 'status') {
                    switch (value) {
                        case plupload.QUEUED:
                            shown = 'QUEUED';
                            break;
                        case plupload.UPLOADING:
                            shown = 'UPLOADING';
                            break;
                        case plupload.FAILED:
                            shown = 'FAILED';
                            break;
                        case plupload.DONE:
                            shown = 'DONE';
                            break;
                    }
                }
                if (typeof(shown) != "function") {
                    row += (row ? ', ' : '') + key + '=' + shown;
                }
            });
            str += row + " ";
        } else {
            str += arg + " ";
        }
    });
    // Named differently from the function so the local does not shadow `log`.
    var log_panel = $('#log');
    log_panel.append(str + "\n");
}
And then you can get Filename in $_POST['name'] or $_REQUEST['name'].
Also you can set unique_names as false.
You can also find help here

Inside a web worker, how to find html attribute from string?

Inside a web worker, I have an html string like:
"<div id='foo'> <img src='bar'></img> <ul id='baz'></ul> </div>"
Is there any library I can import to easily access id and src attributes of the different tags ? Is regex the only way inside a worker ?

There are two ways to solve this problem efficiently:
Regex
With the risk of getting false positives, you can use something like:
// Matches the first <img ...> tag (case-insensitive) and captures its src attribute:
//   group 1 - the opening quote character (" or ')
//   group 2 - the attribute value; it may contain the other quote character
//   group 3 - the closing quote, which must equal group 1
var pattern = /<img [^>]*?src=(["'])((?:[^"']+|(?!\1)["'])*)(\1)/i;
// `string` is the HTML text to search; it is defined elsewhere.
var match = string.match(pattern);
// Fall back to an empty string when no <img> with a quoted src was found.
var src = match ? match[2] : '';
Built-in parser & messaging
If getting the HTML right is a critical requirement, just let the browser parse the HTML, by passing the string to the caller. Here's a full example:
Caller:
// Page side: start the worker and serve the requests it posts back.
//   'getsrc' - parse the HTML in request.data and reply with the src of every <img>
//   'debug'  - print request.data to the console
var worker = new Worker('worker.js');
worker.addEventListener('message', function(e) {
    var request = e.data;
    if (!request) return;

    switch (request.method) {
        case 'getsrc':
            // Unlike document.createElement, etc, the following method does not
            // load the image when the HTML is parsed
            var scratchDocument = document.implementation.createHTMLDocument('');
            scratchDocument.body.innerHTML = request.data;
            var sources = Array.prototype.map.call(
                scratchDocument.getElementsByTagName('img'),
                function(image) {
                    return image.getAttribute('src');
                }
            );
            // Answer with the same messageID so the worker can match the reply.
            worker.postMessage({
                messageID: request.messageID,
                result: sources
            });
            break;
        case 'debug':
            console.log(request.data);
            break;
    }
});
worker.js
// A simple generic messaging API
// A simple generic messaging API
// Pending callbacks keyed by message ID; an entry is removed once its reply arrives.
var callbacks = {};
var lastMessageID = 0;
addEventListener('message', function(e) {
    // Ignore messages that carry no payload.
    if (!e.data) return;
    var messageID = e.data.messageID;
    // Own-property check so an ID like "constructor" can never resolve to an
    // inherited Object.prototype member.
    if (!Object.prototype.hasOwnProperty.call(callbacks, messageID)) return;
    var callback = callbacks[messageID];
    // Each request gets exactly one reply: drop the entry before calling back so
    // the table does not grow with every request.
    delete callbacks[messageID];
    callback(e.data.result);
});
// Post a request to the page.
//   method   - name of the operation the page should perform
//   data     - payload for that operation
//   callback - optional function called with the result of the reply
function sendRequest(method, data, callback) {
    var messageID = ++lastMessageID;
    if (callback) callbacks[messageID] = callback;
    postMessage({
        method: method,
        data: data,
        messageID: messageID
    });
}
// Example:
sendRequest('getsrc',
    '<img src="foo.png">' +
    "<img src='bar.png'>" +
    '<textarea><img src="should.not.be.visible"></textarea>',
    function(result) {
        sendRequest('debug', 'Received: ' + result.join(', '));
    }
);

We Keep Coding

JavaScript is the programming language of the Web.

Save 1st page of a PDF file using JavaScript - javascript

I need to extract 1st page of an uploaded PDF file(in SharePoint Online) & save it as a separate PDF file using JavaScript. After some searching I found this. But I'm not able to understand how it works. Please help.

Related

How to use microsoft cognitive api for emotion detection in faces?

Dynamically creating ODT using WebODF / Javascript

Creating a CSV file from a Meteor.js Collection

Plupload chunk size renaming file to Blob

Inside a web worker, how to find html attribute from string?

Categories

Resources