Convert HTML to Word then to pdf - javascript

I need to convert HTML to PDF. I have tried with jsPDF and read a lot of questions here on stackoverflow about this. I have tried all the methods that exist, html(), fromHtml, html2pdf and html2canvas. But all of them have various problems. Either missing content, fuzzy content or margins are completely off.
So I am trying a different route. I found following code snippet to convert to word document. And this works.
function exportHTML(){
var header = "<html xmlns:o='urn:schemas-microsoft-com:office:office' "+
"xmlns:w='urn:schemas-microsoft-com:office:word' "+
"xmlns='http://www.w3.org/TR/REC-html40'>"+
"<head><meta charset='utf-8'><title>Export HTML to Word Document with JavaScript</title></head><body>";
var footer = "</body></html>";
var sourceHTML = header+document.getElementById("source-html").innerHTML+footer;
var source = 'data:application/vnd.ms-word;charset=utf-8,' + encodeURIComponent(sourceHTML);
var fileDownload = document.createElement("a");
document.body.appendChild(fileDownload);
fileDownload.href = source;
fileDownload.download = 'document.doc';
fileDownload.click();
document.body.removeChild(fileDownload);
}
However I do not want the word file to be downloaded. I need to capture it and convert it to a base64 string because then I can send it to a rest api that can convert the word document to pdf. That rest api does not support html directly otherwise I would just send the html. Hence the workaround to word then to pdf. ps I cannot use an online pdf solution due to sensitive information, the rest api is an internal service.

However I do not want the word file to be downloaded. I need to capture it and convert it to a base64 string because then I can send it to a rest api that can convert the word document to pdf.
Then no need to insert it into a download link. Just base64 encode the string with btoa:
function exportHTML(){
var header = "<html xmlns:o='urn:schemas-microsoft-com:office:office' "+
"xmlns:w='urn:schemas-microsoft-com:office:word' "+
"xmlns='http://www.w3.org/TR/REC-html40'>"+
"<head><meta charset='utf-8'><title>Export HTML to Word Document with JavaScript</title></head><body>";
var footer = "</body></html>";
var sourceHTML = header+document.getElementById("source-html").innerHTML+footer;
var source = 'data:application/vnd.ms-word;charset=utf-8,' + encodeURIComponent(sourceHTML);
// encode here instead of creating a link
var encoded = window.btoa(source);
return encoded;
}
Then you'll be free to use XMLHttpRequest to send the encoded string to your API endpoint. E.g.:
var encodedString = exportHTML();
var xhr = new XMLHttpRequest();
xhr.open('POST', '/my-conversion-endpoint', true);
xhr.setRequestHeader('Content-type', 'application/x-www-form-urlencoded');
xhr.onreadystatechange = function() {
if(xhr.readyState == 4 && xhr.status == 200) {
// request finished
alert(xhr.responseText);
}
}
xhr.send('encodedString=' + encodedString);

Use "new Blob" by file's construct:
function exportHTML(){
var header = "<html xmlns:o='urn:schemas-microsoft-com:office:office' "+
"xmlns:w='urn:schemas-microsoft-com:office:word' "+
"xmlns='http://www.w3.org/TR/REC-html40'>"+
"<head><meta charset='utf-8'><title>Export HTML to Word Document with JavaScript</title></head><body>";
var footer = "</body></html>";
var sourceHTML = header+document.getElementById("source-html").innerHTML+footer;
var source = 'data:application/vnd.ms-word;charset=utf-8,' + encodeURIComponent(sourceHTML);
//var fileDownload = document.createElement("a");
//document.body.appendChild(fileDownload);
//fileDownload.href = source;
//fileDownload.download = 'document.doc';
//fileDownload.click();
//document.body.removeChild(fileDownload);
var my_file=new Blob([source]);
getBase64(my_file);
}
function getBase64(file) {
var reader = new FileReader();
reader.readAsDataURL(file);
reader.onload = function () {
console.log(reader.result);
};
reader.onerror = function (error) {
console.log('Error: ', error);
};
}
exportHTML();
<div id="source-html">Hi <b>World</b>!</div>

Related

Google Apps Script - use Google Drive pdf with PDF-lib.js

Background:
I am try to fill in a pdf file that:
a) has form fields.
b) that is stored on Google Drive.
with data that is stored in Google spreadsheet.
I am using:
Google Apps Script.
HtmlService
PDF-lib.js in a htmlOutput object generated from a htmlTemplate.
The work flow is:
The showModalDialog_downloadFilledPDFform_NAMOFFORM() function is called from a menu.
The function is:
function showModalDialog_downloadFilledPDFform_NAMOFFORM() {
var pdf_template_file_url = getPDFfileURL("form1.pdf");
var htmlTemplate = HtmlService.createTemplateFromFile('downloadFilledPDFformHTML');
htmlTemplate.dataFromServerTemplate = { pdf_template_file: "form1.pdf", pdf_template_file_url: pdf_template_file_url };
var htmlOutput = htmlTemplate.evaluate();
htmlOutput.setWidth(648.1);
htmlOutput.setHeight(286.300)
SpreadsheetApp.getUi().showModalDialog(htmlOutput, 'Download filled PDF');
}
The url that is passed to the htmlTemplate is generated by: "fillPDFForm.gs"
function: fillPDFForm.gs:
var pdfFileNamesAndIDs = [ ]
pdfFileNamesAndIDs.push(["form1.pdf", "1y8F5NgnK50mdtWSR6v1b8pELsbbBJert"])
pdfFileNamesAndIDs.push(["form2.pdf", "1B4BOaI-BqFmhmnFx7FaT-yys-U0vkYKz"])
pdfFileNamesAndIDs.push(["form3.pdf", "17LrJpRA5oBZBqw-2du1H74KxWIX55qYC"])
function getPDFfileURL(fileName) {
var documentID = "";
for (var i in pdfFileNamesAndIDs) {
//console.log(pdfFileNamesAndIDs[i][0]);
if (pdfFileNamesAndIDs[i][0] == fileName) {
documentID = pdfFileNamesAndIDs[i][1];
console.log("documentID: " + documentID);
}
}
var documentFile = DriveApp.getFileById(documentID);
var documentURL = documentFile.getDownloadUrl();
Logger.log("documentURL = "+documentURL);
return documentURL;
}
The Problem:
The URL generated by getPDFfileURL() doesn't work in the html file generated in showModalDialog_downloadFilledPDFform_NAMOFFORM().
The error in Chrome dev console is:
pdf-lib#1.11.0:15 Uncaught (in promise) Error: Failed to parse PDF document (line:0 col:0 offset=0): No PDF header found
at e [as constructor] (pdf-lib#1.11.0:15:189222)
at new e (pdf-lib#1.11.0:15:190065)
at e.parseHeader (pdf-lib#1.11.0:15:401731)
at e.<anonymous> (pdf-lib#1.11.0:15:400782)
at pdf-lib#1.11.0:15:1845
at Object.next (pdf-lib#1.11.0:15:1950)
at pdf-lib#1.11.0:15:887
at new Promise (<anonymous>)
at i (pdf-lib#1.11.0:15:632)
at e.parseDocument (pdf-lib#1.11.0:15:400580)
The basic concept for the html page (shown as a modal dialog box), came from: https://jsfiddle.net/Hopding/0mwfqkv6/3/
The contents of the htmlTemplate are:
<!DOCTYPE html>
<html>
<head>
<base target="_top">
<!-- Add Stylesheet -->
<?!= HtmlService.createHtmlOutputFromFile('downloadFilledPDFformCSS').getContent(); ?>
<!-- Add pdf-lib and downloadjs libraries -->
<!-- https://pdf-lib.js.org/ and https://github.com/rndme/download -->
<!-- https://jsfiddle.net/Hopding/0mwfqkv6/3/ -->
<script src="https://unpkg.com/pdf-lib#1.11.0"></script>
<script src="https://unpkg.com/downloadjs#1.4.7"></script>
</head>
<body>
<h2 id="myTitle"></h2>
<p>Click the button to fill form fields in an the following PDF document: <code id="pdf_template_file">pdf-lib</code></p>
<button onclick="fillForm()">Fill PDF</button>
<p class="small">(Your browser will download the resulting file)</p>
</body>
<script>
const data = <?!= JSON.stringify(dataFromServerTemplate) ?>; //Stores the data directly in the javascript code
function removeExtension(filename) {
return filename.substring(0, filename.lastIndexOf('.')) || filename;
}
const pdf_template_file = data.pdf_template_file;
const pdf_template_file_basename = removeExtension(pdf_template_file);
// sample usage
function initialize() {
document.getElementById("myTitle").innerText = pdf_template_file;
//or use jquery: $("#myTitle").text(data.first + " - " + data.last);
document.getElementById("pdf_template_file").innerText = pdf_template_file;
}
// use onload or use jquery to call your initialization after the document loads
window.onload = initialize;
</script>
<script>
const { PDFDocument } = PDFLib;
async function fillForm() {
// Fetch the PDF with form fields
const formUrl = data.pdf_template_file_url
//const formPdfBytes = await fetch(formUrl).then(res => res.arrayBuffer())
const formPdfBytes = await fetch(formUrl, {
redirect: "follow",
mode: 'no-cors',
method: 'GET',
headers: {
'Content-Type': 'application/pdf',
}
}).then(res => res.arrayBuffer());
// Load a PDF with form fields
const pdfDoc = await PDFDocument.load(formPdfBytes);
// Get the form containing all the fields
const form = pdfDoc.getForm()
// Get all fields in the PDF by their names
const invIDField = form.getTextField('invID')
const makeAndModelField = form.getTextField('makeAndModel')
const nameField = form.getTextField('name')
const addressField = form.getTextField('address')
const phoneNumberField = form.getTextField('phoneNumber')
const emailAddressField = form.getTextField('emailAddress')
const dateField = form.getTextField('date')
// Output file name
const INPUT_FNAME = "AN"
const INPUT_LNAME = "Other"
// Fill in the basic info fields
invIDField.setText()
makeAndModelField.setText()
nameField.setText(INPUT_FNAME + " " + INPUT_LNAME)
addressField.setText()
phoneNumberField.setText()
emailAddressField.setText()
dateField.setText()
// Serialize the PDFDocument to bytes (a Uint8Array)
const pdfBytes = await pdfDoc.save({updateFieldAppearances: false})
const outputPDFfilename = pdf_template_file_basename + "." + INPUT_FNAME + "_" + INPUT_LNAME + ".pdf"
// Trigger the browser to download the PDF document
download(pdfBytes, outputPDFfilename, "application/pdf");
}
</script>
</html>
I have replicated the contents of the html file on my testing webserver. The server has 3 files: index.html, stykesheet.css and form1.pdf
The pdf (on the web server) is the same pdf file that is stored on Google drive.
On my server the following works:
if I use the pdf file that is in the same folder as the html and css files, a filled pdf is offered for download.
...but the following doesn't work:
if I use the same URL that is generated by getPDFfileURL(), nothing happens and no filled pdf is offered for download.
So the question is:
How do I generate the correct URL (for the pdf file stored in Google Drive), so it can then be used by PDF-lib.js (in the htmlTemplate)?
The answer is:
fetch the pdf file from Google drive as raw bytes.
encode the bytes as base64.
pass the base64 string from the GAS function to the htmlTemplate.
This is the function that gets the pdf file and returns it as a base64 encoded string:
function getPDFfileAsBase64() {
var fileId = "";
var url = "https://drive.google.com/uc?id=" + fileId + "&alt=media";
console.log("url: " + url);
var params = {
method: "get",
headers: {
Authorization: "Bearer " + ScriptApp.getOAuthToken(),
},
};
var bytes = UrlFetchApp.fetch(url, params).getContent();
var encoded = Utilities.base64Encode(bytes);
//console.log("encoded: " + encoded);
return encoded;
}
the function to create a modal dialog is:
function showModalDialog_downloadFilledPDFform_form1() {
// Display a modal dialog box with custom HtmlService content.
var pdf_template_file = "form1.pdf";
var pdf_template_file_AsBase64 = getPDFfileAsBase64(pdf_template_file);
var htmlTemplate = HtmlService.createTemplateFromFile('downloadFilledPDFformHTML');
htmlTemplate.dataFromServerTemplate = { pdf_template_file: pdf_template_file, pdf_template_file_AsBase64: pdf_template_file_AsBase64};
var htmlOutput = htmlTemplate.evaluate();
htmlOutput.setWidth(648.1);
htmlOutput.setHeight(286.300)
SpreadsheetApp.getUi().showModalDialog(htmlOutput, 'Download filled PDF');
}

Consume PDF stream into jsp

I am trying to display PDF report inside jsp
I have PDFstream available using following code
private void generatePDFReport(OutputStream stream, JasperPrint jasperPrint) throws JRException {
JRPdfExporter jrpdfexporter = new JRPdfExporter();
jrpdfexporter.setExporterInput(new SimpleExporterInput(jasperPrint));
jrpdfexporter.setExporterOutput(new SimpleOutputStreamExporterOutput(stream));
SimplePdfExporterConfiguration configuration = new SimplePdfExporterConfiguration();
jrpdfexporter.setConfiguration(configuration);
jrpdfexporter.exportReport();
}
now I am outputstream which is basically PDF Stream which i want to display into jsp, I dont want to set whole response contenttype as application/pdf but i want to embed pdf into part of jsp output
So i convrted pdfstream output into bytearray and trying to display using following
<script language="Javascript">
function loadDoc() {
alert('called loadDoc');
var xhr = new XMLHttpRequest();
alert('called xhr');
xhr.responseType = 'arraybuffer';
alert('called responseType');
alert('called xhr.onload ');
// Create the Blob URL:
var buffer = xhr.response;
var blob = new Blob([<%=byteCharSet%>], {
type: 'application/pdf'
});
var objectURL = URL.createObjectURL(blob);
alert(objectURL);
// Create an iframe to demonstrate it:
var iframe = document.createElement('iframe');
iframe.className = 'sample-iframe';
iframe.src = objectURL;
document.body.appendChild(iframe);
console.log(objectURL);
//xhr.open('GET', 'https://cors-anywhere.herokuapp.com/http://www.xmlpdf.com/manualfiles/hello-world.pdf', true);
//xhr.send();
}
</script>
but when I am trying to execute javascript couldn't understand bytearray output it simply getting failed blob value getting displayed as below in view page source
var blob = new Blob([%PDF-1.4
%����
3 0 obj
<</Filter/FlateDecode/Length 625>>stream
x���M��0#��>��um�'�v�������C
^���V�����Ķ�H����3�?�}� � �q�D3�
q�`�e��˼'��Do?1�(Ξ�h��2s�S(^�����gĈ^�/b��a�r"\"���_���Г�Lw
I used Iframe to solve this issue.
In iframe for src i used another jsp.
out.write("<iframe src= displayPDF.jsp" + " " + "type=application/pdf width=100% height=600px ");
out.write("</iframe>");
following is the displayPDF.jsp
<%# page language="java" contentType="text/html; charset=UTF-8"
pageEncoding="UTF-8"%><%
byte[] pdfByteArray = (byte[]) session.getAttribute("PDFbyteArray");
if (pdfByteArray == null)
System.out.println("pdfByteArray is null");
response.setHeader("Content-Disposition", "inline; filename=\"MyFile.pdf\"");
response.setContentType("application/pdf; name=\"MyFile.pdf\"");
ServletOutputStream serv_out = response.getOutputStream();
serv_out.write(pdfByteArray);
serv_out.flush();
serv_out.close();
out.clear();
session.removeAttribute("PDFbyteArray");%>

Output SharePoint Document Library Files to CSV File Using JavaScript

I’m seeking how to output SharePoint Document Library Files to csv file. I found script that get me almost there, but I can’t figure out how to update the code to export the information to a csv file instead to the console.log() or to an alert(). Everything I tried breaks the code. I review other JavaScript concept that shows the how to add out to CSV but I again the script concept breaks the code I’m trying to modify. The script I am using. In addition, the script output the file names. I like to get help on how I can not only output the file name, but I like to output, modified date, created date, and the link to the file. I hope this is possible and I appreciate any help in achieving this concept. Script I'm using follows below.
jQuery(document).ready(function() {
var scriptbase = _spPageContextInfo.webServerRelativeUrl + "/_layouts/15/";
$.getScript(scriptbase + "SP.Runtime.js", function() {
$.getScript(scriptbase + "SP.js", function() {
$.getScript(scriptbase + "SP.DocumentManagement.js", createDocumentSet);
});
});
});
var docSetFiles;
function createDocumentSet() {
//Get the client context,web and library object.
clientContext = new SP.ClientContext.get_current();
oWeb = clientContext.get_web();
var oList = oWeb.get_lists().getByTitle("Fact Sheets & Agreements");
clientContext.load(oList);
//Get the root folder of the library
oLibraryFolder = oList.get_rootFolder();
var documentSetFolder = "sites/nbib/ep/Fact%20Sheets/";
//Get the document set files using CAML query
var camlQuery = SP.CamlQuery.createAllItemsQuery();
camlQuery.set_folderServerRelativeUrl(documentSetFolder);
docSetFiles = oList.getItems(camlQuery);
//Load the client context and execute the batch
clientContext.load(docSetFiles, 'Include(File)');
clientContext.executeQueryAsync(QuerySuccess, QueryFailure);
}
function QuerySuccess() {
//Loop through the document set files and get the display name
var docSetFilesEnumerator = docSetFiles.getEnumerator();
while (docSetFilesEnumerator.moveNext()) {
var oDoc = docSetFilesEnumerator.get_current().get_file();
alert("Document Name : " + oDoc.get_name());
console.log("Document Name : " + oDoc.get_name());
}
}
function QueryFailure() {
console.log('Request failed - ' + args.get_message());
}
Sample test script in chrome.
function QuerySuccess() {
//Loop through the document set files and get the display name
var csv = 'Document Name\n';
var docSetFilesEnumerator = docSetFiles.getEnumerator();
while (docSetFilesEnumerator.moveNext()) {
var oDoc = docSetFilesEnumerator.get_current().get_file();
//alert("Document Name : " + oDoc.get_name());
//console.log("Document Name : " + oDoc.get_name());
csv += oDoc.get_name();//+',' if more cloumns
csv += "\n";
}
var hiddenElement = document.createElement('a');
hiddenElement.href = 'data:text/csv;charset=utf-8,' + encodeURI(csv);
hiddenElement.target = '_blank';
hiddenElement.download = 'DocumentList.csv';
hiddenElement.click();
}

Error during JSON file download

I am trying to download JSON data into a JSON file using below mentioned code but the code just gives me a blank Internet Explorer. I need code to download JSON file without having any event raised at user interface.
var dataUri = "data:text/json;charset=utf-8," + encodeURIComponent(JSON.stringify(JSONData));
var downloadAnchorNode = document.createElement('a');
downloadAnchorNode.setAttribute("href", dataUri);
downloadAnchorNode.setAttribute("download", "CarData.json");
downloadAnchorNode.click();
You are getting a blank page because you aren't inserting the anchor node into the DOM. Consider the following snippet. Even if you remove the download attribute, you will be taken to a page with JSON data on it.
var JSONData = {
foo: "bar"
};
var dataUri = "data:text/json;charset=utf-8," + encodeURIComponent(JSON.stringify(JSONData));
var downloadAnchorNode = document.createElement('a');
downloadAnchorNode.innerHTML = "Click";
downloadAnchorNode.setAttribute("href", dataUri);
downloadAnchorNode.setAttribute("download", "CarData.json");
document.body.appendChild(downloadAnchorNode);
downloadAnchorNode.click();
Here it is without the download:
var JSONData = {
foo: "bar"
};
var dataUri = "data:text/json;charset=utf-8," + encodeURIComponent(JSON.stringify(JSONData));
var downloadAnchorNode = document.createElement('a');
downloadAnchorNode.innerHTML = "Click";
downloadAnchorNode.setAttribute("href", dataUri);
document.body.appendChild(downloadAnchorNode);
downloadAnchorNode.click();
Additionally, as others have pointed out, IE does not support the download attribute.

File created with blob does not support NON-ASCII characters

First of all I have to say that questions similar to this have been asked before, but the answers did not help.
Download BLOB content using specified charset
F-8 encoidng issue when exporting csv file , JavaScript
This is the code.
ctrl.downloadFile = function (file) {
var filename = file.filename.split("/"),
name = (filename[filename.length - 1]);
$http({method: 'GET', url: '/download/' + name}, {responseType: "blob"}).then(
function success(response) {
var blob = new Blob([response.data], {type: response.headers['content-type'] + ";text/csv;charset=utf-8,%EF%BB%BF"}),
url = $window.URL || $window.webkitURL,
fileUrl = url.createObjectURL(blob);
var anchor = document.createElement('a');
anchor.href = fileUrl;
anchor.target = '_blank';
anchor.download = name;
anchor.style = "display: none";
document.body.appendChild(anchor);
anchor.click();
setTimeout(function () {
document.body.removeChild(anchor);
}, 100);
}
)
};
This is the html code that calls the downloadFile() function.
<a ng-click="$ctrl.downLoadFile(file)" target='_blank'>{{file.filename}}</a>
The Downloaded file does not support German special characters(Å, Ø, Ü ). What to do that the file supports those characters?
Update 1: I have done this also. But it did not work either.
var blob = new Blob(["\ufeff", response.data],
{type: response.headers['content-type'] +
";text/csv;charset=utf-8"})
Update 2: It works fine if I use these characters(Å, Ø, Ü ) instead of the response.data.
var blob = new Blob(["\ufeff", "Å Ø Ü" ],
{type: response.headers['content-type'] +
";text/csv;charset=utf-8"})
Update 3: This is a single page application. And Yes ,I have used <meta charset="utf-8"> in the head section of the html page.
Using the whole ctrl.downloadFile() was unnecessary. Instead, I used a function to make the download link and then call it in ng-href.
ctrl.urlForFile = function(file) {
var split = file.filename.split("/");
return '/download/' + file['xyz'] + "/" + split[split.length - 1];
};
<a ng-href="{{$ctrl.urlForFile(file)}}" target='_blank'>{{file.filename}}</a>
If you do not need to make the download link dynamically, you should use the link directly in ng-href.
<a ng-href="/download/xyz/dfdsf23423423" target='_blank'>{{file.filename}}</a>

Categories