Force Meteor To Refresh / Re-render Templates? - javascript

*For reference I'm using iron router.
Instead of a sign in page I have this global sign in form embedded in an nav (aka on every page).
Right now I'm doing a really hacky refresh to reload the page once a user logs in.
I would like to just reload to the template aka not refresh the whole page.
Basically just want the templates rendered function to rerun on login.
Here's my current login code:
'submit #login': function(event, template){
event.preventDefault();
var handle = template.find('#usernameLogin').value;
var secretKey = template.find('#passwordLogin').value;
Meteor.loginWithPassword(handle, secretKey, function(err){
if (err) {
alert(err);
}else{
$('#close').click();
/* replace this with reactive ajax or whatever when you can! */
Meteor._reload.reload();
}
});
},
My render function which I think may be the real issue now:
Template.tournament.rendered = function () {
thisCampaign = this.data;
var self = this;
if (this.data.tournament.live) {
/* if theres a registered user */
if (Meteor.userId()) {
/* Select a winner box */
var participants = $('.participant-id');
var currentParticipant;
var nextRound;
var thisMatch;
var nextMatch;
var bracket;
participants.map(function(index, value){
if ($(value).text() === Meteor.userId()) {
if ($(value).parent().find('.participant-status').text() === 'undetermined') {
nextRound = $(value).parent().find('.participant-round').text();
thisMatch = $(value).parent().find('.participant-match').text();
bracket = $(value).parent().parent().parent().find('.participant');
};
};
});
nextRound = parseInt(nextRound) + 1;
nextMatch = Math.round(parseInt(thisMatch)/2) - 1;
if (parseInt(thisMatch) % 2 != 0) {
currentParticipant = 0;
}else{
currentParticipant = 1;
}
var winnerOptions = '';
var winnerBox = $('<div class="select-winner">');
if (bracket) {
bracket.map(function(index, value) {
winnerOptions += '<span class="winner-option"> '+$(value).find('.participant-title').text()+' <div class="winner-info"> '+$(value).find('a').html()+' </div> </span>'
});
winnerBox.append(winnerOptions);
$($($('.round'+nextRound).find('li')[nextMatch]).find('.participant')[currentParticipant]).removeClass('loser').addClass('undetermined');
$($($('.round'+nextRound).find('li')[nextMatch]).find('.participant')[currentParticipant]).find('a').addClass('tooltip').html(winnerBox);
};
}else{
}
}else{
/* Tournament Start Time */
var tournamentStartTime = function(){
var d = new Date();
var n = d.getTime();
var currentTime = TimeSync.serverTime(n);
var startTime = self.data.card.startTime;
var difference = startTime - currentTime;
var hoursDifference = Math.floor(difference/1000/60/60);
difference -= hoursDifference*1000*60*60
var minutesDifference = Math.floor(difference/1000/60);
difference -= minutesDifference*1000*60
var secondsDifference = Math.floor(difference/1000);
/* if ends (make tournament live server side?) */
if (hoursDifference < 0 || minutesDifference < 0 || secondsDifference < 0) {
Meteor.clearInterval(tStartTime);
Session.set("tournamentStartTime", false);
}else{
if (hoursDifference < 10) {hoursDifference = "0"+hoursDifference;}
if (minutesDifference < 10) {minutesDifference = "0"+minutesDifference;}
if (secondsDifference < 10) {secondsDifference = "0"+secondsDifference;}
var formattedTime = hoursDifference + ':' + minutesDifference + ':' + secondsDifference;
Session.set("tournamentStartTime", formattedTime);
}
};
Session.set("tournamentStartTime", '00:00:00');
tournamentStartTime();
var tStartTime = Meteor.setInterval(tournamentStartTime, 1000);
/* Allow new user sign up */
var alreadySignedUp = false;
var usersSignedUp = $('.participant-id')
usersSignedUp.map(function (index, user) {
if ($(user).text().trim() === Meteor.userId()) {
alreadySignedUp = true;
}
});
if (this.data.card.host != Meteor.user().username && !(alreadySignedUp)) {
var openSlots = [];
var allSlots = $('.participant');
allSlots.map(function (index, participant) {
if ($(participant).find('.participant-title').text().trim() === '' && !($(participant).hasClass('loser'))) {
openSlots.push(participant);
}
});
openSlots.map(function (openSlot, index) {
$(openSlot).removeClass('winner').addClass('undetermined');
});
}
/* if theres a registered user */
if (Meteor.userId()) {
}else{
}
}
};

From what i can see there, your rendered function would not work as you expect as the template may render while the loggingIn state is still occuring...
My suggestion would be to use something along the lines of {{#if currentUser}} page here{{/if}} and then put the code you are trying to run in the rendered in a helper inside that currentUser block that way it would only display and be called if there is a logged in user, otherwise it would not show up and you would not need to re-render the page to perform any of that.
Basically once the user has logged in, any helper (other than rendered) that has the Meteor.userId() or Meteor.user() functions being called would re-run automatically, otherwise you could perform login actions inside a Tracker.autorun function if they are global to your app per client.

Related

One time initialization for JavaScript variable

I am developing a JavaScript for a webpage. This script has to create a popup, when the user is idle for a certain time. When the popup shows up, the user can choose to either close the popup or to minimize it.
In case of closing the popup, further opened pages within the website shall not open the popup anymore. In case of minimizing, it should not do either. Nonetheless when the user has a certain idle time on any page the first time, it shall appear.
It works in so far, that the pop up is created and also the closing of the pop works (and that it does not open anymore). But it does not work a refresh of the page anymore. So the storing does not work. And I know it is, because of my variables and a refresh also restarts the script again, so the initialization of the variables does rewrite the session value.
So basically my question is: How do it do the 1st time initialization of the variables, which than are furtherly used after a refresh?
My code is the following:
var isClosed = new Boolean(false);
var isShrinked = new Boolean(false);
var test = "Tesst";
sessionStorage.setItem("session", isClosed=false);
function close_popup() {
$('#' + 'box').fadeOut('slow');
sessionStorage.setItem("session", isClosed=true);
}
(function idelor(){
document.onclick = document.onmousemove = document.onkeypress = function() {
idleCounter = 0;
};
window.setInterval(function() {
if (sessionStorage.getItem("session").toString() == "false") {
if (isShrinked == false) {
if (++idleCounter >= IDLE_TIMEOUT) {
var scriptCode = document.createElement('p');
scriptCode.id = 'Sentotal';
document.getElementsByTagName('body')[0]
.appendChild(scriptCode);
document.getElementById("Sentotal").innerHTML = boxTotal;
}
}
}
}, interval);}());
You can use cookie for this matter.
//A simple function to get the value stored in cookie;
var getCookie = name =>
document.cookie.split(';')
.map(token => token.trim('').split('='))
.reduce((prev, curr) =>
(prev[curr[0]] = curr[1]) && prev, {})[name],
myPopup = () => {
if (!getCookie('noPopUp')) {
console.log('your popup logic');
document.cookie = 'noPopUp=true'
}
},
reset = () => {
document.cookie = 'noPopUp=true; expires=' + new Date();
};
reset(); //Remove this and myPopUp will do nothing until you restart your browser.
myPopUp();
myPopUp();
Cookie resets (by default) when browser closes.
I used Hin Fan Chan's suggestion of using cookies, and have the following, stable working solution coded:
Just two variables now as constant names for the Cookies:
var CLOSE_CONSTANT = "CloseCookie";
var MINIMIZE_CONSTANT = "MinimizeCookie";
Simple functions for creating and getting the cookies. Note, that "error" in getCookie(...) is very important for the initialization of the script!:
function setCookie(name, state) {
var cname = name;
var value = state;
document.cookie = cname + "=" + value;
}
function getCookie(cname) {
var name = cname + "=";
var ca = document.cookie.split(';');
for (var i = 0; i < ca.length; i++) {
var c = ca[i];
while (c.charAt(0) == ' ') {
c = c.substring(1);
}
if (c.indexOf(name) == 0) {
return c.substring(name.length, c.length);
}
}
return "error";
}
I also build a cookieChecker(...) function, which automatically fills error-Cookies (those, who are not existing atmo) with "false":
function cookieChecker(name) {
switch (getCookie(name)) {
case "error":
setCookie(name, "false");
break;
default:
break;
}
}
Now the final function, which is the only function being opened by the HTML. cookieChecker(...) is used twice: For the minimization and the closing of the popup. These two functions simply set the state of a cookie to true (and fading the box out):
(function idelor() {
var minutes = false;
var interval = minutes ? 60000 : 1000;
var IDLE_TIMEOUT = 5;
var idleCounter = 0;
document.onclick = document.onmousemove = document.onkeypress = function() {
idleCounter = 0;
};
cookieChecker(MINIMIZE_CONSTANT);
cookieChecker(CLOSE_CONSTANT);
window
.setInterval(
function() {
switch (getCookie(CLOSE_CONSTANT) + " "
+ getCookie(MINIMIZE_CONSTANT)) {
case "false false":
if (++idleCounter >= IDLE_TIMEOUT) {
var scriptCode = document.createElement('p');
scriptCode.id = 'Sentotal';
document.getElementsByTagName('body')[0]
.appendChild(scriptCode);
document.getElementById("Sentotal").innerHTML = BOXTOTAL;
}
default: break;
}
}, interval);
}())

How can I get an AJAX call to NOT finish before my handlebar template is done loading?

I am using a handlebars template to stick some info on my nav bar. The info is coming from a Rails controller via an AJAX call. The AJAX, given that it's asynchronous, is finishing after the template has received it's variables, thus a variable that should be set in the AJAX call never does so. Here's the code:
export default {
name: "data-menu-item",
initialize: function(container) {
$(document).ready(function() {
var source = $("#notification-menu-item").html();
var template = Handlebars.compile(source);
var user = Discourse.User.current();
var pro = false;
var logged_user = false;
var data_url = "";
$.ajax("/custom_group_names", {
type: 'GET'
}).done(function(res){
if(res.custom_group_names){
console.log(res.group_names);
for (var i=0; i < res.group_names.length; i++) {
// Agents, Brokers, ManagingBrokers, MortageBrokers, admins
if (res.group_names[i]["name"] === "Brokers" || res.group_names[i]["name"] === "ManagingBrokers" || res.group_names[i]["name"] === "MortageBrokers") {
console.log("groups were brokers, etc.");
pro = true;
data_url = "twobydev.com/brokerdashboard";
} else if (res.group_names[i]["name"] === "admins" || res.group_names[i]["name"] === "Agents") {
console.log("groups were admin or agents");
pro = true;
data_url = "twobydev.com/agentdashboard";
console.log(pro);
console.log(data_url);
}
}
}
});
if(user) {
logged_user = true;
if(user.total_unread_notifications > 0) {
new_notification_class = "new-notifications"
notification_count = "(" + user.total_unread_notifications + ")";
}
}
var html = template({pro: pro, logged_user: logged_user, data_url: data_url});
$('body').prepend(html);
});
}
}
logged_user gets set because it is outside of the ajax call, however, I need pro and data_url to be set as well. Any advice or help is much appreciated!
Move the template processing into the ajax callback
function processTemplate(){
var html = template({pro: pro, logged_user: logged_user, data_url: data_url});
$('body').prepend(html);
}
$.ajax({
.....
}).done(function(res){
/* existing processing code */
// now process template
processTemplate()
});
You just need to move this...
if(user) {
logged_user = true;
if(user.total_unread_notifications > 0) {
new_notification_class = "new-notifications"
notification_count = "(" + user.total_unread_notifications + ")";
}
}
var html = template({pro: pro, logged_user: logged_user, data_url: data_url});
$('body').prepend(html);
inside of your done function.

Correct order in for loop using Parse

I want to create a array containing objects, and I'm using Parse to query all the data.
However, the for loop which loops over the results doesn't does that in the correct order but randomly loops over the data. If I log i each iteration, the logs show different results every time.
Here is my code:
for (var i = 0; i < results.length; i++)
{
Parse.Cloud.useMasterKey();
// retrieve params
var objectid = results[i];
var self = request.params.userid;
// start query
var Payment = Parse.Object.extend("Payments");
var query = new Parse.Query(Payment);
query.get(objectid, {
success: function (payment) {
// get all the correct variables
var from_user_id = payment.get("from_user_id");
var to_user_id = payment.get("to_user_id");
var amount = payment.get("amount");
var createdAt = payment.updatedAt;
var note = payment.get("note");
var img = payment.get("photo");
var location = payment.get("location");
var status = payment.get("status");
var fromquery = new Parse.Query(Parse.User);
fromquery.get(from_user_id, {
success: function(userObject) {
var fromusername = userObject.get("name");
var currency = userObject.get("currency");
var toquery = new Parse.Query(Parse.User);
toquery.get(to_user_id, {
success: function(touser)
{
var tousername = touser.get("name");
if(tousername !== null || tousername !== "")
{
sendArray(tousername);
}
},
error: function(touser, error)
{
var tousername = to_user_id;
if(tousername !== null || tousername !== "")
{
sendArray(tousername);
}
}
});
function sendArray(tousername) {
var array = new Array();
// create the time and date
var day = createdAt.getDate();
var year = createdAt.getFullYear();
var month = createdAt.getMonth();
var hour = createdAt.getHours();
var minutes = createdAt.getMinutes();
// create the timestamp
var time = "" + hour + ":" + minutes;
var date = "" + day + " " + month + " " + year;
var associativeArray = {};
if(self == from_user_id)
{
fromusername = "self";
}
if(self == to_user_id)
{
tousername = "self";
}
associativeArray["from"] = fromusername;
associativeArray["to"] = tousername;
associativeArray["amount"] = amount;
associativeArray["currency"] = currency;
associativeArray["date"] = date;
associativeArray["time"] = time;
associativeArray["status"] = status;
if(note == "" || note == null)
{
associativeArray["note"] = null;
}
else
{
associativeArray["note"] = note;
}
if(img == "" || img == null)
{
associativeArray["img"] = null;
}
else
{
associativeArray["img"] = img;
}
if(location == "" || location == null)
{
associativeArray["location"] = null;
}
else
{
associativeArray["location"] = location;
}
array[i] = associativeArray;
if((i + 1) == results.length)
{
response.success(array);
}
},
error: function(userObject, error)
{
response.error(106);
}
});
},
error: function(payment, error) {
response.error(125);
}
});
}
But the i var is always set to seven, so the associative arrays are appended at array[7] instead of the correct i (like 1,2,3,4,5)
The reason that this is so important is because I want to order the payment chronologically (which I have done in the query providing the results).
What can I do to solve this issue?
Success is a callback that happens at a later point in time. So what happens is, the for loop runs 7 times and calls parse 7 times. Then after it has run each of parse success calls will be executed, they look at i which is now at 7.
A simple way to fix this is to wrap the whole thing in an immediate function and create a new closure for i. Something like this
for(var i = 0; i < results.length; i++){
function(iClosure) {
//rest of code goes here, replace i's with iClosure
}(i);
}
Now what will happen is that each success function will have access to it's own iClosure variable and they will be set to the value of i at the point they were created in the loop.

Cookie Code not removing opened element of cookie

var openClose = $('.openClose');
openClose.on('click', function() {
var cook = ReadCookie('slideHide'),
miniParent = $(this).parent().parent().parent().children('.main-content'),
miniDisp = miniParent.css('display');
if (miniDisp ==="block") {
KillCookie('slideHide');
$(this).parent().parent().parent().children('.main-content').slideUp();
var slide = cook + "," + "#"+$(this)
.parent()
.parent()
.parent()
.parent().attr("id") +
" #"+$(this).parent()
.parent().parent().attr("id");
SetCookie('slideHide', slide, 100);
}
else
{
$(this).parent().parent().parent().children('.main-content').slideDown();
var newCookie=[];
var a= $('.module').children('.main-content').filter(":hidden");
for(var i=0;i<a.length;i++){
var d = $(a[i++]);
var c = "#"+d.parent('.module').attr('id');
}
newCookie= c;
console.log(newCookie);
KillCookie('slideHide');
SetCookie('slideHide',d, 100);
}
});
These are my cookie functions:
function SetCookie(cookieName,cookieValue,nDays) {
var today = new Date();
var expire = new Date();
if (nDays==null || nDays==0) nDays=1;
expire.setTime(today.getTime() + 3600000*24*nDays);
document.cookie = cookieName+"="+escape(cookieValue)
+ ";expires="+expire.toGMTString(),';path = /';
}
function KillCookie(cookieName) {
SetCookie(cookieName,"", - 1);
}
function ReadCookie(cookieName) {
var theCookie=""+document.cookie;
var ind=theCookie.indexOf(cookieName+"=");
if (ind==-1 || cookieName=="") return "";
var ind1=theCookie.indexOf(";",ind);
if (ind1==-1) ind1=theCookie.length;
return unescape(theCookie.substring(ind+cookieName.length+1,ind1));
}
Setting the cookie to make it slideUp and stay hidden works, but when I try to open it, it slidesDown, then I refresh the page it doesn't stay open like it should.
To sort of get the picture - http://jsfiddle.net/zRT9u/
If you need to know more please ask me I am willing to provide more!
I edited the javascript it almost works but I am not getting all the objects that I need
NEW EDIT- Tried the $.map() function but when I open one, and refresh all of them are now open?
else {
$(this).parent().parent().parent().children('.main-content').slideDown();
KillCookie('slideHide');
var newCookie=[];
var a= $('.module').children('.main-content').filter(":hidden");
var c = $.map(a,function(n,i){
return $(n).parent().attr('id');
});
newCookie= c;
SetCookie('slideHide',newCookie, 100);
}
Fixed it by using $.map and .join()
var openClose = $('.openClose');
openClose.on('click', function() {
var cook = ReadCookie('slideHide'),
miniParent = $(this).parent().parent().parent().children('.main-content'),
miniDisp = miniParent.css('display');
if (miniDisp ==="block") {
KillCookie('slideHide');
$(this).parent().parent().parent().children('.main-content').slideUp();
var slide = cook+","+ "#"+$(this).parent().parent().parent().attr("id");
SetCookie('slideHide', slide, 100);
} else {
$(this).parent().parent().parent().children('.main-content').slideDown();
KillCookie('slideHide');
var newCookie=[],
a= $('.module').children('.main-content').filter(":hidden"),
c = $.map(a,function(n,i){
return "#"+$(n).parent().attr('id');
});
newCookie= c.join(',');
SetCookie('slideHide',newCookie, 100);
}
});
By creating a "global" array and then using the $.map function as well as adding "#"+ to the map function I was able to get the actual ID names. Then I set newCookie to c.join(',') and everything works perfectly after that!

How to extract text from a PDF in JavaScript

I wonder if is possible to get the text inside of a PDF file by using only Javascript?
If yes, can anyone show me how?
I know there are some server-side java, c#, etc libraries but I would prefer not using a server.
thanks
Because pdf.js has been developing over the years, I would like to give a new answer. That is, it can be done locally without involving any server or external service. The new pdf.js has a function: page.getTextContent(). You can get the text content from that. I've done it successfully with the following code.
What you get in each step is a promise. You need to code this way: .then( function(){...}) to proceed to the next step.
PDFJS.getDocument( data ).then( function(pdf) {
pdf.getPage(i).then( function(page){
page.getTextContent().then( function(textContent){
What you finally get is an string array textContent.bidiTexts[]. You concatenate them to get the text of 1 page. Text blocks' coordinates are used to judge whether newline or space need to be inserted. (This may not be totally robust, but from my test it seems ok.)
The input parameter data needs to be either a URL or ArrayBuffer type data. I used the ReadAsArrayBuffer(file) function in FileReader API to get the data.
Note: According to some other user, the library has updated and caused the code to break. According to the comment by async5 below, you need to replace textContent.bidiTexts with textContent.items.
function Pdf2TextClass(){
var self = this;
this.complete = 0;
/**
*
* #param data ArrayBuffer of the pdf file content
* #param callbackPageDone To inform the progress each time
* when a page is finished. The callback function's input parameters are:
* 1) number of pages done;
* 2) total number of pages in file.
* #param callbackAllDone The input parameter of callback function is
* the result of extracted text from pdf file.
*
*/
this.pdfToText = function(data, callbackPageDone, callbackAllDone){
console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
PDFJS.getDocument( data ).then( function(pdf) {
var div = document.getElementById('viewer');
var total = pdf.numPages;
callbackPageDone( 0, total );
var layers = {};
for (i = 1; i <= total; i++){
pdf.getPage(i).then( function(page){
var n = page.pageNumber;
page.getTextContent().then( function(textContent){
if( null != textContent.bidiTexts ){
var page_text = "";
var last_block = null;
for( var k = 0; k < textContent.bidiTexts.length; k++ ){
var block = textContent.bidiTexts[k];
if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
if( block.x < last_block.x )
page_text += "\r\n";
else if ( last_block.y != block.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
page_text += ' ';
}
page_text += block.str;
last_block = block;
}
textContent != null && console.log("page " + n + " finished."); //" content: \n" + page_text);
layers[n] = page_text + "\n\n";
}
++ self.complete;
callbackPageDone( self.complete, total );
if (self.complete == total){
window.setTimeout(function(){
var full_text = "";
var num_pages = Object.keys(layers).length;
for( var j = 1; j <= num_pages; j++)
full_text += layers[j] ;
callbackAllDone(full_text);
}, 1000);
}
}); // end of page.getTextContent().then
}); // end of page.then
} // of for
});
}; // end of pdfToText()
}; // end of class
I couldn't get gm2008's example to work (the internal data structure on pdf.js has changed apparently), so I wrote my own fully promise-based solution that doesn't use any DOM elements, queryselectors or canvas, using the updated pdf.js from the example at mozilla
It eats a file path for the upload since i'm using it with node-webkit.
You need to make sure you have the cmaps downloaded and pointed somewhere and you nee pdf.js and pdf.worker.js to get this working.
/**
* Extract text from PDFs with PDF.js
* Uses the demo pdf.js from https://mozilla.github.io/pdf.js/getting_started/
*/
this.pdfToText = function(data) {
PDFJS.workerSrc = 'js/vendor/pdf.worker.js';
PDFJS.cMapUrl = 'js/vendor/pdfjs/cmaps/';
PDFJS.cMapPacked = true;
return PDFJS.getDocument(data).then(function(pdf) {
var pages = [];
for (var i = 0; i < pdf.numPages; i++) {
pages.push(i);
}
return Promise.all(pages.map(function(pageNumber) {
return pdf.getPage(pageNumber + 1).then(function(page) {
return page.getTextContent().then(function(textContent) {
return textContent.items.map(function(item) {
return item.str;
}).join(' ');
});
});
})).then(function(pages) {
return pages.join("\r\n");
});
});
}
usage:
self.pdfToText(files[0].path).then(function(result) {
console.log("PDF done!", result);
})
Just leaving here a full working sample.
<html>
<head>
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
</head>
<body>
<input id="pdffile" name="pdffile" type="file" />
<button id="btn" onclick="convert()">Process</button>
<div id="result"></div>
</body>
</html>
<script>
function convert() {
var fr=new FileReader();
var pdff = new Pdf2TextClass();
fr.onload=function(){
pdff.pdfToText(fr.result, null, (text) => { document.getElementById('result').innerText += text; });
}
fr.readAsDataURL(document.getElementById('pdffile').files[0])
}
function Pdf2TextClass() {
var self = this;
this.complete = 0;
this.pdfToText = function (data, callbackPageDone, callbackAllDone) {
console.assert(data instanceof ArrayBuffer || typeof data == 'string');
var loadingTask = pdfjsLib.getDocument(data);
loadingTask.promise.then(function (pdf) {
var total = pdf._pdfInfo.numPages;
//callbackPageDone( 0, total );
var layers = {};
for (i = 1; i <= total; i++) {
pdf.getPage(i).then(function (page) {
var n = page.pageNumber;
page.getTextContent().then(function (textContent) {
//console.log(textContent.items[0]);0
if (null != textContent.items) {
var page_text = "";
var last_block = null;
for (var k = 0; k < textContent.items.length; k++) {
var block = textContent.items[k];
if (last_block != null && last_block.str[last_block.str.length - 1] != ' ') {
if (block.x < last_block.x)
page_text += "\r\n";
else if (last_block.y != block.y && (last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
page_text += ' ';
}
page_text += block.str;
last_block = block;
}
textContent != null && console.log("page " + n + " finished."); //" content: \n" + page_text);
layers[n] = page_text + "\n\n";
}
++self.complete;
//callbackPageDone( self.complete, total );
if (self.complete == total) {
window.setTimeout(function () {
var full_text = "";
var num_pages = Object.keys(layers).length;
for (var j = 1; j <= num_pages; j++)
full_text += layers[j];
callbackAllDone(full_text);
}, 1000);
}
}); // end of page.getTextContent().then
}); // end of page.then
} // of for
});
}; // end of pdfToText()
}; // end of class
</script>
Here's some JavaScript code that does what you want using Pdf.js from http://hublog.hubmed.org/archives/001948.html:
var input = document.getElementById("input");
var processor = document.getElementById("processor");
var output = document.getElementById("output");
// listen for messages from the processor
window.addEventListener("message", function(event){
if (event.source != processor.contentWindow) return;
switch (event.data){
// "ready" = the processor is ready, so fetch the PDF file
case "ready":
var xhr = new XMLHttpRequest;
xhr.open('GET', input.getAttribute("src"), true);
xhr.responseType = "arraybuffer";
xhr.onload = function(event) {
processor.contentWindow.postMessage(this.response, "*");
};
xhr.send();
break;
// anything else = the processor has returned the text of the PDF
default:
output.textContent = event.data.replace(/\s+/g, " ");
break;
}
}, true);
...and here's an example:
http://git.macropus.org/2011/11/pdftotext/example/
Note: This code assumes you're using nodejs. That means you're parsing a local file instead of one from a web page since the original question doesn't explicitly ask about parsing pdfs on a web page.
#gm2008's answer was a great starting point (please read it and its comments for more info), but needed some updates (08/19) and had some unused code. I also like examples that are more full. There's more refactoring and tweaking that could be done (e.g. with await), but for now it's as close to that original answer as it could be.
As before, this uses Mozilla's PDFjs library. The npmjs package is at https://www.npmjs.com/package/pdfjs-dist.
In my experience, this doesn't do well in finding where to put spaces, but that's a problem for another time.
[Edit: I believe the update to the use of .transform has restored the whitespace as it originally behaved.]
// This file is called myPDFfileToText.js and is in the root folder
let PDFJS = require('pdfjs-dist');
let pathToPDF = 'path/to/myPDFfileToText.pdf';
let toText = Pdf2TextObj();
let onPageDone = function() {}; // don't want to do anything between pages
let onFinish = function(fullText) { console.log(fullText) };
toText.pdfToText(pathToPDF, onPageDone, onFinish);
function Pdf2TextObj() {
let self = this;
this.complete = 0;
/**
*
* #param path Path to the pdf file.
* #param callbackPageDone To inform the progress each time
* when a page is finished. The callback function's input parameters are:
* 1) number of pages done.
* 2) total number of pages in file.
* 3) the `page` object itself or null.
* #param callbackAllDone Called after all text has been collected. Input parameters:
* 1) full text of parsed pdf.
*
*/
this.pdfToText = function(path, callbackPageDone, callbackAllDone) {
// console.assert(typeof path == 'string');
PDFJS.getDocument(path).promise.then(function(pdf) {
let total = pdf.numPages;
callbackPageDone(0, total, null);
let pages = {};
// For some (pdf?) reason these don't all come in consecutive
// order. That's why they're stored as an object and then
// processed one final time at the end.
for (let pagei = 1; pagei <= total; pagei++) {
pdf.getPage(pagei).then(function(page) {
let pageNumber = page.pageNumber;
page.getTextContent().then(function(textContent) {
if (null != textContent.items) {
let page_text = "";
let last_item = null;
for (let itemsi = 0; itemsi < textContent.items.length; itemsi++) {
let item = textContent.items[itemsi];
// I think to add whitespace properly would be more complex and
// would require two loops.
if (last_item != null && last_item.str[last_item.str.length - 1] != ' ') {
let itemX = item.transform[5]
let lastItemX = last_item.transform[5]
let itemY = item.transform[4]
let lastItemY = last_item.transform[4]
if (itemX < lastItemX)
page_text += "\r\n";
else if (itemY != lastItemY && (last_item.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
page_text += ' ';
} // ends if may need to add whitespace
page_text += item.str;
last_item = item;
} // ends for every item of text
textContent != null && console.log("page " + pageNumber + " finished.") // " content: \n" + page_text);
pages[pageNumber] = page_text + "\n\n";
} // ends if has items
++self.complete;
callbackPageDone(self.complete, total, page);
// If all done, put pages in order and combine all
// text, then pass that to the callback
if (self.complete == total) {
// Using `setTimeout()` isn't a stable way of making sure
// the process has finished. Watch out for missed pages.
// A future version might do this with promises.
setTimeout(function() {
let full_text = "";
let num_pages = Object.keys(pages).length;
for (let pageNum = 1; pageNum <= num_pages; pageNum++)
full_text += pages[pageNum];
callbackAllDone(full_text);
}, 1000);
}
}); // ends page.getTextContent().then
}); // ends page.then
} // ends for every page
});
}; // Ends pdfToText()
return self;
}; // Ends object factory
Run in the terminal:
node myPDFfileToText.js
Updated 02/2021
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
<script>
function Pdf2TextClass(){
var self = this;
this.complete = 0;
this.pdfToText = function(data, callbackPageDone, callbackAllDone){
console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
var loadingTask = pdfjsLib.getDocument(data);
loadingTask.promise.then(function(pdf) {
var total = pdf._pdfInfo.numPages;
//callbackPageDone( 0, total );
var layers = {};
for (i = 1; i <= total; i++){
pdf.getPage(i).then( function(page){
var n = page.pageNumber;
page.getTextContent().then( function(textContent){
//console.log(textContent.items[0]);0
if( null != textContent.items ){
var page_text = "";
var last_block = null;
for( var k = 0; k < textContent.items.length; k++ ){
var block = textContent.items[k];
if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
if( block.x < last_block.x )
page_text += "\r\n";
else if ( last_block.y != block.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
page_text += ' ';
}
page_text += block.str;
last_block = block;
}
textContent != null && console.log("page " + n + " finished."); //" content: \n" + page_text);
layers[n] = page_text + "\n\n";
}
++ self.complete;
//callbackPageDone( self.complete, total );
if (self.complete == total){
window.setTimeout(function(){
var full_text = "";
var num_pages = Object.keys(layers).length;
for( var j = 1; j <= num_pages; j++)
full_text += layers[j] ;
console.log(full_text);
}, 1000);
}
}); // end of page.getTextContent().then
}); // end of page.then
} // of for
});
}; // end of pdfToText()
}; // end of class
var pdff = new Pdf2TextClass();
pdff.pdfToText('PDF_URL');
</script>
For all the people who actually want to use it on a node server:
/**
* Created by velten on 25.04.16.
*/
"use strict";
let pdfUrl = "http://example.com/example.pdf";
let request = require('request');
var pdfParser = require('pdf2json');
let pdfPipe = request({url: pdfUrl, encoding:null}).pipe(pdfParser);
pdfPipe.on("pdfParser_dataError", err => console.error(err) );
pdfPipe.on("pdfParser_dataReady", pdf => {
//optionally:
//let pdf = pdfParser.getMergedTextBlocksIfNeeded();
let count1 = 0;
//get text on a particular page
for (let page of pdf.formImage.Pages) {
count1 += page.Texts.length;
}
console.log(count1);
pdfParser.destroy();
});
It is possible but:
you would have to use the server anyway, there's no way you can get content of a file on user computer without transferring it to server and back
I don't thing anyone has written such library yet
So if you have some free time you can learn pdf format and write such a library yourself, or you can just use server side library of course.

Categories