Remove Duplicate Entries from Cookies read by JavaScript - javascript

I have the script below, which I'm using to set and read the last 5 viewed pages using JavaScript. The client does not want to render any duplicate URL/text, but I'm not having any luck with what I have tried so far.
Maybe I'm going about it all wrong. Any help would be appreciated.
// Set read, set & delete cookie functions-------------------------------------------------------------------------
/**
 * Read a cookie by its exact name.
 *
 * @param {string} cookie_name Name of the cookie to look up.
 * @returns {?string} The unescaped cookie value, or null when no cookie with
 *     that name exists.
 */
function getCookie (cookie_name){
  // The name is spliced into a regular expression, so regex metacharacters
  // must be escaped; otherwise a name such as "a.b" would also match "aXb".
  var safe_name = String(cookie_name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  var results = document.cookie.match('(^|;) ?' + safe_name + '=([^;]*)(;|$)');
  // unescape() mirrors the escape() call used by setCookie.
  return results ? unescape(results[2]) : null;
}
/**
 * Write a cookie, optionally with an expiry date.
 *
 * @param {string} name Cookie name.
 * @param {*} value Cookie value; it is passed through escape() before storing.
 * @param {?number} expiredays Days from now until expiry. When null or
 *     undefined no expires attribute is written (session cookie).
 */
function setCookie (name,value,expiredays){
  var expires_at = new Date();
  expires_at.setDate(expires_at.getDate() + expiredays);
  var cookie_text = name + "=" + escape(value);
  if (expiredays != null) {
    cookie_text += ";expires=" + expires_at.toGMTString();
  }
  document.cookie = cookie_text;
}
/**
 * Delete a cookie by overwriting it with an empty value whose expiry lies
 * one millisecond in the past.
 *
 * @param {string} cookie_name Name of the cookie to remove.
 */
function delete_cookie (cookie_name) {
  var just_expired = new Date(new Date().getTime() - 1);
  document.cookie = cookie_name + "=; expires=" + just_expired.toGMTString();
}
// Set last 5 visited pages cookies --------------------------------------------------------------------------------
// Runs as soon as this script is evaluated; the function declaration further down is hoisted, so the call works here.
// NOTE(review): the function reads tlvp_pages_count, tlvp_expiredays and tlvp_title, which are not declared in this
// snippet - confirm they are defined before this line executes.
tlvp_the_last_visited_pages();
/**
 * Get a cookie value for the "last visited pages" list.
 * Kept separate from getCookie: this one returns "" (not null) when the
 * cookie is absent, which the callers rely on.
 *
 * @param {string} name Exact cookie name.
 * @returns {string} The unescaped value, or "" when there is no such cookie.
 */
function fetchCookie(name){
  var all_cookies = document.cookie;
  if (all_cookies.length === 0) {
    return "";
  }
  var prefix = name + "=";
  var pairs = all_cookies.split(";");
  for (var i = 0; i < pairs.length; i++) {
    // Pairs are separated by "; ", so drop the leading space before comparing.
    var pair = pairs[i].replace(/^\s+/, "");
    // Compare against the start of the pair so that "foo" does not match
    // a longer name such as "xfoo".
    if (pair.indexOf(prefix) === 0) {
      return unescape(pair.substring(prefix.length));
    }
  }
  return "";
}
/**
 * Record the current page in the "last visited pages" cookies and render the
 * previously visited pages into the element with id "the_last_visited_pages".
 *
 * Cookie slot 0 always holds the current page; slots 1..tlvp_pages_count hold
 * older pages. Reads the globals tlvp_pages_count, tlvp_expiredays and
 * tlvp_title, which must be defined by the including page.
 */
function tlvp_the_last_visited_pages(){
  var i;
  // Shift every stored page one slot down (highest slot first so nothing is
  // overwritten before it has been copied), then store this page in slot 0.
  if (tlvp_pages_count > 0) {
    for (i = tlvp_pages_count; i >= 0; i--) {
      if (i > 0) {
        setCookie("tlvp_visited_page"+i+"_link", fetchCookie("tlvp_visited_page"+(i-1)+"_link"), tlvp_expiredays);
        setCookie("tlvp_visited_page"+i+"_title", fetchCookie("tlvp_visited_page"+(i-1)+"_title"), tlvp_expiredays);
      } else {
        setCookie("tlvp_visited_page"+i+"_link", document.URL, tlvp_expiredays);
        setCookie("tlvp_visited_page"+i+"_title", document.title, tlvp_expiredays);
      }
    }
  }

  // The history is still recorded on pages that have no widget container.
  var tlvp_div = document.getElementById('the_last_visited_pages');
  if (!tlvp_div) {
    return;
  }

  // This is where the code is created for the div...
  var title_div = document.createElement("div");
  title_div.className = "tlvp_title";
  title_div.appendChild(document.createTextNode(tlvp_title));
  tlvp_div.appendChild(title_div);

  var content_div = document.createElement("div");
  content_div.className = "tlvp_content";

  // Loops through the cookies and creates text links, showing each URL once.
  // Keys are prefixed so a URL can never collide with an Object.prototype name.
  var seen_links = {};
  for (i = 1; i <= tlvp_pages_count; i++) {
    var link = fetchCookie("tlvp_visited_page"+i+"_link");
    if (link == "" || seen_links["u:" + link]) {
      continue;
    }
    seen_links["u:" + link] = true;

    var anchor = document.createElement("a");
    anchor.href = getCookie("tlvp_visited_page"+i+"_link");
    anchor.appendChild(document.createTextNode(getCookie("tlvp_visited_page"+i+"_title")));

    var line = document.createElement("p");
    line.appendChild(anchor);
    content_div.appendChild(line);
  }
  tlvp_div.appendChild(content_div);
}

You could try storing the values as JSON in one cookie.
// Set read, set & delete cookie functions-------------------------------------------------------------------------
/**
 * Read a cookie by its exact name.
 *
 * @param {string} cookie_name Name of the cookie to look up.
 * @returns {?string} The unescaped cookie value, or null when no cookie with
 *     that name exists.
 */
function getCookie (cookie_name){
  // The name is spliced into a regular expression, so regex metacharacters
  // must be escaped; otherwise a name such as "a.b" would also match "aXb".
  var safe_name = String(cookie_name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  var results = document.cookie.match('(^|;) ?' + safe_name + '=([^;]*)(;|$)');
  // unescape() mirrors the escape() call used by setCookie.
  return results ? unescape(results[2]) : null;
}
/**
 * Write a cookie, optionally with an expiry date.
 *
 * @param {string} name Cookie name.
 * @param {*} value Cookie value; it is passed through escape() before storing.
 * @param {?number} expiredays Days from now until expiry. When null or
 *     undefined no expires attribute is written (session cookie).
 */
function setCookie (name,value,expiredays){
  var expires_at = new Date();
  expires_at.setDate(expires_at.getDate() + expiredays);
  var cookie_text = name + "=" + escape(value);
  if (expiredays != null) {
    cookie_text += ";expires=" + expires_at.toGMTString();
  }
  document.cookie = cookie_text;
}
/**
 * Delete a cookie by overwriting it with an empty value whose expiry lies
 * one millisecond in the past.
 *
 * @param {string} cookie_name Name of the cookie to remove.
 */
function delete_cookie (cookie_name) {
  var just_expired = new Date(new Date().getTime() - 1);
  document.cookie = cookie_name + "=; expires=" + just_expired.toGMTString();
}
// Set last 5 visited pages cookies --------------------------------------------------------------------------------
/**
 * Track the most recently visited pages in a single JSON cookie
 * ("last_visited") and render their titles into the element with id
 * "last_visited". The current page is always moved to the front, so a URL
 * never appears twice, and at most max_urls entries are kept.
 */
function last_visited() {
  var max_urls = 5;
  var url = window.location.href;

  // Page titles are arbitrary text; escape them before building markup.
  function escape_html(text) {
    return String(text)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  }

  // Get the JSON cookie or a new array.
  var urls = [];
  var cookie = getCookie("last_visited");
  if (cookie != null) {
    try {
      var parsed = JSON.parse(cookie);
      if (Array.isArray(parsed)) {
        urls = parsed;
      }
    } catch (e) {
      // A truncated or hand-edited cookie must not break the page: start over.
      urls = [];
    }
  }

  // Build new_urls out of history that is not this url.
  var new_urls = [];
  for (var i = 0; i < urls.length; i++) {
    if (urls[i] && urls[i].url != url) {
      new_urls.push(urls[i]);
    }
  }

  // Drop the oldest items until there is room for the current page.
  while (new_urls.length >= max_urls) {
    new_urls.pop();
  }

  // Add this url to the front.
  new_urls.unshift({url: url, title: document.title});

  // Save it
  setCookie("last_visited", JSON.stringify(new_urls), 1);

  // Create html
  var html = "<ul>\n";
  for (var j = 0; j < new_urls.length; j++) {
    html += "<li>" + escape_html(new_urls[j].title) + "</li>\n";
  }
  html += "</ul>\n";

  // Render html (pages without the list element still record the visit).
  var el = document.getElementById("last_visited");
  if (el) {
    el.innerHTML = html;
  }
}
// Record and render the history once the page has finished loading.
// NOTE(review): assigning window.onload replaces any handler that was set earlier - confirm nothing else on the
// page depends on it.
window.onload = function () {
last_visited();
};

Related

How can get only the last element from an array javascript

I'm trying to get only the last page visited by the user; it should update on every page. I tried saving the URL in a session cookie, and I retrieve the last visited page from there.
This is my code:
/**
 * Look up a cookie by name in document.cookie.
 *
 * @param {string} cSname Cookie name.
 * @returns {string} The raw (undecoded) value of the first matching cookie,
 *     or "" when none matches.
 */
function getSecondCookie(cSname) {
  var wanted = cSname + "=";
  var pieces = document.cookie.split(';');
  var idx = 0;
  while (idx < pieces.length) {
    var piece = pieces[idx].trim();
    if (piece.slice(0, wanted.length) === wanted) {
      return piece.slice(wanted.length);
    }
    idx += 1;
  }
  return "";
}
/**
 * Render the visited-URL history stored in the "history" cookie (newest
 * first) into the element with the given id, then add the current page to
 * the cookie if it is not already listed.
 *
 * On the very first visit (no cookie yet) nothing is rendered; the cookie is
 * simply created with the current URL.
 *
 * @param {string} targetId Id of the element that receives the link list.
 */
function checkHistory(targetId) {
  var stored = getSecondCookie("history");
  if (stored == "") {
    setSecondCookie("history", [document.URL].toString(), 30);
    return;
  }

  // URLs come from a cookie; escape them before placing them in markup.
  function escapeHtml(text) {
    return String(text)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  }

  var insert = true;
  var htmlContent = '';
  var sp = stored.toString().split(",");
  for (var i = sp.length - 1; i >= 0; i--) {
    // Link text is the part of the URL after the last slash.
    var label = sp[i].substring(sp[i].lastIndexOf('/') + 1);
    htmlContent += '<a class="previous_url" href="'
        + escapeHtml(sp[i])
        + '">'
        + escapeHtml(label) + '</a><br>';
    if (sp[i] == document.URL) {
      insert = false;
    }
  }
  // Write to the DOM once, after the whole list has been built.
  document.getElementById(targetId).innerHTML = htmlContent;

  if (insert) {
    sp.push(document.URL);
  }
  setSecondCookie("history", sp.toString(), 30);
}
This is working fine how it has to.
At the moment it is showing like this:
https://www.example.com,https://www.example.com/about.html
and so on.
But I want here show only last element from an array. How can I do this?
let lastItem = array[array.length-1];
You can also do:
const last = array.slice(-1)[0]

Brainlabs adwords script remove firstpagemaxbid restriction

Can somebody help me modify this script?
The purpose of the script is to change bids for the keywords based on average position. One of the assumptions that the script has is that it sets a firstpagebid for the keyword but it won't allow for the bid to go below the firstpagebid even if the position is too high.
Is there a way to remove this restriction? so basically if the new cpc calculated is lower than the first page bid then it allows for the new cpc to be lower than the firstpage bid.
/**
*
* Average Position Bidding Tool
*
* This script changes keyword bids so that they target specified positions,
* based on recent performance.
*
* Version: 1.5
* Updated 2015-09-28 to correct for report column name changes
* Updated 2016-02-05 to correct label reading, add extra checks and
* be able to adjust maximum bid increases and decreases separately
* Updated 2016-08-30 to correct label reading from reports
* Updated 2016-09-14 to update keywords in batches
* Updated 2016-10-26 to avoid DriveApp bug
* Google AdWords Script maintained on brainlabsdigital.com
*
**/
// Options
var maxBid = 14.50;
// Bids will not be increased past this maximum.
var minBid = 3.0;
// Bids will not be decreased below this minimum.
var firstPageMaxBid = 10.00;
// The script avoids reducing a keyword's bid below its first page bid estimate. If you think
// Google's first page bid estimates are too high then use this to overrule them.
var dataFile = "AveragePositionData.txt";
// This name is used to create a file in your Google Drive to store today's performance so far,
// for reference the next time the script is run.
var useFirstPageBidsOnKeywordsWithNoImpressions = true;
// If this is true, then if a keyword has had no impressions since the last time the script was run
// its bid will be increased to the first page bid estimate (or the firstPageMaxBid if that is smaller).
// If this is false, keywords with no recent impressions will be left alone.
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Advanced Options
var bidIncreaseProportion = 0.20;
var bidDecreaseProportion = 0.25;
var targetPositionTolerance = 0.3;
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
function main() {
var fieldJoin = ",";
var lineJoin = "$";
var idJoin = "#";
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
/*var files = DriveApp.getFilesByName(dataFile);
if (!files.hasNext()) {
var file = DriveApp.createFile(dataFile,"\n");
Logger.log("File '" + dataFile + "' has been created.");
} else {
var file = files.next();
if (files.hasNext()) {
Logger.log("Error - more than one file named '" + dataFile + "'");
return;
}
Logger.log("File '" + dataFile + "' has been read.");
}*/
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Get the current date/time.
// "Now" is formatted in the account's time zone and the resulting string is parsed again, so getHours()/getDay()
// below read the wall-clock values of that formatted string.
// NOTE(review): this relies on the engine parsing the non-ISO "MMM dd,yyyy HH:mm:ss" format - confirm.
var currentTime = new Date(Utilities.formatDate(new Date(), AdWordsApp.currentAccount().getTimeZone(), "MMM dd,yyyy HH:mm:ss"));
var days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'];
var hourOfDay = currentTime.getHours();
var dayOfWeek = days[currentTime.getDay()]; // Added on 9/21/2015
// Stop without changing any bids outside the allowed running window, which differs by day (added on 9/21/2015):
//   Monday-Friday:   08:00 up to (not including) 21:00
//   Saturday/Sunday: 08:00 up to (not including) 18:00
switch (dayOfWeek) {
case 'Monday':
case 'Tuesday':
case 'Wednesday':
case 'Thursday':
case 'Friday':
if (hourOfDay < 8 || hourOfDay >= 21) {
Logger.log("Not the Right Time");
return;
}
break;
case 'Saturday':
case 'Sunday':
if (hourOfDay < 8 || hourOfDay >= 18) {
Logger.log("Not the Right Time");
return;
}
break;
}
Logger.log("Right Time");
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Collect the ids of every label that is attached to at least one keyword and whose name starts with
// "position " (case-insensitive).
var labelIds = [];
var labelIterator = AdWordsApp.labels()
.withCondition("KeywordsCount > 0")
.withCondition("LabelName CONTAINS_IGNORE_CASE 'Position '")
.get();
while (labelIterator.hasNext()) {
var label = labelIterator.next();
// The selector condition only checks that the name contains the text; require it at the very start of the name.
if (label.getName().substr(0,"position ".length).toLowerCase() == "position ") {
labelIds.push(label.getId());
}
}
// Without any position labels there is nothing to bid on, so end the run here.
if (labelIds.length == 0) {
Logger.log("No position labels found.");
return;
}
Logger.log(labelIds.length + " position labels have been found.");
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
var keywordData = {
//UniqueId1: {LastHour: {Impressions: , AveragePosition: }, ThisHour: {Impressions: , AveragePosition: },
//CpcBid: , FirstPageCpc: , MaxBid, MinBid, FirstPageMaxBid, PositionTarget: , CurrentAveragePosition:,
//Criteria: }
}
var ids = [];
var uniqueIds = [];
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Pull today's figures for every enabled keyword carrying one of the position labels (search network only,
// excluding the HIGH_END_MOBILE device) and fill keywordData, ids and uniqueIds from the report rows.
var report = AdWordsApp.report(
'SELECT Id, Criteria, AdGroupId, AdGroupName, CampaignName, Impressions, AveragePosition, CpcBid, FirstPageCpc, Labels, BiddingStrategyType ' +
'FROM KEYWORDS_PERFORMANCE_REPORT ' +
'WHERE Status = ENABLED AND AdGroupStatus = ENABLED AND CampaignStatus = ENABLED ' +
'AND LabelIds CONTAINS_ANY [' + labelIds.join(",") + '] ' +
'AND AdNetworkType2 = SEARCH ' +
'AND Device NOT_IN ["HIGH_END_MOBILE"] ' +
'DURING TODAY'
);
var rows = report.rows();
while(rows.hasNext()){
var row = rows.next();
// Anything other than manual CPC gets a warning. The strategies listed in the inner check are still processed;
// every other strategy causes the keyword to be skipped.
if (row["BiddingStrategyType"] != "cpc") {
if (row["BiddingStrategyType"] == "Enhanced CPC"
|| row["BiddingStrategyType"] == "Target search page location"
|| row["BiddingStrategyType"] == "Target Outranking Share"
|| row["BiddingStrategyType"] == "None"
|| row["BiddingStrategyType"] == "unknown") {
Logger.log("Warning: keyword " + row["Criteria"] + "' in campaign '" + row["CampaignName"] +
"' uses '" + row["BiddingStrategyType"] + "' rather than manual CPC. This may overrule keyword bids and interfere with the script working.");
} else {
Logger.log("Warning: keyword " + row["Criteria"] + "' in campaign '" + row["CampaignName"] +
"' uses the bidding strategy '" + row["BiddingStrategyType"] + "' rather than manual CPC. This keyword will be skipped.");
continue;
}
}
var positionTarget = "";
// "--" is treated as "no labels" and the row is skipped.
if (row["Labels"].trim() == "--") {
continue;
}
var labels = JSON.parse(row["Labels"].toLowerCase()); // Labels are returned as a JSON formatted string
// Use the first "position N" label as the target. A decimal comma is converted to a dot before parsing; the
// substring keeps the space before the number, which parseFloat ignores (as it does the second argument).
for (var i=0; i<labels.length; i++) {
if (labels[i].substr(0,"position ".length) == "position ") {
var positionTarget = parseFloat(labels[i].substr("position ".length-1).replace(/,/g,"."),10);
break;
}
}
// No position label on this row (the loose comparison also skips a parsed target of 0).
if (positionTarget == "") {
continue;
}
if (integrityCheck(positionTarget) == -1) {
Logger.log("Invalid position target '" + positionTarget + "' for keyword '" + row["Criteria"] + "' in campaign '" + row["CampaignName"] + "'");
continue;
}
ids.push(parseFloat(row['Id'],10));
// Entries are keyed by ad group id + idJoin + keyword id.
var uniqueId = row['AdGroupId'] + idJoin + row['Id'];
uniqueIds.push(uniqueId);
keywordData[uniqueId] = {};
keywordData[uniqueId]['Criteria'] = row['Criteria'];
keywordData[uniqueId]['ThisHour'] = {};
// Commas are stripped from the numeric report fields before parsing.
keywordData[uniqueId]['ThisHour']['Impressions'] = parseFloat(row['Impressions'].replace(/,/g,""),10);
keywordData[uniqueId]['ThisHour']['AveragePosition'] = parseFloat(row['AveragePosition'].replace(/,/g,""),10);
keywordData[uniqueId]['CpcBid'] = parseFloat(row['CpcBid'].replace(/,/g,""),10);
keywordData[uniqueId]['FirstPageCpc'] = parseFloat(row['FirstPageCpc'].replace(/,/g,""),10);
setPositionTargets(uniqueId, positionTarget);
}
Logger.log(uniqueIds.length + " labelled keywords found");
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
setBidChange();
setMinMaxBids();
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
/* var currentHour = parseInt(Utilities.formatDate(new Date(), AdWordsApp.currentAccount().getTimeZone(), "HH"), 10);
if (currentHour != 0) {
var data = file.getBlob().getDataAsString();
var data = data.split(lineJoin);
for(var i = 0; i < data.length; i++){
data[i] = data[i].split(fieldJoin);
var uniqueId = data[i][0];
if(keywordData.hasOwnProperty(uniqueId)){
keywordData[uniqueId]['LastHour'] = {};
keywordData[uniqueId]['LastHour']['Impressions'] = parseFloat(data[i][1],10);
keywordData[uniqueId]['LastHour']['AveragePosition'] = parseFloat(data[i][2],10);
}
}
}*/
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
findCurrentAveragePosition();
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Batch the keyword IDs, as the iterator can't take them all at once
var idBatches = [];
var batchSize = 5000;
for (var i=0; i<uniqueIds.length; i += batchSize) {
idBatches.push(uniqueIds.slice(i,i+batchSize));
}
Logger.log("Updating keywords");
// Update each batch. A failed batch is retried once after a one-minute pause; the retry is outside the try block,
// so a second failure propagates out of main().
for (var i=0; i<idBatches.length; i++) {
try {
updateKeywords(idBatches[i]);
} catch (e) {
Logger.log("Error updating keywords: " + e);
Logger.log("Retrying after one minute.");
Utilities.sleep(60000);
updateKeywords(idBatches[i]);
}
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Logger.log("Writing file.");
// var content = resultsString();
// file.setContent(content);
Logger.log("Finished.");
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Functions
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
/**
 * Validate a position target.
 *
 * @param {*} target Value to parse as a number.
 * @returns {number} The parsed number when it is a number >= 1, otherwise -1.
 */
function integrityCheck(target){
  var parsed = parseFloat(target, 10);
  return (isNaN(parsed) || parsed < 1) ? -1 : parsed;
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
/**
 * Store the acceptable position band for a keyword in keywordData.
 *
 * With a real target the band is target +/- targetPositionTolerance, with the
 * upper (numerically smaller) edge never below 1. A target of -1 marks the
 * keyword as having no target by storing -1 for both edges.
 *
 * @param {string} uniqueId Key of the keyword in keywordData.
 * @param {number} target Position target, or -1 for "none".
 */
function setPositionTargets(uniqueId, target){
  var entry = keywordData[uniqueId];
  var hasTarget = target !== -1;
  entry['HigherPositionTarget'] = hasTarget ? Math.max(target-targetPositionTolerance, 1) : -1;
  entry['LowerPositionTarget'] = hasTarget ? target+targetPositionTolerance : -1;
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
/**
 * Work out the new max CPC for a keyword from its current average position.
 *
 * - Position numerically above the lower edge of the target band: raise the
 *   bid, capped at MaxBid.
 * - Position numerically below the upper edge: lower the bid, never below
 *   MinBid, and (when the current bid is above the first page bid) never
 *   below the first page bid.
 * - Position 0 (no impressions): jump to the first page bid when
 *   useFirstPageBidsOnKeywordsWithNoImpressions is set and the current bid is
 *   lower.
 *
 * @param {string} uniqueId Key of the keyword in keywordData.
 * @returns {number} The new bid, or -1 when the bid should be left alone.
 */
function bidChange(uniqueId){
  var kw = keywordData[uniqueId];
  if (kw['HigherPositionTarget'] === -1) {
    return -1;
  }

  var currentBid = kw['CpcBid'];
  var floorBid = kw['MinBid'];
  var ceilingBid = kw['MaxBid'];

  if (isNaN(kw['FirstPageCpc'])) {
    Logger.log("Warning: first page CPC estimate is not a number for keyword '" + kw['Criteria'] + "'. This keyword will be skipped");
    return -1;
  }

  // Effective first page bid: the estimate, limited by FirstPageMaxBid and MaxBid.
  var firstPageBid = Math.min(kw['FirstPageCpc'], kw['FirstPageMaxBid'], ceilingBid);
  var position = kw['CurrentAveragePosition'];
  var upperEdge = kw['HigherPositionTarget'];
  var lowerEdge = kw['LowerPositionTarget'];
  var raise = kw['BidIncrease'];
  var cut = kw['BidDecrease'];

  var result = -1;
  var step;
  if (position !== 0) {
    if (position > lowerEdge) {
      step = Math.min(2*raise,(2*raise/lowerEdge)*(position-lowerEdge));
      result = Math.min((currentBid + step), ceilingBid);
    }
    if (position < upperEdge) {
      step = Math.min(2*cut,((-4)*cut/upperEdge)*(position-upperEdge));
      result = Math.max((currentBid-step),floorBid);
      if (currentBid > firstPageBid) {
        result = Math.max(firstPageBid,result);
      }
    }
  } else if (useFirstPageBidsOnKeywordsWithNoImpressions && (currentBid < firstPageBid)) {
    result = firstPageBid;
  }

  if (isNaN(result)) {
    Logger.log("Warning: new bid is not a number for keyword '" + kw['Criteria'] + "'. This keyword will be skipped");
    return -1;
  }
  return result;
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
/**
 * Fill in CurrentAveragePosition for every entry of keywordData: entries with
 * LastHour data get the value computed by calculateAveragePosition, the rest
 * use today's average position as it stands.
 */
function findCurrentAveragePosition(){
  for (var key in keywordData) {
    var record = keywordData[key];
    record['CurrentAveragePosition'] = record.hasOwnProperty('LastHour')
      ? calculateAveragePosition(record)
      : record['ThisHour']['AveragePosition'];
  }
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
/**
 * Average position of the impressions gained since the previous run, derived
 * from the cumulative figures of the previous run (LastHour) and now
 * (ThisHour).
 *
 * @param {Object} keywordDataElement Entry with LastHour and ThisHour, each
 *     holding Impressions and AveragePosition.
 * @returns {number} The average position of the new impressions; 0 when there
 *     were no new impressions or the computed value is below 1.
 */
function calculateAveragePosition(keywordDataElement){
  var previous = keywordDataElement['LastHour'];
  var current = keywordDataElement['ThisHour'];
  var lastImpressions = previous['Impressions'];
  var lastPosition = previous['AveragePosition'];
  var nowImpressions = current['Impressions'];
  var nowPosition = current['AveragePosition'];

  if (nowImpressions == lastImpressions) {
    return 0;
  }
  var weighted = (nowImpressions*nowPosition-lastImpressions*lastPosition)/(nowImpressions-lastImpressions);
  return weighted < 1 ? 0 : weighted;
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
/**
 * Map a keyword object back to its key in keywordData.
 *
 * When the keyword id occurs exactly once in ids, the stored unique id at the
 * same index is returned. When it occurs several times, the key is rebuilt
 * from the keyword's own ad group id.
 *
 * @param {Object} keyword Object exposing getId() and getAdGroup().getId().
 * @returns {string} The "adGroupId + idJoin + keywordId" key.
 */
function keywordUniqueId(keyword){
  var keywordId = keyword.getId();
  var firstAt = ids.indexOf(keywordId);
  if (firstAt !== ids.lastIndexOf(keywordId)) {
    // Ambiguous id: ask the keyword which ad group it belongs to.
    return keyword.getAdGroup().getId() + idJoin + keywordId;
  }
  return uniqueIds[firstAt];
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
/**
 * Copy the script-wide bid limits (minBid, maxBid, firstPageMaxBid) onto
 * every entry of keywordData.
 */
function setMinMaxBids(){
  for (var key in keywordData) {
    var record = keywordData[key];
    record['MinBid'] = minBid;
    record['MaxBid'] = maxBid;
    record['FirstPageMaxBid'] = firstPageMaxBid;
  }
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
/**
 * Store the per-keyword bid step sizes: half of the current bid multiplied by
 * bidIncreaseProportion / bidDecreaseProportion.
 */
function setBidChange(){
  for (var key in keywordData) {
    var record = keywordData[key];
    var currentBid = record['CpcBid'];
    record['BidIncrease'] = currentBid * bidIncreaseProportion/2;
    record['BidDecrease'] = currentBid * bidDecreaseProportion/2;
  }
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
/**
 * Apply the calculated bids to one batch of keywords.
 *
 * @param {string[]} idBatch Unique ids ("adGroupId + idJoin + keywordId");
 *     each is split on idJoin into the pair passed to withIds().
 */
function updateKeywords(idBatch) {
  var idPairs = idBatch.map(function(str){return str.split(idJoin);});
  var keywordIterator = AdWordsApp.keywords().withIds(idPairs).get();
  while (keywordIterator.hasNext()) {
    var keyword = keywordIterator.next();
    var newBid = bidChange(keywordUniqueId(keyword));
    // -1 means "leave this keyword's bid alone".
    if (newBid === -1) {
      continue;
    }
    keyword.setMaxCpc(newBid);
  }
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
/*function resultsString(){
var results = [];
for(var uniqueId in keywordData){
var resultsRow = [uniqueId, keywordData[uniqueId]['ThisHour']['Impressions'], keywordData[uniqueId]['ThisHour']['AveragePosition']];
results.push(resultsRow.join(fieldJoin));
}
return results.join(lineJoin);
}*/
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
}
As I understand, the script increases the cpcBid when the current average position is too high, and decreases it when the position is too low.
But when the bid is decreased and the previous bid is more than firstPageBid, the new bid will not decrease below firstPageBid.
Remove
if (cpcBid > firstPageBid) {
var newBid = Math.max(firstPageBid,newBid);
}
to allow your new bid to go lower than firstPageBid.

Correct order in for loop using Parse

I want to create a array containing objects, and I'm using Parse to query all the data.
However, the for loop that iterates over the results doesn't process them in the correct order; it appears to loop over the data randomly. If I log i on each iteration, the logs show different results every time.
Here is my code:
// Build one summary object per payment id in `results` and answer with an
// array that keeps the order of `results`.
//
// The queries are asynchronous and may finish in any order, so each payment
// is loaded inside loadPayment(index): the index is a function parameter and
// therefore fixed per payment, unlike a loop variable read later from a
// callback. Relies on `results`, `request` and `response` from the enclosing
// Cloud Code function.
Parse.Cloud.useMasterKey();

// retrieve params
var self = request.params.userid;
var Payment = Parse.Object.extend("Payments");

// Shared by all callbacks; every payment writes only its own slot.
var array = new Array(results.length);
var remaining = results.length;
var finished = false;

// Report a failure once; callbacks that finish later must not answer again.
function fail(code) {
  if (!finished) {
    finished = true;
    response.error(code);
  }
}

// Normalise "missing" values ("" / null / undefined) to null.
function valueOrNull(value) {
  return (value == "" || value == null) ? null : value;
}

// Store one finished entry and answer once every payment has been stored.
function storeEntry(index, entry) {
  if (finished) {
    return;
  }
  array[index] = entry;
  remaining--;
  if (remaining === 0) {
    finished = true;
    response.success(array);
  }
}

// Load a payment plus its sender and receiver, then store the summary.
function loadPayment(index) {
  // start query
  var query = new Parse.Query(Payment);
  query.get(results[index], {
    success: function (payment) {
      // get all the correct variables
      var from_user_id = payment.get("from_user_id");
      var to_user_id = payment.get("to_user_id");
      var amount = payment.get("amount");
      var createdAt = payment.updatedAt;
      var note = payment.get("note");
      var img = payment.get("photo");
      var location = payment.get("location");
      var status = payment.get("status");

      var fromquery = new Parse.Query(Parse.User);
      fromquery.get(from_user_id, {
        success: function (userObject) {
          var fromusername = userObject.get("name");
          var currency = userObject.get("currency");

          function sendArray(tousername) {
            // create the timestamp
            // NOTE(review): getMonth() is zero-based (January is 0) - confirm
            // the client expects that.
            var time = "" + createdAt.getHours() + ":" + createdAt.getMinutes();
            var date = "" + createdAt.getDate() + " " + createdAt.getMonth() + " " + createdAt.getFullYear();

            storeEntry(index, {
              from: (self == from_user_id) ? "self" : fromusername,
              to: (self == to_user_id) ? "self" : tousername,
              amount: amount,
              currency: currency,
              date: date,
              time: time,
              status: status,
              note: valueOrNull(note),
              img: valueOrNull(img),
              location: valueOrNull(location)
            });
          }

          var toquery = new Parse.Query(Parse.User);
          toquery.get(to_user_id, {
            success: function (touser) {
              sendArray(touser.get("name"));
            },
            error: function (touser, error) {
              // The receiver could not be loaded: fall back to the raw id.
              sendArray(to_user_id);
            }
          });
        },
        error: function (userObject, error) {
          fail(106);
        }
      });
    },
    error: function (payment, error) {
      fail(125);
    }
  });
}

if (results.length === 0) {
  // Nothing to load: answer immediately instead of never answering.
  finished = true;
  response.success(array);
}
for (var i = 0; i < results.length; i++) {
  loadPayment(i);
}
But the i var is always set to seven, so the associative arrays are appended at array[7] instead of the correct i (like 1,2,3,4,5)
The reason that this is so important is because I want to order the payment chronologically (which I have done in the query providing the results).
What can I do to solve this issue?
Success is a callback that happens at a later point in time. So what happens is, the for loop runs 7 times and calls parse 7 times. Then after it has run each of parse success calls will be executed, they look at i which is now at 7.
A simple way to fix this is to wrap the whole thing in an immediate function and create a new closure for i. Something like this
for (var i = 0; i < results.length; i++) {
  // The wrapping parentheses turn this into a function *expression* that is
  // called immediately; a statement that starts with the bare keyword
  // `function` is a syntax error. Each call gets its own iClosure.
  (function (iClosure) {
    //rest of code goes here, replace i's with iClosure
  })(i);
}
Now each success function will have access to its own iClosure variable, set to the value that i had at the point the function was created in the loop.

Does this iframe buster script look safe?

We're being asked to host a number of iframe buster scripts on our site - they allow ads which are served from external domains into iframes to expand outside of them into the host page. Our hosting provider's warned us to watch out for security holes in these scripts. Specifically, they say some of them create cross-site scripting holes by allowing a piece of Javascript to be loaded into our site from any URL.
To implement the script, you host an HTML page on your site. I'm looking at an example from the ad provider Atlas. In this case the URL is like http://domain.com/atlas/atlas_rm.htm. That page contains a script tag with src at an external URL, and here's the JS it includes:
/**
 * Public entry points of the iframe buster.
 *
 * WriteIframeBustingScript resolves the script directory and file name from
 * the page parameters and, when a directory was found, writes a script tag
 * for it through ARMIfbLib.DocumentWrite. DocumentWrite is a thin wrapper
 * around document.write; it is looked up on ARMIfbLib at call time.
 */
var ARMIfbLib = (function () {
  return {
    WriteIframeBustingScript: function writeIframeBustingScript() {
      var imgSrvPath = getTlDirectoryFromQueryString(getParameterString());
      if (imgSrvPath == "") {
        return;
      }
      var scriptURL = imgSrvPath + getScriptFileName();
      ARMIfbLib.DocumentWrite("<script language='javascript' type='text/javascript' src='" + scriptURL + "'></scr" + "ipt>");
    },
    DocumentWrite: function documentWrite(htmlString) {
      document.write(htmlString);
    }
  };
})();
/**
 * Extract the value of `paramKey=value` from a delimiter-separated string.
 *
 * The key "imgSrv" is special: it is not read from queryString but from the
 * frame properties via getValueFromProperties().
 *
 * @param {string} paramKey Parameter name (used as regex source).
 * @param {string} delimiter Text that ends the value (used as regex source).
 * @param {string} queryString String to search.
 * @returns {string} The value, or "" when the key is not present.
 */
function getValueFromDelimitedString(paramKey, delimiter, queryString) {
  if (paramKey == "imgSrv") {
    return getValueFromProperties();
  }
  var pattern = new RegExp(paramKey + "=(.*?)(" + delimiter + "|$)");
  var found = queryString.match(pattern);
  return found == null ? "" : found[1];
}
/**
 * Read the TL_files_path setting out of this frame's window name (self.name) and return it with its last path
 * segment removed. Four encodings of the name are recognised, checked in this order: serialized <form>/<input>
 * markup, "adparamdelim"-separated pairs, an object literal, and "&"-separated pairs.
 *
 * NOTE(review): window.name is chosen by whoever creates the iframe, so everything read here should be treated
 * as untrusted input.
 *
 * Returns the derived path, or "" when TL_files_path is not found.
 */
function getValueFromProperties() {
var iframename = unescape(self.name);
// Form 1: the name contains serialized form markup; look for the input named TL_files_path.
if (iframename.indexOf("<form") >= 0) {
var params = iframename.split("<input ");
for (var i = 1; i < params.length; i++) {
var parts = params[i].split(" ");
for (var j = 0; j < parts.length; j++) {
var param = parts[j].split("=");
if (param[0].indexOf("name") >= 0 && param[1].indexOf("TL_files_path") >= 0) {
// The value attribute is expected in the token right after the name attribute.
param = parts[j + 1].split("=");
if (param[0].indexOf("value") >= 0) {
// Skips the first character - presumably the opening quote of the attribute value.
var value = param[1].substr(1, param[1].indexOf(">"));
// The string itself is passed as the start index; a non-numeric string coerces to NaN, which substr
// treats as 0, so these two calls keep the text from the start up to the last "/" (presumably 0 was
// intended - TODO confirm).
value = value.substr(value, value.lastIndexOf("/"));
value = value.substr(value, value.lastIndexOf("/") + 1);
return unescape(value);
}
}
}
}
}
// Form 2: key=value pairs separated by the literal text "adparamdelim".
else if (iframename.indexOf("adparamdelim") >= 0) {
var params = iframename.split("adparamdelim");
for (var i = 0; i < params.length; i++) {
var param = params[i].split("=");
if (param[0].indexOf("TL_files_path") >= 0) {
var value = param[1];
value = value.substr(value, value.lastIndexOf("/"));
value = value.substr(value, value.lastIndexOf("/") + 1);
return value;
}
}
}
// Form 3: the name looks like an object literal ("{...}").
else if (/^\{.*\}$/.test(iframename)) {
try {
// SECURITY NOTE(review): eval executes the frame name as code. Any page that can set this frame's name can
// run script in this origin; consider JSON.parse if the name is always valid JSON.
eval('var results = ' + iframename);
var value = results.TL_files_path;
value = value.substr(value, value.lastIndexOf("/"));
value = value.substr(value, value.lastIndexOf("/") + 1);
return value;
} catch (e) {
// Any failure while evaluating or reading the literal is treated as "not found".
return "";
}
// Form 4 (fallback): "&"-separated key=value pairs.
} else {
var params = iframename.split("&");
for (var i = 0; i < params.length; i++) {
var param = params[i].split("=");
if (param[0].indexOf("TL_files_path") >= 0) {
var value = unescape(param[1]);
value = value.substr(value, value.lastIndexOf("/"));
value = value.substr(value, value.lastIndexOf("/") + 1);
return value;
}
}
}
return "";
}
/**
 * Resolve the directory the buster script is loaded from: the "imgSrv" value
 * with its last two path segments removed, keeping the trailing slash.
 *
 * @param {string} sLocation Parameter string handed to the lookup.
 * @returns {string} The directory, or "" when it cannot be derived.
 */
function getTlDirectoryFromQueryString(sLocation) {
  var imgSrv = getValueFromDelimitedString("imgSrv", "a4edelim", sLocation);
  var withoutLastSegment = imgSrv.substring(0, imgSrv.lastIndexOf("/"));
  return withoutLastSegment.substring(0, withoutLastSegment.lastIndexOf("/") + 1);
}
/**
 * @returns {string} The query string of the current document, including the
 *     leading "?" when present.
 */
function getDocumentQueryString() {
  var currentLocation = window.location;
  return currentLocation.search;
}
/**
 * @returns {string} The document query string without its first character
 *     (the "?"), or "" when the query string is empty.
 */
function getIframeParameterString() {
  var query = getDocumentQueryString();
  return query.length > 0 ? query.substring(1) : "";
}
/**
 * Find the first script tag whose src contains "newiframescript"
 * (case-insensitive) and a "?", and return everything after that "?".
 *
 * @returns {string} The parameter string, or "" when no such script exists.
 */
function getScriptParameterString() {
  var tags = document.getElementsByTagName('script');
  for (var idx = 0; idx < tags.length; idx++) {
    var src = tags[idx].src;
    var queryAt = src.indexOf("?");
    if (src.toLowerCase().indexOf("newiframescript") != -1 && queryAt != -1) {
      return src.slice(queryAt + 1);
    }
  }
  return "";
}
/**
 * Pick the parameter string to work with: the document's own query string
 * when it contains "a4edelim" somewhere after its first character, otherwise
 * the parameters found on the script tag.
 *
 * @returns {string} The chosen parameter string (possibly "").
 */
function getParameterString() {
  var iframeQuery = getIframeParameterString();
  var hasDelimiter = iframeQuery.length > 0 && iframeQuery.indexOf("a4edelim") > 0;
  return hasDelimiter ? iframeQuery : getScriptParameterString();
}
/**
 * Decide which buster script file to load. The "armver" parameter may hold a
 * comma-separated list of names; the first one containing "ifb"
 * (case-insensitive) wins, and "ifb.0" is used when none is given.
 *
 * @returns {string} The file name with ".js" appended.
 */
function getScriptFileName() {
  var query = getParameterString();
  var versions = query.length > 0
    ? getValueFromDelimitedString("armver", "a4edelim", query)
    : "";
  var candidates = versions.length > 0 ? versions.split(",") : [];

  var chosen = "ifb.0";
  for (var idx = 0; idx < candidates.length; idx++) {
    if (candidates[idx].toLowerCase().indexOf("ifb") != -1) {
      chosen = candidates[idx];
      break;
    }
  }
  return chosen + ".js";
}
// Write the buster script as soon as this file is evaluated, unless a variable named armTestMode has been
// defined (presumably by a test harness - confirm).
if (typeof(armTestMode) == "undefined") {
ARMIfbLib.WriteIframeBustingScript();
}
I've spent a couple of hours studying this to try to work out what it's doing, but I've got bogged down in the different function calls. It seems to be grabbing a query string parameter, or else a value from the name of an iframe, presumably the iframe that contains the ad.
Can anyone understand what this JS is doing? Does it look fairly safe from a XSS point of view?
=========================================
EDIT
In case useful to anybody else, we mentioned this concern to the providers, and their response was:
The iframe buster page will only work if it is in an iframe
The code in the ftlocal.html file will only work if the domain of the iframe is already the same as the domain of the parent page – So any code would already have access to the parent page anyway
The JS script creates a dynamically generated script tag in your page.
ARMIfbLib.DocumentWrite("<script language='javascript' type='text/javascript' src='" + scriptURL + "'></scr" + "ipt>");
If you dig into where scriptURL comes from, it appears to be a parameter passed to window.location.search (the query string).
From what I can see this effectively allows any script to be passed to your page on the query string rendering it vulnerable to DOM XSS, unless it is effectively secured to allow the domain to be set by the frame name in your page. I'd do some testing using your own domains and passing the query string variables that are searched for (the string literals in the JS).

How to extract text from a PDF in JavaScript

I wonder if is possible to get the text inside of a PDF file by using only Javascript?
If yes, can anyone show me how?
I know there are some server-side java, c#, etc libraries but I would prefer not using a server.
thanks
Because pdf.js has been developing over the years, I would like to give a new answer. That is, it can be done locally without involving any server or external service. The new pdf.js has a function: page.getTextContent(). You can get the text content from that. I've done it successfully with the following code.
What you get in each step is a promise. You need to code this way: .then( function(){...}) to proceed to the next step.
PDFJS.getDocument( data ).then( function(pdf) {
pdf.getPage(i).then( function(page){
page.getTextContent().then( function(textContent){
What you finally get is a string array, textContent.bidiTexts[]. You concatenate its entries to get the text of one page. The text blocks' coordinates are used to judge whether a newline or a space needs to be inserted. (This may not be totally robust, but in my tests it seems OK.)
The input parameter data needs to be either a URL or ArrayBuffer type data. I used the ReadAsArrayBuffer(file) function in FileReader API to get the data.
Note: According to some other user, the library has updated and caused the code to break. According to the comment by async5 below, you need to replace textContent.bidiTexts with textContent.items.
/**
 * Extracts the text of a PDF with pdf.js (expects the global `PDFJS`).
 *
 * Instantiate with `new`; `complete` holds the number of pages finished by
 * the most recent call to `pdfToText`.
 */
function Pdf2TextClass(){
    var self = this;
    this.complete = 0;
    /**
     *
     * @param data URL string or ArrayBuffer of the pdf file content
     * @param callbackPageDone To inform the progress each time
     *        when a page is finished. The callback function's input parameters are:
     *        1) number of pages done;
     *        2) total number of pages in file.
     *        May be omitted.
     * @param callbackAllDone The input parameter of callback function is
     *        the result of extracted text from pdf file. May be omitted.
     *
     */
    this.pdfToText = function(data, callbackPageDone, callbackAllDone){
        console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
        // Tolerate missing callbacks instead of throwing inside a promise handler.
        var notifyPage = typeof callbackPageDone == 'function' ? callbackPageDone : function(){};
        var notifyAll = typeof callbackAllDone == 'function' ? callbackAllDone : function(){};
        PDFJS.getDocument( data ).then( function(pdf) {
            var total = pdf.numPages;
            // Per-call counter: a counter that only lives on the instance never
            // equals `total` again on a second conversion.
            var done = 0;
            self.complete = 0;
            notifyPage( 0, total );
            // Page texts keyed by page number, because pages may finish out of order.
            var layers = {};
            for (var i = 1; i <= total; i++){
                pdf.getPage(i).then( function(page){
                    var n = page.pageNumber;
                    page.getTextContent().then( function(textContent){
                        if( null != textContent.bidiTexts ){
                            var page_text = "";
                            var last_block = null;
                            for( var k = 0; k < textContent.bidiTexts.length; k++ ){
                                var block = textContent.bidiTexts[k];
                                // Whitespace is only considered when the previous block
                                // does not already end with a space.
                                if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
                                    if( block.x < last_block.x )
                                        page_text += "\r\n";
                                    else if ( last_block.y != block.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
                                        page_text += ' ';
                                }
                                page_text += block.str;
                                last_block = block;
                            }
                            console.log("page " + n + " finished."); //" content: \n" + page_text);
                            layers[n] = page_text + "\n\n";
                        }
                        self.complete = ++done;
                        notifyPage( done, total );
                        if (done == total){
                            // Every finished page has already stored its text above, so the
                            // result can be assembled right away without a timer.
                            var full_text = "";
                            for( var j = 1; j <= total; j++){
                                // Pages without text content have no entry; skip them
                                // rather than appending "undefined".
                                if( layers[j] !== undefined )
                                    full_text += layers[j];
                            }
                            notifyAll(full_text);
                        }
                    }); // end of page.getTextContent().then
                }); // end of page.then
            } // of for
        });
    }; // end of pdfToText()
}; // end of class
I couldn't get gm2008's example to work (the internal data structure on pdf.js has changed apparently), so I wrote my own fully promise-based solution that doesn't use any DOM elements, queryselectors or canvas, using the updated pdf.js from the example at mozilla
It eats a file path for the upload since i'm using it with node-webkit.
You need to make sure you have the cmaps downloaded and pointed somewhere, and you need pdf.js and pdf.worker.js to get this working.
/**
 * Promise-based text extraction with PDF.js.
 * Built against the demo pdf.js from https://mozilla.github.io/pdf.js/getting_started/
 *
 * Resolves with the text of every page: the strings of one page are joined
 * with a single space, the pages are joined with "\r\n".
 */
this.pdfToText = function(data) {
    PDFJS.workerSrc = 'js/vendor/pdf.worker.js';
    PDFJS.cMapUrl = 'js/vendor/pdfjs/cmaps/';
    PDFJS.cMapPacked = true;

    // Resolves with the space-joined strings of one page (1-based page number).
    var readPage = function(pdf, pageNumber) {
        return pdf.getPage(pageNumber).then(function(page) {
            return page.getTextContent().then(function(textContent) {
                var strings = textContent.items.map(function(item) {
                    return item.str;
                });
                return strings.join(' ');
            });
        });
    };

    return PDFJS.getDocument(data).then(function(pdf) {
        var pending = [];
        for (var pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
            pending.push(readPage(pdf, pageNumber));
        }
        // Promise.all keeps the page order no matter which page finishes first.
        return Promise.all(pending).then(function(pageTexts) {
            return pageTexts.join("\r\n");
        });
    });
};
usage:
// Extract the text of the first selected file and log it; a failed load or
// parse is reported instead of becoming an unhandled promise rejection.
self.pdfToText(files[0].path).then(function(result) {
    console.log("PDF done!", result);
}).catch(function(err) {
    console.error(err);
});
Just leaving here a full working sample.
<html>
<head>
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
</head>
<body>
<input id="pdffile" name="pdffile" type="file" />
<button id="btn" onclick="convert()">Process</button>
<div id="result"></div>
</body>
</html>
<script>
/**
 * Click handler of the "Process" button: reads the file chosen in #pdffile
 * as a data URL, extracts its text with Pdf2TextClass and appends that text
 * to #result. Does nothing when no file has been selected.
 */
function convert() {
    var file = document.getElementById('pdffile').files[0];
    // Without a selection files[0] is undefined and readAsDataURL would throw.
    if (!file) {
        return;
    }
    var fr = new FileReader();
    var pdff = new Pdf2TextClass();
    fr.onload = function () {
        // fr.result holds the data URL once reading has finished.
        pdff.pdfToText(fr.result, null, (text) => { document.getElementById('result').innerText += text; });
    };
    fr.readAsDataURL(file);
}
/**
 * Extracts the text of a PDF with pdf.js (expects the global `pdfjsLib`).
 *
 * Instantiate with `new`; `complete` holds the number of pages finished by
 * the most recent call to `pdfToText`.
 */
function Pdf2TextClass() {
    var self = this;
    this.complete = 0;
    /**
     * @param data URL string (data URLs included) or ArrayBuffer of the pdf file
     * @param callbackPageDone Optional progress callback, called with
     *        1) number of pages done; 2) total number of pages in file.
     * @param callbackAllDone Optional callback that receives the extracted
     *        text of the whole file.
     */
    this.pdfToText = function (data, callbackPageDone, callbackAllDone) {
        console.assert(data instanceof ArrayBuffer || typeof data == 'string');
        // Tolerate missing callbacks (e.g. `null` for the progress callback).
        var notifyPage = typeof callbackPageDone == 'function' ? callbackPageDone : function () {};
        var notifyAll = typeof callbackAllDone == 'function' ? callbackAllDone : function () {};
        var loadingTask = pdfjsLib.getDocument(data);
        loadingTask.promise.then(function (pdf) {
            // Public page count instead of the private `_pdfInfo` field.
            var total = pdf.numPages;
            // Per-call counter: a counter that only lives on the instance never
            // equals `total` again on a second conversion.
            var done = 0;
            self.complete = 0;
            notifyPage(0, total);
            // Page texts keyed by page number, because pages may finish out of order.
            var layers = {};
            for (var i = 1; i <= total; i++) {
                pdf.getPage(i).then(function (page) {
                    var n = page.pageNumber;
                    page.getTextContent().then(function (textContent) {
                        if (null != textContent.items) {
                            var page_text = "";
                            var last_block = null;
                            for (var k = 0; k < textContent.items.length; k++) {
                                var block = textContent.items[k];
                                // NOTE(review): pdf.js text items may not carry x/y fields
                                // (position is normally read from item.transform); when they
                                // are undefined both comparisons below are false and no
                                // whitespace is inserted - confirm against the pdf.js
                                // version in use.
                                if (last_block != null && last_block.str[last_block.str.length - 1] != ' ') {
                                    if (block.x < last_block.x)
                                        page_text += "\r\n";
                                    else if (last_block.y != block.y && (last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
                                        page_text += ' ';
                                }
                                page_text += block.str;
                                last_block = block;
                            }
                            console.log("page " + n + " finished."); //" content: \n" + page_text);
                            layers[n] = page_text + "\n\n";
                        }
                        self.complete = ++done;
                        notifyPage(done, total);
                        if (done == total) {
                            // Every finished page has already stored its text above, so the
                            // result can be assembled right away without a timer.
                            var full_text = "";
                            for (var j = 1; j <= total; j++) {
                                // Pages without text content have no entry; skip them
                                // rather than appending "undefined".
                                if (layers[j] !== undefined)
                                    full_text += layers[j];
                            }
                            notifyAll(full_text);
                        }
                    }); // end of page.getTextContent().then
                }); // end of page.then
            } // of for
        });
    }; // end of pdfToText()
}; // end of class
</script>
Here's some JavaScript code that does what you want using Pdf.js from http://hublog.hubmed.org/archives/001948.html:
// Elements involved: #input carries the PDF location in its "src" attribute,
// #processor is the frame that converts the PDF, #output shows the text.
var input = document.getElementById("input");
var processor = document.getElementById("processor");
var output = document.getElementById("output");
// listen for messages from the processor
window.addEventListener("message", function(event){
    // Ignore messages that do not come from the processor frame.
    if (event.source != processor.contentWindow) return;
    switch (event.data){
        // "ready" = the processor is ready, so fetch the PDF file
        case "ready":
            var xhr = new XMLHttpRequest();
            xhr.open('GET', input.getAttribute("src"), true);
            xhr.responseType = "arraybuffer";
            xhr.onload = function(event) {
                // NOTE(review): "*" posts the PDF bytes to whatever origin is loaded
                // in the frame; pass the processor's origin if it is known.
                processor.contentWindow.postMessage(this.response, "*");
            };
            xhr.send();
            break;
        // anything else = the processor has returned the text of the PDF
        default:
            // Only a string can be whitespace-normalised; any other payload
            // would make .replace throw inside the listener.
            if (typeof event.data === "string") {
                output.textContent = event.data.replace(/\s+/g, " ");
            }
            break;
    }
}, true);
...and here's an example:
http://git.macropus.org/2011/11/pdftotext/example/
Note: This code assumes you're using nodejs. That means you're parsing a local file instead of one from a web page since the original question doesn't explicitly ask about parsing pdfs on a web page.
@gm2008's answer was a great starting point (please read it and its comments for more info), but needed some updates (08/19) and had some unused code. I also like examples that are more full. There's more refactoring and tweaking that could be done (e.g. with await), but for now it's as close to that original answer as it could be.
As before, this uses Mozilla's PDFjs library. The npmjs package is at https://www.npmjs.com/package/pdfjs-dist.
In my experience, this doesn't do well in finding where to put spaces, but that's a problem for another time.
[Edit: I believe the update to the use of .transform has restored the whitespace as it originally behaved.]
// This file is called myPDFfileToText.js and is in the root folder
const PDFJS = require('pdfjs-dist');
const pathToPDF = 'path/to/myPDFfileToText.pdf';
const toText = Pdf2TextObj();
// Nothing needs to happen between pages.
const onPageDone = () => {};
// Print the collected text once every page has been processed.
const onFinish = (fullText) => {
    console.log(fullText);
};
toText.pdfToText(pathToPDF, onPageDone, onFinish);
/**
 * Factory for a PDF-to-text converter built on pdf.js (uses the `PDFJS`
 * binding of the surrounding script).
 *
 * Returns an object with `complete` (pages finished by the most recent
 * conversion) and `pdfToText`.
 */
function Pdf2TextObj() {
    // A plain object instead of `this`: the factory is called without `new`,
    // where `this` is the global object (or undefined in strict mode).
    let self = { complete: 0 };
    /**
     *
     * @param path Path to the pdf file.
     * @param callbackPageDone To inform the progress each time
     *        when a page is finished. The callback function's input parameters are:
     *        1) number of pages done.
     *        2) total number of pages in file.
     *        3) the `page` object itself or null.
     * @param callbackAllDone Called after all text has been collected. Input parameters:
     *        1) full text of parsed pdf.
     *
     */
    self.pdfToText = function(path, callbackPageDone, callbackAllDone) {
        // console.assert(typeof path == 'string');
        // Tolerate missing callbacks instead of throwing inside a promise handler.
        let notifyPage = typeof callbackPageDone == 'function' ? callbackPageDone : function() {};
        let notifyAll = typeof callbackAllDone == 'function' ? callbackAllDone : function() {};
        PDFJS.getDocument(path).promise.then(function(pdf) {
            let total = pdf.numPages;
            // Per-call counter so the same converter can be used more than once.
            let done = 0;
            self.complete = 0;
            notifyPage(0, total, null);
            let pages = {};
            // For some (pdf?) reason these don't all come in consecutive
            // order. That's why they're stored as an object and then
            // processed one final time at the end.
            for (let pagei = 1; pagei <= total; pagei++) {
                pdf.getPage(pagei).then(function(page) {
                    let pageNumber = page.pageNumber;
                    page.getTextContent().then(function(textContent) {
                        if (null != textContent.items) {
                            let page_text = "";
                            let last_item = null;
                            for (let itemsi = 0; itemsi < textContent.items.length; itemsi++) {
                                let item = textContent.items[itemsi];
                                // I think to add whitespace properly would be more complex and
                                // would require two loops.
                                if (last_item != null && last_item.str[last_item.str.length - 1] != ' ') {
                                    // NOTE(review): presumably transform[4] is the horizontal and
                                    // transform[5] the vertical offset of the item - confirm
                                    // against the pdf.js docs. The indices and comparisons are
                                    // unchanged; only the local names were adjusted.
                                    let itemVertical = item.transform[5];
                                    let lastItemVertical = last_item.transform[5];
                                    let itemHorizontal = item.transform[4];
                                    let lastItemHorizontal = last_item.transform[4];
                                    if (itemVertical < lastItemVertical)
                                        page_text += "\r\n";
                                    else if (itemHorizontal != lastItemHorizontal && (last_item.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
                                        page_text += ' ';
                                } // ends if may need to add whitespace
                                page_text += item.str;
                                last_item = item;
                            } // ends for every item of text
                            console.log("page " + pageNumber + " finished."); // " content: \n" + page_text);
                            pages[pageNumber] = page_text + "\n\n";
                        } // ends if has items
                        self.complete = ++done;
                        notifyPage(done, total, page);
                        // If all done, put pages in order and combine all
                        // text, then pass that to the callback. Every finished page
                        // has already stored its text, so no timer is needed.
                        if (done == total) {
                            let full_text = "";
                            for (let pageNum = 1; pageNum <= total; pageNum++) {
                                // Pages without text items have no entry; skip them
                                // rather than appending "undefined".
                                if (pages[pageNum] !== undefined)
                                    full_text += pages[pageNum];
                            }
                            notifyAll(full_text);
                        }
                    }); // ends page.getTextContent().then
                }); // ends page.then
            } // ends for every page
        });
    }; // Ends pdfToText()
    return self;
}; // Ends object factory
Run in the terminal:
node myPDFfileToText.js
Updated 02/2021
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
<script>
/**
 * Extracts the text of a PDF with pdf.js (expects the global `pdfjsLib`)
 * and logs it to the console.
 *
 * Instantiate with `new`; `complete` holds the number of pages finished by
 * the most recent call to `pdfToText`.
 */
function Pdf2TextClass(){
    var self = this;
    this.complete = 0;
    /**
     * @param data URL string or ArrayBuffer of the pdf file
     * @param callbackPageDone Optional progress callback, called with
     *        1) number of pages done; 2) total number of pages in file.
     * @param callbackAllDone Optional callback that receives the extracted
     *        text of the whole file (the text is logged in any case).
     */
    this.pdfToText = function(data, callbackPageDone, callbackAllDone){
        console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
        // Both callbacks are optional.
        var notifyPage = typeof callbackPageDone == 'function' ? callbackPageDone : function(){};
        var notifyAll = typeof callbackAllDone == 'function' ? callbackAllDone : function(){};
        var loadingTask = pdfjsLib.getDocument(data);
        loadingTask.promise.then(function(pdf) {
            // Public page count instead of the private `_pdfInfo` field.
            var total = pdf.numPages;
            // Per-call counter: a counter that only lives on the instance never
            // equals `total` again on a second conversion.
            var done = 0;
            self.complete = 0;
            notifyPage( 0, total );
            // Page texts keyed by page number, because pages may finish out of order.
            var layers = {};
            for (var i = 1; i <= total; i++){
                pdf.getPage(i).then( function(page){
                    var n = page.pageNumber;
                    page.getTextContent().then( function(textContent){
                        if( null != textContent.items ){
                            var page_text = "";
                            var last_block = null;
                            for( var k = 0; k < textContent.items.length; k++ ){
                                var block = textContent.items[k];
                                // NOTE(review): pdf.js text items may not carry x/y fields
                                // (position is normally read from item.transform); when they
                                // are undefined both comparisons below are false and no
                                // whitespace is inserted - confirm against the pdf.js
                                // version in use.
                                if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
                                    if( block.x < last_block.x )
                                        page_text += "\r\n";
                                    else if ( last_block.y != block.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
                                        page_text += ' ';
                                }
                                page_text += block.str;
                                last_block = block;
                            }
                            console.log("page " + n + " finished."); //" content: \n" + page_text);
                            layers[n] = page_text + "\n\n";
                        }
                        self.complete = ++done;
                        notifyPage( done, total );
                        if (done == total){
                            // Every finished page has already stored its text above, so the
                            // result can be assembled right away without a timer.
                            var full_text = "";
                            for( var j = 1; j <= total; j++){
                                // Pages without text content have no entry; skip them
                                // rather than appending "undefined".
                                if( layers[j] !== undefined )
                                    full_text += layers[j];
                            }
                            console.log(full_text);
                            notifyAll(full_text);
                        }
                    }); // end of page.getTextContent().then
                }); // end of page.then
            } // of for
        });
    }; // end of pdfToText()
}; // end of class
// Create a converter and start extracting the text of the PDF at the given
// URL ('PDF_URL' is a placeholder); no callbacks are passed in this example.
var pdff = new Pdf2TextClass();
pdff.pdfToText('PDF_URL');
</script>
For all the people who actually want to use it on a node server:
/**
* Created by velten on 25.04.16.
*/
"use strict";
// Downloads a PDF over HTTP, streams it into pdf2json and prints the number
// of text blocks found in the whole document.
let pdfUrl = "http://example.com/example.pdf";
let request = require('request');
// pdf2json exports the parser constructor; the download has to be piped
// into a parser instance, not into the module export itself.
let PDFParser = require('pdf2json');
let pdfParser = new PDFParser();
// pipe() returns its destination, so pdfPipe is the parser instance.
let pdfPipe = request({url: pdfUrl, encoding:null}).pipe(pdfParser);
pdfPipe.on("pdfParser_dataError", err => console.error(err) );
pdfPipe.on("pdfParser_dataReady", pdf => {
    //optionally:
    //let pdf = pdfParser.getMergedTextBlocksIfNeeded();
    let count1 = 0;
    // add up the text blocks of every page
    // NOTE(review): the payload shape (pdf.formImage.Pages) presumably depends
    // on the pdf2json version - confirm against the installed release.
    for (let page of pdf.formImage.Pages) {
        count1 += page.Texts.length;
    }
    console.log(count1);
    pdfParser.destroy();
});
It is possible but:
you would have to use the server anyway, there's no way you can get content of a file on user computer without transferring it to server and back
I don't think anyone has written such a library yet
So if you have some free time you can learn pdf format and write such a library yourself, or you can just use server side library of course.

Categories