I wonder if is possible to get the text inside of a PDF file by using only Javascript?
If yes, can anyone show me how?
I know there are some server-side java, c#, etc libraries but I would prefer not using a server.
thanks
Because pdf.js has been developing over the years, I would like to give a new answer. That is, it can be done locally without involving any server or external service. The new pdf.js has a function: page.getTextContent(). You can get the text content from that. I've done it successfully with the following code.
What you get in each step is a promise. You need to code this way: .then( function(){...}) to proceed to the next step.
PDFJS.getDocument( data ).then( function(pdf) {
pdf.getPage(i).then( function(page){
page.getTextContent().then( function(textContent){
What you finally get is an string array textContent.bidiTexts[]. You concatenate them to get the text of 1 page. Text blocks' coordinates are used to judge whether newline or space need to be inserted. (This may not be totally robust, but from my test it seems ok.)
The input parameter data needs to be either a URL or ArrayBuffer type data. I used the ReadAsArrayBuffer(file) function in FileReader API to get the data.
Note: According to some other user, the library has updated and caused the code to break. According to the comment by async5 below, you need to replace textContent.bidiTexts with textContent.items.
function Pdf2TextClass(){
var self = this;
this.complete = 0;
/**
*
* #param data ArrayBuffer of the pdf file content
* #param callbackPageDone To inform the progress each time
* when a page is finished. The callback function's input parameters are:
* 1) number of pages done;
* 2) total number of pages in file.
* #param callbackAllDone The input parameter of callback function is
* the result of extracted text from pdf file.
*
*/
this.pdfToText = function(data, callbackPageDone, callbackAllDone){
console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
PDFJS.getDocument( data ).then( function(pdf) {
var div = document.getElementById('viewer');
var total = pdf.numPages;
callbackPageDone( 0, total );
var layers = {};
for (i = 1; i <= total; i++){
pdf.getPage(i).then( function(page){
var n = page.pageNumber;
page.getTextContent().then( function(textContent){
if( null != textContent.bidiTexts ){
var page_text = "";
var last_block = null;
for( var k = 0; k < textContent.bidiTexts.length; k++ ){
var block = textContent.bidiTexts[k];
if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
if( block.x < last_block.x )
page_text += "\r\n";
else if ( last_block.y != block.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
page_text += ' ';
}
page_text += block.str;
last_block = block;
}
textContent != null && console.log("page " + n + " finished."); //" content: \n" + page_text);
layers[n] = page_text + "\n\n";
}
++ self.complete;
callbackPageDone( self.complete, total );
if (self.complete == total){
window.setTimeout(function(){
var full_text = "";
var num_pages = Object.keys(layers).length;
for( var j = 1; j <= num_pages; j++)
full_text += layers[j] ;
callbackAllDone(full_text);
}, 1000);
}
}); // end of page.getTextContent().then
}); // end of page.then
} // of for
});
}; // end of pdfToText()
}; // end of class
I couldn't get gm2008's example to work (the internal data structure on pdf.js has changed apparently), so I wrote my own fully promise-based solution that doesn't use any DOM elements, queryselectors or canvas, using the updated pdf.js from the example at mozilla
It eats a file path for the upload since i'm using it with node-webkit.
You need to make sure you have the cmaps downloaded and pointed somewhere and you nee pdf.js and pdf.worker.js to get this working.
/**
* Extract text from PDFs with PDF.js
* Uses the demo pdf.js from https://mozilla.github.io/pdf.js/getting_started/
*/
this.pdfToText = function(data) {
PDFJS.workerSrc = 'js/vendor/pdf.worker.js';
PDFJS.cMapUrl = 'js/vendor/pdfjs/cmaps/';
PDFJS.cMapPacked = true;
return PDFJS.getDocument(data).then(function(pdf) {
var pages = [];
for (var i = 0; i < pdf.numPages; i++) {
pages.push(i);
}
return Promise.all(pages.map(function(pageNumber) {
return pdf.getPage(pageNumber + 1).then(function(page) {
return page.getTextContent().then(function(textContent) {
return textContent.items.map(function(item) {
return item.str;
}).join(' ');
});
});
})).then(function(pages) {
return pages.join("\r\n");
});
});
}
usage:
self.pdfToText(files[0].path).then(function(result) {
console.log("PDF done!", result);
})
Just leaving here a full working sample.
<html>
<head>
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
</head>
<body>
<input id="pdffile" name="pdffile" type="file" />
<button id="btn" onclick="convert()">Process</button>
<div id="result"></div>
</body>
</html>
<script>
function convert() {
var fr=new FileReader();
var pdff = new Pdf2TextClass();
fr.onload=function(){
pdff.pdfToText(fr.result, null, (text) => { document.getElementById('result').innerText += text; });
}
fr.readAsDataURL(document.getElementById('pdffile').files[0])
}
function Pdf2TextClass() {
var self = this;
this.complete = 0;
this.pdfToText = function (data, callbackPageDone, callbackAllDone) {
console.assert(data instanceof ArrayBuffer || typeof data == 'string');
var loadingTask = pdfjsLib.getDocument(data);
loadingTask.promise.then(function (pdf) {
var total = pdf._pdfInfo.numPages;
//callbackPageDone( 0, total );
var layers = {};
for (i = 1; i <= total; i++) {
pdf.getPage(i).then(function (page) {
var n = page.pageNumber;
page.getTextContent().then(function (textContent) {
//console.log(textContent.items[0]);0
if (null != textContent.items) {
var page_text = "";
var last_block = null;
for (var k = 0; k < textContent.items.length; k++) {
var block = textContent.items[k];
if (last_block != null && last_block.str[last_block.str.length - 1] != ' ') {
if (block.x < last_block.x)
page_text += "\r\n";
else if (last_block.y != block.y && (last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
page_text += ' ';
}
page_text += block.str;
last_block = block;
}
textContent != null && console.log("page " + n + " finished."); //" content: \n" + page_text);
layers[n] = page_text + "\n\n";
}
++self.complete;
//callbackPageDone( self.complete, total );
if (self.complete == total) {
window.setTimeout(function () {
var full_text = "";
var num_pages = Object.keys(layers).length;
for (var j = 1; j <= num_pages; j++)
full_text += layers[j];
callbackAllDone(full_text);
}, 1000);
}
}); // end of page.getTextContent().then
}); // end of page.then
} // of for
});
}; // end of pdfToText()
}; // end of class
</script>
Here's some JavaScript code that does what you want using Pdf.js from http://hublog.hubmed.org/archives/001948.html:
var input = document.getElementById("input");
var processor = document.getElementById("processor");
var output = document.getElementById("output");
// listen for messages from the processor
window.addEventListener("message", function(event){
if (event.source != processor.contentWindow) return;
switch (event.data){
// "ready" = the processor is ready, so fetch the PDF file
case "ready":
var xhr = new XMLHttpRequest;
xhr.open('GET', input.getAttribute("src"), true);
xhr.responseType = "arraybuffer";
xhr.onload = function(event) {
processor.contentWindow.postMessage(this.response, "*");
};
xhr.send();
break;
// anything else = the processor has returned the text of the PDF
default:
output.textContent = event.data.replace(/\s+/g, " ");
break;
}
}, true);
...and here's an example:
http://git.macropus.org/2011/11/pdftotext/example/
Note: This code assumes you're using nodejs. That means you're parsing a local file instead of one from a web page since the original question doesn't explicitly ask about parsing pdfs on a web page.
#gm2008's answer was a great starting point (please read it and its comments for more info), but needed some updates (08/19) and had some unused code. I also like examples that are more full. There's more refactoring and tweaking that could be done (e.g. with await), but for now it's as close to that original answer as it could be.
As before, this uses Mozilla's PDFjs library. The npmjs package is at https://www.npmjs.com/package/pdfjs-dist.
In my experience, this doesn't do well in finding where to put spaces, but that's a problem for another time.
[Edit: I believe the update to the use of .transform has restored the whitespace as it originally behaved.]
// This file is called myPDFfileToText.js and is in the root folder
let PDFJS = require('pdfjs-dist');
let pathToPDF = 'path/to/myPDFfileToText.pdf';
let toText = Pdf2TextObj();
let onPageDone = function() {}; // don't want to do anything between pages
let onFinish = function(fullText) { console.log(fullText) };
toText.pdfToText(pathToPDF, onPageDone, onFinish);
function Pdf2TextObj() {
let self = this;
this.complete = 0;
/**
*
* #param path Path to the pdf file.
* #param callbackPageDone To inform the progress each time
* when a page is finished. The callback function's input parameters are:
* 1) number of pages done.
* 2) total number of pages in file.
* 3) the `page` object itself or null.
* #param callbackAllDone Called after all text has been collected. Input parameters:
* 1) full text of parsed pdf.
*
*/
this.pdfToText = function(path, callbackPageDone, callbackAllDone) {
// console.assert(typeof path == 'string');
PDFJS.getDocument(path).promise.then(function(pdf) {
let total = pdf.numPages;
callbackPageDone(0, total, null);
let pages = {};
// For some (pdf?) reason these don't all come in consecutive
// order. That's why they're stored as an object and then
// processed one final time at the end.
for (let pagei = 1; pagei <= total; pagei++) {
pdf.getPage(pagei).then(function(page) {
let pageNumber = page.pageNumber;
page.getTextContent().then(function(textContent) {
if (null != textContent.items) {
let page_text = "";
let last_item = null;
for (let itemsi = 0; itemsi < textContent.items.length; itemsi++) {
let item = textContent.items[itemsi];
// I think to add whitespace properly would be more complex and
// would require two loops.
if (last_item != null && last_item.str[last_item.str.length - 1] != ' ') {
let itemX = item.transform[5]
let lastItemX = last_item.transform[5]
let itemY = item.transform[4]
let lastItemY = last_item.transform[4]
if (itemX < lastItemX)
page_text += "\r\n";
else if (itemY != lastItemY && (last_item.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
page_text += ' ';
} // ends if may need to add whitespace
page_text += item.str;
last_item = item;
} // ends for every item of text
textContent != null && console.log("page " + pageNumber + " finished.") // " content: \n" + page_text);
pages[pageNumber] = page_text + "\n\n";
} // ends if has items
++self.complete;
callbackPageDone(self.complete, total, page);
// If all done, put pages in order and combine all
// text, then pass that to the callback
if (self.complete == total) {
// Using `setTimeout()` isn't a stable way of making sure
// the process has finished. Watch out for missed pages.
// A future version might do this with promises.
setTimeout(function() {
let full_text = "";
let num_pages = Object.keys(pages).length;
for (let pageNum = 1; pageNum <= num_pages; pageNum++)
full_text += pages[pageNum];
callbackAllDone(full_text);
}, 1000);
}
}); // ends page.getTextContent().then
}); // ends page.then
} // ends for every page
});
}; // Ends pdfToText()
return self;
}; // Ends object factory
Run in the terminal:
node myPDFfileToText.js
Updated 02/2021
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
<script>
function Pdf2TextClass(){
var self = this;
this.complete = 0;
this.pdfToText = function(data, callbackPageDone, callbackAllDone){
console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
var loadingTask = pdfjsLib.getDocument(data);
loadingTask.promise.then(function(pdf) {
var total = pdf._pdfInfo.numPages;
//callbackPageDone( 0, total );
var layers = {};
for (i = 1; i <= total; i++){
pdf.getPage(i).then( function(page){
var n = page.pageNumber;
page.getTextContent().then( function(textContent){
//console.log(textContent.items[0]);0
if( null != textContent.items ){
var page_text = "";
var last_block = null;
for( var k = 0; k < textContent.items.length; k++ ){
var block = textContent.items[k];
if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
if( block.x < last_block.x )
page_text += "\r\n";
else if ( last_block.y != block.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
page_text += ' ';
}
page_text += block.str;
last_block = block;
}
textContent != null && console.log("page " + n + " finished."); //" content: \n" + page_text);
layers[n] = page_text + "\n\n";
}
++ self.complete;
//callbackPageDone( self.complete, total );
if (self.complete == total){
window.setTimeout(function(){
var full_text = "";
var num_pages = Object.keys(layers).length;
for( var j = 1; j <= num_pages; j++)
full_text += layers[j] ;
console.log(full_text);
}, 1000);
}
}); // end of page.getTextContent().then
}); // end of page.then
} // of for
});
}; // end of pdfToText()
}; // end of class
var pdff = new Pdf2TextClass();
pdff.pdfToText('PDF_URL');
</script>
For all the people who actually want to use it on a node server:
/**
* Created by velten on 25.04.16.
*/
"use strict";
let pdfUrl = "http://example.com/example.pdf";
let request = require('request');
var pdfParser = require('pdf2json');
let pdfPipe = request({url: pdfUrl, encoding:null}).pipe(pdfParser);
pdfPipe.on("pdfParser_dataError", err => console.error(err) );
pdfPipe.on("pdfParser_dataReady", pdf => {
//optionally:
//let pdf = pdfParser.getMergedTextBlocksIfNeeded();
let count1 = 0;
//get text on a particular page
for (let page of pdf.formImage.Pages) {
count1 += page.Texts.length;
}
console.log(count1);
pdfParser.destroy();
});
It is possible but:
you would have to use the server anyway, there's no way you can get content of a file on user computer without transferring it to server and back
I don't thing anyone has written such library yet
So if you have some free time you can learn pdf format and write such a library yourself, or you can just use server side library of course.
Related
I am trying to customize the Split node in Node-Red in a way to send the next message only when the first arrives to the Join node; as I am doing some processing in between, and would like to process each msg separately before joining them.
So I have cloned the Split node from Node-Red project, and at the part where the splitting of an array happens; I register listeners to events (random IDs generated by the original Split node).
else if (Array.isArray(msg.payload)) { // then split array into messages
msg.parts.type = 'array';
var count = msg.payload.length / node.arraySplt;
if (Math.floor(count) !== count) {
count = Math.ceil(count);
}
msg.parts.count = count;
var pos = 0;
var data = msg.payload;
msg.parts.len = node.arraySplt;
for (let i = 0; i < count; i++) {
msg.payload = data.slice(pos, pos + node.arraySplt);
if (node.arraySplt === 1) {
msg.payload = msg.payload[0];
}
msg.parts.index = i;
pos += node.arraySplt;
if (i === 0) {
send(RED.util.cloneMessage(msg));
continue;
}
let eventName = msg.parts.id + '-' + i;
addHandler(eventName, send, msg);
}
done();
}
My handler function
function addHandler(eventName, send, msg) {
RED.events.addListener(eventName, () => {
send(RED.util.cloneMessage(msg));
});
}
And at the Join node (which is in the same js file)
// commented below is the original code of the join function
// if ((tcnt > 0 && group.currentCount > tcnt) || msg.hasOwnProperty('complete')) {
// completeSend(partId);
// return;
// } else
if (msg.parts.index <= (msg.parts.count - 2)) {
let eventName = msg.parts.id + '-' + (parseInt(msg.parts.index) + 1);
RED.events.emit(eventName);
} else if (msg.parts.index >= (msg.parts.count - 1)) {
completeSend(partId);
return;
}
However, this would send the first msg (as I directly send it, and not through an event), and the last msg only; it would skip whatever in between.
Modifying the split node is the wrong way to do this.
There are rate limiting nodes that will do this for you. e.g. https://flows.nodered.org/node/node-red-contrib-semaphore
Or you can use the delay node with it's release function
Is it possible to compare filenames for a set of files that are imported as Photoshop layers ?
I have a folder of 50 jpg images which I have used in a PSD file.
Now I want to check whether all the JPG files are used or not ?
Is it possible to do so ?
As I've said, Photoshop scripting can help you achieve this by using File Objects and basic javascript knowledge. I've modified my old script as you've desired and now it should work well with any nested groups and images.
I highly encourage you to learn scripting and ask questions here wherever you feels confused.
Save below code as 'Script.jsx' and run it from 'File > Scripts > Browse'
Update 2 : Now it saves log.txt file too as per you requested. P.S. Learn from this script and tweak it to your desired result.
// Managing Document
var docs = app.documents;
// Progress Bar
var win = new Window("window{text:'Progress',bounds:[100,100,400,150],bar:Progressbar{bounds:[20,20,280,31] , value:0,maxvalue:100}};");
// assigning activeDocument
if (docs.length != 0) {
var docRef = app.activeDocument;
// Defining the folder
alert("You will be prompted for the folder containing your images.\n" +
"Files will be selected with a '.png'/'.jpg/.jpeg' on the end in the same folder.");
var folder = Folder.selectDialog();
if (!folder) {
exit;
}
var photoFiles = folder.getFiles(/\.(jpg|jpeg|png)$/i);
var matchFiles = [];
var photoFilesName = [];
//Searching for used images
var increment = parseFloat(0);
var divider = parseFloat(100/photoFiles.length);
win.show();
for (var i = 0; i < photoFiles.length; i++) {
increment = increment + divider;
var indexPhotoName = removeExtension(photoFiles[i].displayName);
photoFilesName.push(indexPhotoName);
var doc = activeDocument;
var curLayer;
goThroughLayers(doc, indexPhotoName);
}
function goThroughLayers(parentLayer, targetName) {
for (var i = 0; i < parentLayer.layers.length; i++) {
curLayer = parentLayer.layers[i];
doc.activeLayer = curLayer;
if (curLayer.typename == 'LayerSet') {
goThroughLayers(curLayer, targetName)
} else {
if (curLayer.name == targetName) {
// if (curLayer.name.match(/[e]/ig)) {
matchFiles.push(targetName);
// }
} //end if
} //end else
} //end loop
} //end function
function arr_diff(a1, a2) {
var a = [],
diff = [];
for (var i = 0; i < a1.length; i++) {
a[a1[i]] = true;
}
for (var i = 0; i < a2.length; i++) {
if (a[a2[i]]) {
delete a[a2[i]];
} else {
a[a2[i]] = true;
}
}
for (var k in a) {
diff.push(k);
}
return diff;
}
function removeExtension(str) {
return str.split('.').slice(0, -1).join('.');
}
var missItems = arr_diff(matchFiles, photoFilesName);
if (missItems.length > 0) {
var missFolder = new Folder(photoFiles[0].path + '/Missed%20Files');
if(!missFolder.exists){
missFolder.create();
}
for (var y = 0; y < photoFiles.length; y++) {
var photoTrimName = removeExtension(photoFiles[y].displayName);
for( var x = 0; x < missItems.length ; x++){
if(photoTrimName == missItems[x]){
photoFiles[y].copy(new File(missFolder+'/'+photoFiles[y].displayName));
}
}
};
win.close();
alert("You've missed total " + missItems.length + " files. Press OK to open folder containing missing files. Log report is generated wherever PSD is saved.");
var FileStr = "";
for(var m=0; m<missItems.length; m++){
FileStr = FileStr + '\n' + (m+1) + '. ' + missItems[m];
}
var str = "Your missed files are : " + FileStr;
saveTxt(str);
missFolder.execute();
} else {
win.close();
saveTxt('All Photos are used');
alert('All Photos are used');
}
} else {
alert('Open atleast one document');
}
function saveTxt(txt)
{
var Name = "LogReport_" + app.activeDocument.name.replace(/\.[^\.]+$/, '');
var Ext = decodeURI(app.activeDocument.name).replace(/^.*\./,'');
if (Ext.toLowerCase() != 'psd')
return;
var Path = app.activeDocument.path;
var saveFile = File(Path + "/" + Name +".txt");
if(saveFile.exists)
saveFile.remove();
saveFile.encoding = "UTF8";
saveFile.open("e", "TEXT", "????");
saveFile.writeln(txt);
saveFile.close();
}
In Javascript, it is possible to get some information related to PSD file layers using PSD.js library
We're being asked to host a number of iframe buster scripts on our site - they allow ads which are served from external domains into iframes to expand outside of them into the host page. Our hosting provider's warned us to watch out for security holes in these scripts. Specifically, they say some of them create cross-site scripting holes by allowing a piece of Javascript to be loaded into our site from any URL.
To implement the script, you host an HTML page on your site. I'm looking at an example from the ad provider Atlas. In this case the URL is like http://domain.com/atlas/atlas_rm.htm. That page contains a script tag with src at an external URL, and here's the JS it includes:
var ARMIfbLib = function () {
function documentWrite(htmlString) {
document.write(htmlString);
}
function writeIframeBustingScript() {
var imgSrvPath = getTlDirectoryFromQueryString(getParameterString());
if (imgSrvPath != "") {
var scriptURL = imgSrvPath + getScriptFileName();
ARMIfbLib.DocumentWrite("<script language='javascript' type='text/javascript' src='" + scriptURL + "'></scr" + "ipt>");
}
}
return {
WriteIframeBustingScript: writeIframeBustingScript,
DocumentWrite: documentWrite
}
}();
function getValueFromDelimitedString(paramKey, delimiter, queryString) {
if (paramKey == "imgSrv")
return getValueFromProperties();
var re = new RegExp(paramKey + "=" + "(.*?)" + "(" + delimiter + "|$)");
var matchArray = queryString.match(re);
if (matchArray == null)
return "";
else
return matchArray[1];
}
function getValueFromProperties() {
var iframename = unescape(self.name);
if (iframename.indexOf("<form") >= 0) {
var params = iframename.split("<input ");
for (var i = 1; i < params.length; i++) {
var parts = params[i].split(" ");
for (var j = 0; j < parts.length; j++) {
var param = parts[j].split("=");
if (param[0].indexOf("name") >= 0 && param[1].indexOf("TL_files_path") >= 0) {
param = parts[j + 1].split("=");
if (param[0].indexOf("value") >= 0) {
var value = param[1].substr(1, param[1].indexOf(">"));
value = value.substr(value, value.lastIndexOf("/"));
value = value.substr(value, value.lastIndexOf("/") + 1);
return unescape(value);
}
}
}
}
}
else if (iframename.indexOf("adparamdelim") >= 0) {
var params = iframename.split("adparamdelim");
for (var i = 0; i < params.length; i++) {
var param = params[i].split("=");
if (param[0].indexOf("TL_files_path") >= 0) {
var value = param[1];
value = value.substr(value, value.lastIndexOf("/"));
value = value.substr(value, value.lastIndexOf("/") + 1);
return value;
}
}
}
else if (/^\{.*\}$/.test(iframename)) {
try {
eval('var results = ' + iframename);
var value = results.TL_files_path;
value = value.substr(value, value.lastIndexOf("/"));
value = value.substr(value, value.lastIndexOf("/") + 1);
return value;
} catch (e) {
return "";
}
} else {
var params = iframename.split("&");
for (var i = 0; i < params.length; i++) {
var param = params[i].split("=");
if (param[0].indexOf("TL_files_path") >= 0) {
var value = unescape(param[1]);
value = value.substr(value, value.lastIndexOf("/"));
value = value.substr(value, value.lastIndexOf("/") + 1);
return value;
}
}
}
return "";
}
function getTlDirectoryFromQueryString(sLocation) {
var queryVar = getValueFromDelimitedString("imgSrv", "a4edelim", sLocation);
var temp = queryVar.substr(0, queryVar.lastIndexOf("/"));
var tlDir = temp.substr(0, temp.lastIndexOf("/") + 1);
return tlDir;
}
function getDocumentQueryString() {
return window.location.search;
}
function getIframeParameterString() {
var ret = "";
var qs = getDocumentQueryString();
if (qs.length > 0)
ret = qs.substring(1);
return ret;
}
function getScriptParameterString() {
var ret = "";
var scripts = document.getElementsByTagName('script');
for (var i = 0; i < scripts.length; i++) {
var scriptSrc = scripts[i].src;
if (scriptSrc.toLowerCase().indexOf("newiframescript") != -1 && scriptSrc.indexOf("?") != -1) {
ret = scriptSrc.substr(scriptSrc.indexOf("?") + 1);
break;
}
}
return ret;
}
function getParameterString() {
var qs = getIframeParameterString();
if (qs.length > 0 && qs.indexOf("a4edelim") > 0)
return qs;
return getScriptParameterString();
}
function getScriptFileName() {
var armdelim = ",";
var fileName = "ifb.0";
var queryString = getParameterString();
var parmValue = "";
if (queryString.length > 0) {
parmValue = getValueFromDelimitedString("armver", "a4edelim", queryString);
}
if (parmValue.length > 0) {
var fileNames = parmValue.split(armdelim);
for (var i = 0; i < fileNames.length; i++) {
if (fileNames[i].toLowerCase().indexOf("ifb") != -1) {
fileName = fileNames[i];
break;
}
}
}
return fileName + ".js";
}
if (typeof(armTestMode) == "undefined") {
ARMIfbLib.WriteIframeBustingScript();
}
I've spent a couple of hours studying this to try and work out what it's doing, but I've got bogged down in the different function calls. It seems to be grabbing a query string parameter or else a value from the name of an iframe, presumably the iframe the contains the ad.
Can anyone understand what this JS is doing? Does it look fairly safe from a XSS point of view?
=========================================
EDIT
In case useful to anybody else, we mentioned this concern to the providers, and their response was:
The iframe buster page will only work if it is in an iframe
The code in the ftlocal.html file will only work if the domain of the iframe is already the same as the domain of the parent page – So any code would already have access to the parent page anyway
The the JS script creates a dynamically generated script tag in your page.
ARMIfbLib.DocumentWrite("<script language='javascript' type='text/javascript' src='" + scriptURL + "'></scr" + "ipt>");
If you dig into where scriptURL comes from, it appears to be a parameter passed to window.location.search (the query string).
From what I can see this effectively allows any script to be passed to your page on the query string rendering it vulnerable to DOM XSS, unless it is effectively secured to allow the domain to be set by the frame name in your page. I'd do some testing using your own domains and passing the query string variables that are searched for (the string literals in the JS).
I am using an old version of UltraWebGrid by Infragistics and need to replace some of the built in javascript. The compiled js looks like its adding a bunch of functions to an object type as an api of sorts. formatted like:
var igtbl_ptsBand = ["functionname1",function(){...},"functionname2",function(){...},...
and so on. How would I override this?
Basically the control is adding html to the page in a way that is not compatible with newer browsers and the javascript code that does this just needs a little tweak. I found the code... I just need to change it.
The code can be found here
I added an answer to dump code examples in and whatnot. I will not select this answer
Similar SO question
The array you mentioned seems to be a function table of sorts:
var igtbl_ptsBand = ["func1", function() { }, "func2", function() { } ]
I would recommend using chaining instead of just an override. With chaining you can inject your own code, but still call the original function. Let's say you want to replace "func2" and chain. You could do something like this:
var origFunc, findex, ix;
if (igtbl_ptsBand.indexOf) {
// indexOf is supported, use it
findex = igtbl_ptsBand.indexOf("func2") + 1;
} else {
// Crippled browser such as IE, no indexOf, use loop
findex = -1;
for (ix = 0; ix < igtbl_ptsBand.length; ix += 2) {
if (igtbl_ptsBand[ix] === "func2") {
findex = ix + 1;
break;
}
}
}
if (findex >= 0) {
// Found it, chain
origFunc = igtbl_ptsBand[findex];
igtbl_ptsBand[findex] = function() {
// Your new pre-code here
// Call original func (chain)
origFunc();
// Your new post-code here
};
}
origFunc may have arguments, of course, and you may want to use the JavaScript call() function to set the "this pointer" to something specific, e.g.:
origFunc.call(customThis, arg1, arg2...);
If the arguments are in an array, you can use apply() instead of call().
I would not recommend doing this. You should always try to work with a third party library, not against it. That being said, this should work:
igtbl_ptsBand[igtbl_ptsBand.indexOf("functionYouWantToOverwrite") + 1] = function () {
// your new stuff...
};
Ok here is what I am doing. I will update this with my progress.
This fixes my problem. It turns out I had to apply the functionarray to all child objects of the parent "rows". To do this I added the code in my "Fix Rows" function. I split it up because i am running accross other browser JS error which I am fixing in this js file.
Here is the js file that I added to my .net page like so...
</form>
<script type="text/javascript" src="../../scripts/BrowserCompat.js"></script>
</body>
</html>
.
Brows_FixUltraWebGrid();
function Brows_FixUltraWebGrid() {
FixRows();
}
function FixRows() {
FixGridRows_render();
for (var i = 0; i < igtbl_ptsRows.length; i += 2)
igtbl_Rows.prototype[igtbl_ptsRows[i]] = igtbl_ptsRows[i + 1];
}
function FixGridRows_render() {
var origFunc, findex, ix;
if (igtbl_ptsRows.indexOf) {
// indexOf is supported, use it
findex = igtbl_ptsRows.indexOf("render") + 1;
} else {
// Crippled browser such as IE, no indexOf, use loop
findex = -1;
for (ix = 0; ix < igtbl_ptsRows.length; ix += 2) {
if (igtbl_ptsRows[ix] === "render") {
findex = ix + 1;
break;
}
}
}
if (findex >= 0) {
// Found it, chain
origFunc = igtbl_ptsRows[findex];
igtbl_ptsRows[findex] = function() {
// Your new pre-code here
// Call original func (chain)
//origFunc();
// Your new post-code here
var strTransform = this.applyXslToNode(this.Node);
if (strTransform) {
var anId = (this.AddNewRow ? this.AddNewRow.Id : null);
//new logic to include tbody if it is not there
var tadd1 = '';
var tadd2 = '';
if (!(/\<tbody\>/.test(strTransform))) {
tadd1 = '<tbody>';
tadd2 = '</tbody>';
}
this.Grid._innerObj.innerHTML =
"<table style=\"table-layout:fixed;\">" + tadd1 + strTransform + tadd2 + "</table>";
//old line
//this.Grid._innerObj.innerHTML = "<table style=\"table-layout:fixed;\">" + strTransform + "</table>";
var tbl = this.Element.parentNode;
igtbl_replaceChild(tbl, this.Grid._innerObj.firstChild.firstChild, this.Element);
igtbl_fixDOEXml();
var _b = this.Band;
var headerDiv = igtbl_getElementById(this.Grid.Id + "_hdiv");
var footerDiv = igtbl_getElementById(this.Grid.Id + "_fdiv");
if (this.AddNewRow) {
if (_b.Index > 0 || _b.AddNewRowView == 1 && !headerDiv || _b.AddNewRowView == 2 && !footerDiv) {
var anr = this.AddNewRow.Element;
anr.parentNode.removeChild(anr);
if (_b.AddNewRowView == 1 && tbl.tBodies[0].rows.length > 0)
tbl.tBodies[0].insertBefore(anr, tbl.tBodies[0].rows[0]);
else
tbl.tBodies[0].appendChild(anr);
}
this.AddNewRow.Element = igtbl_getElementById(anId);
this.AddNewRow.Element.Object = this.AddNewRow;
}
this.Element = tbl.tBodies[0];
this.Element.Object = this;
this._setupFilterRow();
for (var i = 0; i < this.Band.Columns.length; i++) {
var column = this.Band.Columns[i];
if (column.Selected && column.hasCells()) {
var col = this.getColumn(i);
if (col)
igtbl_selColRI(this.Grid.Id, col, this.Band.Index, i);
}
}
if (this.ParentRow) {
this.ParentRow.ChildRowsCount = this.length;
this.ParentRow.VisChildRowsCount = this.length;
}
}
console.log('overridden row render function executed');
};
}
}
So I have been able to figure out in Alfresco, there is a form called skin.css that allows me to change the highlighted color of data table items. However, I only want to be able to change this property during the course of a workflow and not as it applies to all data list elements throughout the entire Share website.
To start, I have a script which kicks off based on a rule and moves any updated/new files into a specified folder and then kicks off a workflow for that file. Within starting the workflow, the package items list is populated with all the documents within the same folder as the document that just got moved/the workflow started on. Below is the script:
function main()
{
var counter=0;
//Administrative Adjudication space/folder MUST exist under companyhome.
var rootSpaceName = companyhome.childByNamePath("mainFolder");
//If the rootspacename is null (not previously created), then exit the program as we have nothing to do.
if(rootSpaceName == null)
{
logger.log("Company Home/mainFolder does not exist, so we have nothing to do.");
return;
}
else
{
logger.log("Company Home/mainFolder exists, so carry on our process.");
//Creates an array of all the children under the rootSpaceName
var childList = rootSpaceName.children;
//Creates a variable which counts the number of children in the childList array
var count = childList.length;
//var seconds = new Date().getTime() / 1000;
//If there are no children in the rootSpaceName folder, exit the program.
if(count == 0)
{
logger.log("Company Home/mainFolder does not have child, nothing to do.");
return;
}
else
{
for(var i = 0; i < count; i++)
{
//Title MUST exist.
var childTitle = childList[i].properties["hearing:childTitle"];
//Author MUST exist.
var childAuthor = childList[i].properties["hearing:childAuthor"];
logger.log("childTitle: " + childTitle);
logger.log("childAuthor: " + childAuthor);
if(childTitle == null || childAuthor == null)
{
logger.log(i + ". Both the childTitle and childAuthor are null...");
continue;
}
var child = childList[i];
if(child.isContainer == false)
{
for(var j = 0; j < count; j++)
{
var newChildName = childList[j].properties.name;
logger.log("New child name: " + newChildName);
var newChild = childList[j];
if((newChild.isContainer == true) && (childTitle == newChildName))
{
logger.log("There is a currently existing folder with the same name as the title of original child");
var newSpaceName = rootSpaceName.childByNamePath(newChildName);
var newChildList = newSpaceName.children;
var newCount = newChildList.length;
for(var k = 0; k < newCount; k++)
{
var newNewChildName = newChildList[k].properties.name;
var newNewchildAuthor = newChildList[k].properties.author;
var newNewChild = newChildList[k];
if((newNewChild.isContainer == true) && (newNewchildAuthor == childAuthor))
{
var currentSpace = newSpaceName.childByNamePath(newNewChildName);
if(child.isDocument == true)
{
//Only want the workflow to run once so we increment count
counter=counter+1;
child.move(currentSpace);
//If Count is 1, then run workflow
if(counter==1)
{
//starts HelloWorldUI workflow
var wfdef=workflow.getDefinitionByName("activiti$helloWorldUI");
if(wfdef)
{
var wfparams=new Array();
wfparams["bpm:workflowDescription"]="";
wfparams["bpm:groupAssignee"]=people.getGroup("GROUP_Managers");
var wfpackage=workflow.createPackage();
var rootSpaceName=currentSpace;
var childList=rootSpaceName.children;
var count=childList.length;
//add all existing documents in the space to the workflow
for(var i = 0; i < count; i++)
{
wfpackage.addNode(childList[i]);
}
var wfpath=wfdef.startWorkflow(wfpackage,wfparams);
var tasks=wfpath.getTasks();
for each(task in tasks)
{
task.endTask(null);
}
}
}
}
}
}
}
else
{
// If title folder is already created, not need to create again.
var newSpaceName = companyhome.childByNamePath("mainFolder/" + childTitle);
if(newSpaceName == null)
{
newSpaceName = rootSpaceName.createFolder(childTitle);
logger.log("mainFolder/" + childTitle + " is created.");
}
// If author folder is already created, not need to create again.
var newNewSpaceName = companyhome.childByNamePath("mainFolder/" + childTitle + "/" + childAuthor);
if(newNewSpaceName == null)
{
newNewSpaceName = newSpaceName.createFolder(childAuthor);
logger.log("mainFolder/" + childTitle + "/" + childAuthor + " is created.");
}
if(child.isDocument == true)
{
counter=counter + 1;
child.move(newNewSpaceName);
if(counter == 1)
{
var wfdef=workflow.getDefinitionByName("activiti$helloWorldUI");
if(wfdef)
{
var wfparams=new Array();
wfparams["bpm:workflowDescription"]="";
wfparams["bpm:groupAssignee"]=people.getGroup("GROUP_Managers");
var wfpackage=workflow.createPackage();
var rootSpaceName=newNewSpaceName;
var childList=rootSpaceName.children;
var count=childList.length;
//add all items from the space to the workflow
for(var i = 0; i <c ount; i++)
{
wfpackage.addNode(childList[i]);
}
var wfpath=wfdef.startWorkflow(wfpackage,wfparams);
var tasks=wfpath.getTasks();
for each(task in tasks)
{
task.endTask(null);
}
}
}
logger.log("Moving file " + child.properties.name);
}
}
}
}
}
}
}
return;
}
main();
I would like to be able to create a function of some sort that can be called to access the skin.css file only during the course of the workflow and basically set .yui-skin-default tr.yui-dt-first{background-color:#FFF} in the CSS file. Does anyone know how I would go about doing that?
If you want to change only in start workflow page,
your css should write in start-workflow.css which is pointed by start-workflow.get.head.ftl. This css will override in other css file like skin.css.
Like this way, you can override any css to affect in only start workflow page not others.
You can try for other workflow related pages.
I've found a bookmarklet that will allow you to inject a CSS file on any page you'd like. Only down side is that you'll have to run it every time you load your page.
http://allben.net/post/2010/01/30/CSS-JavaScript-Injection-Bookmarklets.aspx