I'm struggling with the script below. I modified a script I found so that it extracts all stories as txt files and saves each txt file using the text of the story's first paragraph as the filename.
It turned out we didn't want all stories extracted however the ones we did want had a set paragraph style for the first paragraph of story.
The part I'm struggling with is the syntax of the if statement that checks which paragraph style is currently applied.
any help appreciated, sorry if my problem is unclear
// Script entry point. main is a function declaration, so it is hoisted and can be called before its definition below.
main();
// Checks that a document with at least one story is open, then shows the
// export dialog. Alerts the user and stops when either precondition fails.
function main(){
    //Make certain that user interaction (display of dialogs, etc.) is turned on.
    app.scriptPreferences.userInteractionLevel = UserInteractionLevels.interactWithAll;
    if (app.documents.length == 0){
        alert("No documents are open. Please open a document and try again.");
        return;
    }
    if (app.activeDocument.stories.length == 0){
        alert("The document does not contain any text. Please open a document containing text and try again.");
        return;
    }
    myDisplayDialog();
}
// Shows the "ExportAllStories" dialog (one radio group: Text Only / RTF /
// InDesign Tagged Text). If the user confirms, asks for a destination folder
// and hands the selected format index and folder to myExportAllStories.
// The dialog is destroyed on both the OK and the Cancel path.
function myDisplayDialog(){
    var myDialog = app.dialogs.add({name:"ExportAllStories"});
    //Add a dialog column.
    var myDialogColumn = myDialog.dialogColumns.add();
    var myPanel = myDialogColumn.borderPanels.add();
    myPanel.staticTexts.add({staticLabel:"Export as:"});
    var myExportFormatButtons = myPanel.radiobuttonGroups.add();
    myExportFormatButtons.radiobuttonControls.add({staticLabel:"Text Only", checkedState:true});
    myExportFormatButtons.radiobuttonControls.add({staticLabel:"RTF"});
    myExportFormatButtons.radiobuttonControls.add({staticLabel:"InDesign Tagged Text"});

    var myReturn = myDialog.show();
    if (myReturn == true){
        //Get the values from the dialog box before the dialog goes away.
        var myExportFormat = myExportFormatButtons.selectedButton;
        //destroy must be CALLED; a bare "myDialog.destroy;" does nothing and leaks the dialog.
        myDialog.destroy();
        var myFolder = Folder.selectDialog("Choose a Folder");
        if ((myFolder != null) && (app.activeDocument.stories.length != 0)){
            myExportAllStories(myExportFormat, myFolder);
        }
    }
    else{
        myDialog.destroy();
    }
}
//myExportAllStories exports every story whose FIRST paragraph uses the
//"PRODUCT HEADING" paragraph style. Each file is named after that paragraph's
//text (trailing whitespace / paragraph return removed) plus the format's extension.
//myExportFormat is a number from 0-2, where 0 = text only, 1 = rtf, and 2 = tagged text.
//myFolder is a reference to the folder in which you want to save your files.
function myExportAllStories(myExportFormat, myFolder){
    var myFormat, myExtension;
    //The format does not change per story, so resolve it once up front.
    switch(myExportFormat){
        case 0:
            myFormat = ExportFormat.textType;
            myExtension = ".txt";
            break;
        case 1:
            myFormat = ExportFormat.RTF;
            myExtension = ".rtf";
            break;
        case 2:
            myFormat = ExportFormat.taggedText;
            myExtension = ".txt";
            break;
        default:
            throw new Error("Unknown export format: " + myExportFormat);
    }
    var myStories = app.activeDocument.stories;
    for (var myCounter = 0; myCounter < myStories.length; myCounter++){
        var myStory = myStories.item(myCounter);
        //A story without paragraphs has no first paragraph to inspect.
        if (myStory.paragraphs.length == 0) continue;
        var myFirstParagraph = myStory.paragraphs[0];
        //appliedParagraphStyle is a ParagraphStyle object, so compare its name;
        //note "==" (comparison), not "=" (assignment, which is always truthy here).
        if (myFirstParagraph.appliedParagraphStyle.name != "PRODUCT HEADING") continue;
        //The paragraph contents normally end with a paragraph return, which must not end up in the file name.
        var myFileName = String(myFirstParagraph.contents).replace(/\s+$/, '');
        if (myFileName == "") continue;
        var myFile = new File(myFolder + "/" + myFileName + myExtension);
        myStory.exportFile(myFormat, myFile);
    }
}
The type of appliedParagraphStyle is [Object ParagraphStyle], so you need to compare it against either another paragraph style (i.e., app.activeDocument.paragraphStyles.item("PRODUCT HEADING") which does return a paragraph style), or compare the names of the styles.
Also, do not use = to test. A single = is assignment; to test for (in)equality, use a double ==. (JavaScript also has a 'strictly equals' comparison: ===, but in this case it should not be used.)
Your script will work if you change the comparison line to
if (myStory.paragraphs[0].appliedParagraphStyle.name == "PRODUCT HEADING")
Additionally, the line
myFileName = myStory.paragraphs[0].contents;
grabs the entire paragraph to use for a file name, and usually this will include the paragraph return at the end. (The exception is when this paragraph is the last one in a story.) Since you use this string as a new file name, you must remove the paragraph return if it's there. That can be done in several ways, but the easiest is to use a RegEx replace:
myFileName = myStory.paragraphs[0].contents.replace(/\s*$/,'');
because that will also remove all stray spaces and tabs at the end for free.
Related
I'm working on my final project of the Winter 2017 quarter to demonstrate how to use Regular Expressions in both C# and JavaScript code behind pages. I've got the C# version of my demonstration program done, but the JavaScript version is making me pull what little hair I have left on my head out (no small achievement since I got a fresh buzz cut this morning!). The problem involves not getting any output after applying a Regular Expression in a While loop to get each instance of the expression and printing it out.
On my HTML page I have an input textarea, seven radio buttons, an output textarea, and two buttons underneath (one button is to move the output text to the input area to perform multiple iterations of applying expressions, and the other button to clear all textareas for starting from scratch). Each radio button links to a function that applies a regular expression to the text in the input area. Five of my seven functions work; the sixth is the one I can't figure out, and the seventh is essentially the same but with a slightly different RegEx pattern, so if I fix the sixth function, the seventh function will be a snap.
(I tried to insert/upload a JPG of the front end, but the photo upload doesn't seem to be working. Hopefully you get the drift of what I've set up.)
Here are my problem children from my JS code behind:
// RegEx_Demo_JS.js - code behind for RegEx_Demo_JS
// Page-level state shared by the radio-button handler functions.
// NOTE(review): only one handler is visible in this excerpt; it assigns inputString and pattern — confirm how the others use these.
var inputString; // Global variable for the input from the input text box.
var pattern; // Global variable for the regular expression.
var result; // Global variable for the result of applying the regular expression to the user input.
// StringBuilder: collects string fragments and joins them on toString().
// The constructor takes no arguments; a new builder starts out empty.
//   append(value)     - adds value (converted to a string) unless it is empty/null/undefined
//   appendLine(value) - like append, but prefixes "\r\n" when the builder already holds text;
//                       on an empty builder an empty value adds nothing
//   clear(), isEmpty(), toString()
function StringBuilder()
{
    var parts = [];

    // True for anything other than null / undefined.
    function isDefined(value)
    {
        return typeof(value) != "undefined" && value != null;
    }

    // Name of the constructor function of instance, or "undefined" when the
    // constructor's source text does not look like "function Name".
    function typeNameOf(instance)
    {
        if (!isDefined(instance.constructor)) throw Error("Unexpected object type");
        var found = String(instance.constructor).match(/function\s+(\w+)/);
        return isDefined(found) ? found[1] : "undefined";
    }

    // Normalizes any input to string form: "" for null/undefined,
    // String(value) for anything not constructed by String.
    function toText(value)
    {
        if (!isDefined(value)) return "";
        return typeNameOf(value) != typeNameOf(new String()) ? String(value) : value;
    }

    this.append = function (string)
    {
        var text = toText(string);
        if (text.length > 0) parts.push(text);
    }
    this.appendLine = function (string)
    {
        var text = toText(string);
        if (!this.isEmpty())
        {
            parts.push(text.length > 0 ? "\r\n" + text : "\r\n");
            return;
        }
        // First entry: no leading line break, and nothing at all for empty text.
        if (text.length > 0) parts.push(text);
    }
    this.clear = function () { parts = []; };
    this.isEmpty = function () { return parts.length == 0; };
    this.toString = function () { return parts.join(""); };
}
Within the code of the second radio button (which will be the seventh and last function to complete), I tested the StringBuilder with data in a local variable, and it ran successfully and produced output in the output textarea. But I get no output from this next function, which uses a while loop:
// Finds every standalone "the" (any letter case, surrounded by whitespace) in the
// text of #txtUserInput and writes one "Match: ..." line per hit to #txtRegExOutput.
// Stores the input and the pattern in the globals inputString and pattern.
function RegEx_Match_TheOnly_AllInstances()
{
    inputString = document.getElementById("txtUserInput").value;
    // /i matches either case; /g makes exec() resume after the previous match,
    // so the loop below visits every occurrence instead of repeating the first.
    // NOTE: the match consumes the surrounding whitespace, so in "the the" two
    // adjacent words sharing one space produce only one hit.
    pattern = /(\s+the\s+)/ig;
    var arrResult; // Array for the results of the search.
    // StringBuilder is a constructor; there is no getStringBuilder() factory.
    var sb = new StringBuilder(); // Collects one line per match
    while ((arrResult = pattern.exec(inputString)) !== null)
    {
        // appendLine is a method and must be called; assigning to it would
        // overwrite the method and add nothing to the builder.
        sb.appendLine("Match: " + arrResult[0]);
    }
    document.getElementById("txtRegExOutput").value = sb.toString();
    /* Original code from C# version:
    // string pattern = @"\s+(?i)the\s+"; // Same as above, but using Option construct for case insensitive search.
    string pattern = @"(^|\s+)(?i)the(\W|\s+)";
    MatchCollection matches = Regex.Matches(userTextInput, pattern);
    StringBuilder outputString = new StringBuilder();
    foreach (Match match in matches)
    {
    string outputRegExs = "Match: " + "\"" + match.Value + "\"" + " at index [" + match.Index + ","
    + (match.Index + match.Length) + "]" + "\n";
    outputString.Append(outputRegExs);
    }
    txtRegExOutput.Text = outputString.ToString();
    */
} // End RegEx_Match_TheOnly_AllInstances
I left the commented code in to show what I had used in the C# code behind version to illustrate what I'm trying to accomplish.
The test input/string I used for this function is:
Don’t go there. If you want to be the Man, you have to beat The Man.
That should return two hits. Ideally, I want it to show the word that it found and the index where it found the word, but at this point I'd be happy to just get some output showing every instance it found, and then build on that with the index and possibly the lastIndex.
So, is my problem in my While loop, the way I'm applying the StringBuilder, or a combination of the two? I know the StringBuilder code works, at least when not being used in a loop and using some test data from the site I found that code. And the code for simply finding the first instance of "the" as a standalone or inside another word does work and returns output, but that doesn't use a loop.
I've looked through Stack Overflow and several other JavaScript websites for inspiration, but nothing I've tried so far has worked. I appreciate any help anyone can provide! (If you need me to post any other code, please advise and I'll be happy to oblige.)
I want to be able to link any word of my choice to a specific URL for example:
I want the word "goat" to link to "http://goat.com" across the entire website. So all "goat"/s will link to that URL right across the website.
I am using wordpress and I have not yet found a plugin to do this. If I can get a solution to this I would most likely create a plugin for this functionality.
I know how to target one word on a single page. But I would like it to be across all the pages and all the words in those pages( I used JavaScript for this).
Something like this may work for you.
// Replaces EVERY occurrence of textToReplace in element's HTML with a link to
// http://www.<textToReplace>.com.
//   textToReplace - plain string to search for (also used as the host name)
//   element       - object exposing a read/write innerHTML property
// NOTE: this operates on raw HTML, so occurrences inside tags or attributes are
// rewritten too; use a text-node walk when the markup may contain the word.
function replaceWithUri(textToReplace, element){
    // An empty search string would split the markup between every character.
    if (!textToReplace) return;
    var link = '<a href="http://www.' + textToReplace + '.com" >' + textToReplace + '</a>';
    // replace() with a string pattern only touches the first hit; split/join
    // rewrites all of them in a single pass and never re-scans the inserted links.
    element.innerHTML = element.innerHTML.split(textToReplace).join(link);
}
// Example call: link "goat" inside the page's <body> element.
replaceWithUri('goat', document.getElementsByTagName('body')[0]);
Here's a crappy solution but it's better than nothing:
I found some code here which searches for a word across the whole page, so I copy-pasted that and modified it.
The replaceWord variable cannot contain the same string as word, otherwise it'll loop infinitely.
// Walks the DOM depth-first from document.body using an explicit stack and
// rewrites the HTML of any element that has a text-node child containing `word`.
// replaceWord must not itself contain `word`, otherwise freshly inserted markup
// matches again and the walk never finishes.
var word = " goat",
replaceWord = " <a href = 'http://goat.com'>goat</a>",
queue = [document.body],
curr
;
// pop() returns undefined once the stack is empty, which ends the loop.
while (curr = queue.pop()) {
// Skip whole subtrees that do not contain the word at all.
// (String.prototype.match converts the string argument to a RegExp.)
if (!curr.textContent.match(word)) continue;
for (var i = 0; i < curr.childNodes.length; ++i) {
switch (curr.childNodes[i].nodeType) {
case Node.TEXT_NODE : // 3
if (curr.childNodes[i].textContent.match(word)) {
// replace() with a string pattern rewrites only the first occurrence in curr's HTML.
// NOTE(review): assigning innerHTML rebuilds curr's child nodes while this loop is
// still indexing them; later iterations see the new nodes.
curr.innerHTML = curr.innerHTML.replace(word,replaceWord);
}
break;
case Node.ELEMENT_NODE : // 1
// Defer element children; they are examined when popped from the stack.
queue.push(curr.childNodes[i]);
break;
}
}
}
Hello goat
<div>Look a goat</div>
This might be a bit resource intensive and replaceWord cannot contain the same string as word, otherwise it'll loop forever.
// Once the page has finished loading, turn every " goat" in the body into a link.
// The load event is dispatched on window; a handler assigned to document.onload
// is not invoked by browsers, so the original code never ran.
window.addEventListener('load', function() {
    var word = " goat",
        replaceWord = " <a href = 'http://goat.com'>goat</a>";
    var html = document.body.innerHTML;
    // Leave the DOM alone when there is nothing to replace: writing innerHTML
    // re-creates every node in the body.
    if (html.indexOf(word) === -1) {
        return;
    }
    // One pass replaces all occurrences and never re-scans the inserted markup,
    // so it cannot loop forever and the body is re-parsed only once.
    document.body.innerHTML = html.split(word).join(replaceWord);
});
Hello goat
<div>Look a goat</div>
I've written an extension for firefox which highlights all words on a web page (excluding some words in a given list).
What i've noticed is that (besides that my extension is terribly slow) some web pages get "destroyed", more specifically the layout gets destroyed (particularly websites with overlay advertising or fancy drop-down menus).
My code wraps <span> tags around every "word", or to be precise around every token, because i'm splitting the text nodes with a whitespace as seperator.
So is it possible anyway to realize this task without destroying the page's layout?
I'm iterating over all text nodes, split them, and iterate over every token.
When the token is in my list, i don't highlight it, else i wrap the <span> tag around it.
So any suggestions how this could be done faster would be helpful, too.
Here are some screenshots for a correctly highlighted and a not correctly highlighted web page:
right:
en.wikipedia.org before highlighting,
en.wikipedia.org after highlighting.
wrong:
developer.mozilla.org before highlighting,
developer.mozilla.org after highlighting.
OK. Study this code. It searches for all instances of "is" and highlights each one if it is not surrounded by word characters. Put this in your scratchpad while this tab is focused. You will see that words like "List" and other words containing "is" are not highlighted, but all the standalone "is"s are.
I basically made an addon here for you. You can now release this as an addon called RegEx FindBar and take all the credit....
// Scratchpad script (browser chrome context): finds every "is" in the current
// tab with nsIFind and adds it to the find-highlight selection when it is a
// standalone word, i.e. neither neighbouring character is a word character.
var doc = gBrowser.contentDocument;
var ctrler = _getSelectionController(doc.defaultView);
var searchRange = doc.createRange();
searchRange.selectNodeContents(doc.documentElement);
// startPt / endPt are collapsed ranges marking where the next search begins and ends.
let startPt = searchRange.cloneRange();
startPt.collapse(true);
let endPt = searchRange.cloneRange();
endPt.collapse(false);
let retRange = null;
// XPCOM contract IDs start with "@".
let finder = Cc["@mozilla.org/embedcomp/rangefind;1"].createInstance().QueryInterface(Ci.nsIFind);
finder.caseSensitive = false;
var i = 0;
while (retRange = finder.Find('is', searchRange, startPt, endPt)) {
    i++;
    var stCont = retRange.startContainer;
    var endCont = retRange.endContainer;
    console.log('retRange(' + i + ') = ', retRange);
    console.log('var txt = retRange.commonAncestorContainer.data',retRange.commonAncestorContainer.data);
    //now test if one position before startOffset and one position after endOffset are WORD characters
    var isOneCharBeforeStCharWordChar; //holds whether the character before the start character is a word character
    if (retRange.startOffset == 0) {
        //no characters before this character so obviously not a word char
        isOneCharBeforeStCharWordChar = false;
    } else {
        var oneCharBeforeStChar = stCont.data.substr(retRange.startOffset-1,1);
        isOneCharBeforeStCharWordChar = /\w/.test(oneCharBeforeStChar);
        console.log('oneCharBeforeStChar',oneCharBeforeStChar);
    }
    var isOneCharAfterEndCharWordChar; //holds whether the character after the end character is a word character
    // endOffset already points at the position AFTER the last matched character,
    // so the match reaches the end of the node when endOffset equals the node
    // length (comparing against length - 1 skipped the check one character early).
    if (retRange.endOffset >= endCont.length) {
        //no characters after this character so obviously not a word char
        isOneCharAfterEndCharWordChar = false;
    } else {
        var oneCharAferEndChar = endCont.data.substr(retRange.endOffset,1);
        isOneCharAfterEndCharWordChar = /\w/.test(oneCharAferEndChar);
        console.log('oneCharAferEndChar',oneCharAferEndChar);
    }
    if (isOneCharBeforeStCharWordChar == false && isOneCharAfterEndCharWordChar == false) {
        //highlight it as surrounding characters are not word characters
        _highlightRange(retRange, ctrler);
        console.log('highlighted it as it was not surrounded by word characters');
    } else {
        console.log('NOT highlighted as it was surrounded by word characters');
    }
    //continue the search right after this match
    startPt = retRange.cloneRange();
    startPt.collapse(false);
}
/*********************/
// Returns the nearest node (aNode itself or an ancestor) that is an
// nsIDOMNSEditableElement, provided that node has an editor; returns null when
// the first such node has no editor or when no ancestor qualifies.
function _getEditableNode(aNode) {
    for (let current = aNode; current; current = current.parentNode) {
        if (current instanceof Ci.nsIDOMNSEditableElement) {
            return current.editor ? current : null;
        }
    }
    return null;
}
// Adds aRange to the SELECTION_FIND selection so it is drawn as a find highlight.
// aRange      - the DOM range to highlight
// aController - selection controller to use when the range does not start inside an editable element
// NOTE(review): this function reads this._getEditableNode, this._editors, this._stateListeners and
// this._createStateListener; presumably it was lifted from an object's method — confirm what `this`
// is at the call site. _createStateListener is not defined anywhere in this excerpt.
function _highlightRange(aRange, aController) {
let node = aRange.startContainer;
let controller = aController;
// Ranges that start inside an editable element use that element's editor's selection controller.
let editableNode = this._getEditableNode(node);
if (editableNode)
controller = editableNode.editor.selectionController;
let findSelection = controller.getSelection(Ci.nsISelectionController.SELECTION_FIND);
findSelection.addRange(aRange);
if (editableNode) {
// Highlighting added, so cache this editor, and hook up listeners
// to ensure we deal properly with edits within the highlighting
if (!this._editors) {
this._editors = [];
this._stateListeners = [];
}
// Register each editor only once; _editors and _stateListeners are parallel arrays.
let existingIndex = this._editors.indexOf(editableNode.editor);
if (existingIndex == -1) {
let x = this._editors.length;
this._editors[x] = editableNode.editor;
this._stateListeners[x] = this._createStateListener();
this._editors[x].addEditActionListener(this);
this._editors[x].addDocumentStateListener(this._stateListeners[x]);
}
}
}
// Returns the nsISelectionController for aWindow, obtained through the window's
// docShell, or null when the window has no width or no height.
function _getSelectionController(aWindow) {
    // display: none iframes don't have a selection controller, see bug 493658
    let hasSize = aWindow.innerWidth && aWindow.innerHeight;
    if (!hasSize) {
        return null;
    }

    // Yuck. See bug 138068.
    let docShell = aWindow
        .QueryInterface(Ci.nsIInterfaceRequestor)
        .getInterface(Ci.nsIWebNavigation)
        .QueryInterface(Ci.nsIDocShell);

    return docShell
        .QueryInterface(Ci.nsIInterfaceRequestor)
        .getInterface(Ci.nsISelectionDisplay)
        .QueryInterface(Ci.nsISelectionController);
}
Oh edit my solution out, will update with proper solution, I see you want to highlight all words
This is the code how firefox highlights stuff without changing document: Finder.jsm - _highlight function. You will have to copy this and use it for the whole document, if you need help let me know and I'll do it.
Here was my solution to highlight all matches of single word: https://stackoverflow.com/a/22206366/1828637
Here man this is how you are going to highlight the whole document, I didn't finish the snippet but this is the start of it: Gist - HighlightTextInDocument
Here's the copy paste answer to highlight everything in the document. As you learn more about it share with us, like how you can highlight with a different color, right now its all pink O_O
// Climbs from aNode through parentNode links to the first nsIDOMNSEditableElement.
// Returns that node when it has an editor; otherwise (no editor, or nothing
// editable on the way up) returns null.
function _getEditableNode(aNode) {
    let candidate = aNode;
    while (candidate && !(candidate instanceof Ci.nsIDOMNSEditableElement)) {
        candidate = candidate.parentNode;
    }
    if (!candidate) {
        return null;
    }
    return candidate.editor ? candidate : null;
}
// Adds aRange to the SELECTION_FIND selection so it is drawn as a find highlight.
// aRange      - the DOM range to highlight
// aController - selection controller to use when the range does not start inside an editable element
// NOTE(review): this function reads this._getEditableNode, this._editors, this._stateListeners and
// this._createStateListener; presumably it was lifted from an object's method — confirm what `this`
// is at the call site. _createStateListener is not defined anywhere in this excerpt.
function _highlightRange(aRange, aController) {
let node = aRange.startContainer;
let controller = aController;
// Ranges that start inside an editable element use that element's editor's selection controller.
let editableNode = this._getEditableNode(node);
if (editableNode)
controller = editableNode.editor.selectionController;
let findSelection = controller.getSelection(Ci.nsISelectionController.SELECTION_FIND);
findSelection.addRange(aRange);
if (editableNode) {
// Highlighting added, so cache this editor, and hook up listeners
// to ensure we deal properly with edits within the highlighting
if (!this._editors) {
this._editors = [];
this._stateListeners = [];
}
// Register each editor only once; _editors and _stateListeners are parallel arrays.
let existingIndex = this._editors.indexOf(editableNode.editor);
if (existingIndex == -1) {
let x = this._editors.length;
this._editors[x] = editableNode.editor;
this._stateListeners[x] = this._createStateListener();
this._editors[x].addEditActionListener(this);
this._editors[x].addDocumentStateListener(this._stateListeners[x]);
}
}
}
// Looks up the selection controller belonging to aWindow's docShell.
// Returns null for windows whose innerWidth or innerHeight is zero/absent.
function _getSelectionController(aWindow) {
    // display: none iframes don't have a selection controller, see bug 493658
    if (!(aWindow.innerWidth && aWindow.innerHeight)) {
        return null;
    }
    // Yuck. See bug 138068.
    let requestor = aWindow.QueryInterface(Ci.nsIInterfaceRequestor);
    let webNavigation = requestor.getInterface(Ci.nsIWebNavigation);
    let docShell = webNavigation.QueryInterface(Ci.nsIDocShell);
    let shellRequestor = docShell.QueryInterface(Ci.nsIInterfaceRequestor);
    let selectionDisplay = shellRequestor.getInterface(Ci.nsISelectionDisplay);
    return selectionDisplay.QueryInterface(Ci.nsISelectionController);
}
// Highlights the entire content document of the current tab by adding one range
// that spans the whole document element to the find-highlight selection.
var doc = gBrowser.contentDocument;
var searchRange = doc.createRange();
searchRange.selectNodeContents(doc.documentElement);
// _getSelectionController returns null for windows without a width or height;
// passing that on would fail inside _highlightRange, so skip in that case.
var selectionController = _getSelectionController(gBrowser.contentWindow);
if (selectionController) {
    _highlightRange(searchRange, selectionController);
}
@jervis, I can't comment on your comment under @Noitidart's code as I don't have 50 rep yet, so I have to post here.
Re:
I did it with 'gFindBar._highlightDoc(true, word)' now. I'm using firefox 17, so i dont know if gFindBar is state of the art. – jervis 40 mins ago
But I tested his code and and it works.
Don't use gFindBar.
Copy it and then paste it into your Scratchpad.
Why are you using gFindBar._highlightDoc(true, word)? I thought you wanted to highlight everything in the document? Where did you get _highlightDoc from? I don't see that anywhere in @Noitidart's code.
Regarding your comment on iterating over all words and using gFindBar._highlightDoc:
I did it with 'gFindBar._highlightDoc(true, word)' now. I'm using firefox 17, so i dont know if gFindBar is state of the art. – jervis 39 mins ago
Dude, why do that... I saw @Noitidart posted a per-word solution on the linked topic: gBrowser.tabContainer.childNodes[0].linkedBrowser.finder.highlight(true, 'YOUR_WORD_HERE'); that is extremely easy, one line, and no need to create text nodes, spans or anything. You have to run this code on each tab you want to highlight in.
I've got a script which takes the filename from a file input on a form and puts it into a text field after stripping away some details which aren't required.
All files I upload will be labeled with a key word
Note that this is for a Chrome extension so X-browser support isn't necessary.
// Derives a title from the chosen upload's file name: lower-cases it, turns "_"
// and "-" into spaces, drops the fake path prefix, then keeps everything from the
// first recognised keyword onwards and writes it to #material_title.
// NOTE(review): "uploaded_data" has no "#" or "." prefix, so jQuery treats it as a
// tag name; presumably "#uploaded_data" was intended — confirm against the form markup.
var filename = $("uploaded_data").val().toLowerCase();
filename = filename.replace(/_/g, " ").replace(/-/g, " ").replace("c:\\fakepath\\", "");
// type_index[n] is the position of keyword n in the file name (-1 when absent).
type_index[1] = filename.indexOf("red");
type_index[2] = filename.indexOf("blue");
type_index[3] = filename.indexOf("green");
type_index[4] = filename.indexOf("purple");
type_index[5] = filename.indexOf("magenta");
// Fall back to the whole cleaned-up name when no keyword is present.
var filename_final = filename;
// Bound the loop by the array length: the original "i > -1" condition never
// became false, so a file name without any keyword hung the page.
for (var i = 1; i < type_index.length; i++) {
    if (type_index[i] > -1)
    {
        filename_final = filename.substring(type_index[i]);
        break;
    }
}
$("#material_title").val(filename_final);
Now this code works fine, however, I don't want the colour to be part of the file name.
For example, if the input file was called 'test_red_name_low.jpg' the text field should be 'name low.jpg'. Currently, the code above outputs 'red name low.jpg'. Other times, the filename might be 'this_is_a_test-blue_happy.jpg', which should output 'happy.jpg'.
The type_index array will eventually hold a very large number of values so a replace would be a very long winded way of doing it.
Any suggestions on a way around this?
This regex will do it.
// Strips everything up to and including the last colour keyword from filename,
// e.g. "test_red_name_low.jpg" -> "name low.jpg" and
// "this_is_a_test-blue_happy.jpg" -> "happy.jpg".
// A name without any colour keyword is only cleaned up ("_"/"-" -> space, fake path removed).
// add as many colors as you like here.
var colors = ['red','green', 'blue', 'magenta', 'purple'];
// The greedy ".*" runs through the LAST colour keyword; "\b" keeps colours from
// matching inside other words (e.g. "red" in "bored"). The previous /.* / step
// cut at the last space instead, which dropped every word but the final one.
var colorPrefix = new RegExp("^.*\\b(?:" + colors.join("|") + ")\\b\\s*");
filename = filename.replace(/-|_|c:\\fakepath\\/g, ' ')
    .replace(colorPrefix, '')
    .trim();
What I need to do
I display an iframe with javascript in the body of an HTML page.
With something like this: document.write('<iframe ...></iframe>');
In this iframe there is my JavaScript function, which searches for a keyword in the body of the parent document and replaces it with an HTML link in the parent document.
What I've tried
Javascript Bookmarklet to replace text with a link : complex script, but I need the skipTags
and Javascript .replace command replace page text? : very short and nice script, but there is not the skipTags function...
Those worked like a charm when the script is in the document but not in an iframe to work with the parent document.
My problems/questions
The problem is that the 'keyword' is replaced with a
'non-interpreted' html as text. (Browser displays keyword).
My second question is how to do the replace just once, and not for
all the matching expressions ?
Usually I use jQuery, but in this project I need to use only plain JavaScript without any library.
Any ideas to help me? (I don't want anyone to "write my code", I just want some advice so I can do it myself.)
P.S. 1 : I use Chrome, but I would like to make it work in every browser.
P.S. 2 : English is not my first language, so if you don't understand something, don't hesitate to ask it to me, I'll try to explain it better.
Edit 2
First script now works for the HTML, so question 1 is solved, but how to do the replace only once, even if the keyword is repeated several times ? (question 2)
With the help of xiaoyi, I've found some solutions :
Stop the loop and replace only the first match
Globalize the functions to search/replace multiple keywords
I think that it could be optimized, but for me it works like a charm, and I share it with you, if it can help anyone (don't forget to change the target of the document, here "parent") :
(function(){
// don't replace text within these tags
// NOTE(review): 'h' is not an HTML element name; presumably the headings h1-h6 were meant — confirm.
var skipTags = { 'a': 1, 'style': 1, 'script': 1, 'iframe': 1, 'meta':1, 'title':1, 'img':1, 'h':1 };
// Walks el's subtree in document order and applies replaceKW to each text node
// until the first replacement has been made.
//   el     - node whose childNodes are searched
//   term   - RegExp passed through to replaceKW
//   replFn - callback passed through to replaceKW
// Returns true when a replacement was made somewhere below el, false otherwise.
function findKW( el, term, replFn )
{
    var child, tag, found = false;
    for (var i = 0; i < el.childNodes.length && !found; i++)
    {
        child = el.childNodes[i];
        if (child.nodeType == 1)
        { // ELEMENT_NODE
            tag = child.nodeName.toLowerCase();
            if (!(tag in skipTags))
            {
                // Propagate the result of the nested search; without this a hit
                // inside a child element did not stop the loop, and later
                // siblings were replaced as well.
                found = findKW(child, term, replFn);
            }
        }
        else if (child.nodeType == 3)
        { // TEXT_NODE
            found = replaceKW(child, term, replFn); // if found=true, we stop the loop
        }
    }
    return found;
};
// Replaces the first match of term inside the text node `text` with the node
// returned by replFn(matchedText), where matchedText is capture group 1.
// Returns true when a replacement was made, false when term does not match.
function replaceKW( text, term, replFn)
{
    var hits = [];
    var hit;
    // Run exec() until it returns null so every match is collected.
    while ((hit = term.exec(text.data)))
    {
        hits.push(hit);
    }
    if (hits.length === 0)
    {
        return false;
    }
    var first = hits[0];
    // Cut the matched text out into its own text node, then swap that node
    // for the replacement.
    text.splitText(first.index);
    text.nextSibling.splitText(first[1].length);
    text.parentNode.replaceChild(replFn(first[1]), text.nextSibling);
    return true;
};
// Runs findKW over the parent document's body for one term: whole-word,
// case-insensitive matches are handed to a callback that builds an <a> element
// pointing at the okisurf search for that term and opening in a new tab.
function linkTerm(replTerm)
{
    findKW(
        parent.document.body,
        new RegExp('\\b(' + replTerm + ')\\b', 'gi'),
        function (match)
        {
            var link = parent.document.createElement('a');
            link.href = 'http://www.okisurf.com/#q=' + replTerm;
            link.target = '_blank';
            link.innerHTML = match;
            return link;
        }
    );
}
// First search/replace
linkTerm('keyword');
// A second search/replace
linkTerm('word');
// Other search/replace
// ...
}());
I've also discovered that the second solution doesn't work with Internet Explorer, which doesn't support the createTreeWalker() DOM function.