Userscript to replace a word in a webpage [duplicate] - javascript

This question already has an answer here:
How to replace lots of words in AJAX-driven page text, and in select attributes -- using a Tampermonkey script?
(1 answer)
Closed 4 years ago.
Disclaimer : I'm a dick in JS, I really don't know it at all, sorry if this is really a noob question.
I found out a userscript that allows you to replace words/phrases on a webpage.
Howevever, I have an error that states "expected a conditional expression and instead saw an assignment" on line 57
The line is :
for (i = 0; text = texts.snapshotItem(i); i += 1) {
The whole script :
(function () { 'use strict';
var words = {
'test 1':'test 2',
'bleh': 'blah'
};
var regexs = [],
replacements = [],
tagsWhitelist = ['PRE', 'BLOCKQUOTE', 'CODE', 'INPUT', 'BUTTON', 'TEXTAREA'],
rIsRegexp = /^\/(.+)\/([gim]+)?$/,
word, text, texts, i, userRegexp;
// prepareRegex by JoeSimmons
// used to take a string and ready it for use in new RegExp()
function prepareRegex (string) {
return string.replace (/([\[\]\^\&\$\.\(\)\?\/\\\+\{\}\|])/g, '\\$1');
}
// function to decide whether a parent tag will have its text replaced or not
function isTagOk (tag) {
return tagsWhitelist.indexOf (tag) === -1;
}
delete words['']; // so the user can add each entry ending with a comma,
// I put an extra empty key/value pair in the object.
// so we need to remove it before continuing
// convert the 'words' JSON object to an Array
for (word in words) {
if (typeof word === 'string' && words.hasOwnProperty (word) ) {
userRegexp = word.match (rIsRegexp);
// add the search/needle/query
if (userRegexp) {
regexs.push (
new RegExp (userRegexp[1], 'g')
);
}
else {
regexs.push (
new RegExp (prepareRegex (word)
.replace (/\\?\*/g, function (fullMatch) {
return fullMatch === '\\*' ? '*' : '[^ ]*';
} ),
'g'
)
);
}
// add the replacement
replacements.push(words[word]);
}
}
// do the replacement
texts = document.evaluate ('//body//text()[ normalize-space(.) != "" ]', document, null, 6, null);
for (i = 0; text = texts.snapshotItem (i); i += 1) {
if (isTagOk (text.parentNode.tagName) ) {
regexs.forEach (function (value, index) {
text.data = text.data.replace (value, replacements[index]);
} );
}
}
} () );
Can someone explain me what it means (I like to understand what I do), and how I can fix this ?

The = is an assignment operator.
Assignment is not valid in this context, because the for loop expects a condition.
You need to use a comparison operator such as ==, <= or >=
One possible fix is:
for (i = 0; text = texts.snapshotItem(i); i += 1) {
to
for (i = 0; text == texts.snapshotItem(i); i += 1) {

Related

Extract Variables From String Regex

This might be a repeat question but I'm not sure how to look for the answer :P
I'm trying to extract and remove variables from a string.
The string might look like this: !text (<123456789>=<#$111111111>) (<7654312> = <#$222222222>) (🛠 =<#$3333333333>) Some text that I will need!
I need the two items in each block?
e.g. [["123456789", 111111111],['7654312','222222222'],["🛠","3333333333"]]
Then I need the string exactly but with the variables removed?
e.g. Some more text that I will need!
I'm not sure of the best way to do this, any help is appreciated.
You don't always have to use regexes, for instance why not write a parser? This gives you much more flexibility. Note that I added <> around the 🛠 for simplicity, but you could make brackets optional in the parser.
The parser assumes anything that isin't within () is free text and captures it as string nodes.
For instance if you wanted only the last text node you could do...
const endingText = parse(text).filter(t => typeof t === 'string').pop();
const text = '!text (<123456789>=<#$111111111>) (<7654312> = <#$222222222>) (<🛠> =<#$3333333333>) Some text that I will need!';
console.log(parse(text));
function parse(input) {
let i = 0, char = input[i], text = [];
const output = [];
while (char) {
if (char === '(') {
if (text.length) output.push(text.join(''));
output.push(entry());
text = [];
} else {
text.push(char);
consume();
}
}
if (text.length) output.push(text.join(''));
return output;
function entry() {
match('(');
const key = value();
whitespace();
match('=');
whitespace();
const val = value();
match(')');
return [key, val];
}
function value() {
const val = [];
match('<');
while (char && char !== '>') val.push(char), consume();
match('>');
return val.join('');
}
function whitespace() {
while (/\s/.test(char)) consume();
}
function consume() {
return char = input[++i];
}
function match(expected) {
if (char !== expected) throw new Error(`Expected '${expected}' at column ${i}.`);
consume();
}
}

If array item includes string but does not include other string then remove string

I have a big block of text which I've split by new line, so each item in the array is a line of text.
I'm looping through these lines and trying to detect where a line includes a </mark> but doesn't include a <mark>, and if this condition has been met then it removes the </mark> (as it's missing an opening tag).
final_formatted_log_split = logtext.split("\n");
for (i = 0, l = final_formatted_log_split.length; i < l; i++) {
if (final_formatted_log_split[i].includes("<mark>") === false) {
if (final_formatted_log_split[i].includes("</mark>") === true) {
var removed_mark = final_formatted_log_split[i].replace("</mark>", "");
}
}
}
var final_formatted_log = final_formatted_log_split.join("\n");
console.log(final_formatted_log);
and this console log still includes in the text where it doesn't include a
Just to be super clear, the expected outcome is the following:
if a line is like this:
line of text here</mark>
then it needs to remove the </mark> because it does not contain an opening <mark>
I suspect it is to do with the === false, but from what I've read online that is how others have used .includes to see if something does 'not include'
You can do it with simple String.prototype.includes:
const arr = [
'<mark>1</mark>',
'2</mark>',
'3</mark></mark>',
'<mark>4</mark>',
]
const replaceMark = (arr) => {
return arr.map(e => {
if (e.includes('</mark>') && !e.includes('<mark>')) e = e.replace(/\<\/mark\>/g, '')
return e
})
}
console.log('original:', arr)
console.log('replaced:', replaceMark(arr))
This solution doesn't handle complex situations like <mark>text</mark></mark>, only the most basic ones.
There's nothing wrong with ===false.Its working properly.To check this one you just put a console.log inside if block.
What you are doing here is,you are not replacing array value with modified one.So replace this line
var removed_mark = final_formatted_log_split[i].replace("</mark>", "");
with
final_formatted_log_split[i] = final_formatted_log_split[i].replace("</mark>", "");
And you can use one if block instead of two if block.
var final_formatted_log_split = logtext.split("\n");;
for (i = 0, l = final_formatted_log_split.length; i < l; i++) {
if (!final_formatted_log_split[i].includes("<mark>") && final_formatted_log_split[i].includes("</mark>")) {
final_formatted_log_split[i] = final_formatted_log_split[i].replace("</mark>", "");
}
}
var final_formatted_log = final_formatted_log_split.join("\n");
console.log(final_formatted_log);

How to get an Array of all words used on a page

So I'm trying to get an array of all the words used in my web page.
Should be easy, right?
The problem I run into is that $("body").text().split(" ") returns an array where the words at the beginning of one element and end of another are joined as one.
i.e:
<div id="1">Hello
<div id="2">World</div>
</div>
returns ["HelloWorld"] when I want it to return ["Hello", "World"].
I also tried:
wordArr = [];
function getText(target)
{
if($(this).children())
{
$(this).children(function(){getText(this)});
}
else
{
var testArr = $(this).text().split(" ");
for(var i =0; i < testArr.length; i++)
wordArr.push(testArr[i]);
}
}
getText("body");
but $(node).children() is truthy for any node in the DOM that exists, so that didn't work.
I'm sure I'm missing something obvious, so I'd appreciate an extra set of eyes.
For what it's worth, I don't need unique words, just every word in the body of the document as an element in the array. I'm trying to use it to generate context and lexical co-occurrence with another set of words, so duplicates just up the contextual importance of a given word.
Thanks in advance for any ideas.
See Fiddle
How about something like this?
var res = $('body *').contents().map(function () {
if (this.nodeType == 3 && this.nodeValue.trim() != "")
return this.nodeValue.trim();
}).get().join(" ");
console.log(res);
Demo
Get the array of words:
var res = $('body *').contents().map(function () {
if (this.nodeType == 3 && this.nodeValue.trim() != "") //check for nodetype text and ignore empty text nodes
return this.nodeValue.trim().split(/\W+/); //split the nodevalue to get words.
}).get(); //get the array of words.
console.log(res);
Demo
function getText(target) {
var wordArr = [];
$('*',target).add(target).each(function(k,v) {
var words = $('*',v.cloneNode(true)).remove().end().text().split(/(\s+|\n)/);
wordArr = wordArr.concat(words.filter(function(n){return n.trim()}));
});
return wordArr;
}
FIDDLE
you can do this
function getwords(e){
e.contents().each(function(){
if ( $(this).children().length > 0 ) {
getwords($(this))
}
else if($.trim($(this).text())!=""){
words=words.concat($.trim($(this).text()).split(/\W+/))
}
});
}
http://jsfiddle.net/R55eM/
The question assumes that words are not internally separated by elements. If you simply create an array of words separated by white space and elements, you will end up with:
Fr<b>e</b>d
being read as
['Fr', 'e', 'd'];
Another thing to consider is punctuation. How do you deal with: "There were three of them: Mark, Sue and Tom. They were un-remarkable. One—the red head—was in the middle." Do you remove all punctuation? Or replace it with white space before trimming? How do you re-join words that are split by markup or characters that might be inter–word or intra–word punctuation? Note that while it is popular to write a dash between words with a space at either side, "correct" punctuation uses an m dash with no spaces.
Not so simple…
Anyhow, an approach that just splits on spaces and elements using recursion and works in any browser in use without any library support is:
function getWords(element) {
element = element || document.body;
var node, nodes = element.childNodes;
var words = [];
var text, i=0;
while (node = nodes[i++]) {
if (node.nodeType == 1) {
words = words.concat(getWords(node));
} else if (node.nodeType == 3) {
text = node.data.replace(/^\s+|\s+$/g,'').replace(/\s+/g,' ');
words = !text.length? words : words.concat(text.split(/\s/));
}
}
return words;
}
but it does not deal with the issues above.
Edit
To avoid script elements, change:
if (node.nodeType == 1) {
to
if (node.nodeType == 1 && node.tagName.toLowerCase() != 'script') {
Any element that should be avoided can be added to the condition. If a number of element types should be avoided, you can do:
var elementsToAvoid = {script:'script', button:'button'};
...
if (node.nodeType == 1 && node.tagName && !(node.tagName.toLowerCase() in elementsToAvoid)) {

Regular expression to get class name with specific substring

I need a regular expression in javascript that will get a string with a specific substring from a list of space delimited strings.
For example, I have;
widget util cookie i18n-username
I want to be able to return only i18n-username.
How
You could use the following function, using a regex to match for your string surrounded by either a space or the beginning or end of a line. But you'll have to be careful about preparing any regular expression special characters if you plan to use them, since the search argument will be interpreted as a string instead of a RegExp literal:
var hasClass = function(s, klass) {
var r = new RegExp("(?:^| )(" + klass + ")(?: |$)")
, m = (""+s).match(r);
return (m) ? m[1] : null;
};
hasClass("a b c", "a"); // => "a"
hasClass("a b c", "b"); // => "b"
hasClass("a b c", "x"); // => null
var klasses = "widget util cookie i18n-username";
hasClass(klasses, "username"); // => null
hasClass(klasses, "i18n-username"); // => "i18n-username"
hasClass(klasses, "i18n-\\w+"); // => "i18n-username"
As others have pointed out, you could also simply use a "split" and "indexOf":
var hasClass = function(s, klass) {
return (""+s).split(" ").indexOf(klass) >= 0;
};
However, note that the "indexOf" function was introduced to JavaScript somewhat recently, so for older browsers you might have to implement it yourself.
var hasClass = function(s, klass) {
var a=(""+s).split(" "), len=a.length, i;
for (i=0; i<len; i++) {
if (a[i] == klass) return true;
}
return false;
};
[Edit]
Note that the split/indexOf solution is likely faster for most browsers (though not all). This jsPerf benchmark shows which solution is faster for various browsers - notably, Chrome must have a really good regular expression engine!
function getString(subString, string){
return (string.match(new RegExp("\S*" + subString + "\S*")) || [null])[0];
}
To Use:
var str = "widget util cookie i18n-username";
getString("user", str); //returns i18n-username
Does this need to be a regex? Would knowing if the string existed be sufficient? Regular expressions are inefficient (slower) and should be avoided if possible:
var settings = 'widget util cookie i18n-username',
// using an array in case searching the string is insufficient
features = settings.split(' ');
if (features.indexOf('i18n-username') !== -1) {
// do something based on having this feature
}
If whitespace wouldn't cause an issue in searching for a value, you could just search the string directly:
var settings = 'widget util cookie i18n-username';
if (settings.indexOf('i18n-username') !== -1) {
// do something based on having this value
}
It then becomes easy to make this into a reusable function:
(function() {
var App = {},
features = 'widget util cookie i18n-username';
App.hasFeature = function(feature) {
return features.indexOf(feature) !== -1;
// or if you prefer the array:
return features.split(' ').indexOf(feature) !== -1;
};
window.App = App;
})();
// Here's how you can use it:
App.hasFeature('i18n-username'); // returns true
EDIT
You now say you need to return all strings that start with another string, and it is possible to do this with a regular expression as well, although I am unsure about how efficient it is:
(function() {
var App = {},
features = 'widget util cookie i18n-username'.split(' ');
// This contains an array of all features starting with 'i18n'
App.i18nFeatures = features.map(function(value) {
return value.indexOf('i18n') === 0;
});
window.App = App;
})();
/i18n-\w+/ ought to work. If your string has any cases like other substrings can start with i18n- or your user names have chars that don't fit the class [a-zA-Z0-9_], you'll need to specify that.
var str = "widget util cookie i18n-username";
alert(str.match(/i18n-\w+/));
Edit:
If you need to match more than one string, you can add on the global flag (/g) and loop through the matches.
var str = "widget i18n-util cookie i18n-username";
var matches = str.match(/i18n-\w+/g);
if (matches) {
for (var i = 0; i < matches.length; i++)
alert(matches[i]);
}
else
alert("phooey, no matches");

Replacing the nth instance of a regex match in Javascript

I'm trying to write a regex function that will identify and replace a single instance of a match within a string without affecting the other instances. For example, I have this string:
12||34||56
I want to replace the second set of pipes with ampersands to get this string:
12||34&&56
The regex function needs to be able to handle x amount of pipes and allow me to replace the nth set of pipes, so I could use the same function to make these replacements:
23||45||45||56||67 -> 23&&45||45||56||67
23||34||98||87 -> 23||34||98&&87
I know that I could just split/replace/concat the string at the pipes, and I also know that I can match on /\|\|/ and iterate through the resulting array, but I'm interested to know if it's possible to write a single expression that can do this. Note that this would be for Javascript, so it's possible to generate a regex at runtime using eval(), but it's not possible to use any Perl-specific regex instructions.
A more general-purpose function
I came across this question and, although the title is very general, the accepted answer handles only the question's specific use case.
I needed a more general-purpose solution, so I wrote one and thought I'd share it here.
Usage
This function requires that you pass it the following arguments:
original: the string you're searching in
pattern: either a string to search for, or a RegExp with a capture group. Without a capture group, it will throw an error. This is because the function calls split on the original string, and only if the supplied RegExp contains a capture group will the resulting array contain the matches.
n: the ordinal occurrence to find; eg, if you want the 2nd match, pass in 2
replace: Either a string to replace the match with, or a function which will take in the match and return a replacement string.
Examples
// Pipe examples like the OP's
replaceNthMatch("12||34||56", /(\|\|)/, 2, '&&') // "12||34&&56"
replaceNthMatch("23||45||45||56||67", /(\|\|)/, 1, '&&') // "23&&45||45||56||67"
// Replace groups of digits
replaceNthMatch("foo-1-bar-23-stuff-45", /(\d+)/, 3, 'NEW') // "foo-1-bar-23-stuff-NEW"
// Search value can be a string
replaceNthMatch("foo-stuff-foo-stuff-foo", "foo", 2, 'bar') // "foo-stuff-bar-stuff-foo"
// No change if there is no match for the search
replaceNthMatch("hello-world", "goodbye", 2, "adios") // "hello-world"
// No change if there is no Nth match for the search
replaceNthMatch("foo-1-bar-23-stuff-45", /(\d+)/, 6, 'NEW') // "foo-1-bar-23-stuff-45"
// Passing in a function to make the replacement
replaceNthMatch("foo-1-bar-23-stuff-45", /(\d+)/, 2, function(val){
//increment the given value
return parseInt(val, 10) + 1;
}); // "foo-1-bar-24-stuff-45"
The Code
var replaceNthMatch = function (original, pattern, n, replace) {
var parts, tempParts;
if (pattern.constructor === RegExp) {
// If there's no match, bail
if (original.search(pattern) === -1) {
return original;
}
// Every other item should be a matched capture group;
// between will be non-matching portions of the substring
parts = original.split(pattern);
// If there was a capture group, index 1 will be
// an item that matches the RegExp
if (parts[1].search(pattern) !== 0) {
throw {name: "ArgumentError", message: "RegExp must have a capture group"};
}
} else if (pattern.constructor === String) {
parts = original.split(pattern);
// Need every other item to be the matched string
tempParts = [];
for (var i=0; i < parts.length; i++) {
tempParts.push(parts[i]);
// Insert between, but don't tack one onto the end
if (i < parts.length - 1) {
tempParts.push(pattern);
}
}
parts = tempParts;
} else {
throw {name: "ArgumentError", message: "Must provide either a RegExp or String"};
}
// Parens are unnecessary, but explicit. :)
indexOfNthMatch = (n * 2) - 1;
if (parts[indexOfNthMatch] === undefined) {
// There IS no Nth match
return original;
}
if (typeof(replace) === "function") {
// Call it. After this, we don't need it anymore.
replace = replace(parts[indexOfNthMatch]);
}
// Update our parts array with the new value
parts[indexOfNthMatch] = replace;
// Put it back together and return
return parts.join('');
}
An Alternate Way To Define It
The least appealing part of this function is that it takes 4 arguments. It could be simplified to need only 3 arguments by adding it as a method to the String prototype, like this:
String.prototype.replaceNthMatch = function(pattern, n, replace) {
// Same code as above, replacing "original" with "this"
};
If you do that, you can call the method on any string, like this:
"foo-bar-foo".replaceNthMatch("foo", 2, "baz"); // "foo-bar-baz"
Passing Tests
The following are the Jasmine tests that this function passes.
describe("replaceNthMatch", function() {
describe("when there is no match", function() {
it("should return the unmodified original string", function() {
var str = replaceNthMatch("hello-there", /(\d+)/, 3, 'NEW');
expect(str).toEqual("hello-there");
});
});
describe("when there is no Nth match", function() {
it("should return the unmodified original string", function() {
var str = replaceNthMatch("blah45stuff68hey", /(\d+)/, 3, 'NEW');
expect(str).toEqual("blah45stuff68hey");
});
});
describe("when the search argument is a RegExp", function() {
describe("when it has a capture group", function () {
it("should replace correctly when the match is in the middle", function(){
var str = replaceNthMatch("this_937_thing_38_has_21_numbers", /(\d+)/, 2, 'NEW');
expect(str).toEqual("this_937_thing_NEW_has_21_numbers");
});
it("should replace correctly when the match is at the beginning", function(){
var str = replaceNthMatch("123_this_937_thing_38_has_21_numbers", /(\d+)/, 2, 'NEW');
expect(str).toEqual("123_this_NEW_thing_38_has_21_numbers");
});
});
describe("when it has no capture group", function() {
it("should throw an error", function(){
expect(function(){
replaceNthMatch("one_1_two_2", /\d+/, 2, 'NEW');
}).toThrow('RegExp must have a capture group');
});
});
});
describe("when the search argument is a string", function() {
it("should should match and replace correctly", function(){
var str = replaceNthMatch("blah45stuff68hey", 'stuff', 1, 'NEW');
expect(str).toEqual("blah45NEW68hey");
});
});
describe("when the replacement argument is a function", function() {
it("should call it on the Nth match and replace with the return value", function(){
// Look for the second number surrounded by brackets
var str = replaceNthMatch("foo[1][2]", /(\[\d+\])/, 2, function(val) {
// Get the number without the [ and ]
var number = val.slice(1,-1);
// Add 1
number = parseInt(number,10) + 1;
// Re-format and return
return '[' + number + ']';
});
expect(str).toEqual("foo[1][3]");
});
});
});
May not work in IE7
This code may fail in IE7 because that browser incorrectly splits strings using a regex, as discussed here. [shakes fist at IE7]. I believe that this is the solution; if you need to support IE7, good luck. :)
here's something that works:
"23||45||45||56||67".replace(/^((?:[0-9]+\|\|){n})([0-9]+)\|\|/,"$1$2&&")
where n is the one less than the nth pipe, (of course you don't need that first subexpression if n = 0)
And if you'd like a function to do this:
function pipe_replace(str,n) {
var RE = new RegExp("^((?:[0-9]+\\|\\|){" + (n-1) + "})([0-9]+)\|\|");
return str.replace(RE,"$1$2&&");
}
function pipe_replace(str,n) {
m = 0;
return str.replace(/\|\|/g, function (x) {
//was n++ should have been m++
m++;
if (n==m) {
return "&&";
} else {
return x;
}
});
}
Thanks Binda, I have modified the code for generic uses:
private replaceNthMatch(original, pattern, n, replace) {
let m = -1;
return original.replaceAll(pattern, x => {
m++;
if ( n == m ) {
return replace;
} else {
return x;
}
});
}

Categories