Extract Variables From String Regex - javascript

This might be a repeat question but I'm not sure how to look for the answer :P
I'm trying to extract and remove variables from a string.
The string might look like this: !text (<123456789>=<#$111111111>) (<7654312> = <#$222222222>) (🛠 =<#$3333333333>) Some text that I will need!
I need the two items in each block?
e.g. [["123456789", 111111111],['7654312','222222222'],["🛠","3333333333"]]
Then I need the string exactly but with the variables removed?
e.g. Some more text that I will need!
I'm not sure of the best way to do this, any help is appreciated.

You don't always have to use regexes, for instance why not write a parser? This gives you much more flexibility. Note that I added <> around the 🛠 for simplicity, but you could make brackets optional in the parser.
The parser assumes anything that isin't within () is free text and captures it as string nodes.
For instance if you wanted only the last text node you could do...
const endingText = parse(text).filter(t => typeof t === 'string').pop();
const text = '!text (<123456789>=<#$111111111>) (<7654312> = <#$222222222>) (<🛠> =<#$3333333333>) Some text that I will need!';
console.log(parse(text));
function parse(input) {
let i = 0, char = input[i], text = [];
const output = [];
while (char) {
if (char === '(') {
if (text.length) output.push(text.join(''));
output.push(entry());
text = [];
} else {
text.push(char);
consume();
}
}
if (text.length) output.push(text.join(''));
return output;
function entry() {
match('(');
const key = value();
whitespace();
match('=');
whitespace();
const val = value();
match(')');
return [key, val];
}
function value() {
const val = [];
match('<');
while (char && char !== '>') val.push(char), consume();
match('>');
return val.join('');
}
function whitespace() {
while (/\s/.test(char)) consume();
}
function consume() {
return char = input[++i];
}
function match(expected) {
if (char !== expected) throw new Error(`Expected '${expected}' at column ${i}.`);
consume();
}
}

Related

How not to check a value twice?

I have a function that performs some operations on a given symbol. But first it checks if the code of that symbol is in a specific range (to prevent errors):
function transformSpecialChar(char) {
const shouldTransform = char.charCodeAt(0) > someNumber && char.charCodeAt(0) < someOtherNumber;
if(shouldTransform) {
// do something
return transformed;
}
return undefined;
}
I also have a function stringToArray that breaks a given string into characters. Then if some character is special, it calls a transformSpecialChar function and passes that char as an argument. If the character is not special then the function does something else.
function stringToArray(str) {
const result = Array.from(str);
result = result.map(el => {
const isSpecial = el.charCodeAt(0) > someNumber && el.charCodeAt(0) < someOtherNumber;
if(isSpecial) {
return transformSpecialChar(el)
}
// else do something else with a character
})
return result
}
The problem is that I check the character twice. First when I check what kind of operation should a stringToArray function perform on an other character of a given string. And then I check the character again inside transformSpecialChar function to make sure that the given char could be transformed as it has to.
I can't drop that check process from the transformSpecialChar function since this function is used not only in stringToArray and it should always check the given char to avoid unexpected errors.
And I also can't drop that check process form the stringToArray function because different types of characters should be processed differently.
I tried to drop the check process from the stringToArray function and see if transformSpecialChar function returns undefined.
function stringToArray(str) {
const result = Array.from(str);
result = result.map(el => {
const transformed = transformSpecialChar(el);
if (transformed) {
return transormed
}
// else do something else with a character
})
return result
}
But in this case the transform function is run for every character in a string and it seems super inefficient.
Make transformSpecialChar a factory function to use for either mapping an Array of characters, or as a stand alone function. Something like:
const str = `{Some string with speçíal charäcters}`;
console.log(`Original string: ${str}`);
// note: undefined will map to null and will not be
// included in `join`
console.log(`stringToArray(str).join(\`\`) => ${
stringToArray(str).join(``)}`);
console.log(`stringToArray(str, true).join(\`\`) => ${
stringToArray(str, true).join(``)}`);
// use the factory as stand alone function
const transformer = transformSpecialChar(false);
console.log(`transformer(\`4\`) => ${transformer(`4`)}`);
console.log(`transformer(\`A\`) => ${transformer(`A`)}`);
// This factory function returns a function
// to be used in Array.map or stand alone.
// It uses [returnValues] from closure
function transformSpecialChar(returnValues = false) {
return chr => {
const chrCode = chr.charCodeAt(0);
return chrCode > 200 || chrCode < 62 ?
`#${chrCode};` : (returnValues && chr || undefined);
};
}
function stringToArray(str, includeNotTransformed = false) {
return [...str].map(transformSpecialChar(includeNotTransformed));
}

A "translate" function in javascript like that in php?

Yes, I want a character "translate" function in javascript like that in php.
I made the following, but it is ghastly. Surely there must be a better way -- using regular expressions?
<html>
<head>
<script>
window.onload = function() {
"use strict";
console.log(translate("abcdefg", "bdf", "XYZ")); // gives aXcYeZg -=-=-
}
function translate(v1, xlatfrom, xlatto) {
var ch, ipos, retstr = "";
if (xlatfrom.length != xlatto.length) return ""; // lengths must be =
for (var i1=0; i1<v1.length; i1+=1) { // go through string
ch = v1.substring(i1, i1+1); // character by character
ipos = xlatfrom.indexOf(ch); // ck if in xlatfrom
if (ipos >= 0) ch = xlatto.substring(ipos, ipos+1); // if yes, replace
retstr += ch; } // build up return string
return retstr;
}
</script>
</head>
<body>
</body>
</html>
EDIT: I've accepted the #dani-sc answer. I'm not going to pursue performance. But it's so DIDACTIC! And thanks for the "spread operator" info. Here's how I might use his answer:
function translate(v1, xlatfrom, xlatto) { // like the PHP translate
var mapobj = strsToObject(xlatfrom, xlatto); // make str1.ch's:str2ch's object
return [...v1].map(ch => mapobj[ch] || ch).join(''); // ... is js "spread operator"
}
function strsToObject(str1, str2) { // make object from strings
if (str1.length != str2.length) return {}; // lengths must be =
var retobj = {};
for (var i1=0; i1<str1.length; i1+=1) { // just str[i1]: str2[i1]
retobj[str1.substring(i1, i1+1)] = str2.substring(i1, i1+1); }
return retobj;
}
or (this is GREAT! THANKS!)
function translate(v1, xlatfrom, xlatto) { // like the PHP translate
if (xlatfrom.length != xlatto.length) return ""; // lengths must be =
var mapobj = {}; // make object for mapping
for (var i1=0; i1<xlatfrom.length; i1+=1) { // just str[i1]: str2[i1]
mapobj[xlatfrom.substring(i1, i1+1)] = xlatto.substring(i1, i1+1); }
return [...v1].map(ch => mapobj[ch] || ch).join(''); // ... is js "spread operator"
}
Well, if you want, you could use regular expressions like this:
function translate(input, oldCharacters, newCharacters) {
let output = input;
const oldChArr = [...oldCharacters];
const newChArr = [...newCharacters];
for (let i = 0; i < oldChArr.length; i += 1) {
output = output.replace(new RegExp(oldChArr[i], 'g'), newChArr[i]);
}
return output;
}
function translateFixed(input, replacements) {
return input.replace(/./g, ch => replacements[ch] || ch);
}
function translateFixedNoRegEx(input, replacements) {
return [...input].map(ch => replacements[ch] || ch).join('');
}
console.log(translate("abcdefgbdb", "bdf", "XYZ"));
console.log(translate("abcdefg", "cde", "dec"));
console.log(translateFixed("abcdefg", {c: 'd', d: 'e', e: 'c'}));
console.log(translateFixedNoRegEx("abcdefg", {c: 'd', d: 'e', e: 'c'}));
If you would be okay with changing the method's signature, it could be made a bit more concise of course.
Edit: I've added two more methods which actually achieve what you're looking for. Just for reference, I left the original method translate in there as well.
translateFixed uses regular expressions to match every single character and replace it if it was specified in the replacements parameter.
translateFixedNoRegex just creates an array of characters out of the input string and iterates over them. If the character ch matches one in the replacements parameter, it's replaced, otherwise it's left unchanged. Afterwards, we'll convert it back to a string by concatenating the characters.
You asked about [...array]: It's the spread operator, introduced with ES6. When used on a string, it just takes every character and puts it as a single entry into an array. That means, these both lines are equivalent:
console.log([..."mystring"]);
console.log("mystring".split(''));
function translate(val, xlatfrom, xlatto) { //
if (xlatfrom.length !== xlatto.length) return "";
Array.from(xlatfrom).forEach((key, index) => {
val = val.replace(key, xlatto[index]);
})
return val;
}
console.log(translate("abcdefg", "bdf", "XYZ"));

Inefficient/ugly replacement function

I wrote a function that is supposed to replace code in between of two delimiters with the value, it returns (The string I'm applying this to is the .outerHTML of a HTML-Object).
This will be used similar to how it is used in e.g. Vue.js or Angular.
It looks like this:
static elemSyntaxParse(elem) {
let elem = _elem.outerHTML;
if (elem.includes("{{") || elem.includes("}}")) {
let out = "";
if ((elem.match(/{{/g) || []).length === (elem.match(/}}/g) || []).length) {
let occurs = elem.split("{{"),
key,
temp;
for (let i = 1; i < occurs.length; i++) {
if (occurs[i].includes("}}")) {
key = occurs[i].substring(0, occurs[i].indexOf("}}"));
temp = eval(key) + occurs[i].substring(occurs[i].indexOf("}}") + 2);
out += temp;
} else {
ModularCore.err("Insert-Delimiters \"{{\" and \"}}\" do not match.");
break;
return elem;
}
}
return occurs[0] + out;
} else {
ModularCore.err("Insert-Delimiters \"{{\" and \"}}\" do not match.");
return elem;
}
}
return elem;
}
(The function is inside of a class and refers to some external functions.)
Example use:
<body>
<p id="test">{{ Test }}</p>
<script>
let Test = 27;
document.getElementById("test").outerHTML = elemSyntaxParse(document.getElementById("test"));
</script>
</body>
Returns this string:
<p id="test">27</p>
It works but it is rather ugly and kinda slow.
How would I go about cleaning this up a bit? I am open to ES6.
PS: I now "eval() is evil" but this is the only occurrence in my code and it is (as far as i know) not replaceable in this situation.
Thanks!
I think you can omit a few checks and end up at:
const text = elem.outerHTML.split("{{");
let result = text.shift();
for(const part of text) {
const [key, rest, overflow] = part.split("}}");
if(!key || rest == undefined || overflow) {
ModularCore.err("Insert-Delimiters \"{{\" and \"}}\" do not match.");
return elem.outerHTML;
}
result += eval(key) + rest;
}
return result;
Invert the testing logic to get rid of nesting and else clauses
if (! elem.includes("{{") || !elem.includes("}}")) {
ModularCore.err("Insert-Delimiters \"{{\" and \"}}\" do not match.");
return elem;
}
// original loop code here
Don't double check - as #Bergi comment says.
test for return values indicating "not found, etc"
// if is removed. next line...
let occurs = elem.split("{{"), key, temp;
// if the separator is not in the string,
// it returns a one-element array with the original string in it.
if(occurs[0] === elem) return "no substring found";
The above should eliminate 2 nesting levels. You can then do a similar thing in that inner for loop.
Simplify compound logic.
!a || !b is equivalent to !(a && b). This is De Morgan's law

Userscript to replace a word in a webpage [duplicate]

This question already has an answer here:
How to replace lots of words in AJAX-driven page text, and in select attributes -- using a Tampermonkey script?
(1 answer)
Closed 4 years ago.
Disclaimer : I'm a dick in JS, I really don't know it at all, sorry if this is really a noob question.
I found out a userscript that allows you to replace words/phrases on a webpage.
Howevever, I have an error that states "expected a conditional expression and instead saw an assignment" on line 57
The line is :
for (i = 0; text = texts.snapshotItem(i); i += 1) {
The whole script :
(function () { 'use strict';
var words = {
'test 1':'test 2',
'bleh': 'blah'
};
var regexs = [],
replacements = [],
tagsWhitelist = ['PRE', 'BLOCKQUOTE', 'CODE', 'INPUT', 'BUTTON', 'TEXTAREA'],
rIsRegexp = /^\/(.+)\/([gim]+)?$/,
word, text, texts, i, userRegexp;
// prepareRegex by JoeSimmons
// used to take a string and ready it for use in new RegExp()
function prepareRegex (string) {
return string.replace (/([\[\]\^\&\$\.\(\)\?\/\\\+\{\}\|])/g, '\\$1');
}
// function to decide whether a parent tag will have its text replaced or not
function isTagOk (tag) {
return tagsWhitelist.indexOf (tag) === -1;
}
delete words['']; // so the user can add each entry ending with a comma,
// I put an extra empty key/value pair in the object.
// so we need to remove it before continuing
// convert the 'words' JSON object to an Array
for (word in words) {
if (typeof word === 'string' && words.hasOwnProperty (word) ) {
userRegexp = word.match (rIsRegexp);
// add the search/needle/query
if (userRegexp) {
regexs.push (
new RegExp (userRegexp[1], 'g')
);
}
else {
regexs.push (
new RegExp (prepareRegex (word)
.replace (/\\?\*/g, function (fullMatch) {
return fullMatch === '\\*' ? '*' : '[^ ]*';
} ),
'g'
)
);
}
// add the replacement
replacements.push(words[word]);
}
}
// do the replacement
texts = document.evaluate ('//body//text()[ normalize-space(.) != "" ]', document, null, 6, null);
for (i = 0; text = texts.snapshotItem (i); i += 1) {
if (isTagOk (text.parentNode.tagName) ) {
regexs.forEach (function (value, index) {
text.data = text.data.replace (value, replacements[index]);
} );
}
}
} () );
Can someone explain me what it means (I like to understand what I do), and how I can fix this ?
The = is an assignment operator.
Assignment is not valid in this context, because the for loop expects a condition.
You need to use a comparison operator such as ==, <= or >=
One possible fix is:
for (i = 0; text = texts.snapshotItem(i); i += 1) {
to
for (i = 0; text == texts.snapshotItem(i); i += 1) {

JavaScript Throws Undefined Error

What it is supposed to do -
Example
url1(pages,"ALT") returns "www.xyz.ac.uk"
url1(pages,"xyz") returns ""
The error - TypeError: Cannot call method 'toUpperCase' of undefined
This is just for some coursework, Im stuck with these errors. Any help would be much appreciated
function index(string,pattern,caseSensitive) {
if(caseSensitive == false) {
var v = string.toUpperCase();
} else {
var v = string;
}
return indexNumber = v.indexOf(pattern);
}
var pages = [ "|www.lboro.ac.uk|Loughborough University offers degree programmes and world class research.", "!www.xyz.ac.uk!An alternative University" , "%www%Yet another University"];
alert(url1(pages, "ALT"));
function url1(pages,pattern) {
var siteContent = [];
for(i=0;i<pages.length;i++) {
var seperator = pages[i].charAt(0);
if(pages[i].indexOf(seperator)>0){
siteContent = pages[i].split(pages[i].indexOf(seperator));
}
if( index(siteContent[2],pattern,false)>=0){
return siteContent[1];
}else{
return "";
}
}
}
if(pages[i].indexOf(seperator)>0){
siteContent = pages[i].split(pages[i].indexOf(seperator));
}
if( index(siteContent[2],pattern,false)>=0){
return siteContent[1];
}else{
return "";
}
If pages[i].indexOf(seperator)<=0, siteContent is still whatever it was from the last iteration. If that happens on the first iteration, siteContent is still [], and siteContent[2] is undefined.
Another problem: the expression pages[i].indexOf(seperator) returns a number, and pages[i].split expects a delimiting string as an argument. Since the number doesn't appear in your input, you'll always get a single-element array, and siteContent[2] will always be undefined. Get rid of .indexOf(seperator), change it to siteContent = pages[i].split(seperator).
One more: get rid of the else { return ""; }. Add a return ""; after the for loop.
Finally, in the first if statement condition, change .indexOf(seperator) > 0 to .indexOf(seperator, 1) !== -1. Since you're getting seperator from the first character of the string, it will be found at 0. You want the second occurrence, so start the search at 1. In addition, .indexOf returns -1 if it doesn't find the substring. You'll need to account for this in both if conditions.
Side note, as this is not causing your problem: never use == false. JS will coerce stuff like 0 and "" to == false. If that's what you want, just use the ! operator, because the expression has nothing to do with the value false.
My final answer is http://jsfiddle.net/QF237/
Right here:
alert(url1(pages, ALT)); // ALT ISN'T DEFINED
I believe you forgot to quote it:
alert(url1(pages, "ALT"));
You should split the string passing the separator character itself. Your function then will look like:
function url1(pages,pattern) {
var siteContent = [];
for(i=0;i<pages.length;i++) {
var seperator = pages[i].charAt(0);
console.log(seperator);
if(pages[i].indexOf(seperator)>=0){
siteContent = pages[i].split(seperator); //fixed here
}
console.log(siteContent);
if( index(siteContent[2],pattern,false)>=0){
return siteContent[1];
}else{
return "";
}
}
}
Tell us if it worked, please.
EDIT: It seeems your index() also has a little problem. Please try the function below.
function index(string,pattern,caseSensitive) {
var v;
if(caseSensitive == false) {
v = string.toUpperCase();
pattern = pattern.toUpperCase(); //to clarify: pattern should be uppercased also if caseSensitiveness is false
} else {
v = string;
}
return v.indexOf(pattern);
}
EDIT 2:
And url1() is finally like this:
function url1(pages,pattern) {
var siteContent = [];
for(i=0;i<pages.length;i++) {
var seperator = pages[i].charAt(0);
if(pages[i].indexOf(seperator)>=0){
siteContent = pages[i].split(seperator);
}
if( index(siteContent[2],pattern,false)>=0){
return siteContent[1];
}
}
return "";
}
In this case, the first occurrence of pattern in all pages will be returned.

Categories