Regex test is getting failed in nodejs? - javascript

I have two sentences which I need to compare and produce the result. The sentences are listed below.
var msg = "Hi this is LT ~ ! # # $ % ^ & * ( ) _ - + = { [ } ] | \\ : ; \" ' < , > . ? / End {#val#}"
var msg2 = "Hi this is LT ~ ! # # $ % ^ & * ( ) _ - + = { [ } ] | \\ : ; \" ' < , > . ? / End 123"
Both sentences are equal except for the val portion, which can be ignored. That's what the code below is trying to do.
//Trying to add escape character for special characters
msg = msg.replace(/[-[\]{}()*+?.,\\^$|#]/g, '\\$&');
msg2 = msg2.replace(/[-[\]{}()*+?.,\\^$|#]/g, '\\$&');
//Adding space only if two {#val#} exists, else updating \\s* (can be many spaces or without spaces)
msg = msg.replace(/(^|.)\s($|.)/g, (x, g1, g2) => (x == "} {" ? x : g1 + "\\s\*" + g2));
//Replacing val with 1,29 (characters can be up to 29 in place of val)
var separators =/{#val#}|((\\s\*))/gi;
msg= msg.replace(separators, (x, y) => y ? y : ".(\\S{1,29})");
let regex = RegExp("^" + msg+ "$");
//Comparing two sentences
console.log(regex.test(msg2);
The test fails. I don't have issues with val and space handling, but if I add special characters to the sentences, the comparison always fails.

At the end of the script, this is the value of msg:
Hi\s*this\s*is\s*DLT\s*test\s*~ !\s*# \#\s*\$\s*% \^\s*& \*\s*\(\s*\)\s* _\s*\-\s*\+\s*= \{\s*\[\s*\}\s*\]\s*\|\s*\\\s*: ;\s*" '\s*< \,\s*> \.\s*\?\s*/ End\s*\{\#val\#\}
And this is the value of msg2:
Hi this is DLT test ~ ! # \# \$ % \^ & \* \( \) _ \- \+ = \{ \[ \} \] \| \\ : ; " ' < \, > \. \? / End 123
The first is not a valid regular expression for the second, because
\{\#val\#\} does not match 123 (use instead: var separators =/\\{\\#val\\#\\}|((\\s\*))/gi;);
msg2 characters should not be escaped.
See a working example below:
// Template sentence; the trailing {#val#} placeholder stands for a variable value.
var msg = "Hi this is LT ~ ! # # $ % ^ & * ( ) _ - + = { [ } ] | \\ : ; \" ' < , > . ? / End {#val#}";
// Candidate sentence that is checked against the template. It is left unescaped.
var msg2 = "Hi this is LT ~ ! # # $ % ^ & * ( ) _ - + = { [ } ] | \\ : ; \" ' < , > . ? / End 123";

// Step 1: put a backslash in front of every regex metacharacter of the template.
msg = msg.replace(/[-[\]{}()*+?.,\\^$|#]/g, '\\$&');

// Step 2: in every "char, whitespace, char" triple replace the whitespace with \s*
// (any amount of whitespace, including none). The literal triple "} {" is kept as is.
// Triples are consumed without overlap, so a space whose preceding character was
// already consumed by the previous triple remains a literal space.
msg = msg.replace(/(^|.)\s($|.)/g, function (triple, before, after) {
  if (triple == "} {") {
    return triple;
  }
  return before + "\\s\*" + after;
});

// Step 3: swap the (escaped) {#val#} placeholder for a value pattern, while handing
// the \s* tokens inserted in step 2 back unchanged.
var separators =/\\{\\#val\\#\\}|((\\s\*))/gi;
msg = msg.replace(separators, function (token, whitespaceToken) {
  if (whitespaceToken) {
    return whitespaceToken;
  }
  return ".(\\S{1,29})";
});

// Anchor the pattern so the whole candidate sentence has to match.
let regex = RegExp("^" + msg + "$");

// Compare the two sentences.
var isSameSentence = regex.test(msg2);
console.log(isSameSentence); //logs 'true'

Related

Find query matches within a string

I have a task to calculate characters to highlight in the text based on a query.
Let's say the given text is "London, United Kingdom" and query is "lond". Then the result should be [[0, 4]].
I have a simple implementation which works fine for this case:
// ...
.reduce((result, word) => {
const wordLen = word.length;
const prefix = wordCharacterRegex.test(word[0]) ? "\\b" : "";
const regex = new RegExp(prefix + escapeRegexCharacters(word), "i");
const index = text.search(regex);
if (index > -1) {
result.push([index, index + wordLen]);
text =
text.slice(0, index) +
new Array(wordLen + 1).join(" ") +
text.slice(index + wordLen);
}
return result;
}, [])
// ...
but then if text is "EC2V 6DB, London, United Kingdom" and the query is "ec2v6db" it doesn't work because the regular expression will be /\bec2v6db/i.
So, how can I change my code and fix the problem?
First of all, the word boundary you add if the first char is a word char should probably be consistent with regards to non-word chars: if you add \b before word chars, add \B before non-word chars to get the same behavior.
const prefix = wordCharacterRegex.test(word[0]) ? "\\b" : "\\B";
Then, it is not clear what your escapeRegexCharacters method looks like but it is there where you may insert \s* between each char of the keyword:
/**
 * Turns a keyword into regex source that matches the keyword literally while
 * allowing optional whitespace between any two adjacent characters.
 *
 * @param {string} s - keyword to convert
 * @returns {string} regex source: every character (metacharacters backslash-escaped)
 *   joined by "\s*"; an empty keyword yields an empty string
 */
function escapeRegexCharacters(s) {
  const gap = "\\s*";
  const spaced = s.replace(/([-\/\\^$*+?.()|[\]{}])|[\s\S]/g, function (ch, special) {
    // `special` is only set when the character is a regex metacharacter.
    const literal = special ? "\\" + special : ch;
    return literal + gap;
  });
  // Every character received a trailing gap; drop the one after the last character.
  return spaced.slice(0, -gap.length);
}
Here is a demo:
// Query to highlight and the text it is searched in ("lond" can be tried as well).
let word = "ec2v6db";
let text = "EC2V 6DB, London, United Kingdom";
const wordCharacterRegex = /\w/;

/**
 * Builds regex source that matches `s` literally, with optional whitespace
 * permitted between any two adjacent characters.
 */
function escapeRegexCharacters(s) {
  const gap = "\\s*";
  const spaced = s.replace(/([-\/\\^$*+?.()|[\]{}])|[\s\S]/g, function (ch, special) {
    return (special ? "\\" + special : ch) + gap;
  });
  // Remove the gap that was appended after the final character.
  return spaced.slice(0, -gap.length);
}

// \b in front of a query starting with a word character, \B otherwise.
const startsWithWordChar = wordCharacterRegex.test(word[0]);
const prefix = startsWithWordChar ? "\\b" : "\\B";
const regex = new RegExp(prefix + escapeRegexCharacters(word), "i");

// Replacing the matched text with spaces
const blankOut = function (hit) {
  return " ".repeat(hit.length);
};
console.log(text.replace(regex, blankOut));
// => "        , London, United Kingdom" (the 8 matched characters become spaces)

// Adding tags around the match
const highlighted = text.replace(regex, "<highlight>$&</highlight>");
console.log(highlighted);

// Getting the indices as [start, end)
let match = regex.exec(text);
if (match) {
  const start = match.index;
  console.log([start, start + match[0].length]);
}

Regex to match string with contains closed brackets

I need to create a regex rule that matches strings which don't contain the ( or ) characters, and also strings that do contain them as long as every parenthesis is closed (and not nested). In addition, an empty () is also invalid.
Good strings (should be matched):
aaaaaa
(asdasd)
aaaa(bbb)a
(aaa)aaaa
aaaaaa(aaaa)
aaaa(bbb)(ccc)ddd
aaaa(bbbb)cccc(dddd)eeee
Bad strings (there shouldn't be match):
)aaaa
)aaaa(asd)
aaaaaa(
aaaa(bbb))
aaa(bbb
aaaaa((bbbb)cccc
aaaa(bbbb))ccc
aaaa(aasd(adssad))ad
adassd(aas(add)adsa(asda)ad)
()
Tried and created something like this (?!.*[(]{2,})(?!.*[)]{2,})(?![)])(?!.*[(]$).*$ but still it isn't good. Any help with this?
You can use this regex for your job:
/^(?!$)(?:[^)(]*\([^()]+\))*[^)(]*$/gm
RegEx Demo
RegEx Breakup:
^ - Line start
(?!$) - Negative lookahead to make sure we don't match empty string
(?: - Start of a non-capturing group
[^)(]* - Match 0 or more of anything but ( and )
\( - Match a (
[^()]+ - Match 1 or more of anything but ( and )
\) - Match a literal )
)* - End of the non-capturing group, * makes it match 0 or more times
[^)(]*- Match 0 or more of anything but ( and )
$ - Line end
If you want to check for balanced parens, you can use a function like this:
/**
 * Checks whether the parentheses in a string are balanced: every "(" is closed
 * by a later ")" and no ")" appears before its matching "(".
 *
 * @param {string} str - text to inspect
 * @returns {boolean} true when balanced (a string without parentheses counts as balanced)
 */
function balanced(str) {
  let depth = 0; // number of currently open, not yet closed "("
  for (let i = 0; i < str.length; i++) {
    const ch = str.charAt(i);
    if (ch === '(') {
      depth++;
    } else if (ch === ')') {
      depth--;
      // A ")" without a preceding "(" can never be repaired by later characters;
      // comparing only the totals would wrongly accept ")(".
      if (depth < 0) return false;
    }
  }
  return depth === 0; // anything left open means unbalanced
}
var s1 = "aaaa(bbbb)cccc(dddd)eeee";
var s2 = "aaaa(bbbb(cccc(dddd)eeee";
var s3 = "aaaa";
var s4 = ")aaaa("; // equal counts, but closed before opened
console.log(s1 + " => " + balanced(s1));
console.log(s2 + " => " + balanced(s2));
console.log(s3 + " => " + balanced(s3));
console.log(s4 + " => " + balanced(s4));
Or if you insist on using regexp, then use two regexp to check for balanced parens like this:
/**
 * Regex-assisted check for balanced parentheses: every "(" must be closed by a
 * later ")" and no ")" may come before its matching "(".
 *
 * @param {string} str - text to inspect
 * @returns {boolean} true when balanced (a string without parentheses counts as balanced)
 */
function balanced(str) {
  // Extract the parentheses in their original order; match() yields null when there are none.
  const parens = str.match(/[()]/g) || [];
  let depth = 0; // number of "(" still waiting for a ")"
  for (const paren of parens) {
    if (paren === '(') {
      depth++;
    } else if (--depth < 0) {
      // Comparing only the counts of "(" and ")" would accept ")(": reject a
      // closing parenthesis as soon as it has nothing to close.
      return false;
    }
  }
  return depth === 0;
}
var s1 = "aaaa(bbbb)cccc(dddd)eeee";
var s2 = "aaaa(bbbb(cccc(dddd)eeee";
var s3 = "aaaa";
var s4 = ")aaaa("; // equal counts, but closed before opened
console.log(s1 + " => " + balanced(s1));
console.log(s2 + " => " + balanced(s2));
console.log(s3 + " => " + balanced(s3));
console.log(s4 + " => " + balanced(s4));
This should do the trick:
^([^()]|\([^()]+\))+$
reads "match not a paren or ( no parens here ), once or more, whole string"
If you want to match balanced parens at any level, a single expression is not possible in js, due to lack of recursion support, but a function will be rather trivial.
/**
 * Reports whether the parentheses in `s` are balanced at any nesting level.
 * Innermost "(...)" groups are deleted repeatedly; the string is balanced when
 * no parenthesis survives.
 */
let balanced = function (s) {
  const innermostGroup = /\([^()]*\)/g;
  let remaining = s;
  let previous;
  do {
    previous = remaining;
    remaining = remaining.replace(innermostGroup, '');
  } while (remaining !== previous); // stop once a pass removes nothing
  return remaining.search(/[()]/) === -1;
};
console.log(balanced('a(b((d))e) (f) g'));
console.log(balanced('a(b((d))e? (f) g'));
or without regexes:
/**
 * Regex-free balance check: walks the string once while tracking how many
 * "(" are currently open.
 */
let balanced = s => {
  let openCount = 0;
  for (const ch of s) {
    if (ch === '(') {
      openCount += 1;
    } else if (ch === ')') {
      // A ")" with nothing open makes the string unbalanced right away.
      if (openCount === 0) {
        return false;
      }
      openCount -= 1;
    }
  }
  return openCount === 0;
};

Design a regular expression for a sentence and its subset

For the following two sentences,
var first_sentence = 'My cat is sleeping';
var second_sentence = 'My cat is sleeping with a blanket';
I have tried to use the following regexp to get both verb (sleeping) and the noun (a blanket).
var regex = /My cat is (.+?)\s+with.?(.+)?/gi.exec('My cat is sleeping with a blanket');
console.log(regex);
/*
[ 0 : 'My cat is sleeping with a blanket'
1 : 'sleeping'
2 : 'a blanket'
index : 0
input : 'My cat is sleeping with a blanket'
length : 3 ]
*/
This regular expression got it well but when I apply it to the first sentence, it returns null, any idea about that ?
var regex = /My cat is (.+?)\s+with.?(.+)?/gi.exec('My cat is sleeping');
console.log(regex);
// null
In the first sentence, there is no \s+with.?(.+)? part that requires some text to be present (1+ whitespaces and then with). You need to wrap the part of the pattern that is optional with (?:....)?:
/My cat is (\S+)(?:\s+with\s+(.*))?/gi
See the regex demo
Details:
My cat is - a literal text
(\S+) - Group 1 capturing 1+ non-whitespace symbols
(?:\s+with\s+(.*))? - an optional sequence of:
\s+with\s+ - with word enclosed with 1+ whitespaces on both sides
(.*) - Group 2 capturing any 0+ chars other than line break symbols
JS:
// Browser demo: runs the pattern against a sentence without and a sentence with
// the optional "with ..." clause and appends the outcome to the page body.
const ss = [ "My cat is sleeping", "My cat is sleeping with a blanket"];
// Group 1: the word after "My cat is"; group 2 (optional): the text after "with".
const rx = /My cat is (\S+)(?:\s+with\s+(.*))?/i;
for (const sentence of ss) {
  document.body.innerHTML += "Testing \"<i>" + sentence + "</i>\"... ";
  // Declared locally: assigning to an undeclared `m` creates an implicit global
  // and throws a ReferenceError in strict mode / ES modules.
  const m = sentence.match(rx);
  if (m !== null) {
    // Close the <b> tag only when it was opened, so no stray </b> is emitted
    // for sentences that have no "with ..." clause.
    const withPart = m[2] ? " and <b>" + m[2] + "</b>" : "";
    document.body.innerHTML += "Found: <b>" + m[1] + "</b>" + withPart + "<br/>";
  } else {
    document.body.innerHTML += "NOT Matched: <b>" + sentence + "</b><br/>";
  }
}

Regular Expressions first Alphabetic rest alphanumeric

I'm trying to write a Regex
What I need is:
To start only with: A-z (Alphabetic)
Min. Length: 5
Max. Length: 10
The rest can be A-z0-9(Alphanumeric) but contain at least one number
What I have: ^[A-z][A-z0-9]{5,10}$
You can use
/^(?=.{5,10}$)[a-z][a-z]*\d[a-z\d]*$/i
See the regex demo
Details:
^ - start of string
(?=.{5,10}$) - the string should contain 5 to 10 any chars other than line break chars (this will be restricted by the consuming pattern later) up to the end of string
[a-z] - the first char must be an ASCII letter (i modifier makes the pattern case insensitive)
[a-z]* - 0+ ASCII letters
\d - 1 digit
[a-z\d]* - 0+ ASCII letters or digits
$ - end of string.
// Strings to run through the pattern.
var ss = [ "ABCABCABC1","ABCA1BCAB","A1BCABCA","A1BCAB","A1BCA","A1BC","1BCABCABC1","ABCABC","ABCABCABCD"];
// The pattern under test, written as a regex literal.
var rx = /^(?=.{5,10}$)[a-z][a-z]*\d[a-z\d]*$/i;

// Show the pattern source on the page first.
var patternLine = "Pattern: <b>" + rx.source
  + "</b><br/>";
document.body.innerHTML += patternLine;

// Then append one result line per test string.
ss.forEach(function (candidate) {
  document.body.innerHTML += "Testing \"<i>" + candidate + "</i>\"... ";
  document.body.innerHTML += "Matched: <b>" + rx.test(candidate) + "</b><br/>";
});
// Validates: starts with a letter, 5 to 10 characters in total, only letters and
// digits, and at least one digit somewhere in the string.
const pattern = /^(?=.*\d)[a-z][a-z\d]{4,9}$/i;
/* PATTERN
^           : Start of line
(?=.*\d)    : Lookahead - the string must contain at least one digit
[a-z]       : One letter between a and z
[a-z\d]{4,9}: 4 to 9 letters or digits (\w is not used because it also accepts "_")
$           : End of line
/i          : Case-insensitive
*/
const tests = [
  "1abcdefghijklmn", //false (starts with a digit, too long)
  "abcdefghijklmndfvdfvfdv", //false (too long, no digit)
  "1abcde", //false (starts with a digit)
  "abcd1", //true
  "abcde", //false (no digit)
  "abc_1", //false (underscore is not alphanumeric)
];
for (const candidate of tests) {
  console.log(
    candidate,
    pattern.test(candidate)
  );
}

For the regex gurus, split by -> and avoiding bracket/quotes contents

Given string:
funny -> A_gre$" [ "at -> looks -> great/*54[ [funny " -> [ " -> [great -> yolo] -> looks]][great] -> a2afg34423*/- -> yolo" -> [ "
Split to Array:
funny
A_gre$' [ 'at
looks
great/*54[ [funny ' -> [ ' -> [great -> yolo] -> looks]][great]
a2afg34423*/-
yolo' -> [ '
A Regex Solution??!?
Basically, if there are quotes around the bracket(s), avoid the brackets' function as open/close delimiters otherwise make sure the text between the open bracket and close bracket are voided. How would i achieve this using Regex?
My Parser Solution Test
// Hand-written splitter: cuts s at every "->" that is neither inside single quotes
// nor inside square brackets, then logs the trimmed pieces.
var s = "funny -> A_gre$' [ 'at -> looks -> great/\*54[ [funny ' -> [ ' ->"
+ "[great -> yolo] -> looks]][great] -> a2afg34423*/- -> yolo' -> [ '",
// p: current square-bracket nesting depth (only updated while outside quotes)
p = 0,
// z: offsets into s where each piece starts; the first piece starts at 0
z = [0],
// q: number of single quotes seen so far (an odd count means "inside quotes")
q = 0,
// x: the characters of s; reused at the end to hold the resulting pieces
x = s.split('');
// The quote tracking below counts single quotes ('), which is the quoting used in s.
// NOTE(review): the original comment here read "Looking for \" not \'", which suggests
// double quotes may have been the intended target - confirm.
for(var i = 0; i< x.length; i++){
// b: current character, c: next character.
// The `var q` / `var p` declarators below re-declare the outer q and p (var is
// function-scoped), so they update the same counters rather than creating new ones.
// q is incremented on every single quote; p is incremented on '[' and decremented
// on ']' but only while the quote count is even.
var b = x[i],
c = x[i + 1],
q = b == "'" ? ++q : q,
p = !(q % 2) ? b == '[' ? ++p : b == ']' ? --p : p : p;
// A "->" at bracket depth 0 and outside quotes is a separator: remember the
// offset just past it as the start of the next piece.
if(b == '-' && c == '>' && !p && !(q % 2))
z.push(i + 2);
// On the last character: close the offset list with the end of the string and
// build the output.
if(i == x.length - 1){
// x is replaced by an empty array that collects the pieces.
z.push(x.length); x = [];
// For each pair of neighbouring offsets take the substring between them. Every
// piece except the last one ends 2 characters early to drop the "->" itself.
for(var u = 0; u < z.length; u++)
z[u + 1] !== undefined ?
x.push(s.substring(z[u], z[u + 2] !== undefined ?
z[u + 1]-2 : z[u + 1]).trim()) : 0;
}
}
// Logs the array of pieces.
console.log(x)
Output:
->>> [
"funny",
"A_gre$' [ 'at",
"looks",
"great/*54[ [funny ' -> [ ' -> [great -> yolo] -> looks]][great]",
"a2afg34423*/-",
"yolo' -> [ '"
]
Try this pattern:
([^\s\[\"]*\[[^\]]+\])\S*|([^\s\[\"]*\"[^\"]+\")\S*|(\w\S*)
Use regexpal to see what it matches. It consists of three parts. The description for one of them is as follows:
([^\s\[\"]*\[[^\]]+\])\S*
An expression that reads non-space, non-quote, and non-bracket characters until it reaches an open bracket, then reads the bracket contents until it reaches the closing bracket, then reads any non-space characters occurring after it. This is a more detailed description of the bracket-matching part:
\[ : opening bracket character
[ : regex syntax for starting a set definition
^ : It's a negative set, i.e., set of characters which are NOT:
\] : closing bracket character
]+ : regex syntax for ending a set definition and the + operator for matching 1 or more occurrences
\] : closing bracket character
Another section handles the quotes, and another section matches words without bracket and quotes.
The following code shows how to see the matches, and how to extract them:
// Sample text to tokenize.
var input = 'funny -> A_gre$" [ "at -> looks -> great/54[ [funny " -> [ " -> [great -> yolo] -> looks]][great] -> a2afg34423/- -> yolo" -> [ "';
// Three alternatives: a token containing a [...] part, a token containing a "..." part,
// or a plain token that starts with a word character.
var regexp = /([^\s\[\"]*\[[^\]]+\])\S*|([^\s\[\"]*\"[^\"]+\")\S*|(\w\S*)/g;

// All matches at once.
var result = input.match(regexp);
console.log("Array of matches are:");
console.log(result);

// The same matches one by one, together with their position; exec() on a global
// regex continues from the previous match and returns null when there are no more.
var results;
while ((results = regexp.exec(input)) !== null) {
  console.log("index: " + results.index + " found: " + results[0]);
}
This can be seen live here: http://jsfiddle.net/LXqch/1/

Categories