JavaScript regex \G + offset equivalent - javascript

Is there an equivalent of \G in JavaScript regular expressions? I need to match a pattern at an exact offset. Setting the g flag and .lastIndex searches forward from the given index, but won't match at the offset exactly.
XRegExp has this y modifier which might be what I'm looking for, but it doesn't appear to work in Node/V8.

I think the only thing you can do is set .lastIndex to your starting point, try a match with .exec(), and then if you get a match check the updated value of .lastIndex. If it's equal to your starting position plus the length of the match result, then the match began exactly where you wanted.
So:
// Emulate \G: with the g flag exec() starts searching at lastIndex but may
// find a match further along, so afterwards verify that the match really
// began at the requested offset.
var re = /banana/g;
var offset = 6;
re.lastIndex = offset;
var mr = re.exec("hello banana");
// exec() returns null when nothing matches at or after `offset`; guard before
// dereferencing. A match started at `offset` exactly when its length plus
// `offset` equals the updated lastIndex.
if (mr !== null && mr[0].length + offset === re.lastIndex)
  alert("good banana");
I would never claim that this is a "good" way of doing things, it's just the only possibility I know of.

Did some hackery:
var XRegExp = require('xregexp').XRegExp;

/**
 * Wraps `patt` so that the resulting pattern either matches `patt` or falls
 * back to the empty named group __NO_MATCH.
 * @param {string} patt  - pattern source to wrap
 * @param {string} [flags='gnsm'] - XRegExp flags
 */
function makeOffsetPatt(patt, flags) {
  // Plain undefined check: the snippet never loads underscore, so `_` is not
  // guaranteed to exist.
  if (flags === undefined) flags = 'gnsm';
  return XRegExp('(?:' + patt + ')|(?<__NO_MATCH>)', flags);
}

/**
 * Tries the pattern against `str` starting at `offset`.
 * Returns null when nothing matched or when only the __NO_MATCH fallback
 * matched; otherwise returns the match with the marker property removed.
 */
XRegExp.prototype.match = function(str, offset) {
  var result = XRegExp.exec(str, this, offset);
  if (result === null || result.__NO_MATCH !== undefined) return null;
  delete result.__NO_MATCH;
  return result;
};
var word = makeOffsetPatt('\\w+');
console.log(word.match('html { head {'));
console.log(word.match('html { head {',4));
console.log(word.match('html { head {',7));
Outputs:
[ 'html', index: 0, input: 'html { head {' ]
null
[ 'head', index: 7, input: 'html { head {' ]
If your pattern isn't found, it will try to match an empty string instead, which should always succeed. I then check for the existence of this empty match to determine if your pattern failed, clean up the match object and return it.

Related

Exec regex inside dowhile loop [duplicate]

I'm trying to parse the following kind of string:
[key:"val" key2:"val2"]
where there are arbitrary key:"val" pairs inside. I want to grab the key name and the value.
For those curious I'm trying to parse the database format of task warrior.
Here is my test string:
[description:"aoeu" uuid:"123sth"]
which is meant to highlight that anything can be in a key or value aside from space, no spaces around the colons, and values are always in double quotes.
In node, this is my output:
[deuteronomy][gatlin][~]$ node
> var re = /^\[(?:(.+?):"(.+?)"\s*)+\]$/g
> re.exec('[description:"aoeu" uuid:"123sth"]');
[ '[description:"aoeu" uuid:"123sth"]',
'uuid',
'123sth',
index: 0,
input: '[description:"aoeu" uuid:"123sth"]' ]
But description:"aoeu" also matches this pattern. How can I get all matches back?
Continue calling re.exec(s) in a loop to obtain all the matches:
var re = /\s*([^[:]+):\"([^"]+)"/g;
var s = '[description:"aoeu" uuid:"123sth"]';
var m;
do {
m = re.exec(s);
if (m) {
console.log(m[1], m[2]);
}
} while (m);
Try it with this JSFiddle: https://jsfiddle.net/7yS2V/
str.match(pattern), if pattern has the global flag g, will return all the matches as an array.
For example:
const str = 'All of us except #Emran, #Raju and #Noman were there';
console.log(
str.match(/#\w*/g)
);
// Will log ["#Emran", "#Raju", "#Noman"]
To loop through all matches, you can use the replace function:
var re = /\s*([^[:]+):\"([^"]+)"/g;
var s = '[description:"aoeu" uuid:"123sth"]';
s.replace(re, function(match, g1, g2) { console.log(g1, g2); });
This is a solution
var s = '[description:"aoeu" uuid:"123sth"]';
var re = /\s*([^[:]+):\"([^"]+)"/g;
var m;
while (m = re.exec(s)) {
console.log(m[1], m[2]);
}
This is based on lawnsea's answer, but shorter.
Notice that the `g' flag must be set to move the internal pointer forward across invocations.
str.match(/regex/g)
returns all matches as an array.
If, for some mysterious reason, you need the additional information that comes with exec, then as an alternative to the previous answers you could do it with a recursive function instead of a loop, as follows (which also looks cooler :).
/**
 * Recursively collects the exec() results of `regex` against `str`.
 *
 * @param {RegExp} regex - pattern; all matches are returned only with the g (or y) flag
 * @param {string} str - text to search
 * @param {Array} [matches=[]] - accumulator (callers normally omit it)
 * @returns {Array} the exec() match arrays, in order of occurrence
 */
function findMatches(regex, str, matches = []) {
  const res = regex.exec(str);
  if (res === null) return matches;
  matches.push(res);
  // Without g/y exec() never advances lastIndex and would return the same
  // match forever, so stop after the first one.
  if (!regex.global && !regex.sticky) return matches;
  // A zero-length match leaves lastIndex where it was; step past it to
  // guarantee progress.
  if (res[0] === '') regex.lastIndex += 1;
  return findMatches(regex, str, matches);
}
// Usage
const matches = findMatches(/regex/g, str)
as stated in the comments before, it's important to have g at the end of regex definition to move the pointer forward in each execution.
We are finally beginning to see a built-in matchAll function, see here for the description and compatibility table. It looks like as of May 2020, Chrome, Edge, Firefox, and Node.js (12+) are supported but not IE, Safari, and Opera. Seems like it was drafted in December 2018 so give it some time to reach all browsers, but I trust it will get there.
The built-in matchAll function is nice because it returns an iterable. It also returns capturing groups for every match! So you can do things like
// get the letters before and after "o"
let matches = "stackoverflow".matchAll(/(\w)o(\w)/g);
// matchAll() returns a one-shot iterator: once a loop has consumed it,
// spreading it again yields an empty array. Turn it into an array first and
// loop over that instead.
const arrayOfAllMatches = [...matches]; // you can also turn the iterable into an array
for (const match of arrayOfAllMatches) {
  console.log("letter before:" + match[1]);
  console.log("letter after:" + match[2]);
}
It also seems like every match object uses the same format as match(). So each object is an array of the match and capturing groups, along with three additional properties: index, input, and groups. So it looks like:
[<match>, <group1>, <group2>, ..., index: <match offset>, input: <original string>, groups: <named capture groups>]
For more information about matchAll there is also a Google developers page. There are also polyfills/shims available.
If you have ES9
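(Meaning your runtime (Chrome, Node.js, Firefox, etc.) supports String.prototype.matchAll, which was standardized in ECMAScript 2020; strictly speaking, "ES9" is ECMAScript 2018, which does not include it)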
Use the new yourString.matchAll( /your-regex/g ).
If you don't have ES9
If you have an older system, here's a function for easy copy and pasting
/**
 * Returns every match of `regexPattern` in `sourceString`.
 *
 * @param {RegExp|string} regexPattern - pattern; the g flag is added automatically
 * @param {string} sourceString - text to search
 * @returns {Array} exec() match arrays with their `input` property removed
 */
function findAll(regexPattern, sourceString) {
  const output = [];
  // auto-add global flag while keeping others as-is (a string pattern has no flags)
  const flags = regexPattern instanceof RegExp ? regexPattern.flags : "";
  const regexPatternWithGlobal = new RegExp(
    regexPattern,
    flags.includes("g") ? flags : "g" + flags
  );
  let match;
  while ((match = regexPatternWithGlobal.exec(sourceString)) !== null) {
    // a zero-length match does not advance lastIndex; step over it so the
    // loop cannot spin forever
    if (match[0] === "") regexPatternWithGlobal.lastIndex += 1;
    // get rid of the string copy
    delete match.input;
    // store the match data
    output.push(match);
  }
  return output;
}
example usage:
console.log( findAll(/blah/g,'blah1 blah2') )
outputs:
[ [ 'blah', index: 0 ], [ 'blah', index: 6 ] ]
Based on Agus's function, but I prefer to return just the match values:
// The sample text contains HTML entities, which is what the pattern below
// looks for ("&" up to the next ";").
var bob = "&gt; bob &lt;";

/**
 * Returns the first capture group of each match of `regex` in `str`.
 * With the g flag all matches are visited; otherwise only the first one.
 */
function matchAll(str, regex) {
  var res = [];
  var m;
  while ((m = regex.exec(str)) !== null) {
    res.push(m[1]);
    // a non-global regex is only executed once
    if (!regex.global) break;
    // step over zero-length matches so the loop always makes progress
    if (m[0] === "") regex.lastIndex += 1;
  }
  return res;
}
var Amatch = matchAll(bob, /(&.*?;)/g);
console.log(Amatch); // yields: ["&gt;", "&lt;"]
Iterables are nicer:
/**
 * Wraps `pattern` applied to `text` in a re-iterable object.
 * Every iteration works on a fresh clone of the pattern, so the caller's
 * regex (and its lastIndex) is never touched.
 */
const matches = (text, pattern) => ({
  [Symbol.iterator]: function * () {
    const clone = new RegExp(pattern.source, pattern.flags);
    let match = clone.exec(text);
    while (match !== null) {
      yield match;
      // without g/y exec() would return the same match forever
      if (!clone.global && !clone.sticky) return;
      // step over zero-length matches to guarantee progress
      if (match[0] === '') clone.lastIndex += 1;
      match = clone.exec(text);
    }
  }
});
Usage in a loop:
for (const match of matches('abcdefabcdef', /ab/g)) {
console.log(match);
}
Or if you want an array:
[ ...matches('abcdefabcdef', /ab/g) ]
Here is my function to get the matches :
/**
 * Collects the exec() results of `regex` against `text`.
 *
 * @param {RegExp} regex - with the g flag every match is returned, otherwise only the first
 * @param {string} text - text to search
 * @returns {Array} match arrays (empty when nothing matches)
 * @throws {Error} 'not RegExp' when `regex` is not a regular expression
 */
function getAllMatches(regex, text) {
  // instanceof also accepts RegExp subclasses, unlike a constructor comparison
  if (!(regex instanceof RegExp)) {
    throw new Error('not RegExp');
  }
  var res = [];
  var match;
  while ((match = regex.exec(text)) !== null) {
    res.push(match);
    // a non-global regex is executed exactly once
    if (!regex.global) break;
    // a zero-length match leaves lastIndex unchanged; advance manually
    if (match[0] === '') regex.lastIndex += 1;
  }
  return res;
}
// Example:
var regex = /abc|def|ghi/g;
var res = getAllMatches(regex, 'abcdefghi');
res.forEach(function (item) {
console.log(item[0]);
});
If you're able to use matchAll here's a trick:
Array.from has an optional map-function parameter (a 'selector'), so instead of ending up with an array of awkward 'match' results you can project each one to what you really need:
Array.from(str.matchAll(regexp), m => m[0]);
If you have named groups, e.g. (/(?<firstName>[a-z][A-Z]+)/g), you could do this (the group name is case-sensitive and must match the property read from m.groups):
Array.from(str.matchAll(regexp), m => m.groups.firstName);
Since ES9, there's now a simpler, better way of getting all the matches, together with information about the capture groups, and their index:
const string = 'Mice like to dice rice';
const regex = /.ice/gu;
for (const match of string.matchAll(regex)) {
  console.log(match);
}
// ["Mice", index: 0, input: "Mice like to dice rice", groups: undefined]
// ["dice", index: 13, input: "Mice like to dice rice", groups: undefined]
// ["rice", index: 18, input: "Mice like to dice rice", groups: undefined]
It is currently supported in Chrome, Firefox, Opera. Depending on when you read this, check this link to see its current support.
Use this...
var all_matches = your_string.match(re);
console.log(all_matches)
It will return an array of all matches...That would work just fine....
But remember it won't take groups in account..It will just return the full matches...
I would definitely recommend using the String.match() function, and creating a relevant RegEx for it. My example is with a list of strings, which is often necessary when scanning user inputs for keywords and phrases.
// 1) Define keywords
const keywords = ['apple', 'orange', 'banana'];
// 2) Create regex, pass "i" for case-insensitive and "g" for global search.
//    Regex metacharacters in a keyword (e.g. "c++") are escaped first so every
//    keyword is matched literally.
const escapedKeywords = keywords.map(function (keyword) {
  return keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
});
const regex = new RegExp("(" + escapedKeywords.join('|') + ")", "ig");
// => /(apple|orange|banana)/gi
// 3) Match it against any string to get all matches
const found = "Test string for ORANGE's or apples were mentioned".match(regex);
// => ["ORANGE", "apple"]
Hope this helps!
This isn't really going to help with your more complex issue but I'm posting this anyway because it is a simple solution for people that aren't doing a global search like you are.
I've simplified the regex in the answer to be clearer (this is not a solution to your exact problem).
var re = /^(.+?):"(.+)"$/
var regExResult = re.exec('description:"aoeu"');
var purifiedResult = purify_regex(regExResult);
// We only want the group matches in the array
/**
 * Reduces a regex result to just its captured groups.
 * The input is only read, never modified.
 */
function purify_regex(reResult){
  // Skip the full match at position 0 and gather the remaining elements into
  // a fresh plain array (so index/input/groups are not carried over)
  const [, ...capturedGroups] = reResult;
  return capturedGroups;
}
// purifiedResult= ["description", "aoeu"]
That looks more verbose than it is because of the comments, this is what it looks like without comments
var re = /^(.+?):"(.+)"$/
var regExResult = re.exec('description:"aoeu"');
var purifiedResult = purify_regex(regExResult);
function purify_regex(reResult){
let purifiedArray = [...reResult];
purifiedArray.shift();
return purifiedArray;
}
Note that any groups that do not match will be listed in the array as undefined values.
This solution uses the ES6 spread operator to purify the array of regex specific values. You will need to run your code through Babel if you want IE11 support.
Here's a one line solution without a while loop.
The order is preserved in the resulting list.
The potential downsides are
It clones the regex for every match.
The result is in a different form than expected solutions. You'll need to process them one more time.
let re = /\s*([^[:]+):\"([^"]+)"/g;
let str = '[description:"aoeu" uuid:"123sth"]';
// The semicolons above are required: the next statement starts with "(", so
// without them it would be parsed as a call on the preceding string literal.
// Each matched substring is re-run through a fresh copy of the regex to
// recover its capture groups.
const pairs = (str.match(re) || []).map(e => RegExp(re.source, re.flags).exec(e));
[ [ 'description:"aoeu"',
'description',
'aoeu',
index: 0,
input: 'description:"aoeu"',
groups: undefined ],
[ ' uuid:"123sth"',
'uuid',
'123sth',
index: 0,
input: ' uuid:"123sth"',
groups: undefined ] ]
My guess is that if there are edge cases such as extra or missing spaces, this expression with fewer boundaries might also be an option:
^\s*\[\s*([^\s\r\n:]+)\s*:\s*"([^"]*)"\s*([^\s\r\n:]+)\s*:\s*"([^"]*)"\s*\]\s*$
If you wish to explore/simplify/modify the expression, it's been
explained on the top right panel of
regex101.com. If you'd like, you
can also watch in this
link, how it would match
against some sample inputs.
Test
const regex = /^\s*\[\s*([^\s\r\n:]+)\s*:\s*"([^"]*)"\s*([^\s\r\n:]+)\s*:\s*"([^"]*)"\s*\]\s*$/gm;
const str = `[description:"aoeu" uuid:"123sth"]
[description : "aoeu" uuid: "123sth"]
[ description : "aoeu" uuid: "123sth" ]
[ description : "aoeu" uuid : "123sth" ]
[ description : "aoeu"uuid : "123sth" ] `;
let m;
while ((m = regex.exec(str)) !== null) {
// This is necessary to avoid infinite loops with zero-width matches
if (m.index === regex.lastIndex) {
regex.lastIndex++;
}
// The result can be accessed through the `m`-variable.
m.forEach((match, groupIndex) => {
console.log(`Found match, group ${groupIndex}: ${match}`);
});
}
RegEx Circuit
jex.im visualizes regular expressions:
const re = /^\[(?:(.+?):"(.+?)"\s*)+\]$/g
const matches = [...re.exec('[description:"aoeu" uuid:"123sth"]').entries()]
console.log(matches)
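Basically, exec returns an array (not an iterator); calling .entries() on it yields an iterator of [index, value] pairs, and the ES6 spread operator converts that iterator into a regular Array.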
Here is my answer:
var str = '[me nombre es] : My name is. [Yo puedo] is the right word';
var reg = /\[(.*?)\]/g;
// match() returns null when the string contains no bracketed text
var a = str.match(reg) || [];
// Strip the surrounding brackets from each match individually; joining the
// matches and splitting on "," would break any text that itself has a comma.
a = a.map(function (bracketed) {
  return bracketed.slice(1, -1);
});

RegExp doesn't work fine

I'm working on a template engine, I try to catch all strings inside <% %>, but when I work it on the <%object.property%> pattern, everything fails.
My code:
var render = function(input, data){
var re = /<%([^%>]+)?%>/g;
var templateVarArray;
// var step = "";
while((templateVarArray = re.exec(input))!=null){
var strArray = templateVarArray[1].split(".");
// step+= templateVarArray[1]+" ";
if(strArray.length==1)
input = input.replace(templateVarArray[0], data[templateVarArray[1]]);
if(strArray.length==2){
input = input.replace(templateVarArray[0], data[strArray[0]][strArray[1]]);
}
}
// return step;
return input;
}
var input = "<%test.child%><%more%><%name%><%age%>";
document.write(render(input,{
test: { child: "abc"},
more: "MORE",
name:"ivan",
age: 22
}));
My result:
abc<%more%><%name%>22
what I want is: abc MORE ivan 22
Also, the RegExp /<%([^%>]+)?%>/g is referenced online, I did search its meaning, but still quite not sure the meaning. Especially why does it need "+" and "?", thanks a lot!
If you add a console.log() statement it will show where the next search is going to take place:
while((templateVarArray = re.exec(input))!=null){
console.log(re.lastIndex); // <-- insert this
var strArray = templateVarArray[1].split(".");
// step+= templateVarArray[1]+" ";
if(strArray.length==1)
input = input.replace(templateVarArray[0], data[templateVarArray[1]]);
if(strArray.length==2){
input = input.replace(templateVarArray[0], data[strArray[0]][strArray[1]]);
}
}
You will see something like:
14
26
This means that the next time you run re.exec(...) it will start at index 14 and 26 respectively. Consequently, you miss some of the matches after you substitute data in.
As @Alexander points out, take the 'g' off the end of the regex. Now you will see something like this:
0
0
This means the search will start each time from the beginning of the string, and you should now get what you were looking for:
abcMOREivan22
Regarding your questions on the RegEx and what it is doing, let's break the pieces apart:
<% - this matches the literal '<' followed immediately by '%'
([^%>]+) - the brackets (...) indicate we want to capture the portion of the string that matches the expression within the brackets
[^...] - indicates to match anything except what follows the '^'; without the '^' would match whatever pattern is within the []
[^%>] - matches a single character that is neither a '%' nor a '>'
[^%>]+ - '+' indicates to match one or more; in other words match one or more series of characters that is not a '%' and not a '>'
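? - because it follows the closing bracket of the group, this makes the whole captured group optional (zero or one occurrence), so the empty tag '<%%>' also matches; it would only request reluctant (non-greedy) matching if it directly followed a quantifier, as in '+?'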
%> - this matches the literal '%' followed immediately by '>'
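The trickiest part to understand is the '?'. Placed directly after a quantifier (e.g. '+?') it would request reluctant matching, meaning we stop matching with the shortest pattern that will still match the overall regex (as opposed to 'greedy' matching). Here, however, it follows the group, so it simply makes the group optional. In this case it makes little practical difference whether you include it, other than also allowing the empty tag '<%%>' (for which the captured group is undefined), though there are times where it will matter depending on the matching patterns.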
Suggested Improvement
The current logic is limited to data that nests two levels deep. To make it so it can handle an arbitrary nesting you could do this:
First, add a small function to do the substitution:
// Resolves a dotted path such as "a.b.c" against `data` by descending one
// property per path segment; returns whatever value the last segment names.
var substitute = function (str, data) {
  var node = data;
  var keys = str.split('.');
  for (var i = 0; i < keys.length; i++) {
    node = node[keys[i]];
  }
  return node;
};
Then, change your while loop to look like this:
while ((templateVarArray = re.exec(input)) != null) {
input = input.replace(templateVarArray[0], substitute(templateVarArray[1], data));
}
Not only does it handle any number of levels, you might find other uses for the 'substitute()' function.
The RegExp.prototype.exec() documentation says:
If your regular expression uses the "g" flag, you can use the exec() method multiple times to find successive matches in the same string. When you do so, the search starts at the substring of str specified by the regular expression's lastIndex property (test() will also advance the lastIndex property).
But you are replacing each match in the original string, so the next re.exec call, with lastIndex already set to a non-zero value, will not search from the beginning and will skip some matches.
So if you want to search and substitute the found results in the original string, just omit the g (global) flag:
/**
 * Replaces every <%path%> tag in `input` with the value found at `path`
 * (dot-separated, any depth) inside `data`.
 *
 * The substitution is a single pass: text inserted from `data` is never
 * re-scanned, so a value that itself contains a tag cannot cause an endless
 * loop. Values are inserted literally ("$&" and similar sequences are not
 * interpreted). A path that cannot be resolved is rendered as "undefined",
 * and the empty tag <%%> is left untouched.
 *
 * @param {string} input - template text
 * @param {Object} data - values to substitute
 * @returns {string} the rendered text
 */
var render = function(input, data) {
  return input.replace(/<%([^%>]+)%>/g, function (tag, path) {
    return path.split(".").reduce(function (node, key) {
      // stop descending once an intermediate value is missing
      return node == null ? undefined : node[key];
    }, data);
  });
};
var input = "<%test.child%><%more%><%name%><%age%>";
document.write(render(input, {
test: {
child: "abc"
},
more: "MORE",
name: "ivan",
age: 22
}));

Javascript - extract all element attributes by regex [duplicate]

I'm trying to parse the following kind of string:
[key:"val" key2:"val2"]
where there are arbitrary key:"val" pairs inside. I want to grab the key name and the value.
For those curious I'm trying to parse the database format of task warrior.
Here is my test string:
[description:"aoeu" uuid:"123sth"]
which is meant to highlight that anything can be in a key or value aside from space, no spaces around the colons, and values are always in double quotes.
In node, this is my output:
[deuteronomy][gatlin][~]$ node
> var re = /^\[(?:(.+?):"(.+?)"\s*)+\]$/g
> re.exec('[description:"aoeu" uuid:"123sth"]');
[ '[description:"aoeu" uuid:"123sth"]',
'uuid',
'123sth',
index: 0,
input: '[description:"aoeu" uuid:"123sth"]' ]
But description:"aoeu" also matches this pattern. How can I get all matches back?
Continue calling re.exec(s) in a loop to obtain all the matches:
var re = /\s*([^[:]+):\"([^"]+)"/g;
var s = '[description:"aoeu" uuid:"123sth"]';
var m;
do {
m = re.exec(s);
if (m) {
console.log(m[1], m[2]);
}
} while (m);
Try it with this JSFiddle: https://jsfiddle.net/7yS2V/
str.match(pattern), if pattern has the global flag g, will return all the matches as an array.
For example:
const str = 'All of us except #Emran, #Raju and #Noman were there';
console.log(
str.match(/#\w*/g)
);
// Will log ["#Emran", "#Raju", "#Noman"]
To loop through all matches, you can use the replace function:
var re = /\s*([^[:]+):\"([^"]+)"/g;
var s = '[description:"aoeu" uuid:"123sth"]';
s.replace(re, function(match, g1, g2) { console.log(g1, g2); });
This is a solution
var s = '[description:"aoeu" uuid:"123sth"]';
var re = /\s*([^[:]+):\"([^"]+)"/g;
var m;
while (m = re.exec(s)) {
console.log(m[1], m[2]);
}
This is based on lawnsea's answer, but shorter.
Notice that the `g' flag must be set to move the internal pointer forward across invocations.
str.match(/regex/g)
returns all matches as an array.
If, for some mysterious reason, you need the additional information that comes with exec, then as an alternative to the previous answers you could do it with a recursive function instead of a loop, as follows (which also looks cooler :).
/**
 * Recursively collects the exec() results of `regex` against `str`.
 *
 * @param {RegExp} regex - pattern; all matches are returned only with the g (or y) flag
 * @param {string} str - text to search
 * @param {Array} [matches=[]] - accumulator (callers normally omit it)
 * @returns {Array} the exec() match arrays, in order of occurrence
 */
function findMatches(regex, str, matches = []) {
  const res = regex.exec(str);
  if (res === null) return matches;
  matches.push(res);
  // Without g/y exec() never advances lastIndex and would return the same
  // match forever, so stop after the first one.
  if (!regex.global && !regex.sticky) return matches;
  // A zero-length match leaves lastIndex where it was; step past it to
  // guarantee progress.
  if (res[0] === '') regex.lastIndex += 1;
  return findMatches(regex, str, matches);
}
// Usage
const matches = findMatches(/regex/g, str)
as stated in the comments before, it's important to have g at the end of regex definition to move the pointer forward in each execution.
We are finally beginning to see a built-in matchAll function, see here for the description and compatibility table. It looks like as of May 2020, Chrome, Edge, Firefox, and Node.js (12+) are supported but not IE, Safari, and Opera. Seems like it was drafted in December 2018 so give it some time to reach all browsers, but I trust it will get there.
The built-in matchAll function is nice because it returns an iterable. It also returns capturing groups for every match! So you can do things like
// get the letters before and after "o"
let matches = "stackoverflow".matchAll(/(\w)o(\w)/g);
// matchAll() returns a one-shot iterator: once a loop has consumed it,
// spreading it again yields an empty array. Turn it into an array first and
// loop over that instead.
const arrayOfAllMatches = [...matches]; // you can also turn the iterable into an array
for (const match of arrayOfAllMatches) {
  console.log("letter before:" + match[1]);
  console.log("letter after:" + match[2]);
}
It also seems like every match object uses the same format as match(). So each object is an array of the match and capturing groups, along with three additional properties: index, input, and groups. So it looks like:
[<match>, <group1>, <group2>, ..., index: <match offset>, input: <original string>, groups: <named capture groups>]
For more information about matchAll there is also a Google developers page. There are also polyfills/shims available.
If you have ES9
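(Meaning your runtime (Chrome, Node.js, Firefox, etc.) supports String.prototype.matchAll, which was standardized in ECMAScript 2020; strictly speaking, "ES9" is ECMAScript 2018, which does not include it)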
Use the new yourString.matchAll( /your-regex/g ).
If you don't have ES9
If you have an older system, here's a function for easy copy and pasting
/**
 * Returns every match of `regexPattern` in `sourceString`.
 *
 * @param {RegExp|string} regexPattern - pattern; the g flag is added automatically
 * @param {string} sourceString - text to search
 * @returns {Array} exec() match arrays with their `input` property removed
 */
function findAll(regexPattern, sourceString) {
  const output = [];
  // auto-add global flag while keeping others as-is (a string pattern has no flags)
  const flags = regexPattern instanceof RegExp ? regexPattern.flags : "";
  const regexPatternWithGlobal = new RegExp(
    regexPattern,
    flags.includes("g") ? flags : "g" + flags
  );
  let match;
  while ((match = regexPatternWithGlobal.exec(sourceString)) !== null) {
    // a zero-length match does not advance lastIndex; step over it so the
    // loop cannot spin forever
    if (match[0] === "") regexPatternWithGlobal.lastIndex += 1;
    // get rid of the string copy
    delete match.input;
    // store the match data
    output.push(match);
  }
  return output;
}
example usage:
console.log( findAll(/blah/g,'blah1 blah2') )
outputs:
[ [ 'blah', index: 0 ], [ 'blah', index: 6 ] ]
Based on Agus's function, but I prefer to return just the match values:
// The sample text contains HTML entities, which is what the pattern below
// looks for ("&" up to the next ";").
var bob = "&gt; bob &lt;";

/**
 * Returns the first capture group of each match of `regex` in `str`.
 * With the g flag all matches are visited; otherwise only the first one.
 */
function matchAll(str, regex) {
  var res = [];
  var m;
  while ((m = regex.exec(str)) !== null) {
    res.push(m[1]);
    // a non-global regex is only executed once
    if (!regex.global) break;
    // step over zero-length matches so the loop always makes progress
    if (m[0] === "") regex.lastIndex += 1;
  }
  return res;
}
var Amatch = matchAll(bob, /(&.*?;)/g);
console.log(Amatch); // yields: ["&gt;", "&lt;"]
Iterables are nicer:
/**
 * Wraps `pattern` applied to `text` in a re-iterable object.
 * Every iteration works on a fresh clone of the pattern, so the caller's
 * regex (and its lastIndex) is never touched.
 */
const matches = (text, pattern) => ({
  [Symbol.iterator]: function * () {
    const clone = new RegExp(pattern.source, pattern.flags);
    let match = clone.exec(text);
    while (match !== null) {
      yield match;
      // without g/y exec() would return the same match forever
      if (!clone.global && !clone.sticky) return;
      // step over zero-length matches to guarantee progress
      if (match[0] === '') clone.lastIndex += 1;
      match = clone.exec(text);
    }
  }
});
Usage in a loop:
for (const match of matches('abcdefabcdef', /ab/g)) {
console.log(match);
}
Or if you want an array:
[ ...matches('abcdefabcdef', /ab/g) ]
Here is my function to get the matches :
/**
 * Collects the exec() results of `regex` against `text`.
 *
 * @param {RegExp} regex - with the g flag every match is returned, otherwise only the first
 * @param {string} text - text to search
 * @returns {Array} match arrays (empty when nothing matches)
 * @throws {Error} 'not RegExp' when `regex` is not a regular expression
 */
function getAllMatches(regex, text) {
  // instanceof also accepts RegExp subclasses, unlike a constructor comparison
  if (!(regex instanceof RegExp)) {
    throw new Error('not RegExp');
  }
  var res = [];
  var match;
  while ((match = regex.exec(text)) !== null) {
    res.push(match);
    // a non-global regex is executed exactly once
    if (!regex.global) break;
    // a zero-length match leaves lastIndex unchanged; advance manually
    if (match[0] === '') regex.lastIndex += 1;
  }
  return res;
}
// Example:
var regex = /abc|def|ghi/g;
var res = getAllMatches(regex, 'abcdefghi');
res.forEach(function (item) {
console.log(item[0]);
});
If you're able to use matchAll here's a trick:
Array.from has an optional map-function parameter (a 'selector'), so instead of ending up with an array of awkward 'match' results you can project each one to what you really need:
Array.from(str.matchAll(regexp), m => m[0]);
If you have named groups, e.g. (/(?<firstName>[a-z][A-Z]+)/g), you could do this (the group name is case-sensitive and must match the property read from m.groups):
Array.from(str.matchAll(regexp), m => m.groups.firstName);
Since ES9, there's now a simpler, better way of getting all the matches, together with information about the capture groups, and their index:
const string = 'Mice like to dice rice';
const regex = /.ice/gu;
for (const match of string.matchAll(regex)) {
  console.log(match);
}
// ["Mice", index: 0, input: "Mice like to dice rice", groups: undefined]
// ["dice", index: 13, input: "Mice like to dice rice", groups: undefined]
// ["rice", index: 18, input: "Mice like to dice rice", groups: undefined]
It is currently supported in Chrome, Firefox, Opera. Depending on when you read this, check this link to see its current support.
Use this...
var all_matches = your_string.match(re);
console.log(all_matches)
It will return an array of all matches...That would work just fine....
But remember it won't take groups in account..It will just return the full matches...
I would definitely recommend using the String.match() function, and creating a relevant RegEx for it. My example is with a list of strings, which is often necessary when scanning user inputs for keywords and phrases.
// 1) Define keywords
const keywords = ['apple', 'orange', 'banana'];
// 2) Create regex, pass "i" for case-insensitive and "g" for global search.
//    Regex metacharacters in a keyword (e.g. "c++") are escaped first so every
//    keyword is matched literally.
const escapedKeywords = keywords.map(function (keyword) {
  return keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
});
const regex = new RegExp("(" + escapedKeywords.join('|') + ")", "ig");
// => /(apple|orange|banana)/gi
// 3) Match it against any string to get all matches
const found = "Test string for ORANGE's or apples were mentioned".match(regex);
// => ["ORANGE", "apple"]
Hope this helps!
This isn't really going to help with your more complex issue but I'm posting this anyway because it is a simple solution for people that aren't doing a global search like you are.
I've simplified the regex in the answer to be clearer (this is not a solution to your exact problem).
var re = /^(.+?):"(.+)"$/
var regExResult = re.exec('description:"aoeu"');
var purifiedResult = purify_regex(regExResult);
// We only want the group matches in the array
/**
 * Reduces a regex result to just its captured groups.
 * The input is only read, never modified.
 */
function purify_regex(reResult){
  // Skip the full match at position 0 and gather the remaining elements into
  // a fresh plain array (so index/input/groups are not carried over)
  const [, ...capturedGroups] = reResult;
  return capturedGroups;
}
// purifiedResult= ["description", "aoeu"]
That looks more verbose than it is because of the comments, this is what it looks like without comments
var re = /^(.+?):"(.+)"$/
var regExResult = re.exec('description:"aoeu"');
var purifiedResult = purify_regex(regExResult);
function purify_regex(reResult){
let purifiedArray = [...reResult];
purifiedArray.shift();
return purifiedArray;
}
Note that any groups that do not match will be listed in the array as undefined values.
This solution uses the ES6 spread operator to purify the array of regex specific values. You will need to run your code through Babel if you want IE11 support.
Here's a one line solution without a while loop.
The order is preserved in the resulting list.
The potential downsides are
It clones the regex for every match.
The result is in a different form than expected solutions. You'll need to process them one more time.
let re = /\s*([^[:]+):\"([^"]+)"/g;
let str = '[description:"aoeu" uuid:"123sth"]';
// The semicolons above are required: the next statement starts with "(", so
// without them it would be parsed as a call on the preceding string literal.
// Each matched substring is re-run through a fresh copy of the regex to
// recover its capture groups.
const pairs = (str.match(re) || []).map(e => RegExp(re.source, re.flags).exec(e));
[ [ 'description:"aoeu"',
'description',
'aoeu',
index: 0,
input: 'description:"aoeu"',
groups: undefined ],
[ ' uuid:"123sth"',
'uuid',
'123sth',
index: 0,
input: ' uuid:"123sth"',
groups: undefined ] ]
My guess is that if there are edge cases such as extra or missing spaces, this expression with fewer boundaries might also be an option:
^\s*\[\s*([^\s\r\n:]+)\s*:\s*"([^"]*)"\s*([^\s\r\n:]+)\s*:\s*"([^"]*)"\s*\]\s*$
If you wish to explore/simplify/modify the expression, it's been
explained on the top right panel of
regex101.com. If you'd like, you
can also watch in this
link, how it would match
against some sample inputs.
Test
const regex = /^\s*\[\s*([^\s\r\n:]+)\s*:\s*"([^"]*)"\s*([^\s\r\n:]+)\s*:\s*"([^"]*)"\s*\]\s*$/gm;
const str = `[description:"aoeu" uuid:"123sth"]
[description : "aoeu" uuid: "123sth"]
[ description : "aoeu" uuid: "123sth" ]
[ description : "aoeu" uuid : "123sth" ]
[ description : "aoeu"uuid : "123sth" ] `;
let m;
while ((m = regex.exec(str)) !== null) {
// This is necessary to avoid infinite loops with zero-width matches
if (m.index === regex.lastIndex) {
regex.lastIndex++;
}
// The result can be accessed through the `m`-variable.
m.forEach((match, groupIndex) => {
console.log(`Found match, group ${groupIndex}: ${match}`);
});
}
RegEx Circuit
jex.im visualizes regular expressions:
const re = /^\[(?:(.+?):"(.+?)"\s*)+\]$/g
const matches = [...re.exec('[description:"aoeu" uuid:"123sth"]').entries()]
console.log(matches)
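Basically, exec returns an array (not an iterator); calling .entries() on it yields an iterator of [index, value] pairs, and the ES6 spread operator converts that iterator into a regular Array.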
Here is my answer:
var str = '[me nombre es] : My name is. [Yo puedo] is the right word';
var reg = /\[(.*?)\]/g;
// match() returns null when the string contains no bracketed text
var a = str.match(reg) || [];
// Strip the surrounding brackets from each match individually; joining the
// matches and splitting on "," would break any text that itself has a comma.
a = a.map(function (bracketed) {
  return bracketed.slice(1, -1);
});

how to parse variables using regex

Assume there is a string "aaaa/{helloworld}/dddd/{good}/ccc",
I want to get an array which contains the variables "helloworld" and "good" which are in braces {}.
Is there a simple way to implement this using regex?
Below function doesn't work:
function parseVar(str) {
var re = /\{.*\}/; // new RegExp('{[.*]}');// /{.*}/;
var m = str.match(re);
console.log(m)
if (m != null) {
console.log((m));
console.log(JSON.stringify(m));
}
}
parseVar("aaaa/{helloworld}/dddd/{good}/ccc");
The global flag (g) allows the regex to find more than one match. .* is greedy, meaning it will take up as many characters as possible, but you don't want that, so you have to add ? after it (.*?), which makes it take up as few characters as possible. It is helpful to use regex101 to test regular expressions.
/**
 * Extracts the text found inside each {...} pair of `str`.
 * Returns the inner texts in order of appearance (empty array if none).
 */
function parseVar(str) {
  var re = /\{(.*?)\}/g;
  var results = [];
  // With the g flag each exec() call resumes after the previous match and
  // returns null once there are no more matches.
  for (var match = re.exec(str); match !== null; match = re.exec(str)) {
    // match[0] is the matched text, match.index its start position and
    // match[n] the n-th capturing group; only group 1 is kept.
    results.push(match[1]);
  }
  return results;
}
var r = parseVar("aaaa/{helloworld}/dddd/{good}/ccc")
document.write(JSON.stringify(r))

Get string between 2 words, that contain this words inside him too

I have strings, and i want to find in them 2 words: 'start' and 'end'.
'start' and 'end' always come together (maybe i will have another characters between them, but if i have 'start', i will have 'end' too).
I try to do with regEx source that find the first 'start' and than his own 'end', and it will return the correct substring.
examples of strings: [i wrote in this examples index for every couple of 'start' and 'end' just for clarity (in the real strings i will not have this indexes)- the answer always between index (1)]
something start something_needed end something // print 'something_needed'
start(1) something start(2) something end(2) something end(1) start something end // print 'something start(2) something end(2) something'
start(1) something start(2) start(3) something end(3) something start(4) end(4) something end(2) something end(1) something start(5) something end(5) // print 'something start**(2) start(3) something end(3) something start(4) end(4) something end(2) something'
This is my solution in Javascript, but i prefer the answer in regEx only.
i find all the start, and after that all the end, and than- for every start: count++, for every end: count--. when count == 0, it the position of the correct end.
/**
 * Returns the text between the first "start" keyword and the "end" keyword
 * that balances it, treating start/end as nestable pairs.
 *
 * Relies on matchPosArr() for the keyword positions and on alert() to report
 * an unbalanced input (in which case everything after the first "start" is
 * returned, as before).
 */
function getStartEnd(str) {
  // Pad so a keyword at the very beginning or end of the input still has a
  // delimiter character on both sides for the patterns below.
  str = " " + str + " ";
  // Each position is where the delimiter run *preceding* the keyword begins.
  // NOTE(review): "START+" / "END+" quantify only the last letter, so
  // "STARTT" or "ENDD" also match - confirm whether that is intended.
  var start = matchPosArr(str, /[\d\s\r\n,\(\)\[\]\{\}]+START+(?=[\d\s\r\n,\(\)\[\]\{\}])/gi);
  var end = matchPosArr(str, /[\d\s\r\n,\(\)\[\]\{\}]+END+(?=[\d\s\r\n,\(\)\[\]\{\}])/gi);
  // NOTE(review): assumes a single delimiter character precedes the first
  // keyword, so skipping "start ".length characters lands just after it.
  var from = start[0] + ("start ".length);
  var count = 0; // current nesting depth
  var si = 0; // index of start array
  var ei = 0; // index of end array
  // Walk both position lists in document order.
  while (ei < end.length) {
    if (si < start.length && start[si] < end[ei]) {
      // next keyword is a start: one level deeper
      si++;
      count++;
    }
    else {
      // next keyword is an end; ends seen before any start are ignored
      if (count > 0) {
        count--;
        if (count == 0) {
          // this end balances the first start - stop here, without
          // advancing ei past it
          return str.substring(from, end[ei]);
        }
      }
      ei++;
    }
  }
  // ran out of "end" keywords before the first "start" was balanced
  alert('error');
  return str.substring(from);
}
/**
 * Collects the start index of every match of regEx in str.
 *
 * @param {string} str - Text to search.
 * @param {RegExp} regEx - Pattern to apply. With the g flag all matches are
 *     reported; without it only the first match is reported.
 * @returns {number[]} Match start positions, in order of appearance.
 */
function matchPosArr(str, regEx) {
    var pos = [];
    // Declared locally: assigning to an undeclared `match` creates an implicit
    // global and throws a ReferenceError in strict mode / ES modules.
    var match;
    while ((match = regEx.exec(str)) !== null) {
        pos.push(match.index);
        if (!(regEx.global || regEx.sticky)) {
            // exec() never advances lastIndex on such a regex, so looping
            // again would return the same match forever.
            break;
        }
        if (match[0].length === 0) {
            // An empty match leaves lastIndex where it was; step past it so
            // the scan keeps moving.
            regEx.lastIndex++;
        }
    }
    return pos;
}
// Show the extracted text. The function defined above is getStartEnd (the
// snippet never defines getSelectFrom); `str` must hold the text to search.
alert( getStartEnd(str) );
Here is a possible solution from Matching Nested Constructs in JavaScript, Part 2.
Example usage:
matchRecursiveRegExp("START text START text END text more END text", "START", "END");
// (c) 2007 Steven Levithan <stevenlevithan.com>
// MIT License
/*** matchRecursiveRegExp
Accepts a string to search, a left and right format delimiter
as regex patterns, and optional regex flags. Returns an array
of matches, allowing nested instances of left/right delimiters.
Use the "g" flag to return all matches, otherwise only the
first is returned. Be careful to ensure that the left and
right format delimiters produce mutually exclusive matches.
Backreferences are not supported within the right delimiter
due to how it is internally combined with the left delimiter.
When matching strings whose format delimiters are unbalanced
to the left or right, the output is intentionally as a
conventional regex library with recursion support would
produce, e.g. "<<x>" and "<x>>" both produce ["x"] when using
"<" and ">" as the delimiters (both strings contain a single,
balanced instance of "<x>").
examples:
matchRecursiveRegExp("test", "\\(", "\\)")
returns: []
matchRecursiveRegExp("<t<<e>><s>>t<>", "<", ">", "g")
returns: ["t<<e>><s>", ""]
matchRecursiveRegExp("<div id=\"x\">test</div>", "<div\\b[^>]*>", "</div>", "gi")
returns: ["test"]
*/
function matchRecursiveRegExp (str, left, right, flags) {
    var f = flags || "",
        // Whether the caller asked for all matches rather than just the first.
        g = f.indexOf("g") > -1,
        // The caller's flags without "g". The scanner below always adds its
        // own "g", and a duplicated flag ("gg") makes the RegExp constructor
        // throw a SyntaxError.
        nonGlobalFlags = f.replace(/g/g, ""),
        // Scanner that steps through every left or right delimiter in order.
        x = new RegExp(left + "|" + right, "g" + nonGlobalFlags),
        // Classifier telling a left delimiter from a right one; no "g" flag,
        // so test() keeps no state between calls.
        l = new RegExp(left, nonGlobalFlags),
        a = [],
        t, s, m;
    do {
        t = 0; // current nesting depth
        while ((m = x.exec(str)) !== null) {
            if (l.test(m[0])) {
                // Left delimiter: when opening the outermost level, remember
                // where its content begins.
                if (!t++) s = x.lastIndex;
            } else if (t) {
                // Right delimiter inside an open construct (stray right
                // delimiters at depth 0 are ignored).
                if (!--t) {
                    // Outermost level closed: record its content.
                    a.push(str.slice(s, m.index));
                    if (!g) return a;
                }
            }
        }
        // If the string ended with an unclosed left delimiter, rescan from just
        // after that delimiter so balanced constructs nested inside it are
        // still found.
    } while (t && (x.lastIndex = s));
    return a;
}
// Demo: run the three sample strings from the question. No "g" flag is passed,
// so each call returns only the first balanced start...end content; the array
// is string-concatenated with "<br/>" and written to the page.
document.write(matchRecursiveRegExp("something start something_needed end something", "start", "end") + "<br/>");
document.write(matchRecursiveRegExp("start something start something end something end start something end", "start", "end")+ "<br/>");
document.write(matchRecursiveRegExp("start something start start something end something start end something end something end something start something end", "start", "end")+ "<br/>");
What you are looking for is to find 'start', count the number of times another 'start' is found, and then ignore an equal number of 'end's. This is something that cannot be done with regex alone.
It's impossible to compare the number of times two strings match with pure regex.
Instead, here are several semi-regex solutions for this problem:
var string = "start(1) something start(2) start(3) something end(3) something start(4) end(4) something end(2) something end(1) something start(5) something end(5)";
var stop = false;
// Peel off one innermost start...end pair per pass (the capture may not
// contain another "start"), until a pass makes no replacement.
while (!stop) {
    // Assume this is the last pass; the callback clears the flag if it runs.
    stop = true;
    string = string.replace(/start((?:[^s]|s(?!tart))*?)end/, function(whole, inner) {
        stop = false;
        //do stuff with the captured text here..
        console.log(inner);
        //replaces the match with empty so it can continue processing
        return "";
    });
}
What's good about this method is that it is simple, and you can have any number of nested statements.
I'm having a hard time understanding exactly what you want, but if I understand correctly: you cannot do this with pure regex in JavaScript because lookbehind (positive (?<=...) and negative (?<!...)) is not supported, and thus you would not be able to match the 'start(n)' before the match result.
Instead, you can use subgroups (subgroups aren't fully supported in JavaScript, so you'll need to use replace):
var string = "something start(1) something_needed end(1) something";
// \1 must repeat the number captured after "start", pairing start(n) with end(n).
var regex = /start\((\d+)\)(.*)end\(\1\)/;
// replace() is used only to run the callback on the match; its return value
// is discarded, so `string` itself is left unchanged.
string.replace(regex, function(whole, pairId, inner) {
    //do stuff with the text between the pair here
    console.log(inner);
});
$0 is the original match (start\((\d+)\)(.*)end\(\1\))
$1 and $2 are the groups that are outputted by the regex.
$1 refers to (\d+). It's already used to 'store' the number behind start (1 in this case). But here's where the magic happens: it gets loaded again and matched against with \1 inside the regex.
$2 is where the info you need is stored. it refers to (.*)

Categories