Removing duplicates in a comma-separated list with a regex? - javascript

I'm trying to figure out how to filter out duplicates in a string with a regular expression, where the string is comma separated. I'd like to do this in javascript, but I'm getting caught up with how to use the back-references.
For example:
1,1,1,2,2,3,3,3,3,4,4,4,5
Becomes:
1,2,3,4,5
Or:
a,b,b,said,said, t, u, ugly, ugly
Becomes
a,b,said,t,u,ugly

Why use regex when you can do it in javascript code? Here is sample code (messy though):
var input = 'a,b,b,said,said, t, u, ugly, ugly';
var splitted = input.split(',');
var collector = {};
for (i = 0; i < splitted.length; i++) {
key = splitted[i].replace(/^\s*/, "").replace(/\s*$/, "");
collector[key] = true;
}
var out = [];
for (var key in collector) {
out.push(key);
}
var output = out.join(','); // output will be 'a,b,said,t,u,ugly'
p/s: that one regex in the for-loop is to trim the tokens, not to make them unique

If you insist on RegExp, here's an example in Javascript:
"1,1,1,2,2,3,3,3,3,4,4,4,5".replace (
/(^|,)([^,]+)(?:,\2)+(,|$)/ig,
function ($0, $1, $2, $3)
{
return $1 + $2 + $3;
}
);
To handle trimming of whitespace, modify slightly:
"1,1,1,2,2,3,3,3,3,4,4,4,5".replace (
/(^|,)\s*([^,]+)\s*(?:,\s*\2)+\s*(,|$)\s*/ig,
function ($0, $1, $2, $3)
{
return $1 + $2 + $3;
}
);
That said, it seems better to tokenise via split and handle duplicates.

Here's a example:
s/,([^,]+),\1/,$1/g;
Perl regex substitution, but should be convertible to JS-style by anyone who knows the syntax.

I don't use Regular Expressions for that.
Here's the function I use. It accepts a string containing comma separated values and returns an array of unique values regardless of position in the original string.
Note: If you pass CSV string containing quoted values, Split will not treat commas inside quoted values any differently. So if you want to handle real CSV, you are best to use a 3rd party CSV parser.
function GetUniqueItems(s)
{
var items=s.split(",");
var uniqueItems={};
for (var i=0;i<items.length;i++)
{
var key=items[i];
var val=items[i];
uniqueItems[key]=val;
}
var result=[];
for(key in uniqueItems)
{
// Assign to output result field using hasOwnProperty so we only get
// relevant items
if(uniqueItems.hasOwnProperty(key))
{
result[result.length]=uniqueItems[key];
}
}
return result;
}

With javascript regex
x="1,1,1,2,2,3,3,3,3,4,4,4,5"
while(/(\d),\1/.test(x))
x=x.replace(/(\d),\1/g,"$1")
1,2,3,4,5
x="a,b,b,said,said, t, u, ugly, ugly"
while(/\s*([^,]+),\s*\1(?=,|$)/.test(x))
x=x.replace(/\s*([^,]+),\s*\1(?=,|$)/g,"$1")
a,b,said, t, u,ugly
Not well tested, let me know if there is any issue.

Related

Dynamically compare accuracy of regex patterns for match

Supposing I have two regexes and both match a string, but one of them matches it in a stricter way, is there a way to figure that out programmatically?
For example, I'm matching this string:
/path/on/file/system
and I have the following two regular expressions:
const opt1 = /\/path\/on/;
const opt2 = /\/path/;
I can see with my eyes that opt1 is stricter, but how can javascript know about that?
Is converting the regex to a string and checking for character length a good measure of strictness?
You can implement this function by:
Sorting your regular expressions by length.
loop through your sorted regular expressions array to check if there is a match
then return the most strict matching element.
function check(arrayOfRegEx, str) {
//sort the regex array by length
var sortedArr = arrayOfRegEx.sort(function(a, b) {
return a.toString().length - b.toString().length || a.toString().localeCompare(b);
});
let mostStrict = '';
sortedArr.forEach(function(reg) {
if(new RegExp((reg.toString()).replace(/\\/g, "").substring(1)).test(str)) {
mostStrict = reg;
}
});
return mostStrict;
}
var result = check([/\/path/, /\/test\/test/, /\/path\/on/], '/path/on/file/system');
console.log(result); // returns /\/path\/on/
And of course you can tweak this function to fit your needs

RegExp to strip a variable in ES6-like format

I'm using an ES6-like variable formatting with the syntax of ${varName}, and while processing them I'm trying to enumerate all unique variables specified, using the following code:
function enumVars(txt) {
var v, names = [];
var reg = /\$\{\s*[a-zA-Z\$_][a-zA-Z0-9\$_]*\s*}/g;
while (v = reg.exec(txt)) {
var svn = v[0].replace(/???/, ''); // stripped variable name;
if (names.indexOf(svn) === -1) {
names.push(svn);
}
}
return names;
}
I haven't been able to figure out the correct RegExp for stripping the variable name from the exec result.
When I use v[0], it gives me ${varName} string, and I need to strip it into just varName, removing leading ${, trailing }, and all white spaces that may reside inside { } around the variable.
The variable is expected to follow the javascript naming convention, which means:
a valid variable starts with a letter, underscore or '$' symbol, followed by any combination of letters, digits, underscores or '$';
leading and trailing spaces around the variable are to be ignored.
In all, we may have a variable returned from exec as ${ _$abc12_$ }, and I need a RegExp for calling replace that would return just _$abc12_$.
Thanks everyone for helping!
Your replace regexp could be
/^\$\{\s*|\s*}$/g
In English, this says "remove both ${... at the beginning, or ...} at the end.
It could be slightly easier to just grab all the strings, and transform them all at once, then filter out duplicates:
function enumVars(txt) {
return txt
// Find all matches of form ${var}
. match(/\$\{\s*[a-z$_][a-z0-9$_]*\s*}/gi)
// Strip off ${ and }, yielding just variable name
. map(function(v) { return v.replace( /^\$\{\s*|\s*}$/g, ''); })
// Filter out duplicates
. filter(function(v, i, a) { return a.indexOf(v) === i; });
}

JavaScript reverse the order of letters for each word in a string

I am trying to get around the following but no success:
var string = 'erehT era a tsav rebmun fo secruoser rof gninrael erom tpircsavaJ';
var x = string.split(' ');
for (i = 0; i <= x.length; i++) {
var element = x[i];
}
element now represents each word inside the array. I now need to reverse not the order of the words but the order of each letter for each word.
var string = "erehT era a tsav rebmun fo secruoser rof gninrael erom tpircsavaJ";
// you can split, reverse, join " " first and then "" too
string.split("").reverse().join("").split(" ").reverse().join(" ")
Output: "There are a vast number of resources for learning more Javascript"
You can do it like this using Array.prototype.map and Array.prototype.reverse.
var result = string.split(' ').map(function (item) {
return item.split('').reverse().join('');
}).join(' ');
what's the map function doing there?
It traverses the array created by splitting the initial string and calls the function (item) we provided as argument for each elements. It then takes the return value of that function and push it in a new array. Finally it returns that new array, which in our example, contains the reversed words in order.
You can do the following:
let stringToReverse = "tpircsavaJ";
stringToReverse.split("").reverse().join("").split(" ").reverse().join(" ")
//let keyword allows you declare variables in the new ECMAScript(JavaScript)
You can do the following.
var string = "erehT era a tsav rebmun fo secruoser rof gninrael erom tpircsavaJ";
arrayX=string.split(" ");
arrayX.sort().reverse();
var arrayXX='';
arrayX.forEach(function(item){
items=item.split('').sort().reverse();
arrayXX=arrayXX+items.join('');
});
document.getElementById('demo').innerHTML=arrayXX;
JavaScript split with regular expression:
Note: ([\s,.]) The capturing group matches whitespace, commas, and periods.
const string = "oT eb ro ton ot eb, taht si a noitseuq.";
function reverseHelper(str) {
return str.split(/([\s,.])/).
map((item) => {
return item.split``.reverse().join``;
}).join``;
}
console.log(reverseHelper(string));

Splitting an array at only certain places but not others

I understand the .split() function quite well. But what I can seem to figure out is how to split in certain places but not in others. Sounds confusing? Well I mean for example, lets say I use .split(",") on the following string:
div:(li,div),div
Is it possible to split it so that only the commas ouside of the parentheses get split.
So the string above with the split method should return:
['div:(li,div)', 'div']
Of course at the moment it is also splitting the first comma inside of the parentheses, returning:
['div:(li', 'div)', 'div']
Is there some way to make this work like I desire?
If your expected strings are not going to become more complicated than this, you don't have to worry about writing code to parse them. Regex will work just fine.
http://jsfiddle.net/dC5HN/1/
var str = "div:(li,div),div:(li,div),div";
var parts = str.split(/,(?=(?:[^\)]|\([^\)]*\))*$)/g);
console.log(parts);
outputs:
["div:(li,div)", "div:(li,div)", "div"]
REGEX is not built for this sort of thing, which is essentially parsing.
When faced with this sort of situation previously I've first temporarily replaced the parenthesised parts with a placeholder, then split, then replaced the placeholders with the original parenthised parts.
A bit hacky, but it works:
var str = 'div:(li,div),div',
repls = [];
//first strip out parenthesised parts and store in array
str = str.replace(/\([^\)]*\)/g, function($0) {
repls.push($0);
return '*repl'+(repls.length - 1)+'*';
});
//with the parenthisised parts removed, split the string then iteratively
//reinstate the removed parenthisised parts
var pieces = str.split(',').map(function(val, index) {
return val.replace(/\*repl(\d+)\*/, function($0, $1) {
return repls[$1];
});
});
//test
console.log(pieces); //["div:(li,div)","div"]
This function will split whatever you specify in splitChar, but ignore that value if inside parenthesis:
function customSplit(stringToSplit, splitChar){
var arr = new Array();
var isParenOpen = 0
var curChar;
var curString = "";
for (var i = 0; i < stringToSplit.length; i++) {
curChar = stringToSplit.substr(i, 1);
switch(curChar) {
case "(":
isParenOpen++;
break;
case ")":
if(isParenOpen > 0) isParenOpen--;
break;
case splitChar:
if (isParenOpen < 1) {
arr.push(curString);
curString = "";
continue;
}
}
curString += curChar;
}
if (curString.length > 0) {
arr.push(curString);
}
return arr;
}

Javascript split function not correct worked with specific regex

I have a problem. I have a string - "\,str\,i,ing" and i need to split by comma before which not have slash. For my string - ["\,str\,i", "ing"]. I'm use next regex
myString.split("[^\],", 2)
but it's doesn't worked.
Well, this is ridiculous to avoid the lack of lookbehind but seems to get the correct result.
"\\,str\\,i,ing".split('').reverse().join('').split(/,(?=[^\\])/).map(function(a){
return a.split('').reverse().join('');
}).reverse();
//=> ["\,str\,i", "ing"]
Not sure about your expected output but you are specifying string not a regex, use:
var arr = "\,str\,i,ing".split(/[^\\],/, 2);
console.log(arr);
To split using regex, wrap your regex in /..../
This is not easily possible with js, because it does not support lookbehind. Even if you'd use a real regex, it would eat the last character:
> "xyz\\,xyz,xyz".split(/[^\\],/, 2)
["xyz\\,xy", "xyz"]
If you don't want the z to be eaten, I'd suggest:
var str = "....";
return str.split(",").reduce(function(res, part) {
var l = res.length;
if (l && res[l-1].substr(-1) == "\\" || l<2)
// ^ ^^ ^
// not the first was escaped limit
res[l-1] += ","+part;
else
res.push(part);
return;
}, []);
Reading between the lines, it looks like you want to split a string by , characters that are not preceded by \ characters.
It would be really great if JavaScript had a regular expression lookbehind (and negative lookbehind) pattern, but unfortunately it does not. What it does have is a lookahead ((?=) )and negative lookahead ((?!)) pattern. Make sure to review the documentation.
You can use these as a lookbehind if you reverse the string:
var str,
reverseStr,
arr,
reverseArr;
//don't forget to escape your backslashes
str = '\\,str\\,i,ing';
//reverse your string
reverseStr = str.split('').reverse().join('');
//split the array on `,`s that aren't followed by `\`
reverseArr = reverseStr.split(/,(?!\\)/);
//reverse the reversed array, and reverse each string in the array
arr = reverseArr.reverse().map(function (val) {
return val.split('').reverse().join('');
});
You picked a tough character to match- a forward slash preceding a comma is apt to disappear while you pass it around in a string, since '\,'==','...
var s= 'My dog, the one with two \\, blue \\,eyes, is asleep.';
var a= [], M, rx=/(\\?),/g;
while((M= rx.exec(s))!= null){
if(M[1]) continue;
a.push(s.substring(0, rx.lastIndex-1));
s= s.substring(rx.lastIndex);
rx.lastIndex= 0;
};
a.push(s);
/* returned value: (Array)
My dog
the one with two \, blue \,eyes
is asleep.
*/
Find something which will not be present in your original string, say "###". Replace "\\," with it. Split the resulting string by ",". Replace "###" back with "\\,".
Something like this:
<script type="text/javascript">
var s1 = "\\,str\\,i,ing";
var s2 = s1.replace(/\\,/g,"###");
console.log(s2);
var s3 = s2.split(",");
for (var i=0;i<s3.length;i++)
{
s3[i] = s3[i].replace(/###/g,"\\,");
}
console.log(s3);
</script>
See JSFiddle

Categories