Javascript regex - split string - javascript

Struggling with a regex requirement. I need to split a string into an array wherever it finds a forward slash. But not if the forward slash is preceded by an escape.
Eg, if I have this string:
hello/world
I would like it to be split into an array like so:
arrayName[0] = hello
arrayName[1] = world
And if I have this string:
hello/wo\/rld
I would like it to be split into an array like so:
arrayName[0] = hello
arrayName[1] = wo/rld
Any ideas?

I wouldn't use split() for this job. It's much easier to match the path components themselves, rather than the delimiters. For example:
var subject = 'hello/wo\\/rld';
var regex = /(?:[^\/\\]+|\\.)+/g;
var matched = null;
while (matched = regex.exec(subject)) {
print(matched[0]);
}
output:
hello
wo\/rld
test it at ideone.com

The following is a little long-winded but will work, and avoids the problem with IE's broken split implementation by not using a regular expression.
function splitPath(str) {
var rawParts = str.split("/"), parts = [];
for (var i = 0, len = rawParts.length, part; i < len; ++i) {
part = "";
while (rawParts[i].slice(-1) == "\\") {
part += rawParts[i++].slice(0, -1) + "/";
}
parts.push(part + rawParts[i]);
}
return parts;
}
var str = "hello/world\\/foo/bar";
alert( splitPath(str).join(",") );

Here's a way adapted from the techniques in this blog post:
var str = "Testing/one\\/two\\/three";
var result = str.replace(/(\\)?\//g, function($0, $1){
return $1 ? '/' : '[****]';
}).split('[****]');
Live example
Given:
Testing/one\/two\/three
The result is:
[0]: Testing
[1]: one/two/three
That first uses the simple "fake" lookbehind to replace / with [****] and to replace \/ with /, then splits on the [****] value. (Obviously, replace [****] with anything that won't be in the string.)

/*
If you are getting your string from an ajax response or a data base query,
that is, the string has not been interpreted by javascript,
you can match character sequences that either have no slash or have escaped slashes.
If you are defining the string in a script, escape the escapes and strip them after the match.
*/
var s='hello/wor\\/ld';
s=s.match(/(([^\/]*(\\\/)+)([^\/]*)+|([^\/]+))/g) || [s];
alert(s.join('\n'))
s.join('\n').replace(/\\/g,'')
/* returned value: (String)
hello
wor/ld
*/

Here's an example at rubular.com

For short code, you can use reverse to simulate negative lookbehind
function reverse(s){
return s.split('').reverse().join('');
}
var parts = reverse(myString).split(/[/](?!\\(?:\\\\)*(?:[^\\]|$))/g).reverse();
for (var i = parts.length; --i >= 0;) { parts[i] = reverse(parts[i]); }
but to be efficient, it's probably better to split on /[/]/ and then walk the array and rejoin elements that have an escape at the end.

Something like this may take care of it for you.
var str = "/hello/wo\\/rld/";
var split = str.replace(/^\/|\\?\/|\/$/g, function(match) {
if (match.indexOf('\\') == -1) {
return '\x00';
}
return match;
}).split('\x00');
alert(split);

Related

Regex match quotes inside bracket regex

I'm working on a regex that must match only the text inside quotes but not in a comment, my macthes must only the strings in bold
<"love";>
>/*"love"*/<
<>'love'<>
"lo
more love
ve"
I'm stunck on this:
/(?:((\"|\')(.|\n)*?(\"|\')))(?=(?:\/\**\*\/))/gm
The first one (?:((\"|\')(.|\n)*?(\"|\'))) match all the strings
the second one (?=(?:\/\**\*\/)) doesn't match text inside quotes inside /* "mystring" */
bit my logic is cleary wrong
Any suggestion?
Thanks
Maybe you just need to use a negative lookahead to check for the comment end */?
But first, I'd split the string into separate lines
var arrayOfLines = input_str.split(/\r?\n/);
or, without empty lines:
var arrayOfLines = input_str.match(/[^\r\n]+/g);
and then use this regex:
["']([^'"]+)["'](?!.*\*\/)
Sample code:
var rebuilt_string = ''
var re = /["']([^'"]+)["'](?!.*\*\/)/g;
var subst = '<b>$1</b>';
for (i = 0; i < arrayOfLines.length; i++)
{
rebuilt_string = rebuilt_string + arrayOfLines[i].replace(re, subst) + "\r\n";
}
The way to avoid commented parts is to match them before. The global pattern looks like this:
/(capture parts to avoid)|target/
Then use a callback function for the replacement (when the capture group exists, return the match without change, otherwise, replace the match with what you want.
Example:
var result = text.replace(/(\/\*[^*]*(?:\*+(?!\/)[^*]*)*\*\/)|"[^"\\]*(?:\\[\s\S][^"\\]*)*"|'[^'\\]*(?:\\[\s\S][^'\\]*)*'/g,
function (m, g1) {
if (g1) return g1;
return '<b>' + m + '</b>';
});

Using regular expression to split a string

I have a string which I need to separate correctly:
self.view.frame.size.height = 44
I need to get only view, frame, size, and height. And I need to do it with a regular expression.
So far I've tried a lot of variants, none of them are even close to what I want to get. And my code now looks like this:
var testString = 'self.view.frame.size.height = 44'
var re = new RegExp('\\.(.*)\\.', "g")
var array = re.exec(testString);
console.log('Array length is ' + array.length)
for (var i = 0; i < array.length; i++) {
console.log('<' + array[i] + ">");
}
And it doesn't work at all:
Array length is 2
<.view.frame.size.>
<view.frame.size>
I'm new at Javascript, so maybe I want the impossible, let me know.
Thanks.
In Javascript, executing a regexp with the g modifier doesn't return all the matches at once. You have to execute it repeatedly on the same input string, and each one returns the next match.
You also need to change the regexp so it only returns one word at a time. .* is greedy, so it returns the longest possible match, so it was returning all the words between the first and last .. [^.]* will match a sequence of non-dot characters, so it will just return one word. You can't include the second . in the regexp, because that will interfere with the repetition -- each repetition starts searching after the end of the previous match, and there's no beginning . after the ending . of the word. Also, there's no . after height, so the last word won't match it.
EDIT: I've changed the regexp to use \w* instead of [^.]*, because it was grabbing the whole height = 44 string instead of just height.
var testString = 'self.view.frame.size.height = 44';
var re = /\.(\w*)/g;
var array = [];
var result;
while (result = re.exec(testString)) {
array.push(result[1]);
}
console.log('Array length is ' + array.length)
for (var i = 0; i < array.length; i++) {
console.log('<' + array[i] + ">");
}
If you're sure that your data will be always in the same format you can use this:
function parse (string) {
return string.split(" = ").shift().split(".").splice(1);
}
In your context, split is a MUCH better option:
var str = "self.view.frame.size.height = 44";
var bits1 = str.split(" ")[0];
var bits2 = bits1.split(".");
bits2.shift(); // get rid of the unwanted self
console.log(bits2);

javascript:get strings between specific strings

I hava a long string like this detail?ww=hello"....detail?ww=that".I want't to get all strings between detail?ww= and the next ",I use .match(/detail\?ww=.+\"/g) but the array i get contains detail?ww= and ",how can I only get strings without detail?ww= and "
If JavaScript understood lookbehind, you could use that to match strings preceded by detail?ww= and followed by ;. Unfotunately that is not the case, so a little more processing is required:
var str = 'detail?ww=hello"....detail?ww=that"';
var regexG = /detail\?ww\=(.+?)\"/g;
var regex = /detail\?ww\=(.+?)\"/;
var matches = str.match(regexG).map(function(item){ return item.match(regex)[1] });
console.log(matches);
Some changes to your regexp:
+? - non-greedy quantifier.
You could do this using a basic loop :
var result = [],
s = 'detail?ww=hello"....detail?ww=that"',
r = /detail\?ww=([^"]+)/g,
m;
while (m = r.exec(s)) {
result.push(m[1]);
}
result; // ["hello", "that"]
[^"]+ : any char except double quotes, one or more times
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec
Keep in mind that map is not supported by IE8 and below : http://kangax.github.io/es5-compat-table/#Array.prototype.map. If you really don't like loops but need a cross browser compatible solution, here is an alternative :
var s = 'detail?ww=hello"....detail?ww=that"';
s = s.replace(/.*?detail\?ww=([^"]+")/g, '$1').match(/[^"]+/g) || [];
s; // ["hello", "that"]

Splitting an array at only certain places but not others

I understand the .split() function quite well. But what I can seem to figure out is how to split in certain places but not in others. Sounds confusing? Well I mean for example, lets say I use .split(",") on the following string:
div:(li,div),div
Is it possible to split it so that only the commas ouside of the parentheses get split.
So the string above with the split method should return:
['div:(li,div)', 'div']
Of course at the moment it is also splitting the first comma inside of the parentheses, returning:
['div:(li', 'div)', 'div']
Is there some way to make this work like I desire?
If your expected strings are not going to become more complicated than this, you don't have to worry about writing code to parse them. Regex will work just fine.
http://jsfiddle.net/dC5HN/1/
var str = "div:(li,div),div:(li,div),div";
var parts = str.split(/,(?=(?:[^\)]|\([^\)]*\))*$)/g);
console.log(parts);
outputs:
["div:(li,div)", "div:(li,div)", "div"]
REGEX is not built for this sort of thing, which is essentially parsing.
When faced with this sort of situation previously I've first temporarily replaced the parenthesised parts with a placeholder, then split, then replaced the placeholders with the original parenthised parts.
A bit hacky, but it works:
var str = 'div:(li,div),div',
repls = [];
//first strip out parenthesised parts and store in array
str = str.replace(/\([^\)]*\)/g, function($0) {
repls.push($0);
return '*repl'+(repls.length - 1)+'*';
});
//with the parenthisised parts removed, split the string then iteratively
//reinstate the removed parenthisised parts
var pieces = str.split(',').map(function(val, index) {
return val.replace(/\*repl(\d+)\*/, function($0, $1) {
return repls[$1];
});
});
//test
console.log(pieces); //["div:(li,div)","div"]
This function will split whatever you specify in splitChar, but ignore that value if inside parenthesis:
function customSplit(stringToSplit, splitChar){
var arr = new Array();
var isParenOpen = 0
var curChar;
var curString = "";
for (var i = 0; i < stringToSplit.length; i++) {
curChar = stringToSplit.substr(i, 1);
switch(curChar) {
case "(":
isParenOpen++;
break;
case ")":
if(isParenOpen > 0) isParenOpen--;
break;
case splitChar:
if (isParenOpen < 1) {
arr.push(curString);
curString = "";
continue;
}
}
curString += curChar;
}
if (curString.length > 0) {
arr.push(curString);
}
return arr;
}

Javascript split function not correct worked with specific regex

I have a problem. I have a string - "\,str\,i,ing" and i need to split by comma before which not have slash. For my string - ["\,str\,i", "ing"]. I'm use next regex
myString.split("[^\],", 2)
but it's doesn't worked.
Well, this is ridiculous to avoid the lack of lookbehind but seems to get the correct result.
"\\,str\\,i,ing".split('').reverse().join('').split(/,(?=[^\\])/).map(function(a){
return a.split('').reverse().join('');
}).reverse();
//=> ["\,str\,i", "ing"]
Not sure about your expected output but you are specifying string not a regex, use:
var arr = "\,str\,i,ing".split(/[^\\],/, 2);
console.log(arr);
To split using regex, wrap your regex in /..../
This is not easily possible with js, because it does not support lookbehind. Even if you'd use a real regex, it would eat the last character:
> "xyz\\,xyz,xyz".split(/[^\\],/, 2)
["xyz\\,xy", "xyz"]
If you don't want the z to be eaten, I'd suggest:
var str = "....";
return str.split(",").reduce(function(res, part) {
var l = res.length;
if (l && res[l-1].substr(-1) == "\\" || l<2)
// ^ ^^ ^
// not the first was escaped limit
res[l-1] += ","+part;
else
res.push(part);
return;
}, []);
Reading between the lines, it looks like you want to split a string by , characters that are not preceded by \ characters.
It would be really great if JavaScript had a regular expression lookbehind (and negative lookbehind) pattern, but unfortunately it does not. What it does have is a lookahead ((?=) )and negative lookahead ((?!)) pattern. Make sure to review the documentation.
You can use these as a lookbehind if you reverse the string:
var str,
reverseStr,
arr,
reverseArr;
//don't forget to escape your backslashes
str = '\\,str\\,i,ing';
//reverse your string
reverseStr = str.split('').reverse().join('');
//split the array on `,`s that aren't followed by `\`
reverseArr = reverseStr.split(/,(?!\\)/);
//reverse the reversed array, and reverse each string in the array
arr = reverseArr.reverse().map(function (val) {
return val.split('').reverse().join('');
});
You picked a tough character to match- a forward slash preceding a comma is apt to disappear while you pass it around in a string, since '\,'==','...
var s= 'My dog, the one with two \\, blue \\,eyes, is asleep.';
var a= [], M, rx=/(\\?),/g;
while((M= rx.exec(s))!= null){
if(M[1]) continue;
a.push(s.substring(0, rx.lastIndex-1));
s= s.substring(rx.lastIndex);
rx.lastIndex= 0;
};
a.push(s);
/* returned value: (Array)
My dog
the one with two \, blue \,eyes
is asleep.
*/
Find something which will not be present in your original string, say "###". Replace "\\," with it. Split the resulting string by ",". Replace "###" back with "\\,".
Something like this:
<script type="text/javascript">
var s1 = "\\,str\\,i,ing";
var s2 = s1.replace(/\\,/g,"###");
console.log(s2);
var s3 = s2.split(",");
for (var i=0;i<s3.length;i++)
{
s3[i] = s3[i].replace(/###/g,"\\,");
}
console.log(s3);
</script>
See JSFiddle

Categories