I am working on a JavaScript code that will decompose a RegExp into its base components and give a small explanation as to what it does.
My general idea is to split the input string (as a RegExp) into entries from another array.
My current code:
function interpret(regex){
var r = regex + "";
r = r.split("/");
body = r[1];
flags = r[2];
var classes = [".","\w","\d","\s","\W","\D","\S","[","]"];
var classdefs = ["any non-newline character","any word (digit or letter)","any digit (characters 0-9)","any whitespace character","any non-word (non-digit and non-letter)","any non-digit (not characters 0-9)","open matchset","close matchset"];
var quantifiers = ["*","+","?",
/{(\d+)}/g, // a{n}
/{(\d+),}/g, // a{n,}
/{(\d+),(\d+)}/g, // a{n,m}
/[+*?]\?/g // a<quant>? - lazy quantification
];
var quantDefs = ["repeated 0 or more times","repeated 1 or more times","repeated once or not at all","repeated exactly $1 time","repeated $1 or more times","repeated between $1 and $2 times"];
var escaped = ["\t","\n","\r","\.","\*","\\","\^","\?","\|"];
var escapedDefs = ["a tab","a linefeed","a carriage return","a period","an asterisk","a backslash","a carot","a question mark","a vertical bar"];
// code to split r based on entries in classes, quantifiers, and escaped.
}
Ideally, this function (lets call it splitR) will return outputs something like this:
> splitR("hello",["he","l"]);
["he", "l", "l", "o"]
> splitR("hello",["he"]);
["he", "llo"]
> splitR("hello",["he","o"]);
["he", "ll", "o"];
> splitR("5 is the square root of 25",[/\d+/g,/\w{3,}/g,"of"]);
["5", " is ", "the", " ", "square", " ", "root", " ", "of", " ", "25"]
Clearly defined, the splitR function should, in the context of the interpret function, take a RegExp and split it into its base components; e.g. \d+[0-9]\w*? should split into ["\d", "+", "[", "0-9", "]", "\w", "*", "?"]. These components are defined separately in other arrays, using a variety of RegExps (e.g. /{(\d+)}/g to find a{n}) and strings (e.g. ".").
Truly, I am stumped as to the definition of splitR. Any help is appreciated!
This will split a regex in parts, and populate a second array with description of the parts. It will skip unexpected characters, but there's no real regex syntax checking, i.e. if you start a range and don't end it, the script will not complain. I took the liberty of adding some things that were missing from your lists, like grouping brackets, start and end anchors...
function interpret(regex)
{
var body = regex.source;
var flags = (regex.global ? "g" : "") + (regex.ignoreCase ? "i" : "") + (regex.multiline ? "m" : "");
var classes = [/^\w\-\w/,/^\./,/^\\w/,/^\\d/,/^\\s/,/^\\W/,/^\\D/,/^\\S/,/^\[/,/^\]/,/^\(/,/^\)/,/^\^/,/^\$/,/^\|/];
var classDefs = ["character range","any non-newline character","any word (digit or letter)","any digit (characters 0-9)","any whitespace character","any non-word (non-digit and non-letter)","any non-digit (not characters 0-9)","any non-whitespace character","open matchset","close matchset","open group","close group","start anchor or negation","end anchor","alternative"];
var quantifiers = [/^[+*?]\?/,/^\*/,/^\+/,/^\?/,/^{(\d+)}/,/^{(\d+),}/,/^{(\d+),(\d+)}/];
var quantDefs = ["lazy quantification","repeated 0 or more times","repeated 1 or more times","repeated once or not at all","repeated exactly $1 time","repeated $1 or more times","repeated between $1 and $2 times"];
var escaped = [/^\\t/,/^\\n/,/^\\r/,/^\\\./,/^\\\*/,/^\\\+/,/^\\\-/,/^\\\\/,/^\\\^/,/^\\\$/,/^\\\?/,/^\\\|/,/^\\\[/,/^\\\]/,/^\\\(/,/^\\\)/,/^\\\{/,/^\\\}/];
var escapedDefs = ["a tab","a linefeed","a carriage return","a period","an asterisk","a plus","a minus","a backslash","a caret","a dollar sign","a question mark","a vertical bar","a square bracket","a square bracket","a bracket","a bracket","a curly bracket","a curly bracket"];
var literal = [/^[^\.\\\[\]\(\)\^\$\|\*\+\-\?\{\}]+/];
var literalDefs = ["literal text"];
var regs = classes.concat(quantifiers,escaped,literal);
var defs = classDefs.concat(quantDefs,escapedDefs,literalDefs);
var reglist = [];
var deflist = [];
while (body.length)
{
var found = false;
var chunk = null;
for (var i = 0; i < regs.length; i++)
{
chunk = body.match(regs[i]);
if (chunk)
{
reglist.push(chunk[0]);
deflist.push(defs[i]);
body = body.substr(chunk[0].length);
found = true;
break;
}
}
if (!found)
{
body = body.substr(1); // skip unexpected character
}
}
console.log(regex.source);
console.log(reglist);
console.log(deflist);
alert("see console for output");
}
var x = new RegExp("^[a-z0-9]\\^.\\.\\w\\d\\s\\W\\D\\S+(te|\\|st)*\\*\\n+\\+\\-\\}(\\W?\\?\\s{1,3})\\\\*?a{3}b{4,}c{}\\r\\t\\$$", "ig");
interpret(x);
Related
var alphabet = "FIN SLHOJVHEN GYKOHU";
I want to split it every 2 character so it would print
"I LOVE YOU "
I already try this but it didn't work
for (var i = 0 ; i \< alphabet.length ; i+=2 ){
alphabet.split(i)
correct me please
You can transform the string into an array, filter it and make it a string again.
let alphabet = "FIN SLHOJVHEN GYKOHU";
alphabet = [...alphabet].filter((_, i) => i%2).join("");
console.log(alphabet); //I LOVE YOU;
Also this one results in I LOVE YOU (regex101 demo).
// the string
let alphabet = 'FIN SLHOJVHEN GYKOHU';
// capture ^ start or chr, match latter chr
alphabet = alphabet.replace(/(^|.)./g, '$1');
console.log(alphabet);
Since the split function will split the given string by the delimiter you passed, it seems to me that you wish to first split the words (using empty space) contained in the encoded string and only then take only the characters at even positions to include in the decoded string.
This is a demo achieving that:
const encoded = "FIN SLHOJVHEN GYKOHU";
const words = encoded.split(' ');
let decoded = '';
words.forEach((word)=>{
for (let i=1;i<word.length;i+=2){
decoded += word[i];
}
decoded += ' ';
});
console.log(decoded);
Using a regex replacement approach we can try:
var alphabet = "FIN SLHOJVHEN GYKOHU";
var output = alphabet.replace(/[A-Z]([A-Z]|(?=\s))/g, "$1");
console.log(output);
Here is an explanation of the regex pattern:
[A-Z] match a single (odd) uppercase letter
( open capture
[A-Z] an uppercase letter
| OR
(?=\s) lookahead and find a space
)
In other words, we match an odd letter and then capture the next letter, unless that odd letter happen to be the last in the word. Then we replace with just the captured even letter, if available.
you already have different ways to do it, I'm adding one, so you'll get totally confused! hehe
This one is recursive:
we take your string alphabet
first 2 letters (every 2)
last one of this 2 characters string is stored in toPrint variable
delete the first 2 characters from alphabet
... loop till alphabet empty
Your toPrint has I Love You
Certainly not the fastest one, but nice.
let alphabet = "FIN SLHOJVHEN GYKOHU";
let toPrint = '';
do {
let temp = alphabet.slice(0, 2);
toPrint += temp[1];
alphabet = alphabet.slice(2, alphabet.length);
} while (alphabet !== '');
console.log(toPrint);
You can start your loop at 1 as you want every second character, and note that \< should be <
In the loop, i is the position of the character, so you would still have to get the character for that position and then assemble the resulting string.
var alphabet = "FIN SLHOJVHEN GYKOHU";
var result = "";
for (var i = 1; i < alphabet.length; i += 2) {
result += alphabet[i];
}
console.log(result);
If you want to take the spaces into account and only get the 2nd non whitespace character you might:
get the separate words by splitting on whitespace chars
remove the empty entries
join every second character to a string
join the parts from the initial split with a space
const alphabet = " FIN SLHOJVHEN GYKOHU ";
const result = alphabet
.split(/\s+/)
.filter(Boolean)
.map(s => s.split("").filter((s, i) => i % 2).join(""))
.join(" ");
console.log(result);
If you have a browser where a positive lookbehind for a regex is supported:
const alphabet = " FIN SLHOJVHEN GYKOHU ";
const result = alphabet
.split(/\s+/)
.filter(Boolean)
.map(s => s.match(/(?<=^(?:..)*.)./g).join(""))
.join(" ");
console.log(result);
var alphabet = "FIN SLHOJVHEN GYKOHU";
const arrayAlpha = alphabet.split('');
let stringToPrint = '';
for(let i=1; i<arrayAlpha.length; i+=2){
stringToPrint = stringToPrint + arrayAlpha[i]
}
console.log(stringToPrint)
MyString = "big BANANA: 5, pineapple(7), small Apples_juice (1,5%)_, oranges* juice 20 %, , other fruit : no number "
I'd like to get the number of each element in my MyString.
The separator for decimal can be a comma or a dot.
The code I've tried:
function getValue(string, word) {
var index = string.toLowerCase().indexOf(word.toLowerCase()),
part = string.slice(index + word.length, string.indexOf(',', index));
return index === -1 ? 'no ' + word + ' found!' // or throw an exception
: part.replace(/[^0-9$.,]/g, '');
}
If I am not mistaken, looking at your example string you want to get numbers from parts that can be splitted by a comma and a whitespace ,.
If that is the case, then this might be an option:
Create an array using split and loop the items
Check if the word you are looking for in present in the item using indexOf
Use a pattern like \d+(?:[.,]\d+)? for the match
If you have a match, return it.
For example:
var MyString = "big BANANA: 5, pineapple(7), small Apples_juice (1,5%)_, oranges* juice 20 %, , other fruit : no number ";
function getValue(string, word) {
var items = string.toLowerCase().split(', ');
word = word.toLowerCase();
var pattern = /\d+(?:[.,]\d+)?/g;
var result = 'no ' + word + ' found!';
for (var i = 0; i < items.length; i++) {
var item = items[i];
if (item.indexOf(word) !== -1) {
var match = item.match(pattern);
if (match && typeof match[0] !== 'undefined') {
result = match[0];
}
}
}
return result;
}
console.log(getValue(MyString, "big BANANA"));
console.log(getValue(MyString, "pineapple"));
console.log(getValue(MyString, "small Apples_juice"));
console.log(getValue(MyString, "oranges* juice"));
console.log(getValue(MyString, "other fruit"));
console.log(getValue(MyString, "ApPle"));
I recommend rewriting your code to put your values in an object:
var myObj = {
bigBanana: 5,
pineapple: 7,
smallApplesJuice: 1.5,
...
}
You can then iterate through myObj values with a simple for...in loop.
If this is not feasible, you'll have to create different code using RegEx for every substring you want to get our of your big string. For example, try this to get the value of Apples_juice:
//create a regular expression that matches everything after "Apples_juice":
var re = new RegExp(/((?<=Apples_juice).*$)/);
//extract three characters that follow "Apples_juice (":
var extracted_value = MyString.match(re)[0].substring(2,5);
Note that extracted_value is a string, which you will then have to convert to a number. Hope this helps.
Assume that there is a string like this:
var content = "1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17.18.19.20";
I want to add <br /> after every 5 dots.
So, the result should be:
1.2.3.4.5.<br />
6.7.8.9.10.<br />
11.12.13.14.15.<br />
16.17.18.19.20.<br />
I want to do this without a for loop. Is it possible with just regex?
i'm doing this with this code;
regenerate:function(content,call){
var data2;
var brbr = content.replace(/[\u0250-\ue007]/g, '').match(/(\r\n)/g);
if (brbr !== "") {
data2 = content.replace(/[\u0250-\ue007]/g, '').replace(/(\r\n)/gm, "<br><br>");
} else {
data2 = content.replace(/[\u0250-\ue007]/g, '');
}
var dataArr = data2.split(".");
for (var y = 10; y < dataArr.length - 10; y += 10) {
var dataArrSpecific1 = dataArr[y] + ".";
var dataArrSpecific2 = dataArr[y] + ".<br>";
var dataArrSpecificBosluk = dataArr[y + 1];
var data3 = data2.replace(new RegExp(dataArrSpecific1.replace(/[\u0250-\ue007]/g, ''), "g"), "" + dataArrSpecific2.replace(/[\u0250-\ue007]/g, '') + "");
data3 = data3.replace(new RegExp(dataArrSpecificBosluk.replace(/[\u0250-\ue007]/g, ''), "g"), " " + dataArrSpecificBosluk.replace(/[\u0250-\ue007]/g, '') + "");
data2 = data3;
}
call(data2.replace(/[\u0250-\ue007]/g, ''));
}
Actually , i want to refactoring this code
Working bin:http://jsbin.com/dikifipelo/1/
var string = "1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17.18.19.20." ;
string = string.replace(/(([^\.]+\.){5})/g, "$1<br/>");
Works with any type and length of characters between the dots.
Explanation:
The pattern /(([^.]+.){5})/g can be broken down as such:
[^\.] - any character that is not a dot
[^\.]+ - any character that is not a dot, one or more times
[^\.]+\. - any character that is not a dot, one or more times, followed by a dot
([^\.]+\.){5} - any character....dot, appearing five times
(([^\.]+\.){5}) - any...five times, capture this (all round brackets capture unless told not to, with a ?: as the first thing inside them)
the /g/ flag makes it so that the whole string is matched - ie, all matches are found
"$1" represents the results of the first group (or bracket)
so, the replace function finds all instances of the pattern in the string, and replaces them with the match itself + a line break (br).
Once you learn regular expressions, life is never the same.
I'm trying to make a Regex in JavaScript to match each not escaped specific characters.
Here I'm looking for all the ' characters. They can be at the beginning or the end of the string, and consecutive.
E.g.:
'abc''abc\'abc
I should get 3 matchs: the 1st, 5 and 6th character. But not 11th which escaped.
You'll have to account for cases like \\' which should match, and \\\' which shouldn't. but you don't have lookbehinds in JS, let alone variable-length lookbehinds, so you'll have to use something else.
Use the following regex:
\\.|(')
This will match both all escaped characters and the ' characters you're looking for, but the quotes will be in a capture group.
Look at this demo. The matches you're interested in are in green, the ones to ignore are in blue.
Then, in JS, ignore each match object m where !m[1].
Example:
var input = "'abc''abc\\'abc \\\\' abc";
var re = /\\.|(')/g;
var m;
var positions = [];
while (m = re.exec(input)) {
if (m[1])
positions.push(m.index);
}
var pos = [];
for (var i = 0; i < input.length; ++i) {
pos.push(positions.indexOf(i) >= 0 ? "^" : " ");
}
document.getElementById("output").innerText = input + "\n" + pos.join("");
<pre id="output"></pre>
You can use:
var s = "'abc''abc\\'abc";
var cnt=0;
s.replace(/\\?'/g, function($0) { if ($0[0] != '\\') cnt++; return $0;});
console.log(cnt);
//=> 3
What is the best way to count how many spaces before the fist character of a string?
str0 = 'nospaces even with other spaces still bring back zero';
str1 = ' onespace do not care about other spaces';
str2 = ' twospaces';
Use String.prototype.search
' foo'.search(/\S/); // 4, index of first non whitespace char
EDIT:
You can search for "Non whitespace characters, OR end of input" to avoid checking for -1.
' '.search(/\S|$/)
Using the following regex:
/^\s*/
in String.prototype.match() will result in an array with a single item, the length of which will tell you how many whitespace chars there were at the start of the string.
pttrn = /^\s*/;
str0 = 'nospaces';
len0 = str0.match(pttrn)[0].length;
str1 = ' onespace do not care about other spaces';
len1 = str1.match(pttrn)[0].length;
str2 = ' twospaces';
len2 = str2.match(pttrn)[0].length;
Remember that this will also match tab chars, each of which will count as one.
You could use trimLeft() as follows
myString.length - myString.trimLeft().length
Proof it works:
let myString = ' hello there '
let spacesAtStart = myString.length - myString.trimLeft().length
console.log(spacesAtStart)
See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/TrimLeft
str0 = 'nospaces';
str1 = ' onespace do not care about other spaces';
str2 = ' twospaces';
arr_str0 = str0.match(/^[\s]*/g);
count1 = arr_str0[0].length;
console.log(count1);
arr_str1 = str1.match(/^[\s]*/g);
count2 = arr_str1[0].length;
console.log(count2);
arr_str2 = str2.match(/^[\s]*/g);
count3 = arr_str2[0].length;
console.log(count3);
Here:
I have used regular expression to count the number of spaces before the fist character of a string.
^ : start of string.
\s : for space
[ : beginning of character group
] : end of character group
str.match(/^\s*/)[0].length
str is the string.