Regex to check Illegal Search Pattern - javascript

I am trying to convert below function to a simple regex check to find illegal pattern in my search app.
The rules are
Any string without * :Not match.
* by itself :Not match.
if * is present and length of the string is less than 3 (ignoring space):Match
Here is what makes it tricky, | and (space)or(space) in my app results in running two different searches so therefore it has to evaluate the strings on each side separately. just like below function. the function works but the regexCheck doesn't.
// No `g` flag here: a global regex remembers `lastIndex` between `test()`
// calls, so reusing it on different strings can make a valid input fail.
var regexCheck = /^[A-Za-z0-9]*$/;
/**
 * Checks a search string for an illegal wildcard pattern and logs the verdict.
 *
 * The input is split into sub-searches on "|" and " or ". A sub-search is
 * illegal when it contains "*" and, once spaces and stars are removed, has at
 * least one but fewer than `limit` characters left.
 *
 * @param {string} s The raw search string.
 * @returns {boolean} true when every sub-search is acceptable, false otherwise.
 */
var check = function (s) {
  var parts = s.replace(/ or /g, '|').split('|');
  var hasIllegalPart = parts.some(function (part) {
    // Sub-searches without a wildcard are never rejected.
    if (part.indexOf('*') === -1) {
      return false;
    }
    var starCount = (part.match(/[*]/g) || []).length;
    var limit = starCount > 0 ? 2 : 3;
    var stripped = part.replace(/[ *]/g, '');
    // A lone "*" (nothing left after stripping) is not rejected here.
    return stripped.length > 0 && stripped.length < limit;
  });
  var flag = !hasIllegalPart;
  console.log((flag ? 'good:' : 'fail:') + s);
  return flag;
};
// Sample inputs: run each one through check() and through regexCheck, and log
// both verdicts so they can be compared.
var s = [
  'a',
  '*',
  'a*',
  '*a',
  'a*a',
  'a*a|aa',
  'a*a|a',
  'a*a|a*',
  'a*a or a*',
];
for (var j = 0; j < s.length; j++) {
  check(s[j]);
  var regexVerdict = regexCheck.test(s[j]) ? 'rg-good:' : 'rg-fail:';
  console.log(regexVerdict + s[j]);
}

Ok, still not sure if I got you correctly, so please test and tell me if that is what you want:
/^.*(?:^| or |\|)(?=.?\*)(\**)[^\s\*](\**)(?=$| or |\|).*$/
Here is the version that ignores whitespaces:
/^.*(?:^| or |\|)\s*(?=\s*.?\s*\*)\s*(\*\s*)*[^\s\*]\s*(\*\s*)*(?=$| or |\|).*$/
To make it easier to understand, I will explain the basic one:
Surround everything with ^.* and .*$ - this will make us match the entire string if one match is found. Aka if we have a*|aa, we will match the whole thing, not just a*. If you want to match only the subpattern - you can remove those.
Surround everything with (?:^| or |\|) and (?=$| or |\|) - will make us evaluate subpatterns one at a time. Aka separate each attempt to match with either line start/end, or |, or or.
(?=.?\*) - positive lookahead that says "in the following pattern, either the first or the second character has to be a *". By the 3rd rule, the first star must not be at the third or a later position, and by the 1st rule there has to be a star at all.
(\**)[^\s\*](\**) - match any amount of *, followed by a character, that is not a star or a whitespace, followed by any amount of *.

Related

removing the second matched word from a string?

I got a string
For example:
This is for trails and I want to learn Js and Coding and Development
The above mentioned line as a string
/**
 * Returns the part of `sen` that precedes the first occurrence of "and".
 *
 * @param {string} sen The sentence to cut.
 * @returns {string} The text before the first "and", or `sen` unchanged when
 *   it does not contain "and".
 */
function trail(sen) {
  var cat = "and";
  var fin = sen.indexOf(cat);
  // indexOf returns -1 when there is no match; 0 is a valid position
  // (sentence starts with "and") and must not be treated as "not found".
  if (fin === -1) {
    return sen;
  }
  return sen.substring(0, fin);
}
console.log(
  trail("This is for trails and I want to learn Js and Coding and Development ")
);
I am trying to find the index of the second "and" in a string rather than the first one.
and get the string part from index 0 to that second "and"
Could you please provide the better approach ?
You can use split together with join to achieve this, like so:
const myStr = 'This is for trails and I want to learn Js and Coding and Development';
const subStr = 'and';
// Keep everything that comes before the 2nd occurrence of subStr.
const splitted = getSplitted(myStr, subStr, 2);
console.log(splitted);

/**
 * Returns the part of `str` that precedes the `idx`-th occurrence of `subStr`.
 * When `str` contains fewer occurrences than that, it is returned unchanged.
 */
function getSplitted(str, subStr, idx) {
  // split() with a limit keeps only the first `idx` pieces ...
  const leadingPieces = str.split(subStr, idx);
  // ... and joining them puts the separators between those pieces back.
  return leadingPieces.join(subStr);
}
You can first find the second occurrence and then remove it via simple slice.
This method also supports regular expressions as pattern.
/**
 * Find the n-th occurrence of a given pattern in a string.
 * @param { string } str The string to be examined.
 * @param { string | RegExp } pattern The pattern to be matched.
 * @param { number } [n=1] Which occurrence to find, counted from 1.
 * @return { [number, string | null] } The start index and the matched text.
 *   `[-1, null]` if the pattern occurs fewer than n times or n is not a
 *   positive integer.
 */
function findNth(str, pattern, n = 1) {
  // Without this guard n = 0 would fall through to `result.length` on an
  // undefined result, and negative / fractional n would never count down to 0.
  if (!Number.isInteger(n) || n < 1)
    return [-1, null];
  // Work on a private copy so the caller's RegExp state is left untouched.
  const matcher = pattern instanceof RegExp
    ? new RegExp(pattern.source, pattern.flags)
    : null;
  // The total processed index and the last match
  let index = 0, result;
  for (; n--; ) {
    // Index of the next match relative to the end of the last one
    let offset = -1;
    if (matcher !== null) {
      // `str` is re-sliced on every pass, so a `lastIndex` kept by a global
      // or sticky regex would point into the previous string: reset it.
      matcher.lastIndex = 0;
      const match = matcher.exec(str);
      if (match !== null) {
        offset = match.index;
        result = match[0];
      }
    }
    else { // string case
      offset = str.indexOf(pattern);
      result = pattern;
    }
    // If none is matched
    if (offset === -1)
      return [-1, null];
    // Seek over the match result
    offset += result.length;
    str = str.slice(offset);
    index += offset;
  }
  // Go back to the start of the last match
  index -= result.length;
  return [index, result];
}
/**
 * Remove the n-th occurrence of given pattern out of a string.
 * Returns `str` unchanged when findNth reports no such occurrence.
 */
function removeNth(str, pattern, n = 1) {
  const [start, matched] = findNth(str, pattern, n);
  if (start === -1) {
    return str;
  }
  const end = start + matched.length;
  return str.slice(0, start) + str.slice(end);
}
{
  const str = 'This is for trails and I want to learn Js and Coding and Development';
  // Same removal, once with a plain string and once with a regex pattern.
  for (const pattern of ['and', /\s*and/]) {
    console.log(removeNth(str, pattern, 2));
  }
}
Use split
sen.split(cat, 2) // This line will divide the syntax into an array of two elements till second "and" occurrence
// ['This is for trails ', ' I want to learn Js ']
Then you need to join them to add the first and
sen.split(cat, 2).join(cat)
And to get the length
sen.split(cat, 2).join(cat).length
// split("and", 2) keeps the two pieces before the second "and"; re-join them
// with "and" (not "") so the first "and" stays in the result.
let str = "This is for trails and I want to learn Js and Coding and Development".split("and", 2).join("and");
console.log(str);

JS Regex - Match each not escaped specific characters

I'm trying to make a Regex in JavaScript to match each not escaped specific characters.
Here I'm looking for all the ' characters. They can be at the beginning or the end of the string, and consecutive.
E.g.:
'abc''abc\'abc
I should get 3 matches: the 1st, 5th and 6th characters, but not the 11th, which is escaped.
You'll have to account for cases like \\' which should match, and \\\' which shouldn't. but you don't have lookbehinds in JS, let alone variable-length lookbehinds, so you'll have to use something else.
Use the following regex:
\\.|(')
This will match both all escaped characters and the ' characters you're looking for, but the quotes will be in a capture group.
Look at this demo. The matches you're interested in are in green, the ones to ignore are in blue.
Then, in JS, ignore each match object m where !m[1].
Example:
var input = "'abc''abc\\'abc \\\\' abc";
var re = /\\.|(')/g;
var positions = [];
// Walk over every match; only those that filled capture group 1 are quotes
// that were not consumed as part of an escape pair.
for (var m = re.exec(input); m; m = re.exec(input)) {
  if (m[1]) {
    positions.push(m.index);
  }
}
// Build a marker line: "^" under each recorded position, a space elsewhere.
var pos = input.split("").map(function (unused, i) {
  return positions.indexOf(i) >= 0 ? "^" : " ";
});
document.getElementById("output").innerText = input + "\n" + pos.join("");
<pre id="output"></pre>
You can use:
var s = "'abc''abc\\'abc";
// Match every escape pair (a backslash plus the character after it) as one
// unit, so that an escaped backslash followed by a quote (\\') still counts
// the quote while \' does not; then keep only the bare quote matches.
var cnt = (s.match(/\\.|'/g) || []).filter(function (token) {
  return token === "'";
}).length;
console.log(cnt);
//=> 3

create all possible variations of a string with inserted character

I'm trying to take the variable email and create all possible combinations with a "." in it like so:
Results
andrew
andre.w
andr.ew
andr.e.w
and.rew
and.re.w
and.r.ew
and.r.e.w
an.drew
an.dre.w
an.dr.ew
an.dr.e.w
an.d.rew
an.d.re.w
an.d.r.ew
an.d.r.e.w
a.ndrew
a.ndre.w
a.ndr.ew
a.ndr.e.w
a.nd.rew
a.nd.re.w
a.nd.r.ew
a.nd.r.e.w
a.n.drew
a.n.dre.w
a.n.dr.ew
a.n.dr.e.w
a.n.d.rew
a.n.d.re.w
a.n.d.r.ew
a.n.d.r.e.w
I'm not sure how to go about doing this exactly. I know how to use a loop to go over each character, but as far as the rest goes I'm stumped. I was looking at substr, slice and a few other functions but couldn't get anything working.
Code
var email = "andrew";
// Log each character of the address followed by a dot, one per line.
email.split("").forEach(function (ch) {
  console.log(ch + ".");
});
That's easy:
var str = 'andrew';
var results = [];
var bin;
// One bit per gap between two letters: 2^(length - 1) variations in total.
var variationCount = Math.pow(2, str.length - 1);
for (var i = 0; i < variationCount; ++i) {
  // Reverse the binary digits so that the digit at position k belongs to the
  // letter at index k; a "1" means "put a dot after this letter".
  bin = i.toString(2).split('').reverse().join('');
  var variation = str.split('').map(function (letter, index) {
    return bin.charAt(index) === '1' ? letter + '.' : letter;
  }).join('');
  results.push(variation);
}
console.log(results);
Demo: http://jsfiddle.net/9qLY6/
Short description:
For 'abc' string there are 2 positions for a dot character: between a and b; b and c. These 2 positions might be presented as a digits of a binary number. All the possible combinations in this case are:
00
01
10
11
If you treat 1 as - . there, and 0 as no . there - you can just iterate over 2^(n-1) numbers and put . if the corresponding bit is set.
If you're interested in a recursive solution like Dinesh mentioned, here's some code to get you started.
/**
 * Builds every variation of `str` in which each gap between two adjacent
 * characters either holds a "." or not.
 *
 * @param {string} str The characters that still have to be placed.
 * @param {string} [prev] The text assembled so far (used by the recursion).
 * @returns {string[]} All variations, each prefixed with `prev`.
 */
function withPeriods(str, prev) {
  var prefix = prev || '';
  // Nothing left to place: the prefix alone is the result, if there is one.
  if (!str || str.length === 0) {
    return prefix ? [prefix] : [];
  }
  // A single remaining character has no gap after it.
  if (str.length === 1) {
    return [prefix + str];
  }
  var head = str.charAt(0);
  var tail = str.slice(1);
  // Branch on the gap after `head`: first without a dot, then with one.
  var withoutDot = withPeriods(tail, prefix + head);
  var withDot = withPeriods(tail, prefix + head + '.');
  return withoutDot.concat(withDot);
}
The idea here is that you are working your way through the string, keeping the current result in the 'prev' variable. If the string is length 0 or 1, there's nothing left to do. Otherwise, you need consider two options: one where you take a character from 'str' and add it to 'prev', and one where you do that but also add a '.'
If you think about it, you need to either insert a dot, or not insert one, at every possible location in the string (between any two characters). A funky way to do this is to realize that if you have n characters, there are n-1 places. If you write period = 1 and no period = 0, then every solution corresponds to one (n-1)-bit binary number, giving 2^(n-1) combinations in total. Showing this for the four-letter word "word":
000 word
001 wor.d
010 wo.rd
011 wo.r.d
100 w.ord
101 w.or.d
110 w.o.rd
111 w.o.r.d
In pseudo code (can't test JS syntax right now):
n = strlen( email );
combinations = 1 << n - 1; // left shift operation
for i = 0 to combinations - 1:
dot = 1
for j = 0 to n:
print email[j];
if dot & i:
print '.'
dot << 1;
Can you take it from here?
You might take a recursive approach to this problem. Maybe you can use the base case as a string with 2 characters.

Replace characters using Regex positive/negative lookahead?

I have this string :
var a='abc123#xyz123';
I want to build 2 regexes replace functions which :
1) Replace all characters that do have a future '#' - with '*' (not including '#')
so the result should look like :
'******#xyz123'
2) Replace all characters that do not have a future '#' - with '*' (not including '#')
so the result should look like :
'abc123#******'
What have I tried :
For the positive lookahead :
var a='abc123#xyz123';
alert(a.replace(/(.+(?=#))+/ig,'*')); //*#xyz123 --wrong result since it is greedy...
Question :
How can I make my regexes work as expected ?
First part using lookahead:
repl = a.replace(/.(?=[^#]*#)/g, "*");
//=> "******#xyz123"
Explanation:
This regex uses a lookahead to find any character that is followed later in the string by a #, and replaces it with *.
Second part using replace callback:
repla = a.replace(/#(.*)$/, function(m, t) { return m[0] + t.replace(/./g, '*'); } );
//=> abc123#******
Explanation:
This code finds the text after #. Inside the callback function it replaces every character with an asterisk.
You can use indexOf and substr for this instead:
/**
 * Masks the part of `str` before or after the first occurrence of a separator.
 *
 * @param {boolean} before true to mask what precedes the separator, false to
 *   mask what follows it.
 * @param {string} str The text to mask.
 * @param {string} [character='#'] The separator; it may be longer than one
 *   character and is never masked itself.
 * @param {string} [maskCharacter='*'] Written once per masked character.
 * @returns {string} The masked text, or `str` unchanged when it does not
 *   contain the separator.
 */
function maskBeforeAfter(before, str, character, maskCharacter) {
  var separator = String(character || '#');
  var maskUnit = String(maskCharacter || '*');
  var separatorStart = str.indexOf(separator);
  if (separatorStart === -1) {
    return str;
  }
  if (before) {
    return maskUnit.repeat(separatorStart) + str.slice(separatorStart);
  }
  // Skip the whole separator, not just one character, so a multi-character
  // separator is kept intact and the mask has exactly the tail's length.
  var tailStart = separatorStart + separator.length;
  return str.slice(0, tailStart) + maskUnit.repeat(str.length - tailStart);
}
/** Masks the text in front of the separator by delegating to maskBeforeAfter. */
function maskBefore(str, character, maskCharacter) {
  var maskLeadingPart = true;
  return maskBeforeAfter(maskLeadingPart, str, character, maskCharacter);
}
/** Masks the text behind the separator by delegating to maskBeforeAfter. */
function maskAfter(str, character, maskCharacter) {
  var maskLeadingPart = false;
  return maskBeforeAfter(maskLeadingPart, str, character, maskCharacter);
}
> var a = 'abc12345#xyz123';
> maskBefore(a);
"********#xyz123"
> maskAfter(a);
"abc12345#******"
If you insist on a simple regex:
The first one is already answered. The second can be written similarly:
a.replace(/[^#](?![^#]*#)/g, '*')
(?![^#]*#) is a negative lookahead that checks that there isn't a pound after the current character.
[^#] also checks that the current character isn't a pound. (We could have also used /(?![^#]*#)./g, but it is less pretty.)
A positive option is:
a.replace(/[^#](?=[^#]*$)/g, '*');
this is very similar to the first one: (?=[^#]*$) checks that we have only non-pounds ahead, until the end of the string.
In both of these options, all characters in strings with no pounds will be replaced: "abcd" -> "****"

How to check if a digit is used in a number multiple times

Example: We have the number 1122. I would like to check that if given number contains the digit 1 more than once. In this case, it should return true.
I need the code to be flexible, it has to work with any number, like 3340, 5660, 4177 etc.
You can easily "force" JS to coerce any numeric value to a string, either by calling the toString method, or concatenating:
var someNum = 1122;
// Concatenating with '' coerces the number to a string. Splitting on '1'
// yields one more piece than there are '1' characters, so oneCount is
// (occurrences + 1): 3 for 1122.
var oneCount = (someNum + '').split('1').length;
by concatenating a number to an empty string, the variable is coerced to a string, so you can use all the string methods you like (.match, .substring, .indexOf, ...).
In this example, I've chosen to split the string on each '1' char and use the length of the resulting array. If the length is > 2, then you know what you need to know.
var multipleOnes = ((someNum + '').split('1').length > 2);//returns a bool, true in this case
In response to your comment, to make it flexible - writing a simple function will do:
/**
 * Tells whether `digit` occurs in `number` more than `moreThan` times.
 *
 * @param {number|string} number The value to inspect (coerced to a string).
 * @param {number|string} [digit=1] The digit to look for.
 * @param {number} [moreThan=1] The count that has to be exceeded.
 * @returns {boolean} true when `digit` occurs more than `moreThan` times.
 */
function multipleDigit(number, digit, moreThan)
{
    // Only a missing argument falls back to 1, so an explicit 0 means
    // "more than zero times". Splitting yields (occurrences + 1) pieces,
    // hence the extra 1 in the threshold.
    var threshold = (moreThan === undefined || moreThan === null ? 1 : moreThan) + 1;
    var needle = (digit !== undefined ? digit : 1).toString();
    // Inspect the `number` argument, not an outer variable.
    return ((number + '').split(needle).length > threshold);
}
multipleDigit(1123, 1);//returns true
multipleDigit(1123, 1, 2);//returns false
multipleDigit(223344,3);//returns true -> more than 1 3 in number.
Use javascript's match() method. Essentially, what you'd need to do is first convert the number to a string. Numbers don't have the RegExp methods. After that, match for the number 1 globally and count the results (match returns an array with all matched results).
var number = 1100;
// match() returns null when nothing matches, so fall back to an empty array
// instead of reading .length from null.
console.log((number.toString().match(/1/g) || []).length);
/**
 * Counts how often the digit `num` occurs in the number `tofind`, shows the
 * count with alert() and also returns it.
 *
 * @param {number|string} num The digit to count (parsed as a base-10 integer).
 * @param {number|string} tofind The number to search in (parsed as a base-10 integer).
 * @returns {number} The number of occurrences.
 */
function find(num, tofind) {
  var digit = parseInt(num, 10);
  // parseInt yields a number, which has no split(); convert it back to a
  // string before breaking it into single characters.
  var digits = String(parseInt(tofind, 10)).split("");
  var times = 0;
  for (var i = 0; i < digits.length; i++) {
    if (Number(digits[i]) === digit) {
      times++;
    }
  }
  alert(times);
  return times;
}
find('2', '1122');
Convert the number to a string and iterate over it. Return true once a second digit has been found, for efficiency.
/**
 * Reports whether `digit` appears at least twice in `number`.
 * Both arguments are passed through Math.abs(), so signs are ignored.
 *
 * @param {number} number The value whose digits are scanned.
 * @param {number} digit The digit to look for.
 * @returns {boolean} true as soon as a second occurrence is found, else false.
 * @throws {TypeError} When either argument is NaN after Math.abs().
 * @throws {SyntaxError} When `digit` is greater than 9.
 */
function checkDigitRepeat(number, digit) {
  var magnitude = Math.abs(number);
  if (isNaN(magnitude)) {
    throw(TypeError('expected Number for number, got: ' + number));
  }
  var haystack = magnitude.toString();

  var digitMagnitude = Math.abs(digit);
  if (isNaN(digitMagnitude)) {
    throw(TypeError('expected Number for digit, got: ' + digit));
  }
  var needle = digitMagnitude.toString();
  // The string is coerced back to a number for this comparison.
  if (needle > 9) {
    throw(SyntaxError('expected a digit for digit, got a sequence of digits: ' + needle));
  }

  var seen = 0;
  for (var ch of haystack) {
    if (ch !== needle) {
      continue;
    }
    seen += 1;
    // Stop scanning once the second occurrence is found.
    if (seen >= 2) {
      return true;
    }
  }
  return false;
}
In the event that you want to check for a sequence of digits, your solution may lie in using regular expressions.
var myNum = '0011';
/**
 * Reports whether the text form of `num` contains a digit that is immediately
 * followed by the same digit (for example "00" or "11").
 */
var isMultipleTimes = function (num) {
  var adjacentRepeats = num.toString().match(/(\d)\1/g);
  return adjacentRepeats !== null;
};
console.log(isMultipleTimes(myNum));
JavaScript Match
Using @Aspiring Aqib's answer, I made a function that actually works properly and in the way I want.
The way it works is:
Example execution: multDig('221','2')
Split the number (first argument) to an array where each element is one digit.Output: ['2','2','1']
Run a for loop, which checks each of the array elements if they match with the digit (second argument), and increment the times variable if there is a match.Output: 2
Check inside the for loop if the match was detected already to improve performance on longer numbers like 2211111111111111
Return true if the number was found more than once, otherwise, return false.
And finally the code itself:
/**
 * Reports whether `digit` occurs more than once in `number`.
 *
 * @param {number|string} number The value whose digits are scanned.
 * @param {number|string} digit The digit to look for.
 * @returns {boolean} true as soon as a second occurrence is found, else false.
 */
function multDig(number, digit) {
  var target = String(digit);
  var times = 0;
  // The loop variable is declared locally; an undeclared index would leak
  // into the global scope and throw in strict mode / ES modules.
  for (var ch of number.toString()) {
    if (ch === target) {
      times++;
      // Stop early so long numbers are not scanned to the end.
      if (times > 1) {
        return true;
      }
    }
  }
  return false;
}

Categories