I'm creating a DSL based on JSON and JavaScript and I have a requirement to let key values be specified 'raw' and not enclosed in string delimiters. A quick example that hopefully explains this:
{myKey:custom_function('arg1'), myKey2:custom_function("another arg1")}
should become
{myKey:"custom_function('arg1')", myKey2:"custom_function(\"another arg1\")"}
This is because at the time of parsing the JSON object, custom_function will not exist. I need to be able to parse the JSON without evaluating any of the values, then only expand values one by one as I iterate the keys.
What regular expression or other method can I use to turn that 1st snippet into the 2nd one?
I'm assuming a simpler solution will cover 90% of cases but that writing a bullet-proof implementation would take a lot of effort. Based on the research I did into JavaScript's regular expression support (apparently no lookbehind capability) I'm assuming it will require something more than just 1 or 2 lines of regex patterns.
Also, this is for a node application so any tricks that it has for this will be helpful too.
EDIT:
This question seems to be getting some downvotes, but I've left it up anyway for the benefit of future googlers / my own reference. It's a perfectly valid question about what method / technique would work best for this kind of problem, and there could easily be other node/js newcomers who face a similar problem.
Final answer: Regex just isn't suited to a task as complex as this. Any similarly complex solutions I found online (e.g. removing code comments) all resorted to a mainly custom, iterative approach, only using regex sparingly, so a similar approach ended up being not too painful in this situation.
So in the end the 'best' method I could find didn't involve very much regex or any specialized libraries from node or elsewhere suited to the problem.
Finally, for the benefit of future googlers who might have a similar problem, I've published my solution at https://gist.github.com/2590689 and copied below:
//clothe a hub file that has 'naked' expressions
//e.g. turn {key:$('body p')} into {key:"$('body p')"}
/**
 * Wraps every unquoted ("naked") value of a JSON-like object literal in
 * double quotes so the text can be parsed without evaluating the values.
 * Double quotes that appear inside a value are backslash-escaped.
 *
 * The scanner is a small state machine (before key -> in key -> before value
 * -> in value) combined with a stack of open delimiters, so that a `,`, `}`
 * or `]` nested inside parentheses, brackets, braces, quotes or /slashes/
 * does not terminate the value.
 *
 * @param {string} contents - source text containing naked expressions
 * @returns {string} the same text with every value wrapped in double quotes
 */
function clothe(contents){
    // These three used to be assigned without a declaration, which leaks
    // globals in sloppy mode and throws a ReferenceError in strict mode
    // (including ES modules).
    var closers = /[\}\]\)\/"']/;
    var openers = /[\{\[\(\/"']/;
    var closing = {
        "{": "}",
        "[": "]",
        "(": ")",
        "/": "/",
        '"': '"',
        "'": "'"
    };
    var keyStart = /[a-zA-Z0-9$_!]/;
    var valueStart = /[a-zA-Z0-9$_!'"\(\/]/;
    var valueEnd = /[\},\]]/;

    // Work on an array of characters so quotes can be spliced in place.
    var chars = contents.split("");
    var beforeKey = true;
    var inKey = false;
    var beforeValue = false;
    var inValue = false;
    var inArray = false;
    var delimiterStack = [];
    var i;

    function inDelimited(){
        return delimiterStack.length > 0;
    }

    // Yields undefined when the stack is empty.
    function topDelimiter(){
        return delimiterStack[delimiterStack.length - 1];
    }

    function pushDelimiter(d){
        delimiterStack.push(d);
    }

    // Pops only when `d` is the closer that matches the innermost opener.
    function popDelimiterIfValid(d){
        if(delimiterStack.length > 0 && closing[topDelimiter()] === d){
            delimiterStack.pop();
        }
    }

    function toggleDelimiter(d){
        var opens = openers.test(d);
        var closes = closers.test(d);
        if(opens && closes){
            // Quotes and slashes open and close with the same character:
            // close when the same character is innermost, otherwise open.
            if(topDelimiter() === d){
                popDelimiterIfValid(d);
            }else{
                pushDelimiter(d);
            }
        }else if(opens){
            pushDelimiter(d);
        }else if(closes){
            popDelimiterIfValid(d);
        }
    }

    // Index just after the last non-whitespace character before `rightBound`,
    // so the closing quote lands before any trailing whitespace.
    function rTrimmedRightBound(rightBound){
        while(rightBound > 0){
            if(!/\s/.test(chars[--rightBound])){
                return rightBound + 1;
            }
        }
        return 0;
    }

    // Tracks delimiters for the character at `i`; a double quote is also
    // escaped because the whole value ends up inside double quotes.
    function delimiterCheck(c){
        if(c === '"'){
            toggleDelimiter('"');
            chars.splice(i, 0, '\\');
            i++;
        }else if(openers.test(c) || closers.test(c)){
            toggleDelimiter(c);
        }
    }

    // Inserts the opening quote in front of the character at `i`.
    function startValue(){
        chars.splice(i, 0, '"');
        i++;
        beforeValue = false;
        inValue = true;
        delimiterCheck(chars[i]);
    }

    for(i = 0; i < chars.length; i++){
        if(beforeKey){
            if(keyStart.test(chars[i])){
                beforeKey = false;
                inKey = true;
            }
        }else if(inKey){
            if(chars[i] === ":"){
                inKey = false;
                beforeValue = true;
            }
        }else if(beforeValue){
            if(valueStart.test(chars[i])){
                startValue();
            }else if(chars[i] === "{"){
                // Nested object: its first token is a key again.
                beforeKey = true;
                beforeValue = false;
            }else if(chars[i] === "["){
                beforeValue = false;
                inArray = true;
            }
        }else if(inArray && !inValue){
            if(valueStart.test(chars[i])){
                startValue();
            }
        }else if(inValue){
            if(!inDelimited() && valueEnd.test(chars[i])){
                chars.splice(rTrimmedRightBound(i), 0, '"');
                i++; // step over the quote that was just inserted
                inValue = false;
                if(chars[i] === "]"){
                    inArray = false;
                }
                beforeKey = !inArray;
            }else{
                delimiterCheck(chars[i]);
            }
        }
    }
    return chars.join("");
}
Related
I have written below lines of code for checking date format on the basis of multiple date formats like
/**
 * Checks whether a date string is shaped like the currently selected date
 * format, as reported by getDateFormat().
 *
 * Supported formats are "d-m-Y", "m-d-Y" and "Y-m-d". Day and month must be
 * two digits, the year must be 19xx or 20xx, and the separator may be "-",
 * a space, "/" or ".". This is a shape check only: calendar validity
 * (e.g. 31-02) is not verified.
 *
 * @param {string} dateValue - the text to validate
 * @returns {boolean} true when dateValue matches the selected format,
 *     false otherwise (including when the format is not supported)
 */
function checkValidDate(dateValue)
{
    var patterns = {
        "d-m-Y": /^(0[1-9]|[12][0-9]|3[01])[- \/.](0[1-9]|1[012])[- \/.](19|20)\d\d$/,
        // The first separator previously lacked the space alternative.
        "m-d-Y": /^(0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])[- \/.](19|20)\d\d$/,
        // Previously malformed: one year digit was missing and the day group
        // was nested inside the month group with no separator in between.
        "Y-m-d": /^(19|20)\d\d[- \/.](0[1-9]|1[012])[- \/.](0[1-9]|[12][0-9]|3[01])$/
    };
    var dateFormat = getDateFormat();
    if (!Object.prototype.hasOwnProperty.call(patterns, dateFormat)) {
        // Unknown format: report invalid instead of falling through to undefined.
        return false;
    }
    return patterns[dateFormat].test(dateValue);
}
// When the date-of-birth field loses focus, validate its trimmed value and
// alert the user if checkValidDate explicitly reports it as invalid.
$("#dobfield").blur(function () {
    var enteredDate = $("#dobfield").val().trim();
    if (checkValidDate(enteredDate) == false) {
        alert("wrong");
    }
});
There are three formats accepted in this project "d-m-Y", "m-d-Y", "Y-m-d".
Now I am trying to check whether the date entered is in valid regular expression format or not on the basis of presently selected date format.
The above code is not working!!! Please help me!!!
/**
 * Checks whether a date string matches one of the short date formats.
 *
 * Day and month are written without a leading zero (1-31, 1-12), the year is
 * two digits, and the separator may be "-", a space, "/" or ".". This is a
 * shape check only; calendar validity is not verified.
 *
 * @param {string} dateValue - the text to validate
 * @param {string} [dateFormat="d-m-Y"] - "d-m-Y", "m-d-Y" or "Y-m-d". Optional,
 *     so existing one-argument callers keep the previous "d-m-Y" behaviour.
 * @returns {boolean} true when dateValue matches the format, false otherwise
 *     (including for an unsupported format)
 */
function checkValidDate(dateValue, dateFormat)
{
    // The format used to be hard-coded, which left the other cases unreachable.
    var format = dateFormat === undefined ? "d-m-Y" : dateFormat;
    switch (format)
    {
        case "d-m-Y":
            return /^([1-9]|[12][0-9]|3[01])[- \/.]([1-9]|1[012])[- \/.]\d\d$/.test(dateValue);
        case "m-d-Y":
            return /^([1-9]|1[012])[- \/.]([1-9]|[12][0-9]|3[01])[- \/.]\d\d$/.test(dateValue);
        case "Y-m-d":
            // Month comes before day here; the previous pattern had them swapped.
            return /^\d\d[- \/.]([1-9]|1[012])[- \/.]([1-9]|[12][0-9]|3[01])$/.test(dateValue);
        default:
            return false;
    }
}
I have changed the regex a little bit to support the format that you described. Hope this helps you.
Working Example
I see multiple problems here.
In amount of code provided - not self testable example
At definition - what is d-m-Y?
Possibly at regex, depending on former point.
But the biggest problem is in approach.
Does switch using dateFormat work?
Is d-m-Y in fact dd-mm-YYYY?
In that case, 1st regex (didn't test beyond that - no point) is wrong. This one will work:
case "d-m-Y":
// Accepts day 1-31 and month 1-12, each with an optional leading zero,
// followed by a four-digit year. The separator class [\-\-] lists the hyphen
// twice, so "-" is the only separator this pattern accepts.
var regex = /^(0?[1-9]|[12][0-9]|3[01])[\-\-](0?[1-9]|1[012])[\-\-]\d{4}$/;
Using regex for this is not enough. You are going to need a far more complex regex to filter out dates like 31-2-year, or to handle years that do or do not have a 29-2, for example.
Your best bet is to use MomentJS. It's simple, not lightweight and battle-tested with good documentation.
I tried to validate a URL with or without http. No matter what I did, the function returned false.
I checked my regex string in this site:
http://regexr.com/
And its seen as i expect.
/**
 * Reports whether the input contains something shaped like a URL, with or
 * without an http(s):// prefix, and notifies the user when it does.
 *
 * The pattern is a regex literal. It was previously passed to `new RegExp`
 * as a string that still contained the surrounding "/" delimiters and
 * unescaped backslashes (so "\b" became a backspace character and "\." became
 * "."), which is why the check always returned false.
 *
 * @param {string} userInput - text to check
 * @returns {boolean} true when a URL-like substring is found, false otherwise
 */
function isUrlValid(userInput) {
    // No "g" flag: a single yes/no test needs no lastIndex state.
    var url = /(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9#:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9#:%_\+.~#?&\/=]*)/;
    if (url.test(userInput)) {
        // Message corrected: it used to claim an e-mail address had been entered.
        alert('Great, you entered a URL');
        return true;
    }
    return false;
}
I fixed the problem by changing .test to .match and leaving the regex as is.
I changed the function to use match and changed the slashes here, and it works: (http(s)?://.)
The fixed function:
/**
 * Reports whether the input contains at least one URL-like substring.
 *
 * @param {string} userInput - text to check
 * @returns {boolean} true when String.prototype.match finds a match
 */
function isUrlValid(userInput) {
    var urlPattern = /(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9#:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9#:%_\+.~#?&//=]*)/g;
    // match() yields null when nothing matches, otherwise an array.
    return userInput.match(urlPattern) != null;
}
I believe the other answer will reject some valid url's (like domain names in uppercase or long sub-domains) and allow some invalid ones (like http://www.-example-.com or www.%#&.com). I tried to take into account a number of additional url syntax rules (without getting into internationalisation).
/**
 * Validates a whole string as a URL: optional http(s):// scheme, optional
 * "www.", any number of sub-domain labels, a domain label of at least two
 * characters that neither starts nor ends with "-", a 2-6 letter top-level
 * domain and an optional path. Matching is case-insensitive.
 *
 * @param {string} userInput - text to check
 * @returns {boolean} true when the entire input matches the pattern
 */
function isUrlValid(userInput) {
    var pattern = "^(https?://)?(www\\.)?([-a-z0-9]{1,63}\\.)*?[a-z0-9][-a-z0-9]{0,61}[a-z0-9]\\.[a-z]{2,6}(/[-\\w#\\+\\.~#\\?&/=%]*)?$";
    return new RegExp(pattern, "i").test(userInput);
}
// Sample inputs: two URLs the validator should accept and two it should reject.
var input = ["https://o.sub-domain.example.com/foo/bar?foo=bar&boo=far#a%20b",
"HTTP://EX-AMPLE.COM",
"example.c",
"example-.com"];
// Index loop rather than for...in: for...in walks enumerable property names
// (including any added to Array.prototype), not just the array elements.
for (var i = 0; i < input.length; i++) {
    document.write(isUrlValid(input[i]) + ": " + input[i] + "<br>");
}
To also allow IP addresses and port numbers, the regex is:
"^(https?://)?(((www\\.)?([-a-z0-9]{1,63}\\.)*?[a-z0-9][-a-z0-9]{0,61}[a-z0-9]\\.[a-z]{2,6})|((\\d{1,3}\\.){3}\\d{1,3}))(:\\d{2,4})?(/[-\\w#\\+\\.~#\\?&/=%]*)?$"
To also allow query strings without a slash between the domain name and the question mark (which is theoretically not allowed, but works in most real-life situations), the regex is:
"^(https?://)?(((www\\.)?([-a-z0-9]{1,63}\\.)*?[a-z0-9][-a-z0-9]{0,61}[a-z0-9]\\.[a-z]{2,6})|((\\d{1,3}\\.){3}\\d{1,3}))(:\\d{2,4})?((/|\\?)[-\\w#\\+\\.~#\\?&/=%]*)?$"
To also make sure that every % is followed by a hex number, the regex is:
"^(https?://)?(((www\\.)?([-a-z0-9]{1,63}\\.)*?[a-z0-9][-a-z0-9]{0,61}[a-z0-9]\\.[a-z]{2,6})|((\\d{1,3}\\.){3}\\d{1,3}))(:\\d{2,4})?((/|\\?)(((%[0-9a-f]{2})|[-\\w#\\+\\.~#\\?&/=])*))?$"
(Note: as John Wu mentioned in a comment, there are valid single-letter domains).
Actually, this question needs a powerful regex and the following code is not very hard to understand, please see below(ES6 - TypeScript):
/**
 * Tests whether the whole string looks like a host name: optional http(s)://
 * scheme, optional "www." in either case, a dotted name, and one or two
 * alphabetic suffixes of at least two letters. Paths and query strings are
 * not accepted because the pattern is anchored at both ends.
 *
 * @param url - text to check
 * @returns true when the pattern matches
 */
const isValidUrl = (url: string): boolean =>
  url.match(/^((http(s?)?):\/\/)?([wW]{3}\.)?[a-zA-Z0-9\-.]+\.[a-zA-Z]{2,}(\.[a-zA-Z]{2,})?$/g) !== null;
Try this code.
/**
 * Validates the URL typed into a form field.
 *
 * @param {{value: string, focus: function}} fieldId - the field object itself
 *     (despite the name, its `value` is read and `focus()` is called on it)
 * @param {string} alertMessage - message shown when the value is not URL-like
 * @returns {boolean|undefined} true for a URL-like value; false after alerting
 *     and refocusing for an invalid one; undefined when the field is empty
 */
function CheckURL(fieldId, alertMessage) {
    var address = fieldId.value;
    // An empty field is neither accepted nor reported.
    if (address === "") {
        return;
    }
    var urlPattern = /(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9#:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9#:%_\+.~#?&//=]*)/g;
    if (address.match(urlPattern) !== null) {
        return true;
    }
    alert(alertMessage);
    fieldId.focus();
    return false;
}
// Look up the element with id 'Website' and validate its current value.
var website = document.getElementById('Website');
// Stop when CheckURL returns a falsy result.
// NOTE(review): a bare `return` is only valid inside a function — presumably
// this fragment lives in a form-submit handler; confirm against the caller.
if (!CheckURL(website, "Enter a valid website address")) {
return false;
}
Recently, I've been attempting to emulate a small language in jQuery and JavaScript, yet I've come across what I believe is an issue. I think that I may be parsing everything completely wrong.
In the code:
#name Testing
#inputs
#outputs
#persist
#trigger
print("Test")
The current way I am separating and parsing the string is by splitting all of the code into lines, and then reading through this lines array using searches and splits. For example, I would find the name using something like:
// When the current line exists and starts with "#name", take the second
// space-separated token of that line as the name.
if (typeof lines[line] !== 'undefined' && lines[line].search('#name') === 0)
{
    name = lines[line].split(' ')[1];
}
But I think that I may be largely wrong on how I am handling parsing.
While reading through examples on how other people are handling parsing of code blocks like this, it appeared that people parsed the entire block, instead of splitting it into lines as I do. I suppose the question of the matter is, what is the proper and conventional way of parsing things like this, and how do you suggest I use it to parse something such as this?
In simple cases like this regular expressions is your tool of choice:
// Capture the word that follows "#name". match() returns null when the
// directive is absent, so check before reading the capture group instead of
// throwing a TypeError on matches[1].
matches = code.match(/#name\s+(\w+)/);
if (matches) {
    name = matches[1];
}
To parse "real" programming languages regexps are not powerful enough, you'll need a parser, either hand-written or automatically generated with a tool like PEG.
A general approach to parsing, that I like to take often is the following:
loop through the complete block of text, character by character.
if you find a character that signalizes the start of one unit, call a specialized subfunction to parse the next characters.
within each subfunction, call additional subfunctions if you find certain characters
return from every subfunction when a character is found, that signalizes, that the unit has ended.
Here is a small example:
var text = "#func(arg1,arg2)";

/**
 * Walks the text once, character by character. Each "#" hands control to
 * parseFunction, and scanning resumes after the index that call reports.
 * The result of the last parseFunction call is logged (undefined when the
 * text contains no "#").
 *
 * @param {string} text - source text to scan
 */
function parse(text) {
    var lastResult;
    var end = text.length;
    var pos = 0;
    while (pos < end) {
        if (text.charAt(pos) === "#") {
            lastResult = parseFunction(text, pos + 1);
            // Jump to where the sub-parser stopped; the increment below
            // then moves past that character.
            pos = lastResult.index;
        }
        pos++;
    }
    console.log(lastResult);
}
/**
 * Reads a function name starting at index i, up to the opening parenthesis,
 * then delegates the argument list to parseArguments.
 *
 * @param {string} text - full source text
 * @param {number} i - index of the first character of the function name
 * @returns {{name: string, args: Array, index: number}|undefined} the parsed
 *     call, with `index` as reported by parseArguments; undefined when no "("
 *     is found before the end of the text
 */
function parseFunction(text, i) {
    var collectedName = "";
    var end = text.length;
    while (i < end) {
        var current = text.charAt(i);
        if (current === "(") {
            var parsedArgs = parseArguments(text, i + 1);
            return {
                name: collectedName,
                args: parsedArgs.arr,
                index: parsedArgs.index
            };
        }
        collectedName += current;
        i++;
    }
}
/**
 * Reads a comma-separated argument list starting at index i and ending at
 * the first ")". Arguments are taken verbatim (no trimming, no nesting).
 *
 * @param {string} text - full source text
 * @param {number} i - index of the first character after the "("
 * @returns {{arr: string[], index: number}|undefined} the arguments and the
 *     index of the closing ")"; undefined when no ")" is found
 */
function parseArguments(text, i) {
    var collected = [];
    var current = "";
    var end = text.length;
    for (; i < end; i++) {
        var symbol = text.charAt(i);
        if (symbol === ")") {
            collected.push(current);
            return {
                arr: collected,
                index: i
            };
        }
        if (symbol === ",") {
            collected.push(current);
            current = "";
        } else {
            current += symbol;
        }
    }
}
FIDDLE
this example just parses function expressions, that follow the syntax "#functionName(argumentName1, argumentName2, ...)". The general idea is to visit every character exactly once without the need to save current states like "hasSeenAtCharacter" or "hasSeenOpeningParentheses", which can get pretty messy when you parse large structures.
Please note that this is a very simplified example and it misses all the error handling and stuff like that, but I hope the general idea can be seen. Note also that I'm not saying that you should use this approach all the time. It's a very general approach, that can be used in many scenarios. But that doesn't mean that it can't be combined with regular expressions for instance, if it, at some part of your text, makes more sense than parsing each individual character.
And one last remark: you can save yourself the trouble if you put the specialized parsing function inside the main parsing function, so that all functions have access to the same variable i.
I'm programming my own autocomplete textbox control using C# and javascript on clientside. On client side i want to replace the characters in string which matching the characters the user was searching for to highlight it. For example if the user was searching for the characters 'bue' i want to replace this letters in the word 'marbuel' like so:
mar<span style="color:#81BEF7;font-weight:bold">bue</span>l
in order to give the matching part another color. This works pretty fine if I have 100-200 items in my autocomplete, but when it comes to 500 or more, it takes too much time.
The following code shows my method which does the logic for this:
HighlightTextPart: function (text, part) {
var currentPartIndex = 0;
var partLength = part.length;
var finalString = '';
var highlightPart = '';
var bFoundPart = false;
var bFoundPartHandled = false;
var charToAdd;
for (var i = 0; i < text.length; i++) {
var myChar = text[i];
charToAdd = null;
if (!bFoundPart) {
var myCharLower = myChar.toLowerCase();
var charToCompare = part[currentPartIndex].toLowerCase();
if (charToCompare == myCharLower) {
highlightPart += myChar;
if (currentPartIndex == partLength - 1)
bFoundPart = true;
currentPartIndex++;
}
else {
currentPartIndex = 0;
highlightPart = '';
charToAdd = myChar;
}
}
else
charToAdd = myChar;
if (bFoundPart && !bFoundPartHandled) {
finalString += '<span style="color:#81BEF7;font-weight:bold">' + highlightPart + '</span>';
bFoundPartHandled = true;
}
if (charToAdd != null)
finalString += charToAdd;
}
return finalString;
},
This method only highlight the first occurence of the matching part.
I use it as follows. Once the request is coming back from server i build an html UL list with the matching items by looping over each item and in each loop i call this method in order to highlight the matching part.
As I said, for up to 100 items it works pretty nicely, but it is too much for 500 or more.
Is there any way to make it faster? Maybe by using regex or some other technique?
I also thought about using "setTimeOut" to do it in a extra function or maybe do it only for the items, which currently are visible, because only a couple of items are visible while for the others you have to scroll.
Try limiting visible list size, so you are only showing 100 items at maximum for example. From a usability standpoint, perhaps even go down to only 20 items, so it would be even faster than that. Also consider using classes - see if it improves performance. So instead of
mar<span style="color:#81BEF7;font-weight:bold">bue</span>l
You will have this:
mar<span class="highlight">bue</span>l
String replacement in JavaScript is pretty easy with String.replace():
/**
 * Wraps the first occurrence of `part` in `s` with a highlighting <span>.
 * The matched text is passed through htmlspecialchars before insertion.
 *
 * @param {string} s - text to search
 * @param {string|RegExp} part - what to look for (handed to String.replace)
 * @returns {string} the text with the match wrapped, or `s` when none is found
 */
function linkify(s, part)
{
    var wrapMatch = function (found) {
        return '<span style="color:#81BEF7;font-weight:bold">' + htmlspecialchars(found) + '</span>';
    };
    return s.replace(part, wrapMatch);
}
/**
 * Escapes the characters that are significant in HTML markup.
 *
 * "&" is replaced first so the entities produced for the other characters
 * are not escaped a second time, and each replacement uses a global regex
 * because a string pattern only replaces the first occurrence.
 *
 * @param {string} txt - raw text
 * @returns {string} text with &, <, > and " replaced by HTML entities
 */
function htmlspecialchars(txt)
{
    return txt.replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');
}
// Demo call: logs 'marbuel' with the first 'bue' wrapped in the highlight span.
console.log(linkify('marbuel', 'bue'));
I fixed this problem by using regex instead of my method posted previous. I replace the string now with the following code:
return text.replace(new RegExp('(' + part + ')', 'gi'), "<span>$1</span>");
This is pretty fast. Much faster than the code above. 500 items in the autocomplete seems to be no problem. But can anybody explain why this is so much faster than my method or doing it with string.replace without regex? I have no idea.
Thx!
does anyone know of a good regular expression to remove events from html.
For example the string:
"<h1 onmouseover="top.location='http://www.google.com">Large Text</h1>
Becomes
"<h1>Large Text</h1>
So HTML tags are preserved but events like onmouseover, onmouseout, onclick, etc. are removed.
Thanks in Advance!
How about:
// Remove every ` on<name>="..."` attribute (double-quoted values only).
// Strings are immutable, so the result of replace() must be assigned back.
data = data.replace(/ on\w+="[^"]*"/g, '');
Edit from the comments:
This is intended to be run on your markup as a one time thing. If you're trying to remove events dynamically during the execution of the page, that's a slightly different story. A javascript library like jQuery makes it extremely easy, though:
// Calls jQuery's unbind() on every element matched by '*'.
// NOTE(review): presumably this only detaches handlers registered through
// jQuery, not inline on* attributes in the markup — confirm.
$('*').unbind();
Edit:
Restricting this to only within tags is a lot harder. I'm not confident it can be done with a single regex expression. However, this should get you by if no one can come up with one:
// Repeatedly strip a trailing run of ` on<name>="..."` attributes from inside
// tags until a full pass makes no replacement. Each pass keeps only the part
// of the match that precedes the attributes.
var matched = true;
while (matched)
{
    matched = false;
    data = data.replace(/(<[^>]+)( on\w+="[^"]*")+/g, function (wholeMatch, goodPart)
    {
        matched = true;
        return goodPart;
    });
}
Edit:
I surrender at writing a single regex for this. There must be some way to check the context of a match without actually capturing the beginning of the tag in your match, but my RegEx-fu is not strong enough. This is the most elegant solution I'm going to come up with:
// For every tag opening ("<" up to, but not including, the next ">"), remove
// each ` on<name>="..."` attribute found inside it. Text outside tags is
// never touched.
data = data.replace(/<[^>]+/g, function (tagText)
{
    var withoutHandlers = tagText.replace(/ on\w+="[^"]*"/g, '');
    return withoutHandlers;
});
Here's a pure JS way to do it:
/**
 * Removes attributes whose name starts with "on" (preceded by a space) from
 * the tags of an HTML string, scanning character by character.
 *
 * @param {string} html - markup to clean
 * @returns {string} the markup with those attributes cut out
 */
function clean(html) {
// Cuts html from index `strip` up to (not including) index `j`, then moves
// `j` back to the cut point and clears the pending strip marker.
function stripHTML(){
html = html.slice(0, strip) + html.slice(j);
j = strip;
strip = false;
}
// A "<" only starts a tag when followed by a letter, "?", "\", "/" or "!".
function isValidTagChar(str) {
return str.match(/[a-z?\\\/!]/i);
}
var strip = false; //index where the attribute being removed starts, or false when none is pending
var lastQuote = false; //the quote character of the attribute value we are inside, or false when outside quotes
for(var i=0; i<html.length; i++){
if(html[i] === "<" && html[i+1] && isValidTagChar(html[i+1])) {
i++;
//Enter element
// `j` is declared with var, so it is function-scoped and stripHTML above
// can read and reassign it.
for(var j=i; j<html.length; j++){
// An unquoted ">" ends the tag; flush any pending attribute first.
if(!lastQuote && html[j] === ">"){
if(strip) {
stripHTML();
}
i = j;
break;
}
// Closing quote of the current attribute value.
if(lastQuote === html[j]){
lastQuote = false;
continue;
}
// An opening quote is only recognised directly after "=".
if(!lastQuote && html[j-1] === "=" && (html[j] === "'" || html[j] === '"')){
lastQuote = html[j];
}
//Find on statements
// " on" outside quotes: remember the index of the leading space.
if(!lastQuote && html[j-2] === " " && html[j-1] === "o" && html[j] === "n"){
strip = j-2;
}
// A space outside quotes ends the attribute that is pending removal.
if(strip && html[j] === " " && !lastQuote){
stripHTML();
}
}
}
}
return html;
}