What is a good regular expression to match a URL? [duplicate] - javascript

This question already has answers here:
What is the best regular expression to check if a string is a valid URL?
(62 answers)
Closed 8 years ago.
Currently I have an input box which will detect the URL and parse the data.
So right now, I am using:
var urlR = /^(?:([A-Za-z]+):)?(\/{0,3})([0-9.\-A-Za-z]+)
(?::(\d+))?(?:\/([^?#]*))?(?:\?([^#]*))?(?:#(.*))?$/;
var url= content.match(urlR);
The problem is, when I enter a URL like www.google.com, its not working. when I entered http://www.google.com, it is working.
I am not very fluent in regular expressions. Can anyone help me?

Regex if you want to ensure URL starts with HTTP/HTTPS:
https?:\/\/(www\.)?[-a-zA-Z0-9#:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()#:%_\+.~#?&//=]*)
If you do not require HTTP protocol:
[-a-zA-Z0-9#:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()#:%_\+.~#?&//=]*)
To try this out see http://regexr.com?37i6s, or for a version which is less restrictive http://regexr.com/3e6m0.
Example JavaScript implementation:
var expression = /[-a-zA-Z0-9#:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()#:%_\+.~#?&//=]*)?/gi;
var regex = new RegExp(expression);
var t = 'www.google.com';
if (t.match(regex)) {
alert("Successful match");
} else {
alert("No match");
}

(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})
Will match the following cases
http://www.foufos.gr
https://www.foufos.gr
http://foufos.gr
http://www.foufos.gr/kino
http://werer.gr
www.foufos.gr
www.mp3.com
www.t.co
http://t.co
http://www.t.co
https://www.t.co
www.aa.com
http://aa.com
http://www.aa.com
https://www.aa.com
Will NOT match the following
www.foufos
www.foufos-.gr
www.-foufos.gr
foufos.gr
http://www.foufos
http://foufos
www.mp3#.com
var expression = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})/gi;
var regex = new RegExp(expression);
var check = [
'http://www.foufos.gr',
'https://www.foufos.gr',
'http://foufos.gr',
'http://www.foufos.gr/kino',
'http://werer.gr',
'www.foufos.gr',
'www.mp3.com',
'www.t.co',
'http://t.co',
'http://www.t.co',
'https://www.t.co',
'www.aa.com',
'http://aa.com',
'http://www.aa.com',
'https://www.aa.com',
'www.foufos',
'www.foufos-.gr',
'www.-foufos.gr',
'foufos.gr',
'http://www.foufos',
'http://foufos',
'www.mp3#.com'
];
check.forEach(function(entry) {
if (entry.match(regex)) {
$("#output").append( "<div >Success: " + entry + "</div>" );
} else {
$("#output").append( "<div>Fail: " + entry + "</div>" );
}
});
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<div id="output"></div>
Check it in rubular - NEW version
Check it in rubular - old version

These are the droids you're looking for. This is taken from validator.js which is the library you should really use to do this. But if you want to roll your own, who am I to stop you? If you want pure regex then you can just take out the length check. I think it's a good idea to test the length of the URL though if you really want to determine compliance with the spec.
function isURL(str) {
var urlRegex = '^(?!mailto:)(?:(?:http|https|ftp)://)(?:\\S+(?::\\S*)?#)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?$';
var url = new RegExp(urlRegex, 'i');
return str.length < 2083 && url.test(str);
}
Test:
function isURL(str) {
var urlRegex = '^(?!mailto:)(?:(?:http|https|ftp)://)(?:\\S+(?::\\S*)?#)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-?)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?$';
var url = new RegExp(urlRegex, 'i');
return str.length < 2083 && url.test(str);
}
var check = [
'http://www.foufos.gr',
'https://www.foufos.gr',
'http://foufos.gr',
'http://www.foufos.gr/kino',
'http://werer.gr',
'www.foufos.gr',
'www.mp3.com',
'www.t.co',
'http://t.co',
'http://www.t.co',
'https://www.t.co',
'www.aa.com',
'http://aa.com',
'http://www.aa.com',
'https://www.aa.com',
'www.foufos',
'www.foufos-.gr',
'www.-foufos.gr',
'foufos.gr',
'http://www.foufos',
'http://foufos',
'www.mp3#.com'
];
for (let index = 0; index < check.length; index++) {
var url=check[index]
if (isURL(check[index]))
console.log(`${url} ✔`);
else{
console.log(`${url} ❌`);
}
}
Result

Another possible solution, above solution failed for me in parsing query string params.
var regex = new RegExp("^(http[s]?:\\/\\/(www\\.)?|ftp:\\/\\/(www\\.)?|www\\.){1}([0-9A-Za-z-\\.#:%_\+~#=]+)+((\\.[a-zA-Z]{2,3})+)(/(.)*)?(\\?(.)*)?");
if(regex.test("http://google.com")){
alert("Successful match");
}else{
alert("No match");
}
In this solution please feel free to modify [-0-9A-Za-z\.#:%_\+~#=, to match the domain/sub domain name. In this solution query string parameters are also taken care.
If you are not using RegEx, then from the expression replace \\ by \.
Hope this helps.
Test:-
function IsUrl(url){
var regex = new RegExp("^(http[s]?:\\/\\/(www\\.)?|ftp:\\/\\/(www\\.)?|www\\.){1}([0-9A-Za-z-\\.#:%_\+~#=]+)+((\\.[a-zA-Z]{2,3})+)(/(.)*)?(\\?(.)*)?");
if(regex.test(url)){
console.log(`${url} ✔`);
}else{
console.log(`${url} ❌`);
}}
var check = [
'http://www.foufos.gr',
'https://www.foufos.gr',
'http://foufos.gr',
'http://www.foufos.gr/kino',
'http://werer.gr',
'www.foufos.gr',
'www.mp3.com',
'www.t.co',
'http://t.co',
'http://www.t.co',
'https://www.t.co',
'www.aa.com',
'http://aa.com',
'http://www.aa.com',
'https://www.aa.com',
'www.foufos',
'www.foufos-.gr',
'www.-foufos.gr',
'foufos.gr',
'http://www.foufos',
'http://foufos',
'www.mp3#.com'
];
for (let index = 0; index < check.length; index++) {
IsUrl(check[index])
}
Result

I was trying to put together some JavaScript to validate a domain name (ex. google.com) and if it validates enable a submit button. I thought that I would share my code for those who are looking to accomplish something similar. It expects a domain without any http:// or www. value. The script uses a stripped down regular expression from above for domain matching, which isn't strict about fake TLD.
http://jsfiddle.net/nMVDS/1/
$(function () {
$('#whitelist_add').keyup(function () {
if ($(this).val() == '') { //Check to see if there is any text entered
//If there is no text within the input, disable the button
$('.whitelistCheck').attr('disabled', 'disabled');
} else {
// Domain name regular expression
var regex = new RegExp("^([0-9A-Za-z-\\.#:%_\+~#=]+)+((\\.[a-zA-Z]{2,3})+)(/(.)*)?(\\?(.)*)?");
if (regex.test($(this).val())) {
// Domain looks OK
//alert("Successful match");
$('.whitelistCheck').removeAttr('disabled');
} else {
// Domain is NOT OK
//alert("No match");
$('.whitelistCheck').attr('disabled', 'disabled');
}
}
});
});
HTML FORM:
<form action="domain_management.php" method="get">
<input type="text" name="whitelist_add" id="whitelist_add" placeholder="domain.com">
<button type="submit" class="btn btn-success whitelistCheck" disabled='disabled'>Add to Whitelist</button>
</form>

Related

Regex check against referrer URL string

var orig = document.referrer; // Incoming URL
var check = new RegExp("boxes", "gi"); // Literal string, global + case insensitive.
// console.log(check);
if (orig.indexOf(check) > -1) {
console.log('you came from the box section');
} else {
console.log('you DIDNT come the box section');
}
Hi Guys,
I have a 'boxes' category on a site, where all box items have 'boxes' in the URL. A particular item from another category needs to be able to check whether or not the user came from a 'boxes' item. (This is an interim solution as I only have skin-level access).
When logging 'check', I get '/boxes/gi', which should be working when checking within indexOf, as a valid regex string.
I am not too sure why I can not get this to properly check, as the result is only ever that the user didn't come from the 'boxes' section.
I have a lot to learn, so in advance, I greatly appreciate any help.
Thanks!
You can use string variable instead of regex
var orig = document.referrer; // Incoming URL
// console.log(check);
if (orig.indexOf("boxes") > -1) {
console.log('you came from the box section');
} else {
console.log('you DIDNT come the box section');
}
indexOf does not accept a regex as argument. You either use your regex with search, or use indexOf with a string.
orig.toLowerCase().indexOf("box") > -1
// or
orig.search(check) > -1
You can parse the referrer URL into a link element and retrieve its pathname. You should also probably check the hostname to make sure it's from your own site:
var url = document.createElement('a');
url.href = document.referrer;
var comingFromBoxes = url.hostname === 'yoursite.com' && url.pathname.indexOf('/boxes') === 0;
Note: the referrer is not a reliable value by any means and should not be considered as such.
You can use match() with the regex to perform your logic.
$(document).ready(function(){
var url = "www.someurl.com/boxes/gi/abc";
var regex = /\/boxes\/gi/g;
var mtch = url.match(regex);
if(mtch !== null){
alert('url has the value');
}
else{
alert('url does not have the value');
}
})
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

RegEx test method returns false when it should be true

I've tested the three of the regex's on http://www.regexpal.com/ and they are what I need, but when doing a regex test 2 of them return false (BTC and CAD) and only the Bitcoin address seems to work (you may test with this wallet below).
13dHNxtbckAFM4PzdBqWgymFJmcv3Yzi32
https://jsfiddle.net/ps2fj1ff/1
(all the relevant code is in the html section)
var regWallet = new RegExp("^[13][a-km-zA-HJ-NP-Z1-9]{25,34}$");
var regBTC = new RegExp("^\d*\.\d*$");
var regCAD = new RegExp("^\d+(\.(\d{2}))?$");
$('#button1').on('click', function() {
var btcCheck = $('#amount_btc').val();
if (regBTC.test(btcCheck)) {
} else {
alert("Invalid BTC value!");
}
var cadCheck = $('#amount_cad').val();
if (regCAD.test(cadCheck)) {
} else {
alert("Invalid CAD value!");
}
var walletCheck = $('#wallet').val();
if (regWallet.test(walletCheck)) {
} else {
alert("Invalid Bitcoin address, please make sure you've entered a valid address!");
}
});
The reason is that in var regBTC = new RegExp("^\d*\.\d*$"); the \ is used to escape the character so if you console.log(regBTC) you will see it as ^d*.d*$.
To prevent this you will have to double escape it: var regBTC = new RegExp("^\\d*\\.\\d*$");
Or better yet use / instead: var regBTC = /^\d*\.\d*$/;
The same goes for the other regex too.
(I initially thought single quote would work too, but apparently not in javascript)
Use this instead:
var regBTC = /^\d*\.\d*$/;
var regCAD = /^\d+(\.(\d{2}))$/;
It's cleaner and more readable as most editors will give you regexp syntax highlighting in this format.
There really isn't any good reason to use new RegExp which forces you to write the expression as a string, which forces you to use confusing escapes, when there is a proper regular expression syntax built into JavaScript.

How to check if string contains '?' using javascript

I'm trying to check if an url contains a query string or not.
Lets say we have these two url's.
http://localhost:3000/userbookings/list
http://localhost:3000/userbookings/list?resource=de
My string is called fullPath, and I need to check if it contains the ?, so I know if its a query string or not.
Have tried with the following code:
if (fullPath.indexOf("?") > -1){
content = fs-readFileSync('http://localhost:3000/userbookings/list1');
}
else {
content = fs.readFileSync(fullPath);
}
Your way should work too but if you want in the future to use more complex qualifiers you could start using regular expressions:
var pattern = /\?/g;
var found = fullPath.match(pattern) != null;
alert(found);
this help you :
<html>
<head>
</head>
<body>
<script>
var str = "this is ?text";
var patt = /\?/gi;
if(str.search(patt)!=-1)
alert("Found");
else
alert("No found");
</script>
</body>
</html>
Like this:
if (str.indexOf("?") >= 0)
Or,
if (/\?/i.test(str))
Or,
if(str.includes('?'))
Please note that String.includes is an EcmaScript 6 feature and may not work in some browsers.
I think your goal is just to check if there's a php get variable right??
if(document.localtion.search != "") {
// Your code
}
The document.location.search will be "?resource=de" if you visit the url
http://localhost:3000/userbookings/list?resource=de
And it will be "" if you visit the url
http://localhost:3000/userbookings/list
Answer #2
check = document.location.split("?");
if(check.length > 1) {
//do your code
}
splitting the http://localhost:3000/userbookings/list?resource=de url using "?" will be splitted by 2.
And splitting the http://localhost:3000/userbookings/list url using "?" wil result by 1.
Answer #3
check = fullpath.replace("?","");
if(check != fullpath) {
//do your code
}
Removing the "?" in the full path. If the check is the same as fullpath then it doesn't have a "?"
I think this might help you out. Feel free to comment

JS Regex url validation

I tried to validate url with or without http No matter what i did the function return false.
I checked my regex string in this site:
http://regexr.com/
And its seen as i expect.
function isUrlValid(userInput) {
var regexQuery = "/(http(s)?://.)?(www\.)?[-a-zA-Z0-9#:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9#:%_\+.~#?&//=]*)/";
var url = new RegExp(regexQuery,"g");
if (url.test(userInput)) {
alert('Great, you entered an E-Mail-address');
return true;
}
return false;
}
I fix the problem by change the .test to .match and leave the regex as is.
I change the function to Match + make a change here with the slashes and its work: (http(s)?://.)
The fixed function:
function isUrlValid(userInput) {
var res = userInput.match(/(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9#:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9#:%_\+.~#?&//=]*)/g);
if(res == null)
return false;
else
return true;
}
I believe the other answer will reject some valid url's (like domain names in uppercase or long sub-domains) and allow some invalid ones (like http://www.-example-.com or www.%#&.com). I tried to take into account a number of additional url syntax rules (without getting into internationalisation).
function isUrlValid(userInput) {
var regexQuery = "^(https?://)?(www\\.)?([-a-z0-9]{1,63}\\.)*?[a-z0-9][-a-z0-9]{0,61}[a-z0-9]\\.[a-z]{2,6}(/[-\\w#\\+\\.~#\\?&/=%]*)?$";
var url = new RegExp(regexQuery,"i");
return url.test(userInput);
}
var input = ["https://o.sub-domain.example.com/foo/bar?foo=bar&boo=far#a%20b",
"HTTP://EX-AMPLE.COM",
"example.c",
"example-.com"];
for (var i in input) document.write(isUrlValid(input[i]) + ": " + input[i] + "<br>");
To also allow IP addresses and port numbers, the regex is:
"^(https?://)?(((www\\.)?([-a-z0-9]{1,63}\\.)*?[a-z0-9][-a-z‌​0-9]{0,61}[a-z0-9]\\‌​.[a-z]{2,6})|((\\d{1‌​,3}\\.){3}\\d{1,3}))‌​(:\\d{2,4})?(/[-\\w#‌​\\+\\.~#\\?&/=%]*)?$‌​"
To also allow query strings without a slash between the domain name and the question mark (which is theoretically not allowed, but works in most real-life situations), the regex is:
"^(https?://)?(((www\\.)?([-a-z0-9]{1,63}\\.)*?[a-z0-9][-a-z‌​0-9]{0,61}[a-z0-9]\\‌​.[a‌​-z]{2,6})|((\\d‌​{1,3}\\.){3}\\d{1,3}‌​))(:\\d{2,4})?((/|\\‌​?)[-\\w#\\+\\.~#\\?&‌​/=%]*)?$"
To also make sure that every % is followed by a hex number, the regex is:
"^(https?://)?(((www\\.)?([-a-z0-9]{1,63}\\.)*?[a-z0-9][-a-z‌​0-9]{0,61}[a-z0-9]\\‌​.[a-z]{2,6})|((\\d{1‌​,3}\\.){3}\\d{1,3}))‌​(:\\d{2,4})?((/|\\?)‌​(((%[0-9a-f]{2})|[-\‌​\w#\\+\\.~#\\?&/=])*‌​))?$"
(Note: as John Wu mentioned in a comment, there are valid single-letter domains).
Actually, this question needs a powerful regex and the following code is not very hard to understand, please see below(ES6 - TypeScript):
const isValidUrl = (url: string): boolean => {
const urlRegex = /^((http(s?)?):\/\/)?([wW]{3}\.)?[a-zA-Z0-9\-.]+\.[a-zA-Z]{2,}(\.[a-zA-Z]{2,})?$/g;
const result = url.match(urlRegex);
return result !== null;
};
Try this code.
function CheckURL(fieldId, alertMessage) {
var url = fieldId.value;
if(url !== "")
{
if (url.match(/(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9#:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9#:%_\+.~#?&//=]*)/g) !== null)
return true;
else {
alert(alertMessage);
fieldId.focus();
return false;
}
}
}
var website = document.getElementById('Website');
if (!CheckURL(website, "Enter a valid website address")) {
return false;
}

Validate email as you type

I want to validate input as I type so I use onkeyup event to do so, but if I validate an email regex "name#domain.com", as soon as user starts to type it throws an error - first character doesnt match regex...
So I wrote this:
var addValidation = function (patterns) {
var index = patterns.length; //I know I can avoid this
while (index--) {
patterns[index] = new RegExp(patterns[index]);
}
index = 0;
var current = patterns[index],
matchExact = function (patt, str) {
var match = str.match(patt);
return match !== null && str === match[0];
};
return function () {
var str = this.value;
if (!matchExact(current, str) ) {
var tmp = patterns[index + 1] ?
new RegExp(current.source + patterns[index + 1].source) :
false;
if (tmp && matchExact(tmp, str)) {
current = tmp;
index++;
}
else {
alert("Wrong");
}
}
}
};
document.getElementById("x").onkeyup = addValidation(["[a-zA-Z0-9\\.]+", "#{1}", "[a-zA-Z0-9]+", "\\.{1}", "[a-zA-Z]{1,3}"]);
It seems to work, but... it's ugly and it will alert you if you do step back (eg. "name#" and you press backspace).
I know that Dojo's validation is great, but I do not want to use Dojo. Are there any better ways to achieve that?
//EDIT: http://livedocs.dojotoolkit.org/dijit/form/ValidationTextBox this is an example, but you can define your own pattern (like email regex) and it will validate it perfectly.
Add interval before validation will start:
var t;
document.getElementById("x").onkeyup = function () {
if (t) {
clearTimeout(t);
}
t = setTimeout(function () {
//do validation
}, 1000)
}
Don't ever try to validate an email address with a regualr expression. You'll either end up allowing addresses which are not valid, or block email addresses which are perfectly valid and just annoy your visitors. It's also worth bearing in mind that the best regex so far for validating email addresses is this:
http://www.ex-parrot.com/pdw/Mail-RFC822-Address.html

Categories