Is there a way in JavaScript to check if a string is a URL?
RegExes are excluded because the URL is most likely written like stackoverflow; that is to say that it might not have a .com, www or http.
If you want to check whether a string is valid HTTP URL, you can use URL constructor (it will throw on malformed string):
/**
 * Reports whether `string` parses as an absolute URL that uses the
 * HTTP or HTTPS scheme.
 *
 * @param {string} string - Candidate URL text.
 * @returns {boolean} `true` only when the URL constructor accepts the input
 *   and the parsed protocol is `http:` or `https:`.
 */
function isValidHttpUrl(string) {
  try {
    // The URL constructor throws a TypeError on anything it cannot parse.
    const { protocol } = new URL(string);
    return ["http:", "https:"].includes(protocol);
  } catch (_) {
    return false;
  }
}
// Demo: each label names the exact string handed to isValidHttpUrl.
console.log("https://example.com: " + isValidHttpUrl("https://example.com"));
console.log("example.com: " + isValidHttpUrl("example.com"));
Note: Per RFC 3986, a URL must begin with a scheme (not limited to http/https), e.g.:
www.example.com is not valid URL (missing scheme)
javascript:void(0) is valid URL, although not an HTTP one
http://.. is valid URL with the host being .. (whether it resolves depends on your DNS)
https://example..com is valid URL, same as above
A related question with an answer
Or this Regexp from Devshed:
/**
 * Tests `str` against a pattern for http(s) URLs whose scheme is optional.
 *
 * The host must be either a dotted domain name ending in a 2+ letter TLD or a
 * dotted-quad IPv4 literal; port, path, query string and fragment are optional.
 *
 * @param {string} str - Candidate URL text.
 * @returns {boolean} `true` when the whole string matches the pattern.
 */
function validURL(str) {
  // Each label is written as `x(?:...x)?` rather than `x(...x)*`: both accept
  // the same labels, but the starred form can split a label in exponentially
  // many ways and stalls on long dot-less input.
  const pattern = new RegExp('^(https?:\\/\\/)?' + // protocol
    '(?:(?:[a-z\\d](?:[a-z\\d-]*[a-z\\d])?\\.)+[a-z]{2,}|' + // domain name
    '(?:\\d{1,3}\\.){3}\\d{1,3})' + // OR ip (v4) address
    '(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*' + // port and path
    '(\\?[;&a-z\\d%_.~+=-]*)?' + // query string
    '(\\#[-a-z\\d_]*)?$', 'i'); // fragment locator
  return pattern.test(str);
}
/**
 * Tests `str` against a pattern for http(s) URLs whose scheme is optional and
 * whose host may omit dots (so single-word hosts such as `localhost` pass).
 *
 * @param {string} str - Candidate URL text.
 * @returns {boolean} `true` when the whole string matches the pattern.
 */
function isURL(str) {
  // The host was originally `(label\.?)+[a-z]{2,}` with a starred label body.
  // Adjacent labels without a dot merge into one label, so the same language is
  // `label(\.label)*\.?[a-z]{2,}`. That form has only one way to split the
  // input, which removes the exponential backtracking on non-matching strings.
  const label = '[a-z\\d](?:[a-z\\d-]*[a-z\\d])?';
  const pattern = new RegExp('^(https?:\\/\\/)?' + // protocol
    '(?:' + label + '(?:\\.' + label + ')*\\.?[a-z]{2,}|' + // domain name
    '(?:\\d{1,3}\\.){3}\\d{1,3})' + // OR ip (v4) address
    '(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*' + // port and path
    '(\\?[;&a-z\\d%_.~+=-]*)?' + // query string
    '(\\#[-a-z\\d_]*)?$', 'i'); // fragment locator
  return pattern.test(str);
}
Rather than using a regular expression, I would recommend making use of an anchor element.
when you set the href property of an anchor, various other properties are set.
// Demo: assign a URL to a detached <a> element's href, then read the
// individual URL components back from the element's properties.
var parser = document.createElement('a');
parser.href = "http://example.com:3000/pathname/?search=test#hash";
parser.protocol; // => "http:"
parser.hostname; // => "example.com"
parser.port; // => "3000"
parser.pathname; // => "/pathname/"
parser.search; // => "?search=test"
parser.hash; // => "#hash"
parser.host; // => "example.com:3000"
source
However, if the value href is bound to is not a valid url, then the value of those auxiliary properties will be the empty string.
Edit: as pointed out in the comments: if an invalid url is used, the properties of the current URL may be substituted.
So, as long as you're not passing in the URL of the current page, you can do something like:
/**
 * Checks a string by assigning it to a detached anchor element's `href` and
 * inspecting the resulting `host`.
 *
 * The input is rejected when the anchor ends up with an empty host, or with the
 * current page's host, so URLs that point at the current host are always
 * reported as invalid.
 *
 * @param {string} str - Candidate URL text.
 * @returns {boolean} Always a real boolean (never the empty-string host).
 */
function isValidURL(str) {
  const anchor = document.createElement('a');
  anchor.href = str;
  return Boolean(anchor.host) && anchor.host !== window.location.host;
}
I am using below function to validate URL with or without http/https:
/**
 * Reports whether `string` contains something URL-shaped, with or without an
 * http/https prefix.
 *
 * The pattern is deliberately unanchored: a match anywhere in the input counts.
 * Both character classes list `@` (the original listed `#` twice), so
 * userinfo such as `user@host.tld` is recognised.
 *
 * @param {string} string - Text to inspect.
 * @returns {boolean} `true` when a URL-like substring is found.
 */
function isValidURL(string) {
  // No `g` flag: only a yes/no answer is needed, not the list of all matches.
  const urlLike = /(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/;
  return urlLike.test(string);
}
var testCase1 = "http://en.wikipedia.org/wiki/Procter_&_Gamble";
console.log(isValidURL(testCase1)); // return true
var testCase2 = "http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707";
console.log(isValidURL(testCase2)); // return true
var testCase3 = "https://sdfasd";
console.log(isValidURL(testCase3)); // return false
var testCase4 = "dfdsfdsfdfdsfsdfs";
console.log(isValidURL(testCase4)); // return false
var testCase5 = "magnet:?xt=urn:btih:123";
console.log(isValidURL(testCase5)); // return false
var testCase6 = "https://stackoverflow.com/";
console.log(isValidURL(testCase6)); // return true
var testCase7 = "https://w";
console.log(isValidURL(testCase7)); // return false
var testCase8 = "https://sdfasdp.ppppppppppp";
console.log(isValidURL(testCase8)); // return false
To Validate Url using javascript is shown below
/**
 * Checks whether `str` contains an http:// or https:// URL and, if it does
 * not, notifies the user through `alert`.
 *
 * @param {string} str - Text to inspect.
 * @returns {boolean} `true` when the pattern is found, `false` otherwise.
 */
function ValidURL(str) {
  const urlPattern = /(?:https?):\/\/(\w+:?\w*)?(\S+)(:\d+)?(\/|\/([\w#!:.?+=&%!\-\/]))?/;
  if (urlPattern.test(str)) {
    return true;
  }
  alert("Please enter valid URL.");
  return false;
}
Rely on a library:
https://www.npmjs.com/package/valid-url
import { isWebUri } from 'valid-url';
// ...
if (!isWebUri(url)) {
return "Not a valid url.";
}
Improvement on the accepted answer...
Check for ftp/ftps as protocol
Has double escaping for backslashes (\\)
Ensures that domains have a dot and an extension (.com .io .xyz)
Allows full colon (:) in the path e.g. http://thingiverse.com/download:1894343
Allows ampersand (&) in path e.g http://en.wikipedia.org/wiki/Procter_&_Gamble
Allows # symbol in path e.g. https://medium.com/#techytimo
/**
 * Tests `str` against a pattern for http/https/ftp/ftps URLs whose scheme is
 * optional. The host must be a dotted domain with a 2+ letter extension or a
 * dotted-quad IPv4 literal.
 *
 * The path and query string additionally accept `:`, `&`, `#` and `@`
 * (e.g. `/download:1894343`, `/wiki/Procter_&_Gamble`, `/@user`).
 *
 * @param {string} str - Candidate URL text.
 * @returns {boolean} `true` when the whole string matches the pattern.
 */
function isURL(str) {
  // Labels use `x(?:...x)?` instead of `x(...x)*`: same accepted labels, but
  // no exponential backtracking on long input that lacks a dot.
  const pattern = new RegExp('^((ft|htt)ps?:\\/\\/)?' + // protocol
    '(?:(?:[a-z\\d](?:[a-z\\d-]*[a-z\\d])?\\.)+[a-z]{2,}|' + // domain name and extension
    '(?:\\d{1,3}\\.){3}\\d{1,3})' + // OR ip (v4) address
    '(\\:\\d+)?' + // port
    '(\\/[-a-z\\d%@#_.~+&:]*)*' + // path
    '(\\?[;&a-z\\d%@#_.,~+:=-]*)?' + // query string
    '(\\#[-a-z\\d_]*)?$', 'i'); // fragment locator
  return pattern.test(str);
}
You can use the URL native API:
/**
 * Reports whether the native URL constructor accepts `string`.
 *
 * @param {string} string - Candidate URL text.
 * @returns {boolean} `false` when construction throws, `true` otherwise.
 */
const isUrl = string => {
  try {
    new URL(string);
  } catch (e) {
    return false;
  }
  return true;
};
Use validator.js
ES6
import isURL from 'validator/lib/isURL'
isURL(string)
No ES6
var validator = require('validator');
validator.isURL(string)
You can also fine tune this function's behavior by passing optional options object as the second argument of isURL
Here is the default options object:
let options = {
protocols: [
'http',
'https',
'ftp'
],
require_tld: true,
require_protocol: false,
require_host: true,
require_valid_protocol: true,
allow_underscores: false,
host_whitelist: false,
host_blacklist: false,
allow_trailing_dot: false,
allow_protocol_relative_urls: false,
disallow_auth: false
}
isURL(string, options)
host_whitelist and host_blacklist can be arrays of hosts. They also support regular expressions.
let options = {
host_blacklist: ['foo.com', 'bar.com'],
}
isURL('http://foobar.com', options) // => true
isURL('http://foo.bar.com/', options) // => true
isURL('http://qux.com', options) // => true
isURL('http://bar.com/', options) // => false
isURL('http://foo.com/', options) // => false
options = {
host_blacklist: ['bar.com', 'foo.com', /\.foo\.com$/],
}
isURL('http://foobar.com', options) // => true
isURL('http://foo.bar.com/', options) // => true
isURL('http://qux.com', options) // => true
isURL('http://bar.com/', options) // => false
isURL('http://foo.com/', options) // => false
isURL('http://images.foo.com/', options) // => false
isURL('http://cdn.foo.com/', options) // => false
isURL('http://a.b.c.foo.com/', options) // => false
Here is yet another method.
// ***note***: if the incoming value is empty(""), the function returns true
// Lazily created <input type="url"> that is reused across calls.
var elm;
/**
 * Validates `u` using the validity state of an `<input type="url">` element.
 *
 * @param {string} u - Candidate URL text.
 * @returns {boolean} `false` for the empty string; otherwise whatever the
 *   input element's `validity.valid` reports.
 */
function isValidURL(u){
  // The empty string is rejected up front rather than handed to the element.
  if (u === "") {
    return false;
  }
  if (!elm) {
    elm = document.createElement('input');
    elm.setAttribute('type', 'url');
  }
  elm.value = u;
  return elm.validity.valid;
}
console.log(isValidURL(''));
console.log(isValidURL('http://www.google.com/'));
console.log(isValidURL('//google.com'));
console.log(isValidURL('google.com'));
console.log(isValidURL('localhost:8000'));
As has been noted the perfect regex is elusive but still seems to be a reasonable approach (alternatives are server side tests or the new experimental URL API). However the high ranking answers are often returning false for common URLs but even worse will freeze your app/page for minutes on even as simple a string as isURL('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'). It's been pointed out in some of the comments, but most probably haven't entered a bad value to see it. Hanging like that makes that code unusable in any serious application. I think it's due to the repeated case insensitive sets in code like ((([a-z\\d]([a-z\\d-]*[a-z\\d])*)\\.?)+[a-z]{2,}|' .... Take out the 'i' and it doesn't hang but will of course not work as desired. But even with the ignore case flag those tests reject high unicode values that are allowed.
The best already mentioned is:
/**
 * Loose URL shape check: an optional `scheme:` followed by `//`, then either a
 * dotted host or `localhost`, with no whitespace anywhere after that.
 *
 * @param {string} str - Candidate URL text.
 * @returns {boolean} `true` when the whole string has that shape.
 */
function isURL(str) {
  const urlShape = /^(?:\w+:)?\/\/([^\s\.]+\.\S{2}|localhost[\:?\d]*)\S*$/;
  return urlShape.test(str);
}
That comes from Github segmentio/is-url. The good thing about a code repository is you can see the testing and any issues and also the test strings run through it. There's a branch that would allow strings missing protocol like google.com, though you're probably making too many assumptions then. The repository has been updated and I'm not planning on trying to keep up a mirror here. It's been broken up into separate tests to avoid RegEx redos which can be exploited for DOS attacks (I don't think you have to worry about that with client side js, but you do have to worry about your page hanging for so long that your visitor leaves your site).
There is one other repository I've seen that may even be better for isURL at dperini/regex-weburl.js, but it is highly complex. It has a bigger test list of valid and invalid URLs. The simple one above still passes all the positives and only fails to block a few odd negatives like http://a.b--c.de/ as well as the special ips.
Whichever you choose, run it through this function, which I've adapted from the tests on dperini/regex-weburl.js, while using your browser's Developer Tools inspector.
/**
 * Runs whichever `isURL` implementation is in scope against a list of URLs
 * that should be accepted and a list that should be rejected, reporting each
 * mismatch through `console.assert`.
 *
 * The userinfo fixtures use `@` as the delimiter (`user:password@host`), which
 * is what separates credentials from the host in a URL.
 */
function testIsURL() {
  //should match
  console.assert(isURL("http://foo.com/blah_blah"));
  console.assert(isURL("http://foo.com/blah_blah/"));
  console.assert(isURL("http://foo.com/blah_blah_(wikipedia)"));
  console.assert(isURL("http://foo.com/blah_blah_(wikipedia)_(again)"));
  console.assert(isURL("http://www.example.com/wpstyle/?p=364"));
  console.assert(isURL("https://www.example.com/foo/?bar=baz&inga=42&quux"));
  console.assert(isURL("http://✪df.ws/123"));
  console.assert(isURL("http://userid:password@example.com:8080"));
  console.assert(isURL("http://userid:password@example.com:8080/"));
  console.assert(isURL("http://userid@example.com"));
  console.assert(isURL("http://userid@example.com/"));
  console.assert(isURL("http://userid@example.com:8080"));
  console.assert(isURL("http://userid@example.com:8080/"));
  console.assert(isURL("http://userid:password@example.com"));
  console.assert(isURL("http://userid:password@example.com/"));
  console.assert(isURL("http://142.42.1.1/"));
  console.assert(isURL("http://142.42.1.1:8080/"));
  console.assert(isURL("http://➡.ws/䨹"));
  console.assert(isURL("http://⌘.ws"));
  console.assert(isURL("http://⌘.ws/"));
  console.assert(isURL("http://foo.com/blah_(wikipedia)#cite-1"));
  console.assert(isURL("http://foo.com/blah_(wikipedia)_blah#cite-1"));
  console.assert(isURL("http://foo.com/unicode_(✪)_in_parens"));
  console.assert(isURL("http://foo.com/(something)?after=parens"));
  console.assert(isURL("http://☺.damowmow.com/"));
  console.assert(isURL("http://code.google.com/events/#&product=browser"));
  console.assert(isURL("http://j.mp"));
  console.assert(isURL("ftp://foo.bar/baz"));
  console.assert(isURL("http://foo.bar/?q=Test%20URL-encoded%20stuff"));
  console.assert(isURL("http://مثال.إختبار"));
  console.assert(isURL("http://例子.测试"));
  console.assert(isURL("http://उदाहरण.परीक्षा"));
  console.assert(isURL("http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com"));
  console.assert(isURL("http://1337.net"));
  console.assert(isURL("http://a.b-c.de"));
  console.assert(isURL("http://223.255.255.254"));
  console.assert(isURL("postgres://u:p@example.com:5702/db"));
  console.assert(isURL("https://d1f4470da51b49289906b3d6cbd65074@app.getsentry.com/13176"));
  //SHOULD NOT MATCH:
  console.assert(!isURL("http://"));
  console.assert(!isURL("http://."));
  console.assert(!isURL("http://.."));
  console.assert(!isURL("http://../"));
  console.assert(!isURL("http://?"));
  console.assert(!isURL("http://??"));
  console.assert(!isURL("http://??/"));
  console.assert(!isURL("http://#"));
  console.assert(!isURL("http://##"));
  console.assert(!isURL("http://##/"));
  console.assert(!isURL("http://foo.bar?q=Spaces should be encoded"));
  console.assert(!isURL("//"));
  console.assert(!isURL("//a"));
  console.assert(!isURL("///a"));
  console.assert(!isURL("///"));
  console.assert(!isURL("http:///a"));
  console.assert(!isURL("foo.com"));
  console.assert(!isURL("rdar://1234"));
  console.assert(!isURL("h://test"));
  console.assert(!isURL("http:// shouldfail.com"));
  console.assert(!isURL(":// should fail"));
  console.assert(!isURL("http://foo.bar/foo(bar)baz quux"));
  console.assert(!isURL("ftps://foo.bar/"));
  console.assert(!isURL("http://-error-.invalid/"));
  console.assert(!isURL("http://a.b--c.de/"));
  console.assert(!isURL("http://-a.b.co"));
  console.assert(!isURL("http://a.b-.co"));
  console.assert(!isURL("http://0.0.0.0"));
  console.assert(!isURL("http://10.1.1.0"));
  console.assert(!isURL("http://10.1.1.255"));
  console.assert(!isURL("http://224.1.1.1"));
  console.assert(!isURL("http://1.1.1.1.1"));
  console.assert(!isURL("http://123.123.123"));
  console.assert(!isURL("http://3628126748"));
  console.assert(!isURL("http://.www.foo.bar/"));
  console.assert(!isURL("http://www.foo.bar./"));
  console.assert(!isURL("http://.www.foo.bar./"));
  console.assert(!isURL("http://10.1.1.1"));
}
And then test that string of 'a's.
See this comparison of isURL regex by Mathias Bynens for more info before you post a seemingly great regex.
This function disallows localhost and only allows URLs for web pages (ie, only allows http or https protocol).
It also only allows safe characters as defined here: https://www.urlencoder.io/learn/
/**
 * Accepts only http:// or https:// URLs whose host contains a dot (so a bare
 * `localhost` is rejected) and whose remaining characters come from a fixed
 * allow-list.
 *
 * The pattern carries no flags: with `m`, `^`/`$` would match at line breaks
 * and a multi-line string would pass as long as any single line was a URL;
 * `g` would only add `lastIndex` state that a one-shot test does not need.
 *
 * @param {string} url - Candidate URL text.
 * @returns {boolean} `true` when the whole string matches.
 */
function isValidWebUrl(url) {
  const webUrl = /^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$/;
  return webUrl.test(url);
}
(I don't have reps to comment on ValidURL example; hence post this as an answer.)
While use of protocol relative URLs is not encouraged (The Protocol-relative URL), they do get employed sometimes. To validate such an URL with a regular expression the protocol part could be optional, e.g.:
/**
 * Tests `str` against a pattern for http(s) URLs in which the scheme is
 * optional and may be reduced to a bare `//` (protocol-relative form).
 * Optional `user[:password]@` credentials may precede the host.
 *
 * @param {string} str - Candidate URL text.
 * @returns {boolean} `true` when the whole string matches the pattern.
 */
function isValidURL(str) {
  // Labels use `x(?:...x)?` instead of `x(...x)*`: same accepted labels, but
  // no exponential backtracking on long input that lacks a dot.
  const pattern = new RegExp('^((https?:)?\\/\\/)?' + // protocol
    '(?:\\S+(?::\\S*)?@)?' + // authentication
    '(?:(?:[a-z\\d](?:[a-z\\d-]*[a-z\\d])?\\.)+[a-z]{2,}|' + // domain name
    '(?:\\d{1,3}\\.){3}\\d{1,3})' + // OR ip (v4) address
    '(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*' + // port and path
    '(\\?[;&a-z\\d%_.~+=-]*)?' + // query string
    '(\\#[-a-z\\d_]*)?$', 'i'); // fragment locator
  return pattern.test(str);
}
As others noted, regular expression does not seem to be the best suited approach for validating URLs, though.
One function that I have been using to validate a URL "string" is:
// Shape check: optional "scheme:" + "//" + (dotted host | localhost) + no whitespace.
var matcher = /^(?:\w+:)?\/\/([^\s\.]+\.\S{2}|localhost[\:?\d]*)\S*$/;

/**
 * Reports whether `string` matches the module-level `matcher` pattern.
 *
 * @param {string} string - Candidate URL text.
 * @returns {boolean} Result of testing the pattern against the input.
 */
function isUrl(string){
  const looksLikeUrl = matcher.test(string);
  return looksLikeUrl;
}
This function will return a boolean whether the string is a URL.
Examples:
isUrl("https://google.com"); // true
isUrl("http://google.com"); // true
isUrl("http://google.de"); // true
isUrl("//google.de"); // true
isUrl("google.de"); // false
isUrl("http://google.com"); // true
isUrl("http://localhost"); // true
isUrl("https://sdfasd"); // false
I can't comment on the closest post, #5717133, but below is how I got @tom-gullen's regex working.
/^(https?:\/\/)?((([a-z\d]([a-z\d-]*[a-z\d])*)\.)+[a-z]{2,}|((\d{1,3}\.){3}\d{1,3}))(\:\d+)?(\/[-a-z\d%_.~+]*)*(\?[;&a-z\d%_.~+=-]*)?(\#[-a-z\d_]*)?$/i
There's a lot of answers already, but here's another contribution:
Taken directly from the URL polyfill validity check, use an input element with type="url" to take advantage of the browser's built-in validity check:
var inputElement = doc.createElement('input');
inputElement.type = 'url';
inputElement.value = url;
if (!inputElement.checkValidity()) {
throw new TypeError('Invalid URL');
}
Source
This is quite difficult to do with pure regex because URLs have many 'inconveniences'.
For example domain names have complicated restrictions on hyphens:
a. It is allowed to have many consecutive hyphens in the middle.
b. but the first character and last character of the domain name cannot be a hyphen
c. The 3rd and 4th character cannot be both hyphen
Similarly port number can only be in the range 1-65535. This is easy to check if you extract the port part and convert to int but quite difficult to check with a regular expression.
There is also no easy way to check valid domain extensions. Some countries have second-level domains(such as 'co.uk'), or the extension can be a long word such as '.international'. And new TLDs are added regularly. This type of things can only be checked against a hard-coded list. (see https://en.wikipedia.org/wiki/Top-level_domain)
Then there are magnet urls, ftp addresses etc. These all have different requirements.
Nevertheless, here is a function that handles pretty much everything except:
Case 1. c
Accepts any 1-5 digit port number
Accepts any extension 2-13 chars
Does not accept ftp, magnet, etc...
/**
 * Tests `input` against a pattern for http(s) URLs with an optional scheme.
 * The host may be a dotted domain (2-13 letter extension), a dotted-quad IPv4
 * literal, or `localhost`; a 1-5 digit port, path, query string and fragment
 * are optional.
 *
 * @param {string} input - Candidate URL text.
 * @returns {boolean} `true` when the whole string matches the pattern.
 */
function isValidURL(input) {
  // Declared locally: assigning to undeclared names creates globals in sloppy
  // mode and throws a ReferenceError in strict mode / ES modules.
  // The label body is optional (`?`) rather than repeated (`*`), so the
  // {0,61} bound really caps a label at 63 characters and the matcher cannot
  // backtrack exponentially on long dot-less input.
  // NOTE: `+-:` inside the path/query classes is a character range
  // (U+002B..U+003A), so it also admits `,`, `-`, `.`, `/` and the digits.
  const pattern = '^(https?:\\/\\/)?' + // protocol
    '((([a-zA-Z\\d]([a-zA-Z\\d-]{0,61}[a-zA-Z\\d])?\\.)+' + // sub-domain + domain name
    '[a-zA-Z]{2,13})' + // extension
    '|((\\d{1,3}\\.){3}\\d{1,3})' + // OR ip (v4) address
    '|localhost)' + // OR localhost
    '(\\:\\d{1,5})?' + // port
    '(\\/[a-zA-Z\\&\\d%_.~+-:#]*)*' + // path
    '(\\?[a-zA-Z\\&\\d%_.,~+-:#=;&]*)?' + // query string
    '(\\#[-a-zA-Z&\\d_]*)?$'; // fragment locator
  const regex = new RegExp(pattern);
  return regex.test(input);
}
let tests = [];
tests.push(['', false]);
tests.push(['http://en.wikipedia.org/wiki/Procter_&_Gamble', true]);
tests.push(['https://sdfasd', false]);
tests.push(['http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707', true]);
tests.push(['https://stackoverflow.com/', true]);
tests.push(['https://w', false]);
tests.push(['aaa', false]);
tests.push(['aaaa', false]);
tests.push(['oh.my', true]);
tests.push(['dfdsfdsfdfdsfsdfs', false]);
tests.push(['google.co.uk', true]);
tests.push(['test-domain.MUSEUM', true]);
tests.push(['-hyphen-start.gov.tr', false]);
tests.push(['hyphen-end-.com', false]);
tests.push(['https://sdfasdp.international', true]);
tests.push(['https://sdfasdp.pppppppp', false]);
tests.push(['https://sdfasdp.ppppppppppppppppppp', false]);
tests.push(['https://sdfasd', false]);
tests.push(['https://sub1.1234.sub3.sub4.sub5.co.uk/?', true]);
tests.push(['http://www.google-com.123', false]);
tests.push(['http://my--testdomain.com', false]);
tests.push(['http://my2nd--testdomain.com', true]);
tests.push(['http://thingiverse.com/download:1894343', true]);
tests.push(['https://medium.com/#techytimo', true]);
tests.push(['http://localhost', true]);
tests.push(['localhost', true]);
tests.push(['localhost:8080', true]);
tests.push(['localhost:65536', true]);
tests.push(['localhost:80000', false]);
tests.push(['magnet:?xt=urn:btih:123', true]);
for (let i = 0; i < tests.length; i++) {
console.log('Test #' + i + (isValidURL(tests[i][0]) == tests[i][1] ? ' passed' : ' failed') + ' on ["' + tests[i][0] + '", ' + tests[i][1] + ']');
}
Mathias Bynens has compiled a list of well-known URL regexes with test URLs. There is little reason to write a new regular expression; just pick an existing one that suits you best.
But the comparison table for those regexes also shows that it is next to impossible to do URL validation with a single regular expression. All of the regexes in Bynens' list produce false positives and false negatives.
I suggest that you use an existing URL parser (for example new URL('http://www.example.com/') in JavaScript) and then apply the checks you want to perform against the parsed and normalized form of the URL resp. its components. Using the JavaScript URL interface has the additional benefit that it will only accept such URLs that are really accepted by the browser.
You should also keep in mind that technically incorrect URLs may still work. For example http://w_w_w.example.com/, http://www..example.com/, http://123.example.com/ all have an invalid hostname part but every browser I know will try to open them without complaints, and when you specify IP addresses for those invalid names in /etc/hosts such URLs will even work, but only on your computer.
The question is, therefore, not so much whether a URL is valid, but rather which URLs work and should be allowed in a particular context.
If you want to do URL validation there are a lot of details and edge cases that are easy to overlook:
URLs may contain credentials as in http://user:password@www.example.com/.
Port numbers must be in the range of 0-65535, but you may still want to exclude the wildcard port 0.
Port numbers may have leading zeros as in http://www.example.com:000080/.
IPv4 addresses are by no means restricted to 4 decimal integers in the range of 0-255. You can use one to four integers, and they can be decimal, octal or hexadecimal. The URLs https://010.010.000010.010/, https://0x8.0x8.0x0008.0x8/, https://8.8.2056/, https://8.526344/, https://134744072/ are all valid and just creative ways of writing https://8.8.8.8/.
Allowing loopback addresses (http://127.0.0.1/), private IP addresses (http://192.168.1.1), link-local addresses (http://169.254.100.200) and so on may have an impact on security or privacy. If, for instance, you allow them as the address of user avatars in a forum, you cause the users' browsers to send unsolicited network requests in their local network and in the internet of things such requests may cause funny and not so funny things to happen in your home.
For the same reasons, you may want to discard links to not fully qualified hostnames, in other words hostnames without a dot.
But hostnames may always have a trailing dot (like in http://www.stackoverflow.com.).
The hostname portion of a link may contain square brackets for IPv6 addresses as in http://[::1].
IPv6 addresses also have ranges for private networks or link-local addresses etc.
If you block certain IPv4 addresses, keep in mind that for example https://127.0.0.1 and https://[::ffff:127.0.0.1] point to the same resource (if the loopback device of your machine is IPv6 ready).
The hostname portion of URLs may now contain Unicode, so that the character range [-0-9a-zA-Z] is definitely no longer sufficient.
Many registries for top-level domains define specific restrictions, for example on the allowed set of Unicode characters. Or they subdivide their namespace (like co.uk and many others).
Top-level domains must not contain decimal digits, and the hyphen is not allowed unless for the IDN A-label prefix "xn--".
Unicode top-level domains (and their punycode encoding with "xn--") must still contain only letters but who wants to check that in a regex?
Which of these limitations and rules apply is a question of project requirements and taste.
I have recently written a URL validator for a web app that is suitable for user-supplied URLs in forums, social networks, or the like. Feel free to use it as a base for your own one:
JavaScript/Typescript version for the (Angular) frontend
Perl version for the backend
I have also written a blog post The Gory Details of URL Validation with more in-depth information.
this working with me
/**
 * Reports whether `str` contains an http:// or https:// URL.
 * The pattern is unanchored, so a match anywhere in the text counts.
 *
 * @param {string} str - Text to inspect.
 * @returns {boolean} `true` when the pattern is found.
 */
function isURL(str) {
  return /(http|https):\/\/(\w+:{0,1}\w*)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%!\-\/]))?/.test(str);
}
There are a couple of tests using the URL constructor which do not delineate whether the input is a string or URL object.
// Testing whether something is a URL
/**
 * Reports whether `url` is a URL object, judged by its
 * `Object.prototype.toString` tag.
 *
 * @param {*} url - Any value.
 * @returns {boolean} `true` only when the tag is "[object URL]".
 */
function isURL(url) {
  const typeTag = Object.prototype.toString.call(url);
  return typeTag === "[object URL]";
}
// Testing whether the input is both a string and valid url:
/**
 * Reports whether `url` is string-typed (by its `Object.prototype.toString`
 * tag) and is also accepted by the URL constructor.
 *
 * @param {*} url - Any value.
 * @returns {boolean} `true` only for strings that parse as URLs.
 */
function isUrl(url) {
  if (Object.prototype.toString.call(url) !== "[object String]") {
    return false;
  }
  try {
    new URL(url);
    return true;
  } catch (_) {
    return false;
  }
}
I have reviewed all the comments, notes and remarks in this topic and have made a new regular expression:
^((javascript:[\w-_]+(\([\w-_\s,.]*\))?)|(mailto:([\w\u00C0-\u1FFF\u2C00-\uD7FF-_]+\.)*[\w\u00C0-\u1FFF\u2C00-\uD7FF-_]+#([\w\u00C0-\u1FFF\u2C00-\uD7FF-_]+\.)*[\w\u00C0-\u1FFF\u2C00-\uD7FF-_]+)|(\w+:\/\/(([\w\u00C0-\u1FFF\u2C00-\uD7FF-]+\.)*([\w\u00C0-\u1FFF\u2C00-\uD7FF-]*\.?))(:\d+)?(((\/[^\s#$%^&*?]+)+|\/)(\?[\w\u00C0-\u1FFF\u2C00-\uD7FF:;&%_,.~+=-]+)?)?(#[\w\u00C0-\u1FFF\u2C00-\uD7FF-_]+)?))$
You can test and improve it here https://regexr.com/668mt .
I checked this expression on next values:
http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707
http://192.168.0.4:55/
https://web.archive.org/web/20170817095211/https://github.com/Microsoft/vscode/issues/32405
http://www.example.com
javascript:void()
http://.
https://example.
https://en.m.wikipedia.org/wiki/C_Sharp_(programming_language)
http://zh.wikipedia.org/wiki/Wikipedia:关于中文维基百科/en?a#a
https://medium.com/#User_name/
https://test-test-test-test-test-test-test-test-test.web.app/
http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707
https://sdfasdp.ppppppppppp
mailto:sadf#gmail.com
https://тест.юа
If you can change the input type, I think this solution would be much easier:
You can simply use type="url" in your input and then check it with checkValidity() in JS.
E.g:
your.html
<input id="foo" type="url">
your.js
// The selector is JQuery, but the function is plain JS
$("#foo").on("keyup", function() {
if (this.checkValidity()) {
// The url is valid
} else {
// The url is invalid
}
});
If you need to also support https://localhost:3000 then use this modified version of [Devshed]s regex.
/**
 * Tests `url` against a pattern for http(s) URLs with an optional scheme whose
 * host is a dotted domain, a dotted-quad IPv4 literal, or `localhost`.
 *
 * @param {string} url - Candidate URL text; falsy values are rejected.
 * @returns {boolean} `true` when the whole string matches the pattern.
 */
function isURL(url) {
  if (!url) return false;
  // `localhost` sits INSIDE the host group. As a top-level alternative it
  // would split the pattern in two, leaving the domain/ip half without `$`
  // and the localhost half without `^`, so trailing or leading junk passed.
  // Labels use `x(?:...x)?` instead of `x(...x)*` to avoid exponential
  // backtracking on long input that lacks a dot.
  const pattern = new RegExp('^(https?:\\/\\/)?' + // protocol
    '(?:(?:[a-z\\d](?:[a-z\\d-]*[a-z\\d])?\\.)+[a-z]{2,}|' + // domain name
    '(?:\\d{1,3}\\.){3}\\d{1,3}|' + // OR ip (v4) address
    'localhost)' + // OR localhost
    '(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*' + // port and path
    '(\\?[;&a-z\\d%_.~+=-]*)?' + // query string
    '(\\#[-a-z\\d_]*)?$', 'i'); // fragment locator
  return pattern.test(url);
}
I change the function to Match + make a change here with the slashes and its work: (http:// and https) both
/**
 * Reports whether `userInput` contains something URL-shaped, with or without
 * an http/https prefix.
 *
 * The pattern is deliberately unanchored: a match anywhere in the input counts.
 * Both character classes list `@` (the original listed `#` twice), so
 * userinfo such as `user@host.tld` is recognised.
 *
 * @param {string} userInput - Text to inspect.
 * @returns {boolean} `true` when a URL-like substring is found.
 */
function isValidUrl(userInput) {
  // No `g` flag: only a yes/no answer is needed, not the list of all matches.
  const urlLike = /(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/;
  return urlLike.test(userInput);
}
I think using the native URL API is better than complex regex patterns, as @pavlo suggested. It has some drawbacks, though, which we can fix with some extra code. This approach fails for the following valid URL.
//cdn.google.com/script.js
We can add the missing protocol beforehand to avoid that. It also fails to detect following invalid url.
http://w
http://..
So why check the whole url? we can just check the domain. I borrowed the regex to verify domain from here.
/**
 * Parses `string` with the URL constructor and then checks the resulting
 * hostname against a domain-name pattern.
 *
 * Protocol-relative input (`//host/path`) is given a placeholder `http:`
 * scheme first so that the URL constructor can parse it.
 *
 * @param {string} string - Candidate URL text.
 * @returns {boolean} `true` when parsing succeeds and the hostname matches.
 */
function isValidUrl(string) {
  const hostnamePattern = /^([a-z0-9])(([a-z0-9-]{1,61})?[a-z0-9]{1})?(\.[a-z0-9](([a-z0-9-]{1,61})?[a-z0-9]{1})?)?(\.[a-zA-Z]{2,4})+$/;
  let candidate = string;
  if (candidate && candidate.length > 1 && candidate.slice(0, 2) == '//') {
    candidate = 'http:' + candidate; //dummy protocol so that URL works
  }
  try {
    const { hostname } = new URL(candidate);
    return Boolean(hostname && hostname.match(hostnamePattern));
  } catch (_) {
    return false;
  }
}
The hostname attribute is empty string for javascript:void(0), so it works for that too, and you can also add IP address verifier too. I'd like to stick to native API's most, and hope it starts to support everything in near future.
The question asks for a validation method for a URL such as stackoverflow, without the protocol or any dot in the hostname. So, it's not a matter of validating URL syntax, but of checking whether it's a valid URL by actually calling it.
I tried several methods for knowing if the url true exists and is callable from within the browser, but did not find any way to test with javascript the response header of the call:
adding an anchor element is fine for firing the click() method.
making ajax call to the challenging url with 'GET' is fine, but has it's various limitations due to CORS policies and it is not the case of using ajax, for as the url maybe any outside my server's domain.
using the fetch API has a workaround similar to ajax.
other problem is that I have my server under https protocol and throws an exception when calling non secure urls.
So, the best solution I can think of is getting some tool to perform CURL using JavaScript, trying something like curl -I <url>. Unfortunately I did not find any, and apparently it's not possible. I will appreciate any comments on this.
But, in the end, I have a server running PHP and as I use Ajax for almost all my requests, I wrote a function on the server side to perform the curl request there and return to the browser.
Regarding the single word url on the question 'stackoverflow' it will lead me to https://daniserver.com.ar/stackoverflow, where daniserver.com.ar is my own domain.
This seems to be one of the hardest problems in CS ;)
Here's another incomplete solution that works well enough for me and better than the others I've seen here. I'm using a input[type=url] for this in order to support IE11, otherwise it would be much simpler using window.URL to perform the validation instead:
// Four dot-separated groups of one to three digits.
const ipv4Regex = /^(\d{1,3}\.){3}\d{1,3}$/;

/**
 * Reports whether `ip` is a dotted-quad IPv4 literal with no group above 255.
 *
 * @param {string} ip - Candidate address text.
 * @returns {boolean} `true` when the shape matches and every group is <= 255.
 */
function isValidIpv4(ip) {
  return ipv4Regex.test(ip) && ip.split('.').every(octet => !(octet > 255));
}
// One or more dot-terminated labels followed by a 2-63 letter TLD.
// Anchored at BOTH ends: without `^`, any string that merely ended in
// something domain-like (e.g. "!!!google.com") was accepted.
const domainRegex = /^(?:[a-z0-9-]{1,63}\.){1,125}[a-z]{2,63}$/i;

/**
 * Reports whether `domain` is either a valid IPv4 literal or a dotted domain
 * name ending in a letters-only TLD.
 *
 * @param {string} domain - Host text without scheme, credentials or path.
 * @returns {boolean} `true` when either check passes.
 */
function isValidDomain(domain) {
  return isValidIpv4(domain) || domainRegex.test(domain);
}
// Lazily created <input type="url"> that is reused across calls.
let input;

/**
 * Validates a user-supplied web address and returns its normalized form.
 *
 * Input without an http:// or https:// prefix is assumed to be plain HTTP.
 * The browser's `input[type=url]` validity check runs first (chosen over
 * `window.URL` so that IE11 is supported), then the host part is checked
 * with `isValidDomain`.
 *
 * @param {string} url - Address as typed by the user.
 * @returns {string|false} The (possibly `http://`-prefixed) URL when valid,
 *   otherwise `false`.
 */
function validateUrl(url) {
  // Template literal: assumes Babel (or similar) is used when targeting IE11.
  const candidate = /^https?:\/\//.test(url) ? url : `http://${url}`;
  if (!input) { input = document.createElement('input'); input.type = 'url'; }
  input.value = candidate;
  if (!input.validity.valid) return false;
  // The authority ends at the first "/", "?" or "#"; credentials, if any, are
  // everything up to the last "@" inside it.
  const authority = candidate.replace(/^https?:\/\//, '').split(/[/?#]/)[0];
  const domain = authority.split('@').pop();
  return isValidDomain(domain) && candidate;
}
console.log(validateUrl('google'), // false
  validateUrl('user:pw@mydomain.com'),
  validateUrl('https://google.com'),
  validateUrl('100.100.100.100/abc'),
  validateUrl('100.100.100.256/abc')); // false
In order to accept incomplete inputs such as "www.mydomain.com" it will also make it valid assuming the protocol is "http" in those cases and returning the valid URL if the address is valid. It returns false when invalid.
It also supports IPv4 domains, but not IPv6.
In my case my only requirement is that the user input won't be interpreted as a relative link when placed in the href of an a tag and the answers here were either a bit OTT for that or allowed URLs not meeting my requirements, so this is what I'm going with:
^https?://.+$
The same thing could be achieved pretty easily without regex.
This is definitely not the most effective approach, but it is readable and easy to adapt to whatever you need. And it's easier to add regex/complexity from here. So here is a very pragmatic approach:
// Prefixes a candidate must start with, and substrings it must never contain.
const validFirstBits = ["ftp://", "http://", "https://", "www."];
const invalidPatterns = [" ", "//.", ".."];

/**
 * Pragmatic, regex-free URL check.
 *
 * A word passes when it is at least 8 characters long, starts with one of
 * `validFirstBits`, contains none of `invalidPatterns`, contains a dot, and
 * the text after its last dot is either longer than one character or a single
 * digit.
 *
 * @param {string} word - Candidate URL text.
 * @returns {boolean} Whether the word passes every check.
 */
export function isUrl(word) {
  // Anything shorter than "www.1.dk" is rejected outright.
  if (!word || word.length < 8) return false;

  const startsWithKnownPrefix = validFirstBits.some(prefix => word.indexOf(prefix) === 0);
  if (!startsWithKnownPrefix) return false;

  const containsForbidden = invalidPatterns.some(bad => word.indexOf(bad) !== -1);
  if (containsForbidden) return false;

  const pieces = word.split(".");
  if (pieces.length <= 1) return false;

  const tail = pieces[pieces.length - 1];
  if (!tail) return false;

  // A one-character tail is only acceptable when it is a digit (IP addresses).
  return tail.length > 1 || !Number.isNaN(Number.parseInt(tail, 10));
}
TEST:
import { isUrl } from "./foo";
// Test suite for isUrl: one case for inputs that must be accepted and one
// for inputs that must be rejected.
describe("Foo", () => {
  test("should validate correct urls correctly", () => {
    const acceptedUrls = [
      "http://example.com",
      "http://example.com/blah",
      "http://127.0.0.1",
      "http://127.0.0.1/wow",
      "https://example.com",
      "https://example.com/blah",
      "https://127.0.0.1:1234",
      "ftp://example.com",
      "ftp://example.com/blah",
      "ftp://127.0.0.1",
      "www.example.com",
      "www.example.com/blah",
    ];
    // `isUrl(url) && url` equals url only when isUrl(url) is truthy.
    for (const url of acceptedUrls) {
      expect(isUrl(url) && url).toEqual(url);
    }
  });

  test("should validate invalid urls correctly", () => {
    const rejectedUrls = [
      "http:// foo.com",
      "http:/foo.com",
      "http://.foo.com",
      "http://foo..com",
      "http://.com",
      "http://foo",
      "http://foo.c",
    ];
    // `!isUrl(url) && url` equals url only when isUrl(url) is falsy.
    for (const url of rejectedUrls) {
      expect(!isUrl(url) && url).toEqual(url);
    }
  });
});
What is the shortest, accurate, and cross-browser compatible method for reading a cookie in JavaScript?
Very often, while building stand-alone scripts (where I can't have any outside dependencies), I find myself adding a function for reading cookies, and usually fall-back on the QuirksMode.org readCookie() method (280 bytes, 216 minified.)
/**
 * Looks up a cookie by name in document.cookie.
 *
 * @param {string} name - Cookie name.
 * @returns {string|null} The text after "name=" of the first matching
 *   cookie, or null when no cookie starts with that name.
 */
function readCookie(name) {
  const prefix = name + "=";
  for (const rawPair of document.cookie.split(';')) {
    // Drop the space(s) that follow the ';' separator.
    const pair = rawPair.replace(/^ +/, '');
    if (pair.startsWith(prefix)) {
      return pair.slice(prefix.length);
    }
  }
  return null;
}
It does the job, but it's ugly, and adds quite a bit of bloat each time.
The method that jQuery.cookie uses is something like this (modified, 165 bytes, 125 minified):
/**
 * Reads a cookie whose name was stored URI-component-encoded.
 *
 * @param {string} key - Unencoded cookie name.
 * @returns {string|null} The raw value (capture group 1), or null when no
 *   cookie matches.
 */
function read_cookie(key) {
  // The name must sit at the start of the string or right after "; ".
  const pattern = new RegExp('(?:^|; )' + encodeURIComponent(key) + '=([^;]*)');
  const found = pattern.exec(document.cookie);
  if (found) {
    return found[1];
  }
  return null;
}
Note this is not a 'Code Golf' competition: I'm legitimately interested in reducing the size of my readCookie function, and in ensuring the solution I have is valid.
Shorter, more reliable and more performant than the current best-voted answer:
/**
 * Returns the value of the named cookie, tolerating optional whitespace
 * after ';' and around '='.
 *
 * @param {string} name - Cookie name, matched literally.
 * @returns {string} The cookie value, or '' when the cookie is missing or
 *   has an empty value.
 */
const getCookieValue = (name) => {
  // Escape regex metacharacters so that e.g. "a.b" cannot match "axb".
  const escapedName = String(name).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const pattern = new RegExp('(?:^|;)\\s*' + escapedName + '\\s*=\\s*([^;]+)');
  const found = document.cookie.match(pattern);
  return found ? found[1] : '';
};
A performance comparison of various approaches is shown here:
https://jsben.ch/AhMN6
Some notes on approach:
The regex approach is not only the fastest in most browsers, it yields the shortest function as well. Additionally it should be pointed out that according to the official spec (RFC 2109), the space after the semicolon which separates cookies in the document.cookie is optional and an argument could be made that it should not be relied upon. Additionally, whitespace is allowed before and after the equals sign (=) and an argument could be made that this potential whitespace should be factored into any reliable document.cookie parser. The regex above accounts for both of the above whitespace conditions.
This will only ever hit document.cookie ONE time. Every subsequent request will be instant.
(function () {
  // Name -> value table, built from document.cookie on the first call only.
  // Later changes to document.cookie are NOT reflected (by design of this
  // snippet: "will only ever hit document.cookie ONE time").
  let cookies;

  /**
   * Returns the cached value of a cookie.
   *
   * @param {string} name - Cookie name.
   * @returns {string|undefined} The value, or undefined when absent.
   */
  function readCookie(name) {
    if (cookies) { return cookies[name]; }
    const pairs = document.cookie.split('; ');
    // Prototype-less object so names such as "toString" are not "found".
    cookies = Object.create(null);
    // Walk backwards so that, for duplicate names, the first cookie wins.
    for (let i = pairs.length - 1; i >= 0; i--) {
      const pair = pairs[i];
      // Split at the FIRST '=' only; values may legitimately contain '='.
      const eq = pair.indexOf('=');
      if (eq === -1) {
        cookies[pair] = undefined;
      } else {
        cookies[pair.slice(0, eq)] = pair.slice(eq + 1);
      }
    }
    return cookies[name];
  }
  window.readCookie = readCookie; // or expose it however you want
})();
I'm afraid there really isn't a faster way than this general logic unless you're free to use .forEach which is browser dependent (even then you're not saving that much)
Your own example slightly compressed to 120 bytes:
// Golfed regex reader: `r` is declared as a parameter only so it can serve as a local; returns capture group 2 (the value) or null when no cookie matches.
function read_cookie(k,r){return(r=RegExp('(^|; )'+encodeURIComponent(k)+'=([^;]*)').exec(document.cookie))?r[2]:null;}
You can get it to 110 bytes if you make it a 1-letter function name, 90 bytes if you drop the encodeURIComponent.
I've gotten it down to 73 bytes, but to be fair it's 82 bytes when named readCookie and 102 bytes when then adding encodeURIComponent:
// Golfed: `||0` turns a failed match into 0, so indexing [2] yields undefined for a missing cookie instead of throwing. `k` goes into the pattern unencoded and unescaped.
function C(k){return(document.cookie.match('(^|; )'+k+'=([^;]*)')||0)[2]}
Assumptions
Based on the question, I believe some assumptions / requirements for this function include:
It will be used as a library function, and so meant to be dropped into any codebase;
As such, it will need to work in many different environments, i.e. work with legacy JS code, CMSes of various levels of quality, etc.;
To inter-operate with code written by other people and/or code that you do not control, the function should not make any assumptions on how cookie names or values are encoded. Calling the function with a string "foo:bar[0]" should return a cookie (literally) named "foo:bar[0]";
New cookies may be written and/or existing cookies modified at any point during lifetime of the page.
Under these assumptions, it's clear that encodeURIComponent / decodeURIComponent should not be used; doing so assumes that the code that set the cookie also encoded it using these functions.
The regular expression approach gets problematic if the cookie name can contain special characters. jQuery.cookie works around this issue by encoding the cookie name (actually both name and value) when storing a cookie, and decoding the name when retrieving a cookie. A regular expression solution is below.
Unless you're only reading cookies you control completely, it would also be advisable to read cookies from document.cookie directly and not cache the results, since there is no way to know if the cache is invalid without reading document.cookie again.
(While accessing and parsing document.cookie will be slightly slower than using a cache, it would not be as slow as reading other parts of the DOM, since cookies do not play a role in the DOM / render trees.)
Loop-based function
Here goes the Code Golf answer, based on PPK's (loop-based) function:
/**
 * Loop-based cookie lookup. Scans the cookie pieces from last to first.
 *
 * @param {string} name - Cookie name.
 * @returns {string|undefined} Text after "name=" of the last piece that
 *   starts with it, or undefined when none does.
 */
function readCookie(name) {
  const prefix = name + '=';
  const pieces = document.cookie.split(/;\s*/);
  let idx = pieces.length;
  while (idx-- > 0) {
    const piece = pieces[idx];
    if (piece.indexOf(prefix) === 0) {
      return piece.slice(prefix.length);
    }
  }
}
which when minified, comes to 128 characters (not counting the function name):
// Minified loop-based reader: appends '=' to n, scans the cookie pieces from last to first, and returns the remainder of the first piece found that starts with n (undefined when none does).
function readCookie(n){n+='=';for(var a=document.cookie.split(/;\s*/),i=a.length-1;i>=0;i--)if(!a[i].indexOf(n))return a[i].replace(n,'');}
Regular expression-based function
Update: If you really want a regular expression solution:
/**
 * Regex-based cookie lookup that treats the name literally.
 *
 * @param {*} name - Cookie name; coerced to string.
 * @returns {string|null} Capture group 1 (the value), or null when the
 *   pattern does not match.
 */
function readCookie(name) {
  // Escape regex metacharacters so the name is matched character for character.
  const escapedName = ('' + name).replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
  const pattern = new RegExp('(?:^|;\\s*)' + escapedName + '=([^;]*)');
  const found = pattern.exec(document.cookie);
  return found && found[1];
}
This escapes any special characters in the cookie name before constructing the RegExp object. Minified, this comes to 134 characters (not counting the function name):
// Minified regex reader: escapes regex metacharacters in n, then returns capture group 1 of the match, or null when exec() finds nothing.
function readCookie(n){return(n=new RegExp('(?:^|;\\s*)'+(''+n).replace(/[-[\]{}()*+?.,\\^$|#\s]/g,'\\$&')+'=([^;]*)').exec(document.cookie))&&n[1];}
As Rudu and cwolves have pointed out in the comments, the regular-expression-escaping regex can be shortened by a few characters. I think it would be good to keep the escaping regex consistent (you may be using it elsewhere), but their suggestions are worth considering.
Notes
Both of these functions won't handle null or undefined, i.e. if there is a cookie named "null", readCookie(null) will return its value. If you need to handle this case, adapt the code accordingly.
code from google analytics ga.js
/**
 * Collects the values of every cookie whose name matches `a`.
 *
 * @param {string} a - Cookie name; inserted into the pattern as-is.
 * @returns {string[]} Whitespace-trimmed values in document order; an empty
 *   array when nothing matches.
 */
function c(a) {
  const pieces = document.cookie.split(";");
  const pattern = RegExp("^\\s*" + a + "=\\s*(.*?)\\s*$");
  const values = [];
  for (const piece of pieces) {
    const hit = piece.match(pattern);
    if (hit) {
      values.push(hit[1]);
    }
  }
  return values;
}
How about this one?
// Returns capture group 2 (the value) or null. The pattern allows at most one space after ';' and requires the value to be followed by ';' or the end of the string; k is inserted unescaped.
function getCookie(k){var v=document.cookie.match('(^|;) ?'+k+'=([^;]*)(;|$)');return v?v[2]:null}
Counted 89 bytes without the function name.
The following function will allow differentiating between empty strings and undefined cookies. Undefined cookies will correctly return undefined and not an empty string unlike some of the other answers here.
/**
 * Reads a cookie, distinguishing "missing" from "empty".
 *
 * @param {string} name - Cookie name; inserted into the pattern as-is.
 * @returns {string|undefined} The value ('' for an empty cookie), or
 *   undefined when the cookie does not exist.
 */
function getCookie(name) {
  const found = document.cookie.match('(^|;) *' + name + '=([^;]*)');
  if (!found) {
    return undefined;
  }
  return found[2];
}
The above worked fine for me on all browsers I checked, but as mentioned by @vanovm in the comments, per the specification the key/value may be surrounded by whitespace. Hence the following is more standards-compliant.
/**
 * Whitespace-tolerant cookie lookup: allows whitespace after ';' and around
 * both the name and the value.
 *
 * @param {string} name - Cookie name; trimmed, then inserted as-is.
 * @returns {string|undefined} The trimmed value, or undefined when missing.
 */
function getCookie(name) {
  const jar = document.cookie;
  const source = '(?:^|;)\\s*' + name.trim() + '\\s*=\\s*([^;]*?)\\s*(?:;|$)';
  const found = jar.match(source);
  if (found === null) {
    return undefined;
  }
  return found[1];
}
This is an object with which you can read, write, overwrite and delete cookies.
/**
 * Small helper object for writing, reading and deleting cookies through
 * document.cookie. Names and values are stored as given (no encoding).
 */
var cookie = {
  /**
   * Sets a cookie that expires after a number of days.
   * @param {string} cname - Cookie name.
   * @param {string} cvalue - Cookie value.
   * @param {number} exdays - Lifetime in days from now.
   */
  write: function (cname, cvalue, exdays) {
    const d = new Date();
    d.setTime(d.getTime() + (exdays * 24 * 60 * 60 * 1000));
    const expires = "expires=" + d.toUTCString();
    document.cookie = cname + "=" + cvalue + "; " + expires;
  },

  /**
   * Returns the value of the cookie with exactly this name.
   * @param {string} name - Cookie name.
   * @returns {string} The value, or "" when no such cookie exists.
   */
  read: function (name) {
    // Match on "name=" at the start of a pair. A plain substring search
    // would also hit other cookies that merely contain the name
    // (e.g. "username" when asked for "name").
    const prefix = name + "=";
    for (const rawPair of document.cookie.split(";")) {
      const pair = rawPair.trimStart();
      if (pair.startsWith(prefix)) {
        return pair.slice(prefix.length);
      }
    }
    return "";
  },

  /**
   * Deletes a cookie by overwriting it with an expiry date in the past.
   * @param {string} cname - Cookie name.
   */
  delete: function (cname) {
    const d = new Date();
    d.setTime(d.getTime() - 1000);
    const expires = "expires=" + d.toUTCString();
    document.cookie = cname + "=; " + expires;
  }
};
Here goes.. Cheers!
/**
 * Reads a cookie by name.
 *
 * @param {string} n - Cookie name; interpolated into the pattern as-is.
 * @returns {string} The value, or '' when the cookie is missing or empty.
 */
function getCookie(n) {
  // Prefixing "; " lets one pattern handle the first cookie like the rest.
  const haystack = `; ${document.cookie}`;
  const source = `;\\s*${n}=([^;]+)`;
  const found = haystack.match(source);
  if (found) {
    return found[1];
  }
  return '';
}
Note that I made use of ES6's template strings to compose the regex expression.
It's 2022, everything except Internet Explorer supports the URLSearchParams API (^1) and String.prototype.replaceAll API (^2), so we can horribly (ab)use them:
// Re-shape the cookie string into a query string: a literal '&' is escaped to %26 first so it cannot be mistaken for the pair separator that '; ' is then turned into.
const cookies = new URLSearchParams(document.cookie.replaceAll('&', '%26').replaceAll('; ', '&'));
cookies.get('cookie name'); // returns null if not set, string otherwise
Both of these functions look equally valid in terms of reading cookie. You can shave a few bytes off though (and it really is getting into Code Golf territory here):
/**
 * Compact variant of the loop-based cookie reader.
 *
 * @param {string} name - Cookie name.
 * @returns {string|null} Text after "name=" of the first matching cookie,
 *   or null when there is none.
 */
function readCookie(name) {
  var nameEQ = name + "=", ca = document.cookie.split(';'), i, c;
  for (i = 0; i < ca.length; i += 1) {
    // Strip the leading space(s) left over from the "; " separator.
    for (c = ca[i]; c[0] === ' '; c = c.substring(1)) { /* keep stripping */ }
    if (c.indexOf(nameEQ) === 0) {
      return c.substring(nameEQ.length);
    }
  }
  return null;
}
All I did with this is collapse all the variable declarations into one var statement, removed the unnecessary second arguments in calls to substring, and replace the one charAt call into an array dereference.
This still isn't as short as the second function you provided, but even that can have a few bytes taken off:
/**
 * Reads a cookie whose name was stored URI-component-encoded.
 *
 * @param {string} key - Unencoded cookie name.
 * @returns {string|null} Capture group 2 (the value), or null when no
 *   cookie matches.
 */
function read_cookie(key) {
  // Group 1 is the capturing "(^|; )" anchor, so the value is group 2.
  const source = '(^|; )' + encodeURIComponent(key) + '=([^;]*)';
  const result = new RegExp(source).exec(document.cookie);
  if (result === null) {
    return null;
  }
  return result[2];
}
I changed the first sub-expression in the regular expression to be a capturing sub-expression, and changed the result[1] part to result[2] to coincide with this change; also removed the unnecessary parens around result[2].
To truly remove as much bloat as possible, consider not using a wrapper function at all:
// Wrapper-free lookup: indexing [2] on a failed match (null) throws, and the
// catch branch is where a missing cookie is handled.
var myCookie;
try {
  myCookie = document.cookie.match('(^|;) *myCookie=([^;]*)')[2];
} catch (_) {
  // handle missing cookie
}
As long as you're familiar with RegEx, that code is reasonably clean and easy to read.
To have all cookies accessible by name in a Map:
// Sample cookie string with irregular whitespace around names and values.
const cookies = "a=b ; c = d ;e=";
// Build a name -> value Map; both sides of each pair are trimmed.
const map = new Map();
for (const pair of cookies.split(";")) {
  const [key, value] = pair.split("=").map((part) => part.trim());
  map.set(key, value);
}
console.log(map); //Map(3) {'a' => 'b', 'c' => 'd', 'e' => ''}
map.get("a"); //returns "b"
map.get("c"); //returns "d"
map.get("e"); //returns ""
(edit: posted the wrong version first.. and a non-functional one at that. Updated to current, which uses an unparam function that is much like the second example.)
Nice idea in the first example cwolves. I built on both for a fairly compact cookie reading/writing function that works across multiple subdomains. Figured I'd share in case anyone else runs across this thread looking for that.
// Attaches two helpers to the namespace object passed in as `s`.
(function(s){
// strToObj: splits x on `splitter`, then splits each piece on '=' and stores
// p[1] under key p[0]. Pieces are walked from last to first, so for duplicate
// keys the earliest piece is assigned last and wins.
s.strToObj = function (x,splitter) {
for ( var y = {},p,a = x.split (splitter),L = a.length;L;) {
p = a[ --L].split ('=');
y[p[0]] = p[1]
}
return y
};
// rwCookie(n, v, e): with a truthy v, writes cookie n=v (optional expires e)
// for path "/" and a domain derived from location.hostname; in every case
// returns c[n], or the whole cookie object when c[n] is falsy.
s.rwCookie = function (n,v,e) {
var d=document,
// NOTE(review): nothing in this block assigns s.cookies, so unless it is set
// elsewhere the object is re-parsed from d.cookie on every call and the
// `c[n]=v` below is not retained between calls — confirm intent.
c= s.cookies||s.strToObj(d.cookie,'; '),
h=location.hostname,
domain;
if(v){
// Everything after the second-to-last '.' in the hostname (the whole
// hostname when it contains fewer than two dots).
domain = h.slice(h.lastIndexOf('.',(h.lastIndexOf('.')-1))+1);
// `(c[n]=v)` both records the value in c and yields it for the cookie string.
d.cookie = n + '=' + (c[n]=v) + (e ? '; expires=' + e : '') + '; domain=.' + domain + '; path=/'
}
return c[n]||c
};
})(some_global_namespace)
If you pass rwCookie nothing, it will get
all cookies into cookie storage
Passed rwCookie a cookie name, it gets that
cookie's value from storage
Passed a cookie value, it writes the cookie and places the value in storage
Expiration defaults to session unless you specify one
Using cwolves' answer, but not using a closure nor a pre-computed hash :
// Golfed it a bit, too...
/**
 * Reads a cookie by scanning document.cookie on every call (no cache).
 *
 * @param {string} n - Cookie name.
 * @returns {string|undefined} The value, or undefined when no cookie has
 *   that name.
 */
function readCookie(n) {
  const pairs = document.cookie.split('; ');
  // Start at the last valid index and include index 0. Starting at
  // pairs.length read past the end of the array and threw on undefined.
  for (let i = pairs.length - 1; i >= 0; i--) {
    const pair = pairs[i];
    // Split at the first '=' only so values containing '=' stay intact.
    const eq = pair.indexOf('=');
    const key = eq === -1 ? pair : pair.slice(0, eq);
    // Loose comparison kept on purpose: callers may pass a non-string name.
    if (key == n) {
      return eq === -1 ? undefined : pair.slice(eq + 1);
    }
  }
}
...and minifying...
// Minified reader. The loop pre-decrements i so it visits indices length-1 down to 0; starting at c[c.length] dereferenced undefined and threw. Same length as before (127 bytes).
function readCookie(n){var c=document.cookie.split('; '),i=c.length,C;for(;0<=--i;){C=c[i].split('=');if(C[0]==n)return C[1];}}
...equals 127 bytes.
Here is the simplest solution using javascript string functions.
// Takes the text from the first occurrence of the literal "COOKIE_NAME" up to
// the next ';', then drops COOKIE_NAME.length leading characters.
// NOTE(review): COOKIE_NAME appears both as a string literal and as a bare
// identifier, so a variable of that name must exist elsewhere — confirm.
// NOTE(review): substr(COOKIE_NAME.length) does not skip the '=' sign, and
// when no ';' follows (last cookie) indexOf returns -1 as the end index —
// verify both cases before relying on this.
document.cookie.substring(document.cookie.indexOf("COOKIE_NAME"),
document.cookie.indexOf(";",
document.cookie.indexOf("COOKIE_NAME"))).
substr(COOKIE_NAME.length);
Just to throw my hat in the race, here's my proposal:
/**
 * Builds a name -> value dictionary from document.cookie and returns the
 * entry for `name`.
 *
 * @param {string} name - Cookie name.
 * @returns {string|undefined} The value, or undefined when absent.
 */
function getCookie(name) {
  // Prototype-less object so names like "toString" are not spuriously found.
  const cookieDict = Object.create(null);
  for (const rawPair of document.cookie.split(';')) {
    // Pairs are separated by "; ". Without trimming, every key after the
    // first would start with a space and never be found.
    const pair = rawPair.trimStart();
    // Split at the first '=' only so values containing '=' stay intact.
    const eq = pair.indexOf('=');
    if (eq === -1) {
      cookieDict[pair] = undefined;
    } else {
      cookieDict[pair.slice(0, eq)] = pair.slice(eq + 1);
    }
  }
  return cookieDict[name];
}
The above code generates a dict that stores cookies as key-value pairs (i.e., cookieDict), and afterwards accesses the property name to retrieve the cookie.
This could effectively be expressed as a one-liner, but this is only for the brave:
// One-liner form: each pair is trimmed before splitting so that keys after the first do not keep the space that follows ';'. `name` is the cookie name to look up.
document.cookie.split(';').map((x)=>x.trim().split('=')).reduce((accum,current) => { accum[current[0]]=current[1]; return accum;}, {})[name]
The absolute best approach would be to generate cookieDict at page load and then throughout the page lifecycle just access individual cookies by calling cookieDict['cookiename'].
This function doesn't work in older browsers such as Chrome < 80, because they do not support the optional chaining operator (?.).
/**
 * Returns the value of the named cookie, or '' when it is missing or empty.
 * Relies on optional chaining (`?.`), which older browsers do not support.
 *
 * @param {string} name - Cookie name; inserted into the pattern as-is.
 * @returns {string} The cookie value or ''.
 */
const getCookieValue = (name) => {
  const source = '(^|;)\\s*' + name + '\\s*=\\s*([^;]+)';
  // pop() yields the last capture group, i.e. the value.
  return document.cookie.match(source)?.pop() || '';
};
I solved it by using this function instead that returns undefined if the cookie is missing:
/**
 * Returns the URI-decoded value of a cookie.
 *
 * @param {string} name - Cookie name (compared against the decoded name).
 * @returns {string|undefined} The decoded value, or undefined when the
 *   cookie is missing.
 */
function getCookie(name) {
  // Add the = sign
  const prefix = name + '=';
  // Split on ';' BEFORE decoding: decoding the whole string first turns an
  // encoded ';' (%3B) inside a value into a separator and truncates it.
  for (const rawCookie of document.cookie.split(';')) {
    let cookie;
    try {
      cookie = decodeURIComponent(rawCookie).trim();
    } catch (err) {
      if (!(err instanceof URIError)) throw err;
      // A malformed escape in one cookie must not break every lookup;
      // fall back to the raw text for that cookie only.
      cookie = rawCookie.trim();
    }
    // If this cookie has the name we are searching for,
    // return everything after the name.
    if (cookie.startsWith(prefix)) {
      return cookie.substring(prefix.length);
    }
  }
  return undefined;
}
Credit: https://daily-dev-tips.com/posts/vanilla-javascript-cookies-%F0%9F%8D%AA/
You can verify if a cookie exists and it has a defined value:
/**
 * Returns the value of the cookie with exactly the given name.
 *
 * @param {string} cookiename - Cookie name; must be a non-empty string.
 * @returns {string|null} The value, or null when the name is invalid or no
 *   such cookie exists.
 */
function getCookie(cookiename) {
  if (typeof cookiename !== 'string' || cookiename === '') {
    return null;
  }
  // Match "name=" rather than just "name", otherwise "example" would also
  // match a cookie called "example2".
  const prefix = `${cookiename}=`;
  for (const rawPair of document.cookie.split(';')) {
    const pair = rawPair.trim();
    if (pair.startsWith(prefix)) {
      // Everything after the first '=' is the value (it may contain '=').
      return pair.slice(prefix.length);
    }
  }
  return null;
}
// Usage sketch: `{ ... }` below stands for the caller's own code.
const COOKIE_EXAMPLE = getCookie('example');
if (COOKIE_EXAMPLE == 'stackoverflow') { ... }
// True when a cookie named "example" is set with the value "stackoverflow"
if (COOKIE_EXAMPLE != null) { ... }
// True when a cookie named "example" is set, whatever its value
It will return null if the cookie doesn't exist.
Get the cookie value or undefined if it doesn't exist:
// Finds the pair that starts with "cookie_name=" and returns everything
// after that prefix, so values containing '=' are not cut short; evaluates
// to undefined when no such cookie exists.
document
  .cookie
  .split('; ')
  .find((row) => row.startsWith('cookie_name='))
  ?.slice('cookie_name='.length);
On chromium based browsers you can use the experimental cookieStore api:
// Awaits the lookup of 'cookieName' through cookieStore.get(); the result is discarded here. NOTE(review): `await` at this level needs a module or async function context — confirm where this runs.
await cookieStore.get('cookieName');
Check the browser support before using!