Regex to extract domain and video id from youtube/vimeo url - javascript

I am copying a function that will take a youtube/vimeo url and return what site the video came from (vimeo/yt) as well as the video id.
Here's what I have so far: http://jsfiddle.net/csjwf/181/
<strong>Result:</strong>
<div id="result"></div>
function parseVideoURL(url) {
url.match(/^http:\/\/(?:.*?)\.?(youtube|vimeo)\.com\/(watch\?[^#]*v=(\w+)|(\d+)).+$/);
return {
provider : RegExp.$1,
id : RegExp.$1 == 'vimeo' ? RegExp.$2 : RegExp.$3
}
}
var result = document.getElementById("result");
var video = parseVideoURL("http://www.youtube.com/watch?v=PQLnmdOthmA&feature=feedrec_grec_index");
result.innerHTML = "Provider: " + video.provider + "<br>ID: " + video.id;
var video = parseVideoURL("http://vimeo.com/22080133");
result.innerHTML += "<br>--<br>Provider: " + video.provider + "<br>ID: " + video.id;
Output:
Result:
Provider: youtube
ID: PQLnmdOthmA
--
Provider: vimeo
ID: 2208013
However, notice how for vimeo vids, if the url ends in the ID, the last number is always cut off. If you add a slash to the end of the vimeo url the id is pulled fully.

The .+$ at the end is requiring at least one character after the last digit that is captured as a string of digits. That will chop one digit off what is captured. Is there a reason you have that there?
You can change the last + to a * like this:
/^http:\/\/(?:.*?)\.?(youtube|vimeo)\.com\/(watch\?[^#]*v=(\w+)|(\d+)).*$/
or even better, get rid of the end part entirely since it doesn't look like it's needed:
/^http:\/\/(?:.*?)\.?(youtube|vimeo)\.com\/(watch\?[^#]*v=(\w+)|(\d+))/
Here's a bit safer way to write your function that allows for any order of the query parameters in the youtube URL and doesn't put stuff into the regex that doesn't need to be there. The code is longer, but it's much more robust and would be much easier to add more providers:
function parseVideoURL(url) {
function getParm(url, base) {
var re = new RegExp("(\\?|&)" + base + "\\=([^&]*)(&|$)");
var matches = url.match(re);
if (matches) {
return(matches[2]);
} else {
return("");
}
}
var retVal = {};
var matches;
if (url.indexOf("youtube.com/watch") != -1) {
retVal.provider = "youtube";
retVal.id = getParm(url, "v");
} else if (matches = url.match(/vimeo.com\/(\d+)/)) {
retVal.provider = "vimeo";
retVal.id = matches[1];
}
return(retVal);
}
Working version here: http://jsfiddle.net/jfriend00/N2hPj/

Here is an updated version that also works with youtu.be and youtube.com/embed urls using #jfriend00's code and some code found here: JavaScript REGEX: How do I get the YouTube video id from a URL?.
EDIT: Updated my answer (and the fiddle) with a function that actually works. :-)
function parseVideoURL(url) {
function getParm(url, base) {
var re = new RegExp("(\\?|&)" + base + "\\=([^&]*)(&|$)");
var matches = url.match(re);
if (matches) {
return(matches[2]);
} else {
return("");
}
}
var retVal = {};
var matches;
var success = false;
if ( url.match('http(s)?://(www.)?youtube|youtu\.be') ) {
if (url.match('embed')) { retVal.id = url.split(/embed\//)[1].split('"')[0]; }
else { retVal.id = url.split(/v\/|v=|youtu\.be\//)[1].split(/[?&]/)[0]; }
retVal.provider = "youtube";
var videoUrl = 'https://www.youtube.com/embed/' + retVal.id + '?rel=0';
success = true;
} else if (matches = url.match(/vimeo.com\/(\d+)/)) {
retVal.provider = "vimeo";
retVal.id = matches[1];
var videoUrl = 'http://player.vimeo.com/video/' + retVal.id;
success = true;
}
if (success) {
return retVal;
}
else { alert("No valid media id detected"); }
}
And a working jsfiddle: http://jsfiddle.net/9n8Nn/3/
Out of the two stackexchange answers, this is the code that worked best for me in the end.

To simplify your regex I would use haystack.indexOf(needle) to determine if the url is vimeo or youtube and then apply site specific regex. Much easier, and later you can add video sites without overly complicating the regex.

Last number gets cut off because you're using ".+" at the end, which means "one or more of any character". Replace the + with a *, meaning "zero or more".

url.match(/^http:\/\/(?:.*?)\.?(youtube|vimeo)\.com\/(watch\?[^#]*v=(\w+).+|(\d+))$/);

Remove the last . and the end matching
url.match(/^http:\/\/(?:.*?)\.?(youtube|vimeo)\.com\/(watch\?[^#]*v=(\w+)|(\d+))/);

Related

Extract the last part of URL path with Custom JS - but not added parameters with?

I am trying to extract the last part of a URL to track in GTM, but not include added parameters like "?gclid=...".
As in:
https://example.com/m/5f5a0a9472cf844b320b6136/?gclid=1234
I want to just extract the 5f5a0a9472cf844b320b6136.
So far I've used:
function() {
var pageUrl = window.location.href;
return pageUrl.split("/")[pageUrl.split("/").length - 1];
}
But that is giving me the gclid number. This issue is, that parameter only exists on the landing page, not subsequent pages.
so if I were to use length - 2] that won't work once they leave the landing page. It would return the /m/.
How do I escape the "?" string on the landing page?
You can do something like this which will be easier
function (){
paths = window.location.pathname.split("/")
return paths[paths.length-1]
}
maybe this will help
const strs = [
"https://example.com/5f5a0a9472cf844b320b6136/?gclid=1234/",
"https://example.com/m/5f5a0a9472cf844b320b6136/?gclid=1234/",
"https://example.com/m/n/5f5a0a9472cf844b320b6136/",
"https://example.com/m/n/5f5a0a9472cf844b320b6136",
];
strs.forEach((str) => {
// ********************
if (str.includes("?")) {
const parts = str.split("/?")[0].split("/");
console.log(parts[parts.length - 1]);
} else {
const lastChar = str.charAt(str.length - 1);
str = lastChar === "/" ? str.substring(0, str.length - 1) : str;
const parts = str.split("/");
console.log(parts[parts.length - 1]);
}
// ********************
});
Since your using GTM, enable and use the built-in "page path" variable instead, which does not include parameters:
function() {
return {{Page Path}}.split("/").pop();
}
pop() return the last element from the array (it also removes it from the array, which in this case does not matter).
Okay, I found an alternative solution that I thought I'd share. Because that variable string always totals 24 characters, I created a function to look for it.
function getQuoteId() {
var segments = window.location.pathname.split('/');
for (var i = segments.length - 1; i >= 0; i--) {
if (segments[i] && segments[i].length === 24) {
return segments[i];
}
}
return null;
}
This mitigates the trailing "/" as well as the added parameters, and it returns the path I was looking to isolate.

JS Regex url validation

I tried to validate url with or without http No matter what i did the function return false.
I checked my regex string in this site:
http://regexr.com/
And its seen as i expect.
function isUrlValid(userInput) {
var regexQuery = "/(http(s)?://.)?(www\.)?[-a-zA-Z0-9#:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9#:%_\+.~#?&//=]*)/";
var url = new RegExp(regexQuery,"g");
if (url.test(userInput)) {
alert('Great, you entered an E-Mail-address');
return true;
}
return false;
}
I fix the problem by change the .test to .match and leave the regex as is.
I change the function to Match + make a change here with the slashes and its work: (http(s)?://.)
The fixed function:
function isUrlValid(userInput) {
var res = userInput.match(/(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9#:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9#:%_\+.~#?&//=]*)/g);
if(res == null)
return false;
else
return true;
}
I believe the other answer will reject some valid url's (like domain names in uppercase or long sub-domains) and allow some invalid ones (like http://www.-example-.com or www.%#&.com). I tried to take into account a number of additional url syntax rules (without getting into internationalisation).
function isUrlValid(userInput) {
var regexQuery = "^(https?://)?(www\\.)?([-a-z0-9]{1,63}\\.)*?[a-z0-9][-a-z0-9]{0,61}[a-z0-9]\\.[a-z]{2,6}(/[-\\w#\\+\\.~#\\?&/=%]*)?$";
var url = new RegExp(regexQuery,"i");
return url.test(userInput);
}
var input = ["https://o.sub-domain.example.com/foo/bar?foo=bar&boo=far#a%20b",
"HTTP://EX-AMPLE.COM",
"example.c",
"example-.com"];
for (var i in input) document.write(isUrlValid(input[i]) + ": " + input[i] + "<br>");
To also allow IP addresses and port numbers, the regex is:
"^(https?://)?(((www\\.)?([-a-z0-9]{1,63}\\.)*?[a-z0-9][-a-z‌​0-9]{0,61}[a-z0-9]\\‌​.[a-z]{2,6})|((\\d{1‌​,3}\\.){3}\\d{1,3}))‌​(:\\d{2,4})?(/[-\\w#‌​\\+\\.~#\\?&/=%]*)?$‌​"
To also allow query strings without a slash between the domain name and the question mark (which is theoretically not allowed, but works in most real-life situations), the regex is:
"^(https?://)?(((www\\.)?([-a-z0-9]{1,63}\\.)*?[a-z0-9][-a-z‌​0-9]{0,61}[a-z0-9]\\‌​.[a‌​-z]{2,6})|((\\d‌​{1,3}\\.){3}\\d{1,3}‌​))(:\\d{2,4})?((/|\\‌​?)[-\\w#\\+\\.~#\\?&‌​/=%]*)?$"
To also make sure that every % is followed by a hex number, the regex is:
"^(https?://)?(((www\\.)?([-a-z0-9]{1,63}\\.)*?[a-z0-9][-a-z‌​0-9]{0,61}[a-z0-9]\\‌​.[a-z]{2,6})|((\\d{1‌​,3}\\.){3}\\d{1,3}))‌​(:\\d{2,4})?((/|\\?)‌​(((%[0-9a-f]{2})|[-\‌​\w#\\+\\.~#\\?&/=])*‌​))?$"
(Note: as John Wu mentioned in a comment, there are valid single-letter domains).
Actually, this question needs a powerful regex and the following code is not very hard to understand, please see below(ES6 - TypeScript):
const isValidUrl = (url: string): boolean => {
const urlRegex = /^((http(s?)?):\/\/)?([wW]{3}\.)?[a-zA-Z0-9\-.]+\.[a-zA-Z]{2,}(\.[a-zA-Z]{2,})?$/g;
const result = url.match(urlRegex);
return result !== null;
};
Try this code.
function CheckURL(fieldId, alertMessage) {
var url = fieldId.value;
if(url !== "")
{
if (url.match(/(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9#:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9#:%_\+.~#?&//=]*)/g) !== null)
return true;
else {
alert(alertMessage);
fieldId.focus();
return false;
}
}
}
var website = document.getElementById('Website');
if (!CheckURL(website, "Enter a valid website address")) {
return false;
}

Regex to add a space after the fourth chars Javascript

I have a string which is a car number plate. But for display purposes I what to add a space after the fourth char in this string. The data comes from a data service so I have to do this on the front-end
eg. AF13BXP to this AF13 BXP
The code below doesn't seem to work:
var $regtext = $('#regNumber');
if ($regtext.length > 0)
{
var regtext = $regtext.text(),
newRegtext = regtext.replace(/[\n\s]/g, '');
console.log(newRegtext);
}
Simple and clear way to do this, without regex:
var $regtext = $('#regNumber');
if ($regtext.length > 0)
{
var regtext = $regtext.text(),
newRegtext = regtext.substr(0, 4) + " " + regtext.substr(4);
console.log(newRegtext);
}
It's also pretty fast too: runs 10,000 times in 351ms, faster than splitting and joining etc. Good if you'll be processing loads of data from the webservice.
You can use following jquery : Demo
$('.test').keyup(function() {
var foo = $(this).val().split(" ").join("");
if (foo.length > 0) {
foo = foo.match(new RegExp('.{1,4}', 'g')).join(" ");
}
$(this).val(foo);
});
If you want to use regex the following should do it.
newRegtext = regtext.replace(/^(.{4})/,'$1 ')
Try this code
$(document).ready(function(e) {
var $regtext = $('#regNumber');
var $regtext = $regtext.text();
if ($regtext.length > 0)
{
regCheck = /^([A-Z1-9a-z]{1,4})([A-Z1-9a-z]*)$/;
regtext = regCheck.test($regtext);
newRegtext = $regtext.replace(regCheck,"$1 $2");
alert(newRegtext);
}
});

RegEx to extract parameters from url hash in JavaScript

My urls will look like:
http://example.com/whatever#page?x=1&locale=hu&y=2
http://example.com/whatever#page?x=1&locale=hu
http://example.com/whatever#page?locale=hu
http://example.com/whatever#page?locale=
http://example.com/whatever#page?x=1
http://example.com/whatever#page
http://example.com/whatever
I'd like to get the locale parameter or empty string if it's not set.
I'm trying something like:
locale = location.hash.replace(/.*(?:[?&]locale=([^&]*))?.*/, "$2");
But my problem is that I couldn't find the right RegExp that works for all cases (both when there's locale= in the hash and when there isn't)
Here's a piece of code that will extract it from the hash and avoid it anywhere else in the URL:
function getLocaleFromHash(url) {
var match = url.match(/#.*[?&]locale=([^&]+)(&|$)/);
return(match ? match[1] : "");
}
And, you can see it work on all your test cases here: http://jsfiddle.net/jfriend00/p37Mx/
If you want to be able to look for any parm in the hash, you would use this:
function getParmFromHash(url, parm) {
var re = new RegExp("#.*[?&]" + parm + "=([^&]+)(&|$)");
var match = url.match(re);
return(match ? match[1] : "");
}
See it work here: http://jsfiddle.net/jfriend00/6kgUk/
A more generic function that will fetch all parameters in the URL would look like this. For normal URLs where the hash is after the query and the parameters are in the query string, it would look like this. This is a bit more code because it does more. It fetches all the parameters into an object where you can look up any parameter by it's key and it URL decodes them all too:
function getParmsFromURL(url) {
var parms = {}, pieces, parts, i;
var hash = url.lastIndexOf("#");
if (hash !== -1) {
// remove hash value
url = url.slice(0, hash);
}
var question = url.lastIndexOf("?");
if (question !== -1) {
url = url.slice(question + 1);
pieces = url.split("&");
for (i = 0; i < pieces.length; i++) {
parts = pieces[i].split("=");
if (parts.length < 2) {
parts.push("");
}
parms[decodeURIComponent(parts[0])] = decodeURIComponent(parts[1]);
}
}
return parms;
}
For a special version that handles parameters in a hash value and after a ? in the hash value like in the OP's question (which isn't the typical case), one could use this:
function getParmsFromURLHash(url) {
var parms = {}, pieces, parts, i;
var hash = url.lastIndexOf("#");
if (hash !== -1) {
// isolate just the hash value
url = url.slice(hash + 1);
}
var question = url.indexOf("?");
if (question !== -1) {
url = url.slice(question + 1);
pieces = url.split("&");
for (i = 0; i < pieces.length; i++) {
parts = pieces[i].split("=");
if (parts.length < 2) {
parts.push("");
}
parms[decodeURIComponent(parts[0])] = decodeURIComponent(parts[1]);
}
}
return parms;
}
Working demo: http://jsfiddle.net/jfriend00/v8cd5/
And, then if you wanted the local option, you'd just do this:
var parms = getParmsFromURL(url);
var locale = parms["locale"];
locale = location.hash.match( /[?&]locale=([^&]*)?/ );
locale = ( locale == null ? "" : locale[1] || "" );
Will do the trick. I don't think the .* are needed, because you do not specify a start or an end of the string.
I tested this regular expression on all your examples and they all worked correctly :)
Edit: sorry, it was invalid in some cases. It is now correct in all cases.
If you really want to do it in one regex:
locale = location.hash.match(/([?&]locale=|^((?![?&]locale=).)+$)([^&]*)/)[3];
It works against all of your examples, though I imagine it's horribly inefficient.

Improve this search engine detecter with javascript

I have the following code which detects which search engine and what search term has been used:
if (document.referrer.search(/google\.*/i) != -1) {
var start = document.referrer.search(/q=/);
var searchTerms = document.referrer.substring(start + 2);
var end = searchTerms.search(/&/);
end = (end == -1) ? searchTerms.length : end;
searchTerms = searchTerms.substring(0, end);
if (searchTerms.length != 0) {
searchTerms = searchTerms.replace(/\+/g, " ");
searchTerms = unescape(searchTerms);
alert('You have searched: '+searchTerms+' on google');
}
}
That actually works, but unfortunately it doesn't work as expected sometimes.
Sometimes if the referrer was even not google i get an alert with the search term as : ttp://www.domain.com ( without H at the start ) i think that may lead to the bug.
Appreciate any help!
Have you tried leveraging existing JS URL parsing schemes? It might save you a bunch of time. For example:
http://blog.stevenlevithan.com/archives/parseuri
It's cutting the "h" off because q= was not in the referrer string. So your start variable is -1. Then you add 2 to that to get your searchTerms var with a substring. You need to check for start to be equal to -1 and return.
I also think your "google" string detection is not bulletproof, I would rather do something like this...
var ref = document.referrer;
var pcol = ref.indexOf("://") + 3;
if(ref.indexOf("google.com") == pcol || ref.indexOf("www.google.com") == pcol) {
// It is google
}
One last thing, you should use decodeURIComponent instead of unescape.

Categories