Get subdomain and load it to url with greasemonkey - javascript

I am having the URL http://somesubdomain.domain.com (subdomains may vary, domain is always the same). Need to take subdomain and reload the page with something like domain.com/some/path/here/somesubdomain using greasemonkey (or open a new window with URL domain.com/some/path/here/somesubdomain, whatever).

var full = window.location.host
//window.location.host is subdomain.domain.com
var parts = full.split('.')
var sub = parts[0]
var domain = parts[1]
var type = parts[2]
//sub is 'subdomain', 'domain', type is 'com'
var newUrl = 'http://' + domain + '.' + type + '/your/other/path/' + subDomain
window.open(newUrl);

The answer provided by Derek will work in the most common cases, but will not work for "xxx.xxx" sub domains, or "host.co.uk". (also, using window.location.host, will also retrieve the port number, which is not treated : http://www.w3schools.com/jsref/prop_loc_host.asp)
To be honest I do not see a perfect solution for this problem.
Personally, I've created a method for host name splitting which I use very often because it covers a larger number of host names.
This method splits the hostname into {domain: "", type: "", subdomain: ""}
function splitHostname() {
var result = {};
var regexParse = new RegExp('([a-z\-0-9]{2,63})\.([a-z\.]{2,5})$');
var urlParts = regexParse.exec(window.location.hostname);
result.domain = urlParts[1];
result.type = urlParts[2];
result.subdomain = window.location.hostname.replace(result.domain + '.' + result.type, '').slice(0, -1);;
return result;
}
console.log(splitHostname());
This method only returns the subdomain as a string:
function getSubdomain(hostname) {
var regexParse = new RegExp('[a-z\-0-9]{2,63}\.[a-z\.]{2,5}$');
var urlParts = regexParse.exec(hostname);
return hostname.replace(urlParts[0],'').slice(0, -1);
}
console.log(getSubdomain(window.location.hostname));
// for use in node with express: getSubdomain(req.hostname)
These two methods will work for most common domains (including co.uk)
NOTE: the slice at the end of sub domains is to remove the extra dot.
I hope this solves your problem.

The solutions provided here work some of the time, or even most of the time, but not everywhere. To the best of my knowledge, the best way to find the full subdomain of any domain (and remember, sometimes subdomains have periods in them too! You can have sub-subdomains, etc) is to use the Public Suffix List, which is maintained by Mozilla.
The part of the URL that isn't in the Public Suffix List is the subdomain plus the domain itself, joined by a dot. Once you remove the public suffix, you can remove the domain and have just the subdomain left by removing the last segment between the dots.
Let's look at a complicated example. Say you're testing sub.sub.example.pvt.k12.ma.us. pvt.k12.ma.us is a public suffix, believe it or not! So if you used the Public Suffix List, you'd be able to quickly turn that into sub.sub.example by removing the known suffix. Then you could go from sub.sub.example to just sub.sub after stripping off the last portion of the remaining pieces, which was the domain. sub.sub is your subdomain.

This could work in most cases except for the one that #jlbang mention
const split=location.host.split(".");
let subdomain="";
let domain="";
if(split.length==1){//localHost
domain=split[0];
}else if(split.length==2){//sub.localHost or example.com
if(split[1].includes("localhost")){//sub.localHost
domain=split[1];
subdomain=split[0];
}else{//example.com
domain=split.join(".");
}
}else{//sub2.sub.localHost or sub2.sub.example.com or sub.example.com or example.com.ec sub.example.com.ec or ... etc
const last=split[split.length-1];
const lastLast=split[split.length-2];
if(last.includes("localhost")){//sub2.sub.localHost
domain=last;
subdomain=split.slice(0,split.length-1).join(".");
}else if(last.length==2 && lastLast.length<=3){//example.com.ec or sub.example.com.ec
domain=split.slice(split.length-3,split.length).join(".");
if(split.length>3){//sub.example.com.ec
subdomain=split.slice(0,split.length-3).join(".");
}
}else{//sub2.sub.example.com
domain=split.slice(split.length-2,split.length).join(".");
subdomain=split.slice(0,split.length-2).join(".");
}
}
const newUrl = 'http://example.com/some/path/here/' + subdomain

I adapted Vlad's solution in modern Typescript:
const splitHostname = (
hostname: string
): { domain: string; type: string; subdomain: string } | undefined => {
var urlParts = /([a-z-0-9]{2,63}).([a-z.]{2,5})$/.exec(hostname);
if (!urlParts) return;
const [, domain, type] = urlParts;
const subdomain = hostname.replace(`${domain}.${type}`, "").slice(0, -1);
return {
domain,
type,
subdomain,
};
};

get a subdomain from URL
function getSubdomain(url) {
url = url.replace( "https://www.","");
url = url.replace( "http://www.","");
url = url.replace( "https://","");
url = url.replace("http://", "");
var temp = url.split("/");
if (temp.length > 0) {
var temp2 = temp[0].split(".");
if (temp2.length > 2) {
return temp2[0];
}
else {
return "";
}
}
return "";
}

Related

Removing subdomain in a string in TypeScript

I have a string in TypeScript which is subdomain.domain.com I want to create a new string that is just the domain on its own, so for example subdomain.domain.com would become domain.com
Note: The 'subdomain' part of the URL could be different sizes so it could be 'subdomain.domain.com' or it might be 'sub.domain.com' so I can't do this on character size. The domain might also be different so it could be 'subdomain.domain.com' or it could be 'subdomain.new-domain.com'.
So basically I need to just remove up to and including the first '.' - hope that all makes sense.
var domain = 'mail.testing.praveent.com';
var domainCharacters = domain.split('').reverse();
var domainReversed = '', dotCount = 0;
do {
if (domainCharacters[0] === '.') {
dotCount++;
if (dotCount == 2) break;
}
domainReversed += domainCharacters[0];
domainCharacters.splice(0, 1);
} while (dotCount < 2 && domainCharacters.length > 0);
var domainWithoutSubdomain = domainReversed.split('').reverse().join('');
This will strip off the subdomains in a domain and give the root (#) domain name alone.
You can split it by . and get only the last 2 elements and turn it back into a string again.
function strip(url: string) {
const fragments = url.split('.');
const last = fragments.pop();
try {
// if its a valid url with a protocol (http/https)
const instance = new URL(url);
return `${instance.protocol}//${fragments.pop()}.${last}`;
} catch (_) {
return `${fragments.pop()}.${last}`;
}
}
strip('https://subdomain.example.com') // https://example.com
strip('subdomain.example.com') // example.com
strip('https://subdomain.another-subdomain.example.com') // https://example.com

Trying to use .split with two different page URL structures

My intention
pull out language code from my two type of URL strings
My question
How do I make a split between two different URL structures? I have two URL strucutres, listed as examples below under the code.
My problem
I can't figure out how I should split the two different variables separately or together in one line with cc =... using custom javascript with Google Tag Manager
Code
function() {
cc = {{Page Path}}.split("/")[1].toLowerCase();
cc = {{virtualURL}}.split("/#/")[1].toLowerCase();
if(cc.length == 2) {
cc = cc;
} else {
cc = 'other';
}
return cc;
}
Example of {{Page Path}} - https://www.example.com/en/.....
Example of {{virtualURL}} - https://www.booking.example.com/#/en/........
Note
In both examples I want to be able to pull out en successfully.
Any solution here is likely to be fragile, you could have https://example.com/xy/ where xy isn't meant to be a language code.
But allowing for that, and allowing only two-character language codes:
var rexGetLang = /\/([a-z]{2})\//;
function getLang(url) {
var match = rexGetLang.exec(url);
return match ? match[1] : "other";
}
console.log(getLang("https://www.example.com/en/....."));
console.log(getLang("https://www.booking.example.com/#/en/........"));
Or if you want to allow for en-GB and such:
var rexGetLang = /\/([a-z]{2}(?:-[A-Z]{2})?)\//;
function getLang(url) {
var match = rexGetLang.exec(url);
return match ? match[1] : "other";
}
console.log(getLang("https://www.example.com/en/....."));
console.log(getLang("https://www.booking.example.com/#/en/........"));
console.log(getLang("https://www.booking.example.com/........"));
console.log(getLang("https://www.example.com/en-GB/....."));
console.log(getLang("https://www.booking.example.com/#/en-US/........"));
We can take out the language code simply by splitting the URL by /. Let's see what we get when we split the two URL's given as the example:
https://www.example.com/en/ - ["https:", "", "www.example.com", "en", ""]
https://www.booking.example.com/#/en/ - ["https:", "", "www.booking.example.com", "#", "en", ""]
In the above examples we can see that language code is either coming at 3rd index (1st example) or at the 4th index (2nd example) which can be taken care by an if condition. Let's see how:
let url = 'https://www.booking.example.com/#/en/';
let urlTokens = url.split('/');
let languageCode = urlTokens[3] === '#' ? urlTokens[4] : urlTokens[3];
console.log(languageCode);
// Web API for handling URL https://developer.mozilla.org/en-US/docs/Web/API/URL
const url = new URL('https://www.example.com/en/website');
url.hostname; // 'example.com'
url.port; // ''
url.search; // ''
url.pathname; // '/en/website'
url.protocol; // 'https:'
// RegEx to see if /en/ exists https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp
new RegExp(/\/en\//).test(url.pathname) // true
function getLanguage(url) {
var rgx = /^https:\/\/[^\/]+\/(?:#\/)?([a-z]+)/;
var language = url.match(rgx)[1];
return language;
}
var url = 'https://www.booking.example.com/#/en/';
var language = getLanguage(url);

Add a variable in URL instead of replacing one

I'm facing a little issue with a javascript script. I'm trying to make my website multi languages. All is set in database, and my select works on pages where the URLs don't have variables. Here is my script:
<script type="text/javascript">
function submitForm() {
var thelang = document.getElementById('lang').options[document.getElementById('lang').selectedIndex].value;
window.location.href = window.location.pathname + '?lang=' + thelang;
}
</script>
In the homepage case, it works, and change http://localhost/ by http://localhost/?lang=en
But when I have an URL with a variable already set, it replaces it. From http://localhost/modules/product/product.php?id=1 I have http://localhost/modules/product/product.php?lang=en and the result I'd like is:
http://localhost/modules/product/product.php?id=1&lang=en
How to fix the script to make it works in both cases, or add the varibale, or glue it with an existing one?
Try checking to see if querystring params already exist in the URL.
function submitForm() {
var thelang = document.getElementById('lang').options[document.getElementById('lang').selectedIndex].value;
if (window.location.href.indexOf('?') >= 0) {
// There are already querystring params in the URL. Append my new param.
window.location.href = window.location.href + '&lang=' + thelang;
} else {
// There are not querystring params in the URL. Create my new param.
window.location.href = window.location.href + '?lang=' + thelang;
}
}
Update: Account for Subsequent Lang Changes
This assumes that the lang value will always be two characters.
function submitForm() {
var thelang = document.getElementById('lang').options[document.getElementById('lang').selectedIndex].value;
var newUrl = window.location.href;
var langIndex = newUrl.indexOf('lang=');
if (langIndex >= 0) {
// Lang is already in the querystring params. Remove it.
newUrl = newUrl.substr(0, langIndex) + newUrl.substring(langIndex + 8); // 8 is length of lang key/value pair + 1.
}
// Remove the final '?' or '&' character if there are no params remaining.
newUrl = newUrl.endsWith('?') || newUrl.endsWith('&') ? newUrl.substr(0, newUrl.length - 1) : newUrl;
newUrl = newUrl.indexOf('?') >= 0
? newUrl + '&lang=' + thelang // There are already querystring params in the URL. Append my new param.
: newUrl + '?lang=' + thelang; // There are not querystring params in the URL. Create my new param.
window.location.href = newUrl;
}
If I understand you correctly you want to add ?lang=en at the end. Unless there is already an id=1(or similar) there.
So you could just add an if statement, looking if there is .php writen at the end.
Not a very pretty solution but you are alreaady adding strings together so it doesn't matter
You can use the "search" element of window.location. See here for compatibility. You can then, concat the result with your desired parameter. BUT, you can do something way more complex (and secure) and check if there's already a parameter with that ID using a for + URLSearchParams.
const params = new URLSearchParams(window.location.search);
const paramsObj = Array.from(params.keys()).reduce(
(acc, val) => ({ ...acc, [val]: params.get(val) }), {}
);
This should fix it:
var currentUrl = window.location.origin + window.location.pathname;
var newUrl = currentUrl + (currentUrl.includes('?') ? ('&lang=' + thelang) : ('?lang=' + thelang));
window.location.href = newUrl;

remove parameter from url via regex

I have a function for removing the parameter from url.
this is my function :
function removeParameter(key) {
let parameters = document.location.search;
const regParameter = new RegExp('[?|&]' + key + "=([a-zA-Z0-9_-]+)");
if (regParameter.test(parameters)){
parameters = parameters.replace(regParameter , '')
}
window.history.pushState({}, '', parameters)}
when I call this function for the url like this
http://example.com/products?color=4&brand=apple
first call function for removing the brand is correct result
removeParameter('brand')
but another call this function for removing the color doesn't work correctly.
actually when i want to removing the first parameter(key come's after ? mark) this function doesn't work...
The third argument to pushState() is the entire URL. Your function is sending only the location.search i.e. query parameter part of the URL. So you'll need to do
window.history.pushState({}, '', location.pathname + parameters)}
on your function's last line. Also, your code is currently not handling the edge cases i.e. if you remove first parameter, it removes the ? and not the trailing &. So you end up with http://example.com/products&brand=apple which isn't a valid URL. And finally, I simplified your expression a bit.
const reg = new RegExp('[?&](' + key + '=[\\w-]+&?)');
let matches = reg.exec(parameters);
if (matches){
parameters = parameters.replace(matches[1], '');
}
This still doesn't handle more complex cases (params without values, hash etc). There are a couple of other options:
Dump the regex and go with a split('&') based solution. More code, but a lot more readable and less error-prone.
If you don't need IE support, use URLSearchParams. Then your entire function can be reduced to this:
var params = new URLSearchParams(location.search);
params.delete(key);
window.history.pushState({}, '', location.pathname + "?" + params.toString());
Correct me if I'm wrong,
I made a working snippet out of your code, and it seems to work correctly.
If you run the snippet on a fresh new tab, it will add 2 urls in the tab history.
I also modified your regex to make it easier.
function removeParameter(key) {
var parameters = url; // document.location.search; // TAKIT: modified for test
const regParameter = new RegExp('[?|&]' + key + "=([^&]+)"); // TAKIT: Simplified regex
if (regParameter.test(parameters)) {
parameters = parameters.replace(regParameter, '')
}
window.history.pushState({}, 'Test 1', parameters);
return parameters; // TAKIT: Added
}
// Output
var url = "https://stacksnippets.net/js?color=4&brand=apple";
console.log(url);
url = removeParameter("brand");
console.log(url);
url = removeParameter("color");
console.log(url);
Hope it helps.
This function can be used, i modified #Takit Isy answer
function removeParameter(key) {
var parameters = url; // document.location.search; // TAKIT: modified for test
const regParameter = new RegExp(key + "=([a-zA-Z0-9_-]+[&]{0,1})");
if (regParameter.test(parameters)) {
parameters = parameters.replace(regParameter, '')
if(parameters.substring(parameters.length-1)=='?' || parameters.substring(parameters.length-1)=='&'){
parameters = parameters.slice(0, -1);
}
}
return parameters; // TAKIT: Added
}

Decoding the following link

I have been digging in some javascript api's lately and I found the following line:
get_url_info: function($db_link) {
var ldst_href;
if ($db_link.data('ldst-href')) {
ldst_href = $db_link.data('ldst-href');
}
else {
ldst_href = $db_link.attr('href');
}
var matchs = ldst_href.match(/^http:\/\/([^\.]+)\..*playguide\/db\/(.*?)\/?(#.+)?$/);
var subdomain = matchs[1];
var path = matchs[2];
if (!eorzeadb.dynamic_tooltip && eorzeadb.versions.data) {
url = eorzeadb.cdn_prefix + 'pc/tooltip/' + eorzeadb.versions.data +
'/' + subdomain + '/' + path + '.js';
}
else {
url = ldst_href + '/jsonp/';
}
return {
'url': url,
'data_key': subdomain + '/' + path
};
},
This result is supposed the return an array which I assume is contained in the link. I'm having a hard time decrypting the link tho.
Does anybody have any experience with these kinds of links or a way that I could start out?
http://regexr.com/
Here you can understand all the parts of the regex. Basically, is looking for a pattern like this:
http://(blablah).playguide/db/(OPTIONAL)(optional/)#(probably some id)
The result will be an array with the original link, followed by the domain, the first optional argument, and the hashtag, something like this
["http://(blablah).playguide/db/(OPTIONAL)(optional/)#(probably some id)", "(blablah)", "(OPTIONAL)(optional/)", "#(probably some id)"]
It will then use that information to build a different link

Categories