remove hostname and port from url using regular expression - javascript

I am trying to remove
http://localhost:7001/
part from
http://localhost:7001/www.facebook.com
to get the output as
www.facebook.com
what is the regular expression that i can use to achieve this exact pattern?

You don't need any library or REGEX
var url = new URL('http://localhost:7001/www.facebook.com')
console.log(url.pathname)
https://developer.mozilla.org/en-US/docs/Web/API/URL

Based on #atiruz answer, but this is
url = url.replace( /^[a-zA-Z]{3,5}\:\/{2}[a-zA-Z0-9_.:-]+\//, '' );
shortest
can take https or ftp too
can take url with or without explicit port

To javascript you can use this code:
var URL = "http://localhost:7001/www.facebook.com";
var newURL = URL.replace (/^[a-z]{4,5}\:\/{2}[a-z]{1,}\:[0-9]{1,4}.(.*)/, '$1'); // http or https
alert (newURL);
Look at this code in action Here
Regards,
Victor

This is how I made it work without resorting to regular expressions:
var URL = "http://localhost:7001/www.facebook.com";
var URLsplit = URL.split('/');
var host = URLsplit[0] + "//" + URLsplit[2] + "/";
var newURL = URL.replace(host, '');
Might not be an elegant solution though but it should be easier to understand for those who don't have much experience with regex (like me! ugh!).

For a simple regex to match any protocol, domain, and (optionally) port:
var url = 'http://localhost:7001/www.facebook.com';
// Create a regex to match protocol, domain, and host
var matchProtocolDomainHost = /^.*\/\/[^\/]+:?[0-9]?\//i;
// Replace protocol, domain and host from url, assign to `myNewUrl`
var myNewUrl = url.replace(matchProtocolDomainHost, '');
Now myNewUrl === 'www.facebook.com'.
See demo on regex101

Regex to match the part of url, that you want to remove, will be something like: /^http[s]?:\/\/.+?\//
Example of Java code (note that in Java we use two backslashes "\\" for escaping character):
String urlWithBasePath = "http://localhost:7001/www.facebook.com";
String resultUrl = urlWithBasePath.replaceFirst("^http[s]?:\\/\\/.+?\\/", ""); // resultUrl => www.facebook.com
Example of JS code:
let urlWithBasePath = "http://localhost:7001/www.facebook.com";
let resultUrl = urlWithBasePath.replace(/^http[s]?:\/\/.+?\//, ''); // resultUrl => www.facebook.com
Example of Python code:
import re
urlWithBasePath = "http://localhost:7001/www.facebook.com"
resultUrl = re.sub(r'^http[s]?:\/\/.+?\/', '', urlWithBasePath) # resultUrl => www.facebook.com
Example or Ruby code:
urlWithBasePath = "http://localhost:7001/www.facebook.com"
resultUrl = urlWithBasePath = urlWithBasePath.sub(/^http[s]?:\/\/.+?\//, '') # resultUrl => www.facebook.com
Example of PHP code:
$urlWithBasePath = "http://localhost:7001/www.facebook.com";
$resultUrl = preg_replace('/^http[s]?:\/\/.+?\//', '', $urlWithBasePath); // resultUrl => www.facebook.com
Example of C# code (you should also specify using System.Text.RegularExpressions;):
string urlWithBasePath = "http://localhost:7001/www.facebook.com";
string resultUrl = Regex.Replace(urlWithBasePath, #"^http[s]?:\/\/.+?\/", ""); // resultUrl => www.facebook.com

All other regular expressions here look a bit complicated? This is all that's needed: (right?)
var originSlash = /^https?:\/\/[^/]+\//i;
theUrl.replace(originSlash, '');

Alternatively, you can parse the url using as3corelib's URI class. That way you don't have to do any string manipulations, which helps to avoid making unintentional assumptions. It requires a few more lines of code, but it's a more general solution that should work for a wide variety of cases:
var url : URI = new URI("http://localhost:7001/myPath?myQuery=value#myFragment");
// example of useful properties
trace(url.scheme); // prints: http
trace(url.authority); // prints the host: localhost
trace(url.port); // prints: 7001
trace(url.path); // prints: /myPath
trace(url.query); // prints: myQuery=test
trace(url.fragment); // prints: myFragment
// build a new relative url, make sure we keep the query and fragment
var relativeURL : URI = new URI();
relativeURL.path = url.path;
relativeURL.query = url.query;
relativeURL.fragment = url.fragment;
var relativeURLString : String = relativeURL.toString();
// remove first / if any
if (relativeURLString.charAt(0) == "/") {
relativeURLString = relativeURLString.substring(1, relativeURLString.length);
}
trace(relativeURLString); // prints: myPath?myQuery=test#myFragment

instead of using regex you could just use the browser's capabilities of parsing an URL:
var parser = document.createElement('a');
parser.href = "http://localhost:7001/www.facebook.com";
var path = parser.pathname.substring(1); // --> results in 'www.facebook.com'

If you are just looking to remove the origin and get the rest of the URL, including hashes, query params and any characters without restrictions:
function getUrlFromPath(targetUrl) {
const url = new URL(targetUrl);
return targetUrl.replace(url.origin, '');
}
function main() {
const testUrls = [
'http://localhost:3000/test?search=something',
'https://www.google.co.in/search?q=hello+there+obi+wan&newwindow=1&sxsrf=ALiCzsZoaZvs0CrLQEHFmmR-MdrZ2ZHW2A%3A1665462761920&source=hp&ei=6fFEY_7cNY36wAOFyqagBA&iflsig=AJiK0e8AAAAAY0T_-R12vR7P_tmmkpEqgzmoZNczbnZA&ved=0ahUKEwi-9buirNf6AhUNPXAKHQWlCUQQ4dUDCAc&uact=5&oq=hello+there+obi+wan&gs_lcp=Cgdnd3Mtd2l6EAMyBQgAEIAEMgUIABCABDIFCAAQgAQyBQgAEIAEMgUIABCABDIFCAAQgAQyBQgAEIAEMgUIABCABDIFCAAQgAQyBQgAEIAEOgQIIxAnOhEILhCABBCxAxCDARDHARDRAzoLCAAQgAQQsQMQgwE6CwguEIAEELEDEIMBOg4ILhCABBCxAxCDARDUAjoICAAQsQMQgwE6CwguEIAEELEDENQCOggIABCABBCxAzoICC4QsQMQgwFQAFjjE2C6FmgAcAB4A4AB1QSIAd8ZkgELMC45LjIuMC4yLjGYAQCgAQE&sclient=gws-wiz'
];
testUrls.forEach(url => {
console.log(getUrlFromPath(url));
});
}
main();
A failsafe regex pattern to achieve this will get complex and cumbersome to come up with.

Just use replace
"http://localhost:7001/www.facebook.com".replace("http://localhost:7001/",'')

Related

Get string between / in URL

Let's say I have the following URL https://www.google.com/en-gb/test-page.
I'm trying to extract whatever is after the domain name, in this case en-gb, however, with my approach, it's currently spitting out the entire slug.
I.e.
var pathname = window.location.pathname.substr(1);
console.log(pathname);
Will log out:
en-gb/test-page
How can I get it so that it only log out en-gb?
Just split the url with the / delimiter
const url = 'https://www.google.com/en-gb/test-page';
console.log(url.split('/')[3]);
You can use URL.pathname
Code:
const url = new URL('https://www.google.com/en-gb/test-page');
const str = url.pathname.split('/')[1];
console.log(str);
Split on a slash:
var [pathname] = window.location.pathname.substr(1).split("/");

Javascript how to remove parameter from URL sting by value?

How to remove parameters with value = 3 from URL string?
Example URL string:
https://www.example.com/test/index.html?param1=4&param2=3&param3=2&param4=1&param5=3
If you are targeting browsers that support URL and URLSearchParams you can loop over the URL's searchParams object, check each parameter's value, and delete() as necessary. Finally using URL's href property to get the final url.
var url = new URL(`https://www.example.com/test/index.html?param1=4&param2=3&param3=2&param4=1&param5=3`)
//need a clone of the searchParams
//otherwise looping while iterating over
//it will cause problems
var params = new URLSearchParams(url.searchParams.toString());
for(let param of params){
if(param[1]==3){
url.searchParams.delete(param[0]);
}
}
console.log(url.href)
There is a way to do this with a single regex, using some magic, but I believe that would require using lookbehinds, which most JavaScript regex engines mostly don't yet support. As an alternative, we can try splitting the query string, then just examining each component to see if the value be 3. If so, then we remove that query parameter.
var url = "https://www.example.com/test/index.html?param1=4&param2=3&param3=2&param4=1&param5=3";
var parts = url.split(/\?/);
var params = parts[1].replace(/^.*\?/, "").split(/&/);
var param_out = "";
params.forEach(function(x){
if (!/.*=3$/.test(x))
param_out += x;
});
url = parts[0] + (param_out !== "" ? "?" + param_out : "");
console.log(url);
You could use a regular expression replace. Split off the query string and then .replace &s (or the initial ^) up until =3s:
const str = 'https://www.example.com/test/index.html?param1=4&param2=3&param3=2&param4=1&param5=3';
const [base, qs] = str.split('?');
const replacedQs = qs.replace(/(^|&)[^=]+=3\b/g, '');
const output = base + (replacedQs ? '?' + replacedQs : '');
console.log(output);

How to strip / replace something from a URL?

I have this URL
http://192.168.22.124:3000/temp/box/c939c38adcf1873299837894214a35eb
I want to replace the last part of my URL which is c939c38adcf1873299837894214a35eb with something else.
How can I do it?
Try this:
var url = 'http://192.168.22.124:3000/temp/box/c939c38adcf1873299837894214a35eb';
somethingelse = 'newhash';
var newUrl = url.substr(0, url.lastIndexOf('/') + 1) + somethingelse;
Note, using the built-in substr and lastIndexOf is far quicker and uses less memory than splitting out the component parts to an Array or using a regular expression.
You can follow this steps:
split the URL with /
replace the last item of array
join the result array using /
var url = 'http://192.168.22.124:3000/temp/box/c939c38adcf1873299837894214a35eb';
var res = url.split('/');
res[res.length-1] = 'someValue';
res = res.join('/');
console.log(res);
Using replace we can try:
var url = "http://192.168.22.124:3000/temp/box/c939c38adcf1873299837894214a35eb";
var replacement = 'blah';
url = url.replace(/(http.*\/).*/, "$1" + replacement);
console.log(url);
We capture everything up to and including the final path separator, then replace with that captured fragment and the new replacement.
Complete guide:
// url
var urlAsString = window.location.href;
// split into route parts
var urlAsPathArray = urlAsString.split("/");
// create a new value
var newValue = "routeValue";
// EITHER update the last parameter
urlAsPathArray[urlAsPathArray.length - 1] = newValue;
// OR replace the last parameter
urlAsPathArray.pop();
urlAsPathArray.push(newValue);
// join the array with the slashes
var newUrl = urlAsPathArray.join("/");
// log
console.log(newUrl);
// output
// http://192.168.22.124:3000/temp/box/routeValue
You could use a regular expression like this:
let newUrl = /^.*\//.exec(origUrl)[0] + 'new_ending';

parsing pathname in url.pathname()

I have a url as below
localhost:1340/promotionDetails/pwd1/pwd2?promotion_id=PROM008765
I used url module for parsing the url for the pathname below is the code
var url=require('url').parse('http://localhost:1340/promotionDetails/pwd1/pwd2? promotion_id=PROM008765', true).pathname
console.log(url);
The output that I got is
/promotionDetails/pwd1/pwd2
I used the split function to get the pwd1 and pwd2 from the path.I want to know if there is anyother way to get pwd1 and pwd2 without using the split function.Any help will be really helpful.
You can regex to get the url directories without using split.
var myurl = "localhost:1340/promotionDetails/pwd1/pwd2?promotion_id=PROM008765";
var match = myurl.match(/[^/?]*[^/?]/g);
/* matches everything between / or ?
[ 'localhost:1340',
'promotionDetails',
'pwd1',
'pwd2',
'promotion_id=PROM008765' ]
*/
console.log(match[2]);//pwd1
console.log(match[3]);//pwd2
Updated 2019 ES6 answer:
You can regex to get the url directories without using split.
const myurl = "localhost:1340/promotionDetails/pwd1/pwd2?promotion_id=PROM008765";
const filteredURL = myurl.match(/[^/?]*[^/?]/g).filter((urlParts) => {
return urlParts !== 'promotionDetails' && urlParts !== 'localhost:1340'
})
const [pwd1, pwd2] = filteredURL;
console.log(pwd1)
console.log(pwd2)

Get the last part of an url in Javascript

Using the following URL example, how would I get the obtain the username from it?
http://www.mysite.com/username_here801
A regex solution would be cool.
The following sample only gets the domain name:
var url = $(location).attr('href');
alert(get_domain(url));
function get_domain(url) {
return url.match(/http:\/\/.*?\//);
}
jQuery solutions are also acceptable.
var url = "http://www.mysite.com/username_here801";
var username = url.match(/username_(.+)/)[1];
http://jsfiddle.net/5LHFd/
To always return the text directly after the slash that follows the .com you can do this:
var url = "http://www.mysite.com/username_here801";
var urlsplit = url.split("/");
var username = urlsplit[3];
http://jsfiddle.net/5LHFd/2/
You can access it with document.location.pathname
If a RegEx solution is acceptable, you could try:
function get_path(url) {
// following regex extracts the path from URL
return url.replace(/^https?:\/\/[^\/]+\//i, "").replace(/\/$/, "");
}
You could use your getDomain() function to find out where your pathname start.:
function getUsername(url){
var position = getDomain(url).length + 1;
return url.slice(position);
}

Categories