How to parse a url using Javascript and Regular Expression?

How to parse a url using Javascript and Regular Expression? - javascript

I want to parse some urls's which have the following format :-
var url ="http://www.example.com/cooks/cooking-dress-wine/~no-order/pr?p%5B%5D=sort%3Dfeatured&sid=bks%2C43p&mycracker=ch_vn_clothing_subcategory_Puma&ref=b41c8097-8efe-4acf-8919-0fa81bcb590a"
Its not necessary that the domain name and other parts would be same for all url's, they can vary i.e I am looking at a general solution.
Basically I want to strip off all the other things and get only the part:
/cooks/cooking-dress-wine/~no-order/pr?p%5B%5D=sort%3Dfeatured&sid=bks%2C43p
I thought to parse this using JavaScript and Regular Expression
I am doing like this:
var mapObj = {"/^(http:\/\/)?.*?\//":"","(&mycracker.+)":"","(&ref.+)":""};
var re = new RegExp(Object.keys(mapObj).join("|"),"gi");
url = url.replace(re, function(matched){
return mapObj[matched];
});
But its returning this
http://www.example.com/cooks/cooking-dress-wine/~no-order/pr?p%5B%5D=sort%3Dfeatured&sid=bks%2C43pundefined
Where am I not doing the correct thing? Or is there another approach with an even easier solution?

You can use :
/(?:https?:\/\/[^\/]*)(\/.*?)(?=\&mycracker)/
Code :
var s="http://www.example.com/cooks/cooking-dress-wine/~no-order/pr?p%5B%5D=sort%3Dfeatured&sid=bks%2C43p&mycracker=ch_vn_clothing_subcategory_Puma&ref=b41c8097-8efe-4acf-8919-0fa81bcb590a";
var ss=/(?:https?:\/\/[^\/]*)(\/.*?)(?=\&mycracker)/;
console.log(s.match(ss)[1]);
Demo
Fiddle Demo
Explanation :

Why don't you just map a split array?
You don't quite need to regex the URL, but you will have to run an if statement inside the loop to remove specific GET params from them. In this particular case (key word particular) you just have to substring till the indexOf "&mycracker"
var url ="http://www.example.com/cooks/cooking-dress-wine/~no-order/pr?p%5B%5D=sort%3Dfeatured&sid=bks%2C43p&mycracker=ch_vn_clothing_subcategory_Puma&ref=b41c8097-8efe-4acf-8919-0fa81bcb590a"
var x = url.split("/");
var y = [];
x.map(function(data,index) { if (index >= 3) y.push(data); });
var path = "/"+y.join("/");
path = path.substring(0,path.indexOf("&mycracker"));

Change the following code a little bit and you can retrieve any parameter:
var url = "http://www.example.com/cooks/cooking-dress-wine/~no-order/pr?p%5B%5D=sort%3Dfeatured&sid=bks%2C43p&mycracker=ch_vn_clothing_subcategory_Puma&ref=b41c8097-8efe-4acf-8919-0fa81bcb590a"
var re = new RegExp(/http:\/\/[^?]+/);
var part1 = url.match(re);
var remain = url.replace(re, '');
//alert('Part1: ' + part1);
var rf = remain.split('&');
// alert('Part2: ' + rf);
var part2 = '';
for (var i = 0; i < rf.length; i++)
if (rf[i].match(/(p%5B%5D|sid)=/))
part2 += rf[i] + '&';
part2 = part2.replace(/&$/, '');
//alert(part2)
url = part1 + part2;
alert(url);

var url ="http://www.example.com/cooks/cooking-dress-wine/~no-order/pr?p%5B%5D=sort%3Dfeatured&sid=bks%2C43p&mycracker=ch_vn_clothing_subcategory_Puma&ref=b41c8097-8efe-4acf-8919-0fa81bcb590a";
var newAddr = url.substr(22,url.length);
// newAddr == "/cooks/cooking-dress-wine/~no-order/pr?p%5B%5D=sort%3Dfeatured&sid=bks%2C43p&mycracker=ch_vn_clothing_subcategory_Puma&ref=b41c8097-8efe-4acf-8919-0fa81bcb590a"
22 is where to start slicing up the string.
url.length is how much of it to include.
This works as long as the domain name remains the same on the links.

Related

How to remove `&` sign and all text after that from url

I have this URL
https://myApp-ajj.com/sp?id=cat_item&sys_id=cf9f149cdbd25f00d080591e5e961920&sys_id1=cf9f149cdbd25f00d080591e5e961920&sysp_Id=a691acd9dbdf1bc0e9619fb&sysparm_CloneTable=sc_request&sysparm_CloneTable=sc_request
Here I am getting sys_id two times with different parameters. So I need to remove the second & sign and all text after that.
I tried this
location.href.split('&')[2]
I am sure it doesn't work. Can anyone provide some better solution?

Firstly, you should split the string into an array then use slice to set the starting index number of the element which is 2 in your case and then join the array again into the string.
Read more about these methods JavaScript String split() Method, jQuery slice() Method and JavaScript Array join() Method
var url = 'https://myApp-ajj.com/sp?id=cat_item&sys_id=cf9f149cdbd25f00d080591e5e961920&sys_id=cf9f149cdbd25f00d080591e5e961920&sysp_Id=a691acd9dbdf1bc0e9619fb&sysparm_CloneTable=sc_request&sysparm_CloneTable=sc_request';
url = url.split("&").slice(0,2).join("&");
console.log(url);

Maybe like this:
var url='https://myApp-ajj.com/sp?id=cat_item&sys_id=cf9f149cdbd25f00d080591e5e961920&sys_id=cf9f149cdbd25f00d080591e5e961920&sysp_Id=a691acd9dbdf1bc0e9619fb&sysparm_CloneTable=sc_request&sysparm_CloneTable=sc_request';
var first=url.indexOf('&');
var second=url.indexOf('&',first+1);
var new_url=url.substring(0,second);
console.log(new_url);

You need to find the 2nd occurrence of &sys_id. From there onwards remove all text.
Below is working code:
let url='https://myApp-ajj.com/sp?id=cat_item&sys_id=cf9f149cdbd25f00d080591e5e961920&sys_id=cf9f149cdbd25f00d080591e5e961920&sysp_Id=a691acd9dbdf1bc0e9619fb&sysparm_CloneTable=sc_request&sysparm_CloneTable=sc_request';
let str1=url.indexOf('&sys_id');
let str2=url.indexOf('&sys_id',str1+1);
console.log(url.substring(0,str2));

This is a bit more verbose, but it handles all duplicate query params regardless of their position in the URL.
function removeDuplicateQueryParams(url) {
var params = {};
var parsedParams = '';
var hash = url.split('#'); // account for hashes
var parts = hash[0].split('?');
var origin = parts[0];
var retURL;
// iterate over all query params
parts[1].split('&').forEach(function(param){
// Since Objects can only have one key of the same name, this will inherently
// filter out duplicates and keep only the latest value.
// The key is param[0] and value is param[1].
param = param.split('=');
params[param[0]] = param[1];
});
Object.keys(params).forEach(function(key, ndx){
parsedParams += (ndx === 0)
? '?' + key +'='+ params[key]
: '&' + key +'='+ params[key];
});
return origin + parsedParams + (hash[1] ? '#'+hash[1] : '');
}
console.log( removeDuplicateQueryParams('http://fake.com?q1=fu&bar=fu&q1=fu&q1=diff') );
console.log( removeDuplicateQueryParams('http://fake.com?q1=fu&bar=fu&q1=fu&q1=diff#withHash') );

var url = "https://myApp-ajj.com/sp?id=cat_item&sys_id=cf9f149cdbd25f00d080591e5e961920&sys_id1=cf9f149cdbd25f00d080591e5e961920&sysp_Id=a691acd9dbdf1bc0e9619fb&sysparm_CloneTable=sc_request&sysparm_CloneTable=sc_request"
url = url.slice(0, url.indexOf('&', url.indexOf('&') + 1));
console.log(url);
Try this :)

Try this:
var yourUrl = "https://myApp-ajj.com/sp?id=cat_item&sys_id=cf9f149cdbd25f00d080591e5e961920&sys_id=cf9f149cdbd25f00d080591e5e961920&sysp_Id=a691acd9dbdf1bc0e9619fb&sysparm_CloneTable=sc_request&sysparm_CloneTable=sc_request"
var indexOfFirstAmpersand = yourUrl.search("&"); //find index of first &
var indexOfSecondAmpersand = indexOfFirstAmpersand + yourUrl.substring((indexOfFirstAmpersand + 1)).search("&") + 1; //get index of second &
var fixedUrl = yourUrl.substring(0, indexOfSecondAmpersand)
$(".answer").text(fixedUrl);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<p class="answer">
</p>

You can manipulate the url using String.prototype.substring method. In the example below I created a function that takes a url string and checks for a duplicate parameter - it returns a new string with the second occurrence removed.
var url = "https://myApp-ajj.com/sp?id=cat_item&sys_id=cf9f149cdbd25f00d080591e5e961920&sys_id=cf9f149cdbd25f00d080591e5e961920&sysp_Id=a691acd9dbdf1bc0e9619fb&sysparm_CloneTable=sc_request&sysparm_CloneTable=sc_request";
function stripDuplicateUrlParameter(url, parameterName) {
//get the start index of the repeat occurrance
var repeatIdx = url.lastIndexOf('sys_id');
var prefix = url.substring(0, repeatIdx);
var suffix = url.substring(repeatIdx);
//remove the duplicate part from the string
suffix = suffix.substring(suffix.indexOf('&') + 1);
return prefix + suffix;
}
console.log(stripDuplicateUrlParameter(url));
This solves your specific problem, but wouldn't work if the parameter occurred more than twice or if the second occurrence of the string wasn't immediately following the first - you would probably write something more sophisticated.
As someone already asked - why is the url parameter being duplicated in the string anyway? Is there some way to fix that? (because the question asked seems to me to be a band-aid solution with this being the root issue).

How to strip / replace something from a URL?

I have this URL
http://192.168.22.124:3000/temp/box/c939c38adcf1873299837894214a35eb
I want to replace the last part of my URL which is c939c38adcf1873299837894214a35eb with something else.
How can I do it?

Try this:
var url = 'http://192.168.22.124:3000/temp/box/c939c38adcf1873299837894214a35eb';
somethingelse = 'newhash';
var newUrl = url.substr(0, url.lastIndexOf('/') + 1) + somethingelse;
Note, using the built-in substr and lastIndexOf is far quicker and uses less memory than splitting out the component parts to an Array or using a regular expression.

You can follow this steps:
split the URL with /
replace the last item of array
join the result array using /
var url = 'http://192.168.22.124:3000/temp/box/c939c38adcf1873299837894214a35eb';
var res = url.split('/');
res[res.length-1] = 'someValue';
res = res.join('/');
console.log(res);

Using replace we can try:
var url = "http://192.168.22.124:3000/temp/box/c939c38adcf1873299837894214a35eb";
var replacement = 'blah';
url = url.replace(/(http.*\/).*/, "$1" + replacement);
console.log(url);
We capture everything up to and including the final path separator, then replace with that captured fragment and the new replacement.

Complete guide:
// url
var urlAsString = window.location.href;
// split into route parts
var urlAsPathArray = urlAsString.split("/");
// create a new value
var newValue = "routeValue";
// EITHER update the last parameter
urlAsPathArray[urlAsPathArray.length - 1] = newValue;
// OR replace the last parameter
urlAsPathArray.pop();
urlAsPathArray.push(newValue);
// join the array with the slashes
var newUrl = urlAsPathArray.join("/");
// log
console.log(newUrl);
// output
// http://192.168.22.124:3000/temp/box/routeValue

You could use a regular expression like this:
let newUrl = /^.*\//.exec(origUrl)[0] + 'new_ending';

How can I extract a part of this url with JavaScript?

I have an url that looks like this:
http://localhost/assets/upload/img/hw6dNDBT-36x36.jpg
I want to extract hw6dNDBT.jpg, from the url above.
I tried playing around with regex patterns /img\/.*-/ but that
matches with img/hw6dNDBT-.
How can I do this in JavaScript?

try this:
var url = 'http://localhost/assets/upload/img/hw6dNDBT-36x36.jpg';
var filename = url.match(/img\/(.*)-[^.]+(\.[^.]+)/).slice(1).join('');
document.body.innerHTML = filename;

i would use split() method:
var str = "http://localhost/assets/upload/img/hw6dNDBT-36x36.jpg";
var strArr = str.split("/");
var size = strArr.length - 1;
var needle = strArr[size].split("-");
var fileTypeArr = strArr[size].split(".");
var name = needle[0]+"."+fileTypeArr[fileTypeArr.length-1];
name should now be your searched String so far it contains no / inside it
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/split

/[^\/]+$/ should match all characters after the last / in the URL, which seems to be what you want to match.

No regex:
//this is a hack that lets the anchor tag do some parsing for you
var parser = document.createElement('a');
parser.href = 'http://localhost/assets/upload/img/hw6dNDBT-36x36.jpg';
//optional if you know you can always trim the start of the path
var path = parser.pathname.replace('/assets/uploads/');
var parts = path.split('/');
var img = '';
for(var i=0; i<parts.length; i++) {
if (parts[i] == 'img') {
//since we know the .jpg always follows 'img/'
img = parts[i+1];
}
}

Ah, you were so close! You just need to take your regex and use a capturing group, and then add a littttle bit more!
img\/(.*)-.*(\..*)
So, you can use that in this manner:
var result = /img\/(.*)-.*(\..*)/.exec();
var filename = result[1] + result[2];
Honestly capturing the .jpg, is a little excessive, if you know they are all going to be JPG images, you can probably just take out the second half of the regex.
Incase you are wondering, why do we uses result[1] and result[2]? Because result[0] stores the entire match, which is what you were getting back. The captured groups, which is what we create when we use the parentheses, are stored as the indexes after 0.

Here is some one-liner:
var myUrl = 'http://localhost/assets/upload/img/hw6dNDBT-36x36.jpg',
myValue = myUrl.split('/').pop().replace(/-(?=\d).[^.]+/,'');
We take everything after the last slash then cut out the dimension part.

How can I remove part of a url after a specific keyword?

For example:
example.com/fun/browse/apples/bananas
example.com/browse/gerbals/cooties
How can I find the keyword "browse", regardless of where it is in the url, and remove the following url part. In the above cases that would be "apples" and "gerbals"
I tried spliting it by the "/" and getting the indexOf browse, then removing the next item, but I cant seem to join everything together because that creates a double "//" in the new url.
Any help would be appreciated.
Javascript and jQuery both ok.
NOTE: I do not want to remove any other part of the url. I want to keep everything. I want to only remove the part of the url immediately after browse.

Your question is unclear but let's try :
var s = 'example.com/fun/browse/apples/bananas';
s.replace(/(\/browse)\/[^\/]+/, '$1'); // "example.com/fun/browse/bananas"
Also check this helper :
function removeAfter(s, keyword) {
return s.replace(
new RegExp('(\/' + keyword + ')\/[^\/]+'), '$1'
);
}
Usage :
var s = 'example.com/browse/gerbals/cooties';
removeAfter(s, 'browse'); // "example.com/browse/cooties"
removeAfter(s, 'gerbals'); // "example.com/browse/gerbals"
Demo : http://jsfiddle.net/wared/VRJtL/.

remove browser by using .splice() & rejoin it.
var arr = "example.com/fun/browse/apples/bananas".split('/');
var index = arr.indexOf("browse");
arr.splice(index+1,1); //removes apples
var URL = arr.join('/'); //joins back
result: "example.com/fun/browse/bananas"

Split the URL on the Keyword...
example:
var url = "http://www.foo.com/bar/alpha/beta";
var keyword = "alpha";
var result = url.split(keyword)[0];
//result = "http://www.foo.com/bar/" + keyword;
//adding the keyword is if you need the keyword in your response.

If you're not trying to remove 'fun' part, it's really simple:
var url = 'example.com/fun/browse/apples/bananas';
var result = url.replace(/browse\/[a-zA-Z\/]+/, 'browse/gerbals/cooties');

I'll throw out another option, just to make it interesting. :)
var url = "http://example.com/fun/browse/apples/bananas";
var targetWord = "browse";
var regexPattern = new RegExp("(^.*" + targetWord + "/?)[^/]*/?(.*$)");
var newURL = "";
var matchedURLparts = regexPattern.exec(url);
if (matchedURLparts) {
newURL = (matchedURLparts.length > 2) ? matchedURLparts[1] + matchedURLparts[2] : matchedURLparts[1];
}
else {
newURL = url;
}

Removing a string in javascript

I have a URL say
dummy URL
http://www.google.com/?v=as12&&src=test&img=test
Now I want to remove the &src=test& part alone.I know we can use indexof but somehow I could not get the idea of getting the next ampersand(&) and removing that part alone.
Any help.The new URL should look like
http://www.google.com/?v=as12&img=test

What about using this?:
http://jsfiddle.net/RMaNd/8/
var mystring = "http://www.google.com/?v=as12&&src=test&img=test";
mystring = mystring.replace(/&src=.+&/, ""); // Didn't realize it isn't necessary to escape "&"
alert(mystring);
This assumes that "any" character will come after the "=" and before the next "&", but you can always change the . to a character set if you know what it could be - using [ ]
This also assumes that there will be at least 1 character after the "=" but before the "&" - because of the +. But you can change that to * if you think the string could be "src=&img=test"
UPDATE:
Using split might be the correct choice for this problem, but only if the position of src=whatever is still after "&&" but unknown...for example, if it were "&&img=test&src=test". So as long as the "&&" is always there to separate the static part from the part you want to update, you can use something like this:
http://jsfiddle.net/Y7LdG/
var mystring1 = "http://www.google.com/?v=as12&&src=test&img=test";
var mystring2 = "http://www.google.com/?v=as12&&img=test&src=test";
var final1 = removeSrcPair(mystring1);
alert(final1);
var final2 = removeSrcPair(mystring2);
alert(final2);
function replaceSrc(str) {
return str.replace(/src=.*/g, "");
}
function removeSrcPair(orig) {
var the_split = orig.split("&&");
var split_second = the_split[1].split("&");
for (var i = split_second.length-1; i >= 0; i--) {
split_second[i] = replaceSrc(split_second[i]);
if (split_second[i] === "") {
split_second.splice(i, 1);
}
}
var joined = split_second.join("&");
return the_split[0] + "&" + joined;
}
This still assumes a few things - the main split is "&&"...the key is "src", then comes "=", then 0 or more characters...and of course, the key/value pairs are separated by "&". If your problem isn't this broad, then my first answer seems fine. If "src=test" won't always come first after "&&", you'd need to use a more "complex" Regex or this split method.

Something like:
url = "http://www.google.com/?v=as12&&src=test&img=test"
firstPart = url.split('&&')[0];
lastPart = url.split('&&')[1];
lastPart = lastPart.split('&')[1];
newUrl = firstPart+'&'+lastPart;
document.write(newUrl);

Details: Use the split method.
Solution Edited: I changed the below to test that the last query string exists
var url = "http://www.google.com/?v=as12&&src=test&img=test";
var newUrl;
var splitString = url.split('&');
if (splitString.length > 3)
{
newURL = splitString[0] + "&" + splitString[3];
}
else
{
newURL = splitString[0];
}

We Keep Coding

JavaScript is the programming language of the Web.

How to parse a url using Javascript and Regular Expression? - javascript

Related

How to remove `&` sign and all text after that from url

How to strip / replace something from a URL?

How can I extract a part of this url with JavaScript?

How can I remove part of a url after a specific keyword?

Removing a string in javascript

Categories

Resources