javascript retrieve all links from text [duplicate] - javascript

Does anyone have suggestions for detecting URLs in a set of strings?
arrayOfStrings.forEach(function(string){
// detect URLs in strings and do something swell,
// like creating elements with links.
});
Update: I wound up using this regex for link detection… Apparently several years later.
kLINK_DETECTION_REGEX = /(([a-z]+:\/\/)?(([a-z0-9\-]+\.)+([a-z]{2}|aero|arpa|biz|com|coop|edu|gov|info|int|jobs|mil|museum|name|nato|net|org|pro|travel|local|internal))(:[0-9]{1,5})?(\/[a-z0-9_\-\.~]+)*(\/([a-z0-9_\-\.]*)(\?[a-z0-9+_\-\.%=&]*)?)?(#[a-zA-Z0-9!$&'()*+.=-_~:#/?]*)?)(\s+|$)/gi
The full helper (with optional Handlebars support) is at gist #1654670.

First you need a good regex that matches urls. This is hard to do. See here, here and here:
...almost anything is a valid URL. There
are some punctuation rules for
splitting it up. Absent any
punctuation, you still have a valid
URL.
Check the RFC carefully and see if you
can construct an "invalid" URL. The
rules are very flexible.
For example ::::: is a valid URL.
The path is ":::::". A pretty
stupid filename, but a valid filename.
Also, ///// is a valid URL. The
netloc ("hostname") is "". The path
is "///". Again, stupid. Also
valid. This URL normalizes to "///"
which is the equivalent.
Something like "bad://///worse/////"
is perfectly valid. Dumb but valid.
Anyway, this answer is not meant to give you the best regex but rather a proof of how to do the string wrapping inside the text, with JavaScript.
OK so lets just use this one: /(https?:\/\/[^\s]+)/g
Again, this is a bad regex. It will have many false positives. However it's good enough for this example.
function urlify(text) {
var urlRegex = /(https?:\/\/[^\s]+)/g;
return text.replace(urlRegex, function(url) {
return '' + url + '';
})
// or alternatively
// return text.replace(urlRegex, '$1')
}
var text = 'Find me at http://www.example.com and also at http://stackoverflow.com';
var html = urlify(text);
console.log(html)
// html now looks like:
// "Find me at http://www.example.com and also at http://stackoverflow.com"
So in sum try:
$$('#pad dl dd').each(function(element) {
element.innerHTML = urlify(element.innerHTML);
});

Here is what I ended up using as my regex:
var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
This doesn't include trailing punctuation in the URL. Crescent's function works like a charm :)
so:
function linkify(text) {
var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return text.replace(urlRegex, function(url) {
return '' + url + '';
});
}

I googled this problem for quite a while, then it occurred to me that there is an Android method, android.text.util.Linkify, that utilizes some pretty robust regexes to accomplish this. Luckily, Android is open source.
They use a few different patterns for matching different types of urls. You can find them all here:
http://grepcode.com/file/repository.grepcode.com/java/ext/com.google.android/android/2.0_r1/android/text/util/Regex.java#Regex.0WEB_URL_PATTERN
If you're just concerned about url's that match the WEB_URL_PATTERN, that is, urls that conform to the RFC 1738 spec, you can use this:
/((?:(http|https|Http|Https|rtsp|Rtsp):\/\/(?:(?:[a-zA-Z0-9\$\-\_\.\+\!\*\'\(\)\,\;\?\&\=]|(?:\%[a-fA-F0-9]{2})){1,64}(?:\:(?:[a-zA-Z0-9\$\-\_\.\+\!\*\'\(\)\,\;\?\&\=]|(?:\%[a-fA-F0-9]{2})){1,25})?\#)?)?((?:(?:[a-zA-Z0-9][a-zA-Z0-9\-]{0,64}\.)+(?:(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])|(?:biz|b[abdefghijmnorstvwyz])|(?:cat|com|coop|c[acdfghiklmnoruvxyz])|d[ejkmoz]|(?:edu|e[cegrstu])|f[ijkmor]|(?:gov|g[abdefghilmnpqrstuwy])|h[kmnrtu]|(?:info|int|i[delmnoqrst])|(?:jobs|j[emop])|k[eghimnrwyz]|l[abcikrstuvy]|(?:mil|mobi|museum|m[acdghklmnopqrstuvwxyz])|(?:name|net|n[acefgilopruz])|(?:org|om)|(?:pro|p[aefghklmnrstwy])|qa|r[eouw]|s[abcdeghijklmnortuvyz]|(?:tel|travel|t[cdfghjklmnoprtvwz])|u[agkmsyz]|v[aceginu]|w[fs]|y[etu]|z[amw]))|(?:(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])))(?:\:\d{1,5})?)(\/(?:(?:[a-zA-Z0-9\;\/\?\:\#\&\=\#\~\-\.\+\!\*\'\(\)\,\_])|(?:\%[a-fA-F0-9]{2}))*)?(?:\b|$)/gi;
Here is the full text of the source:
"((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
+ "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
+ "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\#)?)?"
+ "((?:(?:[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}\\.)+" // named host
+ "(?:" // plus top level domain
+ "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ "|(?:biz|b[abdefghijmnorstvwyz])"
+ "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
+ "|d[ejkmoz]"
+ "|(?:edu|e[cegrstu])"
+ "|f[ijkmor]"
+ "|(?:gov|g[abdefghilmnpqrstuwy])"
+ "|h[kmnrtu]"
+ "|(?:info|int|i[delmnoqrst])"
+ "|(?:jobs|j[emop])"
+ "|k[eghimnrwyz]"
+ "|l[abcikrstuvy]"
+ "|(?:mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
+ "|(?:name|net|n[acefgilopruz])"
+ "|(?:org|om)"
+ "|(?:pro|p[aefghklmnrstwy])"
+ "|qa"
+ "|r[eouw]"
+ "|s[abcdeghijklmnortuvyz]"
+ "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
+ "|u[agkmsyz]"
+ "|v[aceginu]"
+ "|w[fs]"
+ "|y[etu]"
+ "|z[amw]))"
+ "|(?:(?:25[0-5]|2[0-4]" // or ip address
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
+ "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
+ "|[1-9][0-9]|[0-9])))"
+ "(?:\\:\\d{1,5})?)" // plus option port number
+ "(\\/(?:(?:[a-zA-Z0-9\\;\\/\\?\\:\\#\\&\\=\\#\\~" // plus option query params
+ "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
+ "(?:\\b|$)";
If you want to be really fancy, you can test for email addresses as well. The regex for email addresses is:
/[a-zA-Z0-9\\+\\.\\_\\%\\-]{1,256}\\#[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}(\\.[a-zA-Z0-9][a-zA-Z0-9\\-]{0,25})+/gi
PS: The top level domains supported by above regex are current as of June 2007. For an up to date list you'll need to check https://data.iana.org/TLD/tlds-alpha-by-domain.txt.

Based on Crescent Fresh answer
if you want to detect links with http:// OR without http:// and by www. you can use the following
function urlify(text) {
var urlRegex = /(((https?:\/\/)|(www\.))[^\s]+)/g;
//var urlRegex = /(https?:\/\/[^\s]+)/g;
return text.replace(urlRegex, function(url,b,c) {
var url2 = (c == 'www.') ? 'http://' +url : url;
return '' + url + '';
})
}

This library on NPM looks like it is pretty comprehensive https://www.npmjs.com/package/linkifyjs
Linkify is a small yet comprehensive JavaScript plugin for finding URLs in plain-text and converting them to HTML links. It works with all valid URLs and email addresses.

Function can be further improved to render images as well:
function renderHTML(text) {
var rawText = strip(text)
var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return rawText.replace(urlRegex, function(url) {
if ( ( url.indexOf(".jpg") > 0 ) || ( url.indexOf(".png") > 0 ) || ( url.indexOf(".gif") > 0 ) ) {
return '<img src="' + url + '">' + '<br/>'
} else {
return '' + url + '' + '<br/>'
}
})
}
or for a thumbnail image that links to fiull size image:
return '<img style="width: 100px; border: 0px; -moz-border-radius: 5px; border-radius: 5px;" src="' + url + '">' + '' + '<br/>'
And here is the strip() function that pre-processes the text string for uniformity by removing any existing html.
function strip(html)
{
var tmp = document.createElement("DIV");
tmp.innerHTML = html;
var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return tmp.innerText.replace(urlRegex, function(url) {
return '\n' + url
})
}

There is existing npm package: url-regex, just install it with yarn add url-regex or npm install url-regex and use as following:
const urlRegex = require('url-regex');
const replaced = 'Find me at http://www.example.com and also at http://stackoverflow.com or at google.com'
.replace(urlRegex({strict: false}), function(url) {
return '' + url + '';
});

let str = 'https://example.com is a great site'
str.replace(/(https?:\/\/[^\s]+)/g,"<a href='$1' target='_blank' >$1</a>")
Short Code Big Work!...
Result:-
<a href="https://example.com" target="_blank" > https://example.com </a>

If you want to detect links with http:// OR without http:// OR ftp OR other possible cases like removing trailing punctuation at the end, take a look at this code.
https://jsfiddle.net/AndrewKang/xtfjn8g3/
A simple way to use that is to use NPM
npm install --save url-knife

Detect URLs in text and make clickable.
const detectURLInText = ( contentElement ) => {
const elem = document.querySelector(contentElement);
elem.innerHTML = elem.innerHTML.replace(/(https?:\/\/[^\s]+)/g, `<a class='link' href="$1">$1</a>`)
return elem
}
detectURLInText( '#myContent');
<div id="myContent">
Hell world!, detect URLs in text and make clickable.
IP: https://123.0.1.890:8080
Web: https://any-domain.com
</div>

try this:
function isUrl(s) {
if (!isUrl.rx_url) {
// taken from https://gist.github.com/dperini/729294
isUrl.rx_url=/^(?:(?:https?|ftp):\/\/)?(?:\S+(?::\S*)?#)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?$/i;
// valid prefixes
isUrl.prefixes=['http:\/\/', 'https:\/\/', 'ftp:\/\/', 'www.'];
// taken from https://w3techs.com/technologies/overview/top_level_domain/all
isUrl.domains=['com','ru','net','org','de','jp','uk','br','pl','in','it','fr','au','info','nl','ir','cn','es','cz','kr','ua','ca','eu','biz','za','gr','co','ro','se','tw','mx','vn','tr','ch','hu','at','be','dk','tv','me','ar','no','us','sk','xyz','fi','id','cl','by','nz','il','ie','pt','kz','io','my','lt','hk','cc','sg','edu','pk','su','bg','th','top','lv','hr','pe','club','rs','ae','az','si','ph','pro','ng','tk','ee','asia','mobi'];
}
if (!isUrl.rx_url.test(s)) return false;
for (let i=0; i<isUrl.prefixes.length; i++) if (s.startsWith(isUrl.prefixes[i])) return true;
for (let i=0; i<isUrl.domains.length; i++) if (s.endsWith('.'+isUrl.domains[i]) || s.includes('.'+isUrl.domains[i]+'\/') ||s.includes('.'+isUrl.domains[i]+'?')) return true;
return false;
}
function isEmail(s) {
if (!isEmail.rx_email) {
// taken from http://stackoverflow.com/a/16016476/460084
var sQtext = '[^\\x0d\\x22\\x5c\\x80-\\xff]';
var sDtext = '[^\\x0d\\x5b-\\x5d\\x80-\\xff]';
var sAtom = '[^\\x00-\\x20\\x22\\x28\\x29\\x2c\\x2e\\x3a-\\x3c\\x3e\\x40\\x5b-\\x5d\\x7f-\\xff]+';
var sQuotedPair = '\\x5c[\\x00-\\x7f]';
var sDomainLiteral = '\\x5b(' + sDtext + '|' + sQuotedPair + ')*\\x5d';
var sQuotedString = '\\x22(' + sQtext + '|' + sQuotedPair + ')*\\x22';
var sDomain_ref = sAtom;
var sSubDomain = '(' + sDomain_ref + '|' + sDomainLiteral + ')';
var sWord = '(' + sAtom + '|' + sQuotedString + ')';
var sDomain = sSubDomain + '(\\x2e' + sSubDomain + ')*';
var sLocalPart = sWord + '(\\x2e' + sWord + ')*';
var sAddrSpec = sLocalPart + '\\x40' + sDomain; // complete RFC822 email address spec
var sValidEmail = '^' + sAddrSpec + '$'; // as whole string
isEmail.rx_email = new RegExp(sValidEmail);
}
return isEmail.rx_email.test(s);
}
will also recognize urls such as google.com , http://www.google.bla , http://google.bla , www.google.bla but not google.bla

Generic Object Oriented Solution
For people like me that use frameworks like angular that don't allow manipulating DOM directly, I created a function that takes a string and returns an array of url/plainText objects that can be used to create any UI representation that you want.
URL regex
For URL matching I used (slightly adapted) h0mayun regex: /(?:(?:https?:\/\/)|(?:www\.))[^\s]+/g
My function also drops punctuation characters from the end of a URL like . and , that I believe more often will be actual punctuation than a legit URL ending (but it could be! This is not rigorous science as other answers explain well) For that I apply the following regex onto matched URLs /^(.+?)([.,?!'"]*)$/.
Typescript code
export function urlMatcherInText(inputString: string): UrlMatcherResult[] {
if (! inputString) return [];
const results: UrlMatcherResult[] = [];
function addText(text: string) {
if (! text) return;
const result = new UrlMatcherResult();
result.type = 'text';
result.value = text;
results.push(result);
}
function addUrl(url: string) {
if (! url) return;
const result = new UrlMatcherResult();
result.type = 'url';
result.value = url;
results.push(result);
}
const findUrlRegex = /(?:(?:https?:\/\/)|(?:www\.))[^\s]+/g;
const cleanUrlRegex = /^(.+?)([.,?!'"]*)$/;
let match: RegExpExecArray;
let indexOfStartOfString = 0;
do {
match = findUrlRegex.exec(inputString);
if (match) {
const text = inputString.substr(indexOfStartOfString, match.index - indexOfStartOfString);
addText(text);
var dirtyUrl = match[0];
var urlDirtyMatch = cleanUrlRegex.exec(dirtyUrl);
addUrl(urlDirtyMatch[1]);
addText(urlDirtyMatch[2]);
indexOfStartOfString = match.index + dirtyUrl.length;
}
}
while (match);
const remainingText = inputString.substr(indexOfStartOfString, inputString.length - indexOfStartOfString);
addText(remainingText);
return results;
}
export class UrlMatcherResult {
public type: 'url' | 'text'
public value: string
}

Here is a little solution for react app without using any library please note that this method work if the url is not attached to any character
this component will return a paragraph with kink detection !
import React from "react";
interface Props {
paragraph: string,
}
const REGEX = /^(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?$/gm;
const Paragraph: React.FC<Props> = ({ paragraph }) => {
const paragraphArray = paragraph.split(' ');
return <div>
{
paragraphArray.map((word: any) => {
return word.match(REGEX) ? (
<>
{word} {' '}
</>
) : word + ' '
})
}
</div>;
};
export default LinkParaGraph;

tmp.innerText is undefined. You should use tmp.innerHTML
function strip(html)
{
var tmp = document.createElement("DIV");
tmp.innerHTML = html;
var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return tmp.innerHTML .replace(urlRegex, function(url) {
return '\n' + url
})

You can use a regex like this to extract normal url patterns.
(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})
If you need more sophisticated patterns, use a library like this.
https://www.npmjs.com/package/pattern-dreamer

Related

JS/JQUERY: How to match and replace occurances inside specified strings?

I swear i tried figuring this out myself all day, but my regex-foo is just not that good.
I'm trying to create a small parser function to convert strings with urls to html coded and tags
I know how complex a regex can be trying to figure out which urls to covert to what from a big string, so what I did is simply prefix the string to covert with a flag to tell the parser how to format it, and post fix it with the ";" char to tell the parser where that particular URL ends. This way the parser has lesser guest work to do resulting in easier to regex-match and faster for execution. I really dont need a generalize match and replace all.
So my formatting is as follows, where "X" is the url string:
For URLs it will be url=X;
For IMAGES it will be img=X;
so anything in between my prefix and post fix must be converted accordingly..
So for example, for images in my document, the string could be:
click this image img=http://example.com/image1.jpg;
and i need that converted to
click this image <a href="http://example.com/image1.jpg" target="_blank">
<img class="img img-responsive" src="http://example.com/image1.jpg"/></a>
I am able to do this easily in PHP buy preg_match() function
preg_match('/\img=(.+?)\;/i', $item_des, $matches)
here's the code block:
I decided to push this routine to the browser instead of the backend (PHP) so i need similar or better JS solution.
Hoping anyone can help here, thanks!
try code below:
var str = "click this image img=http://example.com/image1.jpg;image2 img=http://example.com/image2.jpg;"
var phrases = str.split(';');
var totalRes = '';
phrases.forEach(function(str){
totalRes += processPhrase(str);
});
console.log(totalRes);
function processPhrase(str) {
var img = str.split('img=')
var res = '';
if (img.length > 1) { //img=X
var url = img[1].replace(';', '');
res = img[0] + "<a href='" + url + "' target='_blank'><img src='" + url + "'/></a>";
} else {
var url = str.split('url=');
//Do for url=X here
}
console.info(res);
return res;
}
You can use this regexp /(img|url)=(.+?);/g:
(img|url) : the type, should be grouped so we will know what to do with the value
= : literal "="
(.+?) : a number of characters (use the non-greedy ? so it will match as fewer as possible)
; : literal ";"
Read more about non-greedy regexps here.
Example:
var str = "click this image img=http://i.imgur.com/3wY30O4.jpg?a=123&b=456; and check this URL url=http://google.com/;. Bye!";
// executer is an object that has functions that apply the changes for each type (you can modify the functions for your need)
var executer = {
"url": function(e) {
return '<a target="_blank" href="' + e + '">' + e + '</a>';
},
"img": function(e) {
return '<a target="_blank" href="' + e + '"><img src="' + e + '"/></a>';
}
}
var res = str.replace(/(img|url)=(.+?);/g, function(m, type, value) {
return executer[type](value); // executer[type] will be either executer.url or executer.img, then we pass the value to that function and return its returned value
});
console.log(res);

Regex mapping in Firefox context menu contentScript

I am developing context menu add-on for Firefox. I am trying to get the selectedText and validate if it is a number. If it is a number i am using that number value to process further.
But, i got stuck at a point where i am trying to replace [(,)] using regex in javascript replace method.
Following is the code which fails to map any number starting/ending with ( or ):
var menuItemLRG = contextMenu.Item({
label: "LRG",
data: "http://myurl/&val=:",
contentScript: 'self.on("click", function (node, data) {' +
' var selectedText = window.getSelection().toString();' +
' var formattedText1 = selectedText.trim();' +
' var formattedText2 = formattedText1.replace(/^[,\[\]()]*/g,"");' +
' var formattedText3 = formattedText2.replace(/[,\[\]()]*$/g,"");' +
' console.log(formattedText3); '+
' var regExp = new RegExp(/^[0-9]+$/);' +
' if (regExp.test(formattedText3) == true) {' +
' console.log("URL to follow :"+data+formattedText3);' +
' window.open(data+formattedText3);' +
' } '+
'});'
});
Above code fails to replace ( or ) in sample inputs: (5663812, 11620033).
But, a vanilla test like the following succeeds:
<script>
var str = "(2342423,])";
var tmpVal1 = str.replace(/^[,\[\]()]*/g,"");
var tmpVal2 = tmpVal1.replace(/[,\[\]()]*$/g,"");
var regExp = new RegExp(/^[0-9]+$/);
if (regExp.test(tmpVal2) == true) {
alert(tmpVal2);
}
</script>
After many trial and error found the issue. When we try to escape a character inside a single quotes we need to add one more escape for the escape character to get recognized, otherwise the single escape \] will be considered as ] which leads to abrupt ending of of the regex pattern.
In this case:
' var formattedText2 = formattedText1.replace(/^[,\[\]()]*/g,"");'
is decoded as :
var formattedText2 = formattedText1.replace(/^[,[]()]*/g,"");
instead of as:
var formattedText2 = formattedText1.replace(/^[,\[\]()]*/g,"");
So, by adding one more escape character for an escape character resolved the pattern correctly:
' var formattedText2 = formattedText1.replace(/^[,\\[\\]()]*/g,"");'
Sorry for wasting your time in analyzing the cause, if any.

extract URI from given text (facebook like URL detection) [duplicate]

Does anyone have suggestions for detecting URLs in a set of strings?
arrayOfStrings.forEach(function(string){
// detect URLs in strings and do something swell,
// like creating elements with links.
});
Update: I wound up using this regex for link detection… Apparently several years later.
kLINK_DETECTION_REGEX = /(([a-z]+:\/\/)?(([a-z0-9\-]+\.)+([a-z]{2}|aero|arpa|biz|com|coop|edu|gov|info|int|jobs|mil|museum|name|nato|net|org|pro|travel|local|internal))(:[0-9]{1,5})?(\/[a-z0-9_\-\.~]+)*(\/([a-z0-9_\-\.]*)(\?[a-z0-9+_\-\.%=&]*)?)?(#[a-zA-Z0-9!$&'()*+.=-_~:#/?]*)?)(\s+|$)/gi
The full helper (with optional Handlebars support) is at gist #1654670.
First you need a good regex that matches urls. This is hard to do. See here, here and here:
...almost anything is a valid URL. There
are some punctuation rules for
splitting it up. Absent any
punctuation, you still have a valid
URL.
Check the RFC carefully and see if you
can construct an "invalid" URL. The
rules are very flexible.
For example ::::: is a valid URL.
The path is ":::::". A pretty
stupid filename, but a valid filename.
Also, ///// is a valid URL. The
netloc ("hostname") is "". The path
is "///". Again, stupid. Also
valid. This URL normalizes to "///"
which is the equivalent.
Something like "bad://///worse/////"
is perfectly valid. Dumb but valid.
Anyway, this answer is not meant to give you the best regex but rather a proof of how to do the string wrapping inside the text, with JavaScript.
OK so lets just use this one: /(https?:\/\/[^\s]+)/g
Again, this is a bad regex. It will have many false positives. However it's good enough for this example.
function urlify(text) {
var urlRegex = /(https?:\/\/[^\s]+)/g;
return text.replace(urlRegex, function(url) {
return '' + url + '';
})
// or alternatively
// return text.replace(urlRegex, '$1')
}
var text = 'Find me at http://www.example.com and also at http://stackoverflow.com';
var html = urlify(text);
console.log(html)
// html now looks like:
// "Find me at http://www.example.com and also at http://stackoverflow.com"
So in sum try:
$$('#pad dl dd').each(function(element) {
element.innerHTML = urlify(element.innerHTML);
});
Here is what I ended up using as my regex:
var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
This doesn't include trailing punctuation in the URL. Crescent's function works like a charm :)
so:
function linkify(text) {
var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return text.replace(urlRegex, function(url) {
return '' + url + '';
});
}
I googled this problem for quite a while, then it occurred to me that there is an Android method, android.text.util.Linkify, that utilizes some pretty robust regexes to accomplish this. Luckily, Android is open source.
They use a few different patterns for matching different types of urls. You can find them all here:
http://grepcode.com/file/repository.grepcode.com/java/ext/com.google.android/android/2.0_r1/android/text/util/Regex.java#Regex.0WEB_URL_PATTERN
If you're just concerned about url's that match the WEB_URL_PATTERN, that is, urls that conform to the RFC 1738 spec, you can use this:
/((?:(http|https|Http|Https|rtsp|Rtsp):\/\/(?:(?:[a-zA-Z0-9\$\-\_\.\+\!\*\'\(\)\,\;\?\&\=]|(?:\%[a-fA-F0-9]{2})){1,64}(?:\:(?:[a-zA-Z0-9\$\-\_\.\+\!\*\'\(\)\,\;\?\&\=]|(?:\%[a-fA-F0-9]{2})){1,25})?\#)?)?((?:(?:[a-zA-Z0-9][a-zA-Z0-9\-]{0,64}\.)+(?:(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])|(?:biz|b[abdefghijmnorstvwyz])|(?:cat|com|coop|c[acdfghiklmnoruvxyz])|d[ejkmoz]|(?:edu|e[cegrstu])|f[ijkmor]|(?:gov|g[abdefghilmnpqrstuwy])|h[kmnrtu]|(?:info|int|i[delmnoqrst])|(?:jobs|j[emop])|k[eghimnrwyz]|l[abcikrstuvy]|(?:mil|mobi|museum|m[acdghklmnopqrstuvwxyz])|(?:name|net|n[acefgilopruz])|(?:org|om)|(?:pro|p[aefghklmnrstwy])|qa|r[eouw]|s[abcdeghijklmnortuvyz]|(?:tel|travel|t[cdfghjklmnoprtvwz])|u[agkmsyz]|v[aceginu]|w[fs]|y[etu]|z[amw]))|(?:(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])))(?:\:\d{1,5})?)(\/(?:(?:[a-zA-Z0-9\;\/\?\:\#\&\=\#\~\-\.\+\!\*\'\(\)\,\_])|(?:\%[a-fA-F0-9]{2}))*)?(?:\b|$)/gi;
Here is the full text of the source:
"((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
+ "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
+ "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\#)?)?"
+ "((?:(?:[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}\\.)+" // named host
+ "(?:" // plus top level domain
+ "(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])"
+ "|(?:biz|b[abdefghijmnorstvwyz])"
+ "|(?:cat|com|coop|c[acdfghiklmnoruvxyz])"
+ "|d[ejkmoz]"
+ "|(?:edu|e[cegrstu])"
+ "|f[ijkmor]"
+ "|(?:gov|g[abdefghilmnpqrstuwy])"
+ "|h[kmnrtu]"
+ "|(?:info|int|i[delmnoqrst])"
+ "|(?:jobs|j[emop])"
+ "|k[eghimnrwyz]"
+ "|l[abcikrstuvy]"
+ "|(?:mil|mobi|museum|m[acdghklmnopqrstuvwxyz])"
+ "|(?:name|net|n[acefgilopruz])"
+ "|(?:org|om)"
+ "|(?:pro|p[aefghklmnrstwy])"
+ "|qa"
+ "|r[eouw]"
+ "|s[abcdeghijklmnortuvyz]"
+ "|(?:tel|travel|t[cdfghjklmnoprtvwz])"
+ "|u[agkmsyz]"
+ "|v[aceginu]"
+ "|w[fs]"
+ "|y[etu]"
+ "|z[amw]))"
+ "|(?:(?:25[0-5]|2[0-4]" // or ip address
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]"
+ "|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1]"
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
+ "|[1-9][0-9]|[0-9])))"
+ "(?:\\:\\d{1,5})?)" // plus option port number
+ "(\\/(?:(?:[a-zA-Z0-9\\;\\/\\?\\:\\#\\&\\=\\#\\~" // plus option query params
+ "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
+ "(?:\\b|$)";
If you want to be really fancy, you can test for email addresses as well. The regex for email addresses is:
/[a-zA-Z0-9\\+\\.\\_\\%\\-]{1,256}\\#[a-zA-Z0-9][a-zA-Z0-9\\-]{0,64}(\\.[a-zA-Z0-9][a-zA-Z0-9\\-]{0,25})+/gi
PS: The top level domains supported by above regex are current as of June 2007. For an up to date list you'll need to check https://data.iana.org/TLD/tlds-alpha-by-domain.txt.
Based on Crescent Fresh answer
if you want to detect links with http:// OR without http:// and by www. you can use the following
function urlify(text) {
var urlRegex = /(((https?:\/\/)|(www\.))[^\s]+)/g;
//var urlRegex = /(https?:\/\/[^\s]+)/g;
return text.replace(urlRegex, function(url,b,c) {
var url2 = (c == 'www.') ? 'http://' +url : url;
return '' + url + '';
})
}
This library on NPM looks like it is pretty comprehensive https://www.npmjs.com/package/linkifyjs
Linkify is a small yet comprehensive JavaScript plugin for finding URLs in plain-text and converting them to HTML links. It works with all valid URLs and email addresses.
Function can be further improved to render images as well:
function renderHTML(text) {
var rawText = strip(text)
var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return rawText.replace(urlRegex, function(url) {
if ( ( url.indexOf(".jpg") > 0 ) || ( url.indexOf(".png") > 0 ) || ( url.indexOf(".gif") > 0 ) ) {
return '<img src="' + url + '">' + '<br/>'
} else {
return '' + url + '' + '<br/>'
}
})
}
or for a thumbnail image that links to fiull size image:
return '<img style="width: 100px; border: 0px; -moz-border-radius: 5px; border-radius: 5px;" src="' + url + '">' + '' + '<br/>'
And here is the strip() function that pre-processes the text string for uniformity by removing any existing html.
function strip(html)
{
var tmp = document.createElement("DIV");
tmp.innerHTML = html;
var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return tmp.innerText.replace(urlRegex, function(url) {
return '\n' + url
})
}
There is existing npm package: url-regex, just install it with yarn add url-regex or npm install url-regex and use as following:
const urlRegex = require('url-regex');
const replaced = 'Find me at http://www.example.com and also at http://stackoverflow.com or at google.com'
.replace(urlRegex({strict: false}), function(url) {
return '' + url + '';
});
let str = 'https://example.com is a great site'
str.replace(/(https?:\/\/[^\s]+)/g,"<a href='$1' target='_blank' >$1</a>")
Short Code Big Work!...
Result:-
<a href="https://example.com" target="_blank" > https://example.com </a>
If you want to detect links with http:// OR without http:// OR ftp OR other possible cases like removing trailing punctuation at the end, take a look at this code.
https://jsfiddle.net/AndrewKang/xtfjn8g3/
A simple way to use that is to use NPM
npm install --save url-knife
Detect URLs in text and make clickable.
const detectURLInText = ( contentElement ) => {
const elem = document.querySelector(contentElement);
elem.innerHTML = elem.innerHTML.replace(/(https?:\/\/[^\s]+)/g, `<a class='link' href="$1">$1</a>`)
return elem
}
detectURLInText( '#myContent');
<div id="myContent">
Hell world!, detect URLs in text and make clickable.
IP: https://123.0.1.890:8080
Web: https://any-domain.com
</div>
try this:
function isUrl(s) {
if (!isUrl.rx_url) {
// taken from https://gist.github.com/dperini/729294
isUrl.rx_url=/^(?:(?:https?|ftp):\/\/)?(?:\S+(?::\S*)?#)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?$/i;
// valid prefixes
isUrl.prefixes=['http:\/\/', 'https:\/\/', 'ftp:\/\/', 'www.'];
// taken from https://w3techs.com/technologies/overview/top_level_domain/all
isUrl.domains=['com','ru','net','org','de','jp','uk','br','pl','in','it','fr','au','info','nl','ir','cn','es','cz','kr','ua','ca','eu','biz','za','gr','co','ro','se','tw','mx','vn','tr','ch','hu','at','be','dk','tv','me','ar','no','us','sk','xyz','fi','id','cl','by','nz','il','ie','pt','kz','io','my','lt','hk','cc','sg','edu','pk','su','bg','th','top','lv','hr','pe','club','rs','ae','az','si','ph','pro','ng','tk','ee','asia','mobi'];
}
if (!isUrl.rx_url.test(s)) return false;
for (let i=0; i<isUrl.prefixes.length; i++) if (s.startsWith(isUrl.prefixes[i])) return true;
for (let i=0; i<isUrl.domains.length; i++) if (s.endsWith('.'+isUrl.domains[i]) || s.includes('.'+isUrl.domains[i]+'\/') ||s.includes('.'+isUrl.domains[i]+'?')) return true;
return false;
}
function isEmail(s) {
if (!isEmail.rx_email) {
// taken from http://stackoverflow.com/a/16016476/460084
var sQtext = '[^\\x0d\\x22\\x5c\\x80-\\xff]';
var sDtext = '[^\\x0d\\x5b-\\x5d\\x80-\\xff]';
var sAtom = '[^\\x00-\\x20\\x22\\x28\\x29\\x2c\\x2e\\x3a-\\x3c\\x3e\\x40\\x5b-\\x5d\\x7f-\\xff]+';
var sQuotedPair = '\\x5c[\\x00-\\x7f]';
var sDomainLiteral = '\\x5b(' + sDtext + '|' + sQuotedPair + ')*\\x5d';
var sQuotedString = '\\x22(' + sQtext + '|' + sQuotedPair + ')*\\x22';
var sDomain_ref = sAtom;
var sSubDomain = '(' + sDomain_ref + '|' + sDomainLiteral + ')';
var sWord = '(' + sAtom + '|' + sQuotedString + ')';
var sDomain = sSubDomain + '(\\x2e' + sSubDomain + ')*';
var sLocalPart = sWord + '(\\x2e' + sWord + ')*';
var sAddrSpec = sLocalPart + '\\x40' + sDomain; // complete RFC822 email address spec
var sValidEmail = '^' + sAddrSpec + '$'; // as whole string
isEmail.rx_email = new RegExp(sValidEmail);
}
return isEmail.rx_email.test(s);
}
will also recognize urls such as google.com , http://www.google.bla , http://google.bla , www.google.bla but not google.bla
Generic Object Oriented Solution
For people like me that use frameworks like angular that don't allow manipulating DOM directly, I created a function that takes a string and returns an array of url/plainText objects that can be used to create any UI representation that you want.
URL regex
For URL matching I used (slightly adapted) h0mayun regex: /(?:(?:https?:\/\/)|(?:www\.))[^\s]+/g
My function also drops punctuation characters from the end of a URL like . and , that I believe more often will be actual punctuation than a legit URL ending (but it could be! This is not rigorous science as other answers explain well) For that I apply the following regex onto matched URLs /^(.+?)([.,?!'"]*)$/.
Typescript code
export function urlMatcherInText(inputString: string): UrlMatcherResult[] {
if (! inputString) return [];
const results: UrlMatcherResult[] = [];
function addText(text: string) {
if (! text) return;
const result = new UrlMatcherResult();
result.type = 'text';
result.value = text;
results.push(result);
}
function addUrl(url: string) {
if (! url) return;
const result = new UrlMatcherResult();
result.type = 'url';
result.value = url;
results.push(result);
}
const findUrlRegex = /(?:(?:https?:\/\/)|(?:www\.))[^\s]+/g;
const cleanUrlRegex = /^(.+?)([.,?!'"]*)$/;
let match: RegExpExecArray;
let indexOfStartOfString = 0;
do {
match = findUrlRegex.exec(inputString);
if (match) {
const text = inputString.substr(indexOfStartOfString, match.index - indexOfStartOfString);
addText(text);
var dirtyUrl = match[0];
var urlDirtyMatch = cleanUrlRegex.exec(dirtyUrl);
addUrl(urlDirtyMatch[1]);
addText(urlDirtyMatch[2]);
indexOfStartOfString = match.index + dirtyUrl.length;
}
}
while (match);
const remainingText = inputString.substr(indexOfStartOfString, inputString.length - indexOfStartOfString);
addText(remainingText);
return results;
}
export class UrlMatcherResult {
public type: 'url' | 'text'
public value: string
}
Here is a little solution for react app without using any library please note that this method work if the url is not attached to any character
this component will return a paragraph with kink detection !
import React from "react";
interface Props {
paragraph: string,
}
const REGEX = /^(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?$/gm;
const Paragraph: React.FC<Props> = ({ paragraph }) => {
const paragraphArray = paragraph.split(' ');
return <div>
{
paragraphArray.map((word: any) => {
return word.match(REGEX) ? (
<>
{word} {' '}
</>
) : word + ' '
})
}
</div>;
};
export default LinkParaGraph;
tmp.innerText is undefined. You should use tmp.innerHTML
function strip(html)
{
var tmp = document.createElement("DIV");
tmp.innerHTML = html;
var urlRegex =/(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
return tmp.innerHTML .replace(urlRegex, function(url) {
return '\n' + url
})
You can use a regex like this to extract normal url patterns.
(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})
If you need more sophisticated patterns, use a library like this.
https://www.npmjs.com/package/pattern-dreamer

Deal with query strings in JS, like http_build_query, etc

Here's what I have
if(condition1) {
location.href = location.href+'/?site_type=normal';
}
else if(condition2) {
location.href = location.href+'/?site_type=other';
}
Of course, if the location.href already has query vars on it, thats a problem, etc.
I need to
Find the vars from the query string
if site_type already exists, replace the value with either 'normal' or 'other'
rebuild the url with the new site_type
edit:
I found I needed to account for all kinds of URLs:
domain.com
domain.com/path/to/sth/
domain.com/?site_type=normal
domain.com?var=123&foo=987
domain.com/path/?site_type=normal&var=123&foo=987
So, here's what I came up with, suggestions welcome:
var searchstring = window.location.search;
var url = window.location.href;
console.log('search: ' + searchstring);
console.log( 'url: ' + url);
// strip search from url
url = url.replace(searchstring,"");
console.log( 'url: ' + url);
//strip site_type from search
searchstring = searchstring.replace("&site_type=normal","")
.replace("&site_type=other","")
.replace("?site_type=normal","")
.replace("?site_type=other","")
.replace("?","")
;
console.log('search: ' + searchstring);
if(searchstring != ''){searchstring = '&' + searchstring;}
var final = url + '?site_type=normal' + searchstring;
final = final.replace("&&","&");
console.log('final: ' + final);
You can directly access the query string with window.location.search. You can convert it to an object using this regex trick found here.
var queryString = {};
window.location.search.replace(/([^?=&]+)(=([^&]*))?/g, function($0, $1, $2, $3) {
queryString[$1] = $3; }
);
Then set the site_type on queryString appropriately.
queryString["site_type"] = "normal";
And finally, convert it back into a string and set that as the window.location.search.
var searchString = "";
for ( q in queryString ) {
searchString+="&" + q + "=" + queryString[q];
}
window.location.search = searchString;
Here's a way to do this:
//remove existing param and append new one..
var newHref = window.location.href.replace(window.location.search,"") + '?site_type=other';
//change href
window.location.href = newHref;
works only if you have one parameter that you want to replace, otherwise it would remove all parameters.
for example if you have
yourpage.com/?site_type=normal
and you need only website not query vars you cans clear them
var novars= location.href.replace(window.location.search,"")
this case novars = youroage.com
for just getting variables u can do this:
var site_type = window.location.search.replace("?site_type=","");
here i will get site_type value whether its normal or other
this case your variable site_type = "normal"
for rebuilding url u can just add new site_type
location.href = novars+"?site_type=normal"
or
location.href = novars+"?site_type=other"

Adding/Modify query string / GET variables in a url with javascript

So I am wanting to replace GET variable values in a url and if the variable does not exist, then add it to the url.
EDIT: I am doing this to a elements href not the pages current location..
I am not good with javascript but I do know how to use jQuery quite well and the basics of javascript. I do know how to write regex but not how to use the javascript syntax of regex and what functions to use it with.
Here is what I have so far and it does have an error on line 3: See it on jsfiddle(or below): http://jsfiddle.net/MadLittleMods/C93mD/
function addParameter(url, param, value) {
var pattern = new RegExp(param + '=(.*?);', 'gi');
return url.replace(pattern, param + '=' + value + ';');
alert(url);
}
No need to use jQuery on this one. Regular Expressions and string functions are sufficient. See my commented code below:
function addParameter(url, param, value) {
// Using a positive lookahead (?=\=) to find the
// given parameter, preceded by a ? or &, and followed
// by a = with a value after than (using a non-greedy selector)
// and then followed by a & or the end of the string
var val = new RegExp('(\\?|\\&)' + param + '=.*?(?=(&|$))'),
parts = url.toString().split('#'),
url = parts[0],
hash = parts[1]
qstring = /\?.+$/,
newURL = url;
// Check if the parameter exists
if (val.test(url))
{
// if it does, replace it, using the captured group
// to determine & or ? at the beginning
newURL = url.replace(val, '$1' + param + '=' + value);
}
else if (qstring.test(url))
{
// otherwise, if there is a query string at all
// add the param to the end of it
newURL = url + '&' + param + '=' + value;
}
else
{
// if there's no query string, add one
newURL = url + '?' + param + '=' + value;
}
if (hash)
{
newURL += '#' + hash;
}
return newURL;
}
And here is the Fiddle
Update:
The code now handles the case where there is a hash on the URL.
Edit
Missed a case! The code now checks to see if there is a query string at all.
I would go with this small but complete library to handle urls in js:
https://github.com/Mikhus/jsurl
See Change URL parameters. It answers your question in a more general manner (changing any url parameter). There are solutions for both jQuery and regular js in the answers section.
It also looks like url.replace should be location.replace but I may be wrong (that statement's based on a quick google search for 'url.replace javascript').
<script type="text/javascript">
$(document).ready(function () {
$('input.letter').click(function () {
//0- prepare values
var qsTargeted = 'letter=' + this.value; //"letter=A";
var windowUrl = '';
var qskey = qsTargeted.split('=')[0];
var qsvalue = qsTargeted.split('=')[1];
//1- get row url
var originalURL = window.location.href;
//2- get query string part, and url
if (originalURL.split('?').length > 1) //qs is exists
{
windowUrl = originalURL.split('?')[0];
var qs = originalURL.split('?')[1];
//3- get list of query strings
var qsArray = qs.split('&');
var flag = false;
//4- try to find query string key
for (var i = 0; i < qsArray.length; i++) {
if (qsArray[i].split('=').length > 0) {
if (qskey == qsArray[i].split('=')[0]) {
//exists key
qsArray[i] = qskey + '=' + qsvalue;
flag = true;
break;
}
}
}
if (!flag)// //5- if exists modify,else add
{
qsArray.push(qsTargeted);
}
var finalQs = qsArray.join('&');
//6- prepare final url
window.location = windowUrl + '?' + finalQs;
}
else {
//6- prepare final url
//add query string
window.location = originalURL + '?' + qsTargeted;
}
})
});
</script>

Categories