Let's assume, I have the following text:
This is a sample url : http://example.com.
These are some images:
<img src="http://example.com/sample1.png" class="sample-image" />
<img src="http://example.com/sample2.png" class="sample-image" />
Another url http://example2.com
Here is regex code that I am using to parse the above text:
const urls = /(\b(https?|ftp):\/\/[A-Z0-9+&##\/%?=~_|!:,.;-]*[-A-Z0-9+&##\/%=~_|])/gim;
const emails = /(\w+#[a-zA-Z_]+?\.[a-zA-Z]{2,6})/gim;
return function(text) {
if(text.match(urls)) {
text = text.replace(urls, "$1")
}
if(text.match(emails)) {
text = text.replace(emails, "$1")
}
return text
}
The above code does this to my text:
This is a sample url : http://example.com.
These are some images:
<img src="<a href=" class="sample-image">"http://example.com/sample1.png">"
<img src="<a href=" class="sample-image">"http://example.com/sample2.png">"
Another url http://example2.com
And I desire the following result:
This is a sample url : http://example.com.
These are some images:
<img src="http://example.com/sample1.png" class="sample-image" /> <!-- Do not change -->
<img src="http://example.com/sample2.png" class="sample-image" /> <!-- Do not change -->
Another url http://example2.com
How can I achieve the above result?
It's always better to avoid using regex for parsing HTML.
RegEx match open tags except XHTML self-contained tags
Using regular expressions to parse HTML: why not?
Instead, generate a temporary DOM element with the content and fetch all text nodes to update the content. Where apply replace the method with regex on the text node contents.
var html = 'This is a sample url : http://example.com These are some images:<img src="http://example.com/sample1.png" class="sample-image" /><img src="http://example.com/sample2.png" class="sample-image" />Another url http://example2.com';
// regex for replacing content
const urls = /(\b(https?|ftp):\/\/[A-Z0-9+&##\/%?=~_|!:,.;-]*[-A-Z0-9+&##\/%=~_|])/gim;
const emails = /(\w+#[a-zA-Z_]+?\.[a-zA-Z]{2,6})/gim;
// for replacing the content
function update(text) {
return text.replace(urls, "$1").replace(emails, "$1");
}
// create a DOM element
var temp = document.createElement('div');
// set the string as your content
temp.innerHTML = html;
console.log(
// get all child nodes and convert into array
// for older browser use `[].slice.call()`
Array.from(temp.childNodes)
// iterate over the elements to generate the content array
.map(function(n) {
// if node is text then update the content and return it
if (n.nodeType == 3)
return update(n.textContent);
// otehrwise return the html content
else
return n.outerHTML;
// join them
}).join('')
)
UPDATE : In case you need to keep the escaped HTML then you need to add an additional method which generates corresponding escaped HTML of a text node.
var html = 'This is a sample url : http://example.com These are some images:<img src="http://example.com/sample1.png" class="sample-image" /><img src="http://example.com/sample2.png" class="sample-image" />Another url http://example2.com hi <a href="#">Sam</a>';
// regex for replacing content
const urls = /(\b(https?|ftp):\/\/[A-Z0-9+&##\/%?=~_|!:,.;-]*[-A-Z0-9+&##\/%=~_|])/gim;
const emails = /(\w+#[a-zA-Z_]+?\.[a-zA-Z]{2,6})/gim;
// for replacing the content
function update(text) {
return text.replace(urls, "$1").replace(emails, "$1");
}
// function for generating escaped html content for text node
function getEncodedText(node) {
// temporary element
var temp = document.createElement('div');
// append the text node
temp.appendChild(node);
// get the escaped html content
return temp.innerHTML
}
// create a DOM element
var temp = document.createElement('div');
// set the string as your content
temp.innerHTML = html;
console.log(
// get all child nodes and convert into array
// for older browser use `[].slice.call()`
Array.from(temp.childNodes)
// iterate over the elements to generate the content array
.map(function(n) {
// if node is text then update the escaped html content and return it
if (n.nodeType == 3)
return update(getEncodedText(n));
// otehrwise return the html content
else
return n.outerHTML;
// join them
}).join('')
)
How about:
str='This is a sample url : http://example.com.\nThese are some images:\n<img src="http://example.com/sample1.png" class="sample-image" />\n<img src="http://example.com/sample2.png" class="sample-image" />\nAnother url http://example2.com';
str= str.replace(/[^"](https?:\/\/[^"\s]+)/g, '$1');
console.log(str);
Output:
This is a sample url :http://example.com.
These are some images:
<img src="http://example.com/sample1.png" class="sample-image" />
<img src="http://example.com/sample2.png" class="sample-image" />
Another urlhttp://example2.com
Related
i want to replace html links in a string with text links, for example:
<a href="test.com"> should become test.com.
I cant figure out any regex matching all my patterns. Cause links might have more attributes in different orders:
<a class="test" href="test.com" title="test">
How can i achieve that?
let str = '<a class="test" href="test.com" title="test">'
let result = str.split(/href="/)[1].split('"')[0]
console.log(result)
Create a temporary DOM element with the string as HTML content and iterate over all a tags and replace it with the corresponding link(by getting href attribute).
let html = `<a class="test" href="test.com" title="test">`;
// create a temporary div element
let tempDiv = document.createElement('div');
// set html content as your string
tempDiv.innerHTML = html;
// get all a tags and iterate
tempDiv.querySelectorAll('a').forEach(ele => {
// replace element with corresponding link
ele.replaceWith(ele.getAttribute('href')) // or ele.href
})
// get html content of temporary element
console.log(tempDiv.innerHTML)
Or alternately you can use DOMParser for parsing html content.
let html = `<a class="test" href="test.com" title="test">`;
// parser
let parser = new DOMParser();
// parse the string which returs a document object
doc = parser.parseFromString(html, "text/html");
// get all a tags and iterate
doc.querySelectorAll('a').forEach(ele => {
// replace element with corresponding link
ele.replaceWith(ele.getAttribute('href')) // or ele.href
})
// get html content from body
console.log(doc.body.innerHTML)
UPDATE : With regex you can extract and replace the a tag in the following method(not prefered).
var str = '<a class="test" href="test.com" title="test">';
console.log(str.replace(/<a[^>]*href="([^"]+)"[^>]*>(?:.*?<\/a>)?/g, '$1'));
var str1 = '<a class="test" href="test.com" title="test">abc</a>';
console.log(str1.replace(/<a[^>]*href="([^"]+)"[^>]*>(?:.*?<\/a>)?/g, '$1'));
Reference : Using regular expressions to parse HTML: why not?
I am using jQuery and Regex to search a text string for http or https and convert the string to a URL. I need the code to skip the string if it starts with a quote.
below is my code:
// Get the content
var str = jQuery(this).html();
// Set the regex string
var exp = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
var replaced_text = str.replace(exp, function(url) {
clean_url = url.replace(/https?:\/\//gi,'');
return '' + clean_url + '';
})
jQuery(this).html(replaced_text);
Here is an example of my issue:
Text The School of Computer Science and Informatics. She blogs at http://www.wordpress.com and can be found on Twitter #Abcdef.
The current code successfully finds the text that starts with http or https and converts it to a URL but it also converts the twitter URL. I need to ignore the text if it starts with a quote or is within an a tag, etc...
Any help is much appreciated
What about adding [^"'] to the exp variable?
var exp = /(\b[^"'](https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
Snippet:
// Get the content
var str = jQuery("#text2replace").html();
// Set the regex string
var exp = /(\b[^"'](https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
var replaced_text = str.replace(exp, function(url) {
clean_url = url.replace(/https?:\/\//gi,'');
return '' + clean_url + '';
})
jQuery("#text2replace").html(replaced_text);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<div id="text2replace">
The School of Computer Science and Informatics. She blogs at http://www.wordpress.com and can be found on Twitter #Abcdef.
</div>
If you really just want to ignore the quotation marks, this could help:
var replaced_text = $("#selector").html().replace(/([^"])(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig, '$1$2');
This works for me:
This will recognize urls and convert them to hyperlinks, but will ignore urls, wrapped in " (quotes).
See the code below or this jsfiddle for a working example.
Example HTML:
<ul class="js-replaceUrls">
<li>
www.link-only-www.com
</li>
<li>
http://link-starts-with-HTTP.com
</li>
<li>
https://www.link-starts-with-https-and-www.com
</li>
<a href="https://link-starts-with-https.com">
Link in anchor tag
</a>
</ul>
RegEX:
/(([a-z]+:\/\/)?(([a-z0-9\-]+\.)+([a-z]{2}|aero|arpa|biz|com|coop|edu|gov|info|int|jobs|mil|museum|name|nato|net|org|pro|travel|local|internal))(:[0-9]{1,5})?(\/[a-z0-9_\-\.~]+)*(\/([a-z0-9_\-\.]*)(\?[a-z0-9+_\-\.%=&]*)?)?(#[a-zA-Z0-9!$&'()*+.=-_~:#/?]*)?)(\s+|$)/gmi
jQuery:
// RECOGNIZE URLS AND CONVERT THEM TO HYPERLINKS
// Ignore if hyperlink is found in HTML attr, like "href"
$('.js-replaceUrls').each(function(){
// GET THE CONTENT
var str = $(this).html();
// SET THE REGEX STRING
var regex = /(([a-z]+:\/\/)?(([a-z0-9\-]+\.)+([a-z]{2}|aero|arpa|biz|com|coop|edu|gov|info|int|jobs|mil|museum|name|nato|net|org|pro|travel|local|internal))(:[0-9]{1,5})?(\/[a-z0-9_\-\.~]+)*(\/([a-z0-9_\-\.]*)(\?[a-z0-9+_\-\.%=&]*)?)?(#[a-zA-Z0-9!$&'()*+.=-_~:#/?]*)?)(\s+|$)/gmi;
// REPLACE PLAIN TEXT LINKS BY HYPERLINKS
var replaced_text = str.replace(regex, "<a href='$1' class='js-link'>$1</a>");
// ECHO LINK
$(this).html(replaced_text);
});
// DEFINE URLS WITHOUT "http" OR "https"
var linkHasNoHttp = $(".js-link:not([href*=http],[href*=https])");
// ADD "http://" TO "href"
$(linkHasNoHttp).each(function() {
var linkHref = $(this).attr("href");
$(this).attr("href" , "http://" + linkHref);
});
See this jsfiddle for a working example.
I have string with html code.
<h2 class="some-class">
<a href="#link" class="link" id="first-link"
<span class="bold">link</span>
</a>
NEED TO GET THIS
</h2>
I need to get only text content of h2.
I create this regular expression:
(?<=>)(.*)(?=<\/h2>)
But it's useful if h2 has no inner tags. Otherwise I get this:
<a href="#link" class="link" id="first-link"
<span class="bold">link</span>
</a>
NEED TO GET THIS
Never use regex to parse HTML, check these famous answers:
Using regular expressions to parse HTML: why not?
RegEx match open tags except XHTML self-contained tags
Instead, generate a temp element with the text as HTML and get content by filtering out text nodes.
var str = `<h2 class="some-class">
<a href="#link" class="link" id="first-link"
<span class="bold">link</span>
</a>
NEED TO GET THIS
</h2>`;
// generate a temporary DOM element
var temp = document.createElement('div');
// set content
temp.innerHTML = str;
// get the h2 element
var h2 = temp.querySelector('h2');
console.log(
// get all child nodes and convert into array
// for older browser use [].slice.call(h2...)
Array.from(h2.childNodes)
// iterate over elements
.map(function(e) {
// if text node then return the content, else return
// empty string
return e.nodeType === 3 ? e.textContent.trim() : '';
})
// join the string array
.join('')
// you can use reduce method instead of map
// .reduce(function(s, e) { return s + (e.nodeType === 3 ? e.textContent.trim() : ''); }, '')
)
Reference :
Fastest way to convert JavaScript NodeList to Array?
Rgex is not good for parsing HTML, but if your html is not valid or any way you like to use regex:
(?!>)([^><]+)(?=<\/h2>)
try Demo
It's getting last texts before closing tag of </h2> (IF EXISTS)
To avoid null results changed * to +.
This Regex is completely limit and fitting to limited situations as question mentioned.
demo
var h2 = document.querySelector('h2')
var h2_clone = h2.cloneNode(true)
for (let el of h2_clone.children) {
el.remove()
}
alert(h2_clone.innerText)
I'm fairly new to JavaScript and I have this RSS Feed I'm working with currently.
When I retrieve an item from the RSS feed, the following is displayed
Google Home Page http://www.google.com
How can I split this string, so that I can embed the second part of it (http://www.google.com) into the first part(Google Home Page)?
First - exclude the link by using following RegEx pattern (searches for string which starts with http://).
/http:\/\/.*[^\W+]/g
The matched value (Array) is being stored into url, now we are able to create the anchor element. (the value of href is the element 0 inside our matches array).
The link content is being generated by replacing the URL with empty space inside the retrievedResult. trim() is optional, I've used it just to remove remaining space.
retrievedResult.replace(url[0], "").trim()
Finally you can append the built anchor element.
var retrievedResult = "Google Home Page http://www.google.com";
var re = /http:\/\/.*[^\W+]/g;
var url = retrievedResult.match(re);
var anchor = '' + retrievedResult.replace(url[0], "").trim() + '';
$('body').append(anchor);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
Okay, so this will be the string:
var string = "Google Home Page http://www.google.com";
Then we split it:
var split = string.split('http'); // ['Google Home Page ', '://www.google.com']
Then we create an a element:
var a = document.createElement('a');
Then we add the link as the href attribute of your anchor element:
a.href = 'http' + split[1];
And then we add the text as textContent of your anchor element:
a.textContent = split[0];
And finally we add the element to the body:
document.body.appendChild(a);
Here an example:
var string = "Google Home Page http://www.google.com";
var split = string.split('http');
var a = document.createElement('a');
a.href = 'http' + split[1];
a.textContent = split[0];
document.body.appendChild(a);
You can use jquery to get to your result
Working Example:
//This is HTML part
<div id="linkcontainer"></div>
<input id="str" value='Google Home Page http://www.google.com'>
<a id="createlink">CreateLink</a>
//This is js part
$('#createlink').click(function(){
createLink();
});
//function that makes link
function createLink(){
var str = $('#str').val();
var http = str.indexOf('http');
var url = str.substring(http);
var text = str.substring(0,http);
$('#linkcontainer').html(''+text+'');
}
Try this code on jsfiddle
I have this code:
var str = document.getElementById('mesajyazi').innerHTML;
alert('input: '+str);
// first create an element and add the string as its HTML
var container = $('<div>').html(str);
// then use .replaceWith(function()) to modify the HTML structure
container.find('img').replaceWith(function() { return this.alt; })
// finally get the HTML back as string
var strAlt = container.html();
alert('output: '+strAlt);
It doesn't work.
But if I change var str to simple string text.
var str = 'This is a string with <img src="./images/logo.png" alt="logo" /> an image';
It works.
P.S. - inspired by this Replace all <img> tag with img alt text :)