parse tag with regex in javascript - javascript

I have this string:
s='data-id="a1429883480588" class="privateMessage" #zaza
data-id="a1429883480589" class="privateMessage" #zaza2
data-id="a1429883480598" class="privateMessage" #zaza3'
My goal is to capture the what's between : data-id=" and " to have results:
[a1429883480588, a1429883480589, a1429883480598]
I tried with
var splitted = s.match(/data-id="(\w)+(?=")/g)
But this also captures data-id=" and "
Any idea on how to write this regex ?
It must be done with JS since it is nodeJS function !

If you're happy that the string will always be well formed and not mangled up. Here's one that'll do it:
var s = '<span data-id="a1429883480588" class="privateMessage">#zaza</span> ';
s += '<span data-id="a1429883480589" class="privateMessage">#zaza2</span> ';
s += '<span data-id="a1429883480598" class="privateMessage">#zaza3</span>';
s.match(/data-id="\w+"/g).map(function(attributeAndValue) {
return attributeAndValue.split('"')[1];
})
The concerns raised above about using RegEx to parse HTML are valid but more for HTML in the wild.

Here's the cheerio equivalent, just for reference or whatever
var cheerio = require('cheerio');
var markup = '<span data-id="a1429883480588" class="privateMessage">#zaza</span> <span data-id="a1429883480589" class="privateMessage">#zaza2</span> <span data-id="a1429883480598" class="privateMessage">#zaza3</span>';
var $ = cheerio.load('<div>'+markup+'</div>');
var ids = Array.prototype.map.call($('[data-id]'), function(e) {
return $(e).attr('data-id');
});
console.log(ids);
// [ 'a1429883480588', 'a1429883480589', 'a1429883480598' ]

Related

Remove HTML element span with class notranslate from string

Need to remove specific HTML tag span with class "notranslate", The following solution is removing all HTML tag from my text.
My expected result is: Deleted String: Adding string: Idea No.<p>d</p> value Details
var str = 'Idea No.<p>d</p> {{value}} Details';
var addStr = 'Adding string: ' + str.replace('{{', '<span class="notranslate">').replace('}}', '</span>');
console.log('Deleted String: ' + addStr.replace(new RegExp(/<\/?[\w\s="/.':;#-\/\?]+>/gi), ''));
If you really want to do it with a RegEx, you can use the below to strip any HTML span element which has the notranslate class. It takes into account the fact that you can have other properties on the element and multiple class names. As long as there is a <span> with class notranslate, it will strip the HTML tag and keep the content.
/<span.*?class=(?:"|"(?:[^"]*)\s)notranslate(?:"|\s(?:[^"]*)").*?>(.*?)<\/span>/
Working snippet:
let str1 = 'I want <span class="notranslate" data-xyz="whatever">this</span> to be removed.';
console.log('original:', str1);
console.log('modified:', str1.replace(/<span.*?class=(?:"|"(?:[^"]*)\s)notranslate(?:"|\s(?:[^"]*)").*?>(.*?)<\/span>/, "$1"));
let str2 = 'I want <span class="whatever notranslate another-class" data-xyz="whatever">this</span> to be removed.';
console.log('original:', str2);
console.log('modified:', str2.replace(/<span.*?class=(?:"|"(?:[^"]*)\s)notranslate(?:"|\s(?:[^"]*)").*?>(.*?)<\/span>/, "$1"));
If you can have multiple occurrences of that tag in the same string, you can add the g (global) flag.
/<span.*?class=(?:"|"(?:[^"]*)\s)notranslate(?:"|\s(?:[^"]*)").*?>(.*?)<\/span>/g
let str1 = 'I want <span class="notranslate" data-xyz="whatever">this</span> but <span class="notranslate" data-xyz="whatever">also this</span> to be removed.';
console.log('original:', str1);
console.log('modified:', str1.replace(/<span.*?class=(?:"|"(?:[^"]*)\s)notranslate(?:"|\s(?:[^"]*)").*?>(.*?)<\/span>/g, "$1"));
Parsing DOM is complex enough to not write it by hand.
If you can run it in a browser, here is the solution:
var str = 'Idea No.<p>d</p> {{value}} Details';
var addStr = 'Adding string: ' + str.replace('{{', '<span class="notranslate">').replace('}}', '</span>');
const dom = document.createElement('div');
dom.innerHTML = addStr;
const notranslate = dom.getElementsByClassName('notranslate');
for (let elem of notranslate) {
elem.remove();
}
console.log(dom.innerHTML);
To remove a specific HTML tag but to keep the innerHtml,
try this:
var str = 'Idea No.<p>d</p> {{value}} Details';
var addStr = 'Adding string: ' + str.replace('{{', '<span class="notranslate">').replace('}}', '</span>');
const dom = document.createElement('div');
dom.innerHTML = addStr;
const span = dom.getElementsByClassName('notranslate');
while(span.length) {
var parent = span[0].parentNode;
while(span[0].firstChild) {
parent.insertBefore( span[ 0 ].firstChild, span[0]);
}
parent.removeChild(span[0]);
}
console.log(dom.innerHTML); //Adding string: Idea No.<p>d</p> value Details
the method replace all tag because you use in RegExp the option 'gi' where 'gi' perform a global case-insensitive replacement. If you what replace a specific class you must define in regExp

jQuery using Regex to find links within text but exclude if the link is in quotes

I am using jQuery and Regex to search a text string for http or https and convert the string to a URL. I need the code to skip the string if it starts with a quote.
below is my code:
// Get the content
var str = jQuery(this).html();
// Set the regex string
var exp = /(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
var replaced_text = str.replace(exp, function(url) {
clean_url = url.replace(/https?:\/\//gi,'');
return '' + clean_url + '';
})
jQuery(this).html(replaced_text);
Here is an example of my issue:
Text The School of Computer Science and Informatics. She blogs at http://www.wordpress.com and can be found on Twitter #Abcdef.
The current code successfully finds the text that starts with http or https and converts it to a URL but it also converts the twitter URL. I need to ignore the text if it starts with a quote or is within an a tag, etc...
Any help is much appreciated
What about adding [^"'] to the exp variable?
var exp = /(\b[^"'](https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
Snippet:
// Get the content
var str = jQuery("#text2replace").html();
// Set the regex string
var exp = /(\b[^"'](https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig;
var replaced_text = str.replace(exp, function(url) {
clean_url = url.replace(/https?:\/\//gi,'');
return '' + clean_url + '';
})
jQuery("#text2replace").html(replaced_text);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<div id="text2replace">
The School of Computer Science and Informatics. She blogs at http://www.wordpress.com and can be found on Twitter #Abcdef.
</div>
If you really just want to ignore the quotation marks, this could help:
var replaced_text = $("#selector").html().replace(/([^"])(\b(https?|ftp|file):\/\/[-A-Z0-9+&##\/%?=~_|!:,.;]*[-A-Z0-9+&##\/%=~_|])/ig, '$1$2');
This works for me:
This will recognize urls and convert them to hyperlinks, but will ignore urls, wrapped in " (quotes).
See the code below or this jsfiddle for a working example.
Example HTML:
<ul class="js-replaceUrls">
<li>
www.link-only-www.com
</li>
<li>
http://link-starts-with-HTTP.com
</li>
<li>
https://www.link-starts-with-https-and-www.com
</li>
<a href="https://link-starts-with-https.com">
Link in anchor tag
</a>
</ul>
RegEX:
/(([a-z]+:\/\/)?(([a-z0-9\-]+\.)+([a-z]{2}|aero|arpa|biz|com|coop|edu|gov|info|int|jobs|mil|museum|name|nato|net|org|pro|travel|local|internal))(:[0-9]{1,5})?(\/[a-z0-9_\-\.~]+)*(\/([a-z0-9_\-\.]*)(\?[a-z0-9+_\-\.%=&]*)?)?(#[a-zA-Z0-9!$&'()*+.=-_~:#/?]*)?)(\s+|$)/gmi
jQuery:
// RECOGNIZE URLS AND CONVERT THEM TO HYPERLINKS
// Ignore if hyperlink is found in HTML attr, like "href"
$('.js-replaceUrls').each(function(){
// GET THE CONTENT
var str = $(this).html();
// SET THE REGEX STRING
var regex = /(([a-z]+:\/\/)?(([a-z0-9\-]+\.)+([a-z]{2}|aero|arpa|biz|com|coop|edu|gov|info|int|jobs|mil|museum|name|nato|net|org|pro|travel|local|internal))(:[0-9]{1,5})?(\/[a-z0-9_\-\.~]+)*(\/([a-z0-9_\-\.]*)(\?[a-z0-9+_\-\.%=&]*)?)?(#[a-zA-Z0-9!$&'()*+.=-_~:#/?]*)?)(\s+|$)/gmi;
// REPLACE PLAIN TEXT LINKS BY HYPERLINKS
var replaced_text = str.replace(regex, "<a href='$1' class='js-link'>$1</a>");
// ECHO LINK
$(this).html(replaced_text);
});
// DEFINE URLS WITHOUT "http" OR "https"
var linkHasNoHttp = $(".js-link:not([href*=http],[href*=https])");
// ADD "http://" TO "href"
$(linkHasNoHttp).each(function() {
var linkHref = $(this).attr("href");
$(this).attr("href" , "http://" + linkHref);
});
See this jsfiddle for a working example.

String replace logic

I'm trying to build a function, that receives a string with this format:
"hello wor**"
The * could be anywhere on the string.
It should return:
<span>hello wor</span><input type='text'></input>
So the string could be "hel** wor*d" also
and the return should be:
<span>hel</span><input type='text'> <span>wor</span><input type='text'><span>d</span>
I could do it easily with a loop on each char, but I'm looking for more elegant solutions.
I think that it could be solved with a regex, and using replace I got the "*" covered:
var text = "hello wor**";
text.replace(/\*+/g, "<input type='text'></input>");
I have not yet found a way of capturing the remaining text to render the
<span>
You're not using the result of the replace function. Try this:
var text = "*hel** wor*d*";
var element = text.split(/\s*\*+\s*/g);
element = "<span>"+ element.join("</span><input type='text'><span>") + "</span>";
element = element.replace(/<span><\/span>/g, "");
console.log(element);
'hello wor**'.replace(/\*+/g, "<input type='text'></input>");
This returns hello wor. All you have to do is concatenate the string with the rest of the data you want, like so:
var text = "hello wor**";
text = '<span>' + text.replace(/\*+/g, '') + '</span><input type=\'text\'></input>';
<html>
<head>
</head>
<body>
<span id="hi">hello wor**</span>
</body>
</html>
i use jquery in this
$( document ).ready(function() {
var texty = $('#hi').text();
$('#hi').replaceWith(texty.replace(/\*+/g, "<input type='text'></input>"))
});

Unable to span wrap html text with single or double quotes

Contenteditable div
var1 = 'x';
var2 = someVar
Target
<span class="frag">var1 = 'x'</span>;
<span class="frag">var2 = someVar</span>;
JS
$('#board_code_dup').children().each(function (index, child) {
var text = $(child).html();
//HOW TO DO THIS RIGHT?
text = text.replace(/([A-Za-z0-9_]+\s*=\s*[A-Za-z0-9&'"]+)/g, '<span class="frag">$1</span>');
});
How would i use HTML entities " &apos; here instead of ' " so that i can properly wrap it?
This is what I've tried "$('#board_code_dup').text()" and split it on "\n" instead ..
It will convert entities behind the scene like quotes or double quotes in the source. This might match in reg-exp.
var result = $('#board_code_dup').text().split("\n").map(function (el, idx){
return el.replace(/\s*;\s*$/,'').replace( /(\w+\s*=\s*[\w&"']+)/g,'<span class="frag">$1</span>');
});
alert(result.join(''));
Js fiddle Here
I hope this will help ..

Escape characters in String in a HTML page?

I have a string in the below non-escaped format in a HTML page:
<a href="http://somesite/product?page=blahk&id=EA393216&tabs=7,0&selections=quarter:Q2+2013^&wicket:pageMapName=wicket-2\">SomeThing</a>
What I need is to use jQuery/JavaScript to replace that string with just the link "SomeThing".
I have looked at some examples in StackOverflow, but they don't seem to work. I'm just getting started with jQuery and JavaScript, so would appreciate any help here.
Any ideas?
Try html() and text() in jquery to decode:
var str = '<a href="http://somesite/product?page=blahk&id=EA393216&tabs=7,0&selections=quarter:Q2+2013^&wicket:pageMapName=wicket-2\">SomeThing</a>';
var decoded = $('<div />').html(str).text();
alert($(decoded).text());
See Fiddle demo
var str = '<a href="http://somesite/product?page=blahk&id=EA393216&tabs=7,0&selections=quarter:Q2+2013^&wicket:pageMapName=wicket-2\">SomeThing</a>';
var helper = document.createElement('p');
// evaluate as HTML once, text now "<a href..."
helper.innerHtml = str;
// evaluate as HTML again, helper now has child element a
helper.innerHtml = helper.innerText;
// get text content only ("SomeThing")
alert(helper.innerText);
Here is a possible starting point.
Hope this gets you started!
function parseString(){
var str = '<a href="http://somesite/product?page=blahk&id=EA393216&tabs=7,0&selections=quarter:Q2+2013^&wicket:pageMapName=wicket-2\">SomeThing</a>';
var begin = str.indexOf('\">',0)+2; //--determine where the opening anchor tag ends
var end = str.indexOf('</a>',0); //--determine where the closing anchor tag begins
var parsedString = str.substring(begin,end); //--grab whats in between;
/*//--or all inline
var parsedString = str.substring(str.indexOf('\">',0)+2,str.indexOf('</a>',0));
*/
console.log(parsedString);
}
parseStr();

Categories