Wrapping Sentences within <p> Tags with <span>'s, But Keep Other Tags - javascript

To give you an idea of what I need, I have been using the below code to parse content within tags and wrap each sentence within tags so I can then interact with sentences on a page.
// Wrap every sentence of each paragraph in <span class="sentence">.
// The paragraph is read through .text(), so any nested markup (links, images, ...)
// is flattened to plain text before the spans are written back.
$('p').each(function () {
    var $paragraph = $(this);
    var sentencePattern = /(((?![.!?]['"]?\s).)*[.!?]['"]?)(\s|$)/g;
    var wrapped = $paragraph.text().replace(sentencePattern, '<span class="sentence">$1</span>$3');
    $paragraph.html(wrapped);
});
However, the following line demonstrates my problem:
<p>This is a link and it is removed with the above code! Here is another sentence.</p>
Nested tags such as <a>, <img> etc...within <p> tags that I'm searching through are removed with the code that I'm using. I need to keep these tags intact, so the content stays the same within the <p> tags.
I need:
<p><span class="sentence">This is a link and it is removed with the above code!</span><span class="sentence">Here is another sentence.</span></p>
After reading this barn-burner about parsing HTML with regex, I've concluded that I need to use a combo of an HTML parser of some sort to traverse through sub-tags within a <p> tag, and then use a regex to find the sentences. I think the regex I have listed above should work for most of my uses, if that helps.
So: how should I do it?

It is really difficult to tokenise language reliably into sentences, and that is without the added complexity of throwing HTML into the equation. There are some applications out there that attempt to deal with Natural Language Processing; an example would be the Stanford Tokenizer, which runs on Java (not JavaScript).
And as people keep mentioning, a regex is not the solution to this problem, language is not regular so don't expect a Regular Expression only solution.
There is a question here on SO, Basic NLP in CoffeeScript or JavaScript — Punkt tokenizaton, simple trained Bayes models — where to start? Which I think summarises things fairly simply for Javascript.
Anyway, to at least give you a little something that you could play with, I knocked up a little code for you. This works reasonably well until the markup/language begins to resemble anything slightly complex or different, but ultimately it misses the mark by a long way. But it may be enough for what you need, I don't know.
CSS
/* Generic text helpers; the script shown with this stylesheet does not emit these classes. */
.emphasis {
font-style: italic;
}
.bold {
font-weight: bold;
}
.emphasis.bold {
font-style: italic;
font-weight: bold;
}
.unidentified {
background-color: pink;
}
/* sentence0 / sentence1: alternating backgrounds; the script appends (color % 2) to "sentence". */
.sentence0 {
background-color: yellow;
}
.sentence1 {
background-color: green;
}
/* sentence2: class passed by the script's anythingElse() fallback. */
.sentence2 {
background-color: red;
}
/* whitespace: spans holding leading whitespace; white-space: pre keeps them from collapsing. */
.whitespace {
white-space: pre;
background-color: blue;
}
Javascript
/*jslint maxerr: 50, indent: 4, browser: true */
(function () {
"use strict";
// Shared patterns and state used by the sentence-guessing helpers below.
// rxOpen: "<", one character that is not "/", then lazily at least one more character up to ">".
var rxOpen = new RegExp("<[^\\/].+?>"),
// rxClose: "</" followed lazily by at least one character up to ">".
rxClose = new RegExp("<\\/.+?>"),
// rxWhitespace: anchored at the start; the lazy "+?" means it matches exactly one whitespace character.
rxWhitespace = new RegExp("^\\s+?"),
// rxSupStart: an opening <sup ...> tag at the very start of the string.
rxSupStart = new RegExp("^<sup\\b[^>]*>"),
// rxSupEnd: a literal "</sup>" (the "\/" in the string literal is just "/").
rxSupEnd = new RegExp("<\/sup>"),
// sentenceEnd: regex fragments whose sources are concatenated into rxIndex.
sentenceEnd = [],
// color: running counter of "sentence" spans; pushSpan uses color % 2 to alternate classes.
color = 0,
rxIndex;
// A non-digit character followed by one or more of . ! ?
sentenceEnd.push(new RegExp("[^\\d][\\.!\\?]+"));
// ...where the remainder of the string holds an even number of double quotes
sentenceEnd.push(new RegExp("(?=([^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*?$)"));
// ...and no ")" is reachable without first passing a "("
sentenceEnd.push(new RegExp("(?![^\\(]*?\\))"));
// ...same check for "]" / "["
sentenceEnd.push(new RegExp("(?![^\\[]*?\\])"));
// ...same check for "}" / "{"
sentenceEnd.push(new RegExp("(?![^\\{]*?\\})"));
// ...and no further "|" follows
sentenceEnd.push(new RegExp("(?![^\\|]*?\\|)"));
//sentenceEnd.push(new RegExp("(?![^\\\\]*?\\\\)"));
//sentenceEnd.push(new RegExp("(?![^\\/.]*\\/)")); // all could be a problem, but this one is problematic
// Build the combined (flag-less) pattern by joining every fragment's source in order.
rxIndex = new RegExp(sentenceEnd.reduce(function (previousValue, currentValue) {
return previousValue + currentValue.source;
}, ""));
/**
 * Find where the first sentence terminator in `html` ends.
 *
 * @param {string} html Text to scan with rxIndex.
 * @returns {number} Index of the last character of the first rxIndex match,
 *     or -1 when rxIndex does not match.
 */
function indexSentenceEnd(html) {
    var found = rxIndex.exec(html);
    if (found === null) {
        return -1;
    }
    return found.index + found[0].length - 1;
}
/**
 * Append a <span> wrapping `string` to `array`.
 *
 * For the class name "sentence" the span gets "sentence0"/"sentence1" (alternating
 * via the shared `color` counter) plus the optional extra class, and the counter
 * is advanced. Any other class name is used verbatim.
 *
 * @param {string[]} array Output list of HTML fragments (mutated).
 * @param {string} className Base CSS class.
 * @param {string} string Inner HTML of the span.
 * @param {string} [classNameOpt] Extra class, only applied to "sentence" spans.
 */
function pushSpan(array, className, string, classNameOpt) {
    var cssClass = className;
    if (cssClass === "sentence") {
        cssClass = cssClass + (color % 2) + (classNameOpt ? " " + classNameOpt : "");
        color += 1;
    }
    array.push('<span class="' + cssClass + '">' + string + '</span>');
}
/**
 * If `html` starts with a complete <sup>...</sup> element, move that element inside
 * the previously pushed span (just before its closing </span>) and return the rest.
 *
 * Returns `html` unchanged when there is no leading <sup>, when the <sup> is never
 * closed, or when there is no previous span to attach it to. (Previously an
 * unterminated <sup> returned only the last character of `html`, and an empty
 * `array` raised a TypeError.)
 *
 * @param {string} html Remaining paragraph markup.
 * @param {string[]} array Fragments emitted so far (last entry may be rewritten).
 * @returns {string} The markup that still has to be processed.
 */
function addSupToPrevious(html, array) {
    var closeSpanLength = 7, // "</span>".length
        closeSupLength = 6,  // "</sup>".length
        supEnd,
        consumed,
        last;
    if (array.length === 0 || html.search(rxSupStart) === -1) {
        return html;
    }
    supEnd = html.search(rxSupEnd);
    if (supEnd === -1) {
        return html;
    }
    consumed = supEnd + closeSupLength;
    last = array.pop();
    // Splice the <sup> element in front of the previous fragment's closing </span>.
    array.push(last.slice(0, -closeSpanLength) + html.slice(0, consumed) + last.slice(-closeSpanLength));
    return html.slice(consumed);
}
/**
 * Peel the leading whitespace matched by rxWhitespace off `html` and emit it as a
 * "whitespace" span.
 *
 * @param {string} html Remaining paragraph markup.
 * @param {string[]} array Output fragments (mutated through pushSpan).
 * @returns {string} `html` without the matched prefix (unchanged if nothing matched).
 */
function leadingWhitespaces(html, array) {
    var lead = rxWhitespace.exec(html),
        width;
    if (lead === null) {
        return html.slice(0);
    }
    width = lead[0].length;
    pushSpan(array, "whitespace", html.slice(0, width));
    return html.slice(width);
}
/**
 * Treat the whole of `html` as one sentence when indexSentenceEnd reports no
 * terminator (-1) or reports an index equal to html.length.
 *
 * @param {string} html Remaining paragraph markup.
 * @param {string[]} array Output fragments (mutated through pushSpan).
 * @returns {string} "" when the text was consumed, otherwise `html` untouched.
 */
function paragraphIsSentence(html, array) {
    var endAt = indexSentenceEnd(html);
    if (endAt !== -1 && endAt !== html.length) {
        return html;
    }
    pushSpan(array, "sentence", html, "paragraphIsSentence");
    return "";
}
/**
 * When `html` contains nothing that rxOpen recognises as an opening tag, emit the
 * text up to and including the first sentence end (or all of it) as a sentence.
 *
 * @param {string} html Remaining paragraph markup.
 * @param {string[]} array Output fragments (mutated through pushSpan).
 * @returns {string} What is left after the emitted sentence; `html` itself when
 *     markup is present.
 */
function paragraphNoMarkup(html, array) {
    var cut;
    if (rxOpen.test(html)) {
        return html.slice(0);
    }
    cut = indexSentenceEnd(html);
    if (cut === -1) {
        cut = html.length;
    }
    // Move one past the terminator so it stays with its sentence.
    cut += 1;
    pushSpan(array, "sentence", html.slice(0, cut), "paragraphNoMarkup");
    return html.slice(cut);
}
/**
 * Handle markup-bearing text whose first sentence end lies outside the first
 * open/close tag pair: before the first opening tag or after the first closing tag.
 *
 * @param {string} html Remaining paragraph markup.
 * @param {string[]} array Output fragments (mutated through pushSpan).
 * @returns {string} What is left after the emitted sentence, or `html` untouched
 *     when there is no markup or the sentence end falls between the tags.
 */
function sentenceUncontained(html, array) {
    var openAt = html.search(rxOpen),
        closeAt,
        cut;
    if (openAt === -1) {
        return html.slice(0);
    }
    cut = indexSentenceEnd(html);
    if (cut === -1) {
        cut = html.length;
    }
    closeAt = html.search(rxClose);
    if (cut >= openAt && cut <= closeAt) {
        // Sentence end sits inside the tag pair; leave it for another handler.
        return html.slice(0);
    }
    cut += 1;
    pushSpan(array, "sentence", html.slice(0, cut), "sentenceUncontained");
    return html.slice(cut);
}
/**
 * Handle markup-bearing text whose first sentence end lies strictly between the
 * first opening tag and the first closing tag: everything up to and including
 * that closing tag is emitted as one sentence.
 *
 * @param {string} html Remaining paragraph markup.
 * @param {string[]} array Output fragments (mutated through pushSpan).
 * @returns {string} What follows the closing tag, or `html` untouched when the
 *     condition does not hold.
 */
function sentenceContained(html, array) {
    var openAt = html.search(rxOpen),
        endAt,
        closing,
        cut;
    if (openAt === -1) {
        return html.slice(0);
    }
    endAt = indexSentenceEnd(html);
    if (endAt === -1) {
        endAt = html.length;
    }
    closing = rxClose.exec(html);
    if (closing === null || endAt <= openAt || endAt >= closing.index) {
        return html.slice(0);
    }
    cut = closing.index + closing[0].length;
    pushSpan(array, "sentence", html.slice(0, cut), "sentenceContained");
    return html.slice(cut);
}
/**
 * Last-resort handler: emit all of `html` in a "sentence2" span and consume it.
 *
 * @param {string} html Remaining paragraph markup.
 * @param {string[]} array Output fragments (mutated through pushSpan).
 * @returns {string} Always the empty string.
 */
function anythingElse(html, array) {
    var nothingLeft = "";
    pushSpan(array, "sentence2", html, "anythingElse");
    return nothingLeft;
}
/**
 * Rewrite every <p> in the document so that its content is split into spans.
 *
 * For each paragraph the handlers are tried in a fixed order; as soon as one of
 * them shortens or lengthens the remaining markup the round ends and the next
 * round starts again from the first handler. At most 100 rounds are run per
 * paragraph, after which whatever has been collected is written back.
 */
function guessSenetences() {
    var handlers = [
            addSupToPrevious,
            leadingWhitespaces,
            paragraphIsSentence,
            paragraphNoMarkup,
            sentenceUncontained,
            sentenceContained,
            anythingElse
        ],
        paragraphs = document.getElementsByTagName("p");
    Array.prototype.forEach.call(paragraphs, function (paragraph) {
        var remaining = paragraph.innerHTML,
            pieces = [],
            budget = 100,
            before,
            step;
        while (remaining.length && budget) {
            before = remaining.length;
            for (step = 0; step < handlers.length; step += 1) {
                remaining = handlers[step](remaining, pieces);
                if (remaining.length !== before) {
                    break;
                }
            }
            budget -= 1;
        }
        paragraph.innerHTML = pieces.join("");
    });
}
guessSenetences();
}());
On jsfiddle

you need to use .html() instead of .text() if you want to keep tags intact.
Check below code and let me know if it doesn't work out.
DEMO
// Wrap every sentence of each paragraph in <span class="sentence">, working on the
// paragraph's markup (.html()) so that nested tags are carried through the replace.
$('p').each(function () {
    var $paragraph = $(this);
    var sentencePattern = /(((?![.!?]['"]?\s).)*[.!?]['"]?)(\s|$)/g;
    var wrapped = $paragraph.html().replace(sentencePattern, '<span class="sentence">$1</span>$3');
    $paragraph.html(wrapped);
});

Related

Format color while typing in textarea or pre

I'm trying to create a comments section that lets users #someone. When the user types #random and then a space, I want it to be highlighted. So I've created something that searches for and replaces the string, but when the HTML is replaced, the cursor is placed at the beginning. Is there any way to solve this, or any other way of doing something like this?
// On every key release, wrap each word that starts with "#" in <mark> and write the
// result back into the editable element (and into #my_console_log as plain text).
// All working values are locals now; the original assigned `txt` and `new_txt`
// without declaring them, which creates implicit globals (and throws in strict mode).
$('#textarea').keyup(function () {
  const source = this.innerText;
  // Keep only the "#..." words, sorted, then normalise exactly as before
  // (join/trim/split yields [""] when there are no tags).
  const tags = source
    .split(" ")
    .filter((word) => word.startsWith('#'))
    .sort()
    .join(" ")
    .trim()
    .split(" ");
  console.log(tags);
  if (tags.length === 0 || tags[0] === "") {
    return;
  }
  let markup = source;
  for (const tag of tags) {
    // String pattern: only the first occurrence of each tag is replaced.
    markup = markup.replace(tag, '<mark>' + tag + '</mark>');
  }
  $('#my_console_log').text(markup);
  this.innerHTML = markup;
});
pre {
border: solid black 1px;
}
mark {
background: blue;
color: red;
}
<script src="https://code.jquery.com/jquery-1.10.2.js"></script>
<title>Test page</title>
<form>
<pre id='textarea' contentEditable='true'></pre>
<div id="my_console_log"></div>
</form>
Here is a simple plugin available which can be useful to you,
Download the plugin and edit the file jquery.hashtags.js and remove the condition for #. You can also change the style as per your requirement.
// jQuery plugin: $(el).hashtags() mirrors the element's value into a sibling
// ".highlighter" element with every #hashtag wrapped in <span class="hashtag">.
(function($) {
$.fn.hashtags = function() {
// Build the wrapper/highlighter markup around the element.
// NOTE(review): the string passed to the last wrap() contains an extra closing </div> — confirm intended.
$(this).wrap('<div class="jqueryHashtags"><div class="highlighter"></div></div>').unwrap().before('<div class="highlighter"></div>').wrap('<div class="typehead"></div></div>');
$(this).addClass("theSelector");
// autosize is not defined in this snippet; it must be provided elsewhere.
autosize($(this));
$(this).on("keyup", function() {
var str = $(this).val();
// Keep the highlighter as wide as the input element.
$(this).parent().parent().find(".highlighter").css("width",$(this).css("width"));
str = str.replace(/\n/g, '<br>');
// Only highlight when the text contains no URL ending in a #fragment.
// The 1st/2nd tests are identical (Latin fragment), as are the 3rd/4th (Arabic-range fragment).
if(!str.match(/(http|ftp|https):\/\/[\w-]+(\.[\w-]+)+([\w.,#?^=%&:\/~+#-]*[\w#?^=%&\/~+#-])?#([a-zA-Z0-9]+)/g) && !str.match(/(http|ftp|https):\/\/[\w-]+(\.[\w-]+)+([\w.,#?^=%&:\/~+#-]*[\w#?^=%&\/~+#-])?#([a-zA-Z0-9]+)/g) && !str.match(/(http|ftp|https):\/\/[\w-]+(\.[\w-]+)+([\w.,#?^=%&:\/~+#-]*[\w#?^=%&\/~+#-])?#([\u0600-\u06FF]+)/g) && !str.match(/(http|ftp|https):\/\/[\w-]+(\.[\w-]+)+([\w.,#?^=%&:\/~+#-]*[\w#?^=%&\/~+#-])?#([\u0600-\u06FF]+)/g)) {
// Remove below condition for hashtag.
// First pass (underscore allowed in Latin tags): if no tag is immediately followed by
// another "#", wrap each "#tag"; otherwise replace each "#a#b" pair with a span holding only "#a".
if(!str.match(/#(([_a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))#/g)) { //arabic support, CJK support
str = str.replace(/#(([_a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))/g,'<span class="hashtag">#$1</span>');
}else{
str = str.replace(/#(([_a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))#(([_a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))/g,'<span class="hashtag">#$1</span>');
}
// Keep this condition.
// Second pass: same logic without "_" in the Latin class.
// NOTE(review): this pass runs on the output of the first one, so tags that were already
// wrapped look like they would match again — confirm only one of the two passes is kept.
if(!str.match(/#(([a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))#/g)) {
str = str.replace(/#(([a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))/g,'<span class="hashtag">#$1</span>');
}else{
str = str.replace(/#(([a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))#(([a-zA-Z0-9]+)|([\u0600-\u06FF]+)|([ㄱ-ㅎㅏ-ㅣ가-힣]+)|([ぁ-んァ-ン]+)|([一-龯]+))/g,'<span class="hashtag">#$1</span>');
}
}
// Publish the (possibly highlighted) text into the highlighter element.
$(this).parent().parent().find(".highlighter").html(str);
});
// Clicking the element before the wrapper's parent focuses the real input.
$(this).parent().prev().on('click', function() {
$(this).parent().find(".theSelector").focus();
});
};
})(jQuery);
Instead of replacing the html just append a class with the color that you want

Handling multi-language paragraph font style in HTML

I have a webpage, which consists of mostly Persian content, and in some paragraphs there is a word or some words in English. The content is generated automatically and I can't change it from my HTML source.
I need to detect where these English words are, and give them font-size:xx; The reason of the former is that my English font - which I haven't chosen and changing it would be out of the question- looks bigger than my Persian font and it has to be some pixels less than the font-size I assigned to Persian font of every page.
here goes an example:
<span class="common">سلام دنیا (helo world)</span>
This whole span receives the following style:
.common{
font-size:26px;
font-family:'Arial';
}
and I can't assign a different font-size to the "hello world" part.
Since the page content is produces via a script code which gets data from DB, I can't manually give English words any embedded style, like surrounding them with <em lang="en"></em> tag.
Is there any way to automatically detect English words and give them style- by assigning a class maybe?-
Any attempt to help will be highly appreciated.
You can manipulate DOM by searching for English char sequence and wrap those sequence with your own span.
This is not the complete solution, but you can do something like this:
// For every <span>, rebuild its markup from its rendered text, wrapping each run of
// lowercase ASCII letters in <span class="uncommon">.
document.querySelectorAll("span").forEach(function (el) {
    var rebuilt = el.innerText.replace(/[a-z]+/g, function (run) {
        return '<span class="uncommon">' + run + '</span>';
    });
    el.innerHTML = rebuilt;
});
.common{
font-size:26px;
font-family:'Arial';
}
.uncommon{
font-size:36px;
font-weight: 600;
}
<span class="common">سلام دنیا (helo world)</span>
The above answer works. Just for further usage, I provide an Angular version of this idea. Hope this would help someone in future.
.directive( 'showData', function ( $compile ) {
return {
scope: true,
link: function ( scope, element, attrs ) {
var el;
var farsi = 0;
attrs.$observe( 'template', function ( tpl )
{
//var tpl = attrs.template;
//if ( angular.isDefined( tpl ) )
{
// compile the provided template against the current scope
//now work on tpl:
function containASCII(str){
var flag_only_ascii = 1;
var flag_contain_ascii = 0;
for(var i=0;i<str.length;i++){
if(str.charCodeAt(i)<127){
flag_contain_ascii = 1;
}
else
{
flag_only_ascii = 0;
}
}
if(flag_only_ascii == 1 && flag_contain_ascii == 0)
return 1; //just ascii
if(flag_only_ascii == 0 && flag_contain_ascii ==1)
{
return 2;//combination
}
if(flag_only_ascii == 0 && flag_contain_ascii == 0){
return 0; //just english...
}
if(flag_only_ascii == 1 && flag_contain_ascii == 1){
return 3; //other
}
};
if(scope.TranslationValue == 1)
{
var split_span = tpl.split(" ");
for (i = 0 ; i < split_span.length ; i++)
{
var str_1 = split_span[i];
if(containASCII(str_1) == 3){
//if(str_1.search("<em2 ") == -1)
split_span[i] = "<em2 class='uncommon'>" + str_1 + "</em2>";
}
}
var final_str = split_span.join(" ");
}
else
{
final_str = tpl;
}
final_str = '<span rep-eng-text-font>' + final_str + '</span>';
element.html(final_str);
// add the template content
}
});
}
};
})

Angularjs ng-bind-html with custom Filter

I am currently working with ng-bind-html. Basically, what I am trying to do is this: when I post a blog, the blog contains links and other styling. So when I am trying to show the list of blogs, I am using ng-bind-html like this:
<p ng-bind-html="blog.blogContent"></p>
which works fine.
But in addition, I try to truncate the blog and show only few paragraphs with view more option by passing a custom filter. But when I pass the filter I get the following:
<p ng-bind-html="blog.blogContent | Truncate"></p>
Error: [$sanitize:badparse] The sanitizer was unable to parse the
following block of html: <a href="https:.......
My Filter looks like this:
return function (text, length, end) {
if (text !== undefined) {
if (isNaN(length)) {
length = 450;
}
if (end === undefined) {
end = ".......";
}
if (text.length <= length || text.length - end.length <= length) {
return text;
} else {
return String(text).substring(0, length - end.length) + end;
}
}
You can solve this using custom directives and filters. try this one: https://stackoverflow.com/a/45076560/6816707
I used the solution posted by Minouris in this post (Javascript truncate HTML text) and adapted it into an AngularJS filter. It seems to work pretty well. The filter is
angular.module('plunker').filter('Truncate', function() {
/**
 * Filter function: cut an HTML string to `length` characters and try to repair the
 * markup that the cut may have broken.
 *
 * @param {string} text HTML to truncate; when undefined nothing is returned.
 * @param {number} [length] Maximum length; falls back to 20 when isNaN(length).
 * @param {string} [end] Ellipsis marker; falls back to "......." when undefined.
 *     NOTE(review): `end` only takes part in the "short enough" check below; it is
 *     never appended to the truncated result — confirm intended.
 * @returns {string|undefined} `text` itself when short enough, otherwise the repaired cut.
 */
return function(text, length, end) {
if (text !== undefined) {
if (isNaN(length)) {
length = 20;
}
if (end === undefined) {
end = ".......";
}
// Short enough already: hand the input back untouched.
if (text.length <= length || text.length - end.length <= length) {
return text;
}
var truncated = text.substring(0, length);
// Remove line breaks and surrounding whitespace
truncated = truncated.replace(/(\r\n|\n|\r)/gm,"").trim();
// If the text ends with an incomplete start tag, trim it off
truncated = truncated.replace(/<(\w*)(?:(?:\s\w+(?:={0,1}(["']{0,1})\w*\2{0,1})))*$/g, '');
// If the text ends with a truncated end tag, fix it.
var truncatedEndTagExpr = /<\/((?:\w*))$/g;
var truncatedEndTagMatch = truncatedEndTagExpr.exec(truncated);
if (truncatedEndTagMatch != null) {
var truncatedEndTag = truncatedEndTagMatch[1];
// Check to see if there's an identifiable tag in the end tag
if (truncatedEndTag.length > 0) {
// If so, find the start tag, and close it
// The pattern accepts the partial name plus at most one further word character.
var startTagExpr = new RegExp(
"<(" + truncatedEndTag + "\\w?)(?:(?:\\s\\w+(?:=([\"\'])\\w*\\2)))*>");
var testString = truncated;
var startTagMatch = startTagExpr.exec(testString);
var startTag = null;
// Strip matching start tags one at a time; the last one found wins.
while (startTagMatch != null) {
startTag = startTagMatch[1];
testString = testString.replace(startTagExpr, '');
startTagMatch = startTagExpr.exec(testString);
}
if (startTag != null) {
truncated = truncated.replace(truncatedEndTagExpr, '</' + startTag + '>');
}
} else {
// Otherwise, cull off the broken end tag
truncated = truncated.replace(truncatedEndTagExpr, '');
}
}
// Now the tricky part. Reverse the text, and look for opening tags. For each opening tag,
// check to see that he closing tag before it is for that tag. If not, append a closing tag.
// (testString is re-declared here; with var this is the same function-scoped variable as above.)
var testString = reverseHtml(truncated);
var reverseTagOpenExpr = /<(?:(["'])\w*\1=\w+ )*(\w*)>/;
var tagMatch = reverseTagOpenExpr.exec(testString);
while (tagMatch != null) {
var tag = tagMatch[0];
// tagName comes from the reversed text, so it is spelled backwards at this point.
var tagName = tagMatch[2];
var startPos = tagMatch.index;
var endPos = startPos + tag.length;
var fragment = testString.substring(0, endPos);
// Test to see if an end tag is found in the fragment. If not, append one to the end
// of the truncated HTML, thus closing the last unclosed tag
if (!new RegExp("<" + tagName + "\/>").test(fragment)) {
// Un-reverse the name before emitting the closing tag.
truncated += '</' + reverseHtml(tagName) + '>';
}
// Get rid of the already tested fragment
testString = testString.replace(fragment, '');
// Get another tag to test
tagMatch = reverseTagOpenExpr.exec(testString);
}
return truncated;
}
}
/**
 * Reverse a string character by character and swap every "<" with ">" (and vice
 * versa), so that tags in the reversed text still open with "<" and close with ">".
 *
 * The swap is done in a single pass; the earlier placeholder-based approach turned
 * any "Î" (char code 206) already present in the input into ">".
 *
 * @param {string} str Text to reverse.
 * @returns {string} The reversed text with angle brackets mirrored.
 */
function reverseHtml(str) {
    var mirrored = { '<': '>', '>': '<' };
    return str.split('').reverse().join('').replace(/[<>]/g, function (bracket) {
        return mirrored[bracket];
    });
}
});
Working plunkr:
http://plnkr.co/edit/oCwmGyBXB26omocT2q9m?p=preview
I haven't tested the above solution, and you may run into issues with more complicated HTML strings. May I suggest using a jQuery library like https://github.com/pathable/truncate to be safe?

Get the DOM path of the clicked <a>

HTML
<body>
<div class="lol">
<a class="rightArrow" href="javascriptVoid:(0);" title"Next image">
</div>
</body>
Pseudo Code
$(".rightArrow").click(function() {
rightArrowParents = this.dom(); //.dom(); is the pseudo function ... it should show the whole
alert(rightArrowParents);
});
Alert message would be:
body div.lol a.rightArrow
How can I get this with javascript/jquery?
Here is a native JS version that returns a jQuery path. I'm also adding IDs for elements if they have them. This would give you the opportunity to do the shortest path if you see an id in the array.
var path = getDomPath(element);
console.log(path.join(' > '));
Outputs
body > section:eq(0) > div:eq(3) > section#content > section#firehose > div#firehoselist > article#firehose-46813651 > header > h2 > span#title-46813651
Here is the function.
/**
 * Build a jQuery-style selector path for `el`, from <body> down to the element.
 *
 * Each step is the lower-cased node name, followed by "#id" when the element has a
 * non-empty id, or by ":eq(n)" (0-based index among same-named siblings) when it
 * has same-named siblings. The outermost entry (the html element) is dropped.
 *
 * @param {Element} el Element to describe; a null/undefined value yields [].
 * @returns {string[]} Path segments, outermost first.
 */
function getDomPath(el) {
    var stack = [];
    if (!el) {
        return stack;
    }
    while ( el.parentNode != null ) {
        var siblings = el.parentNode.childNodes;
        var sibCount = 0;
        var sibIndex = 0;
        // Count same-named siblings and remember this element's position among them.
        for ( var i = 0; i < siblings.length; i++ ) {
            var sib = siblings[i];
            if ( sib.nodeName == el.nodeName ) {
                if ( sib === el ) {
                    sibIndex = sibCount;
                }
                sibCount++;
            }
        }
        var name = el.nodeName.toLowerCase();
        if ( el.hasAttribute('id') && el.id != '' ) {
            stack.unshift(name + '#' + el.id);
        } else if ( sibCount > 1 ) {
            stack.unshift(name + ':eq(' + sibIndex + ')');
        } else {
            stack.unshift(name);
        }
        el = el.parentNode;
    }
    return stack.slice(1); // removes the html element
}
Using jQuery, like this (followed by a solution that doesn't use jQuery except for the event; lots fewer function calls, if that's important):
// On click, log the ancestor chain of the clicked element (html excluded) as
// space-separated "tag.class1.class2" entries, outermost first.
$(".rightArrow").click(function () {
    const describe = (node) => {
        const tag = node.tagName.toLowerCase();
        const classes = node.className.trim();
        // Collapse each run of spaces between class names into a single dot.
        return classes ? tag + "." + classes.replace(/ +/g, ".") : tag;
    };
    const chain = $(this).parents().addBack().not("html").get().map(describe);
    console.log(chain.join(" "));
    return false;
});
Live example:
// On click, log the ancestor chain of the clicked element (html excluded) as
// space-separated "tag.class1.class2" entries, outermost first.
$(".rightArrow").click(function () {
    const describe = (node) => {
        const tag = node.tagName.toLowerCase();
        const classes = node.className.trim();
        // Collapse each run of spaces between class names into a single dot.
        return classes ? tag + "." + classes.replace(/ +/g, ".") : tag;
    };
    const chain = $(this).parents().addBack().not("html").get().map(describe);
    console.log(chain.join(" "));
    return false;
});
<div class=" lol multi ">
Click here
</div>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>
(In the live examples, I've updated the class attribute on the div to be lol multi to demonstrate handling multiple classes.)
That uses parents to get the ancestors of the element that was clicked, removes the html element from that via not (since you started at body), then loops through creating entries for each parent and pushing them on an array. Then we use addBack to add the a back into the set, which also changes the order of the set to what you wanted (parents is special, it gives you the parents in the reverse of the order you wanted, but then addBack puts it back in DOM order). Then it uses Array#join to create the space-delimited string.
When creating the entry, we trim className (since leading and trailing spaces are preserved, but meaningless, in the class attribute), and then if there's anything left we replace any series of one or more spaces with a . to support elements that have more than one class (<p class='foo bar'> has className = "foo bar", so that entry ends up being p.foo.bar).
Just for completeness, this is one of those places where jQuery may be overkill, you can readily do this just by walking up the DOM:
// On click, walk up from the clicked element via parentNode until the html element
// and log the chain as space-separated "tag.class1.class2" entries, outermost first.
$(".rightArrow").click(function () {
    const segments = [];
    let node = this;
    while (node) {
        const tag = node.tagName.toLowerCase();
        if (tag === "html") {
            break;
        }
        const classes = node.className.trim();
        // Prepending keeps the list in outermost-first order without a final reverse.
        segments.unshift(classes ? tag + "." + classes.replace(/ +/g, ".") : tag);
        node = node.parentNode;
    }
    console.log(segments.join(" "));
    return false;
});
Live example:
// On click, walk up from the clicked element via parentNode until the html element
// and log the chain as space-separated "tag.class1.class2" entries, outermost first.
$(".rightArrow").click(function () {
    const segments = [];
    let node = this;
    while (node) {
        const tag = node.tagName.toLowerCase();
        if (tag === "html") {
            break;
        }
        const classes = node.className.trim();
        // Prepending keeps the list in outermost-first order without a final reverse.
        segments.unshift(classes ? tag + "." + classes.replace(/ +/g, ".") : tag);
        node = node.parentNode;
    }
    console.log(segments.join(" "));
    return false;
});
<div class=" lol multi ">
Click here
</div>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"></script>
There we just use the standard parentNode property (or we could use parentElement) of the element repeatedly to walk up the tree until either we run out of parents or we see the html element. Then we reverse our array (since it's backward to the output you wanted), and join it, and we're good to go.
I needed a native JS version, that returns CSS standard path (not jQuery), and deals with ShadowDOM. This code is a minor update on Michael Connor's answer, just in case someone else needs it:
/**
 * Build a " > "-joined CSS-style path for `el`, crossing shadow roots.
 *
 * Each step is the lower-cased node name; ":nth-of-type(n)" (1-based) is added when
 * the node has same-named siblings, and "::shadow" is added to a host element whose
 * shadow root (nodeType 11) was just crossed. The outermost entry is dropped.
 *
 * @param {Node} el Starting node.
 * @returns {string|undefined} The path, or undefined when `el` is falsy.
 */
function getDomPath(el) {
    if (!el) {
        return;
    }
    var segments = [],
        crossedShadowRoot = false,
        parent,
        peers,
        position,
        selector;
    for (parent = el.parentNode; parent != null; parent = el.parentNode) {
        // Same-named siblings (including el itself) and el's position among them.
        peers = Array.prototype.filter.call(parent.childNodes, function (candidate) {
            return candidate.nodeName == el.nodeName;
        });
        position = Math.max(peers.lastIndexOf(el), 0);
        // no id shortcuts, ids are not unique in shadowDom
        selector = el.nodeName.toLowerCase();
        if (crossedShadowRoot) {
            selector += "::shadow";
            crossedShadowRoot = false;
        }
        if (peers.length > 1) {
            selector += ':nth-of-type(' + (position + 1) + ')';
        }
        segments.unshift(selector);
        el = parent;
        if (el.nodeType === 11) {
            // Shadow root reached: continue from its host element.
            crossedShadowRoot = true;
            el = el.host;
        }
    }
    // The first entry is the html element; leave it out.
    return segments.slice(1).join(' > ');
}
Here is a solution for exact matching of an element.
It is important to understand that the selector (it is not a real one) that the chrome tools show do not uniquely identify an element in the DOM. (for example it will not distinguish between a list of consecutive span elements. there is no positioning/indexing info)
An adaptation from a similar (about xpath) answer
/**
 * jQuery plugin: describe the first matched element's ancestor chain as a
 * " > "-joined selector. html and body are emitted by name only; every other node
 * becomes name[:nth-child(n)][#id][.class...], where :nth-child is added only when
 * the node has siblings with the same node name.
 *
 * @returns {string} The selector string.
 */
$.fn.fullSelector = function () {
    var describe = function (item) {
        var name = item.nodeName.toLowerCase(),
            $item,
            idPart,
            classPart,
            indexPart;
        if (name === 'html' || name === 'body') {
            return name;
        }
        $item = $(item);
        idPart = item.id ? '#' + item.id : '';
        classPart = '';
        if (item.classList.length) {
            classPart = item.classList.toString().split(' ').map(function (c) {
                return '.' + c;
            }).join('');
        }
        indexPart = $item.siblings(name).length ? ':nth-child(' + ($item.index() + 1) + ')' : '';
        return name + indexPart + idPart + classPart;
    };
    return this.parents().addBack().get().map(describe).join(' > ');
};
And you can use it like this
console.log( $('some-selector').fullSelector() );
Demo at http://jsfiddle.net/gaby/zhnr198y/
The short vanilla ES6 version I ended up using:
Returns the output I'm used to read in Chrome inspector e.g body div.container input#name
/**
 * Build a space-separated path such as "body div.container input#name" for `el`.
 *
 * Each element contributes its lower-cased node name plus "#id" when it has an id,
 * otherwise ".class1.class2" when it has classes. The walk stops at document.body
 * (emitted as "body") or when it runs out of element ancestors, so elements that
 * are detached or live outside <body> no longer cause a TypeError.
 *
 * @param {Element|null} el Element to describe.
 * @returns {string} The path; "" when `el` is not an element.
 */
function getDomPath(el) {
  const segments = [];
  for (let node = el; node && node.nodeType === 1; node = node.parentNode) {
    if (node === document.body) {
      segments.unshift('body');
      break;
    }
    let segment = node.nodeName.toLowerCase();
    if (node.id) segment += '#' + node.id;
    else if (node.classList.length)
      segment += '.' + [...node.classList].join('.');
    segments.unshift(segment);
  }
  return segments.join(' ');
};
I moved the snippet from T.J. Crowder to a tiny jQuery Plugin. I used the jQuery version of him even if he's right that this is totally unnecessary overhead, but i only use it for debugging purpose so i don't care.
Usage:
Html
<html>
<body>
<!-- Two spans, the first will be chosen -->
<div>
<span>Nested span</span>
</div>
<span>Simple span</span>
<!-- Pre element -->
<pre>Pre</pre>
</body>
</html>
Javascript
// result (array): ["body", "div.sampleClass"]
$('span').getDomPath(false)
// result (string): body > div.sampleClass
$('span').getDomPath()
// result (array): ["body", "div#test"]
$('pre').getDomPath(false)
// result (string): body > div#test
$('pre').getDomPath()
Repository
https://bitbucket.org/tehrengruber/jquery.dom.path
I've been using Michael Connor's answer and made a few improvements to it.
Using ES6 syntax
Using nth-of-type instead of nth-child, since nth-of-type looks for children of the same type, rather than any child
Removing the html node in a cleaner way
Ignoring the nodeName of elements with an id
Only showing the path until the closest id, if any. This should make the code a bit more resilient, but I left a comment on which line to remove if you don't want this behavior
Use CSS.escape to handle special characters in IDs and node names
~
/**
 * Build a list of CSS selector segments for `el`, outermost first.
 *
 * The walk goes up through parentNode and stops at the html element or at the
 * closest ancestor (or `el` itself) that has a non-empty id, which is emitted as
 * "#id". Other nodes are emitted by escaped node name, with ":nth-of-type(n)"
 * when earlier siblings share the same node name.
 *
 * @param {Element} el Element to describe.
 * @returns {string[]} Selector segments.
 */
export default function getDomPath(el) {
  const path = []
  let current = el
  while (current.parentNode !== null) {
    const siblings = Array.from(current.parentNode.childNodes)
    const ownPosition = siblings.indexOf(current)
    // Number of same-named siblings that come before this node
    const sameTypeBefore = ownPosition < 0
      ? 0
      : siblings.slice(0, ownPosition).filter((sib) => sib.nodeName === current.nodeName).length

    const tag = CSS.escape(current.nodeName.toLowerCase())

    // Ignore `html` as a parent node
    if (tag === 'html') break

    if (current.hasAttribute('id') && current.id !== '') {
      path.unshift(`#${CSS.escape(current.id)}`)
      // Remove this `break` if you want the entire path
      break
    }

    // :nth-of-type is 1-indexed
    path.unshift(sameTypeBefore > 0 ? `${tag}:nth-of-type(${sameTypeBefore + 1})` : tag)
    current = current.parentNode
  }
  return path
}
None of the examples from the other answers worked quite correctly for me, so I made my own; maybe my version will suit others better.
/**
 * Build a " > "-joined selector for `element` from its parentElement chain.
 * Every element contributes its lower-cased tag name followed by [class="..."]
 * and/or [id="..."] attribute selectors (in attribute order). The outermost
 * element of the chain is dropped from the result.
 */
const getDomPath = element => {
  const describe = node => {
    const attrs = Array.from(node.attributes)
      .map(attribute => attribute.name)
      .filter(name => name === 'class' || name === 'id')
      .map(name => `[${name}="${node.getAttribute(name)}"]`)
      .join('')
    return node.tagName.toLowerCase() + attrs
  }

  const stack = []
  for (let node = element; node; node = node.parentElement) {
    stack.push(describe(node))
  }
  return stack.reverse().slice(1).join(' > ')
}
const currentElement = document.querySelectorAll('[class="serp-item__thumb justifier__thumb"]')[7]
const path = getDomPath(currentElement)
console.log(path)
console.log(document.querySelector(path))
console.log(currentElement)
// Walk up from #show-editor-button through .parent() until the jQuery object no
// longer has a tagName, prepending " tag#id.class1.class2" for every step, then log it.
var obj = $('#show-editor-button'),
    path = '';
for (; typeof obj.prop('tagName') != "undefined"; obj = obj.parent()) {
    path = ' ' + obj.prop('tagName').toLowerCase()
        + (obj.attr('id') ? '#' + obj.attr('id') : '')
        + (obj.attr('class') ? '.' + obj.attr('class').replace(/\s/g , ".") : '')
        + path;
}
console.log(path);
hello this function solve the bug related to current element not show in the path
check this now
$j(".wrapper").click(function(event) {
selectedElement=$j(event.target);
var rightArrowParents = [];
$j(event.target).parents().not('html,body').each(function() {
var entry = this.tagName.toLowerCase();
if (this.className) {
entry += "." + this.className.replace(/ /g, '.');
}else if(this.id){
entry += "#" + this.id;
}
entry=replaceAll(entry,'..','.');
rightArrowParents.push(entry);
});
rightArrowParents.reverse();
//if(event.target.nodeName.toLowerCase()=="a" || event.target.nodeName.toLowerCase()=="h1"){
var entry = event.target.nodeName.toLowerCase();
if (event.target.className) {
entry += "." + event.target.className.replace(/ /g, '.');
}else if(event.target.id){
entry += "#" + event.target.id;
}
rightArrowParents.push(entry);
// }
where $j = jQuery Variable
also solve the issue with .. in class name
here is replace function :
/**
 * Backslash-escape the characters . * + ? ^ = ! : $ { } ( ) | [ ] / \ in `str`
 * so it can be embedded in a regular expression as literal text.
 *
 * @param {string} str Raw text.
 * @returns {string} Escaped text.
 */
function escapeRegExp(str) {
    var specials = /([.*+?^=!:${}()|\[\]\/\\])/g;
    return str.replace(specials, function (ch) {
        return "\\" + ch;
    });
}
/**
 * Replace every occurrence of the literal text `find` in `str` with `replace`.
 * `find` is escaped through escapeRegExp and compiled into a global RegExp.
 *
 * @param {string} str Text to search.
 * @param {string} find Literal text to look for.
 * @param {string} replace Replacement (String.prototype.replace rules apply).
 * @returns {string} The rewritten text.
 */
function replaceAll(str, find, replace) {
    var everyMatch = new RegExp(escapeRegExp(find), 'g');
    return str.replace(everyMatch, replace);
}
Thanks
// Describe every ancestor of .rightArrow as "tag.class1.class2" and join them,
// outermost first, with ", ".
$(".rightArrow")
    .parents()
    .map(function () {
        var value = this.tagName.toLowerCase();
        if (this.className) {
            // Use a global regex: String.prototype.replace with a string pattern only
            // replaces the first match, and the third "flags" argument used before is
            // non-standard and ignored by current engines.
            value += '.' + this.className.replace(/ /g, '.');
        }
        return value;
    })
    .get().reverse().join(", ");

JQuery/Javascript - Search DOM for text and insert HTML

How do I search the DOM for a certain string in the document's text (say, "cheese") then insert some HTML immediately after that string (say, "< b >is fantastic< /b >").
I have tried the following:
for (var tag in document.innerHTML) {
if (tag.matches(/cheese/) != undefined) {
document.innerHTML.append(<b>is fantastic</b>
}
}
(The above is more of an illustration of what I have tried, not the actual code. I expect the syntax is horribly wrong so please excuse any errors, they are not the problem).
Cheers,
Pete
There are native methods for finding text inside a document:
MSIE:textRange.findText()
Others: window.find()
Manipulate the given textRange if something was found.
Those methods should provide much more performance than the traversing of the whole document.
Example:
<html>
<head>
<script>
/**
 * Insert <b>b</b> right after every occurrence of the text `a` in the page.
 *
 * Uses window.find() with the current selection's range when available; otherwise
 * falls back to document.body.createTextRange() / findText() / pasteHTML().
 *
 * @param {string} a Text to search for.
 * @param {string} b Text placed inside the inserted <b> element.
 */
function fx(a, b) {
    var bold, selectionRange, textRange;

    if (window.find) {
        while (window.find(a)) {
            bold = document.createElement('b');
            bold.appendChild(document.createTextNode(b));
            selectionRange = window.getSelection().getRangeAt(0);
            // Collapse to the end of the match so the node lands after it.
            selectionRange.collapse(false);
            selectionRange.insertNode(bold);
        }
        return;
    }

    if (document.body.createTextRange) {
        textRange = document.body.createTextRange();
        while (textRange.findText(a)) {
            textRange.collapse(false);
            textRange.pasteHTML('<b>' + b + '</b>');
        }
    }
}
</script>
</head>
<body onload="fx('cheese','is wonderful')">
<p>I've made a wonderful cheesecake with some <i>cheese</i> from my <u>chees</u>e-factory!</p>
</body>
</html>
This is crude and not the way to do it, but;
document.body.innerHTML = document.body.innerHTML.replace(/cheese/, 'cheese <b>is fantastic</b>');
You can use this with JQuery:
// For every element matched by the :contains selector, rewrite its HTML so
// that the first "cheese" in that markup is followed by the bold text.
$('*:contains("cheese")').each(function (position, element) {
    var wrapped = $(element);
    wrapped.html(wrapped.html().replace('cheese', 'cheese <b>is fantastic</b>'));
});
I haven't tested this, but something along these lines should work.
Note that * will match all elements, even html, so you may want to use body *:contains(...) instead to make sure only elements that are descendants of the document body are looked at.
Sample Solution:
<ul>
<li>cheese</li>
<li>cheese</li>
<li>cheese</li>
</ul>
jQuery code:
// Rewrites the text of each list item whose text is exactly "cheese";
// every other item is left as it is.
$('ul li').each(function () {
    var item = $(this);
    if (item.text() != "cheese") {
        return;
    }
    item.text('cheese is fantastic');
});
The way to do this is to traverse the document and search each text node for the desired text. Any way involving innerHTML is hopelessly flawed.
Here's a function that works in all browsers and recursively traverses the DOM within the specified node and replaces occurrences of a piece of text with nodes copied from the supplied template node replacementNodeTemplate:
/**
 * Recursively walks `node` and replaces every occurrence of `text` found in a
 * text node with a deep clone of `replacementNodeTemplate`.
 *
 * @param {Node} node - Root of the subtree to process (an element or a text node).
 * @param {string} text - Literal text to look for. An empty or missing value is a no-op.
 * @param {Node} replacementNodeTemplate - Node that is deep-cloned for each occurrence.
 */
function replaceText(node, text, replacementNodeTemplate) {
    // An empty search string "matches" at index 0 on every pass, so the
    // text-node loop below would never terminate.
    if (text == null || text === "") {
        return;
    }
    if (node.nodeType === 3) {
        while (node) {
            var textIndex = node.data.indexOf(text), currentNode = node;
            if (textIndex === -1) {
                node = null;
            } else {
                // Split the text node after the text
                var splitIndex = textIndex + text.length;
                var replacementNode = replacementNodeTemplate.cloneNode(true);
                if (splitIndex < node.length) {
                    // Continue searching in the remainder after the match.
                    node = node.splitText(splitIndex);
                    node.parentNode.insertBefore(replacementNode, node);
                } else {
                    // The match ends this text node: insert directly after it.
                    // appendChild would put the replacement at the end of the
                    // parent, i.e. behind any following siblings.
                    node.parentNode.insertBefore(replacementNode, node.nextSibling);
                    node = null;
                }
                // Remove the matched text from the node it was found in.
                currentNode.deleteData(textIndex, text.length);
            }
        }
    } else {
        var child = node.firstChild, nextChild;
        while (child) {
            // Remember the sibling first: processing `child` may insert new
            // nodes after it, and those must not be traversed again.
            nextChild = child.nextSibling;
            replaceText(child, text, replacementNodeTemplate);
            child = nextChild;
        }
    }
}
Here's an example use:
// Replaces each "cheese" found in the text nodes under <body> with a copy of
// the text node "CHEESE IS GREAT".
replaceText(document.body, "cheese", document.createTextNode("CHEESE IS GREAT"));
If you prefer, you can create a wrapper function to allow you to specify the replacement content as a string of HTML instead:
/**
 * Variant of replaceText that takes the replacement as an HTML string.
 *
 * The string is parsed inside a temporary <div>, the resulting nodes are moved
 * into a document fragment, and that fragment is handed to replaceText as the
 * replacement template.
 *
 * @param {Node} node - Root of the subtree to process.
 * @param {string} text - Text to look for.
 * @param {string} html - Markup to use as the replacement.
 */
function replaceTextWithHtml(node, text, html) {
    var container = document.createElement("div");
    container.innerHTML = html;
    var fragment = document.createDocumentFragment();
    // Appending a node to the fragment removes it from the container, so the
    // loop ends once the container is empty.
    for (var piece = container.firstChild; piece; piece = container.firstChild) {
        fragment.appendChild(piece);
    }
    replaceText(node, text, fragment);
}
Example:
// Puts the parsed markup "cheese <b>is fantastic</b>" in place of each
// "cheese" text match under <body>.
replaceTextWithHtml(document.body, "cheese", "cheese <b>is fantastic</b>");
I've incorporated this into a jsfiddle example: http://jsfiddle.net/timdown/azZsa/
I think this works in all browsers except IE, but that still needs confirmation.
This supports content in iframes as well.
Note that other examples I have seen, like the one above, are RECURSIVE, which is potentially bad in JavaScript: deep recursion can end in a stack overflow, especially in a browser client, which has limited memory for such things. Too much recursion can cause JavaScript to stop executing.
If you don't believe me, try the examples here yourself...
If anyone would like to contribute, the code is here.
/**
 * Collects the elements of a document whose innerText matches a pattern,
 * without recursion.
 *
 * Elements are visited in the order the document's element collection yields
 * them. Every matching element is recorded together with counters that are
 * updated as later elements are visited. At the end, only records with more
 * matches than "child elements with a match" are returned.
 * NOTE(review): the counters are adjusted for every earlier record, not only
 * for actual ancestors of the current element, so the filtering is a
 * heuristic. It is kept exactly as it was; confirm it matches the intent.
 *
 * @param {string|RegExp} searchText - Pattern to search for. A string is
 *   compiled as a global regular expression (it is NOT escaped).
 * @param {string} [frameId] - Key into window.frames; when given, that frame's
 *   contentDocument is searched instead of the current document.
 * @returns {Array|null} Matching elements, or null when `frameId` is given but
 *   the frame or its document cannot be found.
 */
function grepNodes(searchText, frameId) {
    // Anchored on purpose: an unanchored /head|link|.../ also matches tag
    // names that merely contain one of the words, e.g. "header" or "thead".
    var skippedTags = /^(?:html|script|head|meta|link|object)$/;
    var matchedNodes = [];
    var regXSearch;
    if (typeof searchText === "string") {
        regXSearch = new RegExp(searchText, "g");
    }
    else {
        regXSearch = searchText;
    }
    if (frameId && !window.frames[frameId]) {
        return null;
    }
    var theDoc = (frameId) ? window.frames[frameId].contentDocument : document;
    if (!theDoc) {
        return null;
    }
    var allNodes = (theDoc.all) ? theDoc.all : theDoc.getElementsByTagName('*');
    // All locals are declared: assigning to an undeclared name creates an
    // implicit global and throws in strict mode / ES modules.
    var currentNode, nodeText, matches, totalChildElements, previousElement, i;
    // Index loop instead of for...in, which would also enumerate non-index
    // properties of the collection.
    for (var nodeIdx = 0; nodeIdx < allNodes.length; nodeIdx++) {
        currentNode = allNodes[nodeIdx];
        if (!currentNode || !currentNode.nodeName) {
            break;
        }
        if (skippedTags.test(currentNode.nodeName.toLowerCase())) {
            continue;
        }
        // Elements without a string innerText are treated as "no match"
        // instead of throwing on .match().
        nodeText = currentNode.innerText;
        matches = (typeof nodeText === "string") ? nodeText.match(regXSearch) : null;
        if (matches) {
            totalChildElements = 0;
            for (i = 0; i < currentNode.children.length; i++) {
                if (!skippedTags.test(currentNode.children[i].nodeName.toLowerCase())) {
                    totalChildElements++;
                }
            }
            matchedNodes.push({node: currentNode, numMatches: matches.length, childElementsWithMatch: 0, nodesYetTraversed: totalChildElements});
        }
        // Update the counters of every record except the most recent one.
        for (i = matchedNodes.length - 1; i >= 0; i--) {
            previousElement = matchedNodes[i - 1];
            if (!previousElement) {
                continue;
            }
            if (previousElement.nodesYetTraversed !== 0 && previousElement.numMatches !== previousElement.childElementsWithMatch) {
                previousElement.childElementsWithMatch++;
                previousElement.nodesYetTraversed--;
            }
            else if (previousElement.nodesYetTraversed !== 0) {
                previousElement.nodesYetTraversed--;
            }
        }
    }
    var processedMatches = [];
    for (i = 0; i < matchedNodes.length; i++) {
        if (matchedNodes[i].numMatches > matchedNodes[i].childElementsWithMatch) {
            processedMatches.push(matchedNodes[i].node);
        }
    }
    return processedMatches;
}

Categories