Is it possible to generate the most specific XPath expression automatically from the position of the cursor on the web page?
The XPath expression would change with "onMouseMove event".
If it's possible, how would you implement it? Or is it already implemented in some Javascript or Python library? I would prefer it in Python with a combination of some web library but Javascript would be good and acceptable too.
See the Get XPath thread in DZone Snippets for finding the XPath. See the How do I check if the mouse is over an element in jQuery? question here for identifying when the mouse cursor is over an element.
I have answered an almost identical question (using jQuery) at Return XPath location with jQuery? Need some feedback on a function
If you change the click event to mouseenter you would have what you ask for..
$(document).delegate('*','mouseenter',function(){
var path = $(this).parents().andSelf();
var xpath='/';
for (var i = 0; i < path.length; i++)
{
var nd = path[i].nodeName.toLowerCase();
xpath += '/';
if (nd != 'html' && nd != 'body')
{
xpath += nd;
if (path[i].id != '')
{
xpath += '#' + path[i].id;
}
else
{
xpath += '['+ ($(path[i-1]).children().index(path[i])+1) +']';
}
if (path[i].className != '')
xpath += '.' + path[i].className;
}
else
{xpath += nd;}
}
$('#xpath').html(xpath); // show the xpath in an element with id xpath..
return false;
});
Demo at http://jsfiddle.net/gaby/hsv97/25/
Update with no jQuery used.. (for modern browsers)
function getXpath(event){
var hierarchy = [],
current = event.srcElement||event.originalTarget;
while (current.parentNode){
hierarchy.unshift(current);
current = current.parentNode;
}
var xPath = hierarchy.map(function(el,i){
return el.nodeName.toLowerCase() + ((el.id !== '') ? '#'+el.id : '') + ((el.className !== '') ? '.'+el.className.split(' ').join('.') : '');
}).join('/');
document.getElementById('xpath').innerHTML = xPath;
return xPath;
}
if (document.addEventListener){
document.addEventListener('mouseover', getXpath, false);
} else {
document.onmouseover = getXpath;
}
Demo at http://jsfiddle.net/gaby/hsv97/29/
vanilla javascript (with indices) http://jsfiddle.net/nycu2/1/
function nodeindex(element, array) {
var i,
found = -1,
element_name = element.nodeName.toLowerCase(),
matched
;
for (i = 0; i != array.length; ++i) {
matched = array[i];
if (matched.nodeName.toLowerCase() === element_name) {
++found;
if (matched === element) {
return found;
}
}
}
return -1;
}
function xpath(element, suffix) {
var parent, child_index, node_name;
parent = element.parentElement;
if (parent) {
node_name = element.nodeName.toLowerCase();
child_index = nodeindex(element, parent.children) + 1;
return xpath(parent, '/' + node_name + '[' + child_index + ']' + suffix);
} else {
return '//html[1]' + suffix;
}
}
function xpathstring(event) {
var
e = event.srcElement || event.originalTarget,
path = xpath(e, '');;
document.querySelector('.xpathresult').value = path;
highlight();
}
Related
I have a JS function which searches for a string in the HTML source, and outputs the parent node:
function searchHTML(searchTerm) {
queue = [document.body],
curr
;
while (curr = queue.pop()) {
if (!curr.textContent.match(searchTerm)) continue;
for (var i = 0; i < curr.childNodes.length; ++i) {
switch (curr.childNodes[i].nodeType) {
case Node.TEXT_NODE : // 3
if (curr.childNodes[i].textContent.match(searchTerm)) {
console.log(curr);
// End of search
}
break;
case Node.ELEMENT_NODE : // 1
queue.push(curr.childNodes[i]);
break;
}
}
}
}
Currently, its output (in Javascript console) is not a string.
I need to perform regex on the output (curr), so I need it to be a string.
What I have tried:
curr = curr.toString()
curr = curr.replace(/[0-9]/g, "")
You can use .text() jQuery function to get the string from an HTML.
Here is an example of how you get string :
text= curr.text();
curr = text.replace(/[0-9]/g, "");
It seems to me you need to find the commonAncestorContainer for the term searched. That means if the term starts in a node and ends in another, you don't really have a clear definition of the common parent, until you get the a range.
I put together the function below where you can call search('My Term') and it should get a commonAncestorContainer. Some tweek should still be needed to search the same term more than once and to make sure that words ending inside an element still consider that element as the parent instead of the next one after the parent.
var search = function (searchTerm) {
// Stop if there is nothing to look for
if (!searchTerm || typeof searchTerm !== 'string')
return null;
searchTerm = searchTerm.toLowerCase();
var bodyText = document.body.textContent.toLowerCase(),
range = document.createRange(),
startOffset = bodyText.indexOf(searchTerm),
endOffset = startOffset + searchTerm.length,
iterationObject = {
index: 0,
length: bodyText.length,
startOffset: startOffset,
endOffset: endOffset,
startInNodeOffset: -1,
endInNodeOffset: -1,
startNode: null,
endNode: null
};
var textContent = function (textNode) {
return textNode.nodeValue || textNode.textContent || textNode.wholeText;
};
(function iterate (node, iterationObject) {
if (node.nodeType === 1) {
var childNodes = node.childNodes;
// Keep iterating but we should try to stop it when nodes are found
for (var i = 0, iLen = childNodes.length; i < iLen; i++)
iterate(childNodes[i], iterationObject);
} else if (node.nodeType === 3) {
var text = textContent(node),
startInNodeOffset,
endInNodeOffset;
// Change index and move on
if (iterationObject.index + text.length < iterationObject.startOffset)
iterationObject.index += text.length;
else if (iterationObject.startNode === null) {
startInNodeOffset = iterationObject.startOffset - iterationObject.index;
// Start range in the current node
// This condition should really only be needed to decide if the selection should start
// before or after this node. But that is another story.
if (startInNodeOffset <= text.length) {
iterationObject.startNode = node;
iterationObject.startInNodeOffset = startInNodeOffset;
}
iterationObject.index += text.length;
} else {
// Now try to find the endNode
if (iterationObject.index + text.length < iterationObject.endOffset)
iterationObject.index += text.length;
else if (iterationObject.endNode === null) {
endInNodeOffset = iterationObject.endOffset - iterationObject.index;
if (endInNodeOffset <= text.length) {
iterationObject.endNode = node;
iterationObject.endInNodeOffset = endInNodeOffset;
}
}
}
}
if (iterationObject.startNode !== null && iterationObject.endNode !== null)
return;
})(document.body, iterationObject);
if (iterationObject.startInNodeOffset > -1 && iterationObject.endInNodeOffset > -1) {
range.setStart(iterationObject.startNode, iterationObject.startInNodeOffset);
range.setEnd(iterationObject.endNode, iterationObject.endInNodeOffset);
return range.commonAncestorContainer;
}
return null;
};
If you are using jQuery, you can try outerHTML to get the string from the commonAncestorContainer.
var parentElement = search('Whatever'),
result = '';
if (parentElement !== null)
result = $(parentElement).outerHTML();
You can create a temporary DOM node, and then append curr to it. Then get the innerHTML and the result will be a string:
var tempNode = document.createElement("div");
tempNode.appendChild(curr);
console.log(temp.innerHTML);
Working example here:
http://codepen.io/anon/pen/QypxwM
I have the following HTML structure:
<div class="content">
<p>somecontent</p>
<p>another content <span id="name-1">content</span> 1234214</p>
</div>
I want to wrap only numbers in additional span (1234214). So far I've made this:
jQuery(window).load(function() {
jQuery('.content p').html(function(index, value) {
return value.replace(/(\d+)/g, '<span class="mathjaxfont">$1</span>');
});
});
However this replaces the 1 in span id. How can I exclude checking element attributes?
You might want not only to exclude attributes (think about the h1-element for example) but constrain your replacing on the text nodes. See this questions for some ideas on how to get only and work with text nodes: How do I select text nodes with jQuery?
This answer in above question How do I select text nodes with jQuery? gives you a collection of text-nodes on which you can do your string-replacing.
You should use .contents() and .replaceWith() for this:
jQuery('.content p').contents().each(function() {
var method = this.nodeType == 1 ? 'html' : 'replaceWith';
$(this)[method](this.textContent.replace(
/(\d+)/g,
'<span class="mathjaxfont">$1</span>'
));
});
Here's a JSFiddle.
Long and hard solution, but should work in nested elements.
The idea is to handle element's .html() string character by character, wrapping numbers when they are found, but omitting numbers inside tags' definition.
Fiddle.
$(document).ready(function()
{
$('.content p').each(function()
{
$(this).html(handleHtml($(this).html()));
});
});
function handleHtml(html)
{
var resultHtml = "";
var numberStr = "";
var re = /[0-9]/;
var isTag = false, quote = "";
for (var i = 0; i < html.length; i++)
{
var char = html.substr(i, 1);
if (!isTag && re.test(char))
{
numberStr += char;
}
else
{
if (numberStr)
{
resultHtml += wrapNumber(numberStr);
numberStr = "";
}
resultHtml += char;
if (isTag && !quote && (char == '"' || char == "'"))
{
quote = char;
}
else if (quote && quote == char)
{
quote = "";
}
if (char == '<')
{
isTag = true;
}
else if (!quote && char == '>')
{
isTag = false;
}
}
}
if (numberStr)
{
resultHtml += wrapNumber(numberStr);
}
return resultHtml;
}
function wrapNumber(number)
{
return '<span class="mathjaxfont">' + number+ "</span>";
}
I have this little javascript to add some classes to some hyperlinks to give them a nice effect. It works on any links inside an element with the class linkroll. However, when the link is nested within some elements and div's, it doesnt apply the effect. I am finding that querySelectorAll is not very consistent.
I was thinking if I rewrite this and use something like jQuery's each() function, I may have better results. Here's how it looks now:
var supports3DTransforms = document.body.style['webkitPerspective'] !== undefined || document.body.style['MozPerspective'] !== undefined;
function linkify() {
if (supports3DTransforms) {
var selector = '.linkroll a';
var nodes = document.querySelectorAll(selector);
for (var i = 0, len = nodes.length; i < len; i++) {
var node = nodes[i];
var sibling = node.nextSibling; // Do not apply to images
if (sibling.nodeName != "img") {
if (!node.className || !node.className.match(/roll/g)) {
node.className += ' roll';
node.innerHTML = '<span data-title="' + node.text + '">' + node.innerHTML + '</span>';
}
}
};
}
}
linkify();
How could I possibly rewrite document.querySelectorAll(selector); and use something like jQuery's each() instead?
Try this:
var supports3DTransforms = $('body').css('-webkit-perspective') !== undefined || $('body').css('MozPerspective') !== undefined;
function linkify() {
if (supports3DTransforms) {
$('.linkroll a').each(function (i, el) {
var $el = $(el),
$sibling = $el.next();
!$sibling.is('img') && !$el.hasClass('roll') && $el.addClass('roll').wrap('<span data-title="' + $el.text() + '"></span>');
});
}
}
linkify();
Example fiddle
I am having issues figuring out how to resolve the getElementsByClassName issue in IE. How would I best implement the robert nyman (can't post the link to it since my rep is only 1) resolution into my code? Or would a jquery resolution be better? my code is
function showDesc(name) {
var e = document.getElementById(name);
//Get a list of elements that have a class name of service selected
var list = document.getElementsByClassName("description show");
//Loop through those items
for (var i = 0; i < list.length; ++i) {
//Reset all class names to description
list[i].className = "description";
}
if (e.className == "description"){
//Set the css class for the clicked element
e.className += " show";
}
else{
if (e.className == "description show"){
return;
}
}}
and I am using it on this page dev.msmnet.com/services/practice-management to show/hide the description for each service (works in Chrome and FF). Any tips would be greatly appreciated.
I was curious to see what a jQuery version of your function would look like, so I came up with this:
function showDesc(name) {
var e = $("#" + name);
$(".description.show").removeClass("show");
if(e.attr("class") == "description") {
e.addClass("show");
} else if(e.hasClass("description") && e.hasClass("show")) {
return;
}
}
This should support multiple classes.
function getElementsByClassName(findClass, parent) {
parent = parent || document;
var elements = parent.getElementsByTagName('*');
var matching = [];
for(var i = 0, elementsLength = elements.length; i < elementsLength; i++){
if ((' ' + elements[i].className + ' ').indexOf(findClass) > -1) {
matching.push(elements[i]);
}
}
return matching;
}
You can pass in a parent too, to make its searching the DOM a bit faster.
If you want getElementsByClassName('a c') to match HTML <div class="a b c" /> then try changing it like so...
var elementClasses = elements[i].className.split(/\s+/),
matchClasses = findClass.split(/\s+/), // Do this out of the loop :)
found = 0;
for (var j = 0, elementClassesLength = elementClasses.length; j < elementClassesLength; j++) {
if (matchClasses.indexOf(elementClasses[j]) > -1) {
found++;
}
}
if (found == matchClasses.length) {
// Push onto matching array
}
If you want this function to only be available if it doesn't already exist, wrap its definition with
if (typeof document.getElementsByClassName != 'function') { }
Even easier jQuery solution:
$('.service').click( function() {
var id = "#" + $(this).attr('id') + 'rt';
$('.description').not(id).hide();
$( id ).show();
}
Why bother with a show class if you are using jQuery?
Heres one I put together, reliable and possibly the fastest. Should work in any situation.
function $class(className) {
var children = document.getElementsByTagName('*') || document.all;
var i = children.length, e = [];
while (i--) {
var classNames = children[i].className.split(' ');
var j = classNames.length;
while (j--) {
if (classNames[j] == className) {
e.push(children[i]);
break;
}
}
}
return e;
}
I used to implement HTMLElement.getElementByClassName(), but at least Firefox and Chrome, only find the half of the elements when those elements are a lot, instead I use something like (actually it is a larger function):
getElmByClass(clm, parent){
// clm: Array of classes
if(typeof clm == "string"){ clm = [clm] }
var i, m = [], bcl, re, rm;
if (document.evaluate) { // Non MSIE browsers
v = "";
for(i=0; i < clm.length; i++){
v += "[contains(concat(' ', #"+clc+", ' '), ' " + base[i] + " ')]";
}
c = document.evaluate("./"+"/"+"*" + v, parent, null, 5, null);
while ((node = c.iterateNext())) {
m.push(node);
}
}else{ // MSIE which doesn't understand XPATH
v = elm.getElementsByTagName('*');
bcl = "";
for(i=0; i < clm.length; i++){
bcl += (i)? "|":"";
bcl += "\\b"+clm[i]+"\\b";
}
re = new RegExp(bcl, "gi");
for(i = 0; i < v.length; i++){
if(v.className){
rm = v[i].className.match(bcl);
if(rm && rm.length){ // sometimes .match returns an empty array so you cannot use just 'if(rm)'
m.push(v[i])
}
}
}
}
return m;
}
I think there would be a faster way to iterate without XPATH, because RegExp are slow (perhaps a function with .indexOf, it shuld be tested), but it is working well
You can replace getElementsByClassName() with the following:
function getbyclass(n){
var elements = document.getElementsByTagName("*");
var result = [];
for(z=0;z<elements.length;z++){
if(elements[z].getAttribute("class") == n){
result.push(elements[z]);
}
}
return result;
}
Then you can use it like this:
getbyclass("description") // Instead of document.getElementsByClassName("description")
Let's say I have a large HTML file with different kinds of tags, similar to the StackOverflow one you're looking at right now.
Now let's say you click an element on the page, what would the Javascript function look like that calculates the most basic XPath that refers to that specific element?
I know there are an infinite ways of refering to that element in XPath, but I'm looking for something that just looks at the DOM tree, with no regard for IDs, classes, etc.
Example:
<html>
<head><title>Fruit</title></head>
<body>
<ol>
<li>Bananas</li>
<li>Apples</li>
<li>Strawberries</li>
</ol>
</body>
</html>
Let's say you click on Apples. The Javascript function would return the following:
/html/body/ol/li[2]
It would basically just work its way upward the DOM tree all the way to the HTML element.
Just to clarify, the 'on-click' event-handler isn't the problem. I can make that work. I'm just not sure how to calculate the element's position within the DOM tree and represent it as an XPath.
PS
Any answer with or without the use of the JQuery library is appreciated.
PPS
I completely new to XPath, so I might even have made a mistake in the above example, but you'll get the idea.
Edit at August 11, 2010: Looks like somebody else asked a similar question: generate/get the Xpath for a selected textnode
Firebug can do this, and it's open source (BSD) so you can reuse their implementation, which does not require any libraries.
3rd party edit
This is an extract from the linked source above. Just in case the link above will change. Please check the source to benefit from changes and updates or the full featureset provided.
Xpath.getElementXPath = function(element)
{
if (element && element.id)
return '//*[#id="' + element.id + '"]';
else
return Xpath.getElementTreeXPath(element);
};
Above code calls this function.
Attention i added some line-wrapping to avoid horizontal scroll bar
Xpath.getElementTreeXPath = function(element)
{
var paths = []; // Use nodeName (instead of localName)
// so namespace prefix is included (if any).
for (; element && element.nodeType == Node.ELEMENT_NODE;
element = element.parentNode)
{
var index = 0;
var hasFollowingSiblings = false;
for (var sibling = element.previousSibling; sibling;
sibling = sibling.previousSibling)
{
// Ignore document type declaration.
if (sibling.nodeType == Node.DOCUMENT_TYPE_NODE)
continue;
if (sibling.nodeName == element.nodeName)
++index;
}
for (var sibling = element.nextSibling;
sibling && !hasFollowingSiblings;
sibling = sibling.nextSibling)
{
if (sibling.nodeName == element.nodeName)
hasFollowingSiblings = true;
}
var tagName = (element.prefix ? element.prefix + ":" : "")
+ element.localName;
var pathIndex = (index || hasFollowingSiblings ? "["
+ (index + 1) + "]" : "");
paths.splice(0, 0, tagName + pathIndex);
}
return paths.length ? "/" + paths.join("/") : null;
};
A function I use to get an XPath similar to your situation, it uses jQuery:
function getXPath( element )
{
var xpath = '';
for ( ; element && element.nodeType == 1; element = element.parentNode )
{
var id = $(element.parentNode).children(element.tagName).index(element) + 1;
id > 1 ? (id = '[' + id + ']') : (id = '');
xpath = '/' + element.tagName.toLowerCase() + id + xpath;
}
return xpath;
}
Small, powerfull and pure-js function
It returns xpath for the element and elements iterator for xpath.
https://gist.github.com/iimos/e9e96f036a3c174d0bf4
function xpath(el) {
if (typeof el == "string") return document.evaluate(el, document, null, 0, null)
if (!el || el.nodeType != 1) return ''
if (el.id) return "//*[#id='" + el.id + "']"
var sames = [].filter.call(el.parentNode.children, function (x) { return x.tagName == el.tagName })
return xpath(el.parentNode) + '/' + el.tagName.toLowerCase() + (sames.length > 1 ? '['+([].indexOf.call(sames, el)+1)+']' : '')
}
Probably you will need to add a shim for IE8 that don't support the [].filter method: this MDN page gives such code.
Usage
Getting xpath for node:
var xp = xpath(elementNode)
Executing xpath:
var iterator = xpath("//h2")
var el = iterator.iterateNext();
while (el) {
// work with element
el = iterator.iterateNext();
}
The firebug implementation can be modified slightly to check for element.id further up the dom tree:
/**
* Gets an XPath for an element which describes its hierarchical location.
*/
var getElementXPath = function(element) {
if (element && element.id)
return '//*[#id="' + element.id + '"]';
else
return getElementTreeXPath(element);
};
var getElementTreeXPath = function(element) {
var paths = [];
// Use nodeName (instead of localName) so namespace prefix is included (if any).
for (; element && element.nodeType == 1; element = element.parentNode) {
var index = 0;
// EXTRA TEST FOR ELEMENT.ID
if (element && element.id) {
paths.splice(0, 0, '/*[#id="' + element.id + '"]');
break;
}
for (var sibling = element.previousSibling; sibling; sibling = sibling.previousSibling) {
// Ignore document type declaration.
if (sibling.nodeType == Node.DOCUMENT_TYPE_NODE)
continue;
if (sibling.nodeName == element.nodeName)
++index;
}
var tagName = element.nodeName.toLowerCase();
var pathIndex = (index ? "[" + (index+1) + "]" : "");
paths.splice(0, 0, tagName + pathIndex);
}
return paths.length ? "/" + paths.join("/") : null;
};
I have just modified DanS' solution in order to use it with textNodes. Very useful to serialize HTML range object.
/**
* Gets an XPath for an node which describes its hierarchical location.
*/
var getNodeXPath = function(node) {
if (node && node.id)
return '//*[#id="' + node.id + '"]';
else
return getNodeTreeXPath(node);
};
var getNodeTreeXPath = function(node) {
var paths = [];
// Use nodeName (instead of localName) so namespace prefix is included (if any).
for (; node && (node.nodeType == 1 || node.nodeType == 3) ; node = node.parentNode) {
var index = 0;
// EXTRA TEST FOR ELEMENT.ID
if (node && node.id) {
paths.splice(0, 0, '/*[#id="' + node.id + '"]');
break;
}
for (var sibling = node.previousSibling; sibling; sibling = sibling.previousSibling) {
// Ignore document type declaration.
if (sibling.nodeType == Node.DOCUMENT_TYPE_NODE)
continue;
if (sibling.nodeName == node.nodeName)
++index;
}
var tagName = (node.nodeType == 1 ? node.nodeName.toLowerCase() : "text()");
var pathIndex = (index ? "[" + (index+1) + "]" : "");
paths.splice(0, 0, tagName + pathIndex);
}
return paths.length ? "/" + paths.join("/") : null;
};
There is nothing built in to get the xpath of an HTML element, but the reverse is common for example using the jQuery xpath selector.
If you need to determine the xpath of an HTML element you will have to provide a custom function to do this. Here are a couple of example javascript/jQuery impls to calculate the xpath.
Just for fun, an XPath 2.0 one line implementation:
string-join(ancestor-or-self::*/concat(name(),
'[',
for $x in name()
return count(preceding-sibling::*
[name() = $x])
+ 1,
']'),
'/')
The solution below is preferable if you need to reliably determine the absolute XPath of an element.
Some other answers either rely partly on the element id (which is not reliable since there can potentially be multiple elements with identical ids) or they generate XPaths that actually specify more elements than the one given (by erroneously omitting the sibling index in certain circumstances).
The code has been adapted from Firebug's source code by fixing the above-mentioned problems.
getXElementTreeXPath = function( element ) {
var paths = [];
// Use nodeName (instead of localName) so namespace prefix is included (if any).
for ( ; element && element.nodeType == Node.ELEMENT_NODE; element = element.parentNode ) {
var index = 0;
for ( var sibling = element.previousSibling; sibling; sibling = sibling.previousSibling ) {
// Ignore document type declaration.
if ( sibling.nodeType == Node.DOCUMENT_TYPE_NODE ) {
continue;
}
if ( sibling.nodeName == element.nodeName ) {
++index;
}
}
var tagName = element.nodeName.toLowerCase();
// *always* include the sibling index
var pathIndex = "[" + (index+1) + "]";
paths.unshift( tagName + pathIndex );
}
return paths.length ? "/" + paths.join( "/") : null;
};
function getPath(event) {
event = event || window.event;
var pathElements = [];
var elem = event.currentTarget;
var index = 0;
var siblings = event.currentTarget.parentNode.getElementsByTagName(event.currentTarget.tagName);
for (var i=0, imax=siblings.length; i<imax; i++) {
if (event.currentTarget === siblings[i] {
index = i+1; // add 1 for xpath 1-based
}
}
while (elem.tagName.toLowerCase() != "html") {
pathElements.unshift(elem.tagName);
elem = elem.parentNode;
}
return pathElements.join("/") + "[" + index + "]";
}
EDITED TO ADD SIBLING INDEX INFORMATION
Use https://github.com/KajeNick/jquery-get-xpath
<script src="https://code.jquery.com/jquery-3.4.1.min.js"></script>
<script src="../src/jquery-get-xpath.js"></script>
<script>
jQuery(document).ready(function ($) {
$('body').on('click', 'ol li', function () {
let xPath = $(this).jGetXpath();
console.log(xPath);
});
});
</script>
Console will show: /html/body/ol/li[2]
I have came across this problem and found it hard to solve fully. as in my case it was giving half xpath. so i modified it a little to give full path. here is my answer.
window.onclick = (e) => {
let pathArr = e.path;
let element = pathArr[0];
var xpath = '';
if(pathArr.length<=2 && pathArr[0].nodeType!=1){
for (let i = 0; i < pathArr.length - 1 && pathArr[i].nodeType == 1; i++) {
element = pathArr[i];
var id = $(element.parentNode).children(element.tagName).index(element) + 1;
id > 1 ? (id = '[' + id + ']') : (id = '');
xpath = '/' + element.tagName.toLowerCase() + id + xpath;
}
}
else{
xpath="/html/document"
}
return xpath;