cheerio missing div elements, why? - javascript

Here's a very simple example so that you can reproduce the bug quickly. Thanks.
// main.js
const fs = require('fs');
const cheerio = require('cheerio');
let data = fs.readFileSync("tmp.html", 'utf8');
let data2 = fs.readFileSync("tmp2.html", 'utf8');
const $ = cheerio.load(data);
const $2 = cheerio.load(data2);
$('body idx\\:entry').eq(0).text()
// '
//
// abaniquear
//
// '
// -> Umm, what's happening?
// -> Under the `idx:entry` element, there are three children, `idx:orth`, `div`, `div`.
// -> But in the above only the first two `idx:orth`, `div` has been detected thus `text()` returns only this
// -> while it should return `abanicar .. vt .. (Andes) .. see also: abanicar`.
// -> weird, right??
$2('body idx\\:entry').eq(0).text()
// '
//
// abaniqueo
//
//
//
//
// m
//
//
// fanning
//
//
//
// '
// -> Yes, this is a normal output.
// -> It `text()`-ed the three `idx:orth`, `div`, `div` children elements.
<!-- tmp.html -->
<html>
<body>
<mbp:pagebreak /><a id="filepos22099" /><mbp:frameset>
<idx:entry scriptable="yes"
><idx:orth value="abaniquear"></idx:orth>
<div>
<div><b>abaniquear</b></div>
</div>
<div>
<div>
<div>
<span><i>vt</i></span>
</div>
<div>
<span>
<span><i>(Andes)</i></span></span
>
<div>see also: abanicar</div>
</div>
</div>
</div>
</idx:entry>
<hr />
</mbp:frameset>
<mbp:pagebreak />
</body>
</html>
<!-- tmp2.html -->
<html>
<body>
<mbp:pagebreak /><a id="filepos22099" /><mbp:frameset>
<idx:entry scriptable="yes"
><idx:orth value="abaniqueo"
><idx:infl> <idx:iform name="" value="abaniqueos" /></idx:infl
></idx:orth>
<div>
<div><b>abaniqueo</b></div>
</div>
<div>
<div>
<div>
<span><i>m</i></span>
</div>
<div>
<span> <span>fanning</span></span>
</div>
</div>
</div>
</idx:entry>
<hr />
</mbp:frameset>
<mbp:pagebreak />
</body>
</html>
SO told me It looks like your post is mostly code; please add some more details. so I'll say again here,
Umm, what's happening?
In the $('body idx\\:entry').eq(0).text() where tmp.html is loaded,
under the idx:entry element, there are three children, idx:orth, div, div.
But in the above only the first two idx:orth, div has been detected thus text() returns only this while it should return abanicar .. vt .. (Andes) .. see also: abanicar.
weird, right??
In the $2('body idx\\:entry').eq(0).text() where tmp2.html is loaded,
Yes, this is a normal output.
It text()-ed the three idx:orth, div, div children elements.
So, how can I fix this bug? Thanks, again.

Solved the problem by specifying xml: true.
const $ = cheerio.load(data, {
xml: true,
});

Related

How to pick out span tag that doesn't have a class

Using document.getElementsByClassName("span3 pickItem").outerHTML) I set a variable htmlData to contain:
<div itemscope="" class="span3 pickItem">
<p itemprop="name" class="name">
<a href="/user/view?id=4943">
<span>John Doe</span>
<br />
<span>'Arizona'</span>
<br />
<span>'Student'</span>
</a>
</p>
</div>
How can I pick each value from the span tag and console.log them as such:
console.log(...span[0]) output: John Doe
console.log(...span[1]) output: Arizona
console.log(...span[2]) output: Student
Could do something like this
let namesArr = [];
let name = document.querySelectorAll("span");
name.forEach(function(names) {
namesArr.push(names.innerHTML);//Stores all names in array so you can access later
});
console.log(namesArr[0]);
console.log(namesArr[1]);
console.log(namesArr[2]);
Something like this should work:
// console log all names
const items = document.querySelectorAll('div.pickItem span')
items.forEach(item => {
console.log(item.innerText)
})
// console each from array index
console.log(items[0].innerText)
console.log(items[1].innerText)
console.log(items[2].innerText)
let spans = document.getElementsByTagName('span');
console.log(spans[0].innerHTML); //'Joe Doe'
You don't even need the htmlData variable because the DOM elements already exist. If you want to learn about parsing a string of HTML (this is what your htmlData variable has in it) into DOM elements, you can reivew DOMParser.parseFromString() - Web APIs | MDN.
Select the anchor
Select its child spans and map their textContent properties
function getTextFromSpan (span) {
// Just return the text as-is:
// return span.textContent?.trim() ?? '';
// Or, you can also remove the single quotes from the text value if they exist:
const text = span.textContent?.trim() ?? '';
const singleQuote = `'`;
const hasQuotes = text.startsWith(singleQuote) && text.endsWith(singleQuote);
return hasQuotes ? text.slice(1, -1) : text;
}
const anchor = document.querySelector('div.span3.pickItem > p.name > a');
const spanTexts = [...anchor.querySelectorAll(':scope > span')].map(getTextFromSpan);
for (const text of spanTexts) {
console.log(text);
}
<div itemscope="" class="span3 pickItem">
<p itemprop="name" class="name">
<a href="/user/view?id=4943">
<span>John Doe</span>
<br />
<span>'Arizona'</span>
<br />
<span>'Student'</span>
</a>
</p>
</div>

get a object value from a json API by Index in a forEach loop in JavaScript?

I am trying to learn how to use fetch() APIs this weekend...
and I saw this interesting API service, and I tried to learn how to use this
and I get a little problem, with javascript
the problem
I want to get the data from a .Json (and this works fine),
but when I want to put the values in the <div> and getting by object[index] is not showing anything
from what I know it seems possible,
but in this case, is not (...I search everywhere on the internet, no result)
basically...
this don't work object[index]; //index variable, is a number
this works object.object1; //normal method
what I tried
yes, I tried the traditional method using obj1.obj2 and is working fine, with the result I want!
but is not efficient, like I want.
because I want to get the values by index
and put the value in the <div>
with the index of the NodeListOf<element>
complete code, I wrote
open the snippet to see the code
let orarioText = document.querySelectorAll('.orario');
const fetchPreghieraTime = async() => {
const data = await fetch('http://api.aladhan.com/v1/timingsByCity?city=Milano&country=Italy&method=8');
const orarioJson = await data.json();
const orario = orarioJson.data.timings;
orarioText.forEach((item, index) => {
item.textContent = orario[index];
console.log(item.textContent + "" + index);
});
}
fetchPreghieraTime();
<div class="container">
<div class="tempo-preghiera-container">
<!-- 1 -->
<div class="tempo-fajr">
<div class="nome-preghiera">fajr</div>
<div class="orario">error</div>
<!-- this error text, It will change dinamically with JS -->
</div>
<!-- 2 -->
<div class="tempo-duhr">
<div class="nome-preghiera">duhr</div>
<div class="orario">error</div>
<!-- this error text, It will change dinamically with JS -->
</div>
<!-- 3 -->
<div class="tempo-asr">
<div class="nome-preghiera">asr</div>
<div class="orario">error</div>
<!-- this error text, It will change dinamically with JS -->
</div>
<!-- 4 -->
<div class="tempo-maghrib">
<div class="nome-preghiera">maghrib</div>
<div class="orario">error</div>
<!-- this error text, It will change dinamically with JS -->
</div>
<!-- 5 -->
<div class="tempo-isha">
<div class="nome-preghiera">isha</div>
<div class="orario">error</div>
<!-- this error text, It will change dinamically with JS -->
</div>
</div>
</div>
<script src="./script.js"></script>
What do I mean by efficient?
this is the idea to write less, simpler code:
elementClass.forEach((item, index) => {
item.textContent = object[index];
});
you can see how inefficient method below, that is working
elementClass[0].textContent = object.Fajr;
elementClass[1].textContent = object.Dhuhr;
elementClass[2].textContent = object.Asr;
elementClass[3].textContent = object.Maghrib;
elementClass[4].textContent = object.Isha;
if you can I want the less code, or the simpler solution
( I don't want you to give a faster program possible, no no, for me if is simple logic that is enough for me )
(think if I need to write all the name of the object if there is like 50 items, etc..., that is why)
the first Idea is coming to my mind because of arrays...
and in arrays, you can use brackets with a number, that start with 0, and that is (is not working)
the problem
code doesn't work
let orarioText = document.querySelectorAll('.orario');
//there are 5 elements with the same class
orarioText.forEach((item, index) => {
item.textContent = orarioJson.data.timings[index];
});
this is WORKING fine
let orarioText = document.querySelectorAll('.orario');
//there are 5 elements with the same class
orarioText.forEach((item, index) => {
item.textContent = orarioJson.data.timings.Fajr; //here you see the [index] is replaced by the actual name
});
if you want to try also, here is the API I used
here is the API service link, I usedhttp://api.aladhan.com/v1/timingsByCity?city=Milano&country=Italy&method=8
and this is how it looks:
{"code":200,"status":"OK","data":{"timings":{"Fajr":"05:31","Sunrise":"07:19","Dhuhr":"12:37","Asr":"15:26","Sunset":"17:56","Maghrib":"17:56","Isha":"19:26","Imsak":"05:21","Midnight":"00:37"}
in case, is not clear the problem to you:
you can write a comment, asking for more info, I will answer it :)
short summary of what I asked
I want that this HTML
<!-- 0 -->
<div class="myClass"></div>
<!-- 1 -->
<div class="myClass"></div>
<!-- 2 -->
<div class="myClass"></div>
<!-- 3 -->
<div class="myClass"></div>
<!-- 4 -->
<div class="myClass"></div>
to become like this HTML after JS
<!-- 0 -->
<div class="myClass">obj1 value</div>
<!-- 1 -->
<div class="myClass">obj2 value</div>
<!-- 2 -->
<div class="myClass">obj3 value</div>
<!-- 3 -->
<div class="myClass">obj4 value</div>
<!-- 4 -->
<div class="myClass">obj5 value</div>
I hope there is someone amazing helpful developer,
who have more experience,
that can help me
(and help also the future developers who see this question)
and thank you all the community!
You can get the property name that you need from the parent element. It has it in its class name "tempo-....". It just needs one change in the HTML, as you used a different spelling for dhurh. So align that with the spelling in the JSON response.
Here is how you can extract that name from that "tempo" class and then use it to access the timing from the response object:
Find the parent element with .parentNode
Get the class attribute value with .className
Extract the part after "tempo-" using .match and the first entry in the returned array
Convert the first letter to a capital and the rest to lowercase.
Use it as a dynamic property
let orarioText = document.querySelectorAll('.orario');
const fetchPreghieraTime = async() => {
const data = await fetch('http://api.aladhan.com/v1/timingsByCity?city=Milano&country=Italy&method=8');
const orarioJson = await data.json();
const orario = orarioJson.data.timings;
orarioText.forEach((item, index) => {
let name = item.parentNode.className.match(/(?<=\btempo-)\w+/)[0];
item.textContent = orario[name[0].toUpperCase() + name.slice(1).toLowerCase()];
});
}
fetchPreghieraTime();
<div class="container">
<div class="tempo-preghiera-container">
<!-- 1 -->
<div class="tempo-fajr">
<div class="nome-preghiera">fajr</div>
<div class="orario">error</div>
<!-- this error text, It will change dinamically with JS -->
</div>
<!-- 2 -->
<div class="tempo-dhuhr">
<div class="nome-preghiera">duhr</div>
<div class="orario">error</div>
<!-- this error text, It will change dinamically with JS -->
</div>
<!-- 3 -->
<div class="tempo-asr">
<div class="nome-preghiera">asr</div>
<div class="orario ">error</div>
<!-- this error text, It will change dinamically with JS -->
</div>
<!-- 4 -->
<div class="tempo-maghrib">
<div class="nome-preghiera">maghrib</div>
<div class="orario">error</div>
<!-- this error text, It will change dinamically with JS -->
</div>
<!-- 5 -->
<div class="tempo-isha">
<div class="nome-preghiera">isha</div>
<div class="orario">error</div>
<!-- this error text, It will change dinamically with JS -->
</div>
</div>
</div>
<script src="./script.js"></script>
I will answer the same question, so I will help someone in the future :)
short answer
with .JSON format,
❌ you can't use the bracket with a number variable
orario[index]
✅ inside the brackets, you need to put a string (for example ["Fajr"], and is like writing .Fajr)
and so there isn't any method to access this by JSON!
but... I found a solution for you!
like you said also in the question, this method works very well in arrays
so we need to do something like this pseudocode:
FETCH -> JSON -> ARRAY -> ARRAY[index]
I saw on the internet that javascript, has this functionality, so you can use it too!
Object.values(orario);
more details about: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/values
so...
I put this in variable called orarioArray, so is simple for you!
const orarioArray = Object.values(orario);
now what you need is only add this to your forEach loop
orarioText.forEach((item, index) => {
item.textContent = orarioArray[index];
});
edited complete code:
let allTempoContainer = document.querySelector('.all-tempo-container');
let orarioArrayValue;
let orarioArrayName;
const fetchPreghieraTime = async() => {
const data = await fetch('http://api.aladhan.com/v1/timingsByCity?city=Milano&country=Italy&method=8');
const orarioJson = await data.json();
const orario = orarioJson.data.timings;
orarioArrayValue = Object.values(orario);
orarioArrayName = Object.keys(orario);
for (let index = 0; index < orarioArrayName.length; index++) {
createOrarioCard(index);
}
}
function createOrarioCard(index) {
var OrarioCardTemplate = document.querySelectorAll("template")[0];
var OrarioCardClone = OrarioCardTemplate.content.cloneNode(true);
allTempoContainer.appendChild(OrarioCardClone);
let orarioText = document.querySelectorAll('.orario');
let preghieraText = document.querySelectorAll('.nome-preghiera');
preghieraText[index].textContent = orarioArrayName[index];
orarioText[index].textContent = orarioArrayValue[index];
}
fetchPreghieraTime();
<div class="container">
<div class="all-tempo-container">
<!-- here it will generate the code -->
</div>
</div>
<template>
<div class="orario-container" style="display: flex; justify-content: space-between;">
<div class="nome-preghiera">loading...</div>
<div class="orario">loading...</div>
</div>
</template>
<script src="./script.js"></script>
previus code:
let orarioText = document.querySelectorAll('.orario');
const fetchPreghieraTime = async() => {
const data = await fetch('http://api.aladhan.com/v1/timingsByCity?city=Milano&country=Italy&method=8');
const orarioJson = await data.json();
const orario = orarioJson.data.timings;
// JSON to Array
const orarioArray = Object.values(orario);
orarioText.forEach((item, index) => {
item.textContent = orarioArray[index];
});
}
fetchPreghieraTime();
<!-- 0 -->
<div class="orario"></div>
<!-- 1 -->
<div class="orario"></div>
<!-- 2 -->
<div class="orario"></div>
<!-- 3 -->
<div class="orario"></div>
<!-- 4 -->
<div class="orario"></div>
try this
<div class="container">
<div class="tempo-preghiera-container">
</div>
</div>
const fetchPreghieraTime = async () => {
const data = await fetch(
"http://api.aladhan.com/v1/timingsByCity?city=Milano&country=Italy&method=8"
);
const orarioJson = await data.json();
const orario = orarioJson.data.timings;
let orarioText = document.querySelector(".tempo-preghiera-container");
for (const property in orario) {
if (property == "Sunset"
|| property == "Sunrise"
|| property == "Midnight" ) continue;
let div = document.createElement("div");
div.classList.add("orario");
let text = document.createTextNode(property + " - " + orario[property]);
div.appendChild(text);
orarioText.appendChild(div);
}
};
fetchPreghieraTime();
output
<div class="container">
div class="tempo-preghiera-container">
<div class="orario">Fajr - 05:30</div>
<div class="orario">Dhuhr - 12:37</div>
<div class="orario">Asr - 15:28</div>
<div class="orario">Maghrib - 17:57</div>
<div class="orario">Isha - 19:27</div>
<div class="orario">Imsak - 05:20</div>
<div class="orario">Midnight - 00:37</div>
</div>
</div>

Creating a template out of HTML Elements

lets say i have a parent-div. And in this div-container, i want to display 5 elements which have all the same structure. For example:
<div class="element">
<p class="name">
</p>
<div class="logo">
</div>
</div>
Is there a way to make an object or prototype out of it, so i dont have to generate every single HTML Element with their classes and src values with the appendChild-function and Dot-Notations in a for-loop?
Im thinking of something like:
for(let i = 0; i<=5;i++){
var element = new element(class,src1,src2 ...);
}
And the "element" is defined in a external class file or something familiar.
Im a beginner, so please show mercy :)
You'll need to clone the node from the template's content. For example:
const templateElement = document.querySelector("#someTemplate")
.content
.querySelector(".element");
// create an Array of nodes (so in memory)
const fiveNodes = [];
for (let i = 0; i < 5; i += 1) {
const nwNode = templateElement.cloneNode(true);
// ^ clone the whole tree
nwNode.querySelector("p.name").textContent += ` #${i + 1}`;
fiveNodes.push(nwNode);
}
// append the nodes to document.body
// this is faster than appending every element in the loop
fiveNodes.forEach(el => document.body.append(el));
<template id="someTemplate">
<div class="element">
<p class="name">I am node</p>
<div class="logo"></div>
</div>
</template>

Compare order of two HTML elements

I have a function which accepts two parameters, each of type HTML element. It is supposed to return which element appears first in the document order. Is there any simple way to determine this?
Template -
<body>
<div id="div1">
<div id="div2">
</div>
</div>
<div id="div3">
<div id="div4">
</div>
</div>
</body>
JS -
const elem1 = document.getElementById('div2');
const elem2 = document.getElementById('div4');
const firstAppearingElement = checkOrder(elem1, elem2); // it should return elem1
function checkOrder(element1, element2) {
// check which one appears first in dom tree
}
You can try with Node.compareDocumentPosition()
The Node.compareDocumentPosition() method compares the position of the
given node against another node in any document.
The syntax is object.compareDocumentPosition (nodeToCompare);
let first = document.getElementById('a');
let second=document.getElementById('b');
// Because the result returned by compareDocumentPosition() is a bitmask, the bitwise AND operator has to be used for meaningful results.See link above for more
if (first.compareDocumentPosition(second) & Node.DOCUMENT_POSITION_FOLLOWING) {
console.log('element with id a is before element with id b'); //
} else {
console.log('element with id a is after element with id b');
}
<div id="a"></div>
<div id="b"></div>

Javascript Elements with class / variable ID

There's a page with some HTML as follows:
<dd id="fc-gtag-VARIABLENAMEONE" class="fc-content-panel fc-friend">
Then further down the page, the code will repeat with, for example:
<dd id="fc-gtag-VARIABLENAMETWO" class="fc-content-panel fc-friend">
How do I access these elements using an external script?
I can't seem to use document.getElementByID correctly in this instance. Basically, I want to search the whole page using oIE (InternetExplorer.Application Object) created with VBScript and pull through every line (specifically VARIABLENAME(one/two/etc)) that looks like the above two into an array.
I've researched the Javascript and through trial and error haven't gotten anywhere with this specific page, mainly because there's no tag name, and the tag ID always changes at the end. Can someone help? :)
EDIT: I've attempted to use the Javascript provided as an answer to get results, however nothing seems to happen when applied to my page. I think the tag is ALSO in a tag so it's getting complicated - here's a major part of the code from the webpage I will be scanning.
<dd id="fc-gtag-INDIAN701" class="fc-content-panel fc-friend">
<div class="fc-pic">
<img src="http://image.xboxlive.com/global/t.58570942/tile/0/20400" alt="INDIAN701"/>
</div>
<div class="fc-stats">
<div class="fc-gtag">
<a class="fc-gtag-link" href='/en-US/MyXbox/Profile?gamertag=INDIAN701'>INDIAN701</a>
<div class="fc-gscore-icon">3690</div>
</div>
<div class="fc-presence-text">Last seen 9 hours ago playing Halo 3</div>
</div>
<div class="fc-actions">
<div class="fc-icon-actions">
<div class="fc-block">
<span class="fc-buttonlabel">Block User</span>
</div>
</div>
<div class="fc-text-actions">
<div class="fc-action"> </div>
<span class="fc-action">
View Profile
</span>
<span class="separator-icon">|</span>
<span class="fc-action">
Compare Games
</span>
<span class="separator-icon">|</span>
<span class="fc-action">
Send Message
</span>
<span class="separator-icon">|</span>
<span class="fc-action">
Send Friend Request
</span>
</div>
</div>
</dd>
This then REPEATS, with a different username (the above username is INDIAN701).
I tried the following but clicking the button doesn't yield any results:
<script language="vbscript">
Sub window_onLoad
Set oIE = CreateObject("InternetExplorer.Application")
oIE.visible = True
oIE.navigate "http://live.xbox.com/en-US/friendcenter/RecentPlayers?Length=12"
End Sub
</script>
<script type="text/javascript">
var getem = function () {
var nodes = oIE.document.getElementsByTagName('dd'),
a = [];
for (i in nodes) {
(nodes[i].id) && (nodes[i].id.match(/fc\-gtag\-/)) && (a.push(nodes[i]));
}
alert(a[0].id);
alert(a[1].id);
}
</script>
<body>
<input type="BUTTON" value="Try" onClick="getem()">
</body>
Basically I'm trying to get a list of usernames from the recent players list (I was hoping I wouldn't have to explain this though :) ).
var getem = function () {
var nodes = document.getElementsByTagName('dd'),
a = [];
for (var i in nodes) if (nodes[i].id) {
(nodes[i].id.match(/fc\-gtag\-/)) && (a.push(nodes[i].id.split('-')[2]));
}
alert(a[0]);
};
please try it by clicking here!
var getem = function () {
var nodes = document.getElementsByTagName('dd'),
a = [];
for (var i in nodes) if (nodes[i].id) {
(nodes[i].id.match(/fc\-gtag\-/)) && (a.push(nodes[i]));
}
alert(a[0].id);
alert(a[1].id);
};
try it out on jsbin
<body>
<script type="text/javascript">
window.onload = function () {
var outputSpan = document.getElementById('outputSpan'),
iFrame = frames['subjectIFrame'];
iFrame.document.location.href = 'http://live.xbox.com/en-US/friendcenter/RecentPlayers?Length=1';
(function () {
var nodes = iFrame.document.getElementsByTagName('dd'),
a = [];
for (var i in nodes) if (nodes[i].id) {
(nodes[i].id.match(/fc\-gtag\-/)) && (a.push(nodes[i].id.split('-')[2]));
}
for (var j in a) if (a.hasOwnProperty(j)) {
outputSpan.innerHTML += (a[j] + '<br />');
}
})();
};
</script>
<span id="outputSpan"></span>
<iframe id="subjectIFrame" frameborder="0" height="100" width="100" />
</body>
What does "I can't seem to use document.getElementsByID correctly in this instance" mean? Are you referring to the fact that you are misspelling getElementByID?
So...something like this (jQuery)?
var els = [];
$('.fc-content-panel.fc-friend').each(function() {
els.push(this));
});
Now you have an array of all the elements that have both of those classes.

Categories