Conversion of HTML as text to json - javascript

A request returns HTML tags as text, and I am wondering how I can convert that text to JSON or something else usable. The request returns this type of text:
"<ul> <li>Coffee</li> <li>Tea</li> <li>Milk</li> </ul>"
Is there a way I can convert that text into something usable with JavaScript? Some requests return much bigger chunks of HTML text as well.
I have tried to split this text up, but I have not achieved anything useful.

One approach is to treat the HTML as an XML document and then convert it to JSON. I took an existing XML-to-JSON conversion function and applied it to your text snippet, and it seems to fit well. This approach may suit your needs.
// Sample input: the HTML fragment, held as a string, that the code below parses and converts.
let htmlContent = '<ul> <li>Coffee</li><li>Tea</li><li>Milk</li> </ul>';
/**
 * Recursively converts a DOM node into a plain JavaScript value.
 *
 * - A text node (nodeType 3) becomes its string value.
 * - Any other node becomes an object. For an element (nodeType 1) with
 *   attributes, these are collected under the "#attributes" key.
 * - Each child is stored under its nodeName; when the same nodeName occurs
 *   more than once, the values are gathered into an array in document order.
 *
 * @param {Node} xml - node to convert (document, element or text node).
 * @returns {Object|string} the converted structure.
 */
function xmlToJson(xml) {
  let obj = {};

  if (xml.nodeType === 1) { // element
    if (xml.attributes.length > 0) {
      obj["#attributes"] = {};
      for (let j = 0; j < xml.attributes.length; j++) {
        const attribute = xml.attributes.item(j);
        obj["#attributes"][attribute.nodeName] = attribute.nodeValue;
      }
    }
  } else if (xml.nodeType === 3) { // text
    obj = xml.nodeValue;
  }

  if (xml.hasChildNodes()) {
    for (let i = 0; i < xml.childNodes.length; i++) {
      const item = xml.childNodes.item(i);
      const nodeName = item.nodeName;
      const value = xmlToJson(item);

      // Test for an OWN property: a plain `typeof obj[nodeName]` probe also
      // sees inherited members, so an element called <toString> or
      // <constructor> would be treated as "already present".
      if (!Object.prototype.hasOwnProperty.call(obj, nodeName)) {
        obj[nodeName] = value;
        continue;
      }

      // Array.isArray instead of probing for `.push`: a previously stored
      // object may itself contain a child named "push".
      if (!Array.isArray(obj[nodeName])) {
        obj[nodeName] = [obj[nodeName]];
      }
      obj[nodeName].push(value);
    }
  }

  return obj;
}
/**
 * Parses an XML string into a DOM document using whichever parser the
 * environment exposes on `window`.
 *
 * @param {string} xml - markup to parse.
 * @returns {Document|Object|null} the parsed document, or null when parsing
 *   threw or no parser is available.
 */
function parseXml(xml) {
  // Preferred path: the standard DOMParser.
  if (window.DOMParser) {
    try {
      return (new DOMParser()).parseFromString(xml, "text/xml");
    } catch (err) {
      return null;
    }
  }

  // Fallback: the ActiveX XML DOM object.
  if (window.ActiveXObject) {
    try {
      var legacyDoc = new ActiveXObject('Microsoft.XMLDOM');
      legacyDoc.async = false;
      if (!legacyDoc.loadXML(xml)) {
        // A parse failure is reported, but the document object is still returned.
        window.alert(legacyDoc.parseError.reason + legacyDoc.parseError.srcText);
      }
      return legacyDoc;
    } catch (err) {
      return null;
    }
  }

  // Neither parser exists in this environment.
  alert("cannot parse xml string!");
  return null;
}
// Parse the sample markup as XML, convert the resulting DOM to a plain object, and print it.
// NOTE(review): parseXml can return null (its catch and fallback paths); xmlToJson would then
// throw when it reads `xml.nodeType`.
let dom = parseXml(htmlContent);
let json = xmlToJson(dom);
console.log(json);

Related

How to convert xml to JSON in suitescript 2.0

I have an XML response that I want to convert to JSON. I'm currently using XPath:
// Run an XPath query for the SOAP envelope against the parsed document.
// NOTE(review): `xml` is presumably the SuiteScript N/xml module and `xmlDocument` an already
// parsed document — neither is defined in this snippet.
var responseNode = xml.XPath.select({
node : xmlDocument,
xpath : '//SOAP-ENV:Envelope'
});
This is not very efficient, because extracting data from a single XML tag requires a lot of extra code. I tried using external libraries with SuiteScript, but they didn't work. Is there a better way to convert XML to JSON?
I had a project that needed to convert XML to JSON recently, so I wrote the following function.
require(['N/xml'], function (xmlMod) {
//This function refer to https://davidwalsh.name/convert-xml-json
/**
 * Converts an N/xml node into a prototype-less plain object tree.
 *
 * Element attributes are collected under '#attributes', a text node becomes
 * its string value, and children are keyed by nodeName (repeated names are
 * gathered into an array in document order).
 *
 * @param {Object} xmlNode - node to convert.
 * @returns {Object|string} converted structure.
 */
function xmlToJson(xmlNode) {
  var result = Object.create(null);
  var kind = xmlNode.nodeType;

  if (kind == xmlMod.NodeType.ELEMENT_NODE) {
    if (xmlNode.hasAttributes()) {
      var attrs = Object.create(null);
      result['#attributes'] = attrs;
      for (var attrName in xmlNode.attributes) {
        // Only copy names the node itself reports as attributes.
        if (!xmlNode.hasAttribute({ name: attrName })) {
          continue;
        }
        attrs[attrName] = xmlNode.getAttribute({ name: attrName });
      }
    }
  } else if (kind == xmlMod.NodeType.TEXT_NODE) {
    result = xmlNode.nodeValue;
  }

  if (!xmlNode.hasChildNodes()) {
    return result;
  }

  var children = xmlNode.childNodes;
  var total = children.length;
  for (var idx = 0; idx < total; idx += 1) {
    var child = children[idx];
    var key = child.nodeName;
    var converted = xmlToJson(child);

    if (!(key in result)) {
      result[key] = converted;
      continue;
    }
    // Second occurrence of a name: promote the stored value to an array.
    if (!Array.isArray(result[key])) {
      result[key] = [result[key]];
    }
    result[key].push(converted);
  }
  return result;
}
// Sample XML payload used to exercise xmlToJson.
var str = '<?xml version="1.0"?><ALEXA VER="0.9" URL="davidwalsh.name/" HOME="0" AID="="><SD TITLE="A" FLAGS="" HOST="davidwalsh.name"><TITLE TEXT="David Walsh Blog :: PHP, MySQL, CSS, Javascript, MooTools, and Everything Else">Hello World</TITLE><LINKSIN NUM="1102">Netsuite</LINKSIN><SPEED TEXT="1421" PCT="51"/></SD><SD><POPULARITY URL="davidwalsh.name/" TEXT="7131"/><REACH RANK="5952"/><RANK DELTA="-1648"/></SD></ALEXA>';
// Parse the string into an XML document with the N/xml parser.
var xmlObj = xmlMod.Parser.fromString({
text: str
});
// Convert starting at the document's root element and pass the result to log.debug.
var jsonObj = xmlToJson(xmlObj.documentElement);
log.debug('jsonObj', jsonObj);
});
The convert function is based on David Walsh's function at https://davidwalsh.name/convert-xml-json
I only revised it to be compatible with NetSuite.
Hope it works for you.
Here's a sample function from my NetSuite environment. I did not write this, but it is currently working.
//*********** PARSE XML INTO JSON ***********
/**
 * Converts an XML node (browser DOM style with numeric nodeType, or N/xml
 * style with string nodeType) into a simplified plain object.
 *
 * The node is first converted verbatim (attributes under "#attributes", text
 * under "#text", repeated child names gathered into arrays) and then cleaned:
 * elements that hold only text collapse to that text, empty elements become
 * undefined, and object/array-valued "#text" entries are removed.
 *
 * @param {Object} node - node to convert.
 * @returns {*} cleaned structure; an empty object when `node` is
 *   null/undefined or has no nodeType.
 */
function nsXMLToJSON(node){
  var obj = nsXMLToJSONDirty(node);
  return cleanObject(obj, true);

  //*********** HELPER FUNCTIONS ***********

  // Verbatim recursive conversion of a node and its descendants.
  function nsXMLToJSONDirty(node){
    var obj = {};
    // The previous guard, `!'nodeType' in node`, parsed as `(!'nodeType') in node`
    // (i.e. `false in node`), so it never rejected anything. Test the value directly.
    if (node == null || node.nodeType === undefined) {
      return obj;
    }
    if (node.nodeType == 1 || node.nodeType == 'ELEMENT_NODE') {
      if (Object.keys(node.attributes).length > 0) {
        obj["#attributes"] = {};
        for (var j in node.attributes) {
          var attribute = node.attributes[j];
          if (attribute) {
            obj["#attributes"][attribute.name] = attribute.value;
          }
        }
      }
    } else if (node.nodeType == 3 || node.nodeType == 'TEXT_NODE') {
      obj = node.nodeValue;
    }
    if (node.hasChildNodes()) {
      var childNodes = node.childNodes;
      // Index loop instead of for...in: for...in also visits non-index members
      // (e.g. `length` and `item` on a NodeList), which are not nodes.
      for (var k = 0; k < childNodes.length; k++) {
        var item = childNodes[k];
        var nodeName = item.nodeName;
        if (typeof (obj[nodeName]) == "undefined") {
          obj[nodeName] = nsXMLToJSONDirty(item); //run the function again
        } else {
          if (typeof (obj[nodeName].push) == "undefined") {
            var old = obj[nodeName];
            obj[nodeName] = [];
            obj[nodeName].push(old);
          }
          obj[nodeName].push(nsXMLToJSONDirty(item));
        }
      }
    }
    return obj;
  }

  // Returns a simplified deep copy of `myobj` (copied through a JSON round trip).
  function cleanObject(myobj, recurse){
    var myobjcopy = JSON.parse(JSON.stringify(myobj));
    for (var i in myobjcopy) {
      var value = myobjcopy[i];
      if (!recurse || typeof value !== 'object') {
        continue;
      }
      if (i == "#text") {
        // Object/array-valued text entries (e.g. several text nodes) are dropped.
        delete myobjcopy[i];
        continue;
      }
      if (value === null) {
        // Previously crashed in Object.keys(null); leave nulls untouched.
        continue;
      }
      var keyCount = Object.keys(value).length;
      if (keyCount == 1) {
        //Check if it only contains a text object
        if (typeof value['#text'] != "undefined") {
          if (value['#text'] || value['#text'] == 0) {
            myobjcopy[i] = value['#text'];
          }
        }
      } else if (keyCount == 0) {
        //Handle empty objects
        myobjcopy[i] = undefined;
      }
      if (myobjcopy[i]) {
        myobjcopy[i] = cleanObject(myobjcopy[i], recurse);
      }
    }
    return myobjcopy;
  }
}
Create a helper.js file so that the function can be shared across different scripts.
define(["N/xml"], function (xml) {
/**
 * Parses an XML string with N/xml and converts it into a prototype-less
 * object tree.
 *
 * Each element becomes an object holding its attributes and children as
 * keys; repeated sibling names are gathered into an array. Comment nodes and
 * whitespace-only text nodes are skipped; other text is stored under the
 * text node's nodeName.
 *
 * @param {string} text - XML source.
 * @returns {Object} converted structure, keyed by the root node's name.
 */
function xmlToJson(text) {
  // Copies an element's attributes into a fresh prototype-less object.
  function readAttributes(element) {
    var attrs = Object.create(null);
    if (element.hasAttributes()) {
      Object.keys(element.attributes).forEach(function (key) {
        attrs[key] = element.getAttribute({ name: key });
      });
    }
    return attrs;
  }

  // Walks `xmlNode` and its following siblings, adding each to `obj`.
  function xmlNodeToJson(xmlNode, obj) {
    for (var sibling = xmlNode; sibling; sibling = sibling.nextSibling) {
      if (sibling.nodeType == xml.NodeType.COMMENT_NODE) {
        continue;
      }
      if (sibling.nodeType == xml.NodeType.TEXT_NODE) {
        // Keep text only if it has a non-whitespace character. The previous
        // pattern /[\n| ]/ treated "|" as whitespace and did not cover tabs
        // or carriage returns.
        if (/\S/.test(sibling.nodeValue || '')) {
          obj[sibling.nodeName] = sibling.nodeValue;
        }
        continue;
      }

      var value = xmlNodeToJson(sibling.firstChild, readAttributes(sibling));
      var name = sibling.nodeName;
      if (name in obj) {
        if (!Array.isArray(obj[name])) {
          obj[name] = [obj[name]];
        }
        obj[name].push(value);
      } else {
        obj[name] = value;
      }
    }
    return obj;
  }

  var xmlDocument = xml.Parser.fromString({ text: text });
  return xmlNodeToJson(xmlDocument.firstChild, Object.create(null));
}
return {
xmlToJson: xmlToJson
}
});
Import the helper file and use the xmlToJson function in your script.
// Load the helper module alongside N/file; the callback's second argument is the helper's exported object.
define(['N/file', '/SuiteScripts/PATH_TO_HELPER_FILE/helper'], function(file, helper) {
...
// Read the XML file's contents, then convert them with the shared helper.
var string = file.load({ id: '/SuiteScripts/PATH_TO_FILE/filename.xml' }).getContents()
var json_object = helper.xmlToJson(string);
...
})

Filtering out empty API responses to avoid errors

Basically I've made a program where words are inputted and their definitions are found using Wordnik API. Each word is then displayed dynamically and the definition is shown on click. Here's that code:
/**
 * Requests a definition for every word in `arr` and collects the parsed responses.
 *
 * One XMLHttpRequest is issued per word. Each successful response is JSON-parsed
 * and stored in `definitions` under the word; the promise resolves with
 * `definitions` once it has as many keys as `arr` has entries.
 *
 * NOTE(review): `reject` is never called, and if `arr` contains duplicate words the
 * key count cannot reach `arr.length`, so the promise would never settle.
 *
 * @param {Array} arr - words to look up; each is concatenated into the request URL.
 * @returns {Promise} resolves with an object mapping each word to its parsed response.
 */
function define(arr) {
return new Promise(function(resolve, reject) {
var client = [];
var definitions = {};
for (var i = 0, len = arr.length; i < len; i++) {
// The IIFE gives each request handler its own `i`, since `var` is function-scoped.
(function(i) {
client[i] = new XMLHttpRequest();
client[i].onreadystatechange = function() {
// Only a finished request (readyState 4) with status 200 is processed.
if (client[i].readyState === 4 && client[i].status === 200) {
// NOTE(review): this branch only matches a zero-length response body. `client` is the
// array of requests, so `client.responseText` is undefined and the assignment below
// would throw a TypeError if the branch ever ran.
if (client[i].responseText.length === 0) {
console.log(client[i].responseText);
client.responseText[0] = {
word: arr[i],
text: 'Definition not found'
};
}
definitions[arr[i]] = JSON.parse(client[i].responseText);
// Resolve once every word has an entry.
if (Object.keys(definitions).length === arr.length) {
resolve(definitions);
}
}
};
// Asynchronous GET; the api_key query parameter is left empty in this snippet.
client[i].open('GET', 'http://api.wordnik.com:80/v4/word.json/' + arr[i] +
'/definitions?limit=1&includeRelated=false&sourceDictionaries=all&useCanonical=false&includeTags=false&api_key=',
true);
client[i].send();
})(i);
}
});
}
/**
 * Reads the text in the #inputText element, reduces it to the list of words
 * to look up, requests their definitions and passes the result to `success`.
 * A rejection only logs a message.
 */
function makeFlashCards() {
  var inputText = document.getElementById('inputText').value;
  var uniqueWords = findUniq(inputText);
  var wordList = makeArr(uniqueWords);
  var wordsToDefine = ignore(wordList);

  var lookup = define(wordsToDefine);
  lookup
    .then(function (definitions) {
      success(definitions);
    })
    .catch(function (reason) {
      console.log('this shouldnt run');
    });
}
/**
 * Clears the #form element, adds one card per looked-up word, then wires up
 * the definitions.
 *
 * @param {Object} obj - maps each word to its array of definition entries;
 *   the first entry's `word` is used as the card text.
 */
function success(obj) {
  var form = document.getElementById('form');
  form.innerHTML = '';

  for (var word in obj) {
    // Skip inherited keys.
    if (!obj.hasOwnProperty(word)) {
      continue;
    }
    var firstEntry = obj[word][0];
    addElement('div', firstEntry.word);
  }

  attachDefinition(obj);
}
/**
 * Creates an element containing `word` as text, inserts it into the body
 * before the #form element, and tags it as a flashcard.
 *
 * @param {string} type - tag name of the element to create.
 * @param {string} word - text content; also used as the element's id.
 */
function addElement(type, word) {
  var card = document.createElement(type);
  card.appendChild(document.createTextNode(word));

  document.body.insertBefore(card, document.getElementById('form'));

  card.id = word;
  card.className = "flashcards";
}
/**
 * Adds a click listener to every element with class "flashcards". The
 * listener calls `cardClicked` with the clicked element as `this` and the
 * definitions object as its argument.
 *
 * @param {Object} obj - definitions passed through to `cardClicked`.
 */
function attachDefinition(obj) {
  var cards = document.getElementsByClassName('flashcards');

  Array.prototype.forEach.call(cards, function (card) {
    card.addEventListener('click', function () {
      // `this` is whatever the listener is invoked on.
      cardClicked.call(this, obj);
    });
  });
}
/**
 * Click handler for a flashcard (invoked with the card element as `this`).
 *
 * If the card has at least one definition entry, toggles its content: when
 * the current innerHTML contains no space it shows the definition text with
 * attribution and a badge image; otherwise it switches back to the word.
 *
 * @param {Object} obj - maps card ids to arrays of definition entries.
 */
function cardClicked(obj) {
  var cardId = this.id;
  var el = document.getElementById(cardId);
  var entries = obj[cardId];

  // Nothing to show for a word without definitions.
  if (entries.length === 0) {
    return;
  }

  var restyle = function (weight, size, align, overflow) {
    el.style['font-weight'] = weight;
    el.style['font-size'] = size;
    el.style['text-align'] = align;
    el.style['overflow'] = overflow;
  };

  var showsSingleToken = this.innerHTML.split(' ').length === 1;
  if (showsSingleToken) {
    var badge = document.createElement('img');
    badge.src = 'https://www.wordnik.com/img/wordnik_badge_a2.png';
    el.innerHTML = entries[0].text
      + ' ' + entries[0].attributionText + '<br>';
    restyle('normal', '16px', 'left', 'auto');
    el.appendChild(badge);
    return;
  }

  el.innerHTML = entries[0].word;
  restyle('bold', '36px', 'center', 'visible');
}
When the define function is given an array containing only valid words, the program works as expected. However, if any word in the array argument is not valid, the program doesn't add click event handlers to the elements. I think this might have to do with the catch being triggered.
When an invalid word is requested Wordnik API sends back an empty array which might be the root of this problem. I tried to account for this by adding
if (client[i].responseText.length === 0) {
console.log(client[i].responseText);
client.responseText[0] = {
word: arr[i],
text: 'Definition not found'
};
but this conditional never ends up running.
I need some way of filtering out the empty array responses so the catch is not triggered and the program can run smoothly.
When you reach if (client[i].responseText.length === 0), make sure that client[i].responseText really is an empty string. It is probably undefined, in which case client[i].responseText.length will throw an error, and this will cause the catch block to execute. For example:
/**
 * Demonstration: returns a promise whose executor reads `.length` of an
 * undefined value. The resulting TypeError is thrown inside the executor,
 * so the promise is rejected.
 *
 * @returns {Promise} a promise rejected with a TypeError.
 */
function makePromise() {
  return new Promise(function (resolve, reject) {
    var missingValue; // deliberately left undefined
    if (missingValue.length === 0) {
      resolve("resolved");
    }
  });
}
// Run the demo: the TypeError thrown inside the executor rejects the promise,
// so the catch handler runs instead of console.log.
makePromise().then(console.log).catch(function(res) {
console.log('Error was thrown')
});
Try changing that condition to:
if (client[i].responseText && client[i].responseText.length === 0)

parsing xml to get the equation to be returned to python

Here is a JavaScript file that converts an XML (MathML) string to text. This text is usually an equation. I want the equation to be produced in a form that gives the required result when it is passed to Python. Any help is appreciated.
/**
 * Parses an XML string into a DOM document.
 *
 * @param {string} xmlstring - XML source text.
 * @returns {Document} the document produced by DOMParser with type "text/xml".
 */
function getDOM(xmlstring) {
  // Declared locally: the undeclared `parser = ...` assignment created an
  // implicit global and throws a ReferenceError in strict mode / ES modules.
  var parser = new DOMParser();
  return parser.parseFromString(xmlstring, "text/xml");
}
/**
 * Flattens a MathML node into a JavaScript expression string.
 *
 * Leaf nodes contribute their nodeValue ("π" becomes "pi", a single space
 * becomes ""). <mfrac> and <msup> combine their first two children into a
 * division and a Math.pow call; any other node concatenates its children.
 * <mfenced> wraps the result in parentheses and <msqrt> in Math.sqrt(...).
 *
 * @param {Node} node - MathML node to convert.
 * @returns {string|null} the expression text (a leaf returns its nodeValue unchanged).
 */
function remove_tags(node) {
  var children = node.childNodes;
  var tag = node.tagName;
  var text;

  if (!children.length) {
    var value = node.nodeValue;
    text = value == "π" ? "pi" : (value == " " ? "" : value);
  } else if (tag == "mfrac") {
    text = "(" + remove_tags(children[0]) + ")/(" + remove_tags(children[1]) + ")";
  } else if (tag == "msup") {
    text = "Math.pow((" + remove_tags(children[0]) + "),(" + remove_tags(children[1]) + "))";
  } else {
    text = Array.prototype.reduce.call(children, function (acc, child) {
      return acc + remove_tags(child);
    }, "");
  }

  if (tag == "mfenced") {
    text = "(" + text + ")";
  }
  if (tag == "msqrt") {
    text = "Math.sqrt(" + text + ")";
  }
  return text;
}
/**
 * Converts a MathML string into an expression string.
 *
 * @param {string} mml - MathML source.
 * @returns {string|null} whatever remove_tags produces for the document's root element.
 */
function stringifyMathML(mml) {
  // Local declaration: the undeclared `xmlDoc = ...` assignment leaked a
  // global and throws a ReferenceError in strict mode / ES modules.
  var xmlDoc = getDOM(mml);
  return remove_tags(xmlDoc.documentElement);
}
Example of xml file is
// Convert a sample MathML string, show the generated expression, then evaluate it.
// NOTE(review): `s` is assigned without a declaration (implicit global), and eval() executes
// the generated string as code — only acceptable for trusted input.
s = stringifyMathML(" <math><mi>sin</mi><mfenced><mi>x</mi></mfenced></math>");
alert(s);
alert(eval(s));
I am expecting output to be
math.sin(x)
Adding the "math." prefix will solve the issue.
The "math." prefix must be added only when a special keyword is present.
So first, build a list (mList) of the operations you need to cover.
Then, whenever you meet one of these operations, prepend "math." to it.
// Keywords that get a "math." prefix in the generated expression.
// ('pow' was listed twice; the duplicate had no effect on indexOf lookups.)
var mList = ['pow', 'sin', 'cos', 'sqrt', 'π'];
/**
 * Parses an XML string into a DOM document with DOMParser ("text/xml").
 *
 * @param {string} xmlstring - XML source text.
 * @returns {Document} the parsed document.
 */
function getDOM(xmlstring) {
  // `parser` used to be assigned without a declaration, which creates an
  // implicit global and fails with a ReferenceError in strict mode.
  var parser = new DOMParser();
  return parser.parseFromString(xmlstring, "text/xml");
}
/**
 * Flattens a MathML node into an expression string, prefixing leaf values
 * found in `mList` with "math.".
 *
 * Leaf nodes contribute their nodeValue ("π" becomes "pi", a single space
 * becomes ""). <mfrac> and <msup> combine their first two children into a
 * division and a pow(...) call; other nodes concatenate their children.
 * <mfenced> wraps the result in parentheses and <msqrt> in sqrt(...).
 * Every call logs the value it is about to return.
 *
 * @param {Node} node - MathML node to convert.
 * @returns {string} the expression text.
 */
function remove_tags(node) {
  var kids = node.childNodes;
  var tag = node.tagName;
  var out = "";

  if (kids.length) {
    if (tag == "mfrac") {
      out += "(" + remove_tags(kids[0]) + ")/(" + remove_tags(kids[1]) + ")";
    } else if (tag == "msup") {
      out += "pow((" + remove_tags(kids[0]) + "),(" + remove_tags(kids[1]) + "))";
    } else {
      for (var idx = 0; idx < kids.length; idx += 1) {
        out += remove_tags(kids[idx]);
      }
    }
  } else {
    var value = node.nodeValue;
    // Known keywords get the module prefix.
    if (mList.indexOf(value) != -1) {
      out += 'math.';
    }
    if (value == "π") {
      out += "pi";
    } else if (value == " ") {
      out += "";
    } else {
      out += value;
    }
  }

  if (tag == "mfenced") {
    out = "(" + out + ")";
  }
  if (tag == "msqrt") {
    out = "sqrt(" + out + ")";
  }
  console.log('returning', out)
  return out;
}
/**
 * Parses a MathML string and converts its root element to an expression string.
 *
 * @param {string} mml - MathML source.
 * @returns {string} the result of remove_tags for the document element.
 */
function stringifyMathML(mml) {
  // `xmlDoc` used to be assigned without a declaration (implicit global;
  // a ReferenceError in strict mode), so declare it locally.
  var xmlDoc = getDOM(mml);
  return remove_tags(xmlDoc.documentElement);
}
// Convert three sample MathML strings and print each result next to the expected text.
// NOTE(review): a, b and c are assigned without declarations (implicit globals).
a = stringifyMathML("<math><mi>x</mi></math>");
b = stringifyMathML("<math><mi>x</mi><mo>+</mo><mn>5</mn></math> ");
c = stringifyMathML("<math><mi>sin</mi><mfenced><mi>x</mi></mfenced></math> ");
console.log(a, 'vs x');
console.log(b, 'vs x+5');
console.log(c, 'vs math.sin(x)');
Output
x vs x
x+5 vs x+5
math.sin(x) vs math.sin(x)

convert xml to json in Javascript

I have the following javascript function (which I got from Stack Overflow) which converts XML to JSON:
/**
 * Recursively converts a DOM node to a plain value and logs the JSON of every
 * converted node.
 *
 * Element attributes are copied directly onto the result object, a text node
 * becomes its string value, and children are keyed by nodeName (repeated
 * names are gathered into an array). Any exception is reported through
 * alert() and the function then returns undefined.
 *
 * @param {Node} xml - node to convert.
 * @returns {Object|string|undefined} converted structure.
 */
function xmlToJson(xml) {
  try {
    var result = {};
    var type = xml.nodeType;

    if (type == 1) {
      var attrs = xml.attributes;
      for (var a = 0; a < attrs.length; a++) {
        var attr = attrs.item(a);
        result[attr.nodeName] = attr.nodeValue;
      }
    } else if (type == 3) {
      result = xml.nodeValue;
    }

    if (xml.hasChildNodes()) {
      for (var k = 0; k < xml.childNodes.length; k++) {
        var child = xml.childNodes.item(k);
        var key = child.nodeName;
        var existing = result[key];

        if (typeof (existing) == "undefined") {
          result[key] = xmlToJson(child);
          continue;
        }
        // Second occurrence: turn the stored value into a one-element list first.
        if (typeof (existing.push) == "undefined") {
          result[key] = [existing];
        }
        result[key].push(xmlToJson(child));
      }
    }

    console.log(JSON.stringify(result));
    return result;
  } catch (e) {
    alert(e.message);
  }
}
What I want is for it to return an array ([]) when an XML node has at least one child node and also has a parent node. This code returns a map ({}) if the XML node has a single child node, but it works fine with multiple child nodes.
For example, I'd like the XML
<pnode attr1="abc">
<cnode attr2="xyz"></cnode>
</pnode>
to be transformed into the JSON
{
"pnode": {
"attr1": "abc"
},
"cnode": [
{"attr2": "xyz"}
]
}
With the clarification about what you want to achieve, here is an algorithm.
I'll leave my other answer up because I still think the wisest choice is not to play with the structure
/**
 * Flattens a node and all of its descendants into a single-level object keyed
 * by nodeName.
 *
 * The node the call starts from maps to its attributes object; every
 * descendant maps to an array of attributes objects. When a descendant shares
 * a name with an existing key, the existing value is promoted to an array and
 * the new entries are appended. Text nodes (nodeType 3) are skipped.
 *
 * @param {Node} node - node to flatten.
 * @param {*} [isChild] - truthy when `node` is a descendant (set by the recursion).
 * @returns {Object} the flattened structure.
 */
function flattenNodes(node, isChild) {
  // Borrowed from the prototype so that an element literally named
  // "hasOwnProperty" cannot shadow the check and make it throw.
  var hasOwn = Object.prototype.hasOwnProperty;
  var obj = {}, attributes = {}, childObj, child, i, key;

  if (node.attributes && node.attributes.length)
    for (i = 0; i < node.attributes.length; ++i)
      attributes[node.attributes[i].nodeName] = node.attributes[i].nodeValue;

  // `obj` is always fresh here, so there is never an existing entry to merge
  // with at this point (the old "already present" branch was unreachable).
  obj[node.nodeName] = isChild ? [attributes] : attributes;

  if (node.childNodes && node.childNodes.length)
    for (i = 0; i < node.childNodes.length; ++i) {
      child = node.childNodes[i];
      if (child.nodeType === 3) continue; // skip text node
      childObj = flattenNodes(child, 1); // recurse
      for (key in childObj) { // merge
        if (!hasOwn.call(childObj, key)) continue;
        if (!hasOwn.call(obj, key)) {
          obj[key] = childObj[key];
        } else {
          if (!Array.isArray(obj[key]))
            obj[key] = [obj[key]];
          obj[key] = obj[key].concat(childObj[key]);
        }
      }
    }
  return obj;
}
Example usage on Node root_node
// Parse a small XML sample and take its root element as the starting node.
var root_node;
root_node = new DOMParser().parseFromString(
'<pnode attr1="abc"><cnode attr2="xyz"></cnode></pnode>',
'text/xml'
).documentElement;
// isChild is omitted, so the root element maps to its attributes object rather than an array.
var o = flattenNodes(root_node); // create
JSON.stringify(o); // to JSON
// {"pnode":{"attr1":"abc"},"cnode":[{"attr2":"xyz"}]}
If you have XML of the form <foo bar="baz"><foo hello="world"></foo></foo>, the first iteration will cause {foo: {bar: "baz"}}, then the second encounter will modify this to the array form of {foo: [{bar: "baz"}, {hello: "world"}]}
I would structure the object representing the XML differently, giving every node the same set of properties:
Integer nodeType
String nodeName
String nodeValue
Array childNodes
Object attributes
Now you can have the same form independent of number of child nodes/etc
/**
 * Builds a plain-object mirror of a DOM node with a fixed shape:
 * nodeType, nodeName, nodeValue, childNodes (array of mirrored children) and
 * attributes (name → value map). The shape is the same regardless of how many
 * children or attributes the node has.
 *
 * @param {Node} node - node to mirror.
 * @returns {Object} the mirrored node.
 */
function nodeToObject(node) {
  var mirror = {
    nodeType: node.nodeType,
    nodeName: node.nodeName,
    nodeValue: node.nodeValue,
    childNodes: [],
    attributes: {}
  };
  var kids = node.childNodes;
  var attrs = node.attributes;
  var idx;

  if (kids && kids.length) {
    for (idx = 0; idx < kids.length; ++idx) {
      mirror.childNodes.push(nodeToObject(kids[idx]));
    }
  }
  if (attrs && attrs.length) {
    for (idx = 0; idx < attrs.length; ++idx) {
      mirror.attributes[attrs[idx].nodeName] = attrs[idx].nodeValue;
    }
  }
  return mirror;
}
And then to transform root_node to JSON,
// Mirror the node tree as plain objects, then serialize it.
JSON.stringify(nodeToObject(root_node));
Going in the opposite direction is also possible in JavaScript, with some minor logic based upon nodeType to choose the creation method.

parse xml with jquery - bad xml format

I have this XML file:
<dist>
<key>keynumber1</key>
<string>value1</string>
<key>keynumber2</key>
<string>value2</string>
<key>keynumber3</key>
<string>value3</string>
<key>keynumber4</key>
<integer>value4</integer>
</dist>
How can I parse this with jQuery into something like:
{ "dist": {"keynumber1":"value1", "keynumber2":"value2"}}
Thanks a lot for your help.
The first step is parsing the XML with jQuery using $.parseXML(str).
Then I used this handy function, created by David Walsh:
/**
 * Recursively converts a DOM node into a plain JavaScript value.
 *
 * A text node (nodeType 3) becomes its string value. Any other node becomes
 * an object: element attributes are stored under "#attributes" and each child
 * under its nodeName. When a nodeName repeats (including several "#text"
 * children), the values are collected into an array in document order.
 *
 * @param {Node} xml - node to convert.
 * @returns {Object|string} the converted structure.
 */
function xmlToJson(xml) {
  // Create the return object
  var obj = {};

  if (xml.nodeType == 1) { // element
    // do attributes
    if (xml.attributes.length > 0) {
      obj["#attributes"] = {};
      for (var j = 0; j < xml.attributes.length; j++) {
        var attribute = xml.attributes.item(j);
        obj["#attributes"][attribute.nodeName] = attribute.nodeValue;
      }
    }
  } else if (xml.nodeType == 3) { // text
    obj = xml.nodeValue;
  }

  // do children
  if (xml.hasChildNodes()) {
    for (var i = 0; i < xml.childNodes.length; i++) {
      var item = xml.childNodes.item(i);
      var nodeName = item.nodeName;
      var converted = xmlToJson(item);

      if (!Object.prototype.hasOwnProperty.call(obj, nodeName)) {
        obj[nodeName] = converted;
        continue;
      }
      // Array.isArray rather than probing `.length`: a stored text value is a
      // string, which also has a length, and calling push on it threw a
      // TypeError as soon as a second text node was met.
      if (!Array.isArray(obj[nodeName])) {
        obj[nodeName] = [obj[nodeName]];
      }
      obj[nodeName].push(converted);
    }
  }
  return obj;
}

Categories