Remove attributes from html string - error with removeAttributeNode - javascript

I am trying to remove all attributes except for certain whitelisted ones from a long html string. I am using the DOM to parse it.
I know there are a lot of other questions about this, but my question is specifically about the error I am getting with executing removeAttributeNode in my code below.
// Parse the HTML string inside a detached <div> so its elements can be walked through the DOM.
var div = document.createElement('div');
div.innerHTML = '<p class=\"p\">text here</p> <div id=\"divId\"></div>';
// Every descendant element of the wrapper.
var elements = div.getElementsByTagName('*');
// Attribute names that are meant to be kept.
var whitelist = ["src", "width", "height"];
for (var i = 0; i < elements.length; i++) {
if (elements[i].attributes.length !== 0) {
// attr is the element's whole attribute collection, not a single attribute.
var attr = elements[i].attributes;
// NOTE(review): j starts at 0 and is decremented (j--), so the index would go
// negative on the next pass; j++ was presumably intended.
for (var j = 0; j < attr.length; j--) {
var attrName = attr[j].name;
for (var k = 0; k < whitelist.length; k++) {
// NOTE(review): this is true whenever the name differs from the *current*
// whitelist entry, so the removal below is attempted once per non-matching entry.
if (attrName !== whitelist[k])
// NOTE(review): the collection (attr) is passed here rather than one attribute
// node such as attr[j]; this appears to be what triggers the reported error.
elements[i].removeAttributeNode(attr);
}
}
}
}
But, I keep getting the following error: Failed to execute 'removeAttributeNode ' on Element: the 1st argument provided is either null, or an invalid Attr object.
But, I checked with console statements and elements[i] is not null. For example, elements[0] = <p class="p">. How do I get removeAttributeNode to work? Thank you for your help!

In the last loop, do something like this.
// Decide first whether the attribute name is on the whitelist. Testing
// `attrName !== whitelist[k]` per entry would remove the attribute as soon as
// it differs from any single entry, and a second removal attempt would pass
// null to removeAttributeNode.
var isWhitelisted = false;
for (var k = 0; k < whitelist.length; k++) {
  if (attrName === whitelist[k]) {
    isWhitelisted = true;
    break;
  }
}
if (!isWhitelisted) {
  // removeAttributeNode expects an Attr object, which getAttributeNode returns for this name.
  elements[i].removeAttributeNode(elements[i].getAttributeNode(attrName));
}

Related

Extracting Specific Values to TXT from PDF using a Javascript Sequence

I can not find a proper javascript solution for creating a sequence in Adobe Acrobat that will extract text into a .txt file; based on certain criteria.
I have over 500 pdfs with images & financial data on them. I need to extract specific values from these pages. Including values such as: Check number, check date, check amount.
I tried the example at:
https://www.evermap.com/javascript.asp#Title:%20Extract%20ISBN%20numbers
I even created a PDF with ISBN numbers and it doesn't work.
In my PDF I have the below data:
ProcDate: 2019/01/04
AccountNum: 69447885236
CheckAmt: 157.52
SerialNum: 8574
MflmSeqNum: 268245062738
ProcDate: 2019/01/14
AccountNum: 69447885236
CheckAmt: 2,415.36
SerialNum: 8570
MflmSeqNum: 268545187745
I need to extract the values into a text file (or excel table) in a delimited format. The expected output is below:
2019/01/14; 2,415.36; 8570
2019/01/04; 157.52; 8574
OK, so with a little tweaking to get the loops to carry down, I was able to get the desired output. The only problem is that the data is repeated and the values do not stay correlated with each other:
Below is the loop info:
// Walks the pages of the current document three times in nested loops: the outer
// loop collects matches for reMatch, the middle loop for reMatchAmt and the inner
// loop for reMatchNum. reMatch, reMatchAmt, reMatchNum and Out are defined outside
// this snippet.
// NOTE(review): because each loop runs over all pages independently, every
// combination of (page i, page o, page p) is visited; this looks like the source
// of the repeated, uncorrelated output described above.
for (var i = 0; i < this.numPages; i++)
{
// Concatenate all words of page i into one string (no separator is added).
numWords = this.getPageNumWords(i);
var PageText = "";
for (var j = 0; j < numWords; j++) {
var word = this.getPageNthWord(i,j,false);
PageText += word;
}
var strMatches = PageText.match(reMatch);
// Skip pages without a match for reMatch.
if (strMatches == null) continue;
for (var o = 0; o < this.numPages; o++)
{
// Same word concatenation, this time for page o.
numWordsAmt = this.getPageNumWords(o);
var PageTextAmt = "";
for (var k = 0; k < numWordsAmt; k++) {
var wordAmt = this.getPageNthWord(o,k,false);
PageTextAmt += wordAmt;
}
var strMatchesAmt = PageTextAmt.match(reMatchAmt);
// NOTE(review): this re-tests strMatches; strMatchesAmt was presumably intended.
if (strMatches == null) continue;
for (var p = 0; p < this.numPages; p++)
{
// Same word concatenation, this time for page p.
numWordsNum = this.getPageNumWords(p);
var PageTextNum = "";
for (var l = 0; l < numWordsNum; l++) {
var wordNum = this.getPageNthWord(p,l,false);
PageTextNum += wordNum;
}
var strMatchesNum = PageTextNum.match(reMatchNum);
// NOTE(review): this tests strMatchesAmt; strMatchesNum was presumably intended.
if (strMatchesAmt == null) continue;
// now output matches into report document
// Every match of one kind is paired with every match of the other two kinds.
for (j = 0; j < strMatches.length; j++) {
for (k = 0; k < strMatchesAmt.length; k++) {
for (l = 0; l < strMatchesNum.length; l++) {
Out[strMatches[j].replace("ProcDate: ", "")+" , "+strMatchesAmt[k].replace("CheckAmt: ", "")+" , "+strMatchesNum[l].replace("SerialNum: ", "")] = true; // store the combined "date , amount , serial" string as a property name
}
}
}
}
}
}

get line number of xml node in js

I'm trying to validate (custom rules) a xml source. Therefore I parse the source with document.evaluate and a certain xpath and validate the result nodes.
If a node is not correct, I would like to give an error with the nodes line number in the source.
How can I go about accomplishing this?
I had similar problem and I wrote a function that finds the n-th tag on the original string based on the result of getElementsByTagName.
It is something like this:
/**
 * Estimates the 1-based line number at which an element's opening tag appears
 * in the original XML text.
 *
 * The element's position among all same-named elements under xmlRootNode is
 * determined first; the text is then scanned for that many opening tags of the
 * same name, and the newlines before the last one found are counted.
 *
 * @param {Element} element - The node whose line number is wanted.
 * @param {Document|Element} xmlRootNode - Node providing getElementsByTagName.
 * @param {string} sContent - The XML source text the nodes were parsed from.
 * @returns {number} The 1-based line number, or -1 when the expected opening
 *   tag cannot be found in sContent.
 */
function xmlNodeToOriginalLineNumber(element, xmlRootNode, sContent) {
  var sTagName = element.tagName;
  var aNodeListByTag = xmlRootNode.getElementsByTagName(sTagName);

  // Index of the element among same-named elements; stays 0 (first occurrence)
  // when the element is not part of the list.
  var iMaxIndex = 0;
  for (var j = 0; j < aNodeListByTag.length; j++) {
    if (aNodeListByTag.item(j) === element) {
      iMaxIndex = j;
      break;
    }
  }

  // Escape regex metacharacters (e.g. "." is legal in XML names) so the tag
  // name is matched literally.
  var sEscapedTagName = sTagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // The lookahead requires the name to END here, so "<item" does not match
  // "<item-x" or "<item.y".
  var regex = new RegExp('<' + sEscapedTagName + '(?=[\\s/>])', 'g');

  // The global regex keeps its position between exec calls, so the n-th call
  // yields the n-th opening tag.
  var offset = 0;
  for (var n = 0; n <= iMaxIndex; n++) {
    var match = regex.exec(sContent);
    if (match === null) {
      return -1;
    }
    offset = match.index;
  }

  // Count the newlines that precede the tag.
  var line = 0;
  for (var i = 0; i < offset; i++) {
    if (sContent.charAt(i) === '\n') {
      line++;
    }
  }
  return line + 1;
}
I updated your sample: https://jsfiddle.net/g113c350/3/

how to add a line break in javascript that works with paragraph

I wrote this function to format a certain string :
// Re-flows the text of #desc into chunks, appending "- \n" after each chunk.
var desc = document.getElementById('desc');
var parContent = desc.textContent.trim();
var tempPar = "";
// j counts characters since the last break; k is the start index of the current chunk.
var j = 0;
var k = 0;
var built_val = "";
for (var i = 0; i < parContent.length ; i++)
{
// Break when the counter reaches 19 or on the last character of the text.
if (j == 19 || i == parContent.length-1)
{
// NOTE(review): substring's end index is exclusive, so on the final iteration
// the last character appears to be left out of the chunk.
tempPar = parContent.substring(k, i);
tempPar = tempPar.concat("- \n");
built_val = built_val.concat(tempPar);
tempPar = "";
//Restart j
j = 0;
//update k
k = i;
// The continue skips the j++ below for the character at index i.
continue;
}
j++;
}
// The result is assigned as plain text, so "\n" or markup in built_val is not interpreted as HTML.
desc.textContent = built_val;
Desc is a dynamic paragraph that is usually empty at first then filled (its data are composed after the page loads), j is the number of characters desired in one line.
Though now I have another problem and that is \n doesn't work ; I also tried br . How can I insert a new linebreak within a javascript string such as "built_val" ? please note how it's assigned to an Html after everything.
The textContent property sets the literal text of the element (by adding a text node), and will not parse your tags as html. Instead, you should do this:
desc.innerHTML = built_val;
Yes, you're using .textContent which is why <br>s won't be parsed (which is a good thing!)
You want to use document.createTextNode() and document.createElement('br').
// Wraps the text of #desc by replacing it with text nodes separated by <br> elements.
var desc = document.getElementById('desc');
var parContent = desc.textContent.trim();
var lineLength = 19; // desired number of characters per line

// Empty the paragraph first; otherwise the original text would remain in front
// of the wrapped copy that is appended below.
desc.textContent = '';

for (var start = 0; start < parContent.length; start += lineLength) {
  var chunk = parContent.substring(start, start + lineLength);
  var isLastChunk = start + lineLength >= parContent.length;
  if (isLastChunk) {
    // The remainder is appended as-is: no continuation dash and no trailing <br>.
    desc.appendChild(document.createTextNode(chunk));
  } else {
    desc.appendChild(document.createTextNode(chunk.concat("- \n")));
    desc.appendChild(document.createElement('br'));
  }
}
// No final assignment of a built string is needed: the appended nodes are the output.
Just for fun: an Array.forEach and String.slice method:
// Wraps the text of #desc every 19 characters using Array.forEach and String.slice.
var desc = document.querySelector('#desc');
var parContent = desc.textContent.trim();
// Start index of the chunk that has not been written out yet.
var block = 0;
// Clear the original text so that only the wrapped version remains.
desc.textContent = '';
parContent.split('').forEach(
  function(v, i) {
    // i > 0: position 0 is also a multiple of 19, but nothing precedes it, so
    // flushing there would only emit an empty text node and a leading <br>.
    if (i > 0 && i % 19 === 0) {
      desc.appendChild(document.createTextNode(parContent.slice(block, i)));
      desc.appendChild(document.createElement('br'));
      block = i;
    }
  }
);
// last part
desc.appendChild(document.createTextNode(parContent.slice(block)));
<p id="desc">
This string should be truncated every 19th position right?
Ok, let's give it a try using [Array.forEach]<br>
</p>
There's an easy regex version to wrap text as well:
/**
 * Splits src into runs of at most maxLineLength characters (line terminators
 * are not consumed by the pattern) and appends an HTML line break plus CRLF
 * after every run.
 *
 * @param {string} src - Text to wrap.
 * @param {number} maxLineLength - Maximum number of characters per run.
 * @returns {string} The text with "<br/>\r\n" inserted after each run.
 */
function Wrap(src, maxLineLength) {
  var chunkPattern = new RegExp("(.{1," + maxLineLength + "})", "g");
  return src.replace(chunkPattern, function (whole, chunk) {
    return chunk + "<br/>\r\n";
  });
}
Though this wraps on characters, not words.

Populating a select element with comma-separated values using for loop

I have a bunch of comma-separated values stored as strings in a JSON file. My aim is to split these values to populate a select element which is based on Selectize.js. Code (excerpt) looks as follows:
// Builds the options object handed to selectize from a comma-separated string.
var options = {};
var attr_split = data.attributes['Attribute1'].split(",");
var options_key;
// The loop only reassigns options_key, so after it finishes the variable holds
// the last element of attr_split.
for (var i = 0; i < attr_split.length; i++) {
options_key = attr_split[i]
}
// NOTE(review): everything below runs once, after the loop, so a single entry
// (for the last value) is added to options.
var options_values = {
value: options_key,
text: options_key,
}
// Reuse an existing entry for this key if one is already present.
if (options_key in options)
options_values = options[options_key];
options[options_key] = options_values;
$('#input').selectize({
options: options,
});
Although this seems to work, the output in the select element only shows the last iterations done by the for loop. As per here
and here, I've tried
for (var i = 0; i < attr_split.length; i++) {
var options_key += attr_split[i]
}
but this throws me undefined plus all concatenated strings without the separator as per the following example:
undefinedAttr1Attr2Attr3
When I simply test the loop using manual input of the array elements everything appears fine:
for (var i = 0; i < attr_split.length; i++) {
var options_key = attr_split[0] || attr_split[1] || attr_split[2]
}
But this is not the way to go, since the number of elements differs per string.
Any idea on what I'm doing wrong here? I have the feeling it's something quite straightforward :)
When you declare 'options_key', you are not initializing it, so its value is undefined. When you then concatenate with options_key += attr_split[i], the first iteration starts from undefined, which is why you get undefinedAttr1Attr2Attr3.
So declare and initialize options_key like this:
var options_key="";
and in your loop
for (var i = 0; i < attr_split.length; i++)
{
options_key = attr_split[i]
}
Every iteration replaces options_key with the value of attr_split[i], so after the loop it contains only the last element's value. The corrected code is:
for (var i = 0; i < attr_split.length; i++)
{
options_key += attr_split[i]
}
Just change var options_key; to var options_key="";
The reason you are getting undefined is because you have not defined the variable properly.
Here is a working example
// Demonstrates concatenating the split pieces into one string.
var attr_split = "1,2,3,4".split(",");
// Starting from an empty string keeps "undefined" out of the concatenated result.
var options_key="";
for (var i = 0; i < attr_split.length; i++) {
// Pieces are joined without a separator, giving "1234" for this input.
options_key += attr_split[i]
}
alert(options_key);
var options_values = {
value: options_key,
text: options_key
}
// NOTE(review): an object is passed to alert here; it will presumably be shown
// in its default string form rather than field by field.
alert(options_values);

JavaScript: typeof attr.name returns undefined

I am trying to get all possible data-* attributes from every tag on my page.
// Collects, for each element under <body>, the attributes whose name starts with "data-".
var el = document.body.querySelectorAll("*");
// NOTE(review): i is assigned without var, so it is not declared in this snippet.
for(i=0;i<el.length;i++){
// datas is an array of the matching attribute objects and is overwritten on
// every iteration; the name property is on each entry (at.name), not on the array.
var datas = [].filter.call(el[i].attributes, function(at) { return /^data-/.test(at.name); });
}
now datas variable contains attribute objects and I want to search datas.name for specific value, in my case I'm searching for "data-val". But
typeof datas.name
returns undefined. Any clue what to do? Pure JavaScript please. Thanx!
update
I could go with the following solution:
// For each element, gathers its data-* attribute names (prefix stripped) into
// nodes and then scans nodes for "val".
var el = document.body.querySelectorAll("*");
for(i=0;i<el.length;i++){
var datas = [].filter.call(el[i].attributes, function(at) { return /^data-/.test(at.name); });
for(var k=0;k<datas.length;k++){
// NOTE(review): nodes is not declared in this snippet; it presumably needs to
// be initialised as an array beforehand.
nodes.push(datas[k].name.replace(/^data-/,""));
}
for(var l=0;l<nodes.length;l++) {
// NOTE(review): search is called on nodes itself rather than on nodes[l], and
// the > 0 comparison would miss a match at index 0; verify the intent.
if(nodes.search("val")>0){
...do my code here...
}
}
}
but is there a more elegant way to do that?
// Logs a message for every element under <body> that carries a data-val attribute.
var el = document.body.querySelectorAll("*"),
    len = el.length;
for (var i = 0; i < len; i++) {
  var x = el[i].attributes,
      len2 = x.length;
  // j must restart at 0 for every element; if it were initialised only once
  // before the outer loop, later elements would be scanned from wherever the
  // previous element's scan stopped and their first attributes skipped.
  for (var j = 0; j < len2; j++) {
    if (x[j].name === "data-val") console.log("data-val present");
  }
}
http://jsfiddle.net/MsSht/1/

Categories