Javascript update table with timer(hidden in csv file) - javascript

Hi everybody. I have an interesting question and would be grateful for your help. I am working on a solution to the following task:
Implement a financials ticker grid using the CSV data provided Initial
View
Load and parse the data in snapshot.csv into a model.
Render a grid based on that data to the DOM.
Write an engine to work through deltas.csv and emit update messages to
parse.
When only a number exists on a line, that amount of time in
milliseconds should be waited until processing the next set of deltas.
When the last set of deltas is processed, return to the start of the
file and repeat.
Each set of deltas should be merged into the existing dataset and then
propagated to the DOM in the most efficient manner possible.
Provide notification that an item has been updated via a visual flare
in the UI.
My path:
I've managed to load snapshot.csv, parse it, and create the table; then (using promises) I load the second file, deltas.csv, parse it, and try to update the table. However, I'm struggling to write a timer function that updates the table according to the number of milliseconds given in the deltas CSV file. Thanks in advance for your advice. I'm learning JavaScript and looking for challenges, and this looked like an interesting one.
/**
 * Parse delimiter-separated text (CSV by default) into an array of rows,
 * each row being an array of string fields.
 *
 * Quoted fields may contain the delimiter, line breaks and doubled quotes
 * (`""`), which are unescaped to a single `"`.
 *
 * @param {string} strData - Raw text to parse.
 * @param {string} [strDelimiter=","] - Field delimiter (a single character is
 *   expected); any falsy value falls back to ",".
 * @returns {string[][]} Parsed rows. Text ending in a line break yields a
 *   final row `[""]`; empty input yields `[[""]]`.
 */
function CSVToArray( strData, strDelimiter ){
  const delimiter = strDelimiter || ",";
  const pattern = new RegExp(
    (
      // Delimiters.
      "(\\" + delimiter + "|\\r?\\n|\\r|^)" +
      // Quoted fields.
      "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
      // Standard fields.
      "([^\"\\" + delimiter + "\\r\\n]*))"
    ),
    "gi"
  );
  const rows = [[]];
  let match;
  while ((match = pattern.exec( strData )) !== null) {
    // A zero-length match (only possible at the very start of the text) does
    // not advance lastIndex, so move it on by hand or exec() would return the
    // same match forever.
    if (match[0].length === 0) {
      pattern.lastIndex++;
    }
    const matchedDelimiter = match[ 1 ];
    // Text that begins with a delimiter or line break has an empty first
    // field, which the pattern itself never reports.
    if (match.index === 0 && matchedDelimiter.length > 0) {
      rows[ 0 ].push( "" );
    }
    // Any separator other than the field delimiter is a line break: new row.
    if (matchedDelimiter.length > 0 && matchedDelimiter !== delimiter) {
      rows.push( [] );
    }
    // Compare against undefined, not truthiness: an empty quoted field ("")
    // captures an empty string and must stay a quoted value.
    const value = match[ 2 ] !== undefined
      ? match[ 2 ].replace( /""/g, "\"" )
      : match[ 3 ];
    rows[ rows.length - 1 ].push( value );
  }
  // Return the parsed data.
  return rows;
}
/**
 * Issue an asynchronous GET request with XMLHttpRequest.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<*>} Resolves with the request's `response` when the
 *   status is 200. Rejects with an Error whose message is the status text and
 *   whose `code` is the status for any other status, or with
 *   "Network Error" when the request itself fails.
 */
function httpGet(url) {
  return new Promise((resolve, reject) => {
    const request = new XMLHttpRequest();
    request.open('GET', url, true);
    request.onload = () => {
      if (request.status != 200) {
        const failure = new Error(request.statusText);
        failure.code = request.status;
        reject(failure);
        return;
      }
      resolve(request.response);
    };
    request.onerror = () => {
      reject(new Error("Network Error"));
    };
    request.send();
  });
}
// Parsed contents of snapshot.csv (r1) and deltas.csv (r2).
var r1, r2;

// Load and render the snapshot first, then fetch the deltas and apply them.
httpGet('snapshot.csv')
  .then(function(result) {
    r1 = CSVToArray(result);
    createTable(r1);
    return httpGet('deltas.csv');
  })
  .then(function(result2) {
    r2 = CSVToArray(result2);
    updateTable(r2);
  })
  .catch(function(error) {
    // Without a handler a failed request or a rendering error would only
    // surface as an unhandled promise rejection.
    console.error('Failed to load or render the CSV data:', error);
  });
// Not referenced by the code shown here.
var hInterval = null;
// Index of the next delta line to consume; persists across updateTable calls.
var k = 0;
// Set to 1 once the delta list has been exhausted and k has wrapped to 0.
var iteration = 0;

/**
 * Apply one pass of delta lines to the data rows of the #myTable table.
 *
 * Starting at the global position `k`, each delta line is written to the next
 * table row (row 0 is skipped). A 5-field line is read from index 1, any
 * other line from index 2; in both cases the three values land in table
 * columns 2-4, and empty values leave the cell untouched. A line with no
 * field in that range (for example a line holding only a delay) consumes no
 * table row and is simply stepped over; no waiting happens here.
 *
 * When the deltas run out, `k` is reset to 0, `iteration` is set to 1 and the
 * pass stops.
 *
 * @param {string[][]} how - Parsed delta lines.
 */
function updateTable(how) {
  const table = document.getElementById('myTable');
  let rowIndex = 1;
  while (rowIndex < table.rows.length) {
    const delta = how[k];
    if (typeof delta === "undefined") {
      // Restart when we finished
      iteration = 1;
      k = 0;
      break;
    }
    // The file has inconsistent lines: sometimes 6 columns, sometimes 5.
    const startPos = delta.length === 5 ? 1 : 2;
    // 5-field lines are shifted one column right so both layouts hit 2-4.
    const cellShift = startPos === 2 ? 0 : 1;
    let valuesSeen = 0;
    for (let i = startPos; i < startPos + 3; i++) {
      const value = delta[i];
      if (typeof value === "undefined") {
        continue;
      }
      valuesSeen++;
      if (value.length > 0) {
        // textContent keeps CSV text from being parsed as markup, matching
        // the text nodes createTable builds.
        table.rows[rowIndex].cells[i + cellShift].textContent = value;
      }
    }
    // Only a line that carried values uses up a table row.
    if (valuesSeen > 0) {
      rowIndex++;
    }
    k++; // Increment global row pointer
  }
}
/**
 * Build a bordered <table id="myTable"> from parsed CSV rows and append it to
 * the document body.
 *
 * The column count is taken from the first row. A trailing blank row (what a
 * CSV parser produces for text ending in a line break) is skipped; a final
 * row that holds data is rendered.
 *
 * @param {string[][]} now - Parsed rows, header row first.
 */
function createTable(now) {
  const body = document.getElementsByTagName("body")[0];
  // create elements <table> and a <tbody>
  const tbl = document.createElement("table");
  const tblBody = document.createElement("tbody");
  tbl.setAttribute("id", "myTable");

  // Drop the last row only when it is blank, so a file without a trailing
  // newline keeps its final data row.
  let rowCount = now.length;
  if (rowCount > 0) {
    const lastRow = now[rowCount - 1];
    const isBlank = lastRow.length === 0 ||
      (lastRow.length === 1 && (lastRow[0] === "" || lastRow[0] === undefined));
    if (isBlank) {
      rowCount--;
    }
  }
  const columnCount = rowCount > 0 ? now[0].length : 0;

  for (let j = 0; j < rowCount; j++) {
    const row = document.createElement("tr");
    for (let i = 0; i < columnCount; i++) {
      const cell = document.createElement("td");
      // Rows shorter than the header get empty cells rather than "undefined".
      const value = now[j][i] === undefined ? "" : now[j][i];
      cell.appendChild(document.createTextNode(value));
      row.appendChild(cell);
    }
    tblBody.appendChild(row);
  }
  // append the <tbody> inside the <table>, then the <table> to the <body>
  tbl.appendChild(tblBody);
  body.appendChild(tbl);
  tbl.setAttribute("border", "2");
}
<!DOCTYPE html>
<html lang="en">
<head>
<title>Application</title>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<meta name="description" content="Demo project">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="http://netdna.bootstrapcdn.com/bootstrap/3.0.3/css/bootstrap.min.css">
<link rel="stylesheet" href="styles.css">
<style type="text/css"></style>
</head>
<body>
<p>Let the game begin!</p>
<!-- Scripts go at the end of <body>: markup placed after </body> is invalid,
     and loading them last means <body> exists when script.js runs. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.8.1/jquery.min.js"></script>
<script type="text/javascript" src="script.js"></script>
</body>
</html>
Project files:
https://dw3i9sxi97owk.cloudfront.net/uploads/jobAttachments/150928161919_pph.zip

You could write a recursive function that processes one row at a time and then sets up a timeout to process the next row after X milliseconds. The code below is only pseudo-code; it doesn't use your variables or anything, but hopefully you can get the idea from it. It's a recursive function that calls itself for the next row after waiting the required amount of time.
// Pseudo-code sketch: handle one row after `waitTime` milliseconds, then
// schedule the following row the same way.
// NOTE(review): as written there is no stop condition, so the chain of
// timeouts never ends; a real version must stop or wrap at the last row.
function processRow(row, waitTime) {
setTimeout(function() {
//do your row stuff here
//move on to the next row, look at its wait time and pass that along
// NOTE(review): `row` is a number here, so `nextRow.waitTime` evaluates to
// undefined; the delay has to be looked up in the parsed delta data instead.
var nextRow = row+1,
nextWaitTime = nextRow.waitTime;
processRow(nextRow, nextWaitTime);
}, waitTime);
}
//run the first instance
// NOTE(review): `firstRow.wait` is likewise a placeholder (a property read on
// the number 1) and yields undefined.
var firstRow = 1,
firstWait = firstRow.wait;
processRow(firstRow, firstWait);

Related

Parsing data from api in javascript

I am trying to parse data from a wordpress json api to my ionic app, Data from api is coming as:
{
"event_0_date_from":["20191015"],
"event_0_date_to":["20190926"],
"event_0_event":["Winter Vacation"],
"event_0_description":["Winter vacation"],
"event_1_date_from":["20190917"],
"event_1_date_to":["20190930"],
"event_1_event":["Dashain Vacation"],
"event_1_description":["--some-data--"],
"event_2_date_from":["--some-data--"],
"event_2_date_to":["--some-data--"],
"event_2_event":["--some-data--"],
"event_2_description":["--some-data--"],
---------------
-------------
--------------
-------------
"event":["3"] this shows total number of events
}
Using javascript, how would I format the above data and save it to some variable so that I can render it easily?
events:[
{
"date_from":"20191015",
"date_to":"20190926",
"event":"Winter Vacation",
"description":"Winter vacation"
},
{
"date_from":"20191015",
"date_to":"20190926",
"event":"Winter Vacation",
"description":"Winter vacation"
},
{
"date_from":"--some-data--",
"date_to":"--some-data--",
"event":"--some-data--",
"description":"--some-data--"
},
---------------
-------------
--------------
-------------
]
I tried so many methods but none are working.
I think you should just use "yourObjekt.event[0]" as the loop counter, like this:
// Collect one plain object per event; the number of events is read from
// yourObjekt.event[0].
var newObjekt = [];
var i = 0;
while (i < yourObjekt.event[0]) {
  // Every source value is a one-element array, so take its first entry.
  newObjekt[i] = {
    date_from: yourObjekt[`event_${i}_date_from`][0],
    date_to: yourObjekt[`event_${i}_date_to`][0],
    event: yourObjekt[`event_${i}_event`][0],
    description: yourObjekt[`event_${i}_description`][0]
  };
  i++;
}
You just need to iterate over your json object. Within each iteration create a new map and push this newly created map into an array. Following is working snippet.
// Sample API payload: flat "event_<n>_<field>" keys, each holding a
// one-element array, plus "event" with the total number of events.
let data = {
  "event_0_date_from":["20191015"],
  "event_0_date_to":["20190926"],
  "event_0_event":["Winter Vacation"],
  "event_0_description":["Winter vacation"],
  "event_1_date_from":["20190917"],
  "event_1_date_to":["20190930"],
  "event_1_event":["Dashain Vacation"],
  "event_1_description":["--some-data--"],
  "event_2_date_from":["--some-data--"],
  "event_2_date_to":["--some-data--"],
  "event_2_event":["--some-data--"],
  "event_2_description":["--some-data--"],
  "event":["3"]
};
// One object per event ends up in here.
let array = [];
// Number of events, as reported by the payload.
let index = data.event[0];
for (let i = 0; i < index; i++) {
  // Fresh object per event, filled field by field from the flat keys.
  const map = {};
  for (const field of ["date_from", "date_to", "event", "description"]) {
    map[field] = data[`event_${i}_${field}`][0];
  }
  array.push(map);
}
console.log(array);
Try this code, it will include all event attributes in a dynamic way
// Group the flat "event_<n>_<field>" keys of `datas` into one object per
// event index. Values are copied as they are (still wrapped in their arrays).
var output = [];
for (var key in datas) {
  // Skip anything inherited through the prototype chain.
  if (!Object.prototype.hasOwnProperty.call(datas, key)) {
    continue;
  }
  // parse key
  var keyParts = key.split('_');
  // ignore "event" total
  if (keyParts.length > 1) {
    var index = keyParts[1]; // indexes : 0, 1, 2, etc.
    // Kept in its own variable so the loop variable `key` is not overwritten.
    var field = keyParts.slice(2).join('_'); // generate correct key from parts
    // initialize in first call
    if (output[index] === undefined) {
      output[index] = {};
    }
    // append to output
    output[index][field] = datas[key];
  }
}
This took about 20 minutes of Googling (plus a few minutes to adjust the counters properly); I have only written JS a few times in my whole life.
I was not sure how to load it into String and did not wanted to escape whole string, so I am loading it from a text file
Input data:
{
"event_0_date_from":["20191015"],
"event_0_date_to":["20190926"],
"event_0_event":["Winter Vacation"],
"event_0_description":["Winter vacation"],
"event_1_date_from":["20190917"],
"event_1_date_to":["20190930"],
"event_1_event":["Dashain Vacation"],
"event_1_description":["--some-data--"],
"event_2_date_from":["--some-data--"],
"event_2_date_to":["--some-data--"],
"event_2_event":["--some-data--"],
"event_2_description":["--some-data--"]
}
Page and script:
<!DOCTYPE HTML>
<html>
<body>
<input type="file" id="upload">
<script>
document.getElementById('upload').addEventListener('change', readFileAsString);

// Read the chosen file as text, parse it as JSON and log the fields of
// event_0, event_1, ... until an index has no "date_from" key.
function readFileAsString() {
  var files = this.files;
  if (files.length === 0) {
    console.log('No file is selected');
    return;
  }
  // Loop invariants, built once per file instead of once per event.
  var properties = ["date_from", "date_to", "event", "description"];
  var propertyPrefix = "event_";
  var reader = new FileReader();
  reader.onload = function(event) {
    var obj;
    try {
      obj = JSON.parse(event.target.result);
    } catch (err) {
      // A file that is not valid JSON is reported instead of throwing
      // out of the load handler.
      console.error('Selected file is not valid JSON:', err);
      return;
    }
    if (obj === null || typeof obj !== 'object') {
      console.error('Selected file does not contain a JSON object');
      return;
    }
    var counter = 0;
    // The presence of the first property decides whether the event exists.
    while (Object.prototype.hasOwnProperty.call(obj, propertyPrefix + counter + "_" + properties[0])) {
      console.log("element #" + counter + ": ");
      for (var i = 0; i < properties.length; i++) {
        var propToPrint = propertyPrefix + counter + "_" + properties[i];
        console.log(" " + obj[propToPrint]);
      }
      counter++;
    }
  };
  reader.readAsText(files[0]);
}
</script>
</body>
</html>
Result:
element #0:
20191015
20190926
Winter Vacation
Winter vacation
element #1:
20190917
20190930
Dashain Vacation
--some-data--
element #2:
--some-data--
--some-data--
--some-data--
--some-data--
So, eg. this way its possible :)

Remove a specific item from localstorage with js

I am adding simple records to localstorage but I am having a hard time removing a specific item from my localstorage object. I am able to maintain the data on refresh and continue adding records no problem. I would like to add a button next to each entry that allows me to remove THAT particular record from localstorage and from my list.
How would I accomplish this given the code below?
// Page elements and shared state used by the record functions.
var theLIst = document.getElementById('list');
var resetNotify = document.getElementById('reset-message');
var recordCounter = document.getElementById('record-counter');
var st = window.localStorage;
var count = st.clickcount;
var nameArray = [];
var newArr;

// Set the counter on refresh: restore the saved entries, or start from zero.
(function restoreSavedList() {
  var rawNames = st.getItem('names');
  var savedNames = JSON.parse(rawNames);
  if (!savedNames) {
    count = 0;
    recordCounter.innerHTML = count;
    return;
  }
  count = savedNames.length;
  recordCounter.innerHTML = count;
  // The stored JSON text is shown directly after stripping brackets,
  // backslashes, commas and double quotes from it.
  theLIst.innerHTML = rawNames.replace(/[\[\\\],"]+/g, '');
})();
/**
 * Append the name typed into #names-field to the stored list and refresh the
 * list and counter on the page.
 *
 * Each entry is stored as a ready-made HTML snippet (numbered paragraph plus
 * a Remove button) inside the JSON array kept under the "names" key.
 */
function addNameRecord() {
  resetNotify.innerHTML = '';
  var field = document.getElementById('names-field');
  nameArray = JSON.parse(st.getItem('names'));
  count = Number(count) + 1;
  // newArr[0] is the stored array as it was loaded (or null when none exists).
  newArr = makeArr(nameArray);

  var entry = `<p class="name-entry"><strong>${count}. </strong> ${field.value}</p><button onclick="clearThisItem('${field.value}')">Remove</button>`;
  // Check if there is anything in the name array.
  if (nameArray == null) {
    nameArray = [];
  }
  nameArray.push(entry);
  st.setItem("names", JSON.stringify(nameArray));
  field.value = '';

  if (newArr[0]) {
    theLIst.innerHTML = newArr[0].join('');
  } else {
    // Nothing was stored before this entry: restart the counter at one.
    count = 1;
    theLIst.innerHTML = nameArray;
  }
  recordCounter.innerHTML = count;
}
/**
 * Collect every argument passed in into a new array.
 *
 * @returns {Array} The arguments, in call order.
 */
function makeArr(...values) {
  return values;
}
/**
 * Purge all entries and reset the counter: clears local storage, empties the
 * in-memory list and the rendered list, and shows a confirmation message.
 */
function clearArray() {
  st.clear();
  nameArray = [];
  count = 0;
  theLIst.innerHTML = '';
  recordCounter.innerHTML = count;
  resetNotify.innerHTML = 'Array has been purged.';
}
Heres the code I tried
/**
 * Delete a specific entry.
 *
 * Entries are not stored under their own keys: they are HTML snippets inside
 * the JSON array saved under "names", so `localStorage.removeItem(item)`
 * cannot remove one. The array is loaded, every entry whose Remove button
 * targets `item` is dropped, and the result is saved and shown again.
 *
 * @param {string} item - The name passed by the entry's Remove button.
 * @returns {string} The same `item`.
 */
function clearThisItem(item) {
  const stored = JSON.parse(localStorage.getItem('names')) || [];
  // Each entry ends with a button whose handler is clearThisItem('<name>').
  const marker = "clearThisItem('" + item + "')";
  const remaining = stored.filter(function (entry) {
    return String(entry).indexOf(marker) === -1;
  });
  localStorage.setItem('names', JSON.stringify(remaining));

  const listElement = document.getElementById('list');
  if (listElement) {
    listElement.innerHTML = remaining.join('');
  }
  const counterElement = document.getElementById('record-counter');
  if (counterElement) {
    counterElement.innerHTML = remaining.length;
  }
  // Keep the page's shared running counter in step when it exists.
  if (typeof count !== 'undefined') {
    count = remaining.length;
  }
  return item;
}
Here is refactored code.
Firstly there is no need to store count, as we always have access to names.length
Store only names on localStorage, not entire HTML
For add and remove a name, fetch names array from localStorage, update it and save it back to localStorage.
After every action just update the UI using a single function call.
Note: Renamed names-field to name-field in the below implementation.
Here is the working code: https://jsbin.com/simitumadu/1/edit?html,js,output
// Page elements the functions below write to.
var $list = document.getElementById('list');
var $resetMessage = document.getElementById('reset-message');
var $resetCouter = document.getElementById('record-counter');

// On the very first visit nothing is stored yet: start with an empty list.
var names = getNames();
if (names === null || names === undefined) {
  setNames([]);
}

// Show whatever is stored.
renderData();
/**
 * Handle the Add button: store the name typed into #name-field, redraw the
 * list, then empty the input and the reset message.
 */
function addNameRecord() {
  var field = document.getElementById('name-field');
  $resetMessage.innerHTML = '';
  addName(field.value);
  renderData();
  // Clear the input only after the name has been stored and drawn.
  field.value = '';
}
/**
 * Redraw the counter and the list of names from what is currently stored.
 *
 * Entries are built as DOM nodes rather than an HTML string: the name is
 * inserted as text and the Remove button gets a real click listener, so
 * names containing quotes or markup display correctly and stay removable.
 */
function renderData(){
  // Treat "nothing stored yet" as an empty list.
  var names = getNames() || [];
  $resetCouter.innerText = names.length; // Count
  $list.innerHTML = '';
  names.forEach(function(name, index){
    var entry = document.createElement('p');
    entry.className = 'name-entry';
    var number = document.createElement('strong');
    number.textContent = (index + 1) + '. ';
    entry.appendChild(number);
    entry.appendChild(document.createTextNode(' ' + name));

    var removeButton = document.createElement('button');
    removeButton.textContent = 'Remove';
    removeButton.addEventListener('click', function(){
      clearThisItem(name);
    });

    $list.appendChild(entry);
    $list.appendChild(removeButton);
  });
}
/**
 * Handle the Clear button: replace the stored names with an empty list,
 * show the confirmation message and redraw.
 */
function clearArray() {
  $resetMessage.innerHTML = 'Array has been purged.';
  setNames([]); // clear names
  renderData();
}
/**
 * Handle an entry's Remove button.
 *
 * @param {string} name - Name to delete from the stored list.
 */
function clearThisItem(name) {
  // Update storage first; the redraw reads the list back from it.
  removeName(name);
  renderData();
}
/**
 * Read the list of names from localStorage.
 *
 * @returns {?Array} The parsed array stored under "names", or null when
 *   nothing (or an empty string) is stored there.
 */
function getNames(){
  // Declared locally: assigning without a declaration would create a global
  // variable (and throws in strict mode).
  var namesStr = localStorage.getItem('names');
  if (namesStr) {
    return JSON.parse(namesStr);
  }
  return null;
}
/**
 * Save the list of names to localStorage as JSON under the "names" key.
 *
 * @param {Array} names - List to store.
 * @returns {*} Whatever `localStorage.setItem` returns.
 */
function setNames(names) {
  var serialized = JSON.stringify(names);
  return localStorage.setItem('names', serialized);
}
/**
 * Append a name to the stored list.
 *
 * @param {string} name - Name to add.
 */
function addName(name){
  // getNames() returns null when nothing is stored (for example after
  // storage was cleared elsewhere); start from an empty list in that case.
  var names = getNames() || [];
  names.push(name);
  setNames(names);
}
/**
 * Remove the first occurrence of a name from the stored list and save the
 * list back (it is saved even when the name was not found).
 *
 * @param {string} name - Name to remove.
 */
function removeName(name){
  // getNames() returns null when nothing is stored; treat that as empty.
  var names = getNames() || [];
  var index = names.indexOf(name);
  if (index > -1) {
    names.splice(index, 1);
  }
  setNames(names);
}
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width">
<title>JS Bin</title>
</head>
<body>
<!-- The counter span is closed with </span>; a stray </div> here is
     mismatched markup. -->
<p>Count : <span id="record-counter"></span></p>
<input id="name-field">
<button onclick="addNameRecord()">Add</button>
<button onclick="clearArray()">Clear</button>
<div id="list"></div>
<div id="reset-message"></div>
</body>
</html>
Use localStorage.removeItem(insertYourKeyHere); to remove an object from local storage.
To remove it from your nameArray, you can search the list for the record, set it to null, and then sort the list so that the null ends up at the end; then decrement your count of records.

Google Apps Script "cannot find function" offset in object

Can any help with the following issue??? Pretty new to app script / javascript and would appreciate any help or guidance to figure out..
TypeError: Cannot find function offset in object Timestamp, (then continues listing the column headings in the red warning banner.
// Walks the rows of the active sheet: marks rows as approved, and for rows
// already approved but not yet emailed, marks them as sent and sends a mail.
// NOTE(review): this is the version that raises the reported
// "Cannot find function offset" TypeError; see the notes below.
function uiSendLogEmail() {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getActiveSheet();
var data = sheet.getDataRange().getValues();
// NOTE(review): `data` holds the values returned by getValues(), not a range,
// so offset()/getNumRows() are not available on it; this is the line the
// reported error points at.
data = data.offset(1,0,data.getNumRows())-1;
// For Loop
for ( var i = 0; i < data.length; i++ ) {
var row = data[i];
var approved = row[5];
var sentEmail = row[6];
var snapshot = row[3];
//if stmt in For Loop
if ( approved != "Yes" ) {
data[i][5] = "Yes";
// NOTE(review): same problem as above, getDataRange() is called on the
// values, not on the sheet.
data.getDataRange().setValues(data);
}// if stmt end curly
else if ( approved == "Yes" && sentEmail != "Yes" ) {
data[i][6] = "Yes";
data.getDataRange().setValues(data);
GmailApp.sendEmail("email#email.com", "subject", "body" + "whatever " + snapshot);
}//else if end curly
else {
// Stops the whole function at the first row that is already approved and
// emailed; later rows are not looked at.
return;
}//else stmt end curly
}// for loop end curly
}
I made a few basic tweaks that will hopefully point you in the right direction (and thanks to @AdamL for the .shift() method - much better than what I had in there before :) ):
/**
 * Walk the data rows of the active sheet (everything below the header row).
 * A row that is not yet approved is marked approved; a row that is approved
 * but not yet emailed is marked as sent and an email is sent for it. The
 * function stops at the first row that is already approved and emailed.
 *
 * Columns used (0-based): 3 = snapshot text, 5 = approved, 6 = email sent.
 */
function uiSendLogEmail() {
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sheet = ss.getActiveSheet();
  var data = sheet.getDataRange().getValues();
  // Drop the header row so data[i] is sheet row i + 1 of the data range.
  data.shift();
  // For Loop
  for ( var i = 0; i < data.length; i++ ) {
    var row = data[i];
    var approved = row[5];
    var sentEmail = row[6];
    var snapshot = row[3];
    // One-row range for the current data row: the data range shifted down by
    // the header row plus our position in the loop. Declared with var so it
    // does not become an implicit global.
    var myRange = sheet.getDataRange().offset(i + 1, 0, 1);
    //if stmt in For Loop
    if ( approved != "Yes" ) {
      // Here we can just work with the row element itself
      row[5] = "Yes";
      // Because setValues expects a two dimensional array,
      // we wrap our row in brackets to effectively convert it to one
      myRange.setValues([row]);
    } // if stmt end curly
    else if ( approved == "Yes" && sentEmail != "Yes" ) {
      // Same here as above
      row[6] = "Yes";
      myRange.setValues([row]);
      // NOTE(review): the recipient below is a placeholder, not a valid
      // address; replace it before use.
      GmailApp.sendEmail("email#email.com", "subject", "body" + "whatever " + snapshot);
    }
    else {
      return;
    }
  }
}
I think you are wanting to remove the first row of data (the headers); so if that's the case, try replacing:
data = data.offset(1,0,data.getNumRows())-1;
with
data.shift();

jQuery plugin DataTables: How to highlight the current search text?

I have started using the DataTables plugin (v1.6.2) for jQuery(v1.4.2), and I would like to ask you if you know a settings or a plugin that allow me to highlight the text used in search textbox on the filtered rows.
Thank you in advance
I would have to suggest the highlight plugin :)
I'm using this in about the same scenario right now, it's given me no issues thus far.
The usage is pretty simple:
$("#myTable").highlight($("#searchBox").val());
Just put the highlight CSS class in your stylesheet styles like you want and that's it:
.highlight { background-color: yellow }
I know that this question is now over 6 years old and the answers here may helped you at the time of asking. But for people still searching for this, there is a new plugin to integrate mark.js – a JavaScript keyword highlighter – into DataTables: datatables.mark.js.
Usage is as simple as:
// Initialise DataTables with the mark.js integration switched on.
// The initialiser is `DataTable`; there is no `DataTables` method.
$("table").DataTable({
  mark: true
});
Here is an example: https://jsfiddle.net/julmot/buh9h2r8/
This is the cleanest way and also gives you options none of the given solutions offers you.
There's now an official DataTables blog article available.
You can use this function by copying the following content:
/**
 * DataTables API plug-in: each time a row is drawn, wrap the text that
 * matches the current global or per-column search in
 * <span class='filterMatches'>.
 *
 * Search text is matched literally: regex metacharacters typed by the user
 * are escaped, and blank words (for example from a trailing space) are
 * ignored instead of producing a pattern that matches everywhere.
 *
 * @param {Object} oSettings - DataTables settings object.
 * @returns {Object} `this`, for chaining.
 */
jQuery.fn.dataTableExt.oApi.fnSearchHighlighting = function(oSettings) {
  // Escape characters that have a special meaning in a regular expression.
  function escapeRegExp(text) {
    return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  // Build one regex matching every full search string and each of its words;
  // returns null when there is nothing to highlight.
  function buildHighlightRegex(searchStrings) {
    var alternatives = [];
    for (var s = 0; s < searchStrings.length; s++) {
      if (searchStrings[s] == null || String(searchStrings[s]) === '') {
        continue;
      }
      var search = String(searchStrings[s]);
      // Whole string first, so the longest match wins over single words.
      alternatives.push(escapeRegExp(search));
      var words = search.split(/\s+/);
      for (var w = 0; w < words.length; w++) {
        if (words[w] !== '' && words[w] !== search) {
          alternatives.push(escapeRegExp(words[w]));
        }
      }
    }
    if (alternatives.length === 0) {
      return null;
    }
    // This regex will avoid in HTML matches
    return new RegExp("(" + alternatives.join("|") + ")(?!([^<]+)?>)", 'ig');
  }

  // Initialize regex cache
  oSettings.oPreviousSearch.oSearchCaches = {};
  oSettings.oApi._fnCallbackReg( oSettings, 'aoRowCallback', function( nRow, aData, iDisplayIndex, iDisplayIndexFull) {
    // Initialize search string array
    var searchStrings = [];
    var oApi = this.oApi;
    var cache = oSettings.oPreviousSearch.oSearchCaches;
    // If there is a global search string, add it to the search string array
    if (oSettings.oPreviousSearch.sSearch) {
      searchStrings.push(oSettings.oPreviousSearch.sSearch);
    }
    // If there are individual column search strings, add them as well
    var columnSearches = oSettings.aoPreSearchCols;
    if (columnSearches && columnSearches.length > 0) {
      for (var c = 0; c < columnSearches.length; c++) {
        if (columnSearches[c].sSearch) {
          searchStrings.push(columnSearches[c].sSearch);
        }
      }
    }
    // Create the regex built from one or more search strings, cached per
    // distinct set of search strings
    var regex = null;
    if (searchStrings.length > 0) {
      var cacheKey = JSON.stringify(searchStrings);
      if (!cache[cacheKey]) {
        cache[cacheKey] = buildHighlightRegex(searchStrings);
      }
      regex = cache[cacheKey];
    }
    // Loop through the rows/fields for matches
    jQuery('td', nRow).each( function(i) {
      // Take into account that ColVis may be in use
      var j = oApi._fnVisibleToColumnIndex( oSettings,i);
      // Only try to highlight if the cell is not empty or null
      if (aData[j]) {
        // Cell data may be a number; replace() needs a string.
        var cellText = String(aData[j]);
        if (regex) {
          this.innerHTML = cellText.replace( regex, function(matched) {
            return "<span class='filterMatches'>"+matched+"</span>";
          });
        }
        // Otherwise reset to a clean string
        else {
          this.innerHTML = cellText;
        }
      }
    });
    return nRow;
  }, 'row-highlight');
  return this;
};
inside :
dataTables.search-highlight.js
and call it as in this example:
<script type="text/javascript" src="jquery.dataTables.js"></script>
<script type="text/javascript" src="dataTables.search-highlight.js"></script>
<script type="text/javascript">
$(document).ready(function() {
var oTable = $('#example').dataTable();
oTable.fnSearchHighlighting();
} );
</script>
and add this code to you css file:
.filterMatches{
background-color: #BFFF00;
}
<link href="https://cdn.datatables.net/plug-ins/1.10.13/features/mark.js/datatables.mark.min.css" rel="stylesheet" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/jquery.mark.js"></script>
<script src="https://cdn.datatables.net/plug-ins/1.10.13/features/mark.js/datatables.mark.js"></script>
$("#tableId").dataTable({
mark: true
});
You can use the following add on
/**
 * DataTables API plug-in: each time a row is drawn, wrap the text that
 * matches the current search in <span class='filterMatches'>.
 *
 * This variant reads each cell's current text (not the row data) and, for
 * column searches, uses the text of the page's search input.
 *
 * Search text is matched literally: regex metacharacters typed by the user
 * are escaped, and blank words (for example from a trailing space) are
 * ignored instead of producing a pattern that matches everywhere.
 *
 * NOTE(review): the cell's text is written back through innerHTML, so text
 * that looks like markup is parsed as markup; confirm the table never shows
 * untrusted text.
 *
 * @param {Object} oSettings - DataTables settings object.
 * @returns {Object} `this`, for chaining.
 */
jQuery.fn.dataTableExt.oApi.fnSearchHighlighting = function(oSettings) {
  // Escape characters that have a special meaning in a regular expression.
  function escapeRegExp(text) {
    return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  // Build one regex matching every full search string and each of its words;
  // returns null when there is nothing to highlight.
  function buildHighlightRegex(searchStrings) {
    var alternatives = [];
    for (var s = 0; s < searchStrings.length; s++) {
      if (searchStrings[s] == null || String(searchStrings[s]) === '') {
        continue;
      }
      var search = String(searchStrings[s]);
      // Whole string first, so the longest match wins over single words.
      alternatives.push(escapeRegExp(search));
      var words = search.split(/\s+/);
      for (var w = 0; w < words.length; w++) {
        if (words[w] !== '' && words[w] !== search) {
          alternatives.push(escapeRegExp(words[w]));
        }
      }
    }
    if (alternatives.length === 0) {
      return null;
    }
    // This regex will avoid in HTML matches
    return new RegExp("(" + alternatives.join("|") + ")(?!([^<]+)?>)", 'ig');
  }

  // Initialize regex cache
  oSettings.oPreviousSearch.oSearchCaches = {};
  oSettings.oApi._fnCallbackReg( oSettings, 'aoRowCallback', function( nRow, aData, iDisplayIndex, iDisplayIndexFull) {
    // Initialize search string array
    var searchStrings = [];
    var cache = oSettings.oPreviousSearch.oSearchCaches;
    // If there is a global search string, add it to the search string array
    if (oSettings.oPreviousSearch.sSearch) {
      searchStrings.push(oSettings.oPreviousSearch.sSearch);
    }
    // Individual column search: the text of the search input is used for
    // every column that has a search set.
    // searchTxt=($('#filter_input input[type="text"]')?$('#filter_input input[type="text"]').val():"");
    var searchTxt = $('input[type="search"]').val();
    var columnSearches = oSettings.aoPreSearchCols;
    if (columnSearches && columnSearches.length > 0) {
      for (var c = 0; c < columnSearches.length; c++) {
        if (columnSearches[c].sSearch) {
          searchStrings.push(searchTxt);
        }
      }
    }
    // Create the regex built from one or more search strings, cached per
    // distinct set of search strings
    var regex = null;
    if (searchStrings.length > 0) {
      var cacheKey = JSON.stringify(searchStrings);
      if (!cache[cacheKey]) {
        cache[cacheKey] = buildHighlightRegex(searchStrings);
      }
      regex = cache[cacheKey];
    }
    // Loop through the rows/fields for matches
    jQuery('td', nRow).each( function() {
      if (aData) {
        var cellText = $(this).text();
        if (regex) {
          this.innerHTML = cellText.replace( regex, function(matched) {
            return "<span class='filterMatches'>"+matched+"</span>";
          });
        }
        // Otherwise reset to a clean string
        else {
          this.innerHTML = cellText;
        }
      }
    });
    return nRow;
  }, 'row-highlight');
  return this;
};
This solution is working for me.
Note: Currently it does not support individual column filtering, but you just have to uncomment following in the code.
searchTxt=($('#filter_input input[type="text"]')?$('#filter_input input[type="text"]').val():"");
I have tested this with datatables 1.10.2 and jquery 1.9.2 version.
This add on have better feature for highlighting search text. if you have created datatable in a dialog , then on dialog reopen you need to reinitialize datatable.
In DatatableHighlighter.js
/**
 * DataTables API plug-in: each time a row is drawn, remove the highlight
 * spans added by the previous draw and wrap the text that matches the
 * current search in <span class='filterMatches'>.
 *
 * Search text is matched literally: regex metacharacters typed by the user
 * are escaped, and blank words (for example from a trailing space) are
 * ignored instead of producing a pattern that matches everywhere.
 *
 * @param {Object} oSettings - DataTables settings object.
 * @returns {Object} `this`, for chaining.
 */
jQuery.fn.dataTableExt.oApi.fnSearchHighlighting = function(oSettings) {
  // Escape characters that have a special meaning in a regular expression.
  function escapeRegExp(text) {
    return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  // Build one regex matching every full search string and each of its words;
  // returns null when there is nothing to highlight.
  function buildHighlightRegex(searchStrings) {
    var alternatives = [];
    for (var s = 0; s < searchStrings.length; s++) {
      if (searchStrings[s] == null || String(searchStrings[s]) === '') {
        continue;
      }
      var search = String(searchStrings[s]);
      // Whole string first, so the longest match wins over single words.
      alternatives.push(escapeRegExp(search));
      var words = search.split(/\s+/);
      for (var w = 0; w < words.length; w++) {
        if (words[w] !== '' && words[w] !== search) {
          alternatives.push(escapeRegExp(words[w]));
        }
      }
    }
    if (alternatives.length === 0) {
      return null;
    }
    // This regex will avoid in HTML matches
    return new RegExp("(" + alternatives.join("|") + ")(?!([^<]+)?>)", 'ig');
  }

  // Replace every highlight span inside `cell` with a plain text node and
  // return the cell's resulting HTML. Only spans added by this plug-in are
  // touched, and the text is re-inserted as text rather than parsed as HTML.
  function stripHighlights(cell) {
    var element = $(cell);
    element.find("span.filterMatches").each(function() {
      $(this).replaceWith(document.createTextNode($(this).text()));
    });
    return element.html();
  }

  // Initialize regex cache
  oSettings.oPreviousSearch.oSearchCaches = {};
  oSettings.oApi._fnCallbackReg( oSettings, 'aoRowCallback', function( nRow, aData, iDisplayIndex, iDisplayIndexFull) {
    // Initialize search string array
    var searchStrings = [];
    var cache = oSettings.oPreviousSearch.oSearchCaches;
    // If there is a global search string, add it to the search string array
    if (oSettings.oPreviousSearch.sSearch) {
      searchStrings.push(oSettings.oPreviousSearch.sSearch);
    }
    // Individual column search: the text of the search input is used for
    // every column that has a search set.
    // searchTxt=($('#filter_input input[type="text"]')?$('#filter_input input[type="text"]').val():"");
    var searchTxt = $('input[type="search"]').val();
    var columnSearches = oSettings.aoPreSearchCols;
    if (columnSearches && columnSearches.length > 0) {
      for (var c = 0; c < columnSearches.length; c++) {
        if (columnSearches[c].sSearch) {
          searchStrings.push(searchTxt);
        }
      }
    }
    // Create the regex built from one or more search strings, cached per
    // distinct set of search strings
    var regex = null;
    if (searchStrings.length > 0) {
      var cacheKey = JSON.stringify(searchStrings);
      if (!cache[cacheKey]) {
        cache[cacheKey] = buildHighlightRegex(searchStrings);
      }
      regex = cache[cacheKey];
    }
    // Loop through the rows/fields for matches
    jQuery('td', nRow).each( function() {
      if (aData) {
        // Always start from the cell without the previous highlights.
        var cleanHtml = stripHighlights(this);
        if (regex) {
          this.innerHTML = cleanHtml.replace( regex, function(matched) {
            return "<span class='filterMatches'>"+matched+"</span>";
          });
        }
        // Otherwise reset to a clean string
        else {
          this.innerHTML = cleanHtml;
        }
      }
    });
    return nRow;
  }, 'row-highlight');
  return this;
};
and call it like this ....
// When #button is clicked: (re)create the #infoTable DataTable, clear its
// search box, drop leftover highlight spans, enable search highlighting and
// open the #AboutDialog dialog.
$("#button").click(function() {
  // Unwrap every highlight span, leaving its contents in place.
  function unwrapHighlights() {
    $("span[class='filterMatches']").contents().unwrap();
  }

  var tableOptions = {
    "bPaginate": false,
    "bInfo" : false,
    "bFilter": true,
    "bSort":false,
    "autoWidth": false,
    "destroy": true,
    "columnDefs": [
      { "width": "35%", "targets": 0 },
      { "width": "65%", "targets": 1 }
    ]
  };
  dTable = $('#infoTable').dataTable(tableOptions);
  $(".dataTables_filter input[type='search']").val('');
  unwrapHighlights();
  dTable.fnSearchHighlighting();
  unwrapHighlights();
  $("#AboutDialog").dialog('open');
});

How to extract text from a PDF in JavaScript

I wonder if is possible to get the text inside of a PDF file by using only Javascript?
If yes, can anyone show me how?
I know there are some server-side java, c#, etc libraries but I would prefer not using a server.
thanks
Because pdf.js has been developing over the years, I would like to give a new answer. That is, it can be done locally without involving any server or external service. The new pdf.js has a function: page.getTextContent(). You can get the text content from that. I've done it successfully with the following code.
What you get in each step is a promise. You need to code this way: .then( function(){...}) to proceed to the next step.
PDFJS.getDocument( data ).then( function(pdf) {
pdf.getPage(i).then( function(page){
page.getTextContent().then( function(textContent){
What you finally get is an array of text blocks, textContent.bidiTexts[]. You concatenate their strings to get the text of one page. The text blocks' coordinates are used to judge whether a newline or a space needs to be inserted. (This may not be totally robust, but from my tests it seems OK.)
The input parameter data needs to be either a URL or ArrayBuffer type data. I used the ReadAsArrayBuffer(file) function in FileReader API to get the data.
Note: According to some other user, the library has updated and caused the code to break. According to the comment by async5 below, you need to replace textContent.bidiTexts with textContent.items.
/**
 * Extracts the text of a PDF with pdf.js (the global `PDFJS`).
 *
 * `complete` holds the number of pages finished by the current pdfToText
 * call.
 */
function Pdf2TextClass(){
  var self = this;
  this.complete = 0;

  // Horizontal / vertical position of a text block. Older pdf.js builds
  // expose x and y directly; builds that return `items` carry the position
  // in the last two entries of `transform`.
  function blockX(block) {
    if (block.x !== undefined) { return block.x; }
    return block.transform ? block.transform[4] : undefined;
  }
  function blockY(block) {
    if (block.y !== undefined) { return block.y; }
    return block.transform ? block.transform[5] : undefined;
  }

  // Join the text blocks of one page, using their positions to decide
  // whether a line break or a space belongs between two blocks.
  function joinBlocks(blocks) {
    var page_text = "";
    var last_block = null;
    for( var k = 0; k < blocks.length; k++ ){
      var block = blocks[k];
      if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
        if( blockX(block) < blockX(last_block) )
          // The block starts further left than the previous one: new line.
          page_text += "\r\n";
        else if ( blockY(last_block) != blockY(block) && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
          page_text += ' ';
      }
      page_text += block.str;
      last_block = block;
    }
    return page_text;
  }

  /**
   * Extract the text of every page.
   *
   * @param data ArrayBuffer of the pdf file content, or a URL string
   * @param callbackPageDone To inform the progress each time
   *        when a page is finished. The callback function's input parameters are:
   *        1) number of pages done;
   *        2) total number of pages in file.
   * @param callbackAllDone The input parameter of callback function is
   *        the result of extracted text from pdf file.
   */
  this.pdfToText = function(data, callbackPageDone, callbackAllDone){
    console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
    // Start counting from zero so the same instance can be used again.
    self.complete = 0;
    PDFJS.getDocument( data ).then( function(pdf) {
      var total = pdf.numPages;
      callbackPageDone( 0, total );
      if (total === 0) {
        callbackAllDone("");
        return;
      }
      // Page text keyed by page number; pages may finish in any order.
      var layers = {};
      for (var i = 1; i <= total; i++){
        pdf.getPage(i).then( function(page){
          var n = page.pageNumber;
          page.getTextContent().then( function(textContent){
            // Newer pdf.js returns the blocks in `items`, older in `bidiTexts`.
            var blocks = textContent.items || textContent.bidiTexts;
            if( null != blocks ){
              layers[n] = joinBlocks(blocks) + "\n\n";
              console.log("page " + n + " finished.");
            }
            ++ self.complete;
            callbackPageDone( self.complete, total );
            if (self.complete == total){
              // Every page has reported; assemble the text in page order. A
              // page that produced no text contributes nothing.
              var full_text = "";
              for( var j = 1; j <= total; j++)
                full_text += (layers[j] || "");
              callbackAllDone(full_text);
            }
          }); // end of page.getTextContent().then
        }); // end of page.then
      } // of for
    });
  }; // end of pdfToText()
}; // end of class
I couldn't get gm2008's example to work (the internal data structure of pdf.js has apparently changed), so I wrote my own fully promise-based solution that doesn't use any DOM elements, query selectors or canvas, using the updated pdf.js from the example at Mozilla.
It takes a file path for the upload, since I'm using it with node-webkit.
You need to make sure you have the cmaps downloaded and pointed somewhere, and you need pdf.js and pdf.worker.js to get this working.
/**
 * Extract text from PDFs with PDF.js
 * Uses the demo pdf.js from https://mozilla.github.io/pdf.js/getting_started/
 *
 * Configures the PDFJS worker and cmap locations, loads the document and
 * resolves with the text of all pages: the strings of each page are joined
 * with a space, and the pages are joined with "\r\n".
 *
 * @param data Passed unchanged to PDFJS.getDocument().
 * @returns A promise for the extracted text.
 */
this.pdfToText = function(data) {
    PDFJS.workerSrc = 'js/vendor/pdf.worker.js';
    PDFJS.cMapUrl = 'js/vendor/pdfjs/cmaps/';
    PDFJS.cMapPacked = true;

    // Text of a single page: every item's string, space separated.
    function readPageText(page) {
        return page.getTextContent().then(function(content) {
            var strings = content.items.map(function(entry) {
                return entry.str;
            });
            return strings.join(' ');
        });
    }

    return PDFJS.getDocument(data).then(function(doc) {
        // pdf.js pages are numbered from 1.
        var requests = Array.from({ length: doc.numPages }, function(unused, index) {
            return doc.getPage(index + 1).then(readPageText);
        });
        return Promise.all(requests).then(function(texts) {
            return texts.join("\r\n");
        });
    });
};
usage:
// Convert the first selected file and log its text. The rejection handler
// reports a failed load or parse instead of leaving the promise unhandled.
self.pdfToText(files[0].path).then(function(result) {
    console.log("PDF done!", result);
}).catch(function(err) {
    console.error("PDF failed!", err);
});
Just leaving here a full working sample.
<html>
<head>
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
</head>
<body>
<input id="pdffile" name="pdffile" type="file" />
<button id="btn" onclick="convert()">Process</button>
<div id="result"></div>
</body>
</html>
<script>
/**
 * Click handler for the "Process" button: reads the file chosen in the
 * #pdffile input and appends its extracted text to the #result element.
 * Does nothing when no file has been selected.
 */
function convert() {
    var file = document.getElementById('pdffile').files[0];
    if (!file) {
        // Nothing selected yet; FileReader would throw on `undefined`.
        return;
    }
    var fr = new FileReader();
    var pdff = new Pdf2TextClass();
    fr.onload = function () {
        pdff.pdfToText(fr.result, null, (text) => { document.getElementById('result').innerText += text; });
    };
    fr.onerror = function () {
        console.error(fr.error);
    };
    // Hand pdf.js the raw bytes rather than a base64 data URL string.
    fr.readAsArrayBuffer(file);
}
/**
 * Extracts the text of a PDF with pdf.js (`pdfjsLib` API).
 *
 * Create with `new Pdf2TextClass()`; the same instance can be reused for
 * several files, one conversion at a time.
 */
function Pdf2TextClass() {
    var self = this;
    // Number of pages finished during the most recent pdfToText() run.
    this.complete = 0;

    /**
     * Joins the text items of one page into a single string.
     *
     * When the previous item does not end with a space, a line break is
     * inserted if `x` decreased, otherwise a space is inserted if `y`
     * changed and the previous item does not end in a lone letter.
     * NOTE(review): this reads `x`/`y` fields on each item; if the pdf.js
     * build in use does not provide them, no separator is ever added.
     */
    function joinBlocks(blocks) {
        var page_text = "";
        var last_block = null;
        for (var k = 0; k < blocks.length; k++) {
            var block = blocks[k];
            if (last_block != null && last_block.str[last_block.str.length - 1] != ' ') {
                if (block.x < last_block.x)
                    page_text += "\r\n";
                else if (last_block.y != block.y && (last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
                    page_text += ' ';
            }
            page_text += block.str;
            last_block = block;
        }
        return page_text;
    }

    /**
     * @param data ArrayBuffer of the pdf file content, or a URL string
     * @param callbackPageDone Optional (may be null). Called with
     *        1) number of pages done and 2) total number of pages.
     * @param callbackAllDone Optional. Called once with the extracted text.
     * @returns A promise resolving to the extracted text (pages in page order).
     */
    this.pdfToText = function (data, callbackPageDone, callbackAllDone) {
        console.assert(data instanceof ArrayBuffer || typeof data == 'string');
        // Restart the counter so an instance can be reused for another file.
        self.complete = 0;

        function reportProgress(done, total) {
            if (typeof callbackPageDone == 'function')
                callbackPageDone(done, total);
        }

        var loadingTask = pdfjsLib.getDocument(data);
        return loadingTask.promise.then(function (pdf) {
            // Public page count; avoids reaching into the private _pdfInfo.
            var total = pdf.numPages;
            reportProgress(0, total);
            var pending = [];
            for (var i = 1; i <= total; i++) {
                pending.push(pdf.getPage(i).then(function (page) {
                    var n = page.pageNumber;
                    return page.getTextContent().then(function (textContent) {
                        var page_text = "";
                        if (null != textContent.items) {
                            page_text = joinBlocks(textContent.items) + "\n\n";
                            console.log("page " + n + " finished.");
                        }
                        ++self.complete;
                        reportProgress(self.complete, total);
                        return page_text;
                    });
                }));
            }
            // Promise.all keeps the results in page order even though pages
            // may finish out of order; no timer is needed to wait for them.
            return Promise.all(pending);
        }).then(function (pageTexts) {
            var full_text = pageTexts.join("");
            if (typeof callbackAllDone == 'function')
                callbackAllDone(full_text);
            return full_text;
        });
    }; // end of pdfToText()
}; // end of class
</script>
Here's some JavaScript code that does what you want using Pdf.js from http://hublog.hubmed.org/archives/001948.html:
// Elements wired together below: the PDF source, the processor frame and
// the element that receives the extracted text.
var input = document.getElementById("input");
var processor = document.getElementById("processor");
var output = document.getElementById("output");

// React to messages posted by the processor frame.
window.addEventListener("message", function(message){
    // Ignore messages from any other window.
    if (message.source != processor.contentWindow) return;

    if (message.data === "ready"){
        // The processor is ready: download the PDF and hand over its bytes.
        var request = new XMLHttpRequest();
        request.open('GET', input.getAttribute("src"), true);
        request.responseType = "arraybuffer";
        request.onload = function() {
            processor.contentWindow.postMessage(request.response, "*");
        };
        request.send();
        return;
    }

    // Anything else is the extracted text; collapse runs of whitespace.
    output.textContent = message.data.replace(/\s+/g, " ");
}, true);
...and here's an example:
http://git.macropus.org/2011/11/pdftotext/example/
Note: This code assumes you're using nodejs. That means you're parsing a local file instead of one from a web page since the original question doesn't explicitly ask about parsing pdfs on a web page.
@gm2008's answer was a great starting point (please read it and its comments for more info), but it needed some updates (08/19) and had some unused code. I also like examples that are more complete. There's more refactoring and tweaking that could be done (e.g. with await), but for now it's as close to that original answer as it could be.
As before, this uses Mozilla's PDFjs library. The npmjs package is at https://www.npmjs.com/package/pdfjs-dist.
In my experience, this doesn't do well in finding where to put spaces, but that's a problem for another time.
[Edit: I believe the update to the use of .transform has restored the whitespace as it originally behaved.]
// This file is called myPDFfileToText.js and is in the root folder
const PDFJS = require('pdfjs-dist');
const pathToPDF = 'path/to/myPDFfileToText.pdf';
// Use `new` so the converter's state lives on its own object. A plain call
// would make `this` inside Pdf2TextObj the global object (or undefined in
// strict mode).
const toText = new Pdf2TextObj();
const onPageDone = function() {}; // don't want to do anything between pages
const onFinish = function(fullText) { console.log(fullText) };
toText.pdfToText(pathToPDF, onPageDone, onFinish);
/**
 * Factory for a PDF-to-text converter built on pdf.js.
 *
 * Works both as `Pdf2TextObj()` and `new Pdf2TextObj()`: the converter is a
 * plain object created here, so nothing is written to `this`.
 */
function Pdf2TextObj() {
    // `complete` counts the pages finished during the most recent run.
    const self = { complete: 0 };

    /**
     * Joins the text items of one page into a single string.
     *
     * When the previous item does not end with a space, a line break is
     * inserted if transform[5] decreased, otherwise a space is inserted if
     * transform[4] changed and the previous item does not end in a lone letter.
     * NOTE(review): in a PDF matrix, index 4 is conventionally the horizontal
     * and index 5 the vertical offset, so the X/Y names below look swapped.
     * The indices are kept exactly as they were; confirm before renaming.
     */
    function joinItems(items) {
        let page_text = "";
        let last_item = null;
        for (const item of items) {
            if (last_item != null && last_item.str[last_item.str.length - 1] != ' ') {
                const itemX = item.transform[5];
                const lastItemX = last_item.transform[5];
                const itemY = item.transform[4];
                const lastItemY = last_item.transform[4];
                if (itemX < lastItemX)
                    page_text += "\r\n";
                else if (itemY != lastItemY && (last_item.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
                    page_text += ' ';
            } // ends if may need to add whitespace
            page_text += item.str;
            last_item = item;
        } // ends for every item of text
        return page_text;
    }

    /**
     *
     * @param path Path to the pdf file.
     * @param callbackPageDone Optional. To inform the progress each time
     *        when a page is finished. The callback function's input parameters are:
     *        1) number of pages done.
     *        2) total number of pages in file.
     *        3) the `page` object itself or null.
     * @param callbackAllDone Optional. Called after all text has been collected. Input parameters:
     *        1) full text of parsed pdf.
     * @returns A promise resolving to the full text (pages in page order).
     *
     */
    self.pdfToText = function(path, callbackPageDone, callbackAllDone) {
        // Restart the counter so the converter can be reused for another file.
        self.complete = 0;

        const reportProgress = function(done, total, page) {
            if (typeof callbackPageDone === 'function')
                callbackPageDone(done, total, page);
        };

        return PDFJS.getDocument(path).promise.then(function(pdf) {
            const total = pdf.numPages;
            reportProgress(0, total, null);
            const pending = [];
            for (let pagei = 1; pagei <= total; pagei++) {
                pending.push(pdf.getPage(pagei).then(function(page) {
                    return page.getTextContent().then(function(textContent) {
                        let page_text = "";
                        if (null != textContent.items) {
                            page_text = joinItems(textContent.items) + "\n\n";
                            console.log("page " + page.pageNumber + " finished.");
                        } // ends if has items
                        ++self.complete;
                        reportProgress(self.complete, total, page);
                        return page_text;
                    }); // ends page.getTextContent().then
                })); // ends page.then
            } // ends for every page
            // Pages may finish in any order; Promise.all returns their text
            // in page order and waits for every page, so no timer is needed.
            return Promise.all(pending);
        }).then(function(pageTexts) {
            const full_text = pageTexts.join("");
            if (typeof callbackAllDone === 'function')
                callbackAllDone(full_text);
            return full_text;
        });
    }; // Ends pdfToText()

    return self;
}; // Ends object factory
Run in the terminal:
node myPDFfileToText.js
Updated 02/2021
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
<script>
/**
 * Extracts the text of a PDF with pdf.js (`pdfjsLib` API) and logs it.
 *
 * Create with `new Pdf2TextClass()`; the same instance can be reused for
 * several files, one conversion at a time.
 */
function Pdf2TextClass(){
    var self = this;
    // Number of pages finished during the most recent pdfToText() run.
    this.complete = 0;

    /**
     * Joins the text items of one page into a single string.
     *
     * When the previous item does not end with a space, a line break is
     * inserted if `x` decreased, otherwise a space is inserted if `y`
     * changed and the previous item does not end in a lone letter.
     * NOTE(review): this reads `x`/`y` fields on each item; if the pdf.js
     * build in use does not provide them, no separator is ever added.
     */
    function joinBlocks(blocks){
        var page_text = "";
        var last_block = null;
        for( var k = 0; k < blocks.length; k++ ){
            var block = blocks[k];
            if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
                if( block.x < last_block.x )
                    page_text += "\r\n";
                else if ( last_block.y != block.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
                    page_text += ' ';
            }
            page_text += block.str;
            last_block = block;
        }
        return page_text;
    }

    /**
     * @param data ArrayBuffer of the pdf file content, or a URL string
     * @param callbackPageDone Optional. Called with 1) number of pages done
     *        and 2) total number of pages.
     * @param callbackAllDone Optional. Called once with the extracted text.
     * @returns A promise resolving to the extracted text (pages in page order).
     *          The text is also written to the console.
     */
    this.pdfToText = function(data, callbackPageDone, callbackAllDone){
        console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
        // Restart the counter so an instance can be reused for another file.
        self.complete = 0;

        function reportProgress(done, total){
            if( typeof callbackPageDone == 'function' )
                callbackPageDone( done, total );
        }

        var loadingTask = pdfjsLib.getDocument(data);
        return loadingTask.promise.then(function(pdf) {
            // Public page count; avoids reaching into the private _pdfInfo.
            var total = pdf.numPages;
            reportProgress( 0, total );
            var pending = [];
            for( var i = 1; i <= total; i++ ){
                pending.push( pdf.getPage(i).then( function(page){
                    var n = page.pageNumber;
                    return page.getTextContent().then( function(textContent){
                        var page_text = "";
                        if( null != textContent.items ){
                            page_text = joinBlocks( textContent.items ) + "\n\n";
                            console.log("page " + n + " finished.");
                        }
                        ++ self.complete;
                        reportProgress( self.complete, total );
                        return page_text;
                    });
                }));
            }
            // Promise.all keeps the results in page order even though pages
            // may finish out of order; no timer is needed to wait for them.
            return Promise.all( pending );
        }).then( function(pageTexts){
            var full_text = pageTexts.join("");
            console.log(full_text);
            if( typeof callbackAllDone == 'function' )
                callbackAllDone( full_text );
            return full_text;
        });
    }; // end of pdfToText()
}; // end of class
// Create a converter and start extracting the text of the PDF at the given
// URL. 'PDF_URL' is a placeholder, and no callbacks are passed here.
var pdff = new Pdf2TextClass();
pdff.pdfToText('PDF_URL');
</script>
For all the people who actually want to use it on a node server:
/**
 * Created by velten on 25.04.16.
 *
 * Downloads a PDF, parses it with pdf2json and logs the total number of
 * text blocks found across all pages.
 */
"use strict";
let pdfUrl = "http://example.com/example.pdf";
let request = require('request');
// The parser must be instantiated: the download is piped into it and the
// events and destroy() below are used on that same instance.
let PDFParser = require('pdf2json');
var pdfParser = new PDFParser();
// encoding:null keeps the response body as raw bytes.
let pdfPipe = request({url: pdfUrl, encoding:null}).pipe(pdfParser);
pdfPipe.on("pdfParser_dataError", err => console.error(err) );
pdfPipe.on("pdfParser_dataReady", pdf => {
    //optionally:
    //let pdf = pdfParser.getMergedTextBlocksIfNeeded();
    let count1 = 0;
    // Count the text blocks on every page.
    // NOTE(review): the shape of the parsed data (`formImage.Pages`) may
    // depend on the pdf2json version - confirm against the installed release.
    for (let page of pdf.formImage.Pages) {
        count1 += page.Texts.length;
    }
    console.log(count1);
    pdfParser.destroy();
});
It is possible but:
you would have to use the server anyway, there's no way you can get content of a file on user computer without transferring it to server and back
I don't think anyone has written such a library yet
So if you have some free time you can learn pdf format and write such a library yourself, or you can just use server side library of course.

Categories