Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 6 years ago.
Improve this question
I'm slightly confused about how to use promises. I've read a few things on promises mainly because it seems like I have to use them. I'm working on a small application which is supposed to search through some pdfs using pdfjs, and they use promises. I knocked up something in nodejs, by looking at various examples on the net but I run into a problem.
Let's look at the code first:
require('pdfjs-dist');
var fs = require('fs');
// Term to look for in every text block (earlier experiments used "course" and "document").
var searchTerm = "designee";
// Running count of matching text blocks; incremented by searchPdf.
var wordCounter = 0;
// Match records collected by constructResult.
var searchResultJSON = [];
// Bytes of the PDF to scan (earlier experiments used 'testPdf.pdf' and 'advanced-javascript.pdf').
var data = new Uint8Array(fs.readFileSync('iss4.pdf'));

PDFJS.getDocument(data).then(function (pdfDocument) {
  // Feeds every text block of one page to searchPdf and logs the blocks that matched.
  function scanTextContent(textContent) {
    var page_text = "";
    var last_block = null;
    var lineWithResult = "";
    for (var k = 0; k < textContent.items.length; k++) {
      var block = textContent.items[k];
      page_text += block.str;
      last_block = block;
      lineWithResult = searchPdf(block.str);
      if (lineWithResult == null) {
        continue;
      }
      console.log(lineWithResult + " wordCounter is " + wordCounter);
    }
  }

  // Requests the text content of a loaded page; the scan runs once that promise resolves.
  function scanPage(page) {
    page.getTextContent().then(scanTextContent);
  }

  console.log('Number of pages: ' + pdfDocument.numPages );
  // Pages are requested by number, starting at 1; each request resolves on its own later.
  var pageNumber = 1;
  while (pageNumber <= pdfDocument.numPages) {
    console.log("i is " + (pageNumber));
    pdfDocument.getPage(pageNumber).then(scanPage);
    pageNumber += 1;
  }
});
/**
 * Checks one piece of PDF text for the global `searchTerm`, ignoring case.
 * On a hit it increments the global `wordCounter` and records the match
 * through constructResult.
 *
 * @param {string} toSearch - text block to inspect.
 * @returns {?string} `toSearch` unchanged when it contains the term, otherwise null.
 */
function searchPdf(toSearch){
  // Lower-case both sides so that a search term containing capitals still matches.
  var result = toSearch.toLowerCase().indexOf(searchTerm.toLowerCase());
  if(result < 0){//no match
    return null;
  }
  wordCounter++;
  constructResult(toSearch, result);//build the result object
  return toSearch;
}
/**
 * Appends one match record to the global searchResultJSON array.
 *
 * @param {string} toSearch - the text in which the term was found.
 * @param {number} result - index of the search term within that text.
 */
function constructResult(toSearch, result){
  var entry = {};
  entry["TextLine"] = toSearch;
  entry["SearchTerm"] = searchTerm;
  entry["Result"] = result;
  searchResultJSON.push(entry);
}
The purpose of this code is to:
loop through the pdf's pages
loop through the content
get the pdf text in a variable line by line
search the pdf content with a keyword
if the keyword finds a match, print the match
get the matches in a javascript object
So, it all works OK but you'll notice that from inside the second for loop (where I get the text of the pdf that is) I call a function, searchPdf() which basically performs the search and from within that function I call another function constructResult(...) which is supposed to create the javascript object with the results.
I have some problems printing this object though, because if I print it outside the scope of the for loop, it is empty: the print call (in my case the console.log) executes before the loop has actually copied and analysed (read: processed and found a match in) the text. So, promises seem to be the way to resolve the problem. Thing is, I'm not sure how to code this in such a way that I can chain the promises and print my object after everything has executed. Any ideas?
EDIT:
so to clarify, what I need in sequence is this:
1)loop through pdf (I will have to amend the code to loop through a collection of pdfs at some point soon)
2)get each line of text
3)check that there is a match
4)if so, copy the line of text in the javascript object
5)print the javascript object
Try something like this:
/**
 * Finds the pages of a document whose text contains a search string.
 *
 * @param {Object} doc - document with a `numPages` count and a `getPage(n)`
 *   method (pages are numbered from 1) that returns a promise for a page;
 *   each page must offer `getTextContent()` returning a promise.
 * @param {string} s - text to look for (case-sensitive).
 * @returns {Promise<Array>} resolves once every page has been read, with the
 *   text content of each matching page, in page order.
 */
function search(doc, s) {
  var allPages = [];
  for (var i = 1; i <= doc.numPages; i++) {
    allPages.push(doc.getPage(i));
  }

  // A page's content is either a plain string (as in the mock) or an object
  // carrying text blocks in items[].str; in that case each block is checked
  // on its own.
  function containsTerm(content) {
    if (typeof content === 'string') {
      return content.includes(s);
    }
    return content.items.some(item => item.str.includes(s));
  }

  // Promise.all returns a promise that resolves once
  // each promise inside allPages has been resolved
  return Promise.all(allPages)
    // pages now contains an array of pages: request the text of every page
    // and wait, again through Promise.all, until all of it is available
    .then(pages => Promise.all(pages.map(p => p.getTextContent())))
    // We now have an array of contents, one per page: keep the matching ones
    .then(contents => contents.filter(containsTerm));
}
// Stand-in for the pdf API used in the question: three pages, and the text
// content of page n is simply the string 'Page n'.
var pdfDocument = {
  numPages: 3,
  getPage(pageNumber) {
    const page = {
      getTextContent() {
        return Promise.resolve('Page ' + pageNumber);
      },
    };
    return Promise.resolve(page);
  },
};
Related
Python's documentation website has a search feature. The results are a list of links to various places in the python documentation. All of these result links have the form
https://docs.python.org/3/library/__future__.html?highlight=__future__#module-__future__
The highlight query parameter causes "future" to get highlighted at the destination. I don't want this behaviour. So, I'm writing a userscript that changes the search results page, removing the ?highlight=__future__ etc from every link.
This would have been easy, except that the results are generated dynamically. So, I couldn't just find all <a> tags underneath <li> tags and edit the hrefs. If I tried that, no tags would be found, since they don't exist until a few seconds after the page loads. If I delayed the replacement script for a few seconds after the page loads, the fix wouldn't work until this delay expired. And in the case of a long search, the results can take many seconds to all come in. The only good way to apply the fix is to edit the javascript making the result tags. This is what I tried. Here's the script so far:
// ==UserScript==
// @name python-search-no-highlight
// @version 0.3
// @description Disables highlighting in docs.python.org's search results.
// @match http://docs.python.org/*
// @match https://docs.python.org/*
// @match http://*.docs.python.org/*
// @match https://*.docs.python.org/*
// @namespace https://greasyfork.org/users/217495-eric-toombs
// ==/UserScript==
// Source text of a replacement Search.query, kept in a template literal so that it
// can be injected into the page as a <script>. According to the marker comment inside
// the text, the only intended change is that highlightstring is set to ''.
// NOTE(review): this is an untagged template literal, so the backslash sequences
// written inside it (\d, \/, \') are processed as escapes; the resulting string is
// therefore not character-for-character the text shown here.
script_text = `
Search.query = function(query) {
var i;
// stem the searchterms and add them to the correct list
var stemmer = new Stemmer();
var searchterms = [];
var excluded = [];
var hlterms = [];
var tmp = splitQuery(query);
var objectterms = [];
for (i = 0; i < tmp.length; i++) {
if (tmp[i] !== "") {
objectterms.push(tmp[i].toLowerCase());
}
if ($u.indexOf(stopwords, tmp[i].toLowerCase()) != -1 || tmp[i].match(/^\d+$/) ||
tmp[i] === "") {
// skip this "word"
continue;
}
// stem the word
var word = stemmer.stemWord(tmp[i].toLowerCase());
// prevent stemmer from cutting word smaller than two chars
if(word.length < 3 && tmp[i].length >= 3) {
word = tmp[i];
}
var toAppend;
// select the correct list
if (word[0] == '-') {
toAppend = excluded;
word = word.substr(1);
}
else {
toAppend = searchterms;
hlterms.push(tmp[i].toLowerCase());
}
// only add if not already in the list
if (!$u.contains(toAppend, word))
toAppend.push(word);
}
// THIS IS THE LINE I MODIFIED!
var highlightstring = '';
// console.debug('SEARCH: searching for:');
// console.info('required: ', searchterms);
// console.info('excluded: ', excluded);
// prepare search
var terms = Search._index.terms;
var titleterms = Search._index.titleterms;
// array of [filename, title, anchor, descr, score]
var results = [];
$('#search-progress').empty();
// lookup as object
for (i = 0; i < objectterms.length; i++) {
var others = [].concat(objectterms.slice(0, i),
objectterms.slice(i+1, objectterms.length));
results = results.concat(Search.performObjectSearch(objectterms[i], others));
}
// lookup as search terms in fulltext
results = results.concat(Search.performTermsSearch(searchterms, excluded, terms, titleterms));
// let the scorer override scores with a custom scoring function
if (Scorer.score) {
for (i = 0; i < results.length; i++)
results[i][4] = Scorer.score(results[i]);
}
// now sort the results by score (in opposite order of appearance, since the
// display function below uses pop() to retrieve items) and then
// alphabetically
results.sort(function(a, b) {
var left = a[4];
var right = b[4];
if (left > right) {
return 1;
} else if (left < right) {
return -1;
} else {
// same score: sort alphabetically
left = a[1].toLowerCase();
right = b[1].toLowerCase();
return (left > right) ? -1 : ((left < right) ? 1 : 0);
}
});
// for debugging
//Search.lastresults = results.slice(); // a copy
//console.info('search results:', Search.lastresults);
// print the results
var resultCount = results.length;
function displayNextItem() {
// results left, load the summary and display it
if (results.length) {
var item = results.pop();
var listItem = $('<li style="display:none"></li>');
var requestUrl = "";
if (DOCUMENTATION_OPTIONS.BUILDER === 'dirhtml') {
// dirhtml builder
var dirname = item[0] + '/';
if (dirname.match(/\/index\/$/)) {
dirname = dirname.substring(0, dirname.length-6);
} else if (dirname == 'index/') {
dirname = '';
}
requestUrl = DOCUMENTATION_OPTIONS.URL_ROOT + dirname;
} else {
// normal html builders
requestUrl = DOCUMENTATION_OPTIONS.URL_ROOT + item[0] + DOCUMENTATION_OPTIONS.FILE_SUFFIX;
}
listItem.append($('<a/>').attr('href',
requestUrl +
highlightstring + item[2]).html(item[1]));
if (item[3]) {
listItem.append($('<span> (' + item[3] + ')</span>'));
Search.output.append(listItem);
listItem.slideDown(5, function() {
displayNextItem();
});
} else if (DOCUMENTATION_OPTIONS.HAS_SOURCE) {
$.ajax({url: requestUrl,
dataType: "text",
complete: function(jqxhr, textstatus) {
var data = jqxhr.responseText;
if (data !== '' && data !== undefined) {
listItem.append(Search.makeSearchSummary(data, searchterms, hlterms));
}
Search.output.append(listItem);
listItem.slideDown(5, function() {
displayNextItem();
});
}});
} else {
// no source available, just display title
Search.output.append(listItem);
listItem.slideDown(5, function() {
displayNextItem();
});
}
}
// search finished, update title and status message
else {
Search.stopPulse();
Search.title.text(_('Search Results'));
if (!resultCount)
Search.status.text(_('Your search did not match any documents. Please make sure that all words are spelled correctly and that you\'ve selected enough categories.'));
else
Search.status.text(_('Search finished, found %s page(s) matching the search query.').replace('%s', resultCount));
Search.status.fadeIn(500);
}
}
displayNextItem();
};
`;
// Append a <script> element carrying script_text to the document's <head>.
// The element is held in a declared local: assigning to an undeclared name
// creates a global, and throws a ReferenceError in strict mode.
const injectedScript = document.createElement('script');
injectedScript.type = 'text/javascript';
injectedScript.text = script_text;
document.getElementsByTagName('head')[0].appendChild(injectedScript);
This script fails with the error
Uncaught SyntaxError: missing ) after argument list search.html:106:60
When I go to click on search.html:106:60, it leads me to the static page, so of course my changes aren't there.
When I copy the javascript I'm trying to inject and paste it into the console directly, it works fine. So, I know there isn't even a syntax error. When I replace the function I want to insert with a much simpler test one, like
// Minimal replacement used as a test: it only logs a marker string.
Search.query = function () {
  // This is a comment.
  console.log("</test>");
};
it works fine. What the hell's going on?
I eventually figured it out. I thought that javascript backtick literals did not treat backslashes specially, but it turns out, they do. I just had to escape all backslashes in the backtick literal, and it worked! So, let that be a lesson: whenever storing javascript in a backtick literal, always escape the backslashes. Oh, and the dollar signs and the backticks.
Here is my inputText control with typeAhead enabled:
<!-- Edit box with type-ahead. The suggestion list (valueList) is computed by the SSJS
     function mytypeAheadList(); var="searchValue" presumably names the variable through
     which that function sees the text typed so far (verify against the XPages docs). -->
<xp:inputText id="inputNameEditBox">
<xp:typeAhead
mode="full"
minChars="3"
ignoreCase="true"
valueList="#{javascript:return mytypeAheadList();}"
var="searchValue"
valueMarkup="true"
id="typeAhead1">
</xp:typeAhead>
</xp:inputText>
SSJS mytypeAheadList() function calls custom Java userTools.userLookup() function to get a set of suggestions. (Our server cannot access corporate directory so we have to use LDAP HTTP Java API).
SSJS library:
function mytypeAheadList(){
var v=new userTools.userLookup(); //Java library
var usrList = v.getUserList(searchValue);
var lenList = usrList.length;
var retList = "<ul>";
if(lenList>0){
for (var i=0; i<lenList; i++) {
var matchDetails:string = ["<li>",#Name("[ABBREVIATE]", #Left(usrList[i], "#")),"</li>"].join("");
retList += matchDetails;
}
} else {
retList += ["<li>","None found","</li>"].join("");
}
retList += "</ul>";
return retList;
}
So that means userTools Java object is created each time user type a character. Is there a way to avoid it, e.g. make var v a global variable on page load? Seems scope variables cannot accept Java objects.
I would do the following:
Implement the Serializable interface to your POJO returned by getUserLookup. This allows to store the object in viewScope
Limit the max size of lenlist. E.g. 20 results would reduce the time of looping, the size of the HTTP response and the performance in the browser
Cache the result of the search (add searchValue and the resulting HTML string to a map). If a user hits backspace, the whole result then does not have to be recomputed.
Drop SSJS. Use Java.
optional: If possible, precompute the results.
EDIT
Something like this:
function mytypeAheadList(){
// check if value is already cached
if( viewScope.containsKey("search~" + searchValue) ){
return viewScope.get("search~" + searchValue);
}
// get the userLookup object
var v = null;
if( viewScope.containsKey("userLookup") ){
v = viewScope.get("userLookup");
}else{
v = new userTools.userLookup();
viewScope.put("userLookup", v);
}
// if usrList is "unlimited", limit the max size
var usrList = v.getUserList(searchValue);
var lenList = usrList.length > 20 ? 20 : usrList.length;
// if getUserList has a restriction parameter
var usrList = v.getUserList(searchValue, 20);
var lenList = usrList.length;
// build the list
var retList = null;
// reuse a variable is up to 2 times faster
var matchDetails = null;
if(lenList>0){
retList = "<ul>";
for (var i=0; i<lenList; i++) {
// concatenating a string is up to 2 times faster then join
matchDetails = "<li>" + #Name("[ABBREVIATE]", #Left(usrList[i], "#")) + "</li>";
retList += matchDetails;
}
retList += "</ul>";
} else {
// why join a static string?
retList = "<ul><li>None found</li></ul>";
}
// put the result to the cache
viewScope.get("search~" + searchValue, retList);
return retList;
}
Yes you can do that.
Either you can put var v outside of your function to keep it loaded (and just lazy load it first time by checking if it is null).
Or you can put it all in a Java bean and let the scope of the bean determine how long you want to keep the data: ViewScope for just this page - ApplicationScope to allow all users to share it (and you can build in a check to force a maximum age of xx minutes - this could be relevant to consider if the data you look up could change).
I have two arrays.
// Tasks entered in the text field; pushArray appends to this list and it is what gets saved.
var fruits = [];
// Second list, filled by parseArray from what was saved in localStorage.
var tasks = [];
When I enter a value in the text field it fires a function that pushes the value to an array. It then fires a separate function that stringifies the array and saves it in local storage. ("when" is my alias for document.addEventListener.)
// Pressing Enter in the input adds the typed text as a task, saves the list
// and clears the input; every other key is ignored.
when(toDo, "keypress", function (event) {
  var isEnter = event.key == "Enter" || event.keyCode == 13;
  if (!isEnter) {
    return;
  }
  pushArray();
  stringifyArray(fruits);
  toDo.value = "";
});
// Wraps the current text of the input in a Task (with a placeholder note)
// and appends it to the fruits array.
function pushArray(){
  fruits.push(new Task(toDo.value, "No note yet"));
}
// Serialises the given array to JSON and stores it in localStorage under the key "tasks".
function stringifyArray(array){
  localStorage.setItem("tasks", JSON.stringify(array));
}
When I loop through the first array and try to display object.Name and .Note in a div it works fine:
// Clicking the button re-renders the in-memory task list (fruits) into demolist,
// one "Name Note" entry per line.
when(button, "click", function(event){
  var html = "";
  // The counter is declared locally; an undeclared `i` would be an implicit
  // global (and a ReferenceError in strict mode).
  for (var i = 0; i < fruits.length; i++){
    html += fruits[i].Name + " " + fruits[i].Note + "<br>";
  }
  // Assign once, which also replaces whatever was shown before.
  demolist.innerHTML = html;
});
But when I fire a function that parses that array, populates the second and tries to loop through it in the same manner I get "undefined undefined" even though I can see that the array contains all the objects I submitted when I check the console.
// Reads the JSON stored under "tasks" in localStorage, parses it, and pushes
// the parsed value (as a single element) onto the tasks array.
function parseArray(){
  tasks.push(JSON.parse(localStorage.getItem("tasks")));
}
// Click handler for button2: clears demolist2 and lists Name and Note of every entry in tasks.
when(button2, "click", function(event){
// NOTE(review): the next line is the header of a function declaration without a body,
// not a call to parseArray.
function parseArray()
demolist2.innerHTML = "";
for(i=0; i< tasks.length; i++){
demolist2.innerHTML += tasks[i].Name + " " + tasks[i].Note + "<br>";
}
});
https://jsfiddle.net/bjxs3LdL/
(NO JQUERY SOLUTIONS PLEASE)
I am new to coding and stackoverflow so forgive the long post.
Fix your parseArray() function by changing
tasks.push(parseObj);
to
tasks = parseObj;
EDIT: Sorry for all the edits, it's hard to wrap my head around the control flow. To fix the issue of the first note not getting saved, add a stringifyArray(fruits); call to the end of your submitNote() function.
The parseArray call is wrong; try rewriting the button2 listener like this:
// Clicking button2 reloads the saved tasks through parseArray() and renders
// them into demolist2, one "Name Note" entry per line.
when(button2, "click", function(event){
  parseArray();
  var html = "";
  // The counter is declared locally; an undeclared `i` would be an implicit
  // global (and a ReferenceError in strict mode).
  for (var i = 0; i < tasks.length; i++){
    html += tasks[i].Name + " " + tasks[i].Note + "<br>";
  }
  // Assign once, which also replaces whatever was shown before.
  demolist2.innerHTML = html;
});
Otherwise, your code needs a redesign, but that's for another opportunity.
This is my first question here, hoping you can help. Currently I am trying to loop through an API list of 100 arrays all of which contain one string of data. My loop filters through for numerical data and prints it to a div id. However when I hit data with "#N/A" instead of digits, it breaks my loop. I have tried nesting an if statement that would check if data is null or not, but as it treats null data as an object, this does not work. I have included commented out code to show the things I have tried:
// Fetches the spreadsheet feed with a synchronous request, then for every entry
// appends its raw text to #FTSEName and the first number found in it to #Print.
var xhr = new XMLHttpRequest();
var URL = "https://spreadsheets.google.com/feeds/list/0AhySzEddwIC1dEtpWF9hQUhCWURZNEViUmpUeVgwdGc/1/public/basic?alt=json";
xhr.open("GET", URL, false); // third argument false: the request is synchronous
xhr.send();
var statusResponseStringify = JSON.stringify(xhr.responseText, "", 2);
var statusResponseParse = JSON.parse(xhr.responseText);
var Find = statusResponseParse.feed.entry;
Find.forEach(function (entry) {
  var FTSEContent = entry.content.$t;
  var nameBox = document.getElementById("FTSEName");
  nameBox.innerHTML = nameBox.innerHTML + (FTSEContent + "<br><br>");
  // every run of digits and dots that starts with a digit
  var value = FTSEContent.match(/(\d[\d\.]*)/g);
  //var price = value[0];
  //var change = value[1];
  console.log(value);
  /*if (typeof value === "number") {
  document.getElementById("Change").innerHTML+=value + "<br>";
  }
  else if (typeof value === null) {
  document.getElementById("Change").innerHTML+="N/A" + "<br>";
  }
  else if (typeof value === "object") {
  document.getElementById("Change").innerHTML+="Smell" + "<br>";
  }
  else {
  document.getElementById("Change").innerHTML+="poo" + "<br>";
  };*/
  var printBox = document.getElementById("Print");
  var previous = printBox.innerHTML;
  if (typeof value == "undefined") {
    printBox.innerHTML = previous + ("N/A" + "<br>");
    return;
  }
  printBox.innerHTML = previous + (value[0] + "<br>");
});
This is the console I get back when I run this code
Could anyone help me with some code ideas to circumvent the null responses when looping. I would ideally like to print the numbers and print an N/A whenever there is a null or #N/A within the API data.
Thank you all!
Rewrite your check: instead of if (typeof value == "undefined") it should be...
if (value === null) { ... }
... as .match() returns null on non-matching, and not undefined.
As a sidenote, your code can be simplified a bit. First, you don't have to repeat the whole statusResponseParse.feed.entry... expression in FTSEContent, use Find instead:
var FTSEContent = Find[i].content.$t;
Second, my understanding is that you check for number in that content string. In this case, you can adjust your pattern a bit:
var value = FTSEContent.match(/(\d+(?:\.\d+)?)/);
... so it won't consume such illegal numbers as '3..' and '3.14.15' (in the last case, only 3.14 will be matched), and doesn't have to match globally (you only process the first result anyway).
I'm trying to get the content from all the nodes in the bookmarks menu into textbox.value, but only the last bookmark appears. What am I doing wrong?
/**
 * Queries the "unfiled bookmarks" folder and writes a title/URL pair into the
 * textbox with id "MyExtension".
 *
 * The loop visits every immediate child of the folder but only keeps a
 * reference to the current one, so the text written afterwards describes the
 * node held when the loop ends, i.e. the last child.
 */
function AllBookmarks()
{
  // XPCOM contract IDs start with "@".
  var historyService = Components.classes["@mozilla.org/browser/nav-history-service;1"]
                                 .getService(Components.interfaces.nsINavHistoryService);
  var options = historyService.getNewQueryOptions();
  var query = historyService.getNewQuery();
  var bookmarksService = Components.classes["@mozilla.org/browser/nav-bookmarks-service;1"]
                                   .getService(Components.interfaces.nsINavBookmarksService);
  //var toolbarFolder = bookmarksService.toolbarFolder;
  //var bookmarksMenuFolder = bookmarksService.bookmarksMenuFolder;
  var unfiledBookmarksFolder = bookmarksService.unfiledBookmarksFolder;
  //query.setFolders([toolbarFolder], 1);
  //query.setFolders([bookmarksMenuFolder], 1);
  query.setFolders([unfiledBookmarksFolder], 1);
  var result = historyService.executeQuery(query, options);
  var rootNode = result.root;
  rootNode.containerOpen = true;
  // iterate over the immediate children of this folder
  for (var i = 0; i < rootNode.childCount; i ++) {
    var node = rootNode.getChild(i);
  }
  // close a container after using it!
  rootNode.containerOpen = false;
  var textbox = document.getElementById("MyExtension");
  var title= "Title: " + node.title; // shows the title of URL
  var url= "\nURL: " + node.uri; // shows the URL
  textbox.value = title + url + "\n";
}
In the loop commented as "iterate over the immediate children of this folder", you are probably looping over each of the bookmarks correctly, but you are not doing anything with each node before moving on to the next. As a result, the node variable is set to the last node when you leave the loop.
Also, you are assigning to textbox.value, rather than appending to it, so even if you were acting on the data for each node you would have clobbered it each time, resulting in only the data of the last node (the same outcome!). If you want to build up a string like that, you have to append to it, not assign to it. One way to do this is with the += operator.
So, the last part of the code should be something like:
var textbox = document.getElementById("MyExtension");
try {
  // iterate over the immediate children of this folder
  for (var i = 0; i < rootNode.childCount; i ++) {
    var node = rootNode.getChild(i);
    var title = "Title: " + node.title; // gets the title of URL
    var url = "\nURL: " + node.uri; // gets the URL
    // note the += (append) operator; url already begins with a line break,
    // so nothing else goes between the two parts
    textbox.value += title + url + "\n";
  }
} finally {
  // close a container after using it, even if reading a child failed
  rootNode.containerOpen = false;
}
NB: In many other (stricter) languages, your posted code wouldn't compile because you're using the variable node outside of the "scope" (the braces) in which it was declared. It is a good rule of thumb to follow voluntarily though: violating this guideline often means you're making a mistake, or need to think more carefully about what you're doing. In this very case, it may have alerted you to the problem.