How to modify data-list element or alternative - javascript

I am working on an autocomplete text input by testing for string similarity, rather than checking for perfect character matches. This way, a dropdown like a datalist would still present the user with suggestions even if they accidentally add an extra character or misspell their desired input.
I have a working Javascript file that can compare the string input from an HTML text input to all the strings in a JSON file that holds about 700 school names as strings. The Javascript file then formats the HTML and passes the 10 most similar strings into an unordered list(for debugging) and into a data-list (where the user will be able to pick their correct answer).
However, datalists seem to have a built-in autocomplete that checks for identical groups of characters, and the datalist will intelligently remove suggestions if the inputted string does not exist within the suggestion.
<input
type ="text"
id="search"
list="hsDropdown"
class ="form-control form-control-lg"
placeholder="High School Name"
autocomplete="off"
autofocus = "false"
/>
<hr/>
<p id="word"></p>
<datalist id ="hsDropdown"></datalist>
<ul id ="list"></ul>
</main>
<script src="js/script.js" type ="text/javascript"></script>
<script src="js/ukkonen/index.js" type ="text/javascript"></script>
The options within the datalist in my HTML are properly populated by my script.js with the most similar strings, but I need to find a way to override the property of the datalist tag that causes results with nonperfect matches to not appear, or
I would need to find an alternative way to make a dropdown list appear from a textbox that is not limited to hard auto-correct.

You could look at the select2 jQuery plugin and the Fuzzy search issue opened there
As per requestor, he has implemented the fuzzy_match function and embedded it into the plugin as the following:
I've also a function called matcher, which looks something like:
/**
 * select2 matcher that scores an option against the typed search term.
 *
 * @param {{term: (string|undefined)}} term - search state; `term.term` is the typed text.
 * @param {{text: string}} text - the option being tested.
 * @returns {({text: Object, score: number}|false)} the option wrapped with its
 *   score, or `false` when the fuzzy match rejects it.
 */
function matcher(term, text){
  // Nothing typed yet: keep every option with a neutral score.
  if (term.term === undefined) {
    return {text: text, score: 1};
  }

  var outcome = fuzzy_match(term.term, text.text);
  if (outcome[0]) {
    return {text: text, score: outcome[1]};
  }
  return false;
}
I also have a sorter, which sorts the matched elements, (so matching elements come at top)
/**
 * Orders matcher results so the best-scoring options come first.
 *
 * @param {Array} data - matcher results; rejected options appear as falsy entries.
 * @returns {Array} the `text` member of every kept result, highest score first.
 */
function sorter(data) {
  // Drop the entries the matcher rejected.
  var kept = data.filter(Boolean);

  // Highest score first.
  kept.sort(function (left, right) {
    return right.score - left.score;
  });

  return kept.map(function (entry) {
    return entry.text;
  });
}
And whenever we're invoking a select2 on a element, we're passing this matcher as a matcher option, and sorter as sorter option, which looks something like:
// Initialise select2 on #element, passing the custom `matcher` and `sorter`
// through object-property shorthand (equivalent to matcher: matcher, sorter: sorter).
$("#element").select2({
placeholder: 'select a name',
matcher,
sorter
})
Here is the fuzzy_match function code provided:
/**
 * Fuzzy-matches `pattern` against `str`: the pattern's letters must appear in
 * `str` in order (case-insensitively) but need not be adjacent.
 *
 * Matches earn bonuses for being adjacent to the previous match, for following
 * a separator (`_` or space, or the start of the string) and for sitting on a
 * camelCase boundary; unmatched and leading letters are penalised.
 *
 * @param {string} pattern - the letters typed by the user.
 * @param {string} str - the candidate string to score.
 * @returns {[boolean, number, string]} `[matched, score, formatted]`, where
 *   `matched` tells whether every pattern letter was found, `score` ranks the
 *   match (higher is better) and `formatted` is `str` with each matched letter
 *   wrapped in `<b>…</b>`. `str` is not HTML-escaped.
 */
function fuzzy_match(pattern, str) {
  // Score consts
  const adjacency_bonus = 55; // bonus for adjacent matches
  const separator_bonus = 10; // bonus if match occurs after a separator
  const camel_bonus = 10; // bonus if match is uppercase and prev is lower
  const leading_letter_penalty = -3; // penalty applied for every letter in str before the first match
  const max_leading_letter_penalty = -9; // maximum penalty for leading letters
  const unmatched_letter_penalty = -1; // penalty for every letter that doesn't match

  // Loop variables
  let score = 0;
  let patternIdx = 0;
  const patternLength = pattern.length;
  const strLength = str.length;
  let prevMatched = false;
  let prevLower = false;
  let prevSeparator = true; // true so a match on the first letter gets the separator bonus

  // Use the "best" matched letter if multiple string letters match the pattern
  let bestLetter = null;
  let bestLower = null;
  let bestLetterIdx = null;
  let bestLetterScore = 0;
  const matchedIndices = [];

  // Loop over the candidate string
  for (let strIdx = 0; strIdx !== strLength; ++strIdx) {
    const patternChar = patternIdx !== patternLength ? pattern.charAt(patternIdx) : null;
    const strChar = str.charAt(strIdx);
    const patternLower = patternChar !== null ? patternChar.toLowerCase() : null;
    const strLower = strChar.toLowerCase();
    const strUpper = strChar.toUpperCase();

    // These flags are all computed BEFORE the pending best letter is committed below.
    const nextMatch = patternChar !== null && patternLower === strLower;
    const rematch = bestLetter !== null && bestLower === strLower;
    const advanced = nextMatch && bestLetter !== null;
    const patternRepeat = bestLetter !== null && patternChar !== null && bestLower === patternLower;

    // Commit the pending best letter once the pattern moves on (or repeats the same letter).
    if (advanced || patternRepeat) {
      score += bestLetterScore;
      matchedIndices.push(bestLetterIdx);
      bestLetter = null;
      bestLower = null;
      bestLetterIdx = null;
      bestLetterScore = 0;
    }

    if (nextMatch || rematch) {
      let newScore = 0;

      // Apply penalty for each letter before the first pattern match.
      // Penalties are negative, so Math.max picks the smallest penalty.
      if (patternIdx === 0) {
        score += Math.max(strIdx * leading_letter_penalty, max_leading_letter_penalty);
      }

      // Apply bonus for consecutive matches
      if (prevMatched) {
        newScore += adjacency_bonus;
      }

      // Apply bonus for matches after a separator
      if (prevSeparator) {
        newScore += separator_bonus;
      }

      // Apply bonus across camel case boundaries. strLower !== strUpper is the isLetter check.
      if (prevLower && strChar === strUpper && strLower !== strUpper) {
        newScore += camel_bonus;
      }

      // Update pattern index only if the next pattern letter was matched
      if (nextMatch) {
        ++patternIdx;
      }

      // Update best letter in str, which may be for a "next" letter or a "rematch"
      if (newScore >= bestLetterScore) {
        // Apply penalty for the now skipped letter
        if (bestLetter !== null) {
          score += unmatched_letter_penalty;
        }
        bestLetter = strChar;
        bestLower = bestLetter.toLowerCase();
        bestLetterIdx = strIdx;
        bestLetterScore = newScore;
      }
      prevMatched = true;
    } else {
      // The formatted string is built from matchedIndices after the loop, so
      // nothing is accumulated here.
      score += unmatched_letter_penalty;
      prevMatched = false;
    }

    // strLower !== strUpper is the isLetter check.
    prevLower = strChar === strLower && strLower !== strUpper;
    prevSeparator = strChar === '_' || strChar === ' ';
  }

  // Apply score for the last pending match
  if (bestLetter !== null) {
    score += bestLetterScore;
    matchedIndices.push(bestLetterIdx);
  }

  // Build the formatted string from the matched letter positions
  let formattedStr = "";
  let lastIdx = 0;
  for (let i = 0; i < matchedIndices.length; ++i) {
    const idx = matchedIndices[i];
    formattedStr += str.slice(lastIdx, idx) + "<b>" + str.charAt(idx) + "</b>";
    lastIdx = idx + 1;
  }
  formattedStr += str.slice(lastIdx);

  const matched = patternIdx === patternLength;
  return [matched, score, formattedStr];
}

Related

How to find the first incorrect word in a document attempting to use Standard Pilish?

I am creating a GDocs Apps Script to check my document to see if it is in Standard Pilish. I'd like to have the checker return both the position of the first error and the word that is incorrect. I can get the position of the first error in pi, but the word list does not necessarily reflect that positioning, so I cannot use that position to look up the word directly.
For example, I used a modified quote from Peter Walder: "Two mathematicians accommodatingly promenade to tavern, quest everything". The error lies in the 8th word in the list, but the 10th position in pi.
This is the script I've landed at. I originally tried to just use words[positionOne] before realizing my counting error.
/**
 * Reads the active Google document, converts each word to its letter count
 * (10-letter words count as 0), and logs where the resulting digit string
 * first differs from the first 15 digits of pi.
 *
 * Logs a one-element array: the zero-based index of the first differing
 * digit, or -1 when the digit string equals the pi digits exactly.
 */
function pilishTranslate() {
  const body = DocumentApp.getActiveDocument().getBody();

  // Strip quotation marks, then turn every other non-letter into white space.
  const cleaned = body.getText()
    .replace(/\"/g, "")
    .replace(/[^a-zA-Z\s]/g, " ");

  // One digit group per word; a 10-letter word stands for the digit 0.
  const digitString = cleaned
    .split(/\s+/)
    .map((word) => (word.length === 10 ? 0 : word.length))
    .join('');

  const piDigits = '314159265358979';

  // Index of the first differing character, or -1 for identical strings.
  const firstMismatch = (a, b) => {
    if (a === b) {
      return -1;
    }
    let shared = 0;
    while (shared < a.length && a[shared] === b[shared]) {
      shared += 1;
    }
    return shared;
  };

  const cases = [[digitString, piDigits]];
  const positionOne = cases.map(([actual, expected]) => firstMismatch(actual, expected));
  console.log(positionOne);
}
/**
 * Checks a text against Standard Pilish: the letter counts of successive
 * words must spell out the digits of pi. A 10-letter word stands for the
 * digit 0; longer words contribute their count as several digits (14 -> "14").
 *
 * @param {string} text - the text to check.
 * @param {string} [piDigits='314159265358979'] - the digits of pi to compare
 *   against. Words that extend past the supplied digits are reported as errors.
 * @returns {Array} `[]` when every word matches, otherwise
 *   `[wordNumber, word]` for the first offending word (wordNumber is 1-based).
 */
function checkPilish(text, piDigits = '314159265358979') {
  const words = text
    // Remove quotation marks from the text
    .replace(/\"/g, "")
    // Replace all non-letter characters with white space
    .replace(/[^a-zA-Z\s]/g, " ")
    // Split the text into an array of words
    .split(/\s+/)
    // Leading/trailing white space yields empty tokens; they are not words.
    .filter((word) => word.length > 0);

  let piIndex = 0;
  for (let i = 0; i < words.length; i++) {
    // Convert word length to Standard Pilish digits
    const digits = words[i].length === 10 ? '0' : String(words[i].length);

    // Check whether this matches the current position in pi
    if (piDigits.slice(piIndex, piIndex + digits.length) !== digits) {
      return [i + 1, words[i]];
    }
    piIndex += digits.length;
  }
  return [];
}
console.log(checkPilish("Two mathematicians accommodatingly promenade to tavern, quest everything"));
console.log(checkPilish("Two mathematicians accommodatingly promenade to tavern, quest all"));

Get duplicate character in javascript

How to get duplicate character in JavaScript,
As like input:
aaabcccdeffa
Output:
a4bc3def2
Try this:
var str = "aaabcccdeffa"; // Original string

// Tally of occurrences per letter (eg. 'a' : 4, 'b' : 1 etc.)
var store = {};

// Walk the string one position at a time and bump the matching tally
for (var position = 0; position < str.length; position++) {
  var letter = str[position];
  // A letter seen for the first time starts at 1, otherwise add one
  store[letter] = store[letter] == undefined ? 1 : store[letter] + 1;
}

// Rebuild the string from the tally: each letter once, followed by its
// count only when the letter occurred more than once
var newStr = Object.keys(store)
  .map(function (key) {
    return store[key] > 1 ? key + store[key] : key;
  })
  .join('');
Output will be in newStr
you can use a HashTable, which in javascript is done through an Object. This code works
/**
 * Collapses a string to its distinct characters, each followed by its number
 * of occurrences when it appears more than once ("aaabcccdeffa" -> "a4bc3def2").
 *
 * @param {string} str - the text to summarise.
 * @returns {string} the distinct characters with their repeat counts.
 */
function duplicateCharacters(str) {
  // Plain object used as the character -> count table
  var counts = {};
  var position = 0;

  while (position < str.length) {
    var symbol = str[position].toString();
    // Known character: bump its count; new character: start at 1
    counts[symbol] = counts.hasOwnProperty(symbol) ? counts[symbol] + 1 : 1;
    position++;
  }

  // Emit every character, appending the count only when it is not 1
  return Object.keys(counts).reduce(function (built, symbol) {
    var suffix = counts[symbol] != 1 ? counts[symbol].toString() : "";
    return built + symbol.toString() + suffix;
  }, "");
}
Here is the reference code which uses both jquery and Regular expression for calculating the frequency of the character.
// Source text and the string the result is accumulated into
var sourceText="aaabcccdeffa";
var resultText="";
// Splitting the source text to array
var sourceTextArray=sourceText.split("");
var uniqueText = [];
//Fetches Unique text from sourceTextArray in order
$.each(sourceTextArray, function(i, el){
    if($.inArray(el, uniqueText) === -1) uniqueText.push(el);
});
//Iteration with unique text array
$.each(uniqueText, function(i, el){
    // Count occurrences with a literal split rather than new RegExp(el):
    // characters such as "(" or "." are regex metacharacters and would
    // either throw or match the wrong thing.
    var occurrences = sourceText.split(el).length - 1;
    resultText += occurrences > 1 ? el + occurrences : el;
});
alert(resultText);
Working Example Here

Javascript: Split a string by comma, except inside parentheses

Given string in the form:
'"abc",ab(),c(d(),e()),f(g(),zyx),h(123)'
How can I split it to get the below array format:
abc
ab()
c(d(),e())
f(g(),zyx)
h(123)
I have tried normal javascript split, however it doesn't work as desired. Trying Regular Expression but not yet successful.
You can keep track of the parentheses, and add those expressions when the left and right parens equalize.
For example-
/**
 * Splits a string on the commas that sit outside any parentheses.
 *
 * @param {string} s - the text to split, e.g. 'a,b(c,d),e'.
 * @returns {string[]} the top-level segments; empty segments are dropped.
 */
function splitNoParen(s){
  var parts = [], current = '', depth = 0;
  for (var i = 0; i < s.length; i++) {
    var ch = s.charAt(i);
    // A comma only separates segments when no parenthesis is open.
    if (ch === ',' && depth === 0) {
      if (current !== '') parts.push(current);
      current = '';
      continue;
    }
    if (ch === '(') ++depth;
    // A stray ')' must not push the depth below zero.
    else if (ch === ')' && depth > 0) --depth;
    current += ch;
  }
  if (current !== '') parts.push(current);
  return parts;
}
var s1= '"abc",ab(),c(d(),e()),f(g(),zyx),h(123)';
splitNoParen(s1).join('\n');
/* returned value: (String)
"abc"
ab()
c(d(),e())
f(g(),zyx)
h(123)
*/
This might be not the best or more refined solution, and also maybe won't fit every single possibility, but based on your example it works:
var data = '"abc",ab(),c(d(),e()),f(g(),zyx),h(123)';

// Create a preResult by dropping the quotes and splitting on every comma.
var preResult = data.replace(/"/g, '').split(',');

// Create an empty result.
var result = [];

// Number of parentheses still open at the end of the pieces seen so far.
var depth = 0;

for (var i = 0; i < preResult.length; i++) {
    var piece = preResult[i];

    if (depth > 0) {
        // Still inside a parenthesis: the comma that was split away belongs
        // to the previous item, so glue this piece back onto it.
        result[result.length - 1] += ',' + piece;
    } else {
        // Top level: this piece starts a new item.
        result.push(piece);
    }

    // Track how many parentheses this piece opens and closes.
    var opened = (piece.match(/\(/g) || []).length;
    var closed = (piece.match(/\)/g) || []).length;
    depth += opened - closed;

    // A stray closing parenthesis must not make later pieces look nested.
    if (depth < 0) {
        depth = 0;
    }
}
Demo
For future reference, here's another approach to top-level splitting, using string.replace as a control flow operator:
/**
 * Splits `s` on top-level commas, i.e. commas not enclosed in parentheses.
 *
 * The regex consumes the string in chunks of the form
 * "plain text, closing parens, opening parens, optional comma"; the nesting
 * depth is updated per chunk and a cut is made when a comma is seen at depth 0.
 *
 * @param {string} s - the text to split.
 * @returns {string[]} the top-level segments (always at least one entry).
 */
function psplit(s) {
  const pieces = [];
  let nesting = 0;
  let segmentStart = 0;

  for (const chunk of s.matchAll(/[^(),]*([)]*)([(]*)(,)?/g)) {
    const [whole, closers, openers, comma] = chunk;
    nesting += openers.length - closers.length;

    const chunkEnd = chunk.index + whole.length;
    if (!nesting && comma) {
      // chunkEnd - 1 leaves the separating comma out of the segment.
      pieces.push(s.substring(segmentStart, chunkEnd - 1));
      segmentStart = chunkEnd;
    }
  }

  // Whatever follows the last top-level comma is the final segment.
  pieces.push(s.substring(segmentStart));
  return pieces;
}
console.log(psplit('abc,ab(),c(d(),e()),f(g(),zyx),h(123)'))
["abc", "ab()", "c(d(),e())", "f(g(),zyx)", "h(123)"]
Getting it to handle quotes as well would not be too complicated, but at some point you need to decide to use a real parser such as jison, and I suspect that would be the point. In any event, there's not enough detail in the question to know what the desired handling of double quotes is.
You can't use .split for this, but instead you'll have to write a small parser like this:
/**
 * Character-by-character splitter: cuts `s` at every comma seen while the
 * number of '(' and ')' read since the last cut is equal.
 *
 * @param {string} s - the text to split.
 * @returns {string[]} the segments, including empty ones (always at least one entry).
 */
function splitNoParen(s){
  const results = [];
  let current = '';
  // Opening minus closing parentheses seen since the last cut.
  let depth = 0;

  for (let pos = 0; pos < s.length; pos += 1) {
    const ch = s[pos];

    if (ch === ',' && depth === 0) {
      // Balanced at a comma: finish the current segment.
      results.push(current);
      current = '';
      continue;
    }

    if (ch === '(') {
      depth += 1;
    } else if (ch === ')') {
      depth -= 1;
    }
    current += ch;
  }

  // The text after the last cut is the final segment.
  results.push(current);
  return results;
}
var s1= '"abc",ab(),c(d(),e()),f(g(),zyx),h(123)';
console.log(splitNoParen(s1).join('\n'));
var s2='cats,(my-foo)-bar,baz';
console.log(splitNoParen(s2).join('\n'));
Had a similar issue and existing solutions were hard to generalize. So here's another parser that's a bit more readable and easier to extend to your personal needs. It'll also work with curly braces, brackets, normal braces, and strings of any type. License is MIT.
/**
 * This function takes an input string and splits it by the given token, but only if the token is not inside
 * braces of any kind, or a string.
 * A backslash always skips the character that follows it, so escaped quotes and
 * escaped separators never change the parser state.
 * @param {string} input The string to split.
 * @param {string} split_by Must be a single character.
 * @returns {string[]} An array of split parts without the split_by character.
 */
export function parse_split(input:string, split_by:string = ",") : string[]
{
    // Javascript has 3 types of strings
    const STRING_TYPES = ["'","`","\""] as const;
    // Some symbols can be nested, like braces, and must be counted
    const state = {"{":0,"[":0,"(":0};
    // Some cannot be nested, like a string, and just flip a flag.
    // Additionally, once the string flag has been flipped, it can only be unflipped
    // by the same token.
    let string_state : (typeof STRING_TYPES)[number] | undefined = undefined
    // Nestable symbols come in sets, usually in pairs.
    // These sets increase or decrease the state, depending on the symbol.
    const pairs : Record<string,[keyof typeof state,number]> = {
        "{":["{",1],
        "}":["{",-1],
        "[":["[",1],
        "]":["[",-1],
        "(":["(",1],
        ")":["(",-1]
    }
    let start = 0;
    let results : string[] = [];
    let length = input.length;
    for(let i = 0; i < length; ++i)
    {
        let char = input[i];
        // Backslash escapes the next character. We directly skip 2 characters by incrementing i one extra time.
        if(char === "\\")
        {
            i++;
            continue;
        }
        // The quote that opened the current string closes it again.
        if(char == string_state)
        {
            string_state = undefined;
        }
        // if it's not in a string, but it's a string opener, remember the string type in string_state.
        else if(string_state === undefined && STRING_TYPES.includes(char as typeof STRING_TYPES[number]))
        {
            string_state = char as typeof STRING_TYPES[number];
        }
        // If it's not in a string, and if it's a paired symbol, increase or decrease the state based on our "pairs" constant.
        else if(string_state === undefined && (char in pairs) )
        {
            let [key,value] = pairs[char];
            state[key] += value;
        }
        // If it's our split symbol...
        else if(char === split_by)
        {
            // ... check whether any flags are active ...
            if(Object.entries(state).every(([k,v])=>v == 0) && (string_state === undefined))
            {
                // ... if not, then this is a valid split.
                results.push(input.substring(start,i))
                start = i+1;
            }
        }
    }
    // Add the last segment if the string didn't end in the split_by symbol, otherwise add an empty string
    if(start < input.length)
    {
        results.push(input.substring(start,input.length))
    }
    else
        results.push("");
    return results;
}
With this regex, it makes the job:
// A single lookahead regex cannot follow nested parentheses: it would cut
// 'c(d(),e())' in two. Instead the regex only locates the structural
// characters, and a depth counter decides which commas are top-level.
const regex = /[(),]/g;
const str = '"abc",ab(),c(d(),e()),f(g(),zyx),h(123)';
const result = [];
{
  let depth = 0;
  let segmentStart = 0;
  for (const token of str.matchAll(regex)) {
    if (token[0] === '(') {
      depth += 1;
    } else if (token[0] === ')') {
      // A stray ')' must not push the depth below zero.
      depth = Math.max(0, depth - 1);
    } else if (depth === 0) {
      // Top-level comma: close the current segment just before it.
      result.push(str.slice(segmentStart, token.index));
      segmentStart = token.index + 1;
    }
  }
  // Whatever follows the last top-level comma is the final segment.
  result.push(str.slice(segmentStart));
}
console.log(result);
Javascript
// NOTE: this splits on EVERY comma, including the ones nested inside
// parentheses, so 'c(d(),e())' comes out as 'c(d()' and 'e())'.
var str='"abc",ab(),c(d(),e()),f(g(),zyx),h(123)'
// split('"') followed by toString() turns each double quote into a comma;
// filter(Boolean) then drops the empty strings left behind.
// The resulting array is not assigned to anything.
str.split('"').toString().split(',').filter(Boolean);
this should work

Regex to find price in HTML

Disclaimer: I know that parsing HTML with regex is not the correct approach. I am actually just trying to parse text inside the HTML.
I am parsing several pages, and I am looking for prices. Here is what I have so far:
// Every element under <body> except <script> elements
var all = document.body.querySelectorAll(":not(script)");
// A dollar sign, digits/commas, and optionally two decimals
var regex = /\$[0-9,]+(\.[0-9]{2})?/g;

Array.prototype.forEach.call(all, function (element) {
    // Only the direct child nodes carry text through nodeValue;
    // element nodes report null there and are skipped.
    Array.prototype.forEach.call(element.childNodes, function (child) {
        var node_value = child.nodeValue;
        if (node_value === null) {
            return;
        }
        var matches = node_value.match(regex);
        if (matches !== null && matches.length > 0) {
            alert("that's a match");
        }
    });
});
This particular code can get me prices like this:
<div>This is the current price: <span class="current">$60.00</span></div>
However, there are some prices that have the following structure:
<div>This is the current price: <sup>$</sup><span>80.00</span></div>
How could I improve the algorithm in order to find those prices? Shall I look in the first for loop for <sup>symbol</sup><span>price</span> with regex?
Important: Once there is a match, I need to find out which DOM element is holding that price: the innermost element that holds the price. So for example:
<div><span>$80.00</span></div>
I would need to say that is the element that is holding the price, not the div.
Try this:
// Whole visible text of the page (innerText is the fallback when textContent is empty)
var text = document.body.textContent || document.body.innerText;
// Dollar sign, digits/commas and optional two decimals, tolerating white space in between
var regex = /\$\s*[0-9,]+(?:\s*\.\s*\d{2})?/g;
var match = text.match(regex);

if (match !== null) {
    // Keep the first price found and squeeze out the white space inside it
    match = match[0].replace(/\s/g,"");
    alert("Match found: "+match);
}
Using a recursive search:
/**
 * Finds the innermost element whose text contains a price such as "$80.00".
 *
 * The search descends as long as a single child still contains the whole
 * price; when the price is spread over several children (for example
 * `<sup>$</sup><span>80.00</span>`) their common parent is returned.
 *
 * @param {Element} [node=document.body] - the element to search in.
 * @returns {(Element|false)} the narrowest element holding a price, or
 *   `false` when `node` contains none.
 */
function findPrice(node) {
    var current = node || document.body;
    // Fall back to an empty string so an element without text cannot throw.
    var text = current.textContent || current.innerText || "";
    var regex = /\$\s*[0-9,]+(?:\s*\.\s*\d{2})?/;
    if (!regex.test(text)) {
        return false;
    }
    var children = current.children || [];
    for (var i = 0; i < children.length; i++) {
        // Return what the recursion found, not just the direct child, so the
        // result is the deepest element rather than one level down.
        var found = findPrice(children[i]);
        if (found) {
            return found;
        }
    }
    // if no children matched, then this is the narrowest container
    return current;
}
var result = findPrice();
If you can choose your browser, you might use XPath to pre-select your candidates. The following code finds candidates nodes. I tried it in Firefox 25. You might also want to look at What browsers support Xpath 2.0? and http://www.yaldex.com/ajax-tutorial-4/BBL0029.html for cross-browser approaches.
<html><head><script type="text/javascript">
// Alerts the text of every <span> that looks like a price: either a span with
// digits that follows a <sup>$</sup>, or a span containing both digits and "$".
function func() {
//span containing digits preceded by superscript dollar sign
var xpathExpr1 = "//span[translate(text(),'0123456789.,','')!=text()][preceding-sibling::sup[text()='$']]";
//span containing digits and starting with dollar sign
var xpathExpr2 = "//span[translate(text(),'0123456789.,','')!=text() and contains(text(),'$')]";
var xpathExpr3 = xpathExpr1 + "|" + xpathExpr2; // union
var contextNode = document.body;
var namespaceResolver = function(prefix){return "";}
var resultType = XPathResult.UNORDERED_NODE_ITERATOR_TYPE;
// Evaluate the union so that both price layouts are found; evaluating only
// xpathExpr1 would miss spans such as <span>$129</span>.
var xpathResult = document.evaluate(xpathExpr3, contextNode, namespaceResolver, resultType, null);
alert(xpathResult);
var node;
while ((node = xpathResult.iterateNext()) != null) {
alert(node.textContent);
}
}
</script></head>
<body onload="func()"> aaa
<sup>$</sup><span>80.00</span> bbb
<span>$129</span> ccc
<sup>$</sup><span>ABC</span> ddd
</body></html>

pass $` value to associated parameter function of replace

I have an expression say
log(1,3)+4,5+max(7,8,9)
where comma is being used two ways.
1- In "log(1,3)+4,5" comma is being used in place of dot(.) or decimal sign.i.e. "log(1,3)+4,5" is equivalent to "log(1.3)+4.5".
2- In max(7,8,9) it is being used as number separator. i.e. this outcome of this is 9 ; the maximum number.
My problem is to substitute comma; which is being used as decimal separator; with decimal but this should not affect max(7,8,9). i.e. I need to convert above expression to
log(1.3)+4.5+max(7,8,9)
What I tried-
function substitute(expr) {
expr.replace(/,/g, function ($`) {
/*some processing here to decide whether comma to be substituted with dot or not.On that basis I will return either dot or comma.*/
}
But how can I pass $` value to associated function
or
Is it possible to do this in javascript.
expr.replace(/,/g,function ($`) {
if yes then how?
Your language is ambiguous.
max(8,1,8,2)
Does this return 8, 8,1 or 8,2?
Your language also doesn't look regular, so you can't parse it with a regular expression, you need the context. If something like this is allowed:
max(1,max(2,3)) // 3?
Assuming you can get rid of the ambiguity, you could write a parser to do the context detection.
This could be a solution :
/**
 * Replaces decimal commas with dots while leaving the argument lists of
 * list-taking functions (such as `max(7,8,9)`) untouched.
 *
 * Calls are protected in place, so the expression keeps its original order.
 * Only the text up to the first closing parenthesis of a protected call is
 * protected, so a different function nested inside it is not analysed.
 *
 * @param {string} string - the expression, e.g. 'log(1,3)+4,5+max(7,8,9)'.
 * @param {string[]} [listFunctions=['max']] - names of the functions whose
 *   commas separate arguments and must be kept.
 * @returns {string} the expression with decimal commas turned into dots.
 */
function myFilter(string, listFunctions = ['max']) {
    // Nothing to protect: every comma is a decimal comma.
    if (listFunctions.length === 0) {
        return string.replace(/,/g, '.');
    }

    // Escape the names so they are matched literally inside the regex.
    const names = listFunctions
        .map((name) => name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
        .join('|');

    // Either a whole protected call (tried first) or a single comma.
    const callOrComma = new RegExp('\\b(?:' + names + ')\\([^)]*\\)|,', 'g');

    return string.replace(callOrComma, (found) => (found === ',' ? '.' : found));
}
var s = '1,3+4,5+max(7,8,9)-max(2,3,5)';
var filteredString = myFilter(s);
jsFiddle Demo
This currently works with multiple max functions but only + and - signs. It could be improved with *, / and more... You will have to find the good regex.
Try the below using Javascript. Hope this helps you in logic.
DEMO HERE
var value = "log(1,3)-4,5+max(7,8,9)";

// One pass over the expression: a whole max(...) or min(...) call is matched
// first and kept as it is (its commas separate arguments); any other comma is
// a decimal comma and becomes a dot. Working on the whole expression instead
// of splitting on a single operator keeps this correct when several different
// operators are mixed, e.g. "1,5+max(1,2)-3,4".
var val = value.replace(/\b(?:max|min)\([^)]*\)|,/g, function (found) {
    return found === ',' ? '.' : found;
});

alert(val);
The output for the above code is log(1.3)-4.5+max(7,8,9)

Categories