Highlighting string at multiple occurrences - javascript

I'm currently implementing a substring search. From the algorithm, I get array of substrings occurence positions where each element is in the form of [startPos, endPos].
For example (in javascript array):
[[1,3], [8,10], [15,18]]
And the string to highlight is:
ACGATCGATCGGATCGAGCGATCGAGCGATCGAT
I want to highlight (in HTML using <b>) the original string, so it will highlight or bold the string from position 1 to 3, then 8 to 10, then 15 to 18, etc (0-indexed).
A<b>CGA</b>TCGA<b>TCG</b>GATC<b>GAGC</b>GATCGAGCGATCGAT
This is what I have tried (JavaScript):
function hilightAtPositions(text, posArray) {
var startPos, endPos;
var startTag = "<b>";
var endTag = "</b>";
var hilightedText = "";
for (var i = 0; i < posArray.length; i++) {
startPos = posArray[i][0];
endPos = posArray[i][1];
hilightedText = [text.slice(0, startPos), startTag, text.slice(startPos, endPos), endTag, text.slice(endPos)].join('');
}
return hilightedText;
}
But it highlights just a range from the posArray (and I know it is still incorrect yet). So, how can I highlight a string given multiple occurrences position?

Looking at this question, and following John3136's suggestion of going from tail to head, you could do:
String.prototype.splice = function( idx, rem, s ) {
return (this.slice(0,idx) + s + this.slice(idx + Math.abs(rem)));
};
function hilightAtPositions(text, posArray) {
var startPos, endPos;
posArray = posArray.sort(function(a,b){ return a[0] - b[0];});
for (var i = posArray.length-1; i >= 0; i--) {
startPos = posArray[i][0];
endPos = posArray[i][1];
text= text.splice(endPos, 0, "</b>");
text= text.splice(startPos, 0, "<b>");
}
return text;
}
Note that in your code, you are overwriting hilightedText with each iteration, losing your changes.

Try this:
var stringToHighlight = "ACGATCGATCGGATCGAGCGATCGAGCGATCGAT";
var highlightPositions = [[1,3], [8,10], [15,18]];
var lengthDelta = 0;
for (var highlight in highlightPositions) {
var start = highlightPositions[highlight][0] + lengthDelta;
var end = highlightPositions[highlight][1] + lengthDelta;
var first = stringToHighlight.substring(0, start);
var second = stringToHighlight.substring(start, end + 1);
var third = stringToHighlight.substring(end + 1);
stringToHighlight = first + "<b>" + second + "</b>" + third;
lengthDelta += ("<b></b>").length;
}
alert(stringToHighlight);
Demo: http://jsfiddle.net/kPkk3/

Assuming that you're trying to highlight search terms or something like that. Why not replace the term with the bolding?
example:
term: abc
var text = 'abcdefgabcqq';
var term = 'abc';
text.replace(term, '<b>' + term + '</b>');
This would allow you to avoid worrying about positions, assuming that you are trying to highlight a specific string.

Assuming your list of segments is ordered from lowest start to highest, try doing your array from last to first.
That way you are not changing parts of the string you haven't reached yet.
Just change the loop to:
for (var i = posArray.length-1; i >=0; i--) {

If you want to check for multiple string matches and highlight them, this code snippet works.
function highlightMatch(text, matchString) {
let textArr = text.split(' ');
let returnArr = [];
for(let i=0; i<textArr.length; i++) {
let subStrMatch = textArr[i].toLowerCase().indexOf(matchString.toLowerCase());
if(subStrMatch !== -1) {
let subStr = textArr[i].split('');
let subStrReturn = [];
for(let j=0 ;j<subStr.length; j++) {
if(j === subStrMatch) {
subStrReturn.push('<strong>' + subStr[j]);
} else if (j === subStrMatch + (matchString.length-1)){
subStrReturn.push(subStr[j] + '<strong>');
} else {
subStrReturn.push(subStr[j]);
}
}
returnArr.push(subStrReturn.join(''));
} else {
returnArr.push(textArr[i]);
}
}
return returnArr;
}
highlightMatch('Multi Test returns multiple results', 'multi');
=> (5) ['<strong>Multi<strong>', 'Test', 'returns', '<strong>multi<strong>ple', 'results']

Related

How to print top 5 frequently occurring words from a substring

I have some project, with finding the bad words from user. How can i find an a top 5 frequently encountered words from the array of "bad words" in user input string?
I try to do it, but this code doesn't work how i want
const containsAny = (str, substrings) => {
for (var i = 0; i != substrings.length; i++) {
var substring = substrings[i];
if (str.indexOf(substring) != - 1) {
return substring;
}
}
return null;
}
var result = containsAny(textWords, listOfBadWords);
console.log("String was found in substring " + result);
i would like to make it like: word - number of times of use
Try
var filteredArr = topWords.filter(function(item) {
return item[1] != 0;
});
var output = filteredArr.map(function(item) {
return item[0] + " - " + item[1];
}).join(", ");

Copy string and increment version

For example if we have string str and I copy it it will be str_1, str_2, str_3 and so on..
If I copy str_2 then it will be str_2_1, str_2_2 and so on.
I have following logic but it does not work..
It should return 'Test2_5' (because Test2 AND Test2_4 already exists) but it returns 'Test2_4'
function createName(nameToCopy) {
var i;
var version = 1;
var nameCopiesArr = ["Test2", "Test2_2",
"Test2_3",
"Test2_4",
"Test2_2_2",
"Test2_3_1",
"Test2_2_1_2",
"Test2_2_3"
];
if (nameCopiesArr && nameCopiesArr.length > 1) {
for (i = 0; i < nameCopiesArr.length; i++) {
var indexes = nameCopiesArr[i].lastIndexOf('_') ? nameCopiesArr[i].match(/\d+$/) : 0
if (indexes) {
version = indexes[indexes.length - 1];
version = parseInt(version) + 1;
}
}
}
p = nameToCopy + '_' + version;
document.getElementById("demo").innerHTML = p;
return p;
}
<button onclick="createName('Test2')">Click me</button>
<p id="demo"></p>
You could search for all string which starts with the given name and a dash and an ending number. Then take the max number of ending and return the given string with an incremented version.
function createName(nameToCopy) {
var copies = ["Test2", "Test2_2", "Test2_3", "Test2_4", "Test2_2_2", "Test2_3_1", "Test2_2_1_2", "Test2_2_3"],
filtered = copies.filter(/./.test.bind(new RegExp(nameToCopy + '_' + '\\d+$'))),
version = filtered.reduce((v, s) => Math.max(v, +s.match(/\d+$/)[0]), 0);
return nameToCopy + '_' + ++version;
}
console.log(['Test2', 'Test', 'Test2_2'].map(createName));
More traditional
function createName(nameToCopy) {
var copies = ["Test2", "Test2_2", "Test2_3", "Test2_4", "Test2_2_2", "Test2_3_1", "Test2_2_1_2", "Test2_2_3"],
regexp = new RegExp(nameToCopy + '_' + '\\d+$'),
version = 0,
i, v;
for (i = 0; i < copies.length; i++) {
if (regexp.test(copies[i])) {
v = +copies[i].match(/\d+$/)[0];
if (v > version) {
version = v;
}
}
}
return nameToCopy + '_' + (version + 1);
}
console.log(['Test2', 'Test', 'Test2_2'].map(createName));
I update your code to test the number of the '_' char and if the current string contains the given input.
function createName(nameToCopy) {
var i;
var version = 1;
var nameToCopy_Nb = (nameToCopy.match(/_/g) || []).length; //number of the '_' char
var nameCopiesArr = ["Test2", "Test2_2",
"Test2_3",
"Test2_4",
"Test2_2_2",
"Test2_3_1",
"Test2_2_1_2",
"Test2_2_3"
];
if (nameCopiesArr && nameCopiesArr.length > 1) {
for (i = 0; i < nameCopiesArr.length; i++) {
var item_Nb = (nameCopiesArr[i].match(/_/g) || []).length;//number of the '_' char
if (nameCopiesArr[i].indexOf(nameToCopy) !== -1 && item_Nb === nameToCopy_Nb + 1) {
var indexes = nameCopiesArr[i].lastIndexOf('_') ? nameCopiesArr[i].match(/\d+$/) : 0
if (indexes) {
version = indexes[indexes.length - 1];
version = parseInt(version) + 1;
}
}
}
}
p = nameToCopy + '_' + version;
document.getElementById("demo").innerHTML = p;
return p;
}
But the answer of #Nina is good and probably the shortest !!! (I upvote her answer)
JSFiddle
I'm not sure what you're trying to do here, or why, so I can't really comment on your code. Just note that it is not too efficient, and not too understandable. And as a side note, your html doesn't call the code you wrote.
to answer your question of why it returns 'Test2_4' : your code loops through the hard coded values you put into the nameCopiesArr, and the version variable keeps the number of the current name + 1. And then you overwrite this with the result from the next entry in the array. Since the last entry in the array is 'Test2_2_3', you grab the last number there - which is 3 - and add 1 to that, so after your last iteration the version variable holds the value of 4 - and this is what you return.

Split method and then concatenate

What I need to do is make this function to where it splits each part of the string entered, and then puts pig latin on each word, meaning it adds ay at the end of each word. Here's what I have so far:
function pigLatin(whatWeTitle) {
var alertThis = " ";
var splitArray = whatWeTitle.split(" ");
for ( i = 0; i < splitArray.length; i++) {
alertThis = makeSentenceCase(splitArray[i]) + " ";
var newWord3 = splitArray.substring(1, whatWeTitle.length) + newWord + 'ay';
alert(newWord3);
}
}
Right now, it just takes the first letter of the string and adds it to the end. It doesn't change each word to pig latin, just the whole phrase. I was wondering of anyone could help me with this. THanks.
You need to use [i] to get items of your array :
var word = splitArray[i];
var newWord3 = word.substring(1,word.length) + word[0] + 'ay';
The best, if you want to end up with the whole new sentence, is to change each word an join them at the end :
var splitArray = whatWeTitle.split(" ");
for ( i = 0; i < splitArray.length; i++) {
var word = splitArray[i];
splitArray[i] = word.substring(1,word.length) + word[0] + 'ay';
}
var newSentence = splitArray.join(' ');
alert(newSentence);
If you test a little, you'll see this algorithm doesn't like the dots or comma in your sentence. If you want something stronger, you'd need a regular expression, for example like this :
var newSentence = whatWeTitle.replace(/[^\. ,]+/g, function(word){
return word.slice(1) + word[0] + 'ay';
});
alert(newSentence);
This works by replacing in place the words in the text, using a function to transform each word.
Something like this ?
function pigLatin(whatWeTitle) {
var alertThis = " ";
var splitArray = whatWeTitle.split(" ");
var finalString = "";
for ( i = 0; i < splitArray.length; i++) {
finalString += splitArray[i]+ "ay ";
}
alert(finalString);
}
pigLatin("this is a test");
You probably want to split off the first consonant values and then append them along with 'ay'.
I would use a regex to accomplish this. Here is a JSFiddle showing an example.
First part is split the word
var words = text.split(" ");
Next part is to piglatinify™ each word
words = words.map(function(word){ return pigLatinifyWord(word);});
This is the piglatinify™ function
function pigLatinifyWord(word){
var result;
var specialMatches = word.match(/(\W|\D)+$/);
var specialChars;
if(specialMatches && specialMatches.length >= 0){
var specialIndex = word.indexOf(specialMatches[0]);
specialChars = word.slice(specialIndex);
word = word.substr(0, specialIndex);
}
var i = word.search(/^[^aeiou]/);
if(i >= 0){
result = word.slice(i+1) + word.slice(0, i+1) + "ay";
}
else{
result = word + "ay";
}
if(specialChars){
result += specialChars;
}
return result;
}
Update
JSFiddle example now includes handling for non-word non-digit characters

Find anagrams JavaScript jQuery

Lets Say I have a list like
Dog
dOg
God
doggy
dogg
Zebra
Wood
What I want to do is find all the words in the list regardless of case, or regardless of the actual word. I want to match the letters and take a count. So from above
Dog, dOg, God would all be a match and in this case would return "3" as the count, but doggy, dogg, zebra, wood.. would all be unique and all would return 1 as the count.. Though I know this is possible I don't know where to begin. The anagram concept throws me off a bit. Any ideas?
var words = new Array("Dog", "dOg", "God", "doggy", "dogg","Zebra", "Wood");
var unique = {};
// iterate over all the words
for (i=0; i < words.length; i++) {
// get the word, all lowercase
var word = words[i].toLowerCase();
// sort the word's letters
word = word.split('').sort().join('')
// keep a count of unique combinations
if(unique[word])
unique[word] += 1;
else
unique[word] = 1;
}
// print the histogram
for (u in unique)
document.write(u + ": " + unique[u] + "<br/>")
here's what I came up with... jsfiddle here
$(document).ready(function() {
var mywords = ['Dog', 'dOg', 'God', 'doggy', 'dogg', 'Zebra', 'Wood'];
var finalArr = {};
for (var i = 0; i < mywords.length; i++) {
var temp = mywords[i].toLowerCase();
var letters = temp.split('');
var sorted = letters.sort();
var final = sorted.join("");
if(typeof finalArr[final] != 'undefined'){
finalArr[final] ++;
} else {
finalArr[final] = 1;
}
}
console.log(finalArr);
for(var i in finalArr) {
alert(i + ': ' + finalArr[i]);
document.write(i + ': ' + finalArr[i] + "<br/>");
}
});

Javascript word-count for any given DOM element

I'm wondering if there's a way to count the words inside a div for example. Say we have a div like so:
<div id="content">
hello how are you?
</div>
Then have the JS function return an integer of 4.
Is this possible? I have done this with form elements but can't seem to do it for non-form ones.
Any ideas?
g
If you know that the DIV is only going to have text in it, you can KISS:
var count = document.getElementById('content').innerHTML.split(' ').length;
If the div can have HTML tags in it, you're going to have to traverse its children looking for text nodes:
function get_text(el) {
ret = "";
var length = el.childNodes.length;
for(var i = 0; i < length; i++) {
var node = el.childNodes[i];
if(node.nodeType != 8) {
ret += node.nodeType != 1 ? node.nodeValue : get_text(node);
}
}
return ret;
}
var words = get_text(document.getElementById('content'));
var count = words.split(' ').length;
This is the same logic that the jQuery library uses to achieve the effect of its text() function. jQuery is a pretty awesome library that in this case is not necessary. However, if you find yourself doing a lot of DOM manipulation or AJAX then you might want to check it out.
EDIT:
As noted by Gumbo in the comments, the way we are splitting the strings above would count two consecutive spaces as a word. If you expect that sort of thing (and even if you don't) it's probably best to avoid it by splitting on a regular expression instead of on a simple space character. Keeping that in mind, instead of doing the above split, you should do something like this:
var count = words.split(/\s+/).length;
The only difference being on what we're passing to the split function.
Paolo Bergantino's second solution is incorrect for empty strings or strings that begin or end with whitespaces. Here's the fix:
var count = !s ? 0 : (s.split(/^\s+$/).length === 2 ? 0 : 2 +
s.split(/\s+/).length - s.split(/^\s+/).length - s.split(/\s+$/).length);
Explanation: If the string is empty, there are zero words; If the string has only whitespaces, there are zero words; Else, count the number of whitespace groups without the ones from the beginning and the end of the string.
string_var.match(/[^\s]+/g).length
seems like it's a better method than
string_var.split(/\s+/).length
At least it won't count "word " as 2 words -- ['word'] rather than ['word', '']. And it doesn't really require any funny add-on logic.
Or just use Countable.js to do the hard job ;)
document.deepText= function(hoo){
var A= [];
if(hoo){
hoo= hoo.firstChild;
while(hoo!= null){
if(hoo.nodeType== 3){
A[A.length]= hoo.data;
}
else A= A.concat(arguments.callee(hoo));
hoo= hoo.nextSibling;
}
}
return A;
}
I'd be fairly strict about what a word is-
function countwords(hoo){
var text= document.deepText(hoo).join(' ');
return text.match(/[A-Za-z\'\-]+/g).length;
}
alert(countwords(document.body))
Or you can do this:
function CountWords (this_field, show_word_count, show_char_count) {
if (show_word_count == null) {
show_word_count = true;
}
if (show_char_count == null) {
show_char_count = false;
}
var char_count = this_field.value.length;
var fullStr = this_field.value + " ";
var initial_whitespace_rExp = /^[^A-Za-z0-9]+/gi;
var left_trimmedStr = fullStr.replace(initial_whitespace_rExp, "");
var non_alphanumerics_rExp = rExp = /[^A-Za-z0-9]+/gi;
var cleanedStr = left_trimmedStr.replace(non_alphanumerics_rExp, " ");
var splitString = cleanedStr.split(" ");
var word_count = splitString.length -1;
if (fullStr.length <2) {
word_count = 0;
}
if (word_count == 1) {
wordOrWords = " word";
} else {
wordOrWords = " words";
}
if (char_count == 1) {
charOrChars = " character";
} else {
charOrChars = " characters";
}
if (show_word_count & show_char_count) {
alert ("Word Count:\n" + " " + word_count + wordOrWords + "\n" + " " + char_count + charOrChars);
} else {
if (show_word_count) {
alert ("Word Count: " + word_count + wordOrWords);
} else {
if (show_char_count) {
alert ("Character Count: " + char_count + charOrChars);
}
}
}
return word_count;
}
The get_text function in Paolo Bergantino's answer didn't work properly for me when two child nodes have no space between them. eg <h1>heading</h1><p>paragraph</p> would be returned as headingparagraph (notice lack of space between the words). So prepending a space to the nodeValue fixes this. But it introduces a space at the front of the text but I found a word count function that trims it off (plus it uses several regexps to ensure it counts words only). Word count and edited get_text functions below:
function get_text(el) {
ret = "";
var length = el.childNodes.length;
for(var i = 0; i < length; i++) {
var node = el.childNodes[i];
if(node.nodeType != 8) {
ret += node.nodeType != 1 ? ' '+node.nodeValue : get_text(node);
}
}
return ret;
}
function wordCount(fullStr) {
if (fullStr.length == 0) {
return 0;
} else {
fullStr = fullStr.replace(/\r+/g, " ");
fullStr = fullStr.replace(/\n+/g, " ");
fullStr = fullStr.replace(/[^A-Za-z0-9 ]+/gi, "");
fullStr = fullStr.replace(/^\s+/, "");
fullStr = fullStr.replace(/\s+$/, "");
fullStr = fullStr.replace(/\s+/gi, " ");
var splitString = fullStr.split(" ");
return splitString.length;
}
}
EDIT
kennebec's word counter is really good. But the one I've found includes a number as a word which is what I needed. Still, that's easy to add to kennebec's. But kennebec's text retrieval function will have the same problem.
This should account for preceding & trailing whitespaces
const wordCount = document.querySelector('#content').innerText.trim().split(/\s+/).length;
string_var.match(/[^\s]+/g).length - 1;

Categories