How to print top 5 frequently occurring words from a substring

How to print top 5 frequently occurring words from a substring - javascript

I have some project, with finding the bad words from user. How can i find an a top 5 frequently encountered words from the array of "bad words" in user input string?
I try to do it, but this code doesn't work how i want
const containsAny = (str, substrings) => {
for (var i = 0; i != substrings.length; i++) {
var substring = substrings[i];
if (str.indexOf(substring) != - 1) {
return substring;
}
}
return null;
}
var result = containsAny(textWords, listOfBadWords);
console.log("String was found in substring " + result);
i would like to make it like: word - number of times of use

Try
var filteredArr = topWords.filter(function(item) {
return item[1] != 0;
});
var output = filteredArr.map(function(item) {
return item[0] + " - " + item[1];
}).join(", ");

Related

Specific Array element replace

I'm solving a simple problem where I need to capitalize the first alphabet of all words. I was able to do that but I have another string vn52tqsd0e4a if any of my output is matched with this string I have to replace it with --[matched string]-- .
so the expected output should be H--e--llo Worl--d--
but when I'm trying to replace the element with -- it's not doing anything. I tried replace() method as well but it didn't work. I don't know what I'm doing wrong here.
function LetterCapitalize(str) {
// code goes here
let array = str.split(" ")
for (let i=0; i<array.length; i++){
array[i] = array[i].charAt(0).toUpperCase() + array[i].slice(1)
}
let output = array.join(" ") ;
let comp = "vn52tqsd0e4a".split("");
for (let i=0; i<output.length; i++){
comp.map(el=> {
if(output[i] === el){
console.log( `matched ${output[i]}` )
output[i] = `--${output[i]}--`;
console.log(output[i]);
}
})
//
}
console.log(output);
}
LetterCapitalize("hello world");

You can achieve this using split, map, join as:
function LetterCapitalize(str) {
// code goes here
let array = str.split(' ');
for (let i = 0; i < array.length; i++) {
array[i] = array[i].charAt(0).toUpperCase() + array[i].slice(1);
}
let output = array.join(' ');
let comp = 'vn52tqsd0e4a'.split('');
const result = output
.split('')
.map((c) => (comp.includes(c) ? `--${c}--` : c))
.join('');
console.log(result);
}
LetterCapitalize('hello world');

You coulduse Array.reduce() to iterate over the str argument and either capitalize or replace depending on the character.
If the preceeding value is a space we'll capitalize, otherwise if the character is in the comp value, we'll replace with --${char}--.
function LetterCapitalize(str) {
const comp = "vn52tqsd0e4a";
return [...str].reduce((acc, char, idx, a) => {
if (idx === 0 || a[idx - 1] === ' ') {
char = char.toUpperCase();
} else if (comp.includes(char)) {
char = `--${char}--`;
}
return acc + char;
}, '')
}
console.log(LetterCapitalize("hello world"));
console.log(LetterCapitalize("hey man"));

What you say to JavaScript in the piece of code is that it should fit 5 characters in a place that can only hold one character.
output[i] = `--${output[i]}--`;
You need to change it to something like this (code below may not work):
output = output.substring(0,i-1) + el + output.substring(i+1,output.length - i-1);
I recommend using the string.replaceAll function instead. If you create a loop yourself you'll get problems when adding more characters on a place where original one character was present.
function LetterCapitalize(str) {
// code goes here
let array = str.split(" ")
for (let i=0; i<array.length; i++){
array[i] = array[i].charAt(0).toUpperCase() + array[i].slice(1)
}
let output = array.join(" ") ;
let comp = "vn52tqsd0e4a".split("");
comp.map(el=> {
output = output.replaceAll(el, '--' + el + '--');
});
console.log(output);
}
LetterCapitalize("hello world");

How to make abbreviations/acronyms in JavaScript?

new to coding I'm trying to make a function that makes "abbreviations/acronyms" of words, e.g. 'I love you' -> 'ily'.
I've tried rewriting the code in many ways but console.log only shows me the first letter of the first given word.
function makeAbbr(words) {
let abbrev = words[0];
let after = 0;
let i = 0;
for (const letter of words) {
if (letter === '') {
i = words.indexOf('', after);
abbrev += words[i + 1];
}
after++;
}
return abbrev;
}
const words = 'a bc def';
let result = makeAbbr(words);
console.log(result)

Without using arrays. But you really should learn about them.
Start by trimming leading and trailing whitespace.
Add the first character to your acronym.
Loop over the rest of the string and add the current character to the acronym if the previous character was a space (and the current character isn't).
function makeAbbr(words) {
words = words.trim();
const length = words.length;
let acronym = words[0];
for(let i = 1; i < length; i++) {
if(words[i - 1] === ' ' && words[i] !== ' ') {
acronym += words[i];
}
}
return acronym;
}
console.log(makeAbbr('I love you'));
console.log(makeAbbr('I love you'));
console.log(makeAbbr(' I love you '));
And here's the version for GottZ
function w(char) {
char = char.toLocaleLowerCase();
const coll = Intl.Collator('en');
const cmpA = coll.compare(char, 'a');
const cmpZ = coll.compare(char, 'z');
return cmpA >= 0 && cmpZ <= 0;
}
function makeAbbr(words) {
words = words.trim();
const length = words.length;
if(!length) return '';
let acronym = words[0];
for(let i = 1; i < length; i++) {
if(!w(words[i - 1]) && w(words[i])) {
acronym += words[i];
}
}
return acronym;
}
console.log(makeAbbr('I love you'));
console.log(makeAbbr('I love you'));
console.log(makeAbbr(' I love you '));
console.log(makeAbbr(' \tI ... ! love \n\r .you '));
console.log(makeAbbr(' \tI ... ! Löve \n\r .ÿou '));

Since you wanted something using your approach, try this (code is commented)
function makeAbbr(words) {
let abbrev = "";
for (let i = 0; i < words.length - 1; i++) { // Loop through every character except the last one
if (i == 0 && words[i] != " ") { // Add the first character
abbrev += words[i];
} else if (words[i] == " " && words[i + 1] != " ") { // If current character is space and next character isn't
abbrev += words[i + 1];
}
}
return abbrev.toLowerCase();
}
const words = 'a bc def';
let result = makeAbbr(words);
console.log(result)

here is my implementation of your function:
Split the sentence into an array, get the first letter of each word and join them into one string.
const makeAbbr = string => string.split(' ').map(word => word[0]).join('');
console.log(makeAbbr('stack overflow'));
console.log(makeAbbr('i love you'));
`

If you want to use your approach exactly, you had a typo on the line specified. A character can never be "" (an empty string), but a character can be a space " ". Fixing this typo makes your solution work.
function makeAbbr(words) {
let abbrev = words[0];
let after = 0;
let i = 0;
for (const letter of words) {
if (letter === ' ') { // This line here
i = words.indexOf(' ', after);
abbrev += words[i + 1];
}
after++;
}
return abbrev.toLowerCase(); // Also added .toLowerCase()
}
const words = 'a bc def';
let result = makeAbbr(words);
console.log(result)

There are couple of things tripping you up.
let abbrev = words[0]; is just taking the first letter of the word string you passed into the function, and at some point adding something new to it.
for (const letter of words) {...}: for/of statements are used for iterating over arrays, not strings.
Here's a remixed version of your code. It still uses for/of but this time we're creating an array of words from the string and iterating over that instead.
function makeAbbr(str) {
// Initialise `abbrev`
let abbrev = '';
// `split` the string into an array of words
// using a space as the delimiter
const words = str.split(' ');
// Now we can use `for/of` to iterate
// over the array of words
for (const word of words) {
// Now concatenate the lowercase first
// letter of each word to `abbrev`
abbrev += word[0].toLowerCase();
}
return abbrev;
}
console.log(makeAbbr('I love you'));
console.log(makeAbbr('One Two Three Four Five'));

Regular Expression to check- No more than 2 sequential numbers or characters and No more than 1 same numbers or characters-Javascript

I want to reject user input if it contains 2 sequential numbers or characters ,for example 1234,jkl, zyxw and if it contains more than 1 same numbers or characters like aaer,0000,aabb,pp22. Thank you for insights. I have regex for the second one but dont know how to combine the two expressions:
"([a-zA-Z0-9])\\1{1,}"

Doing this in regex is neither sound nor practical. However, you can easily check if your input contains a sequential (abc.. or cba) pattern using code like that:
function isSequencial(input) {
var numpattern = '0123456789012345789'; // match ascending/descending sequence of numbers.
var charpattern = 'ABCDEFGHIJKLMNOPQRSTUVWXYZYXWVUTSRQPONMLKJIHGFEDCBA'; // match ascending/descending sequence of letters.
for (var i = 0; i < input.length-1; i++) {
var shard = input.substring(i,i+2);
if(numpattern.indexOf(shard) != -1) {
console.log('sequential number pattern detected: ' + shard);
return true;
}
if (charpattern.indexOf(shard.toUpperCase()) != -1) {
console.log('sequential letter pattern detected: ' +shard);
return true;
}
}
return false;
}
console.log("isSequencial(a234):" + isSequencial("a234"));
console.log("isSequencial(azyx):" + isSequencial("azyx"));
console.log("isSequencial(xbc):" + isSequencial("xbc"));
console.log("isSequencial(2435):" + isSequencial("2435"));
This code can be optimized but is easy to understand and maintain since it does not try to do multiple things at once. You should be able to combine this with your existing approach.

The simplest solution for your first requirement would be to parse it, as with a regex it will be not that easy to set up, if at all possible.
Here I used charCodeAt (and check for both sequence/equal and duplicates characters)
var input1 = "1543abc3";
var input2 = "cba23DEf";
var input3 = "ba2354cd";
console.log('test 1');
testit(input1.split(''));
console.log('test 2');
testit(input2.split(''));
console.log('test 3');
testit(input3.split(''));
function testit (arr) {
var prev = arr[0].charCodeAt(0) + 1, prev2 = -1;
for (var i = 1; i < arr.length; i++) {
var arritem = arr[i].charCodeAt(0);
if ( (arritem == prev && arritem == (prev2+1)) || // abc
(arritem == (prev-2) && arritem == (prev2-3)) // cba
) {
console.log(' - sequence, more than 2: ', arr[i-2], arr[i-1], arr[i] );
//return false;
}
if (arr.indexOf(arr[i-1],i) > -1) {
console.log(' - duplicate, more than 1: ', arr[i-1] );
//return false;
}
prev2 = prev;
prev = arr[i].charCodeAt(0) + 1;
}
//return true;
}

Highlighting string at multiple occurrences

I'm currently implementing a substring search. From the algorithm, I get array of substrings occurence positions where each element is in the form of [startPos, endPos].
For example (in javascript array):
[[1,3], [8,10], [15,18]]
And the string to highlight is:
ACGATCGATCGGATCGAGCGATCGAGCGATCGAT
I want to highlight (in HTML using <b>) the original string, so it will highlight or bold the string from position 1 to 3, then 8 to 10, then 15 to 18, etc (0-indexed).
A<b>CGA</b>TCGA<b>TCG</b>GATC<b>GAGC</b>GATCGAGCGATCGAT
This is what I have tried (JavaScript):
function hilightAtPositions(text, posArray) {
var startPos, endPos;
var startTag = "<b>";
var endTag = "</b>";
var hilightedText = "";
for (var i = 0; i < posArray.length; i++) {
startPos = posArray[i][0];
endPos = posArray[i][1];
hilightedText = [text.slice(0, startPos), startTag, text.slice(startPos, endPos), endTag, text.slice(endPos)].join('');
}
return hilightedText;
}
But it highlights just a range from the posArray (and I know it is still incorrect yet). So, how can I highlight a string given multiple occurrences position?

Looking at this question, and following John3136's suggestion of going from tail to head, you could do:
String.prototype.splice = function( idx, rem, s ) {
return (this.slice(0,idx) + s + this.slice(idx + Math.abs(rem)));
};
function hilightAtPositions(text, posArray) {
var startPos, endPos;
posArray = posArray.sort(function(a,b){ return a[0] - b[0];});
for (var i = posArray.length-1; i >= 0; i--) {
startPos = posArray[i][0];
endPos = posArray[i][1];
text= text.splice(endPos, 0, "</b>");
text= text.splice(startPos, 0, "<b>");
}
return text;
}
Note that in your code, you are overwriting hilightedText with each iteration, losing your changes.

Try this:
var stringToHighlight = "ACGATCGATCGGATCGAGCGATCGAGCGATCGAT";
var highlightPositions = [[1,3], [8,10], [15,18]];
var lengthDelta = 0;
for (var highlight in highlightPositions) {
var start = highlightPositions[highlight][0] + lengthDelta;
var end = highlightPositions[highlight][1] + lengthDelta;
var first = stringToHighlight.substring(0, start);
var second = stringToHighlight.substring(start, end + 1);
var third = stringToHighlight.substring(end + 1);
stringToHighlight = first + "<b>" + second + "</b>" + third;
lengthDelta += ("<b></b>").length;
}
alert(stringToHighlight);
Demo: http://jsfiddle.net/kPkk3/

Assuming that you're trying to highlight search terms or something like that. Why not replace the term with the bolding?
example:
term: abc
var text = 'abcdefgabcqq';
var term = 'abc';
text.replace(term, '<b>' + term + '</b>');
This would allow you to avoid worrying about positions, assuming that you are trying to highlight a specific string.

Assuming your list of segments is ordered from lowest start to highest, try doing your array from last to first.
That way you are not changing parts of the string you haven't reached yet.
Just change the loop to:
for (var i = posArray.length-1; i >=0; i--) {

If you want to check for multiple string matches and highlight them, this code snippet works.
function highlightMatch(text, matchString) {
let textArr = text.split(' ');
let returnArr = [];
for(let i=0; i<textArr.length; i++) {
let subStrMatch = textArr[i].toLowerCase().indexOf(matchString.toLowerCase());
if(subStrMatch !== -1) {
let subStr = textArr[i].split('');
let subStrReturn = [];
for(let j=0 ;j<subStr.length; j++) {
if(j === subStrMatch) {
subStrReturn.push('<strong>' + subStr[j]);
} else if (j === subStrMatch + (matchString.length-1)){
subStrReturn.push(subStr[j] + '<strong>');
} else {
subStrReturn.push(subStr[j]);
}
}
returnArr.push(subStrReturn.join(''));
} else {
returnArr.push(textArr[i]);
}
}
return returnArr;
}
highlightMatch('Multi Test returns multiple results', 'multi');
=> (5) ['<strong>Multi<strong>', 'Test', 'returns', '<strong>multi<strong>ple', 'results']

Javascript word-count for any given DOM element

I'm wondering if there's a way to count the words inside a div for example. Say we have a div like so:
<div id="content">
hello how are you?
</div>
Then have the JS function return an integer of 4.
Is this possible? I have done this with form elements but can't seem to do it for non-form ones.
Any ideas?
g

If you know that the DIV is only going to have text in it, you can KISS:
var count = document.getElementById('content').innerHTML.split(' ').length;
If the div can have HTML tags in it, you're going to have to traverse its children looking for text nodes:
function get_text(el) {
ret = "";
var length = el.childNodes.length;
for(var i = 0; i < length; i++) {
var node = el.childNodes[i];
if(node.nodeType != 8) {
ret += node.nodeType != 1 ? node.nodeValue : get_text(node);
}
}
return ret;
}
var words = get_text(document.getElementById('content'));
var count = words.split(' ').length;
This is the same logic that the jQuery library uses to achieve the effect of its text() function. jQuery is a pretty awesome library that in this case is not necessary. However, if you find yourself doing a lot of DOM manipulation or AJAX then you might want to check it out.
EDIT:
As noted by Gumbo in the comments, the way we are splitting the strings above would count two consecutive spaces as a word. If you expect that sort of thing (and even if you don't) it's probably best to avoid it by splitting on a regular expression instead of on a simple space character. Keeping that in mind, instead of doing the above split, you should do something like this:
var count = words.split(/\s+/).length;
The only difference being on what we're passing to the split function.

Paolo Bergantino's second solution is incorrect for empty strings or strings that begin or end with whitespaces. Here's the fix:
var count = !s ? 0 : (s.split(/^\s+$/).length === 2 ? 0 : 2 +
s.split(/\s+/).length - s.split(/^\s+/).length - s.split(/\s+$/).length);
Explanation: If the string is empty, there are zero words; If the string has only whitespaces, there are zero words; Else, count the number of whitespace groups without the ones from the beginning and the end of the string.

string_var.match(/[^\s]+/g).length
seems like it's a better method than
string_var.split(/\s+/).length
At least it won't count "word " as 2 words -- ['word'] rather than ['word', '']. And it doesn't really require any funny add-on logic.

Or just use Countable.js to do the hard job ;)

document.deepText= function(hoo){
var A= [];
if(hoo){
hoo= hoo.firstChild;
while(hoo!= null){
if(hoo.nodeType== 3){
A[A.length]= hoo.data;
}
else A= A.concat(arguments.callee(hoo));
hoo= hoo.nextSibling;
}
}
return A;
}
I'd be fairly strict about what a word is-
function countwords(hoo){
var text= document.deepText(hoo).join(' ');
return text.match(/[A-Za-z\'\-]+/g).length;
}
alert(countwords(document.body))

Or you can do this:
function CountWords (this_field, show_word_count, show_char_count) {
if (show_word_count == null) {
show_word_count = true;
}
if (show_char_count == null) {
show_char_count = false;
}
var char_count = this_field.value.length;
var fullStr = this_field.value + " ";
var initial_whitespace_rExp = /^[^A-Za-z0-9]+/gi;
var left_trimmedStr = fullStr.replace(initial_whitespace_rExp, "");
var non_alphanumerics_rExp = rExp = /[^A-Za-z0-9]+/gi;
var cleanedStr = left_trimmedStr.replace(non_alphanumerics_rExp, " ");
var splitString = cleanedStr.split(" ");
var word_count = splitString.length -1;
if (fullStr.length <2) {
word_count = 0;
}
if (word_count == 1) {
wordOrWords = " word";
} else {
wordOrWords = " words";
}
if (char_count == 1) {
charOrChars = " character";
} else {
charOrChars = " characters";
}
if (show_word_count & show_char_count) {
alert ("Word Count:\n" + " " + word_count + wordOrWords + "\n" + " " + char_count + charOrChars);
} else {
if (show_word_count) {
alert ("Word Count: " + word_count + wordOrWords);
} else {
if (show_char_count) {
alert ("Character Count: " + char_count + charOrChars);
}
}
}
return word_count;
}

The get_text function in Paolo Bergantino's answer didn't work properly for me when two child nodes have no space between them. eg <h1>heading</h1><p>paragraph</p> would be returned as headingparagraph (notice lack of space between the words). So prepending a space to the nodeValue fixes this. But it introduces a space at the front of the text but I found a word count function that trims it off (plus it uses several regexps to ensure it counts words only). Word count and edited get_text functions below:
function get_text(el) {
ret = "";
var length = el.childNodes.length;
for(var i = 0; i < length; i++) {
var node = el.childNodes[i];
if(node.nodeType != 8) {
ret += node.nodeType != 1 ? ' '+node.nodeValue : get_text(node);
}
}
return ret;
}
function wordCount(fullStr) {
if (fullStr.length == 0) {
return 0;
} else {
fullStr = fullStr.replace(/\r+/g, " ");
fullStr = fullStr.replace(/\n+/g, " ");
fullStr = fullStr.replace(/[^A-Za-z0-9 ]+/gi, "");
fullStr = fullStr.replace(/^\s+/, "");
fullStr = fullStr.replace(/\s+$/, "");
fullStr = fullStr.replace(/\s+/gi, " ");
var splitString = fullStr.split(" ");
return splitString.length;
}
}
EDIT
kennebec's word counter is really good. But the one I've found includes a number as a word which is what I needed. Still, that's easy to add to kennebec's. But kennebec's text retrieval function will have the same problem.

This should account for preceding & trailing whitespaces
const wordCount = document.querySelector('#content').innerText.trim().split(/\s+/).length;

string_var.match(/[^\s]+/g).length - 1;

We Keep Coding

JavaScript is the programming language of the Web.

How to print top 5 frequently occurring words from a substring - javascript

Try var filteredArr = topWords.filter(function(item) { return item[1] != 0; }); var output = filteredArr.map(function(item) { return item[0] + " - " + item[1]; }).join(", ");

Related

Specific Array element replace

How to make abbreviations/acronyms in JavaScript?

Regular Expression to check- No more than 2 sequential numbers or characters and No more than 1 same numbers or characters-Javascript

Highlighting string at multiple occurrences

Javascript word-count for any given DOM element

Categories

Resources