Word frequency counter on specific words using javascript - javascript

I need to count number of predetermined words (wordlist) in a text. This is what I have done so far:
/**
 * Tally how often each whitespace-separated token occurs in `text`.
 *
 * `wordlist` is only consulted for truthiness when a counter is first
 * initialised; it does not filter tokens, so every token is counted.
 *
 * @param {string} text - Text to split on single whitespace characters.
 * @param {*} wordlist - When falsy, counters are never initialised to 0.
 * @returns {Object} Map of token to count.
 */
function frequencies(text, wordlist) {
  const tally = {};
  for (const token of text.split(/\s/)) {
    const needsInit = wordlist && !tally[token];
    if (needsInit) {
      tally[token] = 0;
    }
    tally[token] += 1;
  }
  return tally;
}
At the moment it counts all the words in the given text. How do I make it count only the words given in wordlist?

Check whether each word is in the given list using the Array#indexOf method (or the Array#includes method).
/**
 * Count how often each word from `wordlist` occurs in `text`.
 *
 * @param {string} text - Text to split on single whitespace characters.
 * @param {string[]} [wordlist] - Words to count; when falsy nothing is counted.
 * @returns {Object} Map of listed word to its number of occurrences.
 */
function frequencies(text, wordlist) {
  const freqMap = {};
  if (!wordlist) {
    return freqMap;
  }
  // Own-property check instead of truthiness: words such as "constructor"
  // or "toString" would otherwise pick up values inherited from
  // Object.prototype and corrupt the count.
  const hasOwn = Object.prototype.hasOwnProperty;
  for (const word of text.split(/\s/)) {
    if (!wordlist.includes(word)) {
      continue;
    }
    if (hasOwn.call(freqMap, word)) {
      freqMap[word] += 1;
    } else {
      freqMap[word] = 1;
    }
  }
  return freqMap;
}

This example accounts for some basic punctuation in the text. It will remove said punctuation, split on spaces, then uses reduce to build the object you're wanting.
let wordlist = ['hello', 'bob', 'you', 'later'];
let text = 'hello bob, how are you doing? i hope you are doing well. see you later.';
/**
 * Count how often each word from `wordlist` occurs in `text`.
 *
 * Periods, question marks and commas are stripped first, then the text is
 * split on single spaces.
 *
 * @param {string} text - Text to scan.
 * @param {string[]} wordlist - Words to count.
 * @returns {Object} Map of listed word to its number of occurrences.
 */
function frequencies(text, wordlist) {
  // Own-property check instead of truthiness, so words that match inherited
  // Object.prototype keys (e.g. "constructor") are counted correctly.
  const hasOwn = Object.prototype.hasOwnProperty;
  return text
    .replace(/(\.|\?|,)/g, '')
    .split(' ')
    .reduce(function (prev, curr) {
      if (wordlist.includes(curr)) {
        prev[curr] = hasOwn.call(prev, curr) ? prev[curr] + 1 : 1;
      }
      return prev;
    }, {});
}
console.log(frequencies(text, wordlist));

Related

counting the same letters in a string but not in all lenght

I'm starting my adventure with JavaScript and I have been given one of my first tasks.
I must create a function that finds the letter with the longest run of consecutive occurrences in a string and writes the result to the console.
For example:
var string = "assssssadaaaAAAasadaaab";
and in console.log should be (7,a) <---
the longest string is 7 consecutive identical characters (yes, before count i use .toLowerCase();, because the task requires it)
So far I have it and I don't know what to do next.
Someone want to help?
var string = "assssssadaaaAAAasadaaab".toLowerCase();
// Logs the lower-cased string, then the total number of "a" characters in it.
function writeInConsole() {
  console.log(string);
  var hits = string.match(/a/g);
  console.log(hits === null ? 0 : hits.length);
}
writeInConsole();
One option could be matching all consecutive characters using (.)\1* and sort the result by character length.
Then return an array with the length of the string and the character.
Note that this will take the first longest occurrence in case of multiple characters with the same length.
/**
 * Find the longest run of one repeated character in `s`.
 *
 * @param {string} s - String to scan.
 * @returns {Array} `[runLength, character]` for the first longest run, or
 *   `[]` when the pattern finds no run at all.
 */
function writeInConsole(s) {
  const runs = s.match(/(.)\1*/g);
  if (!runs) {
    return [];
  }
  let longest = runs[0];
  for (const run of runs.slice(1)) {
    // Strictly longer only, so the earliest run wins a tie.
    if (run.length > longest.length) {
      longest = run;
    }
  }
  return [longest.length, longest.charAt(0)];
}
for (const sample of ["assssssadaaaAAAasadaaab", "a", ""]) {
  console.log(writeInConsole(sample.toLowerCase()));
}
Another example when you have multiple consecutive characters with the same length
/**
 * Find every run of one repeated character that shares the maximum length.
 *
 * @param {string} s - String to scan.
 * @returns {Array<Array>} One `[maxLength, character]` pair per longest run,
 *   or `[]` when the pattern finds no run at all.
 */
function writeInConsole(s) {
  const runs = s.match(/(.)\1*/g);
  if (!runs) {
    return [];
  }
  const maxLength = runs.reduce(
    (best, run) => (run.length > best ? run.length : best),
    0
  );
  return runs
    .filter((run) => run.length === maxLength)
    .map((run) => [maxLength, run.charAt(0)]);
}
const samples = [
  "assssssadaaaAAAasadaaab",
  "aaabccc",
  "abc",
  "yyzzz",
  "aa",
  ""
];
for (const sample of samples) {
  console.log(writeInConsole(sample.toLowerCase()));
}
I'm not sure if this works for you:
// Counts how many '/' characters occur in source by visiting each character.
string source = "/once/upon/a/time/";
int count = 0;
foreach (char c in source)
if (c == '/') count++;
The answer given by using regular expressions is more succinct, but since you say you are just starting out with programming, I will offer a verbose one that might be easier to follow.
var string = "assssssadaaaAAAasadaaab";
var string = string.toLowerCase();
/**
 * Find the longest run of consecutive identical characters in `s`.
 *
 * @param {string} s - String to scan.
 * @returns {Array} `[letter, runLength]` of the first longest run;
 *   `['', 0]` for an empty string.
 */
function computeLongestRun(s) {
  if (s.length === 0) {
    return ['', 0];
  }
  // we set up for the computation at the first character in the string
  let longestRunLetter = s[0];
  let longestRunLength = 1;
  let currentLetter = s[0];
  let currentRunLength = 1;
  // loop through the string considering one character at a time
  for (let i = 1; i < s.length; i++) {
    if (s[i] === currentLetter) {
      currentRunLength++;
    } else {
      // reset to start counting a new run
      currentLetter = s[i];
      currentRunLength = 1;
    }
    // Compare after every character so a run that ends the string is not
    // missed; strict ">" keeps the earliest run on ties.
    if (currentRunLength > longestRunLength) {
      longestRunLetter = currentLetter;
      longestRunLength = currentRunLength;
    }
  }
  return [longestRunLetter, longestRunLength];
}
console.log(computeLongestRun(string));

Compare strings in Javascript

I need to compare titles (which are made of many words)
with a list of bad words. For one word, indexOf works fine.
but when there are many swear words it doesn't.
Can anyone help with this?
// Attempt from the question. indexOf searches the title for its whole
// argument as one substring, so a space-separated list of words only matches
// when that exact phrase occurs in the title.
var title = "This is my title";
// Despite its name, badwordlist holds the match index (-1 when not found).
var badwordlist = title.indexOf('fword and other bad words list');
//var badwordlist = title.indexOf('fword');
if ( badwordlist >= 0){
//do something
}
I feel the two answers posted so far are overdoing things
var title = "fword This is a f**king title.";
var badwords = ["fword", "f**king"];
var words = title.split(" ");
// Partition the title's words by exact membership in the bad-word list;
// either result is enough to decide.
var isBad = words.filter((word) => badwords.indexOf(word) >= 0);
var isGood = words.filter((word) => badwords.indexOf(word) < 0);
console.log(isBad, isGood);
// a non-empty isBad means the title contained swear words
You can use String.prototype.includes() to check if the string contains the bad word:
var title = "fword This is title.";
var badwords = ["fword", "f**k"];
// One boolean per bad word: true when the title contains it as a substring.
var isbad = [];
for (const badword of badwords) {
  isbad.push(title.includes(badword));
}
console.log(isbad);
This is a simple solution, which involves an array, a cycle and a function which coordinates these
var badwords = ['fword', 'uglyword'];
var replacements = ['*word', 'ug***ord'];
/**
 * Replace every occurrence of each entry in `badwords` with the entry at the
 * same index in `replacements`.
 *
 * Matching is by substring, so a bad word inside a longer word is replaced too.
 *
 * @param {string} title - Text to censor.
 * @returns {string} The censored text.
 */
function replaceBadwords(title) {
  let censored = title;
  badwords.forEach(function (badword, index) {
    // split/join replaces all occurrences and is a no-op when there are none.
    censored = censored.split(badword).join(replacements[index]);
  });
  return censored;
}
However, the word 'assignment' contains an ugly word, which in fact is not ugly. I mean if you read only the first three characters, you will think this is an ugly word. To cope with these exceptions make sure you will not censor these as well.
See this SO question. This will do:
var arr = ['banana', 'monkey banana', 'apple', 'kiwi', 'orange'];
/**
 * Filter predicate: accept a value only if it contains no prohibited term.
 *
 * @param {string} value - Candidate entry.
 * @returns {boolean} false when any prohibited term occurs as a substring.
 */
function checker(value) {
  var prohibited = ['banana', 'apple'];
  var containsProhibited = prohibited.some(function (term) {
    return value.indexOf(term) > -1;
  });
  return !containsProhibited;
}
arr = arr.filter(checker);
console.log(arr);
Obtain arr by splitting your title on space, eg title.split(" ").
Once you do any filtering you can create a filtered title with title = arr.join(" ").

How can I find if ALL of string2 letters are also contained within string1 somewhere?

I am trying to compare two strings to see if ALL of one of the string's input is also within another string, regardless of order.
So far I have the following code...
What am I doing wrong?
var str1= "rkqodlw"
var str2= "world"
// The call precedes the definition; this works because function
// declarations are hoisted.
StringScrambler(str1, str2);
// Attempt from the question at checking whether all letters of str2 occur in
// str1. NOTE(review): it does not work as written; see the comments below.
function StringScrambler(str1, str2) {
var string1= str1.split("").sort();
console.log(string1);
var string2 = str2.split("").sort();
console.log(string2);
// NOTE(review): matches, i and j are assigned without being declared.
matches = [];
for (i=0; i< string1.length; i++) {
// NOTE(review): this inner loop increments i, not j.
for (j=0; j<string2.length; i++) {
// NOTE(review): compares the two indices, not the characters at them, and
// the body returns on its first pass, so it never actually loops.
while (j === i) {
matches.push(j);
console.log(matches);
// sort() sorts in place and returns the same matches array.
var matchSort = matches.sort();
console.log(matchSort);
// NOTE(review): === on arrays compares references; matchSort and string2 are
// different arrays, so this branch can never be taken.
if (matchSort === string2) {
return true;
}else {
return false;
}
}
}
}
}
All the answers so far work fine, but they will not work for words with double letters in the second string but not in the first (e.g. 'worlld' - notice the double L). The trick is to modify the first word by removing each character once it has been found, so that the same letter is not matched again. Something like this would do the trick:
/**
 * Report whether every character of `str2`, counting duplicates, can be
 * matched to a distinct character of `str1`.
 *
 * Logs the result before returning it.
 *
 * @param {string} str1 - String supplying the available characters.
 * @param {string} str2 - String whose characters must all be found.
 * @returns {boolean} true when all of str2's characters were matched.
 */
function StringScrambler(str1, str2) {
  const pool = str1.split('');
  let isATrueSubset = true;
  for (const char of str2.split('')) {
    const foundAt = pool.indexOf(char);
    if (foundAt === -1) {
      // Record the miss; the remaining characters are still visited.
      isATrueSubset = false;
      continue;
    }
    // Consume the matched character so it cannot satisfy a later duplicate.
    pool.splice(foundAt, 1);
  }
  console.log(isATrueSubset);
  return isATrueSubset;
}
StringScrambler('rkqodlw ', 'world '); // outputs true
StringScrambler('rkqodlw ', 'worlld '); // outputs false
var one = "dlrow";
var two = "world";
// true only when each character of `one` occurs somewhere in `two`.
var allCharsFound = one.split("").every(function (char) {
  return two.indexOf(char) >= 0;
});
console.log(allCharsFound);
var str1= "rkqodlw";
var str2= "world";
/**
 * Report whether every character of `$str2` occurs somewhere in `$str1`.
 *
 * Duplicates are not counted: one occurrence in `$str1` satisfies any number
 * of the same character in `$str2`.
 *
 * @param {string} [$str1] - String to search in; defaults to the global str1.
 * @param {string} [$str2] - String whose characters are looked up; defaults
 *   to the global str2.
 * @returns {boolean} false as soon as one character is missing, else true.
 */
function test($str1 = str1, $str2 = str2) {
  // Use the parameters rather than the globals; the defaults keep
  // zero-argument calls working against the global strings.
  for (const char of $str2.split("")) {
    if ($str1.indexOf(char) === -1) {
      return false;
    }
  }
  return true;
}
You can use the following code to do this task:
// Every character of "world" occurs in "rkqodlw", so this alerts true.
alert (AllFirstInSecond("world", "rkqodlw"));
// "z" does not occur in "rkqodlw", so this alerts false.
alert (AllFirstInSecond("worldz", "rkqodlw"));
/**
 * Report whether every character of `str1` occurs somewhere in `str2`.
 *
 * @param {string} str1 - String whose characters are looked up.
 * @param {string} str2 - String to search in.
 * @returns {boolean} false as soon as a character is missing, else true.
 */
function AllFirstInSecond(str1, str2) {
  // Walk str1 from its last character back to its first.
  for (let pos = str1.length - 1; pos >= 0; pos -= 1) {
    const isMissing = str2.indexOf(str1.charAt(pos)) === -1;
    if (isMissing) {
      return false;
    }
  }
  return true;
}
It simply checks every single character in the first string to see if it's in the second. If not, it returns false.
Only once all have been found does it return true.
There are possibilities for optimisation (every character is checked even if it's a duplicate that's already been checked) but, unless your strings are particularly large, there's probably not much absolute gain to be had.
If str2 is always a subset of str1, then this answer can be used
Compute intersection of two arrays in JavaScript
var arr1 = "rkqodlw".split("");
var arr2 = "world".split("");
// Collect, in arr2's order, each character of arr2 that also appears in arr1.
var commonValues = [];
arr2.forEach(function (value) {
  if (arr1.indexOf(value) !== -1) {
    commonValues.push(value);
  }
});
// Show the shared characters joined into a single string.
alert(commonValues.join(""))
This will compare each character of the second string against the first one, and if it is present it will be added to the matches array.
var str1= "rkqodlw";
var str2= "world2";
StringScrambler(str1, str2);
/**
 * Collect the characters of `str2` that also occur in `str1`.
 *
 * Logs the sorted characters of `str2`, each match as it is found, and the
 * final list of matches.
 *
 * @param {string} str1 - String to search in.
 * @param {string} str2 - String whose characters are looked up.
 * @returns {string[]} Matching characters of str2, in sorted order.
 */
function StringScrambler(str1, str2) {
  const string2 = str2.split("").sort();
  console.log(string2);
  // Declared locally: assigning to undeclared names throws in strict mode
  // and ES modules, and leaks globals otherwise.
  const matches = [];
  for (const char of string2) {
    if (str1.indexOf(char) > -1) {
      matches.push(char);
      console.log(char);
    }
  }
  console.log(matches);
  return matches;
}
try this:
var str1= "rkqodlw";
var str2= "world";
StringScrambler(str1, str2);
/**
 * Collect the characters of `str1` that also occur in `str2`.
 *
 * @param {string} str1 - String whose characters are looked up.
 * @param {string} str2 - String to search in.
 * @returns {string[]} Matching characters of str1, in sorted order.
 */
function StringScrambler(str1, str2) {
  const string1 = str1.split("").sort();
  const string2 = str2.split("").sort();
  // Declared locally: assigning to undeclared names throws in strict mode
  // and ES modules, and leaks globals otherwise.
  const matches = [];
  for (const char of string1) {
    if (string2.indexOf(char) > -1) {
      matches.push(char);
    }
  }
  return matches;
}

how to find the most repeat word in string?

Can you please tell me how to find the most repeated word in a string?
Example
If input is this
"how do you do"
Output is "do"
var str="how do you do"
// Attempt from the question: for each word, count how often it occurs.
// NOTE(review): as written it never produces a result; see the comments below.
function findMostReaptedWord(str){
var res = str.split(" ");
alert(res.length);
var count;
var compareString;
for(var i=0;i<res.length ;i++){
count=0;
compareString=res[i]
// NOTE(review): `res.lenth` is a typo for `res.length`, so it is undefined and
// the condition is always false; the condition also tests i instead of j, and
// j is never declared.
for (j=0;i<res.lenth ;j++){
if(compareString==res[j]){
count++
}
}
// NOTE(review): count is discarded here; no maximum is tracked.
}
// There is no return statement, so callers receive undefined.
}
alert(findMostReaptedWord(str))
fiddle
http://jsfiddle.net/omjg9v0q/
I gave the idea in a comment. Here it is in code :
/**
 * Find the most frequently occurring word in `str`.
 *
 * Words are maximal runs of `\w` characters. When several words share the
 * highest count, the one enumerated last from the counts object is returned.
 *
 * @param {string} str - Text to scan.
 * @returns {string|undefined} The most repeated word, or undefined when the
 *   text contains no words.
 */
function findMostReaptedWord(str) {
  // Prototype-less object: words such as "constructor" must not pick up
  // values inherited from Object.prototype.
  const counts = Object.create(null);
  // match() returns null when nothing matches; treat that as "no words".
  const words = str.match(/\w+/g) || [];
  words.forEach(function (w) {
    counts[w] = (counts[w] || 0) + 1;
  });
  let mr;
  let mc = 0;
  for (const w in counts) {
    if (counts[w] >= mc) {
      mc = counts[w];
      mr = w;
    }
  }
  return mr;
}
A few details :
I use str.match(/\w+/g) for a better decomposition in words. Yours would take anything not a space as a word or part of a word.
counts is a map giving the number of occurrences of each words (i.e. counts["do"] is 2)
using a map avoids doing two levels of loop, which is very slow
Here is my approach
First, separate the words from the string using Regular Expression.
Declare an object as a Map which will help you to find the occurrences of each word. (You can use Map Data Structure!)
Find the most repeated word from that object.
let str = 'How do you do?';
console.log(findMostRepeatedWord(str)); // Result: "do"
/**
 * Find the most frequently occurring word in `str`.
 *
 * Words are maximal runs of `\w` characters. When several words share the
 * highest count, the one that appears first in the text is returned. The
 * word list and the counts are logged along the way.
 *
 * @param {string} str - Text to scan.
 * @returns {string} The most repeated word, or '' when there are no words.
 */
function findMostRepeatedWord(str) {
  // match() returns null when nothing matches; treat that as "no words".
  const words = str.match(/\w+/g) || [];
  console.log(words); // [ 'How', 'do', 'you', 'do' ]
  const occurances = {};
  // Own-property check instead of truthiness, so words that match inherited
  // Object.prototype keys (e.g. "constructor") are counted correctly.
  const hasOwn = Object.prototype.hasOwnProperty;
  for (const word of words) {
    if (hasOwn.call(occurances, word)) {
      occurances[word] += 1;
    } else {
      occurances[word] = 1;
    }
  }
  console.log(occurances); // { How: 1, do: 2, you: 1 }
  let max = 0;
  let mostRepeatedWord = '';
  for (const word of words) {
    if (occurances[word] > max) {
      max = occurances[word];
      mostRepeatedWord = word;
    }
  }
  return mostRepeatedWord;
}
Here I give you an approach,
Sort the words first. That way "how do you do" becomes "do do how you".
Iterate the string to count the words that repeat, keep the maximum number of times repeating word in memory while iterating.
-
Mebin
This function may help you
/**
 * Find the word that is repeated most often in `str`.
 *
 * The text is split on single spaces. When several words share the highest
 * count, the one that appears first is returned.
 *
 * @param {string} str - Text to scan.
 * @returns {string} The most repeated word, or '' when no word occurs more
 *   than once.
 */
function maxWord(str) {
  const occurrences = new Map();
  for (const word of str.split(' ')) {
    occurrences.set(word, (occurrences.get(word) || 0) + 1);
  }
  let max = 0;
  let maxword = '';
  // A Map iterates in insertion order, i.e. order of first appearance.
  for (const [word, total] of occurrences) {
    // Count repeats beyond the first occurrence, so words seen once never win.
    const repeats = total - 1;
    // Compare against the numeric maximum, not against the word itself.
    if (repeats > max) {
      max = repeats;
      maxword = word;
    }
  }
  return maxword;
}
maxWord('how do you do'); // returns do

Recursive backtracking in javascript (closures)

I'm trying to implement a recursive backtracking algorithm in javascript (determine if a word is an anagram). The rough idea is I permute every single combination of the different letters, then I compare the final word to the wordlist.
I've gotten the code to work, but the syntax is a little ugly because to avoid the for-loop with closure errors, I've used a self-invoking anonymous function, stored that value, and if it is true, return to break the for loop. I'm just wondering if there is a smarter way to implement the code so I don't need to worry about closures?
var wordList = ['boats', 'horse', 'cow'];
var input = 'oastb';
/**
 * Recursively permute the letters in `remaining`, appending them to `sofar`,
 * and report whether any complete permutation is in `wordList`.
 *
 * Logs the outcome of every complete permutation, and the word itself on a hit.
 *
 * @param {string} sofar - Letters already placed.
 * @param {string} remaining - Letters still to be placed.
 * @returns {boolean} true as soon as one permutation is found in wordList.
 */
function isAnagram(sofar, remaining) {
  if (remaining.length === 0) {
    var isKnownWord = wordList.indexOf(sofar) !== -1;
    console.log('returning: ' + isKnownWord);
    if (isKnownWord) {
      console.log(sofar);
    }
    return isKnownWord;
  }
  for (var i = 0; i < remaining.length; i++) {
    // Take letter i next and recurse on the letters left over.
    var leftover = remaining.substring(0, i) + remaining.substring(i + 1);
    if (isAnagram(sofar + remaining[i], leftover)) {
      return true;
    }
  }
  return false;
}
isAnagram('', input);
Yes, there's an easier way to determine whether a given word is an anagram of a list of words. First, we need to normalize each word so that each anagram produces the same unique word.
For example:
normalize("boats") = "abost";
normalize("horse") = "ehors";
normalize("cow") = "cow";
normalize("oastb") = "abost";
As it turns out, implementing such a function in JavaScript is very simple:
/**
 * Produce a canonical form of `word` by sorting its characters, so that all
 * anagrams of a word normalize to the same string.
 *
 * @param {string} word - Word to normalize.
 * @returns {string} The characters of word in sorted order.
 */
function normalize(word) {
  var letters = word.split(""); // string -> array of characters
  letters.sort(); // default sort order
  return letters.join(""); // array of characters -> string
}
Next, we create a function that takes a given list of words, normalizes them, and adds them to a dictionary which maps each normalized word to its list of anagrams.
/**
 * Group words by their normalized form.
 *
 * @param {string[]} words - Words to index.
 * @returns {Object} Map from normalized word to the list of input words that
 *   normalize to it, in input order.
 */
function dictionaryOf(words) {
  return words.reduce(function (dictionary, word) {
    var key = normalize(word);
    if (!dictionary.hasOwnProperty(key)) {
      dictionary[key] = [];
    }
    dictionary[key].push(word);
    return dictionary;
  }, {});
}
Then, we create a function which when given a dictionary of normalized words to anagrams and a specific word, returns the list of anagrams of that word.
/**
 * Look up the known anagrams of `word`.
 *
 * @param {Object} dictionary - Map from normalized word to list of words.
 * @param {string} word - Word to look up.
 * @returns {string[]} The words stored under word's normalized form, or an
 *   empty array when there is no such entry.
 */
function anagramsOf(dictionary, word) {
  var key = normalize(word);
  if (dictionary.hasOwnProperty(key)) {
    return dictionary[key];
  }
  return [];
}
Finally, we can implement the isAnagram function as follows:
/**
 * Report whether `word` has at least one anagram in `dictionary`.
 *
 * @param {Object} dictionary - Map from normalized word to list of words.
 * @param {string} word - Word to test.
 * @returns {boolean} true when the lookup yields a non-empty list.
 */
function isAnagram(dictionary, word) {
  var matches = anagramsOf(dictionary, word);
  return matches.length > 0;
}
We use it as follows:
// Build the lookup once from the known words, then test a candidate against it.
var dictionary = dictionaryOf(["boats", "horse", "cow"]);
alert(isAnagram(dictionary, "oastb"));
Putting it all together:
// The functions used here are declared further down; function declarations
// are hoisted, so calling them first works.
var dictionary = dictionaryOf(["boats", "horse", "cow"]);
alert(isAnagram(dictionary, "oastb"));
/**
 * Canonical form of a word: its characters in sorted order, so all anagrams
 * of a word share one key.
 */
function normalize(word) {
  var letters = word.split("");
  letters.sort();
  return letters.join("");
}
/**
 * Group words by normalized form.
 * Returns a map from normalized word to the input words sharing it.
 */
function dictionaryOf(words) {
  return words.reduce(function (dictionary, word) {
    var key = normalize(word);
    if (!dictionary.hasOwnProperty(key)) {
      dictionary[key] = [];
    }
    dictionary[key].push(word);
    return dictionary;
  }, {});
}
/**
 * Words stored under word's normalized form, or an empty array when the
 * dictionary has no such entry.
 */
function anagramsOf(dictionary, word) {
  var key = normalize(word);
  if (dictionary.hasOwnProperty(key)) {
    return dictionary[key];
  }
  return [];
}
/** true when word has at least one anagram in the dictionary. */
function isAnagram(dictionary, word) {
  var matches = anagramsOf(dictionary, word);
  return matches.length > 0;
}
Hope that helps.

Categories