This question already has answers here:
Counting the occurrences / frequency of array elements
(39 answers)
Closed 4 months ago.
I need to write some kind of loop that can count the frequency of each letter in a string.
For example: "aabsssd"
output: a:2, b:1, s:3, d:1
Also want to map same character as property name in object. Any good idea how to do this?
I am not sure how to do it.
This is where I am so far:
var arr = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"];
function counter(x) {
var count = 0,
temp = [];
x = x.split('');
console.log(x);
for (var i = 0, len = x.length; i < len; i++) {
if (x[i] == "a") {
count++;
}
}
return count;
}
var a = "aabbddd";
console.log(counter(a));
Here you go:
function getFrequency(string) {
var freq = {};
for (var i=0; i<string.length;i++) {
var character = string.charAt(i);
if (freq[character]) {
freq[character]++;
} else {
freq[character] = 1;
}
}
return freq;
};
some ES6 syntax with reduce:
let counter = str => {
return str.split('').reduce((total, letter) => {
total[letter] ? total[letter]++ : total[letter] = 1;
return total;
}, {});
};
counter("aabsssd"); // => { a: 2, b: 1, s: 3, d: 1 }
Another solution:
function count (string) {
var count = {};
string.split('').forEach(function(s) {
count[s] ? count[s]++ : count[s] = 1;
});
return count;
}
With some ES6 features and short-circuiting:
const counter = s => [...s].reduce((a, c) => (a[c] = ++a[c] || 1) && a, {})
console.log(
counter("hello") // {h: 1, e: 1, l: 2, o: 1}
)
Here's another way:
const freqMap = s => [...s].reduce((freq,c) => {freq[c] = -~freq[c]; return freq} ,{})
Or, if you prefer a "for" loop:
function freqMap(s) {
freq={};
for (let c of s)
freq[c]=-~freq[c];
return freq;
}
e.g. freqMap("MaMaMia") returns Object{M : 3, a : 3, i : 1}
This method leverages the fact that in javascript, bitwise not on "undefined" gives -1, (whereas "undefined+1" gives NaN).
So, -~undefined is 1, -~1 is 2, -~2 is 3 etc.
We can thus iterate over the characters of the string, and simply increment freq[c] without any "if". The first time we encounter a character c, freq[c] will be undefined, so we set it to -~freq[c] which is 1. If we subsequently encounter c again, we again set freq[c] to -~freq[c], which will now be 2, etc.
Simple, elegant, concise.
More declarative way to get a word histogram will be to utilise reduce to iterate through letters and come up with a new object that contains letters as keys and frequencies as values.
function getFrequency(str) {
return str.split('').reduce( (prev, curr) => {
prev[curr] = prev[curr] ? prev[curr] + 1 : 1;
return prev;
}, {});
};
console.log(getFrequency('test')); // => {t: 2, e: 1, s: 1}
a leaner, functional solution:
using ES6 Arrows && Logical Operators:
const buildFreqDict = string =>
string.split('').reduce((freqDict, char) => {
freqDict[char] = (freqDict[char] || 0) + 1;
return freqDict;
}, {})
console.log(buildFreqDict("banana"))
Explained
split string into array of characters.
and then feed it into a reduce method (using method.chaining()).
if char is already logged in countDict then add 1 to it.
or if character not found in countDict then set it to 1.
return new values back up to reduce's accumulator object
NB: don't forget about including the third argument of .reduce(): in this case it is a {} (object literal) that serves to initialize the freqDict object.
for more info see Counting instances of values in an object half way down the page here: MDN Reduce
and for more info about using logical operators please see here: MDN Logical Operators
An easy way. In addition, its get you an alphabetically sorted list. It loops throught an arrray and evaluate if the character is already in the object: if false, the character is added to the object, if true, its frequency increase a unit.
const text= "Lorem ipsum dolor sit amet consectetur adipiscing"
const textAsArray = text.split('').sort()
let charactersList = {}
for (char of textAsArray) {
if (!charactersList[char]) {
charactersList[char]=1;
}
else {
charactersList[char]++
}
}
console.log(charactersList)
I have reviewed and I think that this adapts very well to the need they pose. I would like it to be in a single line but I don't know how to generate the object dynamically.
const uniqueCount=(arr)=>{
let rs ={};
arr.sort().join("").match(/(.)(\1*)/g).map(i=>rs[i[0]]=i.length);
return rs;
};
console.log(uniqueCount(["a","b","c","d","d","e","a","b","c","f","g","h","h","h","e","a"]));
//{ a: 3, b: 2, c: 2, d: 2, e: 2, f: 1, g: 1, h: 3 }
I find it very successful to use .match() and regex /(.)(\1*)/g as explained above.
If it is just a string, you just need to add a .split("") before and that's it.
One more version with sorting by alphabetically. This function works for both.
Frequency of characters by alphabetically sorted
Frequency of characters in order of occurrence
Caveat: Only works if whole string is in lowercase
function freqWithAlphabetTable(str, doNeedToSort) {
let cnt = new Array(26).fill(0), firstLowerCase = 97, output = {}
for (let i = 0; i < str.length; i++)
cnt[str[i].charCodeAt(0) - firstLowerCase]++ // filling the array with count at it's index
if (doNeedToSort) {
for (let i = 0; i < cnt.length; i++) {
if (cnt[i] !== 0)
output[String.fromCharCode(firstLowerCase)] = cnt[i]
firstLowerCase++;
}
} else {
for (let i = 0; i < str.length; i++) {
let letterIndexVal = cnt[str[i].charCodeAt(0) - firstLowerCase];
if (letterIndexVal != 0 ) {
output[str[i]] = letterIndexVal
letterIndexVal = 0 // replacing it with zero to avoid repetition
}
}
}
console.log(output);
return output;
}
for(i = strlen(string)var string = 'aabsssd';
var chars = new Array();
for(var i = 0; i < string.length; i++){
var char = string.charAt(i);
if(chars[char] == undefined){
chars[char] = 0;
}
chars[char]++;
}
console.log(chars);
Here's another option using underscore.js:
function charCount(str) {
return _(str.split('')).countBy(function(char) {
return char.toLowerCase();
});
}
charCount('aaabbbbdd') outputs Object {a: 3, b: 4, d: 2}
const recorrences = ['a', 'b', 'c', 'a', 'b','a']
.map(i => !!~i.indexOf('a'))
.filter(i => i)
.length;
console.log(`recorrences ${recorrences}`)
//recorrences 3
// Count frequency of characters in a string
// input: 'Hello, I'm Paul!'
// result: {
// H: 1,
// E: 1,
// L: 3,
// ... and so on ...
// }
const countChars = (string) => {
let charStats = {};
string = string.replace(' ', '').toUpperCase().split('');
string.forEach((char) => {
if (charStats[char]) {
charStats[char]++;
} else {
charStats[char] = 1;
}
});
return charStats;
};
Another Solution
function maxChar(str) {
const charMap = {};
let max = 0;
let maxChar = '';
for(let char of str){
if(charMap[char]){
charMap[char]++;
}else{
charMap[char] = 1;
}
}
for(let char in charMap){
if(charMap[char] > max){
max = charMap[char];
maxChar = char;
}
}
return maxChar;
}
===>
maxChar('355385')
"5"
var str = 'abcccdddd';
function maxCharCount(target) {
const chars = {};
let maxChar = '';
let maxValue = 1;
for (let char of target) {
chars[char] = chars[char] + 1 || 1;
}
return chars;
}
console.log(maxCharCount(str));
The same solution but refactored. So cool how we can solve this problem with so many different answers :)
function getFrequency(string) {
var freq = {};
for (let character in string) {
let char = string[character];
(freq[char]) ? freq[char]++ : freq[char] = 1
}
return freq;
};
You can use this. Just pass the string and it will return object with all the character frequency.
function buildCharMap(string) {
const charMap = {};
string.replace(/[^\w]/g, '').toLowerCase();
for (let char of string) {
charMap[char] = charMap[char] + 1 || 1;
}
return charMap;
}
[...str].map( char => map.get(char) ? map.set( char, map.get(char) + 1) : map.set(char,1) )
cheat code to count frequency of a char in a string is
let target = "e";
let string = " i want to see that person that came in here last";
let frequency = string.split(target).length - 1;
or all in one line
console.log(string.split("e").length - 1)
Everyone using split and reduce are over-complicating things.
string is an iterator so you can use a for/of loop to go over each letter - there's no need to split it into an array so reduce can use it. reduce is very useful for lots of things but it often seems like: "when all you have is a hammer everything looks like a nail". I think its used unnecessarily in many places.
Anyway...
Create a new object.
Loop over the string.
If there is no key in the object that corresponds to the current letter, add it and set it it to zero.
Increment it.
function counter(str) {
// Create an object
const obj = {};
// Loop through the string
for (const letter of str) {
// If the object doesn't have a `letter`
// property create one and set it to 0;
obj[letter] ??= 0;
// Increment the value
++obj[letter];
}
// Finally return the object
return obj;
}
const str = 'aabbddd';
console.log(counter(str));
Additional documentation
for/of
Logical nullish assignment
Related
i am trying to use recursion to return each character in a string. However, the output is not
//We define a function with input parameter.
function countCharInString(string) {
//vi Define an empty objec
const result = {};
//we loop through the length of string
for (let i = 0; i < string.length; i++) {
//create another variable for each element in string
const ch = string[i];
//BASE CASE: if string is empty, return Object with nothing
if (!result[ch]) {
return result[ch]=0;
} else {
//RECURSION: 1 plus whatever the length of the substring from the next character onwards is
return countCharInString(result[ch] + 1)
}
}
}
console.log(countCharInString("Vi skal tælle bogstaver"))
the output should be the following:
var result = {
l : 3,
a : 2,
e : 2,
s : 2,
t : 2,
v : 2,
b: 1,
i : 1,
k : 1,
o : 1,
r : 1,
æ : 1
};
i would suggest to do it with a simple reduce like so
var inputString = 'donald duck';
var result = inputString.split('').reduce((acc, char, index) => {
if (acc[char] !== undefined) {
acc[char] = acc[char] + 1;
}
else {
acc = { ...acc, [char]: 1 }
}
return acc
}, {})
see: https://jsfiddle.net/yswu91zh/21/
Only recursion would not give you the output that you are asking for. After recursively counting character you have to sort it by frequency and then by character. I have excluded a bunch of punctuation with space from counting, if you want exclude more just add it to the punctuation string. You have to use String.prototype.localeCompare() method to compare the characters. This method compares two strings in the current locale. As you are using Danish language you have to specify locale as da.
const punctuations = '.,:;!? ';
const countCharInString = (str, p = {}) => {
if (str.length === 0) return p;
const key = str[0].toLowerCase();
if (!punctuations.includes(key)) {
if (!p[key]) p[key] = 1;
else p[key] += 1;
}
return countCharInString(str.slice(1), p);
};
const cmp = (x, y) => {
if (x[1] === y[1]) {
return x[0].localeCompare(y[0], 'da');
}
return x[1] < y[1] ? 1 : -1;
};
const ret = Object.fromEntries(
Object.entries(countCharInString('Vi skal tælle bogstaver')).sort(cmp)
);
console.log(ret);
I am trying to find the places of each letter in a sentence by using "dictionaries". The problem is I want to find all the places that each letter is and not only the last one. I am very new to JavaScript and couldn't figure out the way to do it.
function letters(stringArgument) {
stringArgument = stringArgument.replace(/ /g,'');
var dict = {};
for (var i=0; i < stringArgument.length; i++ )
if (!stringArgument[i] in dict){
dict[stringArgument[i]] = [];
}else{
dict[stringArgument[i]] = [i+1]
}
return dict
}
var a = letters('Lost time is never found again.');
console.log(a);
naturally gives this output:
{ L: [ 1 ], o: [ 17 ], s: [ 10 ], t: [ 5 ]...
but it should give this:
{ L: [ 1 ], o: [ 2, 17 ], s: [ 3, 10 ], t: [ 4, 5 ]...
Also each letter is saved to the dictionary at the same order they appear in the sentence, how can I order the letters alphabetically?
What you need is a function that gets the positions of a character in a given string.
Try this:
function findAllPositions(char, content) {
var result = [];
let index = content.indexOf(char);
while(index !== -1) {
result.push(index);
index = content.indexOf(char, index + 1);
}
return result;
}
findAllPositions('o', 'Lost time is never found again.'); // Result = [1, 20]
Using this we can update the letter function as follows:
function letters(stringArgument) {
stringArgument = stringArgument.replace(/ /g, '');
var dict = {};
for (const char of stringArgument) {
dict[char] = findAllPositions(char, stringArgument)
}
return dict;
}
letters('is again.')
/*
{
"i": [0, 5],
"s": [1],
"a": [2, 4],
"g": [3],
"n": [6],
".": [7]
}
*/
You need to have
parantheses for the check
if (!(stringArgument[i] in dict)) {
create an array if the above is true
push the postion to the array
For getting a sorted output, you could take the entries of the object, apply a sorting by taking the key and show the result in order.
Object have an insertation oder for not positive 32 bit numbers (like indixes) or symbols. The index like numbers are sorted by value and appears first in the object.
function letters(stringArgument) {
stringArgument = stringArgument.replace(/ /g, '');
var dict = {};
for (var i = 0; i < stringArgument.length; i++) {
if (!(stringArgument[i] in dict)) {
dict[stringArgument[i]] = [];
}
dict[stringArgument[i]].push(i + 1);
}
return dict;
}
var a = letters('Lost time is never found again.');
Object
.entries(a)
.sort(([a], [b]) => a.localeCompare(b))
.forEach(([key, positions]) => console.log(key, ...positions));
console.log(a);
First, for any item, if it is not in an empty array:
var notInDict = !(stringArgument[i] in dict);
If not in dict, then initialize an empty array and push the item in it using
dict[stringArgument[i]].push(i + 1);
Try this.
function letters(stringArgument) {
stringArgument = stringArgument.replace(/ /g, "");
var dict = {};
for (var i = 0; i < stringArgument.length; i++) {
var notInDict = !(stringArgument[i] in dict);
if (notInDict) {
dict[stringArgument[i]] = [];
}
dict[stringArgument[i]].push(i + 1);
}
return dict;
}
var a = letters("Lost time is never found again.");
console.log(a);
you are assigning a new array at each iteration
dict[stringArgument[i]] = [i+1]
what you need to do is push the new position to existing array.
dict[stringArgument[i]].push(i+1)
also, remove the else block
function letters(stringArgument) {
stringArgument = stringArgument.toLowerCase().replace(/ /g,'');
var dict = {};
for (var i=0; i < stringArgument.length; i++ ){
if (!dict.hasOwnProperty(stringArgument[i])){
dict[stringArgument[i]] = [];
}
dict[stringArgument[i]].push(i+1);
}
//sorting
var letters = Object.keys(dict); //returns a array
letters.sort();
var sortedDic = {};
for(var i in letters) {
sortedDic[letters[i]] = dict[letters[i]];
}
return sortedDic;
}
var a = letters('Lost time is never found again.');
console.log(a);
for the first part you can also do that:
let sentence = 'Lost time is never found again.'
let tabLetters = [...sentence.replace(/ /g,'')].reduce((a,c,i)=>
{
if (!a[c]) a[c] = [i+1]
else a[c].push(i+1)
return a
},{})
document.write(JSON.stringify(tabLetters))
I have string like the following:
11222233344444445666
What I would like to do is output the number followed the times it was displayed:
112433475163
Question is, I want this to be efficient. I can store this in an object as the following:
1: { id: 1, displayed: 2},
2: { id: 2, displayed: 1},
3: { id: 3, displayed: 2},
etc.
I can access this object and increment displayed.
My issues is, there is no guarantee in the order. I would like to store the keys in the order they are in the string. How do I accomplish the importance of the order in the object?
This is a proposal for run length coding with an array which holds infomation about one charcter and the count of it:
{
"char": "1",
"count": 2
},
var string = "11222233344444445666",
array = function () {
var r = [], o = {};
string.split('').forEach(function (a, i, aa) {
if (a !== aa[i - 1]) {
o[a] = { char: a, count: 0 };
r.push(o[a]);
}
o[a].count++;
});
return r;
}(string);
document.write('<pre>' + JSON.stringify(array, 0, 4) + '</pre>');
Quick solution with for loop:
var str = "7771122229933344444445666",
obj = {},
len = str.length,
val = null,
count_str = "",
key = "";
for (var i = 0; i < len; i++) {
val = str[i], key = 'k' + val;
if (!obj[key]) {
obj[key] = {'id': val, 'displayed': 1};
} else {
obj[key].displayed++;
}
}
for (var p in obj) {
count_str += obj[p]['id'] + obj[p]['displayed'];
}
console.log(count_str); // "7312249233475163"
because you have such a small set of distinct numbers, I seen no reason why you can't use a array (yeah it's not super ideal memorywise if you skip values and it becomes sparse, but for such a small subset it won't affect you enough to worry of it). Then you can use (number-1) as the index and increment that number as needed.
var counts = [];
var str = "11222233344444445666";
for(var i in str){
var index = parseInt(str[i])-1
counts[index] = (counts[index]||0)+1;
}
for(var i in counts){
var which = 1+parseInt(i);
var count = counts[i];
console.log("# of " + which +"'s: "+count);
}
https://jsfiddle.net/ga0fqpqn/
note: You shouldn't need the parseInt(i)... just +i should work but I think jsfiddle has a bug with it about it defaulting i to handle like a string.
You could store an additional array with the order of the numbers, which you only append to if the object doesn't yet contain the given number. Then once you're done counting, iterate through that array and output the number and the count from the lookup dictionary.
var chars = "1234576123452345".split("");
var order = [];
var hash = {};
chars.forEach(function(char) {
if (!hash[char]) {
hash[char] = 1;
order.push(char);
} else {
hash[char]++;
}
});
console.log(order.map(function(char) {
return char + hash[char];
}).join(""));
// "12233343537161"
I'm currently working on a password strength calculator and then I need to know if a character appears more than once.
I know I must use regex like this occurance = password.match(/a/g).length to get ho many times a occurs, but I want to do that with each character (letter, number, symbol).
Is there a way to do that using JS / JQuery, maybe regex, other than working with an array which contains all characters I want to test ?
Something like this?
var hello = "Hello world";
var histogram = {};
for (var i = 0, len = hello.length; i < len; i++) {
var letter = hello[i];
histogram[letter] = (histogram[letter] || 0) + 1;
}
console.log(histogram);
Result:
{ H: 1, e: 1, l: 3, o: 2, ' ': 1, w: 1, r: 1, d: 1 }
Or you may use array. Just change {} to [].
From #Noel Jose 's answer here, you can simply run this function after converting the string to an array string.split('').
function foo(arr) {
var a = [], b = [], prev;
arr.sort();
for( var i = 0; i < arr.length; i++ ){
if ( arr[i] !== prev ) {
a.push(arr[i]);
b.push(1);
} else {
b[b.length-1]++;
}
prev = arr[i];
}
return [a, b];
}
var stringToCheck = 'password';
var result = foo(stringToCheck.split(''));
// result[0] contain unique array elements and result[1] contain number of occurrences of those elements
for(var i = 0; i < result[0].length; i++){
console.log(result[0][i] + " : " + result[1][i]);
}
Passing in 'testing' will result in the following output:
e : 1
g : 1
i : 1
n : 1
s : 1
t : 2
function rall(r, s) {
var a=[],t,g=r.global;
do {t=r.exec(s);if (!t) break;
a.push(t);} while (g);
return a;
}
var r=/.*?(.)(?=(.*?\1.*))/g;
var res=rall(r,password);
res will be an array of arrays containing all matches of repeating characters.
The RegExp uses a look ahead to find out whether a found character (captured in the first group) will re-appear later in the string.
A password like secret elements would come up as:
"[["s","s","ecret elements"],
["e","e","cret elements"],
["cre","e","t elements"],
["t","t"," elements"],
[" e","e","lements"],
["le","e","ments"]]"
The second element in each sub-array is the multiply matched character.
If there are no repetitions the array will have length=0 which is easy to test like:
if (rall(r,password).length==0)
console.log('password is OK!');
If you want to use an "array-based" solution, you can try something like this:
var password= "abcdsa";
var freq = [];
for(var i = 0 ; i < password.length ; i++){
freq[password[i]] = (freq[password[i]] || 0)+1;
}
You iterate through the password once, and keep track of the ocurrances of each character that you find.
In this case the array "freq" would have something like this:
freq["a"] = 2;
freq["b"] = 1;
freq["c"] = 1;
freq["d"] = 1:
freq["s"] = 1;
Simply reduce your string into a count object. Seed the reduction with an empty object, each time a letter is encountered then that letter receives a +1 in the object where the index is the letter.
Made into a reusable function
function charCount(str){
return [].reduce.call(str,function(p,c){
p[c] = p[c] ? p[c]+1 : 1;
return p;
},{});
}
charCount("hello");//Object {h: 1, e: 1, l: 2, o: 1}
Just wondering if there is some other way than this.
var hashStringArray = function(array) {
array.sort();
return array.join('|');
};
I don't like sorting much and using that delimiter is not safe either if it's contained in one of the strings. In overall I need to produce same hash no matter the order of strings. It will be rather short arrays (up to 10 items), but it will be required very often so it shouldn't be too slow.
I intend to use it with ES6 Map object and I need to easily find same array collection.
Updated example of use
var theMap = new Map();
var lookup = function(arr) {
var item = null;
var hashed = hashStringArray(arr);
if (item = theMap.get( hashed )) {
return item;
}
theMap.set( hashed, itemBasedOnInput );
return itemBasedOnInput;
}
var arr1 = ['alpha','beta','gama'];
var arr2 = ['beta','alpha','gama'];
lookup(arr1) === lookup(arr2)
Performance tests
http://jsperf.com/hashing-array-of-strings/5
Two things occurred to me as the basis of a solution:
summing doesn't depend on order, which is actually a flaw in simple checksums (they don't catch changes in block order within a word), and
we can convert strings to summable numbers using their charcodes
Here's a function to do (2) :
charsum = function(s) {
var i, sum = 0;
for (i = 0; i < s.length; i++) {
sum += (s.charCodeAt(i) * (i+1));
}
return sum
}
Here's a version of (1) that computes an array hash by summing the charsum values:
array_hash = function(a) {
var i, sum = 0
for (i = 0; i < a.length; i++) {
var cs = charsum(a[i])
sum = sum + (65027 / cs)
}
return ("" + sum).slice(0,16)
}
Fiddle here: http://jsfiddle.net/WS9dC/11/
If we did a straight sum of the charsum values, then the array ["a", "d"] would have the same hash as the array ["b", "c"] - leading to undesired collisions. So based on using non-UTF strings, where charcodes go up to 255, and allowing for 255 characters in each string, then the max return value of charsum is 255 * 255 = 65025. So I picked the next prime number up, 65027, and used (65027 / cs) to compute the hash. I am not 100% convinced this removes collisions... perhaps more thought needed... but it certainly fixes the [a, d] versus [b, c] case.
Testing:
var arr1 = ['alpha','beta','gama'];
var arr2 = ['beta','alpha','gama'];
console.log(array_hash(arr1))
console.log(array_hash(arr2))
console.log(array_hash(arr1) == array_hash(arr2))
Outputs:
443.5322979371356
443.5322979371356
true
And testing a case that shows different hashes:
var arr3 = ['a', 'd'];
var arr4 = ['b', 'c'];
console.log(array_hash(arr3))
console.log(array_hash(arr4))
console.log(array_hash(arr3) == array_hash(arr4))
outputs:
1320.651443298969
1320.3792001649144
false
Edit:
Here's a revised version, which ignore duplicates from the arrays as it goes, and return the hash based on unique items only:
http://jsfiddle.net/WS9dC/7/
array_hash = function(a) {
var i, sum = 0, product = 1
for (i = 0; i < a.length; i++) {
var cs = charsum(a[i])
if (product % cs > 0) {
product = product * cs
sum = sum + (65027 / cs)
}
}
return ("" + sum).slice(0, 16)
}
testing:
var arr1 = ['alpha', 'beta', 'gama', 'delta', 'theta', 'alpha', 'gama'];
var arr2 = ["beta", "gama", "alpha", "theta", "delta", "beta"];
console.log(array_hash(arr1))
console.log(array_hash(arr2))
console.log(array_hash(arr1) === array_hash(arr2))
returns:
689.878503111701
689.878503111701
true
Edit
I've revised the answer above to account for arrays of words that have the same letters. We need these to return different hashes, which they now do:
var arr1 = ['alpha', 'beta']
var arr2 = ['alhpa', 'ateb']
The fix was to add a multiplier to the charsum func based on the char index:
sum += (s.charCodeAt(i) * (i+1));
If you calculate a numeric hash code for each string, then you can combine them with an operator where the order doesn't matter, like the ^ XOR operator, then you don't need to sort the array:
function hashStringArray(array) {
var code = 0;
for (var i = 0; i < array.length; i++) {
var n = 0;
for (var j = 0; j < array[i].length; j++) {
n = n * 251 ^ array[i].charCodeAt(j);
}
code ^= n;
}
return code
};
You can do this:
var hashStringArray = function(array) {
return array.sort().join('\u200b');
};
The \u200b character is an unicode character that also means null, but is not the same as the \0 character, which is most widely used.
'\u200b' == '\0'
> false
An idea to have very fast hash if your set of possible string is less than 32 items long : hash the string with a built-in hash function that will return power-of two as hash :
function getStringHash(aString) {
var currentPO2 = 0;
var hashSet = [];
getStringHash = function ( aString) {
var aHash = hashSet[aString];
if (aHash) return aHash;
aHash = 1 << currentPO2++;
hashSet[aString] = aHash;
return aHash;
}
return getStringHash(aString);
}
Then use this hash on your string array, ORing the hashes ( | ) :
function getStringArrayHash( aStringArray) {
var aHash = 0;
for (var i=0; i<aStringArray.length; i++) {
aHash |= getStringHash(aStringArray[i]);
}
return aHash;
}
So to test a bit :
console.log(getStringHash('alpha')); // 1
console.log(getStringHash('beta')); // 2
console.log(getStringHash('gamma')); // 4
console.log(getStringHash('alpha')); // 1 again
var arr1 = ['alpha','beta','gama'];
var arr2 = ['beta','alpha','gama'];
var arr3 = ['alpha', 'teta'];
console.log(getStringArrayHash(arr1)); // 11
console.log(getStringArrayHash(arr2)); // 11 also, like for arr1
var arr3 = ['alpha', 'teta'];
console.log(getStringArrayHash(arr3)); // 17 : a different array has != hashset
jsbin is here : http://jsbin.com/rozanufa/1/edit?js,console
RQ !!! with this method, arrays are considered as set, meaning that a repeated item won't change the hash of an array !!!
This HAS to be faster since it uses only 1) function call 2) lookup 3) integer arithmetic.
So no sort, no (long) string, no concat.
jsperf confirms that :
http://jsperf.com/hashing-array-of-strings/4
EDIT :
version with prime numbers, here : http://jsbin.com/rozanufa/3/edit?js,console
// return the unique prime associated with the string.
function getPrimeStringHash(aString) {
var hashSet = [];
var currentPrimeIndex = 0;
var primes = [ 2, 3, 5, 7, 11, 13, 17 ];
getPrimeStringHash = function ( aString) {
var aPrime = hashSet[aString];
if (aPrime) return aPrime;
if (currentPrimeIndex == primes.length) aPrime = getNextPrime();
else aPrime = primes[currentPrimeIndex];
currentPrimeIndex++
hashSet[aString] = aPrime;
return aPrime;
};
return getPrimeStringHash(aString);
// compute next prime number, store it and returns it.
function getNextPrime() {
var pr = primes[primes.length-1];
do {
pr+=2;
var divides = false;
// discard the number if it divides by one earlier prime.
for (var i=0; i<primes.length; i++) {
if ( ( pr % primes[i] ) == 0 ) {
divides = true;
break;
}
}
} while (divides == true)
primes.push(pr);
return pr;
}
}
function getStringPrimeArrayHash( aStringArray) {
var primeMul = 1;
for (var i=0; i<aStringArray.length; i++) {
primeMul *= getPrimeStringHash(aStringArray[i]);
}
return primeMul;
}
function compareByPrimeHash( aStringArray, anotherStringArray) {
var mul1 = getStringPrimeArrayHash ( aStringArray ) ;
var mul2 = getStringPrimeArrayHash ( anotherStringArray ) ;
return ( mul1 > mul2 ) ?
! ( mul1 % mul2 )
: ! ( mul2 % mul1 );
// Rq : just test for mul1 == mul2 if you are sure there's no duplicates
}
Tests :
console.log(getPrimeStringHash('alpha')); // 2
console.log(getPrimeStringHash('beta')); // 3
console.log(getPrimeStringHash('gamma')); // 5
console.log(getPrimeStringHash('alpha')); // 2 again
console.log(getPrimeStringHash('a1')); // 7
console.log(getPrimeStringHash('a2')); // 11
var arr1 = ['alpha','beta','gamma'];
var arr2 = ['beta','alpha','gamma'];
var arr3 = ['alpha', 'teta'];
var arr4 = ['alpha','beta','gamma', 'alpha']; // == arr1 + duplicate 'alpha'
console.log(getStringPrimeArrayHash(arr1)); // 30
console.log(getStringPrimeArrayHash(arr2)); // 30 also, like for arr1
var arr3 = ['alpha', 'teta'];
console.log(getStringPrimeArrayHash(arr3)); // 26 : a different array has != hashset
console.log(compareByPrimeHash(arr1, arr2) ); // true
console.log(compareByPrimeHash(arr1, arr3) ); // false
console.log(compareByPrimeHash(arr1, arr4) ); // true despite duplicate