Javascript word-count for any given DOM element - javascript

I'm wondering if there's a way to count the words inside a div for example. Say we have a div like so:
<div id="content">
hello how are you?
</div>
Then have the JS function return an integer of 4.
Is this possible? I have done this with form elements but can't seem to do it for non-form ones.
Any ideas?
g

If you know that the DIV is only going to have text in it, you can KISS:
var count = document.getElementById('content').innerHTML.split(' ').length;
If the div can have HTML tags in it, you're going to have to traverse its children looking for text nodes:
/**
 * Collect the text of a DOM node's descendants, in document order.
 *
 * Comment nodes (nodeType 8) are skipped, element nodes (nodeType 1) are
 * descended into recursively, and every other node contributes its nodeValue.
 *
 * @param {Node} el - Node whose childNodes are walked.
 * @returns {string} The concatenated text.
 */
function get_text(el) {
  // Declared locally: assigning to an undeclared `ret` creates a global that
  // every recursive call shares, and throws in strict mode / ES modules.
  var ret = "";
  var length = el.childNodes.length;
  for (var i = 0; i < length; i++) {
    var node = el.childNodes[i];
    if (node.nodeType != 8) {
      ret += node.nodeType != 1 ? node.nodeValue : get_text(node);
    }
  }
  return ret;
}
var words = get_text(document.getElementById('content'));
var count = words.split(' ').length;
This is the same logic that the jQuery library uses to achieve the effect of its text() function. jQuery is a pretty awesome library that in this case is not necessary. However, if you find yourself doing a lot of DOM manipulation or AJAX then you might want to check it out.
EDIT:
As noted by Gumbo in the comments, the way we are splitting the strings above would count two consecutive spaces as a word. If you expect that sort of thing (and even if you don't) it's probably best to avoid it by splitting on a regular expression instead of on a simple space character. Keeping that in mind, instead of doing the above split, you should do something like this:
var count = words.split(/\s+/).length;
The only difference being on what we're passing to the split function.

Paolo Bergantino's second solution is incorrect for empty strings or strings that begin or end with whitespaces. Here's the fix:
var count = !s ? 0 : (s.split(/^\s+$/).length === 2 ? 0 : 2 +
s.split(/\s+/).length - s.split(/^\s+/).length - s.split(/\s+$/).length);
Explanation: If the string is empty, there are zero words; If the string has only whitespaces, there are zero words; Else, count the number of whitespace groups without the ones from the beginning and the end of the string.

string_var.match(/[^\s]+/g).length
seems like it's a better method than
string_var.split(/\s+/).length
At least it won't count "word " as 2 words -- ['word'] rather than ['word', '']. And it doesn't really require any funny add-on logic.

Or just use Countable.js to do the hard job ;)

/**
 * Gather the data of every text node beneath a DOM node, in document order.
 *
 * @param {Node} hoo - Root node to search; a falsy value yields an empty array.
 * @returns {string[]} One entry per text node (nodeType 3) found.
 */
document.deepText = function deepText(hoo) {
  var A = [];
  if (hoo) {
    hoo = hoo.firstChild;
    while (hoo != null) {
      if (hoo.nodeType == 3) {
        A[A.length] = hoo.data;
      } else {
        // Recurse through the function's own name: arguments.callee is
        // forbidden in strict mode and in ES modules.
        A = A.concat(deepText(hoo));
      }
      hoo = hoo.nextSibling;
    }
  }
  return A;
};
I'd be fairly strict about what a word is-
/**
 * Count the words in the text beneath a DOM node.
 *
 * A word is a run of ASCII letters, apostrophes and hyphens.
 *
 * @param {Node} hoo - Root node passed on to document.deepText.
 * @returns {number} Number of words; 0 when the text contains none.
 */
function countwords(hoo) {
  var text = document.deepText(hoo).join(' ');
  // match() returns null (not an empty array) when nothing matches, so guard
  // before reading .length.
  var words = text.match(/[A-Za-z\'\-]+/g);
  return words ? words.length : 0;
}
alert(countwords(document.body))

Or you can do this:
/**
 * Count the words (and characters) in a field's value, report the result
 * with alert(), and return the word count.
 *
 * A word is a run of ASCII letters/digits; every other character separates words.
 *
 * @param {{value: string}} this_field - Form element (or any object) exposing `value`.
 * @param {boolean} [show_word_count=true] - Include the word count in the alert.
 * @param {boolean} [show_char_count=false] - Include the character count in the alert.
 * @returns {number} Number of words found.
 */
function CountWords (this_field, show_word_count, show_char_count) {
  if (show_word_count == null) {
    show_word_count = true;
  }
  if (show_char_count == null) {
    show_char_count = false;
  }
  var char_count = this_field.value.length;
  // The appended space guarantees the last word is followed by a separator,
  // so the split below always yields exactly one extra (empty) element.
  var fullStr = this_field.value + " ";
  var initial_whitespace_rExp = /^[^A-Za-z0-9]+/gi;
  var left_trimmedStr = fullStr.replace(initial_whitespace_rExp, "");
  // Previously also assigned to an undeclared `rExp`, leaking a global.
  var non_alphanumerics_rExp = /[^A-Za-z0-9]+/gi;
  var cleanedStr = left_trimmedStr.replace(non_alphanumerics_rExp, " ");
  var splitString = cleanedStr.split(" ");
  var word_count = splitString.length - 1;
  if (fullStr.length < 2) {
    word_count = 0;
  }
  // Declared locally; these used to be implicit globals.
  var wordOrWords = word_count == 1 ? " word" : " words";
  var charOrChars = char_count == 1 ? " character" : " characters";
  // Logical && rather than bitwise &: truthy non-boolean flags must still
  // select the combined report.
  if (show_word_count && show_char_count) {
    alert ("Word Count:\n" + " " + word_count + wordOrWords + "\n" + " " + char_count + charOrChars);
  } else if (show_word_count) {
    alert ("Word Count: " + word_count + wordOrWords);
  } else if (show_char_count) {
    alert ("Character Count: " + char_count + charOrChars);
  }
  return word_count;
}

The get_text function in Paolo Bergantino's answer didn't work properly for me when two child nodes have no whitespace between them. For example, <h1>heading</h1><p>paragraph</p> is returned as headingparagraph (note the missing space between the two words). Prepending a space to each nodeValue fixes this. That does introduce a leading space in the result, but I found a word count function that trims it off (and it uses several regexps to ensure that only words are counted). The word count and edited get_text functions are below:
/**
 * Collect the text of a DOM node's descendants, in document order, with a
 * space in front of every text fragment so adjacent elements do not run together.
 *
 * Comment nodes (nodeType 8) are skipped, element nodes (nodeType 1) are
 * descended into recursively, and every other node contributes ' ' + nodeValue.
 *
 * @param {Node} el - Node whose childNodes are walked.
 * @returns {string} The concatenated text (starts with a space when non-empty).
 */
function get_text(el) {
  // Declared locally: assigning to an undeclared `ret` creates a global that
  // every recursive call shares, and throws in strict mode / ES modules.
  var ret = "";
  var length = el.childNodes.length;
  for (var i = 0; i < length; i++) {
    var node = el.childNodes[i];
    if (node.nodeType != 8) {
      ret += node.nodeType != 1 ? ' ' + node.nodeValue : get_text(node);
    }
  }
  return ret;
}
/**
 * Count the words in a string, where a word is a run of ASCII letters and
 * digits. Punctuation is dropped (so "don't" is one word) and any whitespace
 * separates words.
 *
 * @param {string} fullStr - Text to count.
 * @returns {number} Number of words; 0 for empty, blank or punctuation-only input.
 */
function wordCount(fullStr) {
  if (fullStr.length == 0) {
    return 0;
  }
  var cleaned = fullStr
    // Every whitespace run (spaces, tabs, CR, LF) becomes one separator, so
    // tabs are no longer deleted together with the punctuation below.
    .replace(/\s+/g, " ")
    .replace(/[^A-Za-z0-9 ]+/g, "")
    .replace(/^ +| +$/g, "")
    // Removing punctuation can leave doubled spaces ("a - b"), so collapse again.
    .replace(/ +/g, " ");
  // "".split(" ") is [""], which would report one word for blank input.
  if (cleaned.length == 0) {
    return 0;
  }
  return cleaned.split(" ").length;
}
EDIT
kennebec's word counter is really good. But the one I've found includes a number as a word which is what I needed. Still, that's easy to add to kennebec's. But kennebec's text retrieval function will have the same problem.

This should account for preceding & trailing whitespaces
// Count runs of non-whitespace instead of splitting: "".split(/\s+/) yields [""],
// which made an empty or whitespace-only element report 1 word.
const wordCount = (document.querySelector('#content').innerText.match(/\S+/g) || []).length;

string_var.match(/[^\s]+/g).length - 1;

Related

Removing duplicate spaces from input

<textarea id="check" cols="50" rows="20"></textarea>
<script>
// Question code: tries to drop duplicate spaces from the textarea's value and
// then derive a word count from the number of spaces. Left as posted; the
// notes below mark why it does not work.
var text = document.getElementById("check").value;
var lengthA = text;
for (var i = 0; i < lengthA.length; i++) {
var space = " ";
if (lengthA[i] === space) {
// NOTE(review): this concatenates "1" onto the current character (giving " 1");
// it does not read the following character, so `next === space` is never true.
var next = lengthA[i] + 1;
if (next === space) {
// NOTE(review): replace() returns a new string and the result is discarded
// here, so lengthA would be unchanged even if this line were reached.
lengthA.replace(lengthA[i], "");
}
}
}
// NOTE(review): length2 is not declared anywhere in this snippet.
var length3 = lengthA.length - length2;
var words = length3 + 1;
</script>
I have a problem. I'm attempting to make a word counter based on the rule that each space corresponds to one word (1:1). I'm not sure why it is not working; it makes sense to me in my mind. I have tried several alternatives and spent hours trying to fix this chunk of code. Thank you in advance to anyone who answers, even if the answer doesn't work! :)
EDIT: Regular expressions did the trick and replaced the incorrectly used for loop and if statements. Thanks
How about just the below -
var text = document.getElementById("check").value.replace (/ +/g, " ");
Not sure, why you would need a for loop to begin with.
/ +/ matches a run of one or more consecutive spaces
the g flag applies the replacement to every match throughout the text
To remove the duplicate space, the following code
lengthA.replace(lengthA[i], "");
should be
lengthA = lengthA.substring(0, i) + lengthA.substring(i + 1);
// i should not increase
i--;
continue;
You misunderstand the usage of replace.
Use str.replace() of JavaScript to do this. This will remove not only space but also work for tabs, newlines etc.
Usage:
var string = string.replace(/\s\s+/g, ' ');
So change below code:
var lengthA = text;
for (var i = 0; i < lengthA.length; i++) {
var space = " ";
if (lengthA[i] === space) {
var next = lengthA[i] + 1;
if (next === space) {
lengthA.replace(lengthA[i], "");
}
}
}
To this:
var lengthA = text.replace(/\s\s+/g, ' ');
Reference here : https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace

Javascript Return the location of the first letter of each word in a string using a loop

Having trouble coming up with code doing this.
So for example here is my string.
var str = "Hello how are you today?";
How would I manipulate this string to return the position of the first letter of each word using a loop?
this will give you the result with less complicated code and a single loop
/**
 * Return the 1-based position of the first character of every word in a string.
 *
 * Words are runs of characters other than the space character. Leading,
 * trailing or repeated spaces no longer produce positions for empty "words",
 * and an empty string yields an empty array.
 *
 * @param {string} str - Text to scan.
 * @returns {number[]} 1-based start positions, in order of appearance.
 */
function foo(str) {
  var pos = [];
  // A fresh /g regex per call, so lastIndex state never carries over.
  var wordPattern = /[^ ]+/g;
  var match;
  while ((match = wordPattern.exec(str)) !== null) {
    pos.push(match.index + 1);
  }
  return pos;
}
You should search for a question before asking it in case it's already been asked and answered.
Get first letter of each word in a string, in Javascript
You can use a regexp replace passing a function instead of a replacement string, this will call the function for each match:
str.replace(/[^ ]+/g, function(match, pos) {
console.log("Word " + match + " starts at position " + pos);
});
The regexp meaning is:
[^ ]: anything excluding space
+: one or more times
"g" option: not only first match, but each of them
in other words the function will be called with sequences of non-spaces. Of course you can define what you consider a "word" differently.
Here is a Solution with two Loops, i hope that is close enough ;)
// Collects the 1-based start position of each space-separated word by
// repeatedly cutting the leading word off `str`, then alerts each position
// together with the character found there in the original string.
var starts = [];
var str = "How are you doing today?";
//var count = 0;
// Untouched copy: `str` is shortened inside the loop below.
var orgStr = str;
// Runs while a space exists beyond index 0; it stops when no space is left,
// and also when `str` begins with a space (indexOf returns 0).
while (str.indexOf(" ") > 0) {
if (starts.length > 0) {
// Next start = previous start + length of the word just consumed + 1 for the space.
starts.push(starts[starts.length - 1] + str.indexOf(" ") +1);
} else {
// First pass: record the first word at position 1 and the second word
// right after the first space.
starts.push(1);
starts.push(str.indexOf(" ") +2);
//alert(str);
}
// Drop the leading word and its trailing space.
str = str.substring(str.indexOf(" ") + 1);
}
for (var i = 0; i < starts.length; i++) {
// starts[i] is 1-based, so substring(starts[i]-1, starts[i]) is the single
// character at that position.
alert(starts[i] + ": " + orgStr.substring(starts[i]-1,starts[i]))
}
The easiest way would be to search for the regular expression \b\w and collect match.index (not match.start()) for each match, looping while there are matches.
EDIT: wrong language. lol.

if else statement in while loop, where if else statement determines when while loop stops

// Question code: intended to collapse repeated spaces by making passes over
// the input until a pass finds nothing to replace. Left as posted; the notes
// below mark why the while loop never reaches the final `else`.
var userInput = prompt("type something with or without double spacing");
var errorSpaces = [];
var maxSpaceCount = [];
var doneOnce = 0;
var done = 0;
// NOTE(review): declared without a value, so it is undefined when the for
// loops below first evaluate `i<size`.
var size;
var tempArray = [0, 0];
while (done === 0) {
if (doneOnce === 0) {
// NOTE(review): `0 < undefined` is false, so this body (which is the only
// place `size` gets assigned) never runs.
for (var i = 0; i<size; i++) {
size = userInput.length - 1;
// NOTE(review): as written this compares a two-character substring with a
// one-character string.
if (userInput.substr(i, 2) == " ") {
// NOTE(review): `j` is not declared in this snippet, and replace() returns a
// new string that is discarded here.
userInput.replace(userInput.substring(i, j), userInput[i]);
errorSpaces.push(0);
}
}
doneOnce = 1;
maxSpaceCount.push(0);
} else if (doneOnce === 1 && tempArray.length != 1) {
// NOTE(review): `size` is still undefined, so this body never runs either;
// doneOnce stays 1 and tempArray keeps its two elements, so this branch is
// taken on every later iteration and the while loop never ends.
for (var i = 0; i<size; i++) {
tempArray = [0];
size = userInput.length - 1;
if (userInput.substr(i, 2) == " ") {
userInput.replace(userInput.substring(i, j), userInput[i]);
tempArray.push(0);
}
doneOnce = 2;
}
maxSpaceCount.push(0);
} else {
done = 1;
}
}
alert("done");
This loops at the second for loop rather than finishing. I know it probably isn't the best way to do it, but how could I make the 'else if' work so that when there are no more double spaces, it will go to the final else?
I am trying to eliminate any multiple spaces by iteratively replacing double spaces with single spaces, then re-reading to replace further double (previously triple) spaces, etc.
That is far too much code if your goal is to replace any run of two or more spaces with a single space;
try regex
var userInput = prompt("type something with or without double spacing");
userInput = userInput.replace(/\s{2,}/g, ' ');
alert("done");
although not quite sure what you are trying to do with tempArray as it doesn't seem to make sense.
EDIT
There appears to be some indication that there is a requirement to count how many occurrences of 2 or more spaces, so using the below will give you the count. The reason for the || bit is because if none are found, it will return null, || [] will change the null to empty array, so the length of it will be zero. Thanks to #RobG
var countOfMultipleSpaces = (userInput.match(/\s{2,}/g) || []).length;
I'm sure it goes without saying that you have to do this before you replace them all
Is this what you want?
"type something with or without double spacing".replace(/\s{2,}/g, ' ');
//"type something with or without double spacing"

Split method and then concatenate

What I need to do is make this function to where it splits each part of the string entered, and then puts pig latin on each word, meaning it adds ay at the end of each word. Here's what I have so far:
/**
 * Question code: meant to split the input into words and alert a pig-latin
 * form of each one. Left as posted; the notes below mark what goes wrong.
 *
 * @param {string} whatWeTitle - Sentence to transform; split on single spaces.
 */
function pigLatin(whatWeTitle) {
var alertThis = " ";
var splitArray = whatWeTitle.split(" ");
// NOTE(review): `i` is not declared, so it becomes an implicit global.
for ( i = 0; i < splitArray.length; i++) {
// NOTE(review): makeSentenceCase is not defined in this snippet, and
// alertThis is overwritten on every pass without being read.
alertThis = makeSentenceCase(splitArray[i]) + " ";
// NOTE(review): splitArray is an array, which has no substring(); the
// current word would be splitArray[i]. newWord is not defined in this snippet.
var newWord3 = splitArray.substring(1, whatWeTitle.length) + newWord + 'ay';
alert(newWord3);
}
}
Right now, it just takes the first letter of the string and adds it to the end. It doesn't change each word to pig latin, just the whole phrase. I was wondering if anyone could help me with this. Thanks.
You need to use [i] to get items of your array :
var word = splitArray[i];
var newWord3 = word.substring(1,word.length) + word[0] + 'ay';
The best approach, if you want to end up with the whole new sentence, is to change each word and join them at the end:
var splitArray = whatWeTitle.split(" ");
for ( i = 0; i < splitArray.length; i++) {
var word = splitArray[i];
splitArray[i] = word.substring(1,word.length) + word[0] + 'ay';
}
var newSentence = splitArray.join(' ');
alert(newSentence);
If you test a little, you'll see this algorithm doesn't like the dots or comma in your sentence. If you want something stronger, you'd need a regular expression, for example like this :
var newSentence = whatWeTitle.replace(/[^\. ,]+/g, function(word){
return word.slice(1) + word[0] + 'ay';
});
alert(newSentence);
This works by replacing in place the words in the text, using a function to transform each word.
Something like this ?
/**
 * Append "ay" to every space-separated word of a sentence and alert the result.
 *
 * Each word is followed by "ay " in the output, so the alerted string ends
 * with a space.
 *
 * @param {string} whatWeTitle - Sentence to transform; split on single spaces.
 */
function pigLatin(whatWeTitle) {
  var splitArray = whatWeTitle.split(" ");
  var finalString = "";
  // `var i`: the undeclared counter leaked a global and throws in strict mode.
  for (var i = 0; i < splitArray.length; i++) {
    finalString += splitArray[i] + "ay ";
  }
  alert(finalString);
}
pigLatin("this is a test");
You probably want to split off the first consonant values and then append them along with 'ay'.
I would use a regex to accomplish this. Here is a JSFiddle showing an example.
First part is split the word
var words = text.split(" ");
Next part is to piglatinify™ each word
words = words.map(function(word){ return pigLatinifyWord(word);});
This is the piglatinify™ function
/**
 * Convert a single word to pig latin, keeping trailing punctuation in place.
 *
 * A word starting with a vowel (either case) gets "ay" appended; any other
 * word has its first character moved to the end before "ay" is appended.
 * Input with nothing left after the trailing punctuation is removed
 * (e.g. "" or "...") is returned unchanged.
 *
 * @param {string} word - Word to convert, optionally followed by punctuation.
 * @returns {string} The pig-latin form with the original trailing punctuation.
 */
function pigLatinifyWord(word) {
  var specialChars = "";
  // Trailing non-word characters only. The previous (\W|\D)+$ also matched
  // letters (every letter is a non-digit), so it swallowed the whole word.
  var specialMatches = word.match(/\W+$/);
  if (specialMatches) {
    specialChars = specialMatches[0];
    // Cut at the match position; indexOf() could locate an earlier identical run.
    word = word.slice(0, specialMatches.index);
  }
  if (word.length === 0) {
    return specialChars;
  }
  var result;
  if (/^[^aeiou]/i.test(word)) {
    result = word.slice(1) + word.charAt(0) + "ay";
  } else {
    result = word + "ay";
  }
  return result + specialChars;
}
Update
JSFiddle example now includes handling for non-word non-digit characters

Searching through international characters with Chosen.js

Has anyone found a fix for searching through international characters using chosen.js?
for example if I have a multi-select field with the following options:
- A French Château
- An English Chateau
And I search for "Cha"
Only the english will show up.
I don't know whether it is too late to answer, but this could probably help someone else.
The point is to strip diacritics from entered string and from matched option string during comparison.
My modification is working with jquery Chosen plugin in version 1.1.0, downloaded from github.
First you have to have a function to strip diacritics (i've added it after winnow_results())
/**
 * Replace accented letters with their unaccented base letter.
 *
 * The mapping is one-to-one per UTF-16 code unit, so the result always has
 * the same length as the input and match positions found in the stripped
 * string can be applied to the original text.
 *
 * The explicit table is consulted first. Any other character that decomposes
 * (Unicode NFD) into a base character followed only by combining marks, such
 * as "â" or "ñ", is replaced by that base character when
 * String.prototype.normalize is available. Everything else passes through.
 *
 * @param {string} string - Text to strip.
 * @returns {string} Text of identical length without diacritics.
 */
AbstractChosen.prototype.strip_diacritics= function(string) {
  var
    translationTable= [
      // input with diacritics - add your characters if necessary
      "éěÉĚřŘťŤžŽúÚůŮüÜíÍóÓáÁšŠďĎýÝčČňŇäÄĺĹľĽŕŔöÖ",
      // stripped output
      "eeEErRtTzZuUuUuUiIoOaAsSdDyYcCnNaAlLlLrRoO",
    ],
    // Combining Diacritical Marks block
    combiningMarks= /^[\u0300-\u036f]+$/,
    // older engines lack normalize(); they fall back to the table alone
    canNormalize= typeof String.prototype.normalize === 'function',
    output= '';
  // scan through whole string
  for (var i= 0; i < string.length; i++) {
    var ch= string.charAt(i);
    var charPosition= translationTable[0].indexOf(ch);
    if (charPosition != -1) {
      output+= translationTable[1].charAt(charPosition);
      continue;
    }
    if (canNormalize) {
      var decomposed= ch.normalize('NFD');
      // only accept "base letter + marks"; other decompositions stay untouched
      if (decomposed.length > 1 && combiningMarks.test(decomposed.slice(1))) {
        output+= decomposed.charAt(0);
        continue;
      }
    }
    output+= ch;
  }
  return output;
}
Then use it in appropriate places in winnow_results() function.
Referencing chosen.jquery.js lines:
line #315
searchText = this.get_search_text();
// replace with
searchText = this.strip_diacritics(this.get_search_text());
line #339
option.search_match = this.search_string_match(option.search_text, regex);
// replace with
option.search_match = this.search_string_match(this.strip_diacritics(option.search_text), regex);
and finally line #345
startpos = option.search_text.search(zregex);
// replace with
startpos = this.strip_diacritics(option.search_text).search(zregex);
And you're done :-).
(I feel like writing some "savegame byte replacement" instruction for extra lives in old DOS game)
Whole modified file at pastebin.
Sept 29th, 2016: Modifications made in Chosen 1.6.2, tested ok.
It doesn't look as though Chosen does this by default - there's no functionality in the code for this.
The source code is here. I've posted the search function below, which is responsible for checking the characters. There's nothing in this function that deals with close characters like that, so you'd either have to write it or request that feature from the Chosen team. This is because, at the core, accented characters and non-accented characters don't have the same ASCII (or Unicode) values. You'd have to have some type of lookup table, and parse that for each character to return "fuzzy" results.
Sorry I couldn't be of more help. I'm sure if you could find a way to modify this function, you could get this working. Again, you'd need lookup tables or something for the underlying code values. Best of luck.
Edit: You may not need lookup tables - perhaps the Regex functionality has a built-in way to do this. Alternatively, you may be able to simply match on anything close to the letter you're searching for.
/**
 * Filter the rendered option list against the current search text.
 *
 * Each enabled, non-empty, non-group option is tested against a
 * case-insensitive regex built from the search text. Matching options are
 * activated and the matched portion is wrapped in <em>; the rest are
 * deactivated.
 *
 * @returns The result of no_results(searchText) when nothing matched a
 *   non-empty search, otherwise the result of winnow_results_set_highlight().
 */
Chosen.prototype.winnow_results = function() {
var found, option, part, parts, regex, regexAnchor, result, result_id, results, searchText, startpos, text, zregex, _i, _j, _len, _len1, _ref;
this.no_results_clear();
results = 0;
// The default placeholder text counts as an empty search; otherwise the
// trimmed field value is HTML-escaped by passing it through a detached <div>.
searchText = this.search_field.val() === this.default_text ? "" : $('<div/>').text($.trim(this.search_field.val())).html();
// Anchor at the start of the text unless search_contains is set.
regexAnchor = this.search_contains ? "" : "^";
// Regex metacharacters in the search text are backslash-escaped so it is
// matched literally. `regex` decides whether an option matches; the unanchored
// `zregex` locates the match for highlighting.
regex = new RegExp(regexAnchor + searchText.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&"), 'i');
zregex = new RegExp(searchText.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&"), 'i');
_ref = this.results_data;
for (_i = 0, _len = _ref.length; _i < _len; _i++) {
option = _ref[_i];
if (!option.disabled && !option.empty) {
if (option.group) {
// Group headers start hidden; they are shown again below when one of their
// options matches.
$('#' + option.dom_id).css('display', 'none');
} else if (!(this.is_multiple && option.selected)) {
found = false;
result_id = option.dom_id;
result = $("#" + result_id);
if (regex.test(option.html)) {
found = true;
results += 1;
} else if (this.enable_split_word_search && (option.html.indexOf(" ") >= 0 || option.html.indexOf("[") === 0)) {
// Split-word search: strip square brackets, split on spaces and test each
// part separately.
parts = option.html.replace(/\[|\]/g, "").split(" ");
if (parts.length) {
for (_j = 0, _len1 = parts.length; _j < _len1; _j++) {
part = parts[_j];
if (regex.test(part)) {
found = true;
// NOTE: incremented once per matching part, so one option can add more than
// 1 here; `results` is only compared with 1 at the end of this function.
results += 1;
}
}
}
}
if (found) {
if (searchText.length) {
// Insert the closing tag first so that `startpos` is still valid when the
// opening tag is inserted.
startpos = option.html.search(zregex);
text = option.html.substr(0, startpos + searchText.length) + '</em>' + option.html.substr(startpos + searchText.length);
text = text.substr(0, startpos) + '<em>' + text.substr(startpos);
} else {
text = option.html;
}
result.html(text);
this.result_activate(result);
if (option.group_array_index != null) {
$("#" + this.results_data[option.group_array_index].dom_id).css('display', 'list-item');
}
} else {
// Clear the highlight when the highlighted result is the one being deactivated.
if (this.result_highlight && result_id === this.result_highlight.attr('id')) {
this.result_clear_highlight();
}
this.result_deactivate(result);
}
}
}
}
if (results < 1 && searchText.length) {
return this.no_results(searchText);
} else {
return this.winnow_results_set_highlight();
}
};

Categories