JavaScript count all elements on a webpage - javascript

I want to be able to count everything on a site that contains a certain string, even if it's not displayed on the page.
I found this:
window.occurrencesFunc()
I am not sure how to use it. I did play around with it, but nothing seemed to work.
This is what I tried:
function main() {
function(r) {
var amount = window.occurrencesFunc(r, "string", false);
$('ul.reset').append('Count: ' + amount);
}
}
main();

I'd get the entire page as a string:
var markup = document.documentElement.innerHTML;
And, then I'd use the match method to match for the string occurrences in the string and count them:
// the g in the regular expression says to search for a word (not part of a word)
var resultArray= markup.match(/WORRDDD/g);
var count = resultArray.length
Shorthand, would be
var count = (markup.match(/WORRDDD/g) || []).length;

How about a function like this?
function findOccurrences (string)
{
var elements = document.getElementsByTagName ('*');
var foundCount = 0;
for (var i = 0, il = elements.length; i < il; i++) {
if (!elements[i].textContent || !elements[i].textContent.match (string)) {
continue;
}
foundCount++;
}
return 'Count: ' + foundCount;
}
Use it like this, HTML...
<div id="occurrenceCount"></div>
And the JavaScript...
var countElement = document.getElementById ('occurrenceCount');
countElement.textContent = findOccurrences ('your string');
This will make the <div> above contain something like:
Count: 35

I'd probably walk through element nodes present in the DOM
function countElementsMatching(needle, elements) {
return Array.prototype.slice.call(elements)
.map(function (node) {
return node.textContent;
})
.filter(function (str) {
return str.indexOf(needle) !== -1;
}).length;
}
Usage:
var total = countElementsMatching('some string', document.querySelectorAll('*'));

Related

Why is my array 'undefined'? (vanilla javascript)

I'm trying to make a simple 'bad words' filter with javascript. It's meant to listen to any submit events on the page, then iterate through all input fields of the text type, check them for bad stuff by comparing the entered text with the word list, and finally return an according console.log/alert (for now).
I have two files: word-list.js with the critical words (loads first) and filter.js which pulls an array with all words from word-list.js.
My problems is, swear_words_arr[1] is 'undefined' and I don't understand why. I've been looking around for solutions, but still I can't seem to determine the reason for this. Help is much appreciated.
// get all inputs type = text and turn html collection into array
var getInputs = document.querySelectorAll("input[type=text]")
var inputs = Array.from(getInputs);
//var swear_alert_arr -> from in word-list.js
var swear_alert_arr = new Array();
var swear_alert_count = 0;
function reset_alert_count() {
swear_alert_count = 0;
}
function validate_text() {
reset_alert_count();
inputs.forEach(function(input) {
var compare_text = input.value;
console.log(compare_text);
for (var i = 0; i < swear_words_arr.length; i++) {
for (var j = 0; j < compare_text.length; i++) {
if (
swear_words_arr[i] ==
compare_text.substring(j, j + swear_words_arr[i].length).toLowerCase()
) {
swear_alert_arr[swear_alert_count] =
compare_text.substring(
j,
j + swear_words_arr[i].length
);
swear_alert_count++;
}
}
}
var alert_text = "";
for (var k = 1; k <= swear_alert_count; k++) {
alert_text += "\n" + "(" + k + ") " + swear_alert_arr[k - 1];
if (swear_alert_count > 0) {
alert("No!");
console.log('omg no bad stuff! D:');
} else {
console.log('no bad stuff found :)');
}
}
});
}
window.onload = reset_alert_count;
window.addEventListener('submit', function() {
validate_text();
});
It doesn't look like you've declared the array you're trying to access.
But, instead of loops with nested loops and keeping track of loop counters, just get a new array that contains any bad words in the submitted array. You can do this a number of ways, but the Array.prototype.filter() method works nicely:
let badWords = ["worse", "terrible", "horrible", "bad"];
let submittedWords = ["Good", "Terrible", "Great", "Fabulous", "Bad", "OK"];
// Loop over the submitted words and return an array of all the bad words found within it
let bad = submittedWords.filter(function(word){
// Do a case-insensitive match test. Return the word from the submitted words
// if it's on the bad word list.
return badWords.indexOf(word.toLowerCase()) > -1 ? word: null;
});
console.log("Bad words found in submitted data: " + bad.join(", "));

Find smallest substring containing a given set of letters in a larger string

Say you have the following string:
FJKAUNOJDCUTCRHBYDLXKEODVBWTYPTSHASQQFCPRMLDXIJMYPVOHBDUGSMBLMVUMMZYHULSUIZIMZTICQORLNTOVKVAMQTKHVRIFMNTSLYGHEHFAHWWATLYAPEXTHEPKJUGDVWUDDPRQLUZMSZOJPSIKAIHLTONYXAULECXXKWFQOIKELWOHRVRUCXIAASKHMWTMAJEWGEESLWRTQKVHRRCDYXNT
LDSUPXMQTQDFAQAPYBGXPOLOCLFQNGNKPKOBHZWHRXAWAWJKMTJSLDLNHMUGVVOPSAMRUJEYUOBPFNEHPZZCLPNZKWMTCXERPZRFKSXVEZTYCXFRHRGEITWHRRYPWSVAYBUHCERJXDCYAVICPTNBGIODLYLMEYLISEYNXNMCDPJJRCTLYNFMJZQNCLAGHUDVLYIGASGXSZYPZKLAWQUDVNTWGFFY
FFSMQWUNUPZRJMTHACFELGHDZEJWFDWVPYOZEVEJKQWHQAHOCIYWGVLPSHFESCGEUCJGYLGDWPIWIDWZZXRUFXERABQJOXZALQOCSAYBRHXQQGUDADYSORTYZQPWGMBLNAQOFODSNXSZFURUNPMZGHTAJUJROIGMRKIZHSFUSKIZJJTLGOEEPBMIXISDHOAIFNFEKKSLEXSJLSGLCYYFEQBKIZZTQQ
XBQZAPXAAIFQEIXELQEZGFEPCKFPGXULLAHXTSRXDEMKFKABUTAABSLNQBNMXNEPODPGAORYJXCHCGKECLJVRBPRLHORREEIZOBSHDSCETTTNFTSMQPQIJBLKNZDMXOTRBNMTKHHCZQQMSLOAXJQKRHDGZVGITHYGVDXRTVBJEAHYBYRYKJAVXPOKHFFMEPHAGFOOPFNKQAUGYLVPWUJUPCUGGIXGR
AMELUTEPYILBIUOCKKUUBJROQFTXMZRLXBAMHSDTEKRRIKZUFNLGTQAEUINMBPYTWXULQNIIRXHHGQDPENXAJNWXULFBNKBRINUMTRBFWBYVNKNKDFR
I'm trying to find the smallest substring containing the letters ABCDA.
I tried a regex approach.
console.log(str.match(/[A].*?[B].*?[C].*?[D].*?[A]/gm).sort((a, b) => a.length - b.length)[0]);
This works, but it only find strings where ABCDA appear (in that order). Meaning it won't find substring where the letters appear in a order like this: BCDAA
I'm trying to change my regex to account for this. How would I do that without using | and type out all the different cases?
You can't.
Let's consider a special case: Assume the letters you are looking for are A, A, and B. At some point in your regexp there will certainly be a B. However, the parts to the left and to the right of the B are independent of each other, so you cannot refer from one to the other. How many As are matched in the subexpression to the right of the B depends on the number of As being already matched in the left part. This is not possible with regular expressions, so you will have to unfold all the different orders, which can be many!
Another popular example that illustrates the problem is to match opening brackets with closing brackets. It's not possible to write a regular expression asserting that in a given string a sequence of opening brackets is followed by a sequence of closing brackets of the same length. The reason for this is that to count the brackets you would need a stack machine in contrast to a finite state machine but regular expressions are limited to patterns that can be matched using FSMs.
This algorithm doesn't use a regex, but found both solutions as well.
var haystack = 'FJKAUNOJDCUTCRHBYDLXKEODVBWTYPTSHASQQFCPRMLDXIJMYPVOHBDUGSMBLMVUMMZYHULSUIZIMZTICQORLNTOVKVAMQTKHVRIFMNTSLYGHEHFAHWWATLYAPEXTHEPKJUGDVWUDDPRQLUZMSZOJPSIKAIHLTONYXAULECXXKWFQOIKELWOHRVRUCXIAASKHMWTMAJEWGEESLWRTQKVHRRCDYXNTLDSUPXMQTQDFAQAPYBGXPOLOCLFQNGNKPKOBHZWHRXAWAWJKMTJSLDLNHMUGVVOPSAMRUJEYUOBPFNEHPZZCLPNZKWMTCXERPZRFKSXVEZTYCXFRHRGEITWHRRYPWSVAYBUHCERJXDCYAVICPTNBGIODLYLMEYLISEYNXNMCDPJJRCTLYNFMJZQNCLAGHUDVLYIGASGXSZYPZKLAWQUDVNTWGFFYFFSMQWUNUPZRJMTHACFELGHDZEJWFDWVPYOZEVEJKQWHQAHOCIYWGVLPSHFESCGEUCJGYLGDWPIWIDWZZXRUFXERABQJOXZALQOCSAYBRHXQQGUDADYSORTYZQPWGMBLNAQOFODSNXSZFURUNPMZGHTAJUJROIGMRKIZHSFUSKIZJJTLGOEEPBMIXISDHOAIFNFEKKSLEXSJLSGLCYYFEQBKIZZTQQXBQZAPXAAIFQEIXELQEZGFEPCKFPGXULLAHXTSRXDEMKFKABUTAABSLNQBNMXNEPODPGAORYJXCHCGKECLJVRBPRLHORREEIZOBSHDSCETTTNFTSMQPQIJBLKNZDMXOTRBNMTKHHCZQQMSLOAXJQKRHDGZVGITHYGVDXRTVBJEAHYBYRYKJAVXPOKHFFMEPHAGFOOPFNKQAUGYLVPWUJUPCUGGIXGRAMELUTEPYILBIUOCKKUUBJROQFTXMZRLXBAMHSDTEKRRIKZUFNLGTQAEUINMBPYTWXULQNIIRXHHGQDPENXAJNWXULFBNKBRINUMTRBFWBYVNKNKDFR';
var needle = 'ABCDA'; // the order of letters doesn't matter
var letters = {};
needle.split('').forEach(function(ch) {
letters[ch] = letters[ch] || 0;
letters[ch]++;
});
var shortestSubstringLength = haystack.length;
var shortestSubstrings = []; // storage for found substrings
var startingPos = 0;
var length;
var currentPos;
var notFound;
var letterKeys = Object.keys(letters); // unique leters
do {
lettersLeft = JSON.parse(JSON.stringify(letters)); // copy letters count object
notFound = false;
posStart = haystack.length;
posEnd = 0;
letterKeys.forEach(function(ch) {
currentPos = startingPos;
while (!notFound && lettersLeft[ch] > 0) {
currentPos = haystack.indexOf(ch, currentPos);
if (currentPos >= 0) {
lettersLeft[ch]--;
posStart = Math.min(currentPos, posStart);
posEnd = Math.max(currentPos, posEnd);
currentPos++;
} else {
notFound = true;
}
}
});
if (!notFound) {
length = posEnd - posStart + 1;
startingPos = posStart + 1; // starting position for next iteration
}
if (!notFound && length === shortestSubstringLength) {
shortestSubstrings.push(haystack.substr(posStart, length));
}
if (!notFound && length < shortestSubstringLength) {
shortestSubstrings = [haystack.substr(posStart, length)];
shortestSubstringLength = length;
}
} while (!notFound);
console.log(shortestSubstrings);
Maybe not as clear as using regex could be (well, for me regex are never really clear :D ) you can use brute force (not so brute)
Create an index of "valid" points of your string (those with the letters you want) and iterate with a double loop over it getting substrings containing at least 5 of those points, checking that they are valid solutions. Maybe not the most efficient way, but easy to implement, to understand, and probably to optimize.
var haystack="UGDVWUDDPRQLUZMSZOJPSIKAIHLTONYXAULECXXKWFQOIKELWOHRVRUCXIAASKHMWTMAJEWGEESLWRTQKVHRRCDYXNTLDSUPXMQTQDFAQAPYBGXPOLOCLFQNGNKPKOBHZWHRXAWAWJKMTJSLDLNHMUGVVOPSAMRUJEYUOBPFNEHPZZCLPNZKWMTCXERPZRFKSXVEZTYCXFRHRGEITWHRRYPWSVAYBUHCERJXDCYAVICPTNBGIODLYLMEYLISEYNXNMCDPJJRCTLYNFMJZQNCLAGHUDVLYIGASGXSZYPZKLAWQUDVNTWGFFYFFSMQWUNUPZRJMTHACFELGHDZEJWFDWVPYOZEVEJKQWHQAHOCIYWGVLPSHFESCGEUCJGYLGDWPIWIDWZZXRUFXERABQJOXZALQOCSAYBRHXQQGUDADYSORTYZQPWGMBLNAQOFODSNXSZFURUNPMZGHTAJUJROIGMRKIZHSFUSKIZJJTLGOEEPBMIXISDHOAIFNFEKKSLEXSJLSGLCYYFEQBKIZZTQQXBQZAPXAAIFQEIXELQEZGFEPCKFPGXULLAHXTSRXDEMKFKABUTAABSLNQBNMXNEPODPGAORYJXCHCGKECLJVRBPRLHORREEIZOBSHDSCETTTNFTSMQPQIJBLKNZDMXOTRBNMTKHHCZQQMSLOAXJQKRHDGZVGITHYGVDXRTVBJEAHYBYRYKJAVXPOKHFFMEPHAGFOOPFNKQAUGYLVPWUJUPCUGGIXGR";
var needle="ABCD";
var size=haystack.length;
var candidate_substring="";
var minimal_length=size;
var solutions=new Array();
var points=Array();
for(var i=0;i<size;i++){
if(needle.indexOf(haystack[i])>-1) points.push(i);
}
var limit_i= points.length-4;
var limit_k= points.length;
for (var i=0;i<limit_i;i++){
for(var k=i;k<limit_k;k++){
if(points[k]-points[i]+1<=minimal_length){
candidate_substring=haystack.substr(points[i],points[k]-points[i]+1);
if(is_valid(candidate_substring)){
solutions.push(candidate_substring);
if(candidate_substring.length < minimal_length) minimal_length=candidate_substring.length;
}
}
}
}
document.write('<p>Solution length:'+minimal_length+'<p>');
for(var i=0;i<solutions.length;i++){
if(solutions[i].length<=minimal_length) document.write('<p>Solution:'+solutions[i]+'<p>');
}
function is_valid(candidate_substring){
//verify we've got all characters
for(var j=0;j<candidate_substring.length;j++){
if(candidate_substring.indexOf(needle.charAt(j))<0) return false;
}
//...and verify we have two "A"
if(candidate_substring.indexOf("A")==candidate_substring.lastIndexOf("A")) return false;
return true;
}
Just had this problem in an interview as a coding assignment and came up with another solution, (it's not as optimal as the one above but maybe it's easier to understand).
function MinWindowSubstring(strArr) {
const N = strArr[0];
const K = strArr[1];
const letters = {};
K.split('').forEach( (character) => {
letters[character] = letters[character] ? letters[character] + 1 : 1;
});
let possibleSequencesList = [];
const letterKeys = Object.keys(letters);
for(let i=0; i< N.length; i++) {
const char = N[i];
if (new String(letterKeys).indexOf(char) !== -1) {
// found a character in the string
// update all previus sequences
possibleSequencesList.forEach((seq) => {
if(!seq.sequenceComplete) {
seq[char] = seq[char]-1;
seq.lastIndex = i;
// check if sequence is complete
var sequenceComplete = true;
letterKeys.forEach( (letter) => {
if(seq[letter] > 0) {
sequenceComplete = false;
}
});
seq.sequenceComplete = sequenceComplete
}
})
// create a new sequence starting from it
const newSeq = {
startPoint: i,
lastIndex: i,
sequenceComplete: false,
...letters
}
newSeq[char] = newSeq[char]-1;
possibleSequencesList.push(newSeq);
}
}
// cleanup sequences
let sequencesList = possibleSequencesList.filter(sequence => sequence.sequenceComplete);
let output = [];
let minLength = N.length;
// find the smalles one
sequencesList.forEach( seq => {
if( (seq.lastIndex - seq.startPoint) < minLength) {
minLength = seq.lastIndex - seq.startPoint;
output = N.substring(seq.startPoint, seq.lastIndex + 1);
}
})
return output;
}

Regex to find price in HTML

Disclaimer: I know that parsing HTML with regex is not the correct approach. I am actually just trying to parse text inside the HTML.
I am parsing several pages, and I am looking for prices. Here is what I have so far:
var all = document.body.querySelectorAll(":not(script)");
var regex = /\$[0-9,]+(\.[0-9]{2})?/g;
for (var i = 0; i < all.length; i++) {
var node_value = all[i].nodeValue;
for (var j = 0; j < all[i].childNodes.length; j++) {
var node_value = all[i].childNodes[j].nodeValue;
if (node_value !== null) {
var matches = node_value.match(regex);
if (matches !== null && matches.length > 0) {
alert("that's a match");
}
}
}
}
This particular code can get me prices like this:
<div>This is the current price: <span class="current">$60.00</span></div>
However, there are some prices that have the following structure:
<div>This is the current price: <sup>$</sup><span>80.00</span></div>
How could I improve the algorithm in order to find those prices? Shall I look in the first for loop for <sup>symbol</sup><span>price</span> with regex?
Important: Once a match, I need to findout which DOM element is holding that price. The most inner element that is holding the price. So for example:
<div><span>$80.00</span></div>
I would need to say that is the element that is holding the price, not the div.
Try this:
var text = document.body.textContent || document.body.innerText,
regex = /\$\s*[0-9,]+(?:\s*\.\s*\d{2})?/g,
match = text.match(regex);
if( match) {
match = match[0].replace(/\s/g,"");
alert("Match found: "+match);
}
Using a recursive search:
function findPrice(node) {
node = node || document.body;
var text = node.textContent || node.innerText,
regex = /\$\s*[0-9,]+(?:\s*\.\s*\d{2})?/,
match = text.match(regex);
if( match) {
var children = node.children, l = children.length, i;
for( i=0; i<l; i++) {
if( findPrice(children[i])) {
return children[i];
}
}
// if no children matched, then this is the narrowest container
return node;
}
else return false;
}
var result = findPrice();
If you can choose your browser, you might use XPath to pre-select your candidates. The following code finds candidates nodes. I tried it in Firefox 25. You might also want to look at What browsers support Xpath 2.0? and http://www.yaldex.com/ajax-tutorial-4/BBL0029.html for cross-browser approaches.
<html><head><script type="text/javascript">
function func() {
//span containing digits preceeded by superscript dollar sign
var xpathExpr1 = "//span[translate(text(),'0123456789.,','')!=text()][preceding-sibling::sup[text()='$']]";
//span containing digits and starting with dollar sign
var xpathExpr2 = "//span[translate(text(),'0123456789.,','')!=text() and contains(text(),'$')]";
var xpathExpr3 = xpathExpr1 + "|" + xpathExpr2; // union
var contextNode = document.body;
var namespaceResolver = function(prefix){return "";}
var resultType = XPathResult.UNORDERED_NODE_ITERATOR_TYPE;
var xpathResult = document.evaluate(xpathExpr1, contextNode, namespaceResolver, resultType, null);
alert(xpathResult);
var node;
while ((node = xpathResult.iterateNext()) != null) {
alert(node.textContent);
}
}
</script></head>
<body onload="func()"> aaa
<sup>$</sup><span>80.00</span> bbb
<span>$129</span> ccc
<sup>$</sup><span>ABC</span> ddd
</body></html>

Javascript token replace/append

I have a string that looks something like the following 'test:1;hello:five;just:23'. With this string I need to be able to do the following.
....
var test = MergeTokens('test:1;hello:five;just:23', 'yes:23;test:567');
...
The end result should be 'test:567;hello:five;just:23;yes:23' (note the exact order of the tokens is not that important).
Just wondering if anyone has any smart ideas of how to go about this. I was thinking a regex replace on each of the tokens on right and if a replace didn't occur because there was not match just append it. But maybe there is better way.
Cheers
Anthony
Edit: The right side should override the left. The left being what was originally there and the right side being the new content. Another way of looking at it, is that you only keep the tokens on the left if they don't exist on the right and you keep all the tokens on the right.
#Ferdinand
Thanks for the reply. The problem is the efficiency with which the solution you proposed. I was initially thinking down similar lines but discounted it due to the O(n*z) complexity of the merge (where n and z is the number tokens on the left and right respectively) let alone the splitting and joining.
Hence why I was trying to look down the path of a regex. Maybe behind the scenes, regex is just as bad or worse, but having a regex which removes any token from the left string that exists on the right (O(n) for the total amount of token on the right) and then just add the 2 string together (i.e. vat test = test1 + test2) seems more efficient. thanks
I would use join() and split() to create some utility functions to pack and unpack your token data to an object:
// Unpacks a token string into an object.
function splitTokens(str) {
var data = {}, pairs = str.split(';');
for (var i = 0; i < pairs.length; ++i) {
var pair = pairs[i].split(':');
data[pair[0]] = pair[1];
}
return data;
}
// Packs an object into a token string.
function joinTokens(data) {
var pairs = [];
for (var key in data) {
pairs.push(key + ":" + data[key]);
}
return pairs.join(';');
}
Using these, merging is easy:
// Merges all token strings (supports a variable number of arguments).
function mergeTokens() {
var data = {};
for (var i = 0; i < arguments.length; ++i) {
var d = splitTokens(arguments[i]);
for (var key in d) {
data[key] = d[key];
}
}
return joinTokens(data);
}
The utility functions are also useful if you want to extract some keys (say,"test") and/or check for existence:
var data = splitTokens(str);
if (data["test"] === undefined) {
// Does not exist
} else {
alert("Value of 'test': " + data["test"]);
}
The following is what I ended thiking about. What do you guys recon?
Thanks
Anthony
function Tokenizer(input, tokenSpacer, tokenValueSpacer) {
this.Tokenizer = {};
this.TokenSpacer = tokenSpacer;
this.TokenValueSpacer = tokenValueSpacer;
if (input) {
var TokenizerParts = input.split(this.TokenSpacer);
var i, nv;
for (i = 0; i < TokenizerParts.length; i++) {
nv = TokenizerParts[i].split(this.TokenValueSpacer);
this.Tokenizer[nv[0]] = nv[1];
}
}
}
Tokenizer.prototype.add = function(name, value) {
if (arguments.length == 1 && arguments[0].constructor == Object) {
this.addMany(arguments[0]);
return;
}
this.Tokenizer[name] = value;
}
Tokenizer.prototype.addMany = function(newValues) {
for (nv in newValues) {
this.Tokenizer[nv] = newValues[nv];
}
}
Tokenizer.prototype.remove = function(name) {
if (arguments.length == 1 && arguments[0].constructor == Array) {
this.removeMany(arguments[0]);
return;
}
delete this.Tokenizer[name];
}
Tokenizer.prototype.removeMany = function(deleteNames) {
var i;
for (i = 0; i < deleteNames.length; i++) {
delete this.Tokenizer[deleteNames[i]];
}
}
Tokenizer.prototype.MergeTokenizers = function(newTokenizer) {
this.addMany(newTokenizer.Tokenizer);
}
Tokenizer.prototype.getTokenString = function() {
var nv, q = [];
for (nv in this.Tokenizer) {
q[q.length] = nv + this.TokenValueSpacer + this.Tokenizer[nv];
}
return q.join(this.TokenSpacer);
}
Tokenizer.prototype.toString = Tokenizer.prototype.getTokenString;
i am a few years late, but i think this is what you are looking for:
function MergeTokens(input, replace){
var replaceTokens = replace.split(";");
for(i=0; i<replaceTokens.length; i++){
var pair = replaceTokens[i].split(":");
var result = input;
regString = "\\b" + pair[0] + ":[\\w]*\\b";
var reg = new RegExp(regString);
if(reg.test(result)){
result = result.replace(reg, replaceTokens[i]);
}
else{
result = result + replaceTokens[i];
}
}
return result;
}

Javascript word-count for any given DOM element

I'm wondering if there's a way to count the words inside a div for example. Say we have a div like so:
<div id="content">
hello how are you?
</div>
Then have the JS function return an integer of 4.
Is this possible? I have done this with form elements but can't seem to do it for non-form ones.
Any ideas?
g
If you know that the DIV is only going to have text in it, you can KISS:
var count = document.getElementById('content').innerHTML.split(' ').length;
If the div can have HTML tags in it, you're going to have to traverse its children looking for text nodes:
function get_text(el) {
ret = "";
var length = el.childNodes.length;
for(var i = 0; i < length; i++) {
var node = el.childNodes[i];
if(node.nodeType != 8) {
ret += node.nodeType != 1 ? node.nodeValue : get_text(node);
}
}
return ret;
}
var words = get_text(document.getElementById('content'));
var count = words.split(' ').length;
This is the same logic that the jQuery library uses to achieve the effect of its text() function. jQuery is a pretty awesome library that in this case is not necessary. However, if you find yourself doing a lot of DOM manipulation or AJAX then you might want to check it out.
EDIT:
As noted by Gumbo in the comments, the way we are splitting the strings above would count two consecutive spaces as a word. If you expect that sort of thing (and even if you don't) it's probably best to avoid it by splitting on a regular expression instead of on a simple space character. Keeping that in mind, instead of doing the above split, you should do something like this:
var count = words.split(/\s+/).length;
The only difference being on what we're passing to the split function.
Paolo Bergantino's second solution is incorrect for empty strings or strings that begin or end with whitespaces. Here's the fix:
var count = !s ? 0 : (s.split(/^\s+$/).length === 2 ? 0 : 2 +
s.split(/\s+/).length - s.split(/^\s+/).length - s.split(/\s+$/).length);
Explanation: If the string is empty, there are zero words; If the string has only whitespaces, there are zero words; Else, count the number of whitespace groups without the ones from the beginning and the end of the string.
string_var.match(/[^\s]+/g).length
seems like it's a better method than
string_var.split(/\s+/).length
At least it won't count "word " as 2 words -- ['word'] rather than ['word', '']. And it doesn't really require any funny add-on logic.
Or just use Countable.js to do the hard job ;)
document.deepText= function(hoo){
var A= [];
if(hoo){
hoo= hoo.firstChild;
while(hoo!= null){
if(hoo.nodeType== 3){
A[A.length]= hoo.data;
}
else A= A.concat(arguments.callee(hoo));
hoo= hoo.nextSibling;
}
}
return A;
}
I'd be fairly strict about what a word is-
function countwords(hoo){
var text= document.deepText(hoo).join(' ');
return text.match(/[A-Za-z\'\-]+/g).length;
}
alert(countwords(document.body))
Or you can do this:
function CountWords (this_field, show_word_count, show_char_count) {
if (show_word_count == null) {
show_word_count = true;
}
if (show_char_count == null) {
show_char_count = false;
}
var char_count = this_field.value.length;
var fullStr = this_field.value + " ";
var initial_whitespace_rExp = /^[^A-Za-z0-9]+/gi;
var left_trimmedStr = fullStr.replace(initial_whitespace_rExp, "");
var non_alphanumerics_rExp = rExp = /[^A-Za-z0-9]+/gi;
var cleanedStr = left_trimmedStr.replace(non_alphanumerics_rExp, " ");
var splitString = cleanedStr.split(" ");
var word_count = splitString.length -1;
if (fullStr.length <2) {
word_count = 0;
}
if (word_count == 1) {
wordOrWords = " word";
} else {
wordOrWords = " words";
}
if (char_count == 1) {
charOrChars = " character";
} else {
charOrChars = " characters";
}
if (show_word_count & show_char_count) {
alert ("Word Count:\n" + " " + word_count + wordOrWords + "\n" + " " + char_count + charOrChars);
} else {
if (show_word_count) {
alert ("Word Count: " + word_count + wordOrWords);
} else {
if (show_char_count) {
alert ("Character Count: " + char_count + charOrChars);
}
}
}
return word_count;
}
The get_text function in Paolo Bergantino's answer didn't work properly for me when two child nodes have no space between them. eg <h1>heading</h1><p>paragraph</p> would be returned as headingparagraph (notice lack of space between the words). So prepending a space to the nodeValue fixes this. But it introduces a space at the front of the text but I found a word count function that trims it off (plus it uses several regexps to ensure it counts words only). Word count and edited get_text functions below:
function get_text(el) {
ret = "";
var length = el.childNodes.length;
for(var i = 0; i < length; i++) {
var node = el.childNodes[i];
if(node.nodeType != 8) {
ret += node.nodeType != 1 ? ' '+node.nodeValue : get_text(node);
}
}
return ret;
}
function wordCount(fullStr) {
if (fullStr.length == 0) {
return 0;
} else {
fullStr = fullStr.replace(/\r+/g, " ");
fullStr = fullStr.replace(/\n+/g, " ");
fullStr = fullStr.replace(/[^A-Za-z0-9 ]+/gi, "");
fullStr = fullStr.replace(/^\s+/, "");
fullStr = fullStr.replace(/\s+$/, "");
fullStr = fullStr.replace(/\s+/gi, " ");
var splitString = fullStr.split(" ");
return splitString.length;
}
}
EDIT
kennebec's word counter is really good. But the one I've found includes a number as a word which is what I needed. Still, that's easy to add to kennebec's. But kennebec's text retrieval function will have the same problem.
This should account for preceding & trailing whitespaces
const wordCount = document.querySelector('#content').innerText.trim().split(/\s+/).length;
string_var.match(/[^\s]+/g).length - 1;

Categories