Jquery how check if an array string value is in a textarea - javascript

var SPECIAL_CHARS = Array("\x5B", "\x5C", "\x5D", "\x5E", "\x7B", "\x7C", "\x7D", "\x7E", 8364, 49792, 14844588, '�', '%', '/', '&','�','!','"','(',')','=','[','\\',']','^','{','|','}','~');
var dynamic_variables = Array("${nome}");
function charUsed(el) {
var base = el;
var count = base.val().length;
var chars = base.val().split("");
var numberOfSpecialChars = 0;
for (var k=0; k<chars.length; k++) {
if ($.inArray(chars[k], SPECIAL_CHARS) > -1) {
numberOfSpecialChars++;
}
}
if($.inArray(base.val(),dynamic_variables) != -1)
{
numberOfSpecialChars = numberOfSpecialChars+40;
}
return count + numberOfSpecialChars;
} // function
Basically, I need to count the textarea length and if this contains some special char (array SPECIAL_CHARS) count X 2 (till here, all goes right).
Now I need to add some other words (no more chars), like ${nome}
Pseudocode:
if an element of array is in base.val(), add 40 to the numberOf SpecialChars
Of course, my code doesn't function.

You can loop through the elements of dynamic_variables Array and check they are present in the main string by using Array.indexOf(). If so add 40.
instead of
if($.inArray(base.val(),dynamic_variables) != -1)
{
numberOfSpecialChars = numberOfSpecialChars+40;
}
do
for(var i=0;i<dynamic_variables.length;i++)
{
if(base.val().indexOf(dynamic_variables[i]) != -1) {
numberOfSpecialChars += 40;
}
}

var SpecialChars = ['\x5B', '\x5C', '\x5D', '\x5E', '\x7B'];
var SpecialWords = ['foo', 'bar'];
function count(text)
{
var result = text.length;
// more effective lookup on big text and special char list
var charLookup = SpecialChars.reduce(function(r, v)
{
r[v] = true;
return r;
}, {});
// count special chars
result += text.split('').reduce(function(r, v)
{
if(v in charLookup)
{
r += 1;
}
return r;
}, 0);
// count special words
result += SpecialWords.reduce(function(r, v)
{
r += (text.split(v).length - 1) * 40;
return r;
}, 0);
return result;
}
var result = count(document.getElementById('text').value);
console.assert(result == 'bar some } text [ bar'.length + 2 * 1 + 2 * 40);
document.getElementById('result').innerHTML = result;
<textarea id='text'>bar some { text [ bar</textarea>
<div>Result: <span id='result'></span></div>

Related

I am looking to create an Array from the Output of Counts conducted by my string

I am trying to make my below JS code, Develop and Array of
Count of each vowels
1) Count of A
2) Count of E
3) Count of I
4) Count of 0
5) Count of U
Upon reading a string the code, generates the number of times each vowel occurs.
But now I need to create an array that display, the Vowel occurring the most frequently
I know it is something to do with creating a variable for
var biggestSoFar etc... But how to piece it together I am having problems.
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
function count_all() {
var str = document.getElementById('txtname').value;
var count9=0, totalvowels="";
var count2=0, totalconsonants="";
var count3=0, total_digits="";
var count4=0, totalA="";
var count5=0, totalE="";
var count6=0, totalI="";
var count7=0, totalO="";
var count8=0, totalU="";
for (var i = 0; i < str.length; i++) {
if (str.charAt(i).match(/[a-zA-Z]/) !== null) {
if (str.charAt(i).match(/[aeiouAEIOU]/))
{
totalvowels = totalvowels + str.charAt(i);
count9++;
}
if (str.charAt(i).match(/[bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ]/))
{
totalconsonants = totalconsonants + str.charAt(i);
count2++;
}
if (str.charAt(i).match(/[aA]/))
{
totalA = totalA + str.charAt(i);
count4++;
}
if (str.charAt(i).match(/[eE]/))
{
totalE = totalE + str.charAt(i);
count5++;
}
if (str.charAt(i).match(/[iI]/))
{
totalI = totalI + str.charAt(i);
count6++;
}
if (str.charAt(i).match(/[oO]/))
{
totalO = totalO + str.charAt(i);
count7++;
}
if (str.charAt(i).match(/[uU]/))
{
totalU = totalU + str.charAt(i);
count8++;
}
}
function retnum(str1) {
var num = str1.replace(/[^0-9]/g, '');
return num;
}
function count_digits(str2) {
var num2 = str2.replace(/[^0-9]/g,"").length;
return num2;
}
}
document.getElementById('TotalU').value = count8;
document.getElementById('TotalO').value = count7;
document.getElementById('TotalI').value = count6;
document.getElementById('TotalE').value = count5;
document.getElementById('TotalA').value = count4;
document.getElementById('consonant_counter').value = count2;
document.getElementById('total_consonants').value = totalconsonants;
document.getElementById('vowels').value = totalvowels;
document.getElementById('vocount').value = count9;
document.getElementById('digits1').value = count_digits(str);
document.getElementById('digits2').value = retnum(str);
}
function clear_all()
{
document.getElementById('TotalU').value = "";
document.getElementById('TotalO').value = "";
document.getElementById('TotalI').value = "";
document.getElementById('TotalE').value = "";
document.getElementById('TotalA').value = "";
document.getElementById('consonant_counts').value ="";
document.getElementById('total_consonants').value ="";
document.getElementById('vowels').value = "";
document.getElementById('vcount').value = "";
document.getElementById('digits1').value ="";
document.getElementById('digits2').value ="";
document.getElementById('txtname').value ="";
document.getElementById('txtname').focus();
}
Why not something like this? You supply the string and it will return an object with a count for each vowel?
function countVowels(str) {
var result = {};
result.a = 0;
result.e = 0;
result.i = 0;
result.o = 0;
result.u = 0;
for (var i = 0, len = str.length; i < len; i++) {
switch(str[i].toLowerCase()) {
case "a":
result.a = result.a + 1;
break;
case "e":
result.e = result.e + 1;
break;
case "i":
result.i = result.i + 1;
break;
case "o":
result.o = result.o + 1;
break;
case "u":
result.u = result.u + 1;
break;
default:
break;
}
}
return result;
}
When you get the result object back you can then do your checks to see which is the most occurring.
One other way of doing this could be like
var str = "When found, separator is removed from the string and the substrings are returned in an array. If separator is not found or is omitted, the array contains one element consisting of the entire string. If separator is an empty string, str is converted to an array of characters. If separator is a regular expression that contains capturing parentheses, then each time separator is matched, the results (including any undefined results) of the capturing parentheses are spliced into the output array. However, not all browsers support this capability.",
vowels = {"a":0,"e":0,"i":0,"o":0,"u":0},
ok = Object.keys(vowels);
maxvow = "";
for (var i = 0, len = str.length; i < len; i++ ){
var chr = str[i].toLowerCase();
chr in vowels && ++vowels[chr]; // or ok.includes(chr) && ++vowels[chr];
}
maxvow = ok.reduce((p,k) => vowels[k] > p[0] ? [vowels[k],k] : p,[0,""])[1];
console.log(vowels);
console.log(maxvow);
You could split the string and use an object for counting the vowels and other letters.
var count = { a: 0, e: 0, i: 0, o: 0, u: 0, other: 0 },
test = 'Es war einmal ein schwarzes Kaninchen';
test.toLowerCase().split(/(?=[a-z])/).forEach(function (c, i) {
if (c[0] in count) {
count[c[0]]++;
} else {
count.other++;
}
});
console.log(count);

Longest substring in alphabetical order Javascript

Seeing all the people talking about longest substring in alphabetical order in Python, I have decided to try it in JS.
The function should look for the longest substring inside a given string, where letters are ordered alphabetically.
Here is what I have:
var s = 'azcbobobegghakl'
function substringChecker(s) {
var longestSub = "";
for (var i = 0; i < s.length; i++) {
var count = 0;
var currSub = "";
while((i+count)<=s.length){
var curr = i+count;
var next = curr+1;
var prev = curr-1;
if(curr !== s.length-1) {
if(s[curr] <= s[next]){
currSub += s[curr]
} else {
break;
}
} else {
if(s[curr]>s[prev]) {
currSub += s[curr];
}
}
count++;
}
if(currSub.length >= longestSub.length) {
longestSub = currSub;
}
};
return longestSub;
}
var result = substringChecker(s);;
console.log(result);
The funny thing it works great for all test cases I can come up with, but this one. The result should be "beggh" but it is "begg" instead. Why is the h not showing up, what am I missing?
The algorithm can be linear, I think you are overcomplicating it placing loops inside loops.
I would use something like
function substringChecker(s) {
var longestSub = "",
length = 0,
start = 0,
prev = s[0];
for (var i = 1; i <= s.length; ++i) {
if(i == s.length || s[i] < prev) {
if(length < i-start) {
longestSub = s.substring(start, i);
length = i-start;
}
start = i;
}
prev = s[i];
};
return longestSub;
}
document.write(substringChecker('azcbobobegghakl'));
first I made list of A-z
then check each letter and compare it with the next letter and save it in subString and...
function longest(str) {
//handle the case str is just one letter
if (str.length === 1) return str;
// create a list of alphabet A to Z
const alphabets = [...Array(26)].map(_ => String.fromCharCode(i++), (i = 97));
let longString = "";
let subSting = "";
for (let x = 0; x < str.length; x++) {
let char = str.charAt(x);
const nextChar = str.charAt(x + 1);
let charIndex = alphabets.findIndex(alphabet => alphabet === char);
let nextCharIndex = alphabets.findIndex(alphabet => alphabet === nextChar);
if (nextCharIndex >= charIndex) {
subSting = subSting + nextChar;
} else {
if (!subSting.length) {
subSting = subSting + char;
}
longString = subSting.length > longString.length ? subSting : longString;
subSting = "";
}
}
return longString;
}
console.log(longest("zyba"));

How can i split a string into words without split function in javascript

How can I split string into words without using split function in javascript. I wonder for a little help.
this is my code:
function trocearCadena(cadena) {
var posEspacios = buscarEspacios(cadena);
var palabras = [];
var j = 0;
while (true) {
var pos = posEspacios.shift();
var subcad = (j == 0) ? cadena.substring(0, pos) : cadena.substring(j + 1, pos);
palabras.push(subcad);
j += pos;
if (j > cadena.length) {
var ultpal = cadena.substring(pos + 1);
palabras.push(ultpal);
break;
}
}
return palabras;
}
function buscarEspacios(cadena) {
var espacios = [];
var pos = -1;
do{
pos = cadena.indexOf(" ", ++pos);
if (pos != -1) espacios.push(pos);
} while (pos != -1);
return espacios;
}
I don't understand what your variable names mean, so I wasn't able to fix the code. Here's another one:
str = "How can I split string into words without using split function in javascript."
var words = [""];
for(var i = 0; i < str.length; i++)
if(str[i] !== " ")
words[words.length - 1] += str[i];
else if(words[words.length - 1])
words.push("");
document.write(words)
Assuming that you also can't use what #Oriol suggested, I would use a recursive function like in the following example:
function _split(s, arr) {
var
str = s.trim(),
words = arr || [],
i = str.indexOf(' ');
if(i !== -1) {
words.push(str.substr(0, i)); // collect the next word
return _split(str.slice(i + 1), words); // recur with new string and words array to keep collecting
} else {
words.push(str); // collect the last word
return words;
}
}
Usage:
_split(' one two three ');
//=> ["one", "two", "three"]

Find the characters in a string which are not duplicated

I have to make a function in JavaScript that removes all duplicated letters in a string. So far I've been able to do this: If I have the word "anaconda" it shows me as a result "anaconda" when it should show "cod". Here is my code:
function find_unique_characters( string ){
var unique='';
for(var i=0; i<string.length; i++){
if(unique.indexOf(string[i])==-1){
unique += string[i];
}
}
return unique;
}
console.log(find_unique_characters('baraban'));
We can also now clean things up using filter method:
function removeDuplicateCharacters(string) {
return string
.split('')
.filter(function(item, pos, self) {
return self.indexOf(item) == pos;
})
.join('');
}
console.log(removeDuplicateCharacters('baraban'));
Working example:
function find_unique_characters(str) {
var unique = '';
for (var i = 0; i < str.length; i++) {
if (str.lastIndexOf(str[i]) == str.indexOf(str[i])) {
unique += str[i];
}
}
return unique;
}
console.log(find_unique_characters('baraban'));
console.log(find_unique_characters('anaconda'));
If you only want to return characters that appear occur once in a string, check if their last occurrence is at the same position as their first occurrence.
Your code was returning all characters in the string at least once, instead of only returning characters that occur no more than once. but obviously you know that already, otherwise there wouldn't be a question ;-)
Just wanted to add my solution for fun:
function removeDoubles(string) {
var mapping = {};
var newString = '';
for (var i = 0; i < string.length; i++) {
if (!(string[i] in mapping)) {
newString += string[i];
mapping[string[i]] = true;
}
}
return newString;
}
With lodash:
_.uniq('baraban').join(''); // returns 'barn'
You can put character as parameter which want to remove as unique like this
function find_unique_characters(str, char){
return [...new Set(str.split(char))].join(char);
}
function find_unique_characters(str, char){
return [...new Set(str.split(char))].join(char);
}
let result = find_unique_characters("aaaha ok yet?", "a");
console.log(result);
//One simple way to remove redundecy of Char in String
var char = "aaavsvvssff"; //Input string
var rst=char.charAt(0);
for(var i=1;i<char.length;i++){
var isExist = rst.search(char.charAt(i));
isExist >=0 ?0:(rst += char.charAt(i) );
}
console.log(JSON.stringify(rst)); //output string : avsf
For strings (in one line)
removeDuplicatesStr = str => [...new Set(str)].join('');
For arrays (in one line)
removeDuplicatesArr = arr => [...new Set(arr)]
Using Set:
removeDuplicates = str => [...new Set(str)].join('');
Thanks to David comment below.
DEMO
function find_unique_characters( string ){
unique=[];
while(string.length>0){
var char = string.charAt(0);
var re = new RegExp(char,"g");
if (string.match(re).length===1) unique.push(char);
string=string.replace(re,"");
}
return unique.join("");
}
console.log(find_unique_characters('baraban')); // rn
console.log(find_unique_characters('anaconda')); //cod
​
var str = 'anaconda'.split('');
var rmDup = str.filter(function(val, i, str){
return str.lastIndexOf(val) === str.indexOf(val);
});
console.log(rmDup); //prints ["c", "o", "d"]
Please verify here: https://jsfiddle.net/jmgy8eg9/1/
Using Set() and destructuring twice is shorter:
const str = 'aaaaaaaabbbbbbbbbbbbbcdeeeeefggggg';
const unique = [...new Set([...str])].join('');
console.log(unique);
Yet another way to remove all letters that appear more than once:
function find_unique_characters( string ) {
var mapping = {};
for(var i = 0; i < string.length; i++) {
var letter = string[i].toString();
mapping[letter] = mapping[letter] + 1 || 1;
}
var unique = '';
for (var letter in mapping) {
if (mapping[letter] === 1)
unique += letter;
}
return unique;
}
Live test case.
Explanation: you loop once over all the characters in the string, mapping each character to the amount of times it occurred in the string. Then you iterate over the items (letters that appeared in the string) and pick only those which appeared only once.
function removeDup(str) {
var arOut = [];
for (var i=0; i < str.length; i++) {
var c = str.charAt(i);
if (c === '_') continue;
if (str.indexOf(c, i+1) === -1) {
arOut.push(c);
}
else {
var rx = new RegExp(c, "g");
str = str.replace(rx, '_');
}
}
return arOut.join('');
}
I have FF/Chrome, on which this works:
var h={};
"anaconda".split("").
map(function(c){h[c] |= 0; h[c]++; return c}).
filter(function(c){return h[c] == 1}).
join("")
Which you can reuse if you write a function like:
function nonRepeaters(s) {
var h={};
return s.split("").
map(function(c){h[c] |= 0; h[c]++; return c}).
filter(function(c){return h[c] == 1}).
join("");
}
For older browsers that lack map, filter etc, I'm guessing that it could be emulated by jQuery or prototype...
This code worked for me on removing duplicate(repeated) characters from a string (even if its words separated by space)
Link: Working Sample JSFiddle
/* This assumes you have trim the string and checked if it empty */
function RemoveDuplicateChars(str) {
var curr_index = 0;
var curr_char;
var strSplit;
var found_first;
while (curr_char != '') {
curr_char = str.charAt(curr_index);
/* Ignore spaces */
if (curr_char == ' ') {
curr_index++;
continue;
}
strSplit = str.split('');
found_first = false;
for (var i=0;i<strSplit.length;i++) {
if(str.charAt(i) == curr_char && !found_first)
found_first = true;
else if (str.charAt(i) == curr_char && found_first) {
/* Remove it from the string */
str = setCharAt(str,i,'');
}
}
curr_index++;
}
return str;
}
function setCharAt(str,index,chr) {
if(index > str.length-1) return str;
return str.substr(0,index) + chr + str.substr(index+1);
}
Here's what I used - haven't tested it for spaces or special characters, but should work fine for pure strings:
function uniquereduce(instring){
outstring = ''
instringarray = instring.split('')
used = {}
for (var i = 0; i < instringarray.length; i++) {
if(!used[instringarray[i]]){
used[instringarray[i]] = true
outstring += instringarray[i]
}
}
return outstring
}
Just came across a similar issue (finding the duplicates). Essentially, use a hash to keep track of the character occurrence counts, and build a new string with the "one-hit wonders":
function oneHitWonders(input) {
var a = input.split('');
var l = a.length;
var i = 0;
var h = {};
var r = "";
while (i < l) {
h[a[i]] = (h[a[i]] || 0) + 1;
i += 1;
}
for (var c in h) {
if (h[c] === 1) {
r += c;
}
}
return r;
}
Usage:
var a = "anaconda";
var b = oneHitWonders(a); // b === "cod"
Try this code, it works :)
var str="anaconda";
Array.prototype.map.call(str,
(obj,i)=>{
if(str.indexOf(obj,i+1)==-1 && str.lastIndexOf(obj,i-1)==-1){
return obj;
}
}
).join("");
//output: "cod"
This should work using Regex ;
NOTE: Actually, i dont know how this regex works ,but i knew its 'shorthand' ,
so,i would have Explain to you better about meaning of this /(.+)(?=.*?\1)/g;.
this regex only return to me the duplicated character in an array ,so i looped through it to got the length of the repeated characters .but this does not work for a special characters like "#" "_" "-", but its give you expected result ; including those special characters if any
function removeDuplicates(str){
var REPEATED_CHARS_REGEX = /(.+)(?=.*?\1)/g;
var res = str.match(REPEATED_CHARS_REGEX);
var word = res.slice(0,1);
var raw = res.slice(1);
var together = new String (word+raw);
var fer = together.toString();
var length = fer.length;
// my sorted duplicate;
var result = '';
for(var i = 0; i < str.length; i++) {
if(result.indexOf(str[i]) < 0) {
result += str[i];
}
}
return {uniques: result,duplicates: length};
} removeDuplicates('anaconda')
The regular expression /([a-zA-Z])\1+$/ is looking for:
([a-zA-Z]]) - A letter which it captures in the first group; then
\1+ - immediately following it one or more copies of that letter; then
$ - the end of the string.
Changing it to /([a-zA-Z]).*?\1/ instead searches for:
([a-zA-Z]) - A letter which it captures in the first group; then
.*? - zero or more characters (the ? denotes as few as possible); until
\1 - it finds a repeat of the first matched character.
I have 3 loopless, one-line approaches to this.
Approach 1 - removes duplicates, and preserves original character order:
var str = "anaconda";
var newstr = str.replace(new RegExp("[^"+str.split("").sort().join("").replace(/(.)\1+/g, "").replace(/[.?*+^$[\]\\(){}|-]/g, "\\$&")+"]","g"),"");
//cod
Approach 2 - removes duplicates but does NOT preserve character order, but may be faster than Approach 1 because it uses less Regular Expressions:
var str = "anaconda";
var newstr = str.split("").sort().join("").replace(/(.)\1+/g, "");
//cdo
Approach 3 - removes duplicates, but keeps the unique values (also does not preserve character order):
var str = "anaconda";
var newstr = str.split("").sort().join("").replace(/(.)(?=.*\1)/g, "");
//acdno
function removeduplicate(str) {
let map = new Map();
// n
for (let i = 0; i < str.length; i++) {
if (map.has(str[i])) {
map.set(str[i], map.get(str[i]) + 1);
} else {
map.set(str[i], 1);
}
}
let res = '';
for (let i = 0; i < str.length; i++) {
if (map.get(str[i]) === 1) {
res += str[i];
}
}
// o (2n) - > O(n)
// space o(n)
return res;
}
If you want your function to just return you a unique set of characters in your argument, this piece of code might come in handy.
Here, you can also check for non-unique values which are being recorded in 'nonUnique' titled array:
function remDups(str){
if(!str.length)
return '';
var obj = {};
var unique = [];
var notUnique = [];
for(var i = 0; i < str.length; i++){
obj[str[i]] = (obj[str[i]] || 0) + 1;
}
Object.keys(obj).filter(function(el,ind){
if(obj[el] === 1){
unique+=el;
}
else if(obj[el] > 1){
notUnique+=el;
}
});
return unique;
}
console.log(remDups('anaconda')); //prints 'cod'
If you want to return the set of characters with their just one-time occurrences in the passed string, following piece of code might come in handy:
function remDups(str){
if(!str.length)
return '';
var s = str.split('');
var obj = {};
for(var i = 0; i < s.length; i++){
obj[s[i]] = (obj[s[i]] || 0) + 1;
}
return Object.keys(obj).join('');
}
console.log(remDups('anaconda')); //prints 'ancod'
function removeDuplicates(str) {
var result = "";
var freq = {};
for(i=0;i<str.length;i++){
let char = str[i];
if(freq[char]) {
freq[char]++;
} else {
freq[char] =1
result +=char;
}
}
return result;
}
console.log(("anaconda").split('').sort().join('').replace(/(.)\1+/g, ""));
By this, you can do it in one line.
output: 'cdo'
function removeDuplicates(string){
return string.split('').filter((item, pos, self)=> self.indexOf(item) == pos).join('');
}
the filter will remove all characters has seen before using the index of item and position of the current element
Method 1 : one Simple way with just includes JS- function
var data = 'sssssddddddddddfffffff';
var ary = [];
var item = '';
for (const index in data) {
if (!ary.includes(data[index])) {
ary[index] = data[index];
item += data[index];
}
}
console.log(item);
Method 2 : Yes we can make this possible without using JavaScript function :
var name = 'sssssddddddddddfffffff';
let i = 0;
let newarry = [];
for (let singlestr of name) {
newarry[i] = singlestr;
i++;
}
// now we have new Array and length of string
length = i;
function getLocation(recArray, item, arrayLength) {
firstLaction = -1;
for (let i = 0; i < arrayLength; i++) {
if (recArray[i] === item) {
firstLaction = i;
break;
}
}
return firstLaction;
}
let finalString = '';
for (let b = 0; b < length; b++) {
const result = getLocation(newarry, newarry[b], length);
if (result === b) {
finalString += newarry[b];
}
}
console.log(finalString); // sdf
// Try this way
const str = 'anaconda';
const printUniqueChar = str => {
const strArr = str.split("");
const uniqueArray = strArr.filter(el => {
return strArr.indexOf(el) === strArr.lastIndexOf(el);
});
return uniqueArray.join("");
};
console.log(printUniqueChar(str)); // output-> cod
function RemDuplchar(str)
{
var index={},uniq='',i=0;
while(i<str.length)
{
if (!index[str[i]])
{
index[str[i]]=true;
uniq=uniq+str[i];
}
i++;
}
return uniq;
}
We can remove the duplicate or similar elements in string using for loop and extracting string methods like slice, substring, substr
Example if you want to remove duplicate elements such as aababbafabbb:
var data = document.getElementById("id").value
for(var i = 0; i < data.length; i++)
{
for(var j = i + 1; j < data.length; j++)
{
if(data.charAt(i)==data.charAt(j))
{
data = data.substring(0, j) + data.substring(j + 1);
j = j - 1;
console.log(data);
}
}
}
Please let me know if you want some additional information.

Javascript / jQuery Find Text duplicates

How would you approach finding duplicates in a text document. Duplicates can be a set of consecutive words or sentence. Sentence not necessary ending with a dot. let's say page contains a document of 200 lines of which 2 sentences are identical, we want to highlight those 2 sentences as duplicates when "check duplicate button" is clicked.
Interesting question — here's an idea on how I'd do that probably: http://jsfiddle.net/SaQAs/1/ — Not anyhow optimized!
var text = $('p').text(),
words = text.split(' '),
sortedWords = words.slice(0).sort(),
duplicateWords = [],
sentences = text.split('.'),
sortedSentences = sentences.slice(0).sort(),
duplicateSentences = [];
for (var i=0; i<sortedWords.length-1; i++) {
if (sortedWords[i+1] == sortedWords[i]) {
duplicateWords.push(sortedWords[i]);
}
}
duplicateWords = $.unique(duplicateWords);
for (var i=0; i<sortedSentences.length-1; i++) {
if (sortedSentences[i+1] == sortedSentences[i]) {
duplicateSentences.push(sortedSentences[i]);
}
}
duplicateSentences = $.unique(duplicateSentences);
$('a.words').click(function(){
var highlighted = $.map(words, function(word){
if ($.inArray(word, duplicateWords) > -1)
return '<span class="duplicate">' + word + '</span>';
else return word;
});
$('p').html(highlighted.join(' '));
return false;
});
$('a.sentences').click(function(){
var highlighted = $.map(sentences, function(sentence){
if ($.inArray(sentence, duplicateSentences) > -1)
return '<span class="duplicate">' + sentence + '</span>';
else return sentence;
});
$('p').html(highlighted.join('.'));
return false;
});
Update 1
This one finds sequences of identical words: http://jsfiddle.net/YQdk5/1/ From here it shouldn't be hard to e.g. ignore any kind of punctuation at the end of fragments when comparing — you'd would just have to write your own version of an inArray method.
var text = $('p').text(),
words = text.split(' '),
sortedWords = words.slice(0).sort(),
duplicateWords = []
highlighted = [];
for (var i=0; i<sortedWords.length-1; i++) {
if (sortedWords[i+1] == sortedWords[i]) {
duplicateWords.push(sortedWords[i]);
}
}
duplicateWords = $.unique(duplicateWords);
for (var j=0, m=[]; j<words.length; j++) {
m.push($.inArray(words[j], duplicateWords) > -1);
if (!m[j] && m[j-1])
highlighted.push('</span>');
else if (m[j] && !m[j-1])
highlighted.push('<span class="duplicate">');
highlighted.push(words[j]);
}
$('p').html(highlighted.join(' '));
Update 2
My regex-fu is weak, but this (pretty messy!) version seems to work okay: http://jsfiddle.net/YQdk5/2/ — I'm pretty sure that there might be a better way of doing this, but for now I've got to leave it alone! :D — Good luck!
Update 3
Thinking about it, I don't think that the code from the previous update is any good. That's why I've removed it. You can still find it here: http://jsfiddle.net/YQdk5/2/
The main point is to use a regex to match words, something along the lines of:
/^word(\.?)$/
Here is the solution that uses suffix tree:
function SuffixTree(text) {
var regex = /\b\w+/g;
var words = text.match(regex);
var wave = [];
var words_l = words.length;
if (words_l == 0) return false;
this.tree = this.node("", false);
for (var i = 0; i < words_l; ++i) {
var x = words[i] + "_";
wave.push(this.tree);
var wave_l = wave.length;
for (var j = 0; j < wave_l; ++j) {
var y = wave[j];
if (typeof y[x] != 'undefined') y[x].count++;
else y[x] = this.node(words[i], y);
wave[j] = y[x];
}
}
}
SuffixTree.prototype = {
dummy: {count: 1},
node: function(word, num, parent) {
return {
count: 1,
word: word,
parent: parent
};
},
duplicates: function(h) {
this.dups = [];
this.bypass(this.tree, h, 0);
var l = this.dups.length;
this.dups.sort(function(d1, d2) { return d1.depth > d2.depth ? 1 : -1; });
for (var i = 0; i < l; ++i) {
var d = this.dups[i];
this.dups[i] = { s: " " + this.sentence(d.a) + " ", depth: d.depth, count: d.a.count };
}
for (var i = 0; i < l; ++i) {
var d = this.dups[i];
console.log(i, d.s);
}
for (var i = 0; i < l; ++i) {
var d = this.dups[i];
var fl = true;
for (var j = i + 1; j < l; ++j) {
if (this.dups[j].s.indexOf(d.s) != -1) fl = false;
}
if (fl) h(d.s.substr(1, d.s.length - 2), d.count);
}
},
bypass: function(a, h, depth) {
if (a.constructor != Object) return;
var fl = true;
for (var i in a) {
if (i == 'parent') continue;
var b = a[i];
if (b.count == a.count) fl = false;
this.bypass(b, h, depth + 1);
}
if (fl && a.count > 1) {
this.dups.push({ a: a, depth: depth });
}
},
sentence: function(a) {
var s = a.word;
while (a = a.parent) {
s = a.word + " " + s;
}
return s;
}
};
var text = "This is a text with some duplicates: words, sentences of different length. For example here is a duplicate word. This sentence has some duplicates. But not all of us can find clones.";
var T = new SuffixTree(text);
var h = function(s, c) {
document.write(s + "[" + c + "]<br/>");
};
T.duplicates(h);
1) Split the input text to the array of words.
2) Build the suffix tree.
3) Find the longest suffixes of the tree.
4) Remove sentences that are contained in others (i.e. remove "is" that is a part of "this is a").
You can change the regular expression to take into account html tags.
I hope this helps you.
P.S. h is the callback for found duplicates.
You're javascript contains references to a javascript library named jQuery.
You are not including this in your HTML, and thus it will fail.
You can include it via jquery cdn
And todays tip: Use the developer tools in your browser. In the console you can see what parts of javascript that fails.

Categories