var n = "reversestrings", k=3;
want to reverse string in chunk of 'k',
Answer would be : ver sre tse nir gs;
if Last word less then 'k' then don't need to reverse.
I am using below code but not getting expected answer.
var n = 'stringreverses', k = 3, str = '', s = '';
var c = 0;
for( var i=0; i<n.length; i++ ){
if( c<k ){
c++
str += n[i];
s=str.split('').reverse().join('');
}
else{
console.log("-" + s);
c=0;
}
}
First we need to split input to chunks with the same size (the last one can be smaller), next we reverse every chunk and concatenate at the end.
var input = "123456",
chunks = input.match(new RegExp('.{1,' + k + '}', 'g'));
var result = chunks.map(function(chunk) {
return chunk.split('').reverse().join('');
}).join('');
Homework or not, here is a good use case to start with strings.
Here is a C approach but you have more in Javascript.
In fact you want to reverse by chunk so deal with chunk. How to create a chunk of string ? a way is to use slice https://developer.mozilla.org/fr/docs/Web/JavaScript/Reference/Objets_globaux/String/slice
var str = "abcdef";
console.log(str.slice(0,2));
So you have an easy way to slice your string into chunk.
Then you have to iterate over it, there is no good way of doing it actually there is dozen but you could do it from backward to the beginning of the string:
for( i=str.length ; i>0 ; i -= k ){
// i will go from the end of your str to
// the beginning by step of k(=3) and you can use i - k and i
// to slice your string (as we see it before)
// you have to take care of the last part that could be less than
// 3
}
then you have to format the result, the most easy way to do that is to concatenate results into a string here it is :
var strRes = "";
strRes += "res 1";
strRes += "res 2";
console.log(strRes); // should screen "res 1res 2"
As it is homework, I wont make a jsfiddle, you have here all the pieces and it's up to you to build the puzzle.
hope that help
$(function() {
var n = 'reversestrings', k = 3;
var revString = "";
for (var i =0; i<=n.length; i++) {
if (i%k == 0) {
l = parseInt(k) + parseInt(i);
var strChunk = n.substring(i,l);
var innerStr = "";
for (var j =0; j<strChunk.length; j++) {
var opp = parseInt(strChunk.length) - parseInt(j) - 1;
innerStr = innerStr + strChunk.charAt(opp);
}
revString = revString + " "+innerStr;
}
}
alert(revString);
});
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.10.0/jquery.min.js"></script>
My take on this. Pure JS without even built-in functions:
function reverseSubStr(str) {
var right = str.length - 1, reversedSubStr = '';
while(right >= 0) {
reversedSubStr += str[right];
right--;
}
return reversedSubStr;
}
function reverseStr(str) {
var initialStr = str, newstr = '', k = 3, substr = ''
for(var i = 1; i <= initialStr.length; i++) {
substr += initialStr[i - 1]; // form a substring
if(i % k == 0) { // once there are 3 symbols - reverse the substring
newstr += reverseSubStr(substr) + " "; // ... and add space
substr = ''; // then clean temp var
}
}
return newstr += substr; // add the remainder of the string - 'gs' - and return the result
}
var str = 'reversestrings';
console.log(reverseStr(str)); //ver sre tse nir gs
I like #Jozef 's approch but here is mine as well for those who are not much into Regex -
//Taking care of Tail Calling
function reverStrInChunk(str, k, r=''){
let index=0, revStr,
res = str.substring(index, k), remStr;
revStr = res.split("").reverse().join("");
remStr = str.substring(k, str.length);
r = r + revStr;
if(remStr.length>k){
return reverStrInChunk(remStr,k, r+" ");
}
else if(remStr.length<k) {
return r +" "+remStr;
}else{
return r +" "+ remStr.split("").reverse().join("");
}
}
var aStr = reverStrInChunk('reversestrings',3);//ver sre tse nir gs
console.log(aStr);
How can I make individual characters within a string repeat a given amount of times?
That is, how do I turn "XyZ" into "XXXyyyZZZ"?
Try this:
var foo = 'bar';
function makeString(str, repeat) {
var str = Array.prototype.map.call(str, function(character) {
var nascentStr = '';
while (nascentStr.length < repeat) {
nascentStr += character;
}
return nascentStr;
}).join('');
return str;
}
alert(makeString(foo, 3));
You'll need to use a combination of a few functions. First you'll need to split the string into individual characters:
var charArray = "XyZ".split('');
Once you have it split up, you can use a combination of the .map function and a nifty little trick of javascript Array.
var someMultiplier = 5;
var duplicatedArray = charArray.map(function(char) {
return Array(someMultiplier).join(char);
});
At that point, you have an array of strings that have the duplicate letters, and you can combine them back with .join
var dupString = duplicatedArray.join('');
dupString === "XXXXXyyyyyZZZZZ
Sounds straight forward. You can run this in your browser's console:
var my = 'XyZ';
function repeat(str, times) {
var res = '';
for (var i in str) {
var char = str[i];
for (var j=0; j < times; j++) {
res += char;
}
}
return res;
}
var his = repeat(my, 3);
console.log(his);
you have not mentioned what will happen if input will be like xyxzy. Assuming it will be xxxyyxxxzzzyyy
// function takes input string & num of repitation
function buildString(input, num) {
var _currentChar = ""; // Final output string
for (var m = 0; m < input.length; m++) {
//call another function which will return XXX or yyy or zzz & then finally concat it
_currentChar += _repeatString((input.charAt(m)), num);
}
}
// It will return XXX or yyy or ZZZ
// takes individual char as input and num of times to repeat
function _repeatString(char, num) {
var _r = "";
for (var o = 0; o < num; o++) {
_r += char;
}
return _r
}
buildString('XyZ', 3)
jsfiddle for Example
function repeatStringNumTimes(str, num) {
let valueCopy = str
if (num > 0) {
for (var i = 0; i < num - 1; i++) {
valueCopy = valueCopy.concat(str)
}
} else {
valueCopy = ""
}
return valueCopy;
}
repeatStringNumTimes("abc", 3);
These days can be done a lot easier:
const repeater = (str, n) => [...str].map(c => c.repeat(n)).join('');
alert(repeater('XyZ', 3));
Seeing all the people talking about longest substring in alphabetical order in Python, I have decided to try it in JS.
The function should look for the longest substring inside a given string, where letters are ordered alphabetically.
Here is what I have:
var s = 'azcbobobegghakl'
function substringChecker(s) {
var longestSub = "";
for (var i = 0; i < s.length; i++) {
var count = 0;
var currSub = "";
while((i+count)<=s.length){
var curr = i+count;
var next = curr+1;
var prev = curr-1;
if(curr !== s.length-1) {
if(s[curr] <= s[next]){
currSub += s[curr]
} else {
break;
}
} else {
if(s[curr]>s[prev]) {
currSub += s[curr];
}
}
count++;
}
if(currSub.length >= longestSub.length) {
longestSub = currSub;
}
};
return longestSub;
}
var result = substringChecker(s);;
console.log(result);
The funny thing it works great for all test cases I can come up with, but this one. The result should be "beggh" but it is "begg" instead. Why is the h not showing up, what am I missing?
The algorithm can be linear, I think you are overcomplicating it placing loops inside loops.
I would use something like
function substringChecker(s) {
var longestSub = "",
length = 0,
start = 0,
prev = s[0];
for (var i = 1; i <= s.length; ++i) {
if(i == s.length || s[i] < prev) {
if(length < i-start) {
longestSub = s.substring(start, i);
length = i-start;
}
start = i;
}
prev = s[i];
};
return longestSub;
}
document.write(substringChecker('azcbobobegghakl'));
first I made list of A-z
then check each letter and compare it with the next letter and save it in subString and...
function longest(str) {
//handle the case str is just one letter
if (str.length === 1) return str;
// create a list of alphabet A to Z
const alphabets = [...Array(26)].map(_ => String.fromCharCode(i++), (i = 97));
let longString = "";
let subSting = "";
for (let x = 0; x < str.length; x++) {
let char = str.charAt(x);
const nextChar = str.charAt(x + 1);
let charIndex = alphabets.findIndex(alphabet => alphabet === char);
let nextCharIndex = alphabets.findIndex(alphabet => alphabet === nextChar);
if (nextCharIndex >= charIndex) {
subSting = subSting + nextChar;
} else {
if (!subSting.length) {
subSting = subSting + char;
}
longString = subSting.length > longString.length ? subSting : longString;
subSting = "";
}
}
return longString;
}
console.log(longest("zyba"));
I have to make a function in JavaScript that removes all duplicated letters in a string. So far I've been able to do this: If I have the word "anaconda" it shows me as a result "anaconda" when it should show "cod". Here is my code:
function find_unique_characters( string ){
var unique='';
for(var i=0; i<string.length; i++){
if(unique.indexOf(string[i])==-1){
unique += string[i];
}
}
return unique;
}
console.log(find_unique_characters('baraban'));
We can also now clean things up using filter method:
function removeDuplicateCharacters(string) {
return string
.split('')
.filter(function(item, pos, self) {
return self.indexOf(item) == pos;
})
.join('');
}
console.log(removeDuplicateCharacters('baraban'));
Working example:
function find_unique_characters(str) {
var unique = '';
for (var i = 0; i < str.length; i++) {
if (str.lastIndexOf(str[i]) == str.indexOf(str[i])) {
unique += str[i];
}
}
return unique;
}
console.log(find_unique_characters('baraban'));
console.log(find_unique_characters('anaconda'));
If you only want to return characters that appear occur once in a string, check if their last occurrence is at the same position as their first occurrence.
Your code was returning all characters in the string at least once, instead of only returning characters that occur no more than once. but obviously you know that already, otherwise there wouldn't be a question ;-)
Just wanted to add my solution for fun:
function removeDoubles(string) {
var mapping = {};
var newString = '';
for (var i = 0; i < string.length; i++) {
if (!(string[i] in mapping)) {
newString += string[i];
mapping[string[i]] = true;
}
}
return newString;
}
With lodash:
_.uniq('baraban').join(''); // returns 'barn'
You can put character as parameter which want to remove as unique like this
function find_unique_characters(str, char){
return [...new Set(str.split(char))].join(char);
}
function find_unique_characters(str, char){
return [...new Set(str.split(char))].join(char);
}
let result = find_unique_characters("aaaha ok yet?", "a");
console.log(result);
//One simple way to remove redundecy of Char in String
var char = "aaavsvvssff"; //Input string
var rst=char.charAt(0);
for(var i=1;i<char.length;i++){
var isExist = rst.search(char.charAt(i));
isExist >=0 ?0:(rst += char.charAt(i) );
}
console.log(JSON.stringify(rst)); //output string : avsf
For strings (in one line)
removeDuplicatesStr = str => [...new Set(str)].join('');
For arrays (in one line)
removeDuplicatesArr = arr => [...new Set(arr)]
Using Set:
removeDuplicates = str => [...new Set(str)].join('');
Thanks to David comment below.
DEMO
function find_unique_characters( string ){
unique=[];
while(string.length>0){
var char = string.charAt(0);
var re = new RegExp(char,"g");
if (string.match(re).length===1) unique.push(char);
string=string.replace(re,"");
}
return unique.join("");
}
console.log(find_unique_characters('baraban')); // rn
console.log(find_unique_characters('anaconda')); //cod
var str = 'anaconda'.split('');
var rmDup = str.filter(function(val, i, str){
return str.lastIndexOf(val) === str.indexOf(val);
});
console.log(rmDup); //prints ["c", "o", "d"]
Please verify here: https://jsfiddle.net/jmgy8eg9/1/
Using Set() and destructuring twice is shorter:
const str = 'aaaaaaaabbbbbbbbbbbbbcdeeeeefggggg';
const unique = [...new Set([...str])].join('');
console.log(unique);
Yet another way to remove all letters that appear more than once:
function find_unique_characters( string ) {
var mapping = {};
for(var i = 0; i < string.length; i++) {
var letter = string[i].toString();
mapping[letter] = mapping[letter] + 1 || 1;
}
var unique = '';
for (var letter in mapping) {
if (mapping[letter] === 1)
unique += letter;
}
return unique;
}
Live test case.
Explanation: you loop once over all the characters in the string, mapping each character to the amount of times it occurred in the string. Then you iterate over the items (letters that appeared in the string) and pick only those which appeared only once.
function removeDup(str) {
var arOut = [];
for (var i=0; i < str.length; i++) {
var c = str.charAt(i);
if (c === '_') continue;
if (str.indexOf(c, i+1) === -1) {
arOut.push(c);
}
else {
var rx = new RegExp(c, "g");
str = str.replace(rx, '_');
}
}
return arOut.join('');
}
I have FF/Chrome, on which this works:
var h={};
"anaconda".split("").
map(function(c){h[c] |= 0; h[c]++; return c}).
filter(function(c){return h[c] == 1}).
join("")
Which you can reuse if you write a function like:
function nonRepeaters(s) {
var h={};
return s.split("").
map(function(c){h[c] |= 0; h[c]++; return c}).
filter(function(c){return h[c] == 1}).
join("");
}
For older browsers that lack map, filter etc, I'm guessing that it could be emulated by jQuery or prototype...
This code worked for me on removing duplicate(repeated) characters from a string (even if its words separated by space)
Link: Working Sample JSFiddle
/* This assumes you have trim the string and checked if it empty */
function RemoveDuplicateChars(str) {
var curr_index = 0;
var curr_char;
var strSplit;
var found_first;
while (curr_char != '') {
curr_char = str.charAt(curr_index);
/* Ignore spaces */
if (curr_char == ' ') {
curr_index++;
continue;
}
strSplit = str.split('');
found_first = false;
for (var i=0;i<strSplit.length;i++) {
if(str.charAt(i) == curr_char && !found_first)
found_first = true;
else if (str.charAt(i) == curr_char && found_first) {
/* Remove it from the string */
str = setCharAt(str,i,'');
}
}
curr_index++;
}
return str;
}
function setCharAt(str,index,chr) {
if(index > str.length-1) return str;
return str.substr(0,index) + chr + str.substr(index+1);
}
Here's what I used - haven't tested it for spaces or special characters, but should work fine for pure strings:
function uniquereduce(instring){
outstring = ''
instringarray = instring.split('')
used = {}
for (var i = 0; i < instringarray.length; i++) {
if(!used[instringarray[i]]){
used[instringarray[i]] = true
outstring += instringarray[i]
}
}
return outstring
}
Just came across a similar issue (finding the duplicates). Essentially, use a hash to keep track of the character occurrence counts, and build a new string with the "one-hit wonders":
function oneHitWonders(input) {
var a = input.split('');
var l = a.length;
var i = 0;
var h = {};
var r = "";
while (i < l) {
h[a[i]] = (h[a[i]] || 0) + 1;
i += 1;
}
for (var c in h) {
if (h[c] === 1) {
r += c;
}
}
return r;
}
Usage:
var a = "anaconda";
var b = oneHitWonders(a); // b === "cod"
Try this code, it works :)
var str="anaconda";
Array.prototype.map.call(str,
(obj,i)=>{
if(str.indexOf(obj,i+1)==-1 && str.lastIndexOf(obj,i-1)==-1){
return obj;
}
}
).join("");
//output: "cod"
This should work using Regex ;
NOTE: Actually, i dont know how this regex works ,but i knew its 'shorthand' ,
so,i would have Explain to you better about meaning of this /(.+)(?=.*?\1)/g;.
this regex only return to me the duplicated character in an array ,so i looped through it to got the length of the repeated characters .but this does not work for a special characters like "#" "_" "-", but its give you expected result ; including those special characters if any
function removeDuplicates(str){
var REPEATED_CHARS_REGEX = /(.+)(?=.*?\1)/g;
var res = str.match(REPEATED_CHARS_REGEX);
var word = res.slice(0,1);
var raw = res.slice(1);
var together = new String (word+raw);
var fer = together.toString();
var length = fer.length;
// my sorted duplicate;
var result = '';
for(var i = 0; i < str.length; i++) {
if(result.indexOf(str[i]) < 0) {
result += str[i];
}
}
return {uniques: result,duplicates: length};
} removeDuplicates('anaconda')
The regular expression /([a-zA-Z])\1+$/ is looking for:
([a-zA-Z]]) - A letter which it captures in the first group; then
\1+ - immediately following it one or more copies of that letter; then
$ - the end of the string.
Changing it to /([a-zA-Z]).*?\1/ instead searches for:
([a-zA-Z]) - A letter which it captures in the first group; then
.*? - zero or more characters (the ? denotes as few as possible); until
\1 - it finds a repeat of the first matched character.
I have 3 loopless, one-line approaches to this.
Approach 1 - removes duplicates, and preserves original character order:
var str = "anaconda";
var newstr = str.replace(new RegExp("[^"+str.split("").sort().join("").replace(/(.)\1+/g, "").replace(/[.?*+^$[\]\\(){}|-]/g, "\\$&")+"]","g"),"");
//cod
Approach 2 - removes duplicates but does NOT preserve character order, but may be faster than Approach 1 because it uses less Regular Expressions:
var str = "anaconda";
var newstr = str.split("").sort().join("").replace(/(.)\1+/g, "");
//cdo
Approach 3 - removes duplicates, but keeps the unique values (also does not preserve character order):
var str = "anaconda";
var newstr = str.split("").sort().join("").replace(/(.)(?=.*\1)/g, "");
//acdno
function removeduplicate(str) {
let map = new Map();
// n
for (let i = 0; i < str.length; i++) {
if (map.has(str[i])) {
map.set(str[i], map.get(str[i]) + 1);
} else {
map.set(str[i], 1);
}
}
let res = '';
for (let i = 0; i < str.length; i++) {
if (map.get(str[i]) === 1) {
res += str[i];
}
}
// o (2n) - > O(n)
// space o(n)
return res;
}
If you want your function to just return you a unique set of characters in your argument, this piece of code might come in handy.
Here, you can also check for non-unique values which are being recorded in 'nonUnique' titled array:
function remDups(str){
if(!str.length)
return '';
var obj = {};
var unique = [];
var notUnique = [];
for(var i = 0; i < str.length; i++){
obj[str[i]] = (obj[str[i]] || 0) + 1;
}
Object.keys(obj).filter(function(el,ind){
if(obj[el] === 1){
unique+=el;
}
else if(obj[el] > 1){
notUnique+=el;
}
});
return unique;
}
console.log(remDups('anaconda')); //prints 'cod'
If you want to return the set of characters with their just one-time occurrences in the passed string, following piece of code might come in handy:
function remDups(str){
if(!str.length)
return '';
var s = str.split('');
var obj = {};
for(var i = 0; i < s.length; i++){
obj[s[i]] = (obj[s[i]] || 0) + 1;
}
return Object.keys(obj).join('');
}
console.log(remDups('anaconda')); //prints 'ancod'
function removeDuplicates(str) {
var result = "";
var freq = {};
for(i=0;i<str.length;i++){
let char = str[i];
if(freq[char]) {
freq[char]++;
} else {
freq[char] =1
result +=char;
}
}
return result;
}
console.log(("anaconda").split('').sort().join('').replace(/(.)\1+/g, ""));
By this, you can do it in one line.
output: 'cdo'
function removeDuplicates(string){
return string.split('').filter((item, pos, self)=> self.indexOf(item) == pos).join('');
}
the filter will remove all characters has seen before using the index of item and position of the current element
Method 1 : one Simple way with just includes JS- function
var data = 'sssssddddddddddfffffff';
var ary = [];
var item = '';
for (const index in data) {
if (!ary.includes(data[index])) {
ary[index] = data[index];
item += data[index];
}
}
console.log(item);
Method 2 : Yes we can make this possible without using JavaScript function :
var name = 'sssssddddddddddfffffff';
let i = 0;
let newarry = [];
for (let singlestr of name) {
newarry[i] = singlestr;
i++;
}
// now we have new Array and length of string
length = i;
function getLocation(recArray, item, arrayLength) {
firstLaction = -1;
for (let i = 0; i < arrayLength; i++) {
if (recArray[i] === item) {
firstLaction = i;
break;
}
}
return firstLaction;
}
let finalString = '';
for (let b = 0; b < length; b++) {
const result = getLocation(newarry, newarry[b], length);
if (result === b) {
finalString += newarry[b];
}
}
console.log(finalString); // sdf
// Try this way
const str = 'anaconda';
const printUniqueChar = str => {
const strArr = str.split("");
const uniqueArray = strArr.filter(el => {
return strArr.indexOf(el) === strArr.lastIndexOf(el);
});
return uniqueArray.join("");
};
console.log(printUniqueChar(str)); // output-> cod
function RemDuplchar(str)
{
var index={},uniq='',i=0;
while(i<str.length)
{
if (!index[str[i]])
{
index[str[i]]=true;
uniq=uniq+str[i];
}
i++;
}
return uniq;
}
We can remove the duplicate or similar elements in string using for loop and extracting string methods like slice, substring, substr
Example if you want to remove duplicate elements such as aababbafabbb:
var data = document.getElementById("id").value
for(var i = 0; i < data.length; i++)
{
for(var j = i + 1; j < data.length; j++)
{
if(data.charAt(i)==data.charAt(j))
{
data = data.substring(0, j) + data.substring(j + 1);
j = j - 1;
console.log(data);
}
}
}
Please let me know if you want some additional information.
How would you approach finding duplicates in a text document. Duplicates can be a set of consecutive words or sentence. Sentence not necessary ending with a dot. let's say page contains a document of 200 lines of which 2 sentences are identical, we want to highlight those 2 sentences as duplicates when "check duplicate button" is clicked.
Interesting question — here's an idea on how I'd do that probably: http://jsfiddle.net/SaQAs/1/ — Not anyhow optimized!
var text = $('p').text(),
words = text.split(' '),
sortedWords = words.slice(0).sort(),
duplicateWords = [],
sentences = text.split('.'),
sortedSentences = sentences.slice(0).sort(),
duplicateSentences = [];
for (var i=0; i<sortedWords.length-1; i++) {
if (sortedWords[i+1] == sortedWords[i]) {
duplicateWords.push(sortedWords[i]);
}
}
duplicateWords = $.unique(duplicateWords);
for (var i=0; i<sortedSentences.length-1; i++) {
if (sortedSentences[i+1] == sortedSentences[i]) {
duplicateSentences.push(sortedSentences[i]);
}
}
duplicateSentences = $.unique(duplicateSentences);
$('a.words').click(function(){
var highlighted = $.map(words, function(word){
if ($.inArray(word, duplicateWords) > -1)
return '<span class="duplicate">' + word + '</span>';
else return word;
});
$('p').html(highlighted.join(' '));
return false;
});
$('a.sentences').click(function(){
var highlighted = $.map(sentences, function(sentence){
if ($.inArray(sentence, duplicateSentences) > -1)
return '<span class="duplicate">' + sentence + '</span>';
else return sentence;
});
$('p').html(highlighted.join('.'));
return false;
});
Update 1
This one finds sequences of identical words: http://jsfiddle.net/YQdk5/1/ From here it shouldn't be hard to e.g. ignore any kind of punctuation at the end of fragments when comparing — you'd would just have to write your own version of an inArray method.
var text = $('p').text(),
words = text.split(' '),
sortedWords = words.slice(0).sort(),
duplicateWords = []
highlighted = [];
for (var i=0; i<sortedWords.length-1; i++) {
if (sortedWords[i+1] == sortedWords[i]) {
duplicateWords.push(sortedWords[i]);
}
}
duplicateWords = $.unique(duplicateWords);
for (var j=0, m=[]; j<words.length; j++) {
m.push($.inArray(words[j], duplicateWords) > -1);
if (!m[j] && m[j-1])
highlighted.push('</span>');
else if (m[j] && !m[j-1])
highlighted.push('<span class="duplicate">');
highlighted.push(words[j]);
}
$('p').html(highlighted.join(' '));
Update 2
My regex-fu is weak, but this (pretty messy!) version seems to work okay: http://jsfiddle.net/YQdk5/2/ — I'm pretty sure that there might be a better way of doing this, but for now I've got to leave it alone! :D — Good luck!
Update 3
Thinking about it, I don't think that the code from the previous update is any good. That's why I've removed it. You can still find it here: http://jsfiddle.net/YQdk5/2/
The main point is to use a regex to match words, something along the lines of:
/^word(\.?)$/
Here is the solution that uses suffix tree:
function SuffixTree(text) {
var regex = /\b\w+/g;
var words = text.match(regex);
var wave = [];
var words_l = words.length;
if (words_l == 0) return false;
this.tree = this.node("", false);
for (var i = 0; i < words_l; ++i) {
var x = words[i] + "_";
wave.push(this.tree);
var wave_l = wave.length;
for (var j = 0; j < wave_l; ++j) {
var y = wave[j];
if (typeof y[x] != 'undefined') y[x].count++;
else y[x] = this.node(words[i], y);
wave[j] = y[x];
}
}
}
SuffixTree.prototype = {
dummy: {count: 1},
node: function(word, num, parent) {
return {
count: 1,
word: word,
parent: parent
};
},
duplicates: function(h) {
this.dups = [];
this.bypass(this.tree, h, 0);
var l = this.dups.length;
this.dups.sort(function(d1, d2) { return d1.depth > d2.depth ? 1 : -1; });
for (var i = 0; i < l; ++i) {
var d = this.dups[i];
this.dups[i] = { s: " " + this.sentence(d.a) + " ", depth: d.depth, count: d.a.count };
}
for (var i = 0; i < l; ++i) {
var d = this.dups[i];
console.log(i, d.s);
}
for (var i = 0; i < l; ++i) {
var d = this.dups[i];
var fl = true;
for (var j = i + 1; j < l; ++j) {
if (this.dups[j].s.indexOf(d.s) != -1) fl = false;
}
if (fl) h(d.s.substr(1, d.s.length - 2), d.count);
}
},
bypass: function(a, h, depth) {
if (a.constructor != Object) return;
var fl = true;
for (var i in a) {
if (i == 'parent') continue;
var b = a[i];
if (b.count == a.count) fl = false;
this.bypass(b, h, depth + 1);
}
if (fl && a.count > 1) {
this.dups.push({ a: a, depth: depth });
}
},
sentence: function(a) {
var s = a.word;
while (a = a.parent) {
s = a.word + " " + s;
}
return s;
}
};
var text = "This is a text with some duplicates: words, sentences of different length. For example here is a duplicate word. This sentence has some duplicates. But not all of us can find clones.";
var T = new SuffixTree(text);
var h = function(s, c) {
document.write(s + "[" + c + "]<br/>");
};
T.duplicates(h);
1) Split the input text to the array of words.
2) Build the suffix tree.
3) Find the longest suffixes of the tree.
4) Remove sentences that are contained in others (i.e. remove "is" that is a part of "this is a").
You can change the regular expression to take into account html tags.
I hope this helps you.
P.S. h is the callback for found duplicates.
You're javascript contains references to a javascript library named jQuery.
You are not including this in your HTML, and thus it will fail.
You can include it via jquery cdn
And todays tip: Use the developer tools in your browser. In the console you can see what parts of javascript that fails.