This is my code:
function pars(str) {
var p = [str.split("(").length - 1, str.split(")").length - 1];
if (p[0] > p[1]) {
for (var i = 0; i < (p[0] - p[1]); i++) {
str += ")";
}
}
return str;
}
It adds parentheses in the end of the string if it's missing.
Examples:
"((asd)s" -> "((asd)s)"
"(((ss)123" -> "(((ss)123))"
How can I make this work for beginning parentheses aswell?
Like:
"))" -> "(())"
")123))" -> "((()123))"
Here is a simple stack-based approach. The full JSFiddle is below as well as list of confirmed test cases.
function pars(s) {
var missedOpen = 0, stack = new Array();
for (i = 0; i < s.length; i++) {
if (s[i] == '(') {
stack.push(s[i]);
} else if (s[i] == ')') {
if (!stack.pop())
missedOpen++;
}
}
return Array(missedOpen + 1).join('(') + s + Array(stack.length + 1).join(')');
}
Confirmed Test cases:
// Target: Expected
var cases = {
'()': '()',
')(': '()()',
'(': '()',
')': '()',
'This)(is))a((test)': '((This)(is))a((test))',
'(A)': '(A)',
')A(': '()A()'
};
See the complete JSFiddle is here.
As noted by a comment, here's a version without the array at all. This should be the most efficient method. All the test cases passed.
function pars(s) {
var missedOpen = 0, missedClosed = 0;
for (i = 0; i < s.length; i++) {
if (s[i] == '(') {
missedClosed++;
} else if (s[i] == ')') {
if (missedClosed > 0)
missedClosed--;
else
missedOpen++;
}
}
return Array(missedOpen + 1).join('(') + s + Array(missedClosed + 1).join(')');
}
You need both the number of unmatched beginning parenthesis and the number of unmatched end parenthesis. Here is a rough solution:
function pars(str) {
var p = 0;
var minp = 0;
for (var i = 0; i < str.length; i++) {
if (str[i] == "(") p++;
if (str[i] == ")") {
p--;
if (p<minp) minp = p;
}
}
for (i = 0; i > minp; i--) {
str = "(" + str;
}
p = p - minp; // If we added any starting parenthesis, we need to end those as well.
for (i = 0; i < p; i++) {
str = str + ")";
}
return str;
}
This one seems to work:
function pars(str){
var pars = [].reduce.call(str, function(acc, letter){
if(letter === '(') { acc.right++;}
else if(letter === ')') {
if(acc.right) {acc.right--;}
else {acc.left++;}//no starting one
}
return acc;
}, {left: 0, right: 0}),
left = pars.left,
right = pars.right;
if(left) { str = new Array(left+1).join('(') + str;}
if(right) { str += new Array(right+1).join(')');}
return str
}
var str = ')))(((fdfd)fd)('
$("#out").html(str + " - " + pars(str))
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<div id="out"/>
This is an extremely dirty solution (which works in Firefox for the time being due to repeat function):
function pars(str) {
var t = str;
var n;
while ((n = t.replace(/\([^()]*\)/g, "")) != t) {
t = n;
}
var open = t.match(/\)/g);
var close = t.match(/\(/g);
return "(".repeat(open.length) + str + ")".repeat(close.length);
}
Basically, it matches all pairs of (), then count the number of ( and ), and append ) and ( accordingly.
A clean solution should use a counter to count the number of opening unmatched parentheses. It will discard and count the number of unmatched ). For the number of unmatched (, do as per normal. It will be a one-pass solution, instead of multi-pass solution like the one above.
You can use a variation on the famous stack based bracket matching algorithm here.
The general idea is that you scan the code and push an opening parenthesis onto the stack when you see one, then when you see a closing one you pop the top value from the stack and continue. This will ensure that you have the correct balance.
(123())
// ( - push -> ['(']
// 1 - nothing
// 2 - nothing
// 3 - nothing
// ( - push -> ['(', '(']
// ) - pop -> ['(']
// ) - pop -> []
However, we want to bend the rules slightly.
If we push a closing paren onto the stack, and the stack is empty: we need to add an opening paren to the start of the string
If we arrive at the end of the string and there are still open parens on the stack, then we need to close them.
So the code would look something like this:
function parse(str) {
var stack = []
out = str,
idx = 0,
chr = '';
for(idx = 0; idx < str.length; idx++) {
chr = str[idx];
if(chr === '(') {
stack.push(chr);
}
else if(chr === ')') {
if(stack.length > 0) {
stack.pop();
} else {
out = '(' + out;
}
}
}
for(idx = 0; idx < stack.length; idx++) {
out = out + ')';
}
return out;
}
Bonus: Because of the iterative stack based nature of this algorithm, it will generally be faster than RegEx alternatives.
Just scan the string, counting parens. +1 for (, -1 for ). If the number goes negative, you're missing a ( at the beginning. If, when you are finished, the number is positive, then that's how many )s you need to add at the end.
var addmissingparens = function(s){
var n = 0;
Array.prototype.forEach.call(s, function(c){
if(c === '('){
n += 1;
}
if(c === ')'){
if(n === 0){
s = '(' + s;
}else{
n -= 1;
}
}
});
for(; n>0; n-=1){
s += ')';
}
return s;
};
Related
Can't seem to figure out why I keep printing out extra 0's.
As of now, if the value was 730, this is what shows up:
Expected: '700 + 30', instead got: '700 + 30 + 0'
Criteria:
"You will be given a number and you will need to return it as a string in Expanded Form. For example:
12 Should return 10 + 2
42 Should return 40 + 2
70304 Should return 70000 + 300 + 4
NOTE: All numbers will be whole numbers greater than 0."
function expandedForm(num) {
var i,
position,
numArr = Array.from(num.toString()).map(Number),
numArrLen = numArr.length,
result = '';
if(num < 10){
return num;
} else {
for(i = 0; i < numArrLen; i++){
position = numArrLen-1-i;
if( i === numArrLen-1 ){
result += numArr[i] * Math.pow(10, position);
console.log('done',result);
} else {
if(numArr[i] !== 0){
result += numArr[i] * Math.pow(10, position) + " + ";
console.log('keep going',result);
} else {
continue;
console.log('zero', result);
}
}
}
return result;
}
}
Well it goes into the first if check which does not account for zero so you need to check if it is zero in that check.
if (i === numArrLen-1 && numArr[i]!==0)
And the issue you will have after that is you will have a trailing +.
What I would do is instead of adding the string, I would push to an array and join
var result = []
result.push(30)
result.push(2)
console.log(result.join(" + ")
and you can use array methods to actually solve it.
(102030).toString().split("").map((n,i,a) => n*Math.pow(10, a.length-i-1)).filter(n => n>0).join(" + ")
This is how I solved the solution.
function expandedForm(num) {
// Convert num to a string array
let numStringArray = Array.from(String(num));
// Get length of string array
let len = numStringArray.length;
let result = '';
// For each digit in array
numStringArray.map( (n,index) => {
// Perform only if n > 0
if( n>0 ) {
// Add plus sign if result is not empty (for the next digits)
if( result ) { result += ' + '; };
// Pad zeros the right limited to array length minus current index
result += new String(n).padEnd(len-index,'0');
}
});
return result;
}
This was my solution, could have been refactored better but it works.
function expandedForm(num) {
let myStr = num.toString().split(''), //convert number to string array
strLength = myStr.length,
arr = new Array();
if (strLength === 1) { /*If array length is one*/
return num;
}
for (let i = 0; i < strLength; i++) { //loop
if (myStr[i] === 0) { //if number starts with 0
arr.push('0')
}
if (i + 1 === strLength) { //if array is close to end
arr.push(myStr[i])
}
// add zero to number by length difference in array
// add number to
if (myStr[i] !== 0 && i + 1 !== strLength) {
for (let x = 1; x < (strLength - myStr.indexOf(myStr[i])); x++) {
myStr[i] += '0'
}
arr.push(myStr[i]);
}
}
return arr.filter((obj) => obj.match(/[1-9]/)) //remove items with just 0s
.map(obj => obj += ' + ') // add '+'
.reduce((a, b) => a + b).slice(0, -2); //remove whitespace
}
function expandedForm(num) {
const arr = num
.toString()
.split('');
for (let i = 0; i < arr.length - 1; ++i) {
if (arr[i] > 0) {
for (let j = i; j < arr.length - 1; ++j) {
arr[i] += '0';
}
}
}
return arr
.join()
.replace(new RegExp(",0", "g"), "")
.replace(new RegExp(",", "g"), " + ");
}
My solution is fairly similar, uses a bit of RegExp at the end in order to strip out the commas and lone zeroes however
right now this stops when it reaches a character that is different between the two strings. is there a way to make it skip a character that doesn't compare?
var match = function (str1, str2) {
str1 = str1.toString(); str2 = str2.toString();
for (var i = 0; i < str1.length; i++) {
for (var j = str1.length - i; j-1; j--) {
document.body.innerHTML += str1.substr(i, j);
if (str2.indexOf(str1.substr(i, j))!== -1) {
return str1.substr(i, j);
}
}
}
return '';
}
document.body.innerHTML += (match("/some[1]/where[1]/over[3]/here[1]", "/some[1]/where[1]/over[4]/here[1]"));
http://jsfiddle.net/92taU/3/
expected: /some[1]/where[1]/over[]/here[1]
this does what are you looking for:
var match = function (str1, str2) {
str1 = str1.toString(); str2 = str2.toString();
ret=''; i=0; j=0; l=str1.length; k=0; m=0;
while(i<l && j<l)
{
// If char is equal just add!
if(str1[i]==str2[j])
{
ret+=str1[i];
i++;
j++;
} else {
// If it's different search next equal char...
for(k=i;k<l;k++)
{
for(m=j;m<l;m++)
{
if(str1[k]==str2[m])
{
// if char is found adjust indexes and break current for
i=k;
j=m;
k=l; // to break m for
break;
}
}
}
}
}
return ret;
}
document.body.innerHTML += (match("/some[1]/where[1]/over[3]/here[1]", "/some[1]/where[1]/over[4]/here[1]"));
It returns:
/some[1]/where[1]/over[]/here[1]
Different lengths are allowed.
I'm assuming the two strings you're comparing will always be the same length. Here's some code that should do what I think you're asking for:
var match = function (str1, str2) {
var i = 0;
while (i < str1.length) {
if (str1.substr(i, 1) !== str2.substr(i, 1)) {
break;
}
i++;
}
if (i === str1.length) {
return str1;
} else {
return str1.substr(0, i) + match(str1.substr(i + 1), str2.substr(i + 1));
}
}
document.body.innerHTML += (match("/some[1]/where[1]/over[3]/here[1]", "/some[1]/where[1]/over[4]/here[1]"));
Starting at the beginning of each string, this code finds the longest substring. When a mismatch is found, it grabs that matching substring, skips the next character and repeats the process using a recursive function call on the remaining characters from each string.
I'm new to Javascript and wrote the code below to determine if a string is a palindrome. I'm curious as to what is the most efficient way to accomplish the same task.
var isPalindrome = function (string) {
var leftString = [];
var rightString = [];
// Remove spaces in the string and convert to an array
var strArray = string.split(" ").join("").split("");
var strLength = strArray.length;
// Determine if the string is even or odd in length, then assign left and right strings accordingly
if (strLength % 2 !== 0) {
leftString = strArray.slice(0, (Math.round(strLength / 2) - 1));
rightString = strArray.slice(Math.round(strLength / 2), strLength);
} else {
leftString = strArray.slice(0, (strLength / 2));
rightString = strArray.slice((strLength / 2, strLength))
}
if (leftString.join("") === rightString.reverse().join("")) {
alert(string + " is a palindrome.");
} else {
alert(string + " is not a palindrome.")
}
}
isPalindrome("nurses run");
It's not clear if you're talking about efficiency in terms of code length, or amount of computation, but this should be fairly good in both regards. And it takes into account non-alpha characters beside spaces as well as capitalization:
function isPalindrome(str) {
var i, len;
str = str.toLowerCase().replace(/[^a-z]/g, '');
len = str.length;
for(i = 0; i < len / 2; i += 1) {
if(str.charCodeAt(i) != str.charCodeAt(len - i - 1)) {
return false;
}
}
return true;
}
A much shorter approach (though perhaps more computation intensive):
function isPalindrome(str) {
str = str.toLowerCase().replace(/[^a-z]/g, '');
return str == str.split("").reverse().join("");
}
And if you really want that alert stuff, I'd suggest putting it in a separate function:
function isPalindromeAlert(str) {
alert(str + "is " + (isPalindrome(str) ? "" : "not ") + "a palindrome.");
}
function isPalindrome( s )
{
var i = 0, j = s.length-1;
while( i < j )
if( s[i++].toLowerCase() != s[j--].toLowerCase() ) return false;
return true;
}
I think this one is lot simpler:
var isPalindrome = function (string) {
if (string == string.split('').reverse().join('')) {
alert(string + ' is palindrome.');
}
else {
alert(string + ' is not palindrome.');
}
}
See more: Palindrome check in Javascript
var str = "abcba";
var len = str.Lenght;
var index = 0;
while(index <= len/2 && str[index] == str[len - index - 1]) index++;
if(index == len/2) {
alert(string + " is a palindrome.");
}
else {
alert(string + " is not a palindrome.");
}
You made a few unnecesary operations.
To be efficient, you should avoid unnecessary computations. Ask yourself:
do you need to remove spaces?
do you need to convert to an array?
do you need to allocate new objects for the left and right strings?
do you need to reverse the string?
The checking can be done in a very simple loop:
var len=string.length;
for (int i=0; i<(len/2); i++) {
if (string[i] != string[len-i-1]) {
alert(string + " is not a palindrome.");
return;
}
}
alert(string + " is a palindrome.");
To ignore spaces and non-alpha numeric characters, it can be re-written as follows:
function isAlphaNumeric( chr ) {
return ( ((c >= 'a') && (c <= 'z')) ||
((c >= 'A') && (c <= 'Z')) ||
((c >= '0') && (c <= '9')) );
}
// Note - template taken from #Matt's answer!
function isPalindrome( string ) {
var i = 0, j = s.length-1;
while( i < j ) {
if (isAlphaNumeric(string[i])) {
if (isAlphaNumeric(string[j])) {
if ( string[i++].toLowerCase() != string[j--].toLowerCase() )
return false;
} else {
j--;
}
} else {
i++;
if (!isAlphaNumeric(string[j])) j--;
}
}
return true;
}
unreadable + unmaintainable + terse + efficient + recursive + non-branching
function isPalindrome(s,i) {
return (i=i||0)<0|| i>=s.length/2|| s[i]==s[s.length-1-i]&& isPalindrome(s,++i);
}
Fiddle: http://jsfiddle.net/namcx0yf/
I have a string with repeated letters. I want letters that are repeated more than once to show only once.
Example input: aaabbbccc
Expected output: abc
I've tried to create the code myself, but so far my function has the following problems:
if the letter doesn't repeat, it's not shown (it should be)
if it's repeated once, it's show only once (i.e. aa shows a - correct)
if it's repeated twice, shows all (i.e. aaa shows aaa - should be a)
if it's repeated 3 times, it shows 6 (if aaaa it shows aaaaaa - should be a)
function unique_char(string) {
var unique = '';
var count = 0;
for (var i = 0; i < string.length; i++) {
for (var j = i+1; j < string.length; j++) {
if (string[i] == string[j]) {
count++;
unique += string[i];
}
}
}
return unique;
}
document.write(unique_char('aaabbbccc'));
The function must be with loop inside a loop; that's why the second for is inside the first.
Fill a Set with the characters and concatenate its unique entries:
function unique(str) {
return String.prototype.concat.call(...new Set(str));
}
console.log(unique('abc')); // "abc"
console.log(unique('abcabc')); // "abc"
Convert it to an array first, then use Josh Mc’s answer at How to get unique values in an array, and rejoin, like so:
var nonUnique = "ababdefegg";
var unique = Array.from(nonUnique).filter(function(item, i, ar){ return ar.indexOf(item) === i; }).join('');
All in one line. :-)
Too late may be but still my version of answer to this post:
function extractUniqCharacters(str){
var temp = {};
for(var oindex=0;oindex<str.length;oindex++){
temp[str.charAt(oindex)] = 0; //Assign any value
}
return Object.keys(temp).join("");
}
You can use a regular expression with a custom replacement function:
function unique_char(string) {
return string.replace(/(.)\1*/g, function(sequence, char) {
if (sequence.length == 1) // if the letter doesn't repeat
return ""; // its not shown
if (sequence.length == 2) // if its repeated once
return char; // its show only once (if aa shows a)
if (sequence.length == 3) // if its repeated twice
return sequence; // shows all(if aaa shows aaa)
if (sequence.length == 4) // if its repeated 3 times
return Array(7).join(char); // it shows 6( if aaaa shows aaaaaa)
// else ???
return sequence;
});
}
Using lodash:
_.uniq('aaabbbccc').join(''); // gives 'abc'
Per the actual question: "if the letter doesn't repeat its not shown"
function unique_char(str)
{
var obj = new Object();
for (var i = 0; i < str.length; i++)
{
var chr = str[i];
if (chr in obj)
{
obj[chr] += 1;
}
else
{
obj[chr] = 1;
}
}
var multiples = [];
for (key in obj)
{
// Remove this test if you just want unique chars
// But still keep the multiples.push(key)
if (obj[key] > 1)
{
multiples.push(key);
}
}
return multiples.join("");
}
var str = "aaabbbccc";
document.write(unique_char(str));
Your problem is that you are adding to unique every time you find the character in string. Really you should probably do something like this (since you specified the answer must be a nested for loop):
function unique_char(string){
var str_length=string.length;
var unique='';
for(var i=0; i<str_length; i++){
var foundIt = false;
for(var j=0; j<unique.length; j++){
if(string[i]==unique[j]){
foundIt = true;
break;
}
}
if(!foundIt){
unique+=string[i];
}
}
return unique;
}
document.write( unique_char('aaabbbccc'))
In this we only add the character found in string to unique if it isn't already there. This is really not an efficient way to do this at all ... but based on your requirements it should work.
I can't run this since I don't have anything handy to run JavaScript in ... but the theory in this method should work.
Try this if duplicate characters have to be displayed once, i.e.,
for i/p: aaabbbccc o/p: abc
var str="aaabbbccc";
Array.prototype.map.call(str,
(obj,i)=>{
if(str.indexOf(obj,i+1)==-1 ){
return obj;
}
}
).join("");
//output: "abc"
And try this if only unique characters(String Bombarding Algo) have to be displayed, add another "and" condition to remove the characters which came more than once and display only unique characters, i.e.,
for i/p: aabbbkaha o/p: kh
var str="aabbbkaha";
Array.prototype.map.call(str,
(obj,i)=>{
if(str.indexOf(obj,i+1)==-1 && str.lastIndexOf(obj,i-1)==-1){ // another and condition
return obj;
}
}
).join("");
//output: "kh"
<script>
uniqueString = "";
alert("Displays the number of a specific character in user entered string and then finds the number of unique characters:");
function countChar(testString, lookFor) {
var charCounter = 0;
document.write("Looking at this string:<br>");
for (pos = 0; pos < testString.length; pos++) {
if (testString.charAt(pos) == lookFor) {
charCounter += 1;
document.write("<B>" + lookFor + "</B>");
} else
document.write(testString.charAt(pos));
}
document.write("<br><br>");
return charCounter;
}
function findNumberOfUniqueChar(testString) {
var numChar = 0,
uniqueChar = 0;
for (pos = 0; pos < testString.length; pos++) {
var newLookFor = "";
for (pos2 = 0; pos2 <= pos; pos2++) {
if (testString.charAt(pos) == testString.charAt(pos2)) {
numChar += 1;
}
}
if (numChar == 1) {
uniqueChar += 1;
uniqueString = uniqueString + " " + testString.charAt(pos)
}
numChar = 0;
}
return uniqueChar;
}
var testString = prompt("Give me a string of characters to check", "");
var lookFor = "startvalue";
while (lookFor.length > 1) {
if (lookFor != "startvalue")
alert("Please select only one character");
lookFor = prompt(testString + "\n\nWhat should character should I look for?", "");
}
document.write("I found " + countChar(testString, lookFor) + " of the<b> " + lookFor + "</B> character");
document.write("<br><br>I counted the following " + findNumberOfUniqueChar(testString) + " unique character(s):");
document.write("<br>" + uniqueString)
</script>
Here is the simplest function to do that
function remove(text)
{
var unique= "";
for(var i = 0; i < text.length; i++)
{
if(unique.indexOf(text.charAt(i)) < 0)
{
unique += text.charAt(i);
}
}
return unique;
}
The one line solution will be to use Set. const chars = [...new Set(s.split(''))];
If you want to return values in an array, you can use this function below.
const getUniqueChar = (str) => Array.from(str)
.filter((item, index, arr) => arr.slice(index + 1).indexOf(item) === -1);
console.log(getUniqueChar("aaabbbccc"));
Alternatively, you can use the Set constructor.
const getUniqueChar = (str) => new Set(str);
console.log(getUniqueChar("aaabbbccc"));
Here is the simplest function to do that pt. 2
const showUniqChars = (text) => {
let uniqChars = "";
for (const char of text) {
if (!uniqChars.includes(char))
uniqChars += char;
}
return uniqChars;
};
const countUnique = (s1, s2) => new Set(s1 + s2).size
a shorter way based on #le_m answer
let unique=myArray.filter((item,index,array)=>array.indexOf(item)===index)
How would you approach finding duplicates in a text document. Duplicates can be a set of consecutive words or sentence. Sentence not necessary ending with a dot. let's say page contains a document of 200 lines of which 2 sentences are identical, we want to highlight those 2 sentences as duplicates when "check duplicate button" is clicked.
Interesting question — here's an idea on how I'd do that probably: http://jsfiddle.net/SaQAs/1/ — Not anyhow optimized!
var text = $('p').text(),
words = text.split(' '),
sortedWords = words.slice(0).sort(),
duplicateWords = [],
sentences = text.split('.'),
sortedSentences = sentences.slice(0).sort(),
duplicateSentences = [];
for (var i=0; i<sortedWords.length-1; i++) {
if (sortedWords[i+1] == sortedWords[i]) {
duplicateWords.push(sortedWords[i]);
}
}
duplicateWords = $.unique(duplicateWords);
for (var i=0; i<sortedSentences.length-1; i++) {
if (sortedSentences[i+1] == sortedSentences[i]) {
duplicateSentences.push(sortedSentences[i]);
}
}
duplicateSentences = $.unique(duplicateSentences);
$('a.words').click(function(){
var highlighted = $.map(words, function(word){
if ($.inArray(word, duplicateWords) > -1)
return '<span class="duplicate">' + word + '</span>';
else return word;
});
$('p').html(highlighted.join(' '));
return false;
});
$('a.sentences').click(function(){
var highlighted = $.map(sentences, function(sentence){
if ($.inArray(sentence, duplicateSentences) > -1)
return '<span class="duplicate">' + sentence + '</span>';
else return sentence;
});
$('p').html(highlighted.join('.'));
return false;
});
Update 1
This one finds sequences of identical words: http://jsfiddle.net/YQdk5/1/ From here it shouldn't be hard to e.g. ignore any kind of punctuation at the end of fragments when comparing — you'd would just have to write your own version of an inArray method.
var text = $('p').text(),
words = text.split(' '),
sortedWords = words.slice(0).sort(),
duplicateWords = []
highlighted = [];
for (var i=0; i<sortedWords.length-1; i++) {
if (sortedWords[i+1] == sortedWords[i]) {
duplicateWords.push(sortedWords[i]);
}
}
duplicateWords = $.unique(duplicateWords);
for (var j=0, m=[]; j<words.length; j++) {
m.push($.inArray(words[j], duplicateWords) > -1);
if (!m[j] && m[j-1])
highlighted.push('</span>');
else if (m[j] && !m[j-1])
highlighted.push('<span class="duplicate">');
highlighted.push(words[j]);
}
$('p').html(highlighted.join(' '));
Update 2
My regex-fu is weak, but this (pretty messy!) version seems to work okay: http://jsfiddle.net/YQdk5/2/ — I'm pretty sure that there might be a better way of doing this, but for now I've got to leave it alone! :D — Good luck!
Update 3
Thinking about it, I don't think that the code from the previous update is any good. That's why I've removed it. You can still find it here: http://jsfiddle.net/YQdk5/2/
The main point is to use a regex to match words, something along the lines of:
/^word(\.?)$/
Here is the solution that uses suffix tree:
function SuffixTree(text) {
var regex = /\b\w+/g;
var words = text.match(regex);
var wave = [];
var words_l = words.length;
if (words_l == 0) return false;
this.tree = this.node("", false);
for (var i = 0; i < words_l; ++i) {
var x = words[i] + "_";
wave.push(this.tree);
var wave_l = wave.length;
for (var j = 0; j < wave_l; ++j) {
var y = wave[j];
if (typeof y[x] != 'undefined') y[x].count++;
else y[x] = this.node(words[i], y);
wave[j] = y[x];
}
}
}
SuffixTree.prototype = {
dummy: {count: 1},
node: function(word, num, parent) {
return {
count: 1,
word: word,
parent: parent
};
},
duplicates: function(h) {
this.dups = [];
this.bypass(this.tree, h, 0);
var l = this.dups.length;
this.dups.sort(function(d1, d2) { return d1.depth > d2.depth ? 1 : -1; });
for (var i = 0; i < l; ++i) {
var d = this.dups[i];
this.dups[i] = { s: " " + this.sentence(d.a) + " ", depth: d.depth, count: d.a.count };
}
for (var i = 0; i < l; ++i) {
var d = this.dups[i];
console.log(i, d.s);
}
for (var i = 0; i < l; ++i) {
var d = this.dups[i];
var fl = true;
for (var j = i + 1; j < l; ++j) {
if (this.dups[j].s.indexOf(d.s) != -1) fl = false;
}
if (fl) h(d.s.substr(1, d.s.length - 2), d.count);
}
},
bypass: function(a, h, depth) {
if (a.constructor != Object) return;
var fl = true;
for (var i in a) {
if (i == 'parent') continue;
var b = a[i];
if (b.count == a.count) fl = false;
this.bypass(b, h, depth + 1);
}
if (fl && a.count > 1) {
this.dups.push({ a: a, depth: depth });
}
},
sentence: function(a) {
var s = a.word;
while (a = a.parent) {
s = a.word + " " + s;
}
return s;
}
};
var text = "This is a text with some duplicates: words, sentences of different length. For example here is a duplicate word. This sentence has some duplicates. But not all of us can find clones.";
var T = new SuffixTree(text);
var h = function(s, c) {
document.write(s + "[" + c + "]<br/>");
};
T.duplicates(h);
1) Split the input text to the array of words.
2) Build the suffix tree.
3) Find the longest suffixes of the tree.
4) Remove sentences that are contained in others (i.e. remove "is" that is a part of "this is a").
You can change the regular expression to take into account html tags.
I hope this helps you.
P.S. h is the callback for found duplicates.
You're javascript contains references to a javascript library named jQuery.
You are not including this in your HTML, and thus it will fail.
You can include it via jquery cdn
And todays tip: Use the developer tools in your browser. In the console you can see what parts of javascript that fails.