Checking Duplicates Script - javascript

I want to display duplicates found from the sheet in a Browser.Msg box and send the duplicate strings via email.
Additionally extra column could be written to that row where status "DUPLICATE - YES" would be written. However just to get it via email / in a popup would be enough.
I have tried logging the data. I have tried setting variables.
function checkDuplicates() {
var sheet = SpreadsheetApp.getActiveSheet();
var dataRange = sheet.getRange("DATA!F2:F"); // Set Any Range
// "A:A" is for Column A
// And if you want to check duplicates for whole sheet then try this:
// var dataRange = sheet.getDataRange();
var data = dataRange.getValues();
var numRows = data.length;
var numColumns = data[0].length;
var dupes = false;
var okdupes0 = 0;
var nodupes0 = 0;
var totbookings0 = 0;
var formats = [];
var values = [];
for (var i = 0; i < numRows; i++) {
formats[i] = [];
for (var j = 0; j < numColumns; j++) {
formats[i][j] = 'WHITE';
if (data[i][j] != '') {
values.push([data[i][j], i, j]);
}
}
}
var numValues = values.length;
for (var k = 0 ; k < numValues - 1; k++) {
if (formats[values[k][1]][values[k][2]] == 'WHITE') {
for (var l = k + 1; l < numValues; l++) {
if (values[k][0] == values[l][0]) {
formats[values[k][1]][values[k][2]] = 'RED';
formats[values[l][1]][values[l][2]] = 'RED';
var dupes = true;
}
}
var okdupes = okdupes0++;
}
var totbookings = totbookings0++;
}
if (dupes) {
// var okdupes = okdupes -1;
var nodupes = totbookings - okdupes;
var emailAddress = "myemail#gmail.com"; // First column
var message = + nodupes + " Duplicate voucher(s) has been found from the system. Duplicate vouchers has been marked with red color."; // Second column
var subject = "System: " + nodupes + " Duplicate Voucher(s) Found!";
MailApp.sendEmail(emailAddress, subject, message);
Browser.msgBox('Warning!', ''+ nodupes +' Possible duplicate voucher(s) has been found and colored red! Please contact the rep who has made the sale. '+ totbookings +' bookings has been scanned through for duplicates.', Browser.Buttons.OK);
} else {
Browser.msgBox('All good!', 'No duplicate vouchers found.', Browser.Buttons.OK);
}
dataRange.setBackgroundColors(formats);
}

You could convert the array of values to a string, then use match to count occurrences.
This code works to find duplicates, even from a two dimensional array. It doesn't determine what cell the duplicate came from. The values of all the duplicates are put into an array.
function findDups() {
var testArray = [['one','two','three'],['three','four','five']];
var allDataAsString = testArray.toString();
Logger.log('allDataAsString: ' + allDataAsString);
//Create one Dimensional array of all values
var allDataInArray = allDataAsString.split(",");
var pattern;
var arrayOfDups = [];
for (var i = 0;i<allDataInArray.length;i++) {
var tempStr = allDataInArray[i];
// the g in the regular expression says to search the whole string
// rather than just find the first occurrence
var regExp = new RegExp(tempStr, "g");
var count = (allDataAsString.match(regExp) || []).length;
Logger.log('count matches: ' + count);
if (count > 1 && arrayOfDups.indexOf(tempStr) === -1) {
arrayOfDups.push(tempStr);
};
};
Logger.log('arrayOfDups: ' + arrayOfDups);
Browser.msgBox('Thest are the duplicate values: ' + arrayOfDups);
//To Do - Send Email
};
The above example code has a hard coded two dimensional array for testing purposes. There are two occurrences of an element with the value of 'three'.

Related

Concatenation Code runs excruciatingly slowly google script

My Concatenation function is running excruciatingly slow, on only 28 rows of data it takes almost 4 minutes to run and I have other code I need to run so I hit the Max execution time in Google sheets
If I was running a similar process in Excel this would take maybe 15-30 sec
Everything is in memory, I can't see (with my limited knowledge of javascript) why my code is so slow
Thanks
A Google sheet with data
//Sheet Name
//Values in Range to be overwritten by Header names
//List of Header names
//New Header Name 1
//New Header Name 2
function AsAboveSoBelow_Offers_Asks() {
AsAboveSoBelow('Elements',
["I can offer", "I have a ask"],
["Header1","Header2","Header3","Header4","Header5","Header6",
"Header7","Header8","Header9","Header10","Header11",
"Header12","Header13","Header14"],
"Offers",
"Asks");
}
function AsAboveSoBelow(shtName, arrPBV, arrHeaders, newHeader1, newHeader2) {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var s = ss.getSheetByName(shtName);
var LC = s.getLastColumn();
var r = s.getDataRange();
var v = r.getValues();
var start = new Date();
var temp ="";
//Dim A
var A = [];
for (var i = 0; i < v.length; i++) {
A[i] = [];
for (var j = 0; j <= arrPBV.length - 1; j++) {
A[i][j] = '';
}
}
for(var e = 0; e <= arrPBV.length - 1; e++) {
var search_Term = arrPBV[e];
for(var row = 0; row < v.length; row++) {
for(var col = 0; col <= arrHeaders.length - 1; col++) {
var col2 = HTN(shtName,arrHeaders[col])
var replace_Term = arrHeaders[col];
if(v[row][col2-1].toString().indexOf(search_Term) > -1) {
temp = temp + replace_Term + "|"
}
}
//remove trailing pipe
A[row][e] = temp.replace(/\|(?=\s*$)/, '')
temp = ""
}
}
A[0][0]= newHeader1
A[0][1]= newHeader2
s.getRange(1, v[0].length +1, v.length,A[0].length).setValues(A);
var end = new Date();
var executiontime = end - start;
Logger.log(executiontime);
};
//Helper function
function HTN(shtName,cheader){
var headers =
SpreadsheetApp.getActiveSpreadsheet().getSheetByName(shtName).getDataRange().getValues().shift();
var colindex = headers.indexOf(cheader);
return colindex+1;
}
You want to reduce the process cost of your script.
If my understanding is correct, how about this answer? Please think of this as just one of several possible answers.
Modification point:
In your script, HTN() is used as the for loops. And the function includes SpreadsheetApp.getActiveSpreadsheet().getSheetByName(shtName).getDataRange().getValues().shift();. In your script, the Spreadsheet and sheetName are not changed. So headers is always same. I thought that this might be the main modification point.
Modified script:
Please modify your script as follows.
function AsAboveSoBelow_Offers_Asks() {
AsAboveSoBelow(
'Elements',
["I can offer", "I have a ask"],
["Header1","Header2","Header3","Header4","Header5","Header6","Header7","Header8","Header9","Header10","Header11","Header12","Header13","Header14"],
"Offers",
"Asks"
);
}
function AsAboveSoBelow(shtName, arrPBV, arrHeaders, newHeader1, newHeader2) {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var s = ss.getSheetByName(shtName);
// var LC = s.getLastColumn(); // It seems that this is not used.
var r = s.getDataRange();
var v = r.getValues();
var start = new Date();
var temp ="";
//Dim A
var A = [];
for (var i = 0; i < v.length; i++) {
A[i] = [];
for (var j = 0; j <= arrPBV.length - 1; j++) {
A[i][j] = '';
}
}
var h = s.getRange(1, 1, 1, s.getLastColumn()).getValues()[0]; // Added
for(var e = 0; e <= arrPBV.length - 1; e++) {
var search_Term = arrPBV[e];
for(var row = 0; row < v.length; row++) {
for(var col = 0; col <= arrHeaders.length - 1; col++) {
var col2 = h.indexOf(arrHeaders[col]) + 1; // Modified
var replace_Term = arrHeaders[col];
if(v[row][col2-1].toString().indexOf(search_Term) > -1) {
temp = temp + replace_Term + "|"
}
}
//remove trailing pipe
A[row][e] = temp.replace(/\|(?=\s*$)/, '')
temp = ""
}
}
A[0][0]= newHeader1
A[0][1]= newHeader2
s.getRange(1, v[0].length +1, v.length,A[0].length).setValues(A);
var end = new Date();
var executiontime = end - start;
Logger.log(executiontime);
};
Note:
In my environment, the process time of the modified script was about 5 seconds.
If this was not the direct solution, I apologize.

Problems accesing external database inside For iterator

I have an array called countriesData that stores names for various countries, like this:
[Germany,France,Canada,Austria,Switzerland,Spain]
I'm trying to iterate over each element in that array, the idea is use each country in a query search over an external API, and then save the length of items in that external API. To put it simple, Im going through each country and counting how many items from that country are stored in an external database.
I have no problem accessing the database outside of the loop, however, I am unable to access it while inside the for iterator.This is my code:
for (var iter = 0; iter < countriesData.length; iter++) {
var obj = [];
var country = countriesData[iter]
var items;
var itemsCountry = 0;
$http.get("https://api.discogs.com/database/search?q={?country==" + country + " }&token=zwxZExVZTenjPTKumVeTDVRuniqhQLAxymdzSxUQ").then(function(response) {
items = response.data.pagination.items;
})
var str = "";
obj.push(countriesData[iter]);
obj.push(items);
for (var J = 0; J < myStats.data.length; J++) {
if (myStats.data[J].country == countriesData[iter]) {
itemsCountry++;
str += myStats.data[J].title + ", ";
}
}
obj.push(itemsCountry);
var str2 = str.substring(0, str.length - 2);
obj.push(str2);
newData.push(obj);
console.log("new obj : " + obj)
}
Basically, I need the var items to be updated acording to the length of the response data from http.get
This is an example of what I get once I console.log the obj:
France,,2,Thriller, D'eux
As you can see, the second element in the array is empty when it should have been an integer representing how many France related items where found in the database...
What is it that Im doing wrong? I get that the database is big and there might not be enough time for it to load. Any idead?
Thanks in advance :)
The problem is that your data call is asynchronous and hasn't completed before you try to push the data to the array.
function getCountryData(country) {
var obj = [];
var items;
var itemsCountry = 0;
$http.get("https://api.discogs.com/database/search?q={?country==" + country + " }&token=zwxZExVZTenjPTKumVeTDVRuniqhQLAxymdzSxUQ").then(function(response) {
items = response.data.pagination.items;
var str = "";
obj.push(country);
obj.push(items);
for (var J = 0; J < myStats.data.length; J++) {
if (myStats.data[J].country == countriesData[iter]) {
itemsCountry++;
str += myStats.data[J].title + ", ";
}
}
obj.push(itemsCountry);
var str2 = str.substring(0, str.length - 2);
obj.push(str2);
newData.push(obj);
console.log("new obj : " + obj)
})
}
for (var iter = 0; iter < countriesData.length; iter++) {
var country = countriesData[iter];
getCountryData(country);
}

Get the first empty row in multidimensionnal array

I have a question about a function where I'm trying to get a first look at a multidimensional array.
To explain my problem: In a sheet, I manage my roadmap with projects. A project is composed by 4 rows where I have some information (Project Name, Estimated Team, Timeline ...).
And In my timeline, I need to retrieve the first empty rows in multiple arrays (the first non empty is the startDate).
The problem, I have 4 teams in this multidimensional array, and (for example), the start date can be in the 1st team array for the project A, but the start date can be also in the 3rd team array for the project B.
In my function, I'm trying to get to the start date, but my first step is to check the first array ... (projectRange and after in the code)
So ... I think the best way should be check every rows in the first column, and continue like this to the getLastColumn, right?
So, how can I manage my Loop with this way?
function findLastRow(column) {
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getSheetByName(roadmapSS);
var startRow = 11;
var startCol = 11;
var dataLength = sheet.getLastRow()-(startRow+2);
var rangeData = sheet.getRange(startRow, 2, sheet.getLastRow(), sheet.getLastColumn());
var dataValues = rangeData.getValues();
var projectsList = rangeData.getValues();
var projectDatas = {};
var projectRange = null;
var projectName = null;
var projectPlan = {};
var realStart = null;
for (var i = 0; i < dataLength; i+=4) {
projectDatas = projectsList[i];
var step = startRow+i;
var realStartRange = startRow+i+1
for (var j = 0; j < 1; j+=4) {
projectName = projectDatas[j];
}
projectRange = ([step, startCol, 4, sheet.getLastColumn()]).toString();
var projectPlan = sheet.getRange(step, startCol, 4, sheet.getLastColumn()).getValues();
for (var k = 0; k < projectPlan.length; ++k) {
realStart = projectPlan[k];
for (var l = 0; l < realStart.length; ++l) {
if (realStart[l] != '') {
break;
}
}
}
//sheet.getRange(realStartRange, 2).setValue(columnToLetter([l]))
console.log(projectName, [l], columnToLetter([l]));
}
}
In fact, i'm trying to get the first column of B in this example (because it's the first non empty occurence :
var projectTimeline = [
['','','A','A','','A'],
['','B','B','','B',''],
['','','','C','C',''],
['','','','','D','D']
]
I found my solution :
I throughed the column first, and then the row. It worked, I founded the column :)
function findFirstDate (timeline) {
for (var col = 0; col < timeline[0].length; col++) {
for (var row = 0; row < 4; row++) {
if (timeline[row][col] != '') {
return [col];
}
}
}
}

Last element of the array is always NaN even though I used Number() function

Kindly help me investigate my function below since I'm stuck and still having a hard time figuring it out.
All is well until it reaches the last column on the nested FOR loop. The last column of each row's values are only "0". However, I used the Number() function to make the cell values(i.e. "0") a number but I keep on getting NaN for the last element of the SUM & COUNT arrays.
colCount = 326 while rowCount = 374.
sum.length and count.length should really be ONLY 325 since the headers are unnecessary and the first column is just composed of time stamps. I was able to .push(0) successfully until the nested FOR loop changed the result of the last element to NaN.
function processDataToDictionary(csv) {
var allTextLines = csv.split(/\r\n|\n/);
var csvArray = [];
for (let i = 0; i < allTextLines.length - 1; i++) {
var row = allTextLines[i].split(',');
csvArray.push(row);
}
var colCount = csvArray[0].length;
var rowCount = csvArray.length;
//Arrays of values
var count = [];
var sum = [];
var average = [];
var headers = [];
for (let i = 1; i < colCount; i++) {
var current = csvArray[0][i].replace(/"/g, '');
sum.push(0);
count.push(0);
headers[i] = current;
}
for (let i = 1; i < rowCount; i++) {
for (let j = 1; j < colCount; j++) {
// Remove the quotes from your array
current = csvArray[i][j].replace(/"/g, '');
// Added the Method IsNullOrWhiteSpace
if (!isNullOrWhitespace(current)) {
// Parse as double not int to account for dec. values
sum[j] += Number(current);
count[j]++;
}
}
}
for (let i = 0; i < colCount; i++) {
average.push((sum[i] + 0.0) / count[i]);
}
for (let i = 1; i < colCount; i++) {
// create an empty array
dictionary[headers[i]] = average[i];
}
return dictionary;
}
function isNullOrWhitespace(input) {
if (input == " ") {
return true;
} else {
return false;
}
}
This gives you a dictionary (Object) with the columns names as keys and numbers that appear to be the correct averages as values. But one must still check whether there is a fault in the logic somewhere and the averages are not correct in fact.
function processDataToDictionary(csv) {
function isNullOrWhitespace(input) {
if (input === " ") {
return true;
} else if (input === null) {
return true;
//} else if (input === undefined) {
//return true;
} else {
return false;
}
}
var allTextLines = csv.split(/\r\n|\n/);
var csvArray = [];
for (let i = 0; i < allTextLines.length - 1; i++) {
var row = allTextLines[i].split(',');
csvArray.push(row);
}
var colCount = csvArray[0].length;
var rowCount = csvArray.length;
//Arrays of values
var count = [];
var sum = [];
var average = [];
var headers = [];
for (let i = 1; i < colCount; i++) {
var current = csvArray[0][i].replace(/"/g, '');
sum.push(0);
count.push(0);
headers[i] = current;
}
/**** I added these two lines ****/
sum.push(0);
count.push(0);
for (let i = 1; i < rowCount; i++) {
for (let j = 1; j < colCount ; j++) {
// Remove the quotes from your array
current = csvArray[i][j].replace(/"/g, '');
// Added the Method IsNullOrWhiteSpace
if (!isNullOrWhitespace(current)) {
// Parse as double not int to account for dec. values
sum[j] += Number(current);
count[j]++;
}
}
}
for (let i = 0; i < colCount; i++) {
average.push((sum[i] + 0.0) / count[i]);
}
// I added this line:
dictionary = {};
for (let i = 1; i < colCount; i++) {
dictionary[headers[i]] = average[i];
}
return dictionary;
}
Let me know if this works out for you. You can loop through the values with: for (let key in dictionary) {console.log("key: " + key + " , value: " + dictionary[key]);} . Regards!

Accessing 2D javascript array

I'm trying to code a simple piece of javascript that reads in a CSV (pasted into a textarea on a webpage) and generates SQL insert statements but I keep getting undefined values when I reference the 2D array..
Please help!
var ret = "";
//alert("called");
//split the textarea into rows of text
var lines = text.split("\n");
//the first line of text is the table name
var table = lines[0];
//the second line of text is an array of the attribute names
var attrnames = lines[1].split(",");
var values = new Array();
//create a new array for each attribute
for (var i = 0; i < attrnames.length; i++) {
//the length of each array is the total number of rows
//of text - 2 (title row and attr row)
values.push(new Array(lines.length - 2));
}
//for each subsequent row, push the values to the appropriate arrays
for (var i = 2; i < lines.length; i++) {
//get the current row (value, value, value, value)
var thisrow = lines[i].split(",");
for (var j = 0; j < attrnames.length; j++) {
//add the j-th attribute (thisrow[j]) to its array (values[j])
values[j].push(thisrow[j]);
}
}
var insertIntoTable = "";
var tableName = "";
var attrList = "";
var valueList = "";
var lead = "";
//loop through each row
for (var k = 2; k < lines.length; k++) {
// --- ONE STATEMENT ---
//create the statements
insertIntoTable = "insert into table `";
tableName = table;
attrList = "` (";
valueList = "(";
for (var i = 0; i < attrnames.length; i++){
attrList += "`" + attrnames[i] + "`,";
}
//trim the last comma, then add the closing parenthesis.
attrList = attrList.substring(0, attrList.length-1) + ") ";
lead = insertIntoTable + tableName + attrList;
for (var i = 0; i < attrnames.length; i++) {
//this always points to undefined
valueList += "'" + values[i][k-2] + "', ";
}
lead += (" values " + valueList);
lead = lead.substring(0, lead.length-2) + ");\n";
ret += lead;
}
alert(ret);
In JavaScript you do not need to set the length of arrays. They are more like ArrayLists or something; read more at MDN's documentation.
When you do
var x = new Array(10); // array with "length" set to 10
x.push("somevalue");
then the value will be inserted at x[10] - at the end of the list. Log it in the console to see it yourself.
So either you drop the push() and use absolute indizes instead, or initialize the array as empty - best with the array literal syntax: []. The relevant area of your code should then look like this:
//create a new empty array for each attribute
for(var i = 0; i<attrnames.length; i++){
values.push([]);
}
You are making an array of length n, where n is the number of rows, and then you are pushing on n more elements of data. Start with 0 length arrays and you will be fine:
//create a new array for each attribute
for(var i = 0; i<attrnames.length; i++){
values.push(new Array(0)); // or '[]' -> the length of each array **will be** the total number of rows of text-2 (title row and attr row)
}
I would add caution that pasted data will be prone to lots of errors and potential security issues, such as SQL injection attacks. Aside from that, what happens if you have extra \ns at the end of your data? You will end up with more undefined data.

Categories