I have Google Alerts set up and receive alert emails.
I would like to parse each alert and save it in a Google Sheet with the format: alert_name, publication_date, title, news_source.
I'm not sure where to start; any help/guidance is appreciated.
Thanks
You can use the Gmail service in Apps Script; the way GmailApp works is very similar to how the Gmail API works.
The first thing you should do is build a search, the same as you would in the Gmail web app, that returns just the messages you are looking for, something like this:
var threads = GmailApp.search('from:(googlealerts-noreply@google.com)');
for (var i = 0; i < threads.length; i++) {
  var messages = threads[i].getMessages();
  for (var j = 0; j < messages.length; j++) {
    var date = messages[j].getDate();
    // getBody() returns the HTML body, which is where the alert's <table> markup lives
    var body = messages[j].getBody();
    var start = body.indexOf('<table');
    var end = body.indexOf('</table>');
  }
}
The body comes back in HTML format, so you're going to have to do some cleaning on it to find the text you want. Once you have found that text, you can write it out to a sheet using SpreadsheetApp:
function WriteToSheet(date, value){
var sheet = SpreadsheetApp.getActiveSheet();
sheet.appendRow([date, value]);
}
This code is from a script I use to scan one of my own emails for some text.
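As a rough sketch of the cleaning step (the regular expression here is an assumption and may need adjusting to whatever markup Google Alerts currently uses), you could pull the link titles and URLs out of the HTML body like this:
// Sketch only: extract link text and URLs from the alert's HTML body.
// The regex is a simplification and may need tweaking for the real alert markup.
function extractAlertLinks(htmlBody) {
  var links = [];
  var re = /<a[^>]*href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/g;
  var match;
  while ((match = re.exec(htmlBody)) !== null) {
    var url = match[1];
    var title = match[2].replace(/<[^>]+>/g, '').trim(); // strip any nested tags
    if (title) links.push([title, url]);
  }
  return links;
}
Each [title, url] pair can then be handed to WriteToSheet together with the message date.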
Although I share the opinion that @Ruben has given in the comments, I think this topic is interesting and could help other users to save and manage their Google Alerts.
Code.gs
/* Retrieve all news from the Google Alerts source */
const threads = GmailApp.search('from:(googlealerts-noreply@google.com)')
/* The sheet in which the news are saved (set SS_ID to your Spreadsheet ID) */
const SS_ID = '###'
const sS = SpreadsheetApp.openById(SS_ID).getSheetByName('Google Alerts')
/* Keep track of the news that have already been added */
let addedNews = []
try { addedNews = sS.getRange('A1:A' + sS.getLastRow()).getValues().flat() } catch (err) { }
function parseContent() {
const totalNews = []
/* Maybe add a control system to skip threads that have already been saved */
threads.forEach((th) => {
const msgs = th.getMessages()
msgs.forEach((msg) => {
/* Split the content into lines and parse it */
const body = msg.getPlainBody().split('\n')
/* Extract the filter name eg === News - 2 new results for [python] === */
const filterName = body.slice(0, 1)[0].match(/\[(.*?)\]/)[1]
const date = msg.getDate()
/* Remove the unnecessary lines */
const cleanedBody = body.slice(1, -11)
/* Detect the news via empty new lines "\r" */
const newsIdxs = cleanedBody.reduce((pre, curr, idx) => {
curr === "\r" && pre.push(idx)
return pre
}, [])
newsIdxs.forEach((v, idx, arr) => {
if (idx == arr.length - 1) return
/* From one empty line to the next */
const parsedNew = cleanedBody.slice(v + 1, arr[idx + 1])
/* Simply extracted from the first line */
const title = parsedNew[0].split('|')[0]
/* Last line and between <> */
const url = parsedNew[parsedNew.length - 1].match(/<(.*?)>/)[1]
/* Extracted from the URL rather than the title, due to its variability */
const source = url.match(/url=https:\/\/(.*?)\//)[1]
totalNews.push({ title, url, date, source, filterName })
})
})
})
totalNews.forEach((nw) => {
/* Hash the object to avoid adding news that are already present */
const id = hashCode(Object.values(nw).toString())
if (addedNews.includes(id)) return
sS.appendRow([id, ...Object.values(nw)])
})
}
/* Extracted from here https://stackoverflow.com/questions/7616461/generate-a-hash-from-string-in-javascript */
const hashCode = s => s.split('').reduce((a, b) => { a = ((a << 5) - a) + b.charCodeAt(0); return a & a }, 0)
Results
Note 1: This script is an approximation of the problem, and has only been tested for News related alerts.
Note 2: Thanks to @DalmTo for the pseudo-code; it helped me approach the problem more quickly.
Note 3: The hashCode function has been extracted from here
Note 4: I have decided to take an approach using RegExp due to the use of getPlainBody(), but I think that in this case, using a library that allows parsing HTML with getBody() would be easier to implement.
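As a rough illustration of Note 4 (not part of the tested script above), and assuming a Cheerio-style HTML parsing library such as cheeriogs has been added to the project, the getBody() approach might look something like this:
/* Sketch only: parse the HTML body instead of splitting the plain body.
   Assumes a library exposing Cheerio.load() (e.g. cheeriogs) is available in the project. */
function parseWithHtml(msg) {
  const $ = Cheerio.load(msg.getBody())
  const links = []
  $('a').each((i, el) => {
    const title = $(el).text().trim()
    const url = $(el).attr('href')
    if (title && url) links.push({ title, url, date: msg.getDate() })
  })
  return links
}
The selectors would still need to be adapted to the alert markup, but it avoids the line splitting and most of the RegExp handling.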
I have around 300 spreadsheets, and I need to copy all the data from each one and merge it into a master spreadsheet. I have a spreadsheet that lists all 300 spreadsheet IDs. This script works, however it's very slow!
I also tried manually entering all the document IDs as a variable, and it did not seem to make a difference.
Is there a better way to handle this?
function combineData() {
const masterID = "ID";
const masterSheet = SpreadsheetApp.openById(masterID).getSheets()[0];
let targetSheets = docIds();
for (let i = 0, len = targetSheets.length; i < len; i++) {
let sSheet = SpreadsheetApp.openById(targetSheets[i]).getActiveSheet();
let sData = sSheet.getDataRange().getValues();
sData.shift() //Remove header row
if (sData.length > 0) { //Needed to avoid errors on spreadsheets with no data
let fRow = masterSheet.getRange("A" + (masterSheet.getLastRow())).getRow() + 1;
let filter = sData.filter(function (row) {
return row.some(function (cell) {
return cell !== ""; //Skip blank rows in between so they aren't grabbed
})
})
masterSheet.getRange(fRow, 1, filter.length, filter[0].length).setValues(filter)
}
}
}
function docIds() {
let listOfId = SpreadsheetApp.openById('ID').getSheets()[0]; //list of 300 Spreadsheet IDs
let values = listOfID.getDataRange().getValues()
let arrayId = []
for (let i = 1, len = values.length; i < len; i++) {
let data = values[i];
let ssID = data[1];
arrayId.push(ssID)
}
return arrayId
}
I believe your goal is as follows.
You have 300 Spreadsheets.
You want to retrieve the values from the 1st tab of all spreadsheets and put the retrieved values into the 1st tab of the master Spreadsheet.
You want to reduce the process cost of the script.
Issue and workaround:
At the current stage, unfortunately, there is no method for retrieving the values from multiple spreadsheets simultaneously. A script therefore has to obtain the values from each spreadsheet in a loop, and the process cost of that loop becomes high. I think this might be the reason for your current issue.
In this answer, as another approach, I would like to propose the following flow.
Create the URL list for exporting the values from Spreadsheets.
At the current stage, when the Sheets API is called in a loop like this, an error occurs. So, in this workaround, I use the URL for exporting a spreadsheet as CSV data; it seems that even when this URL is accessed in a loop, no error occurs.
Retrieve CSV values from the URLs using UrlFetchApp.fetchAll.
The fetchAll method runs the requests asynchronously. Ref (Author: me)
Merge the retrieved values by parsing CSV data as an array.
Put the values to the master Spreadsheet using Sheets API.
With this flow, I thought the process cost could be reduced. When this flow is reflected in a script, how about the following sample script?
Sample script:
Please set masterID and ssId. And, please enable Sheets API at Advanced Google services. And, please run myFunction.
function myFunction() {
const masterID = "###"; // Please set the master Spreadsheet ID.
const ssId = "###"; // Please set the Spreadsheet ID including the Spreadsheet IDs you want to retrieve in column "B".
// Retrieve Spreadsheet IDs.
const sheet = SpreadsheetApp.openById(ssId).getSheets()[0];
const ssIds = sheet.getRange("B2:B" + sheet.getLastRow()).getDisplayValues().reduce((ar, [b]) => {
if (b) ar.push(b);
return ar;
}, []);
// Retrieve values from all Spreadsheets.
const workers = 50; // Please adjust this value.
const headers = { authorization: "Bearer " + ScriptApp.getOAuthToken() };
const reqs = [...Array(Math.ceil(ssIds.length / workers))].map(_ => ssIds.splice(0, workers).map(id => ({ url: `https://docs.google.com/spreadsheets/export?exportFormat=csv&id=${id}`, headers, muteHttpExceptions: true })));
const values = reqs.flatMap(r =>
UrlFetchApp.fetchAll(r).flatMap(rr => {
if (rr.getResponseCode() == 200) {
const [, ...val] = Utilities.parseCsv(rr.getContentText());
return val;
}
return [];
})
);
// Put values to the master sheet.
const masterSheet = SpreadsheetApp.openById(masterID).getSheets()[0];
Sheets.Spreadsheets.Values.update({ values }, masterID, `'${masterSheet.getSheetName()}'!A${masterSheet.getLastRow() + 1}`, { valueInputOption: "USER_ENTERED" });
// DriveApp.getFiles(); // This comment line is used for automatically detecting the scope for Drive API. So, please don't remove this line.
}
When this script is run,
Spreadsheet IDs are retrieved from column "B" of the 1st sheet in the Spreadsheet of ssId.
Values are retrieved from all Spreadsheets.
In this script, the values are retrieved from the spreadsheets in batches of 50, asynchronously. If you increase const workers = 50; to const workers = 100;, the values are retrieved in batches of 100. But, if an error occurs when this value is increased, please adjust the value.
Put values using Sheets API.
When I tested this script with 50 spreadsheets, the processing time was about 20 seconds. But I'm not sure about your actual situation, so please test this script yourself.
Note:
In your script, listOfID is not declared. Please be careful about this.
Unfortunately, I cannot know all of your spreadsheets. If the combined values amount to more than 10,000,000 cells, an error occurs because of the maximum number of cells in a spreadsheet. Please be careful about this.
If the number of values is large, an error might occur. At that time, please check my report.
References:
fetchAll(requests)
Method: spreadsheets.values.update
The .setValues() and .getValues() functions themselves are already quite heavy, especially if you have a lot of data in the sheet, and using them inside a for loop that iterates one by one will really slow things down. How about changing the for loop to forEach()?
Try:
function combineData() {
const masterID = "1aRQ7rW9tGF25xdmjAfOtT6HtyZKQq0_AIYOGSZMKOcA";
const masterSheet = SpreadsheetApp.openById(masterID).getSheetByName("Master");
let targetSheets = docIds();
targetSheets.forEach(function(x){
let sSheet = SpreadsheetApp.openById(x).getActiveSheet();
let sData = sSheet.getDataRange().getValues();
sData.shift() //Remove header row
if (sData.length > 0) { //Needed to avoid errors on spreadsheets with no data
let fRow = masterSheet.getRange("A" + (masterSheet.getLastRow())).getRow() + 1;
let filter = sData.filter(function (row) {
return row.some(function (cell) {
return cell !== ""; //Skip blank rows in between so they aren't grabbed
})
})
masterSheet.getRange(fRow, 1, filter.length, filter[0].length).setValues(filter)
}
})
}
function docIds() {
let listOfId = SpreadsheetApp.openById('1aRQ7rW9tGF25xdmjAfOtT6HtyZKQq0_AIYOGSZMKOcA').getSheets()[0]; //list of 300 Spreadsheet IDs
let values = listOfId.getDataRange().getValues();
values.shift()
let arrayId = []
values.forEach(function(val){
let data = val;
let ssID = data[1];
arrayId.push(ssID)
})
return arrayId
}
Also here are some of the best practices to improve the performance of the script: Best Practices
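One of those practices is batching the writes as well. A rough sketch (untested, reusing the docIds() helper above and a placeholder master ID) that collects every row first and then writes them with a single setValues() call:
function combineDataBatched() {
  const masterID = "ID"; // placeholder: master Spreadsheet ID
  const masterSheet = SpreadsheetApp.openById(masterID).getSheets()[0];
  const allRows = [];
  docIds().forEach(function (id) {
    const data = SpreadsheetApp.openById(id).getSheets()[0].getDataRange().getValues();
    data.shift(); // remove the header row
    data.forEach(function (row) {
      if (row.some(function (cell) { return cell !== ""; })) allRows.push(row); // skip blank rows
    });
  });
  if (allRows.length > 0) {
    // pad rows to the same width, then do one write instead of one per source spreadsheet
    const width = allRows.reduce(function (m, r) { return Math.max(m, r.length); }, 0);
    const padded = allRows.map(function (r) { return r.concat(new Array(width - r.length).fill("")); });
    masterSheet.getRange(masterSheet.getLastRow() + 1, 1, padded.length, width).setValues(padded);
  }
}
The reads still happen one spreadsheet at a time, but the master sheet is only written to once, which removes the repeated setValues() calls.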
More details on forEach:
forEach()
Let me know if this helps!
Use the Sheets API; depending on the data, it is an order of magnitude faster than the native SpreadsheetApp. Add the Google Sheets API under Services in the left pane of the Apps Script editor.
Here is a code snippet showing how we use one or the other API:
if(gridData && gridHeight) {
let range = sheet.getRange(startRow, 1, gridHeight, gridData[0].length);
if(useSheetsAPI) {
try {
SpreadsheetApp.flush();
let valueRange = Sheets.newValueRange();
valueRange.values = gridData;
let idAndName = getSpreadsheetIdAndSheetNameByName_(sheetName);
let rangeA1 = idAndName.sheetName + '!' + range.getA1Notation();
let options = { valueInputOption: 'USER_ENTERED' };
let result = Sheets.Spreadsheets.Values.update(valueRange, idAndName.spreadsheetId, rangeA1, options);
debugLog_('sheetReplace(): Sheets.Spreadsheets.Values.update result: '+result);
} catch (err) {
Logger.log('sheetReplace() ERROR: %s', err.message);
return 'ERROR: sheetReplace() failed: ' + err.message;
}
} else {
range.setValues(gridData);
}
}
/**
* Get spreadsheet Id and sheet name by sheet name
*
* @param {string|null} name name of the sheet, either "sheet_id:Tab Name" or "Tab Name"
* @return {object} object with spreadsheetId and sheetName
*/
function getSpreadsheetIdAndSheetNameByName_(name) {
let spreadsheetId = '';
if(name && name.length > 44 && name.indexOf(':') > 40) {
// assume format: "sheet_id:Tab Name"
spreadsheetId = name.replace(/:.*$/, '');
name = name.replace(/^.*?:/, '');
} else {
// assume format "Tab Name"
spreadsheetId = SpreadsheetApp.getActiveSpreadsheet().getId();
}
return { spreadsheetId: spreadsheetId, sheetName: name };
}
Also, I submitted an enhancement request for better performance, see https://issuetracker.google.com/issues/222337394 and vote for it.
I'm trying to use Apps Script to give access to my Google Sheet through a search on a web page, so I don't have to expose all the data. The search is based on a specific column; the searched value can repeat in that column, but the other columns of the same row, like price and item, are different.
With my current code, if the searched column contains only characters the code works perfectly, but once I add numbers to the targeted column in my Google Sheet the code stops working. Can you help me with that? My knowledge of coding is basic.
function doGet(e) {
return HtmlService.createTemplateFromFile("Index").evaluate()
.setTitle("WebApp: Search By Password")
.addMetaTag('viewport', 'width=device-width, initial-scale=1')
.setXFrameOptionsMode(HtmlService.XFrameOptionsMode.ALLOWALL);
}
/* PROCESS FORM */
function processForm(formObject){
var concat = formObject.searchtext;
var result = "";
if(concat){//Execute if form passes search text
result = search(concat);
}
return result;
}
function search(searchtext = 'searchtext') {
let ar = [];
var spreadsheetId = '1aN8VLL4iKhGjmM84qhncG9cQfKigCWscMT-UkdzNhQs';
const names = ['Data', 'Data2'];
names.forEach((name) => {
var range = SpreadsheetApp.getActive().getSheetByName(name).getDataRange();
var data = range.getValues();
data.forEach(function (f) {
if (f[0] === searchtext) {
ar.push([f[0],f[1],f[2],f[3],f[4],f[5],f[6],f[7],f[8],f[9]]);
}
});
});
return ar;
};
thanks a lot
Try f[0] == searchtext, because === requires both the same type and the same value; strict equality may be preventing searchtext from being coerced into a number.
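If you would rather keep strict equality, another option (a small sketch, not tested against the sheet in question) is to normalize both sides to strings before comparing inside the search() loop:
data.forEach(function (f) {
  // String() makes the number 123 stored in the sheet match the text "123" coming from the form
  if (String(f[0]) === String(searchtext)) {
    ar.push(f.slice(0, 10)); // first ten columns, same as before
  }
});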
Here is my current script and attached sheet.
I have been able to successfully find the index value with the function getColumnIndex(label) and then pass that result into getColumnValues(index) to pull all the rows in that specific column. However, I can't seem to use the input field from the autocomplete question id="courseCode" (Enter Course Code) as the search string in getExpectations() to populate the HTML page question id="expectations" as a multi-selection question.
It works if I manually hard-code the search string to return the column rows. I would like to take the first 4 characters of the input field id="courseCode" (3 letters followed by a number) as the search string to determine which options will populate the id="expectations" question.
I am a bit confused about calling functions from within another function, and about when and how to pass a parameter/condition through to them.
I hope this is enough information to solve my script error. Thanks in advance for this concern. Take care.
Added the following lines of code to get all options selected in the multi-selection Course Expectations question.
function collectForm(){
var submission = {};
// gets you the values for all id="specific_names"
submission.grade = document.getElementById("grade").value;
submission.courseCode = document.getElementById("courseCode").value;
var list = document.getElementsByClassName('selectedExpectations');
var selection = ' ';
for (var i = 0; i < list.length; i++){
if (list[i].checked === true) {
selection += list[i].value + ", ";
}
}
submission.expectations = selection;
google.script.run.userClicked(submission);
}
In short
You need something like this
/**
*
* @param {string} code
*/
function getExpectations2(code) {
var patt = new RegExp(code.slice(0, 5), 'i');
var data = SpreadsheetApp.openById(
'1evNXXgFITrdNwsSdGXmprgzti74AQy03dg0igP5nT0I'
)
.getSheetByName('expectations')
.getDataRange()
.getValues();
var colIndex = data[0].reduce(function(p, c, i) {
return patt.test(c) ? i : p;
}, -1);
return colIndex === -1
? []
: data
.slice(1)
.filter(function(row) {
return row[colIndex] !== '';
})
.map(function(row) {
return row[colIndex];
});
}
getExpectations2 - returns a column by code as a list.
Also you have to update your listExpectations
function listExpectations(listLabels) {
console.log(listLabels);
const elm = document.getElementById('expectations');
const label = listLabels
.map(row => `<option value="${row}">${row}</option>`)
.join('');
elm.innerHTML =
'<option disabled selected>Select expectations not met</option>' + label;
setTimeout(() => M.FormSelect.init(elm), 0);
}
Of course, you need to bind all of them:
function populateCodes(codes) {
var autocomplete = document.getElementById('courseCode');
var instances = M.Autocomplete.init(autocomplete, {
data: codes,
onAutocomplete: onAutocompleteCourseCode,
});
}
Where onAutocompleteCourseCode is
const onAutocompleteCourseCode = courseCode => {
google.script.run
.withSuccessHandler(listExpectations)
.getExpectations2(courseCode);
};
I am importing data from Google Sheets to a MySQL table using Google Apps Script. I have a significantly large dataset to import from the sheet into the table, but I am running into an "exceeded maximum execution time" exception. Are there other options to speed up execution?
var address = 'database_IP_address';
var rootPwd = 'root_password';
var user = 'user_name';
var userPwd = 'user_password';
var db = 'database_name';
var root = 'root';
var instanceUrl = 'jdbc:mysql://' + address;
var dbUrl = instanceUrl + '/' + db;
function googleSheetsToMySQL() {
var RecId;
var Code;
var ProductDescription;
var Price;
var dbconnection = Jdbc.getConnection(dbUrl, root, rootPwd);
var statement = dbconnection.createStatement();
var googlesheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('product');
var data = googlesheet.getDataRange().getValues();
for (var i = 1; i < data.length; i++) {
RecId = data[i][0];
Code = data[i][1];
ProductDescription = data[i][2];
Price = data[i][3];
var sql = "{call [dbo].[sp_googlesheetstotable](?,?,?,?)}";
statement = dbconnection.prepareCall(sql);
statement.setString(1, RecId);
statement.setString(2, Code);
statement.setString(3, ProductDescription);
statement.setString(4, Price);
statement.executeUpdate();
}
statement.close();
dbconnection.close();
}
Using batch execution
dbconnection.setAutoCommit(false)
for (var i = 1; i < data.length; i++) {
RecId = data[i][0];
Code = data[i][1];
ProductDescription = data[i][2];
Price = data[i][3];
var sql = "{call [dbo].[sp_googlesheetstotable](?,?,?,?)}";
statement = dbconnection.prepareCall(sql);
statement.setString(1, RecId);
statement.setString(2, Code);
statement.setString(3, ProductDescription);
statement.setString(4, Price);
statement.addBatch()
statement.executeBatch()
}
dbconnection.commit()
I suspect that you may have figured out the solution to your problem, but for all those who might stumble across this like I did, there is an easy way to speed up these requests. The OP was nearly there...
Using the provided code:
function googleSheetsToMySQL() {
var sheetName = 'name_of_google_sheet';
var dbAddress = 'database_ip_address';
var dbUser = 'database_user_name';
var dbPassword = 'database_user_password';
var dbName = 'database_name';
var dbTableName = 'database_table_name';
var dbURL = 'jdbc:mysql://' + dbAddress + '/' + dbName;
// Regarding the statement used by the OP, you might find something like....
//
// "INSERT INTO " + dbTableName + " (recid, code, product_description, price) VALUES (?, ?, ?, ?);";
//
// to be more practical if you're trying to implement the OP's code,
// as you are unlikely to have a stored procedure named 'sp_googlesheetstotable', or may be more
// familiar with basic queries like INSERT, UPDATE, or SELECT
var sql = "{call [dbo].[sp_googlesheetstotable](?,?,?,?)}";
// The more records/requests you load into the statement object, the longer it will take to process,
// which may mean you exceed the execution time before you can do any post processing.
//
// For example, you may want to record the last row you exported in the event the export must be halted
// prematurely. You could create a series of Triggers to re-initiate the export, picking up right where
// you left off.
//
// The other consideration is that you want your GAS memory utilization to remain as low as possible to
// keep things running smoothly and quickly, so try to strike a balance that fits the data you're
// working with.
var maxRecordsPerBatch = 1000;
var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var sheet = spreadsheet.getSheetByName(sheetName);
var sheetData = sheet.getDataRange().getValues();
var dbConnection = Jdbc.getConnection(dbURL, dbUser, dbPassword);
// The following only needs to be set when you are changing the statement that needs to be prepared
// or when you need to reset the variable.
//
// For example, if you were to switch to a different sheet which may have different values, columns,
// structure, and/or target database table.
var dbStatement = dbConnection.prepareCall(sql);
var RecId;
var Code;
var ProductDescription;
var Price;
var recordCounter = 0;
var lastRow;
dbConnection.setAutoCommit(false);
for (var i = 1; i < sheetData.length; i++) {
lastRow = (i + 1 == sheetData.length ? true : false);
RecId = sheetData[i][0];
Code = sheetData[i][1];
ProductDescription = sheetData[i][2];
Price = sheetData[i][3];
dbStatement.setString(1, RecId);
dbStatement.setString(2, Code);
dbStatement.setString(3, ProductDescription);
dbStatement.setString(4, Price);
// This command takes what has been set above and adds the request to the array that will be sent
// to the database for processing.
dbStatement.addBatch();
recordCounter += 1;
if (recordCounter == maxRecordsPerBatch || lastRow)
{
try {
dbStatement.executeBatch();
}
catch(e)
{
console.log('Attempted to update TABLE `' + dbTableName + '` in DB `' + dbName + '`, but the following error was returned: ' + e);
}
if (!lastRow)
{ // Reset vars
dbStatement = dbConnection.prepareCall( sql ); // Better to reset this variable to avoid any potential "No operations allowed after statement closed" errors
recordCounter = 0;
}
}
}
dbConnection.commit();
dbConnection.close();
}
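The comments above mention recording the last exported row and using a series of Triggers to re-initiate the export. A minimal sketch of that idea (the property key, time budget, and function name here are assumptions, not part of the original script):
// Sketch only: resume a long export across multiple executions.
// Progress is kept in Script Properties and a time-based trigger schedules the next run.
var PROGRESS_KEY = 'lastExportedRow';  // hypothetical property name
var MAX_RUNTIME_MS = 5 * 60 * 1000;    // stop well before the ~6 minute execution limit

function resumableExport() {
  var props = PropertiesService.getScriptProperties();
  var startRow = Number(props.getProperty(PROGRESS_KEY)) || 1; // row 1 = first data row after the header
  var startedAt = Date.now();
  var sheetData = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('name_of_google_sheet').getDataRange().getValues();
  for (var i = startRow; i < sheetData.length; i++) {
    if (Date.now() - startedAt > MAX_RUNTIME_MS) {
      props.setProperty(PROGRESS_KEY, String(i));  // remember where to pick up
      ScriptApp.newTrigger('resumableExport')      // schedule the next chunk in one minute
        .timeBased()
        .after(60 * 1000)
        .create();
      return;
    }
    // ... add sheetData[i] to the prepared statement's batch and execute it periodically,
    //     exactly as in googleSheetsToMySQL() above ...
  }
  props.deleteProperty(PROGRESS_KEY); // finished: clear the saved progress
}
Once the export completes, any triggers created this way should be removed with ScriptApp.getProjectTriggers() and ScriptApp.deleteTrigger() so they don't accumulate.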
The OP may still have run up against the execution time limit (I did at less than 10k records), but you should avoid executing requests one at a time rather than in batches, unless you're having trouble locating a problem row.
From this link
It is important to keep in mind that each update added to a Statement or PreparedStatement is executed separately by the database. That means that some of them may succeed before one of them fails. All the statements that have succeeded are then applied to the database, but the rest of the updates may not be. This can result in inconsistent data in the database.
To avoid this, you can execute the batch update inside a JDBC transaction. When executed inside a transaction, you can make sure that either all updates are executed or none are. Any successful updates can be rolled back in case one of the updates fails.
Alternative Solution
If the time limit is a huge bother, you might try externally accessing the data within your Sheets. I've copied the basic instructions for posterity's sake, but please visit the link if it still works.
Link to source
Update composer.json to require "google/apiclient": "^2.0" and run composer update
Create project on https://console.developers.google.com/apis/dashboard.
Click Enable APIs and enable the Google Sheets API
Go to Credentials, then click Create credentials, and select Service account key
Choose New service account in the drop down. Give the account a name, anything is fine.
For Role I selected Project -> Service Account Actor
For Key type, choose JSON (the default) and download the file. This file contains a private key so be very careful with it, it is your credentials after all
Finally, edit the sharing permissions for the spreadsheet you want to access and share either View (if you only want to read the file) or Edit (if you need read/write) access to the client_email address you can find in the JSON file.
<?php
require __DIR__ . '/vendor/autoload.php';
/*
* We need to get a Google_Client object first to handle auth and api calls, etc.
*/
$client = new \Google_Client();
$client->setApplicationName('My PHP App');
$client->setScopes([\Google_Service_Sheets::SPREADSHEETS]);
$client->setAccessType('offline');
/*
* The JSON auth file can be provided to the Google Client in two ways, one is as a string which is assumed to be the
* path to the json file. This is a nice way to keep the creds out of the environment.
*
* The second option is as an array. For this example I'll pull the JSON from an environment variable, decode it, and
* pass along.
*/
$jsonAuth = getenv('JSON_AUTH');
$client->setAuthConfig(json_decode($jsonAuth, true));
/*
* With the Google_Client we can get a Google_Service_Sheets service object to interact with sheets
*/
$sheets = new \Google_Service_Sheets($client);
/*
* To read data from a sheet we need the spreadsheet ID and the range of data we want to retrieve.
* Range is defined using A1 notation, see https://developers.google.com/sheets/api/guides/concepts#a1_notation
*/
$data = [];
// The first row contains the column titles, so lets start pulling data from row 2
$currentRow = 2;
// The range of A2:H will get columns A through H and all rows starting from row 2
$spreadsheetId = getenv('SPREADSHEET_ID');
$range = 'A2:H';
$rows = $sheets->spreadsheets_values->get($spreadsheetId, $range, ['majorDimension' => 'ROWS']);
if (isset($rows['values'])) {
foreach ($rows['values'] as $row) {
/*
* If first column is empty, consider it an empty row and skip (this is just for example)
*/
if (empty($row[0])) {
break;
}
$data[] = [
'col-a' => $row[0],
'col-b' => $row[1],
'col-c' => $row[2],
'col-d' => $row[3],
'col-e' => $row[4],
'col-f' => $row[5],
'col-g' => $row[6],
'col-h' => $row[7],
];
/*
* Now for each row we've seen, lets update the I column with the current date
*/
$updateRange = 'I'.$currentRow;
$updateBody = new \Google_Service_Sheets_ValueRange([
'range' => $updateRange,
'majorDimension' => 'ROWS',
'values' => ['values' => date('c')],
]);
$sheets->spreadsheets_values->update(
$spreadsheetId,
$updateRange,
$updateBody,
['valueInputOption' => 'USER_ENTERED']
);
$currentRow++;
}
}
print_r($data);
/* Output:
Array
(
[0] => Array
(
[col-a] => 123
[col-b] => test
[col-c] => user
[col-d] => test user
[col-e] => usertest
[col-f] => email@domain.com
[col-g] => yes
[col-h] => no
)
[1] => Array
(
[col-a] => 1234
[col-b] => another
[col-c] => user
[col-d] =>
[col-e] => another
[col-f] => another@eom.com
[col-g] => no
[col-h] => yes
)
)
*/
Try to check this related SO question for some information on how to import data from Google Spreadsheets into MySQL using Apps Script code.
Now, for your "exceeded maximum execution time" exception, remember that the Apps Script quotas allow a maximum execution time of only 6 minutes per execution for a single script, so it means that you exceeded this limit.
Try to check this page for the technique on how to prevent Google Scripts from exceeding the maximum execution time limit.
For more information, check these links:
Exceeded maximum execution time in Google Apps Script
Google app script timeout ~ 5 minutes?
I'm using http://caja.appspot.com/html-css-sanitizer-minified.js to sanitize user HTML; however, in some instances I want to restrict the tags used to just a white list.
I've found https://code.google.com/p/google-caja/wiki/CajaWhitelists which describes how to define a white list, but I can't work out how to pass it to the html_sanitize method provided by html-css-sanitizer-minified.js
I've tried calling html.sanitizeWithPolicy(the_html, white_list); but I get an error:
TypeError: a is not a function
Which is hard to debug due to the minification, but it seems likely that html-css-sanitizer-minified.js does not contain everything in the html-sanitizer.js file.
I've tried using html-sanitizer.js combined with cssparser.js instead of the minified version, but I get errors before calling it, presumably because I am missing other dependencies.
How can I make this work?
Edit: sanitizeWithPolicy does exist in the minified file, but something is missing further down the process. This suggests that this file can't be used with a custom white list. I'm now investigating whether it is possible to work out which unminified files I need to include to make my own version.
Edit2: I was missing two files https://code.google.com/p/google-caja/source/browse/trunk/src/com/google/caja/plugin/html4-defs.js?spec=svn1950&r=1950 and https://code.google.com/p/google-caja/source/browse/trunk/src/com/google/caja/plugin/uri.js?r=5170
However, I am now getting an error because sanitizeWithPolicy expects a function, not a whitelist object. Also, the html4-defs.js file is very old, and according to this I would have to build the Caja project in order to get a more recent one.
I solved this by downloading the unminified files:
https://code.google.com/p/google-caja/source/browse/trunk/src/com/google/caja/plugin/html-sanitizer.js
https://code.google.com/p/google-caja/source/browse/trunk/src/com/google/caja/plugin/uri.js
https://code.google.com/p/google-caja/source/browse/trunk/src/com/google/caja/plugin/html4-defs.js?spec=svn1950&r=1950
(This last one is from an old revision. This file is built from the Java files; it would be great if a more up-to-date one were available.)
I then added a new function to html-sanitizer.js
/**
* Trims down the element white list to just those passed in whilst still not allowing unsafe elements.
* #param {array} custom_elements An array of elements to include.
*/
function useCustomElements(custom_elements) {
var length = custom_elements.length;
var new_elements = {};
for (var i = 0; i < length; i++) {
var key = custom_elements[i].toLowerCase();
if (typeof elements.ELEMENTS[key] !== 'undefined') {
new_elements[key] = elements.ELEMENTS[key];
}
}
elements.ELEMENTS = new_elements;
};
I then made this function public by adding the following near the end of the file, with the other public function statements:
html.useCustomElements = html['useCustomElements'] = useCustomElements;
Now I can call it like so:
var raw = '<p>This element is kept</p><div>this element is not</div>';
var white_list = ['p', 'b'];
html.useCustomElements(white_list)
var sanitized = html.sanitize(raw);
I then manually added some HTML5 elements to the html4-defs.js file (the ones that just define block elements).
The attribute sanitization was still broken. This is due to the html4-defs.js file being out of date relative to html-sanitizer.js. I changed this in html-sanitizer.js:
if ((attribKey = tagName + '::' + attribName,
elements.ATTRIBS.hasOwnProperty(attribKey)) ||
(attribKey = '*::' + attribName,
elements.ATTRIBS.hasOwnProperty(attribKey))) {
atype = elements.ATTRIBS[attribKey];
}
to
if (elements.ATTRIBS.hasOwnProperty(attribName)) {
atype = elements.ATTRIBS[attribName];
}
This is far from ideal but without compiling Caja and generating an up to date html-defs.js file I can't see a way around this.
This still leaves CSS sanitization. I would like this as well, but I am missing the CSS def files and can't find any that work via search, so I have turned it off for now.
EDIT: I've managed to extract the html-defs from html-css-sanitizer-minified.js.
I've uploaded a copy here. It includes elements like 'nav', so it has been updated for HTML5.
I've tried to do the same for the CSS parsing; I managed to extract the defs, but they depend on a bit count, and I can't find any way to calculate which bits were used for which defaults.
I've decided on another approach. I've left the other answer up in case I manage to find the bit values for the CSS definitions, as that would be preferable to this one if I could get it to work.
This time I've taken the html-css-sanitizer-minified file and injected a bit of code into it so that the elements and attributes can be modified.
Search for :
ka=/^(?:https?|mailto)$/i,m={};
And after it insert the following:
var unmodified_elements = {};
for(var property_name in $.ELEMENTS) {
unmodified_elements[property_name] = $.ELEMENTS[property_name];
};
var unmodified_attributes = {};
for(var property_name in $.ATTRIBS) {
unmodified_attributes[property_name] = $.ATTRIBS[property_name];
};
var resetElements = function () {
$.ELEMENTS = {};
for(var property_name in unmodified_elements) {
$.ELEMENTS[property_name] = unmodified_elements[property_name];
}
$.f = $.ELEMENTS;
};
var resetAttributes = function () {
$.ATTRIBS = {};
for(var property_name in unmodified_attributes) {
$.ATTRIBS[property_name] = unmodified_attributes[property_name];
}
$.m = $.ATTRIBS;
};
var resetWhiteLists = function () {
resetElements();
resetAttributes();
};
/**
* Trims down the element white list to just those passed in whilst still not allowing unsafe elements.
* @param {array} custom_elements An array of elements to include.
*/
var applyElementsWhiteList = function(custom_elements) {
resetElements();
var length = custom_elements.length;
var new_elements = {};
for (var i = 0; i < length; i++) {
var key = custom_elements[i].toLowerCase();
if (typeof $.ELEMENTS[key] !== 'undefined') {
new_elements[key] = $.ELEMENTS[key];
}
}
$.f = new_elements;
$.ELEMENTS = new_elements;
};
/**
* Trims down the attribute white list to just those passed in whilst still not allowing unsafe elements.
* @param {array} custom_attributes An array of attributes to include.
*/
var applyAttributesWhiteList = function(custom_attributes) {
resetAttributes();
var length = custom_attributes.length;
var new_attributes = {};
for (var i = 0; i < length; i++) {
var key = custom_attributes[i].toLowerCase();
if (typeof $.ATTRIBS[key] !== 'undefined') {
new_attributes[key] = $.ATTRIBS[key];
}
}
$.m = new_attributes;
$.ATTRIBS = new_attributes;
};
m.applyElementsWhiteList = applyElementsWhiteList;
m.applyAttributesWhiteList = applyAttributesWhiteList;
m.resetWhiteLists = resetWhiteLists;
You can now apply a white list with :
var raw = "<a>element tags removed</a><p class='class-removed' style='color:black'>the p tag is kept</p>";
var tag_white_list = [
'p'
];
var attribute_white_list = [
'*::style'
];
html.applyElementsWhiteList(tag_white_list);
html.applyAttributesWhiteList(attribute_white_list);
var san = html.sanitize(raw);
This approach also sanitizes the styles, which I needed. Another white list could be injected for those, but I don't need that, so I haven't written one.