Import data from Google Sheets to a MySQL table using Google Apps Script. I have a significantly large dataset to import from a Google Sheet into a table, but I am running into an "exceeded maximum execution time" exception. Are there other options to speed up execution?
var address = 'database_IP_address';
var rootPwd = 'root_password';
var user = 'user_name';
var userPwd = 'user_password';
var db = 'database_name';
var root = 'root';
var instanceUrl = 'jdbc:mysql://' + address;
var dbUrl = instanceUrl + '/' + db;
function googleSheetsToMySQL() {
  var RecId;
  var Code;
  var ProductDescription;
  var Price;
  var dbconnection = Jdbc.getConnection(dbUrl, root, rootPwd);
  var statement = dbconnection.createStatement();
  var googlesheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('product');
  var data = googlesheet.getDataRange().getValues();
  for (var i = 1; i < data.length; i++) {
    RecId = data[i][0];
    Code = data[i][1];
    ProductDescription = data[i][2];
    Price = data[i][3];
    var sql = "{call [dbo].[sp_googlesheetstotable](?,?,?,?)}";
    statement = dbconnection.prepareCall(sql);
    statement.setString(1, RecId);
    statement.setString(2, Code);
    statement.setString(3, ProductDescription);
    statement.setString(4, Price);
    statement.executeUpdate();
  }
  statement.close();
  dbconnection.close();
}
Using batch execution
dbconnection.setAutoCommit(false);
for (var i = 1; i < data.length; i++) {
  RecId = data[i][0];
  Code = data[i][1];
  ProductDescription = data[i][2];
  Price = data[i][3];
  var sql = "{call [dbo].[sp_googlesheetstotable](?,?,?,?)}";
  // Note: preparing the call and executing the batch inside the loop defeats
  // the purpose of batching; each iteration still sends its own request.
  statement = dbconnection.prepareCall(sql);
  statement.setString(1, RecId);
  statement.setString(2, Code);
  statement.setString(3, ProductDescription);
  statement.setString(4, Price);
  statement.addBatch();
  statement.executeBatch();
}
dbconnection.commit();
I suspect that you may have figured out the solution to your problem, but for all those who might stumble across this like I did, there is an easy way to speed up these requests. The OP was nearly there...
Using the provided code:
function googleSheetsToMySQL() {
  var sheetName = 'name_of_google_sheet';
  var dbAddress = 'database_ip_address';
  var dbUser = 'database_user_name';
  var dbPassword = 'database_user_password';
  var dbName = 'database_name';
  var dbTableName = 'database_table_name';
  var dbURL = 'jdbc:mysql://' + dbAddress + '/' + dbName;

  // Regarding the statement used by the OP, you might find something like...
  //
  //   "INSERT INTO " + dbTableName + " (recid, code, product_description, price) VALUES (?, ?, ?, ?);"
  //
  // to be more practical if you're trying to implement the OP's code, as you are
  // unlikely to have a stored procedure named 'sp_googlesheetstotable', or may be
  // more familiar with basic queries like INSERT, UPDATE, or SELECT.
  var sql = "{call [dbo].[sp_googlesheetstotable](?,?,?,?)}";

  // The more records/requests you load into the statement object, the longer it
  // will take to process, which may mean you exceed the execution time before you
  // can do any post-processing.
  //
  // For example, you may want to record the last row you exported in the event the
  // export must be halted prematurely. You could create a series of Triggers to
  // re-initiate the export, picking up right where you left off.
  //
  // The other consideration is that you want your GAS memory utilization to remain
  // as low as possible to keep things running smoothly and quickly, so try to
  // strike a balance that fits the data you're working with.
  var maxRecordsPerBatch = 1000;

  var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
  var sheet = spreadsheet.getSheetByName(sheetName);
  var sheetData = sheet.getDataRange().getValues();
  var dbConnection = Jdbc.getConnection(dbURL, dbUser, dbPassword);

  // The following only needs to be set when you are changing the statement that
  // needs to be prepared or when you need to reset the variable.
  //
  // For example, if you were to switch to a different sheet which may have
  // different values, columns, structure, and/or target database table.
  var dbStatement = dbConnection.prepareCall(sql);
  var RecId;
  var Code;
  var ProductDescription;
  var Price;
  var recordCounter = 0;
  var lastRow;

  dbConnection.setAutoCommit(false);
  for (var i = 1; i < sheetData.length; i++) {
    lastRow = (i + 1 == sheetData.length);
    RecId = sheetData[i][0];
    Code = sheetData[i][1];
    ProductDescription = sheetData[i][2];
    Price = sheetData[i][3];
    dbStatement.setString(1, RecId);
    dbStatement.setString(2, Code);
    dbStatement.setString(3, ProductDescription);
    dbStatement.setString(4, Price);
    // This command takes what has been set above and adds the request to the
    // array that will be sent to the database for processing.
    dbStatement.addBatch();
    recordCounter += 1;
    if (recordCounter == maxRecordsPerBatch || lastRow) {
      try {
        dbStatement.executeBatch();
      } catch (e) {
        console.log('Attempted to update TABLE `' + dbTableName + '` in DB `' + dbName + '`, but the following error was returned: ' + e);
      }
      if (!lastRow) { // Reset vars
        // Better to reset this variable to avoid any potential
        // "No operations allowed after statement closed" errors.
        dbStatement = dbConnection.prepareCall(sql);
        recordCounter = 0;
      }
    }
  }
  dbConnection.commit();
  dbConnection.close();
}
The OP may still have run up against the execution time limit (I did at less than 10k records), but you should avoid batching individual requests unless you're having trouble locating a problem row.
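If you do hit the limit anyway, one common workaround (a minimal sketch below, not part of the original answer; the function and property names are hypothetical) is to checkpoint the last exported row in PropertiesService and re-schedule the export with a time-based trigger, as the comments in the script above suggest:

// Sketch of the checkpoint/resume pattern; 'exportWithCheckpoint' and the
// 'lastExportedRow' property key are hypothetical names.
function exportWithCheckpoint() {
  var props = PropertiesService.getScriptProperties();
  var startRow = Number(props.getProperty('lastExportedRow')) || 1;
  var startTime = Date.now();
  var maxRunMillis = 4.5 * 60 * 1000; // stop well before the ~6 minute limit

  var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('product');
  var data = sheet.getDataRange().getValues();

  for (var i = startRow; i < data.length; i++) {
    // ... export row i to MySQL here, e.g. via the batch logic above ...
    if (Date.now() - startTime > maxRunMillis) {
      props.setProperty('lastExportedRow', String(i + 1));
      // Re-run this function in one minute, picking up where we left off.
      ScriptApp.newTrigger('exportWithCheckpoint')
        .timeBased()
        .after(60 * 1000)
        .create();
      return;
    }
  }
  props.deleteProperty('lastExportedRow'); // finished
}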
From this link
It is important to keep in mind that each update added to a Statement or PreparedStatement is executed separately by the database. That means that some of them may succeed before one of them fails. All the statements that have succeeded are now applied to the database, but the rest of the updates may not be. This can result in inconsistent data in the database.
To avoid this, you can execute the batch update inside a JDBC transaction. When executed inside a transaction you can make sure that either all updates are executed, or none are. Any successful updates can be rolled back, in case one of the updates fails.
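Applied to the Apps Script code above, that advice might look like the following sketch; rollback() is standard JDBC and is available on the connection object returned by Jdbc.getConnection():

dbConnection.setAutoCommit(false);
try {
  // ... setString()/addBatch() calls as in the script above ...
  dbStatement.executeBatch();
  dbConnection.commit(); // all batched updates are applied together
} catch (e) {
  dbConnection.rollback(); // none of the batched updates are kept
  console.log('Batch failed and was rolled back: ' + e);
} finally {
  dbConnection.close();
}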
Alternative Solution
If the time limit is a huge bother, you might try externally accessing the data within your Sheets. I've copied the basic instructions for posterity's sake, but please visit the link if it still works.
Link to source
1. Update composer.json to require "google/apiclient": "^2.0" and run composer update.
2. Create a project on https://console.developers.google.com/apis/dashboard.
3. Click Enable APIs and enable the Google Sheets API.
4. Go to Credentials, then click Create credentials, and select Service account key.
5. Choose New service account in the drop-down. Give the account a name; anything is fine.
6. For Role I selected Project -> Service Account Actor.
7. For Key type, choose JSON (the default) and download the file. This file contains a private key, so be very careful with it; it is your credentials, after all.
8. Finally, edit the sharing permissions for the spreadsheet you want to access and share either View (if you only want to read the file) or Edit (if you need read/write) access to the client_email address you can find in the JSON file.
<?php
require __DIR__ . '/vendor/autoload.php';
/*
* We need to get a Google_Client object first to handle auth and api calls, etc.
*/
$client = new \Google_Client();
$client->setApplicationName('My PHP App');
$client->setScopes([\Google_Service_Sheets::SPREADSHEETS]);
$client->setAccessType('offline');
/*
* The JSON auth file can be provided to the Google Client in two ways, one is as a string which is assumed to be the
* path to the json file. This is a nice way to keep the creds out of the environment.
*
* The second option is as an array. For this example I'll pull the JSON from an environment variable, decode it, and
* pass along.
*/
$jsonAuth = getenv('JSON_AUTH');
$client->setAuthConfig(json_decode($jsonAuth, true));
/*
* With the Google_Client we can get a Google_Service_Sheets service object to interact with sheets
*/
$sheets = new \Google_Service_Sheets($client);
/*
* To read data from a sheet we need the spreadsheet ID and the range of data we want to retrieve.
* Range is defined using A1 notation, see https://developers.google.com/sheets/api/guides/concepts#a1_notation
*/
$data = [];
// The first row contains the column titles, so let's start pulling data from row 2
$currentRow = 2;
// The range of A2:H will get columns A through H and all rows starting from row 2
$spreadsheetId = getenv('SPREADSHEET_ID');
$range = 'A2:H';
$rows = $sheets->spreadsheets_values->get($spreadsheetId, $range, ['majorDimension' => 'ROWS']);
if (isset($rows['values'])) {
    foreach ($rows['values'] as $row) {
        /*
         * If first column is empty, consider it an empty row and skip (this is just for example)
         */
        if (empty($row[0])) {
            break;
        }
        $data[] = [
            'col-a' => $row[0],
            'col-b' => $row[1],
            'col-c' => $row[2],
            'col-d' => $row[3],
            'col-e' => $row[4],
            'col-f' => $row[5],
            'col-g' => $row[6],
            'col-h' => $row[7],
        ];
        /*
         * Now for each row we've seen, let's update the I column with the current date
         */
        $updateRange = 'I'.$currentRow;
        $updateBody = new \Google_Service_Sheets_ValueRange([
            'range' => $updateRange,
            'majorDimension' => 'ROWS',
            'values' => [[date('c')]], // values must be a 2D array (rows of cells)
        ]);
        $sheets->spreadsheets_values->update(
            $spreadsheetId,
            $updateRange,
            $updateBody,
            ['valueInputOption' => 'USER_ENTERED']
        );
        $currentRow++;
    }
}
print_r($data);
/* Output:
Array
(
[0] => Array
(
[col-a] => 123
[col-b] => test
[col-c] => user
[col-d] => test user
[col-e] => usertest
[col-f] => email@domain.com
[col-g] => yes
[col-h] => no
)
[1] => Array
(
[col-a] => 1234
[col-b] => another
[col-c] => user
[col-d] =>
[col-e] => another
[col-f] => another@eom.com
[col-g] => no
[col-h] => yes
)
)
*/
Try to check this related SO question for some information on how to import data from Google Spreadsheets into MySQL using Apps Script code.
Now, for your "exceeded maximum execution time" exception, remember that the Apps Script quotas allow a maximum execution time of only 6 minutes per execution for a single script. So it means that you exceeded this limit.
Try to check this page for the technique on how to prevent Google Scripts from exceeding the maximum execution time limit.
For more information, check these links:
Exceeded maximum execution time in Google Apps Script
Google app script timeout ~ 5 minutes?
Related
I have around 300 spreadsheets, and I need to copy all data from each one and merge it into a master spreadsheet. I have a spreadsheet that lists all 300 spreadsheet IDs. This script works, however it's very slow!
I also tried manually entering all document IDs as a variable, and it did not seem to make a difference.
Is there a better way to handle this?
function combineData() {
  const masterID = "ID";
  const masterSheet = SpreadsheetApp.openById(masterID).getSheets()[0];
  let targetSheets = docIds();
  for (let i = 0, len = targetSheets.length; i < len; i++) {
    let sSheet = SpreadsheetApp.openById(targetSheets[i]).getActiveSheet();
    let sData = sSheet.getDataRange().getValues();
    sData.shift(); // Remove header row
    if (sData.length > 0) { // Needed to avoid errors on spreadsheets with no data
      let fRow = masterSheet.getRange("A" + (masterSheet.getLastRow())).getRow() + 1;
      let filter = sData.filter(function (row) {
        return row.some(function (cell) {
          return cell !== ""; // Skip blank rows in between
        });
      });
      masterSheet.getRange(fRow, 1, filter.length, filter[0].length).setValues(filter);
    }
  }
}

function docIds() {
  let listOfId = SpreadsheetApp.openById('ID').getSheets()[0]; // list of 300 spreadsheet IDs
  let values = listOfID.getDataRange().getValues(); // note: listOfID is never declared (see the answer below)
  let arrayId = [];
  for (let i = 1, len = values.length; i < len; i++) {
    let data = values[i];
    let ssID = data[1];
    arrayId.push(ssID);
  }
  return arrayId;
}
I believe your goal is as follows.
You have 300 Spreadsheets.
You want to retrieve the values from the 1st tab of all Spreadsheets and also, you want to put the retrieved values to the 1st tab of the master Spreadsheet.
You want to reduce the process cost of the script.
Issue and workaround:
In the current stage, unfortunately, there is no method for retrieving the values from multiple Spreadsheets simultaneously. With a plain script, it is required to obtain the values from each spreadsheet in a loop, and in this case the process cost becomes high. I think that this might be the reason for your current issue.
In this answer, as another approach, I would like to propose the following flow.
Create the URL list for exporting the values from Spreadsheets.
In the current stage, when the Sheets API is used in a loop, an error occurs. So, in this workaround, I use the URL for exporting a Spreadsheet as CSV data. In this case, it seems that even when this URL is accessed in a loop, no error occurs.
Retrieve CSV values from the URLs using UrlFetchApp.fetchAll.
fetchAll method works with the asynchronous process. Ref (Author: me)
Merge the retrieved values by parsing CSV data as an array.
Put the values to the master Spreadsheet using Sheets API.
By this flow, I thought that the process cost can be reduced. When this flow is reflected in a sample script, how about the following sample script?
Sample script:
Please set masterID and ssId. And, please enable Sheets API at Advanced Google services. And, please run myFunction.
function myFunction() {
  const masterID = "###"; // Please set the master Spreadsheet ID.
  const ssId = "###"; // Please set the Spreadsheet ID including the Spreadsheet IDs you want to retrieve in column "B".

  // Retrieve Spreadsheet IDs.
  const sheet = SpreadsheetApp.openById(ssId).getSheets()[0];
  const ssIds = sheet.getRange("B2:B" + sheet.getLastRow()).getDisplayValues().reduce((ar, [b]) => {
    if (b) ar.push(b);
    return ar;
  }, []);

  // Retrieve values from all Spreadsheets.
  const workers = 50; // Please adjust this value.
  const headers = { authorization: "Bearer " + ScriptApp.getOAuthToken() };
  const reqs = [...Array(Math.ceil(ssIds.length / workers))].map(_ => ssIds.splice(0, workers).map(id => ({ url: `https://docs.google.com/spreadsheets/export?exportFormat=csv&id=${id}`, headers, muteHttpExceptions: true })));
  const values = reqs.flatMap(r =>
    UrlFetchApp.fetchAll(r).flatMap(rr => {
      if (rr.getResponseCode() == 200) {
        const [, ...val] = Utilities.parseCsv(rr.getContentText());
        return val;
      }
      return [];
    })
  );

  // Put values to the master sheet.
  const masterSheet = SpreadsheetApp.openById(masterID).getSheets()[0];
  Sheets.Spreadsheets.Values.update({ values }, masterID, `'${masterSheet.getSheetName()}'!A${masterSheet.getLastRow() + 1}`, { valueInputOption: "USER_ENTERED" });

  // DriveApp.getFiles(); // This comment line is used for automatically detecting the scope for Drive API. So, please don't remove this line.
}
When this script is run,
Spreadsheet IDs are retrieved from column "B" of the 1st sheet in the Spreadsheet of ssId.
Values are retrieved from all Spreadsheets.
In this script, the values are retrieved from every 50 Spreadsheets with the asynchronous process. If you increase const workers = 50; to const workers = 100;, the values are retrieved from every 100 Spreadsheets. But, if an error occurs when this value is increased, please adjust the value.
Put values using Sheets API.
When I tested this script with 50 Spreadsheets, the processing time was about 20 seconds. But, I'm not sure about your actual situation. So, please test this script.
Note:
In your script, listOfID is not declared. Please be careful about this.
Unfortunately, I cannot know your all Spreadsheets. So, if all values are more than 10,000,000 cells, an error occurs because of the maximum number of cells in a Spreadsheet. Please be careful about this.
If the number of values is large, an error might occur. At that time, please check my report.
References:
fetchAll(requests)
Method: spreadsheets.values.update
The .setValues() and .getValues() functions are already heavy operations, especially if you have large data in the sheet, and using them together with a for loop will really cause it to be slow, since it iterates one by one. How about changing the for loop to forEach()?
Try:
function combineData() {
  const masterID = "1aRQ7rW9tGF25xdmjAfOtT6HtyZKQq0_AIYOGSZMKOcA";
  const masterSheet = SpreadsheetApp.openById(masterID).getSheetByName("Master");
  let targetSheets = docIds();
  targetSheets.forEach(function (x) {
    let sSheet = SpreadsheetApp.openById(x).getActiveSheet();
    let sData = sSheet.getDataRange().getValues();
    sData.shift(); // Remove header row
    if (sData.length > 0) { // Needed to avoid errors on spreadsheets with no data
      let fRow = masterSheet.getRange("A" + (masterSheet.getLastRow())).getRow() + 1;
      let filter = sData.filter(function (row) {
        return row.some(function (cell) {
          return cell !== ""; // Skip blank rows in between
        });
      });
      masterSheet.getRange(fRow, 1, filter.length, filter[0].length).setValues(filter);
    }
  });
}

function docIds() {
  let listOfId = SpreadsheetApp.openById('1aRQ7rW9tGF25xdmjAfOtT6HtyZKQq0_AIYOGSZMKOcA').getSheets()[0]; // list of 300 spreadsheet IDs
  let values = listOfId.getDataRange().getValues();
  values.shift();
  let arrayId = [];
  values.forEach(function (val) {
    let ssID = val[1];
    arrayId.push(ssID);
  });
  return arrayId;
}
Also here are some of the best practices to improve the performance of the script: Best Practices
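Following those best practices, you could also collect every source spreadsheet's rows into one array and write the master sheet with a single setValues() call, instead of one write per source file. A hedged sketch (it assumes all source sheets share the same column layout, and "ID" is the same placeholder used above):

function combineDataSingleWrite() {
  const masterSheet = SpreadsheetApp.openById("ID").getSheets()[0]; // "ID" is a placeholder
  const allRows = [];
  docIds().forEach(function (id) {
    const sData = SpreadsheetApp.openById(id).getSheets()[0].getDataRange().getValues();
    sData.shift(); // drop header row
    sData.forEach(function (row) {
      if (row.some(function (cell) { return cell !== ""; })) allRows.push(row);
    });
  });
  if (allRows.length > 0) {
    // One batch write instead of ~300 individual setValues() calls.
    masterSheet
      .getRange(masterSheet.getLastRow() + 1, 1, allRows.length, allRows[0].length)
      .setValues(allRows);
  }
}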
More details on forEach:
forEach()
Let me know if this helps!
Use the Sheets API; depending on the data, it is an order of magnitude faster than the native SpreadsheetApp. Add the Google Sheets API under Services in the left pane of the Apps Script editor.
Here is a code snippet of how we use one or the other API:
if (gridData && gridHeight) {
  let range = sheet.getRange(startRow, 1, gridHeight, gridData[0].length);
  if (useSheetsAPI) {
    try {
      SpreadsheetApp.flush();
      let valueRange = Sheets.newValueRange();
      valueRange.values = gridData;
      let idAndName = getSpreadsheetIdAndSheetNameByName_(sheetName);
      let rangeA1 = idAndName.sheetName + '!' + range.getA1Notation();
      let options = { valueInputOption: 'USER_ENTERED' };
      let result = Sheets.Spreadsheets.Values.update(valueRange, idAndName.spreadsheetId, rangeA1, options);
      debugLog_('sheetReplace(): Sheets.Spreadsheets.Values.update result: ' + result);
    } catch (err) {
      Logger.log('sheetReplace() ERROR: %s', err.message);
      return 'ERROR: sheetReplace() failed: ' + err.message;
    }
  } else {
    range.setValues(gridData);
  }
}
/**
 * Get spreadsheet Id and sheet name by sheet name
 *
 * @param {string|null} name name of sheet, either "sheet_id:Tab Name" or "Tab Name"
 * @return {object} object with spreadsheetId and sheetName
 */
function getSpreadsheetIdAndSheetNameByName_(name) {
  let spreadsheetId = '';
  if (name && name.length > 44 && name.indexOf(':') > 40) {
    // assume format: "sheet_id:Tab Name"
    spreadsheetId = name.replace(/:.*$/, '');
    name = name.replace(/^.*?:/, '');
  } else {
    // assume format "Tab Name"
    spreadsheetId = SpreadsheetApp.getActiveSpreadsheet().getId();
  }
  return { spreadsheetId: spreadsheetId, sheetName: name };
}
Also, I submitted an enhancement request for better performance, see https://issuetracker.google.com/issues/222337394 and vote for it.
This question already has answers here:
You do not have permission to call openById
Dynamic cell position; No permission to call setValues
I have 2 tabs in my Google Sheet: dashboard and db.
On dashboard, in column B, I call the function yahoofinance(). This function checks whether data exists in db for the given ticker. If so, this data is returned. If not, or if the row exists but the data is empty, Yahoo! Finance is contacted to retrieve the data. So far so good.
Take JPM as an example now. It is called in row 3 of dashboard. In db we do find JPM, but there is no data for the ticker, so we retrieve it live from Yahoo! Finance. Subsequently, I want to update the JPM row in db with this data, so that next time we open the dashboard, we do not contact Yahoo! again for this information.
However, see the line under // update existing row: the code generates the error Exception: You do not have permission to call setValues, and I do not know how to solve it. Do you? Any help is greatly appreciated!
function yahoofinance(ticker) {
  // First check if we have this data stored already
  var db = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('db');
  var tickers = db.getRange('A2:A').getValues();
  var stored = false;
  var row = 2;
  for (var r = 0; r < tickers.length; r++) { // was r <= tickers.length, which reads past the end
    if (tickers[r] == ticker) { stored = true; row = row + r; }
  }
  if (stored == true) { // the ticker is known in db
    var range = db.getRange(row, 2, 1, 4);
    if (range.isBlank()) { // ticker is known but no data yet
      var data = get_live_data(ticker);
      // update existing row (the range must match the 1x3 shape of data)
      db.getRange(row, 2, 1, data[0].length).setValues(data);
      // return data
      return data;
    } else {
      return range.getValues();
    }
  } else {
    var data = get_live_data(ticker);
    // append row to db
    // return data to sheet
    return data;
  }
}
function get_live_data(ticker) {
  const url = 'https://query2.finance.yahoo.com/v10/finance/quoteSummary/' + encodeURI(ticker) + '?modules=price,assetProfile,summaryDetail';
  let response = UrlFetchApp.fetch(url, { muteHttpExceptions: true });
  if (response.getResponseCode() != 200) {
    return [['-', '-', '-']]; // avoid referencing an undefined object below
  }
  var object = JSON.parse(response.getContentText());
  let fwdPE = object.quoteSummary.result[0]?.summaryDetail?.forwardPE?.fmt || '-';
  let sector = object.quoteSummary.result[0]?.assetProfile?.sector || '-';
  let mktCap = object.quoteSummary.result[0]?.price?.marketCap?.fmt || '-';
  return [[fwdPE, sector, mktCap]];
}
So, based on the line under // update existing row, you want to add the data in the other sheet. Based on the documentation, custom functions return values but will only set values in the current cell where you're using the function; you can't modify data in other cells, and that's why you're getting the error. You're calling the function in B4 and trying to set values in another sheet at the same time, which is not allowed.
A custom function cannot affect cells other than those it returns a
value to. In other words, a custom function cannot edit arbitrary
cells, only the cells it is called from and their adjacent cells.
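One common workaround (a sketch under that constraint, not from the quoted documentation; refreshDbCache is a hypothetical name) is to do the cache refresh outside the custom function, for example from a custom menu or an installable trigger, where setValues() is permitted. It reuses the OP's get_live_data() helper:

// Hypothetical helper: run from a custom menu (or an installable trigger),
// where setValues() IS permitted, to backfill missing rows in 'db'.
function refreshDbCache() {
  const db = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('db');
  const tickers = db.getRange(2, 1, db.getLastRow() - 1, 1).getValues();
  tickers.forEach(function (t, i) {
    const row = i + 2;
    const range = db.getRange(row, 2, 1, 3);
    if (t[0] && range.isBlank()) {
      range.setValues(get_live_data(t[0])); // reuses the OP's fetch helper
    }
  });
}

function onOpen() {
  SpreadsheetApp.getUi().createMenu('Data').addItem('Refresh db cache', 'refreshDbCache').addToUi();
}

The custom function can then keep returning cached or live values, while the actual writes to db happen in a context where they are allowed.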
I have Google Alerts set up and receive alert emails.
I would like to parse each alert and save it in a Google Sheet with the format: alert_name, publication_date, title, news_source.
Not sure where to start; any help/guidance appreciated.
Thanks
You can use GmailApp in Apps Script; the way GmailApp works is very similar to how the Gmail API works.
The first thing you should do is create a search in the Gmail web app which returns just the messages you are looking for, something like this:
var threads = GmailApp.search('from:(googlealerts-noreply@google.com)');
for (var i = 0; i < threads.length; i++) {
  var messages = threads[i].getMessages();
  for (var j = 0; j < messages.length; j++) {
    var date = messages[j].getDate();
    var body = messages[j].getPlainBody();
    var start = body.indexOf('<table');
    var end = body.indexOf('</table>');
  }
}
The body comes back in HTML format, so you're going to have to do some cleaning on it to find the text you want. Once you find the text you want, you can just write it out to a sheet using SpreadsheetApp:
function WriteToSheet(date, value) {
  var sheet = SpreadsheetApp.getActiveSheet();
  sheet.appendRow([date, value]);
}
This code is from a script I use to scan one of my own emails for some text.
Although I share the opinion that @Ruben has given in the comments, I think this topic is interesting and could help other users to save and manage their Google Alerts.
Code.gs
/* Retrieve all news from the Google Alerts source */
const threads = GmailApp.search('from:(googlealerts-noreply@google.com)')

/* Our sheet for saving the news (SS_ID: the target spreadsheet ID) */
const sS = SpreadsheetApp.openById(SS_ID).getSheetByName('Google Alerts')

/* Control the already added answers */
let addedNews = []
try { addedNews = sS.getRange('A1:A' + sS.getLastRow()).getValues().flat() } catch (err) { }

function parseContent() {
  const totalNews = []
  /* Maybe add a control system to skip the threads already saved */
  threads.forEach((th) => {
    const msgs = th.getMessages()
    msgs.forEach((msg) => {
      /* Divide the content into new lines and parse the content */
      const body = msg.getPlainBody().split('\n')
      /* Extract the filter name, e.g. === News - 2 new results for [python] === */
      const filterName = body.slice(0, 1)[0].match(/\[(.*?)\]/)[1]
      const date = msg.getDate()
      /* Remove the unnecessary lines */
      const cleanedBody = body.slice(1, -11)
      /* Detect the news via empty new lines "\r" */
      const newsIdxs = cleanedBody.reduce((pre, curr, idx) => {
        curr === "\r" && pre.push(idx)
        return pre
      }, [])
      newsIdxs.forEach((v, idx, arr) => {
        if (idx == arr.length - 1) return
        /* From one empty line to the next */
        const parsedNew = cleanedBody.slice(v + 1, arr[idx + 1])
        /* Simply extracted from the first line */
        const title = parsedNew[0].split('|')[0]
        /* Last line and between <> */
        const url = parsedNew[parsedNew.length - 1].match(/<(.*?)>/)[1]
        /* Extracted from the URL rather than the title due to variability */
        const source = url.match(/url=https:\/\/(.*?)\//)[1]
        totalNews.push({ title, url, date, source, filterName })
      })
    })
  })
  totalNews.forEach((nw) => {
    /* Hash the object to avoid adding news that is already present */
    const id = hashCode(Object.values(nw).toString())
    if (addedNews.includes(id)) return
    sS.appendRow([id, ...Object.values(nw)])
  })
}

/* Extracted from here https://stackoverflow.com/questions/7616461/generate-a-hash-from-string-in-javascript */
const hashCode = s => s.split('').reduce((a, b) => { a = ((a << 5) - a) + b.charCodeAt(0); return a & a }, 0)
Note 1: This script is an approximation of the problem, and has only been tested for News related alerts.
Note 2: Thanks to @DalmTo for the pseudo-code, it has helped me to approach the problem more quickly.
Note 3: The hashCode function has been extracted from here
Note 4: I have decided to take an approach using RegExp due to the use of getPlainBody(), but I think that in this case, using a library that allows parsing HTML with getBody() would be easier to implement.
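To illustrate Note 4 (a rough sketch only; Apps Script has no built-in HTML parser, so this falls back on a RegExp over getBody(), and the helper name is hypothetical):

// Sketch of pulling links out of the HTML body instead of the plain text.
// A proper HTML parser library would be more robust than this RegExp.
function extractAlertLinks(message) {
  const html = message.getBody();
  const links = [];
  const re = /<a[^>]+href="([^"]+)"[^>]*>(.*?)<\/a>/g;
  let m;
  while ((m = re.exec(html)) !== null) {
    links.push({ url: m[1], text: m[2].replace(/<[^>]+>/g, '') }); // strip nested tags from the link text
  }
  return links;
}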
Is there a way to have a selection of many transactions printed into a single PDF document? I only see two options, which seem to have significant drawbacks:
1) Load individual records into each of their own nlobjTemplateRenderer objects, and then stitch them all together within <pdfset> tags before rendering to PDF. This has a limit of less than 50 transactions, depending on other actions taken, when used within a Suitelet.
2) Do a search based upon internal IDs of selected records and pass the search results into an nlobjTemplateRenderer object. This method, based upon existing documentation, does not lead me to believe that it will properly display records with line data as result columns completely within a single document.
It almost seems like my best option is #1, but splitting the desired transactions up into groups of 5-10 records and repeatedly calling a Suitelet with the small groups, in the hopes of meeting the 45-second timeout limit of nlapiRequestURL, before stitching together all of the results and returning the final PDF document. I pretty much see a basic form of that as the following:
// initial called function that will return completed PDF document file
function buildPdfFromRecords() {
  var pdfBuilder = [];
  var selectedIDs = [];
  var chunks = chunkify(selectedIDs, 10); // chunkify: helper that splits an array into groups
  for (var c = 0; c < chunks.length; c++) {
    var param = { id_list: JSON.stringify(chunks[c]) }; // was chunks[s], an undefined index
    var result = nlapiRequestURL(url, param).getBody(); // url: the Suitelet's external URL
    pdfBuilder.push(result);
  }
  var finalXML = "<pdfset>" + pdfBuilder.join("") + "</pdfset>";
  var pdfDoc = nlapiXMLToPDF(finalXML);
}

// function in suitelet called by url to handle individual groups of record internal IDs
// to mitigate scripting governance limits
function handleRecordIdListRequest(request, response) {
  var idList = JSON.parse(request.getParameter("id_list"));
  var templateXML = nlapiLoadFile("template.txt").getValue(); // files are loaded with nlapiLoadFile, not nlapiLoadRecord
  var pdfBuilder = [];
  for (var i = 0; i < idList.length; i++) {
    var transRecord = nlapiLoadRecord("recordtype", idList[i]);
    var renderer = nlapiCreateTemplateRenderer();
    renderer.setTemplate(templateXML);
    renderer.addRecord("record", transRecord);
    pdfBuilder.push(renderer.renderToString());
  }
  response.write(pdfBuilder.join(""));
}
If this is really the best way, then so be it, but I'm hoping there's a more elegant solution out there that I'm just not seeing.
There are a number of pieces you can stitch together to get this done.
In the post handler of your Suitelet use the N/task library to schedule a map/reduce task. The task.submit method returns a taskId that you can use to monitor the progress of your job. Once your UI has a taskId it can periodically check to see if the task has completed. When complete you can show the generated .pdf. You could also let the user know that the pdf might take a few minutes to generate and offer to email it to them when done. Here's a snippet that schedules a scheduled script with parameters:
const mrTask = task.create({
  taskType: task.TaskType.SCHEDULED_SCRIPT,
  scriptId: 'customscript_knsi_batch_products',
  deploymentId: deploymentId,
  params: {
    custscript_knsi_batch_operator: user.id,
    custscript_knsi_batch_sourcing: sourcingId
  }
});
try {
  const taskId = mrTask.submit();
  context.response.setHeader({ name: 'content-type', value: 'application/json' });
  context.response.write(JSON.stringify({
    success: true,
    message: 'queued as task: ' + taskId
  }));
} catch (e) {
  log.error({
    title: 'triggering ' + sourcingId + ' for ' + user.email,
    details: (e.message || e.toString()) + (e.getStackTrace ? (' \n \n' + e.getStackTrace().join(' \n')) : '')
  });
  context.response.setHeader({ name: 'content-type', value: 'application/json' });
  context.response.write(JSON.stringify({
    success: false,
    message: 'An error occurred scheduling this script\n' + e.message
  }));
}
Use a Map/Reduce script where your map method generates and returns each transaction's pdf file url. You'll only have a single key so that the results of all map stages coalesce into a single reduce.
In the reduce step you can generate open and close pdf files as necessary and put their references into your mapped array of pdfs.
Use the pdfset to bind all your individual pdfs into a single pdf:
function renderSet(opts) {
  var tpl = ['<?xml version="1.0"?>', '<pdfset>'];
  opts.files.forEach(function (id) {
    const partFile = file.load({ id: id });
    var pdf_fileURL = xml.escape({ xmlText: partFile.url });
    tpl.push("<pdf src='" + pdf_fileURL + "'/>");
  });
  tpl.push("</pdfset>");
  log.debug({ title: 'bound template', details: xml.escape({ xmlText: tpl.join('\n') }) });
  return render.xmlToPdf({
    xmlString: tpl.join('\n')
  });
}
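Putting the pieces together, the Map/Reduce shape the answer describes might look roughly like this (a sketch, not the answer's actual script; the script parameter name, the folder id, and the use of render.transaction for per-record PDFs are assumptions, and renderSet is the helper above with its N/file, N/xml, and N/render dependencies):

/**
 * @NApiVersion 2.x
 * @NScriptType MapReduceScript
 */
define(['N/render', 'N/runtime', 'N/file', 'N/xml'], function (render, runtime, file, xml) {

  // Hypothetical script parameter holding a JSON array of transaction internal ids.
  function getInputData() {
    return JSON.parse(runtime.getCurrentScript().getParameter({ name: 'custscript_txn_ids' }));
  }

  // Render each transaction to its own pdf and save it to the File Cabinet.
  function map(context) {
    var txnId = parseInt(JSON.parse(context.value), 10);
    var pdf = render.transaction({ entityId: txnId, printMode: render.PrintMode.PDF });
    pdf.folder = 123; // hypothetical File Cabinet folder id
    // Single key so every saved file id coalesces into one reduce call.
    context.write({ key: 'all', value: pdf.save() });
  }

  // Bind every pdf into one document using the renderSet() helper shown above.
  function reduce(context) {
    var merged = renderSet({ files: context.values });
    merged.folder = 123; // hypothetical File Cabinet folder id
    merged.save();
  }

  return { getInputData: getInputData, map: map, reduce: reduce };
});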
Why not use a Map Reduce script to generate the PDF? Does it need to be a Suitelet?
I have a working script that upon form submit, specific rows move from one sheet to another. One of the fields I'm pushing is a url.
On the second sheet, the link is listed and it is hyperlinked, but it's really ugly and I really want to format it so that it shows "Edit" with a hyperlink. I've tried a number of ways, but my knowledge is limited so all I get are errors. I'm hoping someone can point me in the right direction.
Here is my code. I'm very new at this so the script is not at all sophisticated. Any help/suggestions would be appreciated!
function copyAdHoc() {
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sh = SpreadsheetApp.setActiveSheet(ss.getSheetByName("Form Responses 1"));
  var data = sh.getRange(2, 1, sh.getLastRow() - 1, sh.getLastColumn()).getValues();
  // Grab the headers from the master sheet
  var headers = sh.getRange(1, 1, 1, sh.getLastColumn()).getValues();
  var date = headers[0].indexOf('Effective Date');
  var name = headers[0].indexOf('Employee Name');
  var loc = headers[0].indexOf('Location');
  var issue = headers[0].indexOf('Description/Question/Issue');
  var add = headers[0].indexOf('Additional Information');
  var change = headers[0].indexOf('Is this a Qualifying Life Event?');
  var url = headers[0].indexOf('Form URL');
  var category = headers[0].indexOf('Primary Category');
  var status = headers[0].indexOf('Current Status');
  var users = headers[0].indexOf('Users');
  // Grab only the relevant columns
  for (n = 0; n < data.length; ++n) { // iterate in the array, row by row
    if (data[n][change] !== "Yes" && data[n][category] !== "Employee Relations" && data[n][date] !== "") { // if condition is true copy the whole row to target
      var arr = [];
      arr.push(data[n][url]);
      arr.push(data[n][users]);
      arr.push(data[n][date]);
      arr.push(data[n][loc]);
      arr.push(data[n][name]);
      arr.push(data[n][category]);
      arr.push(data[n][issue] + ". " + data[n][add]);
      arr.push(data[n][status]);
      var sh2 = SpreadsheetApp.setActiveSheet(ss.getSheetByName("Ad Hoc")); // second sheet of your spreadsheet
      sh2.getRange(sh2.getLastRow() + 1, 2, 1, arr.length).setValues([arr]); // paste the selected values in the second sheet in one batch write
    }
  }
}
It's a bit messy but the only way I know to achieve what you're trying to do would be to insert a column to the left of the hyperlink with the word Edit right justified and then remove the borders between the two.
From your description I am assuming you want the word "Edit" to be Hyperlinked. To do so, try this:
function getHyperlink(url) {
  return "=HYPERLINK(\"" + url + "\",\"Edit\")";
}

function mainFunct() {
  // Do necessary steps
  var tarLink = "https://www.google.com";
  var tarRng = tarSheet.getRange(rowNum, colNum).setValue(getHyperlink(tarLink)); // tarSheet, rowNum, colNum: your target sheet and cell
  // perform other steps
}
EDIT:
Forgot to mention: since you're pushing your values to the array, you can do it in a similar way by either storing the hyperlink in a variable or directly pushing it to the array like all the other values. Or, if you're dealing with a hyperlink that has a static and a dynamic part, for example https://stackoverflow.com/questions/post_id, where post_id keeps changing but most of the URL is static, you can easily handle it by just passing the post_id to the getHyperlink function and getting the required hyperlink in return. Hope this helps.
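Applied to the copyAdHoc() script above, that boils down to wrapping the URL before it is pushed (a sketch against the OP's variable names):

// In copyAdHoc(), replace arr.push(data[n][url]); with:
arr.push('=HYPERLINK("' + data[n][url] + '","Edit")'); // setValues() writes the formula, shown as a clickable "Edit"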