How to properly assemble a CSV file to avoid column shift - JavaScript

In my Node app, I'm trying to clean up a CSV file.
First, I split it into separate lines,
then I replace unwanted characters in the first line (the column headers),
then I re-assemble the file by pushing the individual lines into a new array and writing that array to a new .csv file.
For some reason, all my rows end up shifted by 1 position with respect to the header row.
I have opened the resulting file in a text editor and can see that every row has somehow acquired a "," character at the beginning.
I know I'm doing something incorrectly, but I cannot see what that is.
Here is my code:
var XLSX = require('xlsx');
var fs = require('fs');
var csv = require("fast-csv");

var workbook = XLSX.readFile('Lineitems.xls');
var worksheet = workbook.Sheets['Sheet1'];
var csv_conversion = XLSX.utils.sheet_to_csv(worksheet);
var csv_lines = csv_conversion.split('\n');

var dirtyHeaderLine = csv_lines[0];
var cleanHeaderLine = dirtyHeaderLine.replace(/\./g,"")
    .replace(/"'"/g,"")
    .replace(/","/g,"")
    .replace(/"\/"/g,"")
    .replace(/"#"/g,"");
cleanHeaderLine = cleanHeaderLine.replace(/,+$/, "");
console.log(cleanHeaderLine);

csv_lines[0] = cleanHeaderLine;

var newCsvLines = [];
csv_lines.forEach(function(line){
    newCsvLines.push(line + "\n");
});

fs.writeFile('clean_file.csv', newCsvLines, function(err) {
    if (err) throw err;
    console.log('clean file saved');
});

I don't see any glaring errors here (maybe something with your regex? I'm not an expert on those), but this will solve your issue:
if (line.charAt(0) == ',') { line = line.substring(1); }
Adjust your variables accordingly. I don't think I have the same case as you.
EDIT: Here's a JSBin of it working.
http://jsbin.com/mirocedagi/1/edit?html,js,output
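For what it's worth, the shift is most likely coming from the write step rather than the regexes: in the Node versions of that era, passing an array to fs.writeFile coerces it to a string ('' + newCsvLines), which joins the elements with commas, so every row after the first picks up a stray leading ",". A minimal sketch of that fix, reusing the variables from the question (join the lines yourself before writing):

// newCsvLines already has "\n" appended to every line, so join with an empty
// separator instead of letting Node stringify the array with commas.
fs.writeFile('clean_file.csv', newCsvLines.join(''), function (err) {
    if (err) throw err;
    console.log('clean file saved');
});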

Related

Line Break Causing Error Importing CSV from Google Drive to Google Sheets

I'm working to automate the flow of data that we use to power some of our analytics/reporting. To summarize, I have a CSV in Google Drive that needs to be imported into a Google Sheet. The CSV contains line breaks in some of the "cells", which is causing the import to be completely askew / out of line. To be clear, I can get the data, but it is misaligned due to the line breaks in the CSV.
I started with code from Ctrl:
function importCSVFromGoogleDrive() {
    var file = DriveApp.getFilesByName("tweet_activity.csv").next();
    var csvData = Utilities.parseCsv(file.getBlob().getDataAsString());
    var sheet = SpreadsheetApp.getActiveSheet();
    sheet.getRange(1, 1, csvData.length, csvData[0].length).setValues(csvData);
}
I quickly discovered that, while the code did import the file, the line breaks ruined the alignment of the data. Upon inspection, it's clear that the line breaks are the genesis of the issue.
I scoured the forums and found some possible fixes (for instance, "Saving as CSV through google script handling newline characters" and "Apps Script Utilities.parseCsv assumes new row on line breaks within double quotes").
I've tried:
function importCSVFromGoogleDrive_v2() {
    var file = DriveApp.getFilesByName("tweet_activity.csv").next();
    var NewFile = file.replace("\n", " ").replace("\r", " ");
    var csvData = Utilities.parseCsv(NewFile.getBlob().getDataAsString());
    var sheet = SpreadsheetApp.getActiveSheet();
    sheet.getRange(1, 1, csvData.length, csvData[0].length).setValues(csvData);
}
This resulted in "TypeError: Cannot find function replace in object tweet_activity.csv. (line 42, file "Code")"
I also tried replacing the line breaks after the parse.
function importCSVFromGoogleDrive_v3() {
    var file = DriveApp.getFilesByName("tweet_activity_metrics_downtownstlouis_20190303_20190331_en.csv").next();
    var csvData = Utilities.parseCsv(file.getBlob().getDataAsString());
    var csvDataRev = csvData.replace("\n", " ").replace("\r", " ");
    var sheet = SpreadsheetApp.getActiveSheet();
    sheet.getRange(1, 1, csvData.length, csvData[0].length).setValues(csvData);
}
This resulted in "TypeError: Cannot find function replace in object"
Finally, I tried different replace code (I modified for my purposes, but this was the pertinent part):
var dataString = myBlob().getDataAsString();
var escapedString = dataString.replace(/(?=["'])(?:"[^"\\]*(?:\\[\s\S][^"\\]*)*"|'[^'\\\r\n]*(?:\\[\s\S][^'\\\r\n]*)*')/g, function(match) { return match.replace(/\r\n/g, "\\r\\n"); });
var csvData = Utilities.parseCsv(escapedString);
No dice.
Any suggestions on how to deal with the line breaks in this scenario?
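For what it's worth, both TypeErrors come from calling .replace() on the wrong kind of object: it is a String method, but file is a Drive File object and csvData is the two-dimensional array returned by Utilities.parseCsv(), so neither has it. A hedged sketch of one way to keep the rows aligned, getting the text first and collapsing line breaks inside double-quoted cells before parsing (the function name is made up, and the simple quote regex does not cover every escaping edge case):

function importCSVFromGoogleDrive_v4() {
    var file = DriveApp.getFilesByName("tweet_activity.csv").next();
    // Get the text first; .replace() only exists on strings.
    var raw = file.getBlob().getDataAsString();
    // Replace line breaks that occur inside double-quoted cells with a space.
    var cleaned = raw.replace(/"[^"]*"/g, function (cell) {
        return cell.replace(/\r?\n/g, " ");
    });
    var csvData = Utilities.parseCsv(cleaned);
    var sheet = SpreadsheetApp.getActiveSheet();
    sheet.getRange(1, 1, csvData.length, csvData[0].length).setValues(csvData);
}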

Convert path string from UNC to URI, replacing slashes with backslashes in Google Apps Script

I need to convert a UNC path like this one. I have searched and searched and cannot piece anything together.
"\\NAS_01\GlobalShare\Docs\Customers\2017\S\Smith, John\photo1.jpg"
I need to remove the "\\NAS_01\GlobalShare\Docs\Customers\" portion of the path and also "photo1.jpg" and end up with:
2017\S\Smith, John\
so that I can pass it to the following function:
function getDriveFolderNoCreate(path, rootFolder) {
    var name, folder, search, fullpath;
    // Remove extra slashes and trim the path
    fullpath = path.replace(/^\/*|\/*$/g, '').replace(/^\s*|\s*$/g, '').split("/");
    // Always start with the main Drive folder
    folder = rootFolder;
    for (var subfolder in fullpath) {
        name = fullpath[subfolder];
        search = folder.getFoldersByName(name);
        if (search.hasNext()) {
            var folder = search.next(); // next is a method, so it needs to be called
            var folderID = folder.getId();
            return folderID;
        }
    }
}
My intention is to return a url to open the Google Drive folder with the same path.
I ended up with a multi-part solution that works very well.
I paste the full UNC path into cell B2.
This formula is in B3: =Index(split(B2, "\"), 0, 8)
It returns the exact folder name I need.
Then in my gs file:
function findDriveFolder() {
    var pFId = "XYZ1233333333333";
    var input = SpreadsheetApp.getActive().getRange("B3").getValue();
    var folders = DriveApp.getFoldersByName(input);
    Logger.log("folders: " + folders[0]);
    while (folders.hasNext()) {
        var folder = folders.next();
        var url = folder.getUrl();
        showAnchor("View Folder", url);
    }
}

function showAnchor(name, url) {
    var html = '<html><body>' + name + '</body></html>';
    var ui = HtmlService.createHtmlOutput(html);
    SpreadsheetApp.getUi().showModelessDialog(ui, "Files Folder");
}
I have not implemented the searchFolders part yet that I hope will speed it up. At least it's working for now.
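One small note on the snippet above, in case the log line looks odd: DriveApp.getFoldersByName() returns a FolderIterator rather than an array, so folders[0] will always be undefined; the while/next() loop is the right way to walk it. A minor tweak to the logging, if you want something meaningful there:

// Log whether anything matched; hasNext() does not consume the iterator.
Logger.log("folders found: " + folders.hasNext());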
Apps Script needs your input backslashes to be escaped if you are writing the string yourself (i.e. testing input).
wrong:
input = "\\NAS_01\GlobalShare\Docs\Customers\2017\S\Smith, John\photo1.jpg"
right:
input = "\\\\NAS_01\\GlobalShare\\Docs\\Customers\\2017\\S\\Smith, John\\photos1.jpg"
In Apps Script then, I am able to get the matching portion with the following regex:
/\d{4}\\[A-Z]\\.+\\/
i.e:
function unc2uri(input) {
    const forwardSlash = String.fromCharCode(47);
    const backSlash = String.fromCharCode(92);
    if (!input)
        input = '\\\\NAS_01\\GlobalShare\\Docs\\Customers\\2017\\S\\Smith, John\\photo1.jpg';
    // Should show \\NAS_01\GlobalShare\Docs\Customers\2017\S\Smith, John\photo1.jpg
    Logger.log(input);
    const matcher = /\d{4}\\[A-Z]\\.+\\/;
    const arrayOfMatches = input.match(matcher);
    // Should show [2017\S\Smith, John\].
    Logger.log(arrayOfMatches);
}
To verify, ask for the input string from someplace else (for example, Browser.inputBox) and pass that to the above as input:
function readInput() {
    unc2uri(Browser.inputBox("What's the path?"));
}
In the inputBox, you would enter the string you expect to be sent, as we view it, i.e. \\NAS_01\GlobalShare\Docs\Customers\2017\S\Smith, John\photo1.jpg
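To tie this back to getDriveFolderNoCreate from the question: the matched portion still uses backslashes, while that function splits on forward slashes, so a small conversion step is needed in between. A minimal sketch under that assumption (the helper name is made up; rootFolder is whatever base folder you use, e.g. DriveApp.getRootFolder()):

function uncToDriveFolderId(input, rootFolder) {
    const matcher = /\d{4}\\[A-Z]\\.+\\/;
    const match = input.match(matcher);
    if (!match) return null;
    // "2017\S\Smith, John\" -> "2017/S/Smith, John" so that the split("/")
    // inside getDriveFolderNoCreate sees one segment per folder.
    const relativePath = match[0].replace(/\\/g, "/");
    return getDriveFolderNoCreate(relativePath, rootFolder);
}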

From .txt data give values to inputs

I'm trying to fill some inputs when the page loads, using data I have in a .txt file. The file has a list of numbers:
1
2
3
Something like this. So I wanted to read these lines and put them into their corresponding inputs. Any suggestions on how to do this?
I tried this code, but maybe I have a mistake that I don't know about; I'm just starting with JavaScript.
function loadvalues()
{
    var fso = new ActiveXObject("Scripting.FileSystemObject");
    var s = fso.OpenTextFile("E://Steelplanner/Demand_Routing/Pruebas/OrderBalancing_Masivos/ModificaFechaTope/DueDate/Datosactuales.txt", true);
    var Ia5 = document.getElementById("Ia5sem");
    var text = s.ReadLine();
    Ia5.value = text;
}
Try using file.ReadLine() in a while loop until the document has been read completely, checking the file's AtEndOfStream property.
Here is an example you can refer to: ReadLine Method
Don't forget to replace the text file path with your own.
My text file contains the same data as in your example.
<script type="text/javascript">
var fso = new ActiveXObject("Scripting.FileSystemObject");
//specify the local path to Open and always add escape character else throw error for bad file name
var file = fso.OpenTextFile("C:\\Users\\MY_USER\\Desktop\\txtfile.txt", 1);
var Ia5 = document.getElementById("Ia5sem");
while (!file.AtEndOfStream) {
var r = file.ReadLine();
Ia5.innerHTML = Ia5.innerHTML + ("<br />" + r);
}
file.Close();
</script>
<p id="Ia5sem">HI</p>
So, I don't know why, but I just changed the name of the variables and made a slight change in the .OpenTextFile line and it worked.
function loadvalues()
{
    var file = new ActiveXObject("Scripting.FileSystemObject");
    var text = file.OpenTextFile("E:\\Steelplanner\\Demand_Routing\\Pruebas\\OrderBalancing_Masivos\\ModificaFechaTope\\DueDate\\Datosactuales.txt", 1, false);
    var Ia5s = document.getElementById("Ia5sem");
    Ia5s.value = text.ReadLine();
    var Ia4s = document.getElementById("Ia4sem");
    Ia4s.value = text.ReadLine();
    text.close();
}
Anyway, I'm going to check out FileReader() for future reference, and the script @Sarjan gave; maybe I can improve it, but I have other things to finish. Thanks for everything.
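For that future reference, here is a minimal sketch of the FileReader() route mentioned above. It assumes a file picker on the page (<input type="file" id="fileInput">) and the same input IDs as in the question, since FileReader can only read files the user selects, not arbitrary local paths:

document.getElementById("fileInput").addEventListener("change", function (event) {
    var reader = new FileReader();
    reader.onload = function () {
        // Split the file into lines and drop empty trailing lines.
        var lines = reader.result.split(/\r?\n/).filter(function (l) { return l !== ""; });
        document.getElementById("Ia5sem").value = lines[0];
        document.getElementById("Ia4sem").value = lines[1];
    };
    reader.readAsText(event.target.files[0]);
});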

Removing the last character from file stream in node.js (fs module)

Using node.js, I am trying to build an array of objects and write them to a file. To do this, I'm using the built in fs library.
After calling
var file = fs.createWriteStream('arrayOfObjects.json'); and file.write('[') I run several asynchronous functions to eventually append objects like this:
file.write(JSON.stringify(objectToAppend) + ',\n')
I can determine when all of the objects have stopped appending, and this is where I run file.write(']') and file.end(). My problem is that adding the last comma to the end of the last object causes the JSON to be invalid.
It is very difficult to determine where and when the last object is being created due to the asynchronous nature of the script, so I was wondering if there is a way to strip or remove characters from a file-stream. If so, I could do this before adding the last ']' character.
I could do this manually, but I was hoping to pipe this to another application. The only solution I've thought about is using the fs.truncate() function; however, this doesn't seem to work for file streams, and neither file.length nor file.length() will give me the length of the contents, because it is not a string, so it's difficult to determine how or where to truncate the file.
For now I have just been adding '{}]' to the end of the array to make it valid JSON, but this empty object may cause some problems later.
Also note: the array of objects I am writing in this stream is VERY large, so I would rather not end the stream and re-open the file.
I'd recommend prepending the separator instead, so that you can adjust it dynamically after the first call:
file.write('[\n');
var sep = "";
// "objects" here stands in for whatever collection the asynchronous code iterates over
objects.forEach(function (objectToAppend) {
    file.write(sep + JSON.stringify(objectToAppend));
    if (!sep)
        sep = ",\n";
});
Example using JSONStream:
var JSONStream = require('JSONStream');
var fs = require('fs');
var jsonwriter = JSONStream.stringify();
var file = fs.createWriteStream('arrayOfObjects.json');
// Pipe the JSON data to the file.
jsonwriter.pipe(file);
// Write your objects to the JSON stream.
jsonwriter.write({ foo : 'bar#1' });
jsonwriter.write({ foo : 'bar#2' });
jsonwriter.write({ foo : 'bar#3' });
jsonwriter.write({ foo : 'bar#4' });
// When you're done, end it.
jsonwriter.end();
Here's a snippet incorporating robertklep's answer. This converts from a pipe-separated file to json:
var fs = require('fs');
var readline = require('readline');
var JSONStream = require('JSONStream');

// Make sure we got a filename on the command line.
if (process.argv.length < 3) {
    console.log('Usage: node ' + process.argv[1] + ' FILENAME');
    process.exit(1);
}

var filename = process.argv[2];
var outputFilename = filename + '.json';
console.log("Converting psv to json. Please wait.");

var jsonwriter = JSONStream.stringify();
var outputFile = fs.createWriteStream(outputFilename);
jsonwriter.pipe(outputFile);

var rl = readline.createInterface({
    input: fs.createReadStream(filename),
    terminal: false
}).on('line', function(line) {
    console.log('Line: ' + line);
    if (!/ADDRESS_DETAIL_PID/.test(line)) {
        var split = line.split('|');
        var line_as_json = { "address_detail_pid": split[0], "flat_type": split[1], "flat_number": split[2], "level_type": split[3], "level_number": split[4], "number_first": split[5], "street_name": split[6], "street_type_code": split[7], "locality_name": split[8], "state_abbreviation": split[9], "postcode": split[10], "longitude": split[11], "latitude": split[12] };
        jsonwriter.write(line_as_json);
    }
}).on('close', () => {
    jsonwriter.end();
});

console.log('psv2json complete.');
The accepted answer is interesting (prepending the separator) but in my case I have found it easier to append the separator and remove the last character of the file, just as suggested in the question.
This is how you remove the last character of a file with Node.js :
import fs from 'fs'

async function removeLastCharacter(filename) {
    const stat = await fs.promises.stat(filename)
    const fileSize = stat.size
    await fs.promises.truncate(filename, fileSize - 1)
}
Explanation:
fs.promises.stat gives us some information about the file; we will use its size.
fs.promises.truncate removes from the file whatever comes after a certain position.
We use the position fileSize - 1, which is the last character.
Note:
Yes, I know that we need to wait until the stream is closed, but this is OK because the truncate and stat functions are very fast and don't depend on the file size; they don't have to read the file's content.
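To make the "wait until the stream is closed" point concrete, here is a sketch of how the helper above could be wired to the write stream from the question. It assumes every object was appended as JSON.stringify(obj) + ',\n', so the file ends with those two characters once the writes are done:

const file = fs.createWriteStream('arrayOfObjects.json');
file.write('[\n');
// ... asynchronous code appends JSON.stringify(objectToAppend) + ',\n' ...

file.end(async () => {
    // The end() callback fires once everything has been flushed to disk.
    const { size } = await fs.promises.stat('arrayOfObjects.json');
    await fs.promises.truncate('arrayOfObjects.json', size - 2); // strip the trailing ',\n'
    await fs.promises.appendFile('arrayOfObjects.json', '\n]');  // close the array
    console.log('arrayOfObjects.json is now valid JSON');
});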

JavaScript to parse a CSV Excel file

I am trying to parse a CSV file I made in Excel. I want to use it to update my Google map. This Google map is in a mobile app that I am developing with Eclipse for Android.
Honestly, I am not sure how to write the JavaScript. Any help will be greatly appreciated. I would be happy to credit your work.
I just want some JavaScript to run when the user hits a button that does the following:
Locates users current location (I have already done this part!)
Locate nearby locations as entered in the .CSV excel file by parsing the .CSV
Display a small link inside every location's notification bubble that says "Navigate"; when the user clicks it, it opens the Google Maps app and starts navigating the user to that location from the user's current location (geolocation).
This is the ONLY part I need to finish this application. So once again, any help at all will be greatly appreciated. Thanks everyone!
Honestly, I've been round and round with this problem. The CSV format is not made for easy parsing and even with complicated RegEx it is difficult to parse.
Honestly, the best thing to do is to import it into something like FormSite or phpMyAdmin, then re-export the document with a custom separator that is easier to parse than ",". I often use "%%" as the field delimiter and everything works like a charm.
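If you go that route, the parsing side becomes trivial; a minimal sketch, assuming "%%" never occurs inside the data itself:

// Each line is one record; fields are separated by the custom "%%" delimiter.
function parseCustomDelimited(text) {
    return text.trim().split(/\r?\n/).map(function (line) {
        return line.split("%%");
    });
}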
Don't know if this will help, but see http://www.randomactsofsentience.com/2012/04/csv-handling-in-javascript.html
Additional:
On top of the solution linked to above (my preference), I also used a shed-load of stacked regular expressions to tokenize a CSV, but it's not as easy to modify for custom error states...
Looks heavy but still only takes milliseconds:
function csvSplit(csv){
    csv = csv.replace(/\r\n/g, '\n');
    var rows = csv.split("\n");
    for (var i = 0; i < rows.length; i++){
        var row = rows[i];
        rows[i] = new Array();
        // encode pre-escaped special characters as HTML-style entities
        row = row.replace(/&/g, "&amp;");
        row = row.replace(/\\\\/g, "&#92;");
        row = row.replace(/\\"/g, "&quot;");
        row = row.replace(/\\'/g, "&#39;");
        row = row.replace(/\\,/g, "&#44;");
        row = row.replace(/#/g, "&#35;");
        row = row.replace(/\?/g, "&#63;");
        // mark double-quoted sections and hide the commas inside them
        row = row.replace(/"([^"]*)"/g, "#$1?");
        while (row.match(/#([^\?]*),([^\?]*)\?/)){
            row = row.replace(/#([^\?]*),([^\?]*)\?/g, "#$1&#44;$2?");
        }
        row = row.replace(/[\?#]/g, "");
        // same again for single-quoted sections
        row = row.replace(/\'([^\']*)\'/g, "#$1?");
        while (row.match(/#([^\?]*),([^\?]*)\?/)){
            row = row.replace(/#([^\?]*),([^\?]*)\?/g, "#$1&#44;$2?");
        }
        row = row.replace(/[\?#]/g, "");
        row = row.split(",");
        for (var j = 0; j < row.length; j++){
            col = row[j];
            // decode the entities back into the original characters (&amp; last)
            col = col.replace(/&#63;/g, "?");
            col = col.replace(/&#35;/g, "#");
            col = col.replace(/&#44;/g, ",");
            col = col.replace(/&#39;/g, "'");
            col = col.replace(/&quot;/g, '"');
            col = col.replace(/&#92;/g, '\\');
            col = col.replace(/&amp;/g, "&");
            row[j] = col;
        }
        rows[i] = row;
    }
    return rows;
}
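A quick usage example for the function above, with a made-up input string, just to show the shape of the result:

var csv = 'name,comment\n"Smith, John",hello\nJane,ok';
var rows = csvSplit(csv);
// rows -> [["name", "comment"], ["Smith, John", "hello"], ["Jane", "ok"]]
console.log(rows);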
I had this problem, which is why I had to come up with this answer. I found on npm something called masala-parser, which is a parser combinator. However, it didn't run in browsers yet, which is why I am using this fork; the code remains unchanged. Please read its documentation to understand the parser side of the code.
import('https://cdn.statically.io/gh/kreijstal-contributions/masala-parser/Kreijstal-patch-1/src/lib/index.js').then(({
    C,
    N,
    F,
    Streams
}) => {
    var CSV = (delimeter, eol) => {
        // parses anything between quotes; converts "" into "
        var innerstring = F.try(C.string('""').returns("\"")).or(C.notChar("\"")).rep().map(a => a.value.join(''));
        // allow a string or any token except the line delimiter or the field delimiter
        var attempth = F.try(C.char('"').drop().then(innerstring).then(C.char('"').drop())).or(C.charNotIn(eol[0] + delimeter));
        // this is merely a CSV header entry or the last value of a CSV line (newlines not allowed)
        var wordh = attempth.optrep().map(a => (a.value.join('')));
        // this parses the whole header
        var header = wordh.then(C.char(delimeter).drop().then(wordh).optrep()).map(x => {
            x.header = x.value;
            return x
        })
        // allow a string or any token except the field delimiter; newlines are allowed here because we already
        // know how many columns there are, so a newline inside quotes is part of the value
        var attempt = F.try(C.char('"').drop().then(innerstring.opt().map(a => (a.value.__MASALA_EMPTY__ ? {value: ""} : a))).then(C.char('"').drop())).or(C.notChar(delimeter))
        // this is merely a CSV entry
        var word = attempt.optrep().map(a => (a.value[0]?.value ?? a.value[0]));
        // this parses a CSV "line"; it will skip newlines if they're enclosed in double quotation marks
        var line = i => C.string(eol).drop().then(word.then(C.char(delimeter).drop().then(word).occurrence(i - 1).then(C.char(delimeter).drop().then(wordh)))).map(a => a.value);
        return header.flatMap(a => line(a.header.length - 1).rep().map(b => {
            b.header = a.header;
            return b
        }))
    };
    var m = {
        'tab': '\t',
        "comma": ",",
        "space": " ",
        "semicolon": ";"
    }
    document.getElementById('button').addEventListener('click', function() {
        var val = document.getElementById('csv').value;
        var parsedCSV = CSV(m[document.getElementById('delimeter').value], '\n').parse(Streams.ofString(val)).value;
        console.log(parsedCSV);
    })
})
Type some csv<br>
<textarea id="csv"></textarea>
<label for="delimeter">Choose a delimeter:</label>
<select name="delimeter" id="delimeter">
<option value="comma">,</option>
<option value="tab">\t</option>
<option value="space"> </option>
<option value="semicolon">;</option>
</select>
<button id="button">parse</button>
I would suggest stripping the newlines at the end of the file, because the parser might otherwise get confused.
This appears to work. You may want to translate the Japanese, but it is very straightforward to use:
http://code.google.com/p/csvdatajs/
