Using node.js, I am trying to build an array of objects and write them to a file. To do this, I'm using the built in fs library.
After calling
var file = fs.createWriteStream('arrayOfObjects.json'); and file.write('[') I run several asynchronous functions to eventually append objects like this:
file.write(JSON.stringify(objectToAppend) + ',\n')
I can determine when all of the objects have stopped appending, and this is where I run file.write(']') and file.end(). My problem is that adding the last comma to the end of the last object causes the JSON to be invalid.
It is very difficult to determine where and when the last object is being created due to the asynchronous nature of the script, so I was wondering if there is a way to strip or remove characters from a file-stream. If so, I could do this before adding the last ']' character.
I could do this manually, but I was hoping to pipe this to another application. The only solution I've thought about is using the fs.truncate() function, however this doesn't seem to work for file streams, and neither file.length or file.length() will give me the length of the contents because it is not a string so it's difficult to determine how or where to truncate the file.
For now I have just been adding '{}]' to the end of the array to make it valid JSON, but this empty object may cause some problems later.
Also note: the array of objects I am writing in this stream is VERY large, so I would rather not end the stream and re-open the file.
I'd recommend prepending the separator instead, so that you can adjust it dynamically after the first call:
// Open the array. The separator is written BEFORE each object, so a trailing
// comma is never emitted and nothing has to be stripped at the end.
file.write('[\n');
let sep = "";
// NOTE(review): the bare forEach is a placeholder from the original answer —
// call it on whatever collection or callback actually yields the objects.
forEach(function (objectToAppend) {
  file.write(sep + JSON.stringify(objectToAppend));
  // The first object is written without a separator; every later one gets ",\n" in front.
  if (!sep) {
    sep = ",\n";
  }
});
Example using JSONStream:
const fs = require('fs');
const JSONStream = require('JSONStream');

// Serialising stream: objects go in, JSON text comes out and is piped
// straight into the destination file.
const jsonwriter = JSONStream.stringify();
const file = fs.createWriteStream('arrayOfObjects.json');
jsonwriter.pipe(file);

// Feed the objects into the JSON stream one at a time.
[
  { foo : 'bar#1' },
  { foo : 'bar#2' },
  { foo : 'bar#3' },
  { foo : 'bar#4' },
].forEach(function (entry) {
  jsonwriter.write(entry);
});

// Ending the JSON stream signals that no more objects will follow.
jsonwriter.end();
Here's a snippet incorporating robertklep's answer. This converts from a pipe-separated file to json:
// psv2json: convert a pipe-separated file (argv[2]) into "<file>.json".
var fs = require('fs');
var readline = require('readline');
var JSONStream = require('JSONStream');

// Make sure we got a filename on the command line.
if (process.argv.length < 3) {
  console.log('Usage: node ' + process.argv[1] + ' FILENAME');
  process.exit(1);
}

var filename = process.argv[2];
var outputFilename = filename + '.json';
console.log("Converting psv to json. Please wait.");

// Objects written to jsonwriter are serialised and piped into the output file.
var jsonwriter = JSONStream.stringify();
var outputFile = fs.createWriteStream(outputFilename);
jsonwriter.pipe(outputFile);

// Reading and writing are asynchronous: report completion only once the
// output stream has flushed everything, not right after setting up the pipeline.
outputFile.on('finish', function () {
  console.log('psv2json complete.');
});

var rl = readline.createInterface({
  input: fs.createReadStream(filename),
  terminal: false
}).on('line', function (line) {
  console.log('Line: ' + line);
  // Lines containing the header marker are skipped; everything else is a data row.
  if (!/ADDRESS_DETAIL_PID/.test(line)) {
    var split = line.split('|');
    var line_as_json = {
      "address_detail_pid": split[0],
      "flat_type": split[1],
      "flat_number": split[2],
      "level_type": split[3],
      "level_number": split[4],
      "number_first": split[5],
      "street_name": split[6],
      "street_type_code": split[7],
      "locality_name": split[8],
      "state_abbreviation": split[9],
      "postcode": split[10],
      "longitude": split[11],
      "latitude": split[12]
    };
    jsonwriter.write(line_as_json);
  }
}).on('close', function () {
  // Input exhausted: end the JSON stream so the piped output file is ended as well.
  jsonwriter.end();
});
The accepted answer is interesting (prepending the separator) but in my case I have found it easier to append the separator and remove the last character of the file, just as suggested in the question.
This is how you remove the last character of a file with Node.js :
import fs from 'fs'
/**
 * Shorten a file by one byte, dropping its final character
 * (e.g. a trailing comma written in a single-byte encoding).
 *
 * @param {string} filename - Path of the file to shorten.
 * @returns {Promise<void>} Resolves once the file has been truncated.
 */
async function removeLastCharacter(filename) {
  // Only the size is needed, so the file content is never read.
  const { size } = await fs.promises.stat(filename);
  await fs.promises.truncate(filename, size - 1);
}
explanation :
fs.promises.stat gives us some information about the file, we will use its size.
fs.promises.truncate removes everything in the file after a given position.
We use the position fileSize - 1, which cuts off the last character (strictly speaking the last byte, which is the same thing for a trailing comma).
Note :
Yes, I know that we need to wait until the stream is closed, but this is OK because the truncate and stat functions are very fast and don't depend on the file size: they don't have to read the file's content.
Related
I have some data in a txt file in this format:
byr:1985
eyr:2021 iyr:2011 hgt:175cm pid:163069444 hcl:#18171d
eyr:2023
hcl:#cfa07d ecl:blu hgt:169cm pid:494407412 byr:1936
ecl:zzz
eyr:2036 hgt:109 hcl:#623a2f iyr:1997 byr:2029
cid:169 pid:170290956
hcl:#18171d ecl:oth
pid:266824158 hgt:168cm byr:1992 eyr:2021
I already have a function that parses the txt file line by line:
/**
 * Read a text file located relative to this script and split it into lines.
 *
 * @param {string} folder - Directory of the file, relative to __dirname.
 * @param {string} file_name - Name of the file inside that directory.
 * @returns {string[]} One entry per line; blank lines appear as empty strings.
 */
function parse_file_by_line(folder, file_name) {
  const input_file = path.join(__dirname, folder, file_name);
  // Accept both Windows (\r\n) and Unix (\n) line endings, and return the
  // array directly instead of leaking it through an implicit global.
  return fs.readFileSync(input_file, "utf8").split(/\r?\n/);
}
However, I want to split on the blank lines (the empty lines in the text file). Someone suggested splitting on "\n\n", which I have tried, but it ends up putting all the data into one big array element. I want to split on the empty lines and put the data between them into one array element each. For example, the first element would be "byr:1985 eyr:2021 iyr:2011 hgt:175cm pid:163069444 hcl:#18171d".
Here is a way to inspect the raw contents of the file: pass the string through JSON.stringify so that the otherwise invisible characters are shown escaped. That way you can find out which line ending your file uses.
const fs = require('fs');
// Decode the file as text and print it JSON-escaped, so line endings show
// up literally as \n or \r\n in the output.
const data = fs.readFileSync("./file.txt", "utf8");
console.log(JSON.stringify(data));
In my mac, this is output.
"byr:1985\neyr:2021 iyr:2011 hgt:175cm pid:163069444 hcl:#18171d\n\neyr:2023\nhcl:#cfa07d ecl:blu hgt:169cm pid:494407412 byr:1936\n\necl:zzz\neyr:2036 hgt:109 hcl:#623a2f iyr:1997 byr:2029\ncid:169 pid:170290956\n\nhcl:#18171d ecl:oth\npid:266824158 hgt:168cm byr:1992 eyr:2021"
Use split("\r\n\r\n"). Windows has \r\n on each line ending and linux uses just \n.
You can use a stream approach which mean not reading all the file into memory. This is useful when working with large files. Following handles CRLF too:
const fs = require('fs');
const readline = require('readline');
/**
 * Stream input.txt and print it one line at a time, without loading the
 * whole file into memory.
 *
 * @returns {Promise<void>} Resolves after the last line has been printed.
 */
async function processLineByLine() {
  const reader = readline.createInterface({
    input: fs.createReadStream('input.txt'),
    // crlfDelay: Infinity makes every CR LF ('\r\n') pair count as one line break.
    crlfDelay: Infinity,
  });

  // The interface is async-iterable: each iteration yields the next line of the file.
  for await (const text of reader) {
    console.log('Line from file: ' + text);
  }
}
// Handle a rejected promise (e.g. input.txt is missing) explicitly instead of
// leaving it floating.
processLineByLine().catch(function (err) {
  console.error(err);
  process.exitCode = 1;
});
For more info see this.
Now, you can customize the for-loop to detect empty line like this:
// Collect consecutive non-empty lines into groups; a blank line ends a group.
let lines = [];
for await (const line of rl) {
  // Each line of the input is successively available here as `line`.
  if (line === '') {
    // Several blank lines in a row must not produce empty groups.
    if (lines.length > 0) {
      process_chunk(lines);
    }
    lines = []; // start a fresh array for the next group
  } else {
    lines.push(line);
  }
}
// Flush the final group: when the input does not end with a blank line, the
// last group would otherwise never be processed.
if (lines.length > 0) {
  process_chunk(lines);
}

/**
 * Handle one group of lines.
 * @param {string[]} lines - Consecutive non-empty lines found between blank lines.
 */
function process_chunk(lines) {
  // now you have an array of consecutive non-empty lines here
}
Right now when file already exist I added prefix which is a timestamp to the filename to make it unique.
But instead of using timestamp I want to use ordinal suffix or add a number to the filename.
I would add an incremented number to the filename if the file exists. But can't quite wrap my head around how to do this in a good way.
Using timestamp works but its too long like when we display the filename it would be like for example so instead of using timestamp I just want to increment a number to a filename.
Hellworldfilename - 1593024232 - timestamp is too long , not a good idea.
It should based from existing records in the database . If for example I add a file with filename Hellworldfilename and it already existed then the new filename would be Hellworldfilename-1 , and if I add Hellworldfilename again the new filename would be Hellworldfilename-2 and so on and so forth. Any idea how we can make a filename everytime unique ?
Let me give an example. let us say I have 3 files in the database with filesname
DOC
DOC-1
DOC-2
If I add a file with filename DOC the new filename would be now DOC-3.
#Code for checking if file exists
// Look for an existing record with the same humanId and filename.
const file = await context.service.Model.findOne({
where: { humanId: record.id, filename: data.filename },
// NOTE(review): presumably an ORM "paranoid" flag that also matches soft-deleted rows — confirm.
paranoid: false,
});
// A match means the name is already taken, so a distinguishing part is added.
if (file) {
// Current timestamp in milliseconds, as a string; despite its name it is placed after the base name below.
const prefix = Date.now().toString();
// eslint-disable-next-line no-undef
const fileParts = data.filename.split('.');
// Only the first two dot-separated parts are used: a name with several dots loses
// the rest, and a name without a dot gets the literal text "undefined" as extension.
// (The snippet ends here in the original post; the closing brace of the `if` is not shown.)
filename = `${fileParts[0]}-${prefix}.${fileParts[1]}`;
You will need to check whether the filename ends with -somenumber. If so, then you can extract that number and increment it. Otherwise put 1 into the result:
/**
 * Produce the next numbered variant of a file name.
 *
 * A name whose base already ends in "-<digits>" gets that number incremented
 * ("DOC-2.txt" -> "DOC-3.txt"); any other name gets "-1" appended to its base
 * ("DOC.txt" -> "DOC-1.txt").
 *
 * @param {string} fileN - File name, with or without an extension.
 * @returns {string} The file name carrying the new numeric suffix.
 */
function getNumberedFileName(fileN) {
  // Defaults cover names without a usable "." separator.
  let fileName = fileN;
  let fileExtension = "";
  const lastDotIndex = fileN.lastIndexOf(".");
  // A leading dot (hidden files such as .gitignore) or a trailing dot does
  // not mark an extension.
  if (lastDotIndex > 0 && lastDotIndex < fileN.length - 1) {
    fileName = fileN.substring(0, lastDotIndex);
    fileExtension = fileN.substring(lastDotIndex);
  }
  const lastDashIndex = fileName.lastIndexOf("-");
  if (lastDashIndex > 0 && lastDashIndex < fileName.length - 1) {
    const lastPart = fileName.substring(lastDashIndex + 1);
    // Accept plain decimal digits only. The global isNaN() also lets through
    // " ", "1e3" or "0x10", which parseInt then turns into NaN or a wrong number.
    if (/^\d+$/.test(lastPart)) {
      const index = Number.parseInt(lastPart, 10) + 1;
      return fileName.substring(0, lastDashIndex) + "-" + index + fileExtension;
    }
  }
  return fileName + "-1" + fileExtension;
}
You could declare an object filenames in the global scope like
const filenames={};
and use it for keeping track of already opened files.
Below I defined a function makeUnique() highlighting the ideas I mentioned before. It turned out I had to tweak my code a little bit, but here is a working snippet:
/**
 * Return a name that differs for every repeated request of the same file name.
 *
 * The first request for a name returns it unchanged; the n-th repeat returns
 * the name with "-n" appended to the part in front of the last dot
 * ("abc.txt" -> "abc-1.txt", "abc" -> "abc-1").
 *
 * @param {string} fn - Requested file name.
 * @returns {string} The name, suffixed when it has been requested before.
 */
const makeUnique = (function () {
  // Per-name request counter, private to this closure. A Map is used because a
  // plain object would treat inherited keys such as "constructor" or
  // "toString" as already-seen names and turn their counter into NaN.
  const filenames = new Map();
  return function (fn) {
    const fileParts = fn.split(".");
    const prefix = filenames.has(fn) ? filenames.get(fn) + 1 : 0;
    filenames.set(fn, prefix);
    // 0 means first use: leave the name alone. Otherwise tag the part before
    // the extension (or the only part when the name has no dot).
    if (prefix) fileParts[Math.max(fileParts.length - 2, 0)] += '-' + prefix;
    return fileParts.join('.');
  };
})();
// Demo: request several names, some of them repeatedly, and print what comes back.
const sampleNames = [
  "abc", "d.e.f.c", "abc", "ghi", "abc", "abc.txt", "def",
  "abc.txt", "d.e.f.c", "abc.txt", "abc",
];
console.log(sampleNames.map(makeUnique));
.as-console-wrapper {max-height:100% !important}
I used an IIFE to generate a protected scope for the static object filenames. This is now accessible by all calls of makeUnique() but otherwise "private", i. e. cannot be modified accidentally from anywhere else.
I am creating an XML file using the "xmlbuilder" node module. But when I tried to write angle brackets ("<" or ">"), I got escaped sequences like "&lt;" and "&gt;" instead. The code is as follows:
// Reproduction of the problem: markup passed as an element's text value.
// NOTE(review): Node's require() takes only the module id, so the options object presumably has no effect.
const builder = require('xmlbuilder', { encoding: 'utf-8' });
const name = "ABC";
const xml = builder.create('Slides');
xml.ele('props', "Hello").up();
// The second argument is the element's text content, not child markup.
xml.ele('name', `<Hello> ${name} </Hello>`).up();
xml.end({ pretty: true });
console.log(xml.toString());
The output is as follows:
<Slides>
<props>Hello</props>
<name>&lt;Hello&gt; ABC &lt;/Hello&gt;</name>
</Slides>
What should I do to get < or > printed instead of &lt; or &gt;?
There is an npm module decode-html that will handle the same use case as your.
var decode = require('decode-html');
// The input must be the entity-encoded text; decode() turns the entities back into characters.
console.log(decode('&lt;div class=&quot;hidden&quot;&gt;NON&amp;SENSE&#39;s&lt;/div&gt;'));
// -> '<div class="hidden">NON&SENSE\'s</div>'
The problem is that you are attempting to create a child element in an incorrect way, by passing some XML in the value field of xml.ele. The module is correctly escaping your angle brackets.
What you need to do is create another element named Hello and append it to the name element. You can do this by either chaining your .ele calls or using their return values.
Here is the correct code:
// require() takes only the module id; the `{ encoding: 'utf-8' }` object that
// used to be passed as a second argument was silently ignored.
const builder = require('xmlbuilder');
const name = "ABC";
const xml = builder.create('Slides');
xml.ele('props', "Hello");
// Create a real <Hello> child under <name> instead of passing markup as text,
// which the library escapes.
xml.ele('name')
  .ele("Hello", name);
xml.end({ pretty: true });
console.log(xml.toString());
Output:
<Slides>
<props>Hello</props>
<name>
<Hello>ABC</Hello>
</name>
</Slides>
in my node app, I'm trying to clean up a csv file.
first, I split it into separate lines
then I replace unwanted characters in the first line (the column headers)
then I re-assemble the file by pushing individual lines into a new array, and writing that array to a new .csv file
For some reason, all my rows end up being shifted by one position with respect to the header row.
I have opened the resulting file in the vi editor and can see that all rows have somehow acquired a "," character at the beginning.
I know I'm doing something incorrectly, but can not see what that is.
Here is my code:
var XLSX = require('xlsx');
var fs = require('fs');
var csv = require("fast-csv");
// Convert the first sheet of the workbook to CSV text.
var workbook = XLSX.readFile('Lineitems.xls');
var worksheet = workbook.Sheets['Sheet1'];
var csv_conversion = XLSX.utils.sheet_to_csv(worksheet);
var csv_lines = csv_conversion.split('\n');

// Clean up the header row only: strip unwanted characters and trailing commas.
var dirtyHeaderLine = csv_lines[0];
var cleanHeaderLine = dirtyHeaderLine.replace(/\./g,"")
  .replace(/"'"/g,"")
  .replace(/","/g,"")
  .replace(/"\/"/g,"")
  .replace(/"#"/g,"");
cleanHeaderLine = cleanHeaderLine.replace(/,+$/, "");
console.log(cleanHeaderLine);
csv_lines[0] = cleanHeaderLine;

// Re-assemble the file, terminating every line with a newline.
var newCsvLines = [];
csv_lines.forEach(function(line){
  newCsvLines.push(line + "\n");
});

// fs.writeFile expects a string or buffer. Handing it the array made the
// elements get joined with "," — which put a stray comma at the start of every
// row after the first — so join the lines explicitly with an empty separator.
fs.writeFile('clean_file.csv', newCsvLines.join(''), function(err) {
  if (err) throw err;
  console.log('clean file saved');
});
I don't see any glaring errors here (maybe something with your regex? Not an expert on those) but this will solve your issue.
// Drop a single leading comma from the row, if there is one.
if (line.startsWith(',')) {
  line = line.slice(1);
}
Adjust your variables accordingly. I don't think I have the same case as you.
EDIT: Here's a JSBin of it working.
http://jsbin.com/mirocedagi/1/edit?html,js,output
Is it possible to get the filename without the extension from the src filepath.
As an example, let's say the src file is my-file.png - located at images/my-file.png.
In my task I have this at the moment:
// Use the caller-supplied name transform, or fall back to returning the name unchanged.
var processName = options.processName || ((name) => name);
var filename = processName(filepath);
When I reference filename for output it returns:
images/my-file.png
I want to only return the actual filename, without the path and without the extension:
my-file
How can I achieve this?
This might be pretty old, but in case someone else finds this question, in reply to @user3143218's comment:
slice(0, -4) will remove the last 4 characters from the name, so for the example my-file.png we will get my-file but for script.js we will get scrip. I suggest using a regex removing everything from the last dot.
You could use a regex like this:
// Take everything after the last "/" — or the whole string when the path has no
// slash (the previous regex match returned null there and threw).
var theFile = filename.substring(filename.lastIndexOf('/') + 1);
// Drop the extension. A name without a dot, or with only a leading dot
// (e.g. ".gitignore"), is kept whole.
var lastDot = theFile.lastIndexOf('.');
var onlyName = lastDot > 0 ? theFile.substring(0, lastDot) : theFile;
That should give you my-file. The regex gives you the string after the last forward slash, and the next line removes everything after the last dot (and the dot).
Thanks to Andeersg's answer below I was able to pull this off. It might not be the best solution but it works. Final code is:
// Use the caller-supplied name transform, or return the name unchanged.
var processName = options.processName || function (name) { return name; };
var filename = processName(filepath);
// Take everything after the last "/"; a path without a slash is used as-is.
var theFile = filename.substring(filename.lastIndexOf('/') + 1);
// Strip the extension at the last dot rather than chopping a fixed four
// characters, which only worked for three-letter extensions
// ("script.js" became "scrip").
var lastDot = theFile.lastIndexOf('.');
var onlyName = lastDot > 0 ? theFile.substring(0, lastDot) : theFile;
Now onlyName will return:
my-file