I am trying to implement a Cloudfunction which would run ffmpeg on a Google bucket upload. I have been playing with a script based on https://kpetrovi.ch/2017/11/02/transcoding-videos-with-ffmpeg-in-google-cloud-functions.html
The original script needs a little tuning, as the library has evolved a bit. My current version is here:
const {Storage} = require('@google-cloud/storage');
const storage = new Storage();
const ffmpeg = require('fluent-ffmpeg');
const ffmpeg_static = require('ffmpeg-static');
console.log("Linking ffmpeg path to:", ffmpeg_static)
ffmpeg.setFfmpegPath(ffmpeg_static);
exports.transcodeVideo = (event, callback) => {
const bucket = storage.bucket(event.bucket);
console.log(event);
if (event.name.indexOf('uploads/') === -1) {
console.log("File " + event.name + " is not to be processed.")
return;
}
// ensure that you only proceed if the file is newly created
if (event.metageneration !== '1') {
callback();
return;
}
// Open write stream to new bucket, modify the filename as needed.
const targetName = event.name.replace("uploads/", "").replace(/[.][a-z0-9]+$/, "");
console.log("Target name will be: " + targetName);
const remoteWriteStream = bucket.file("processed/" + targetName + ".mp4")
.createWriteStream({
metadata: {
//metadata: event.metadata, // You may not need this, my uploads have associated metadata
contentType: 'video/mp4', // This could be whatever else you are transcoding to
},
});
// Open read stream to our uploaded file
const remoteReadStream = bucket.file(event.name).createReadStream();
// Transcode
ffmpeg()
.input(remoteReadStream)
.outputOptions('-c:v copy') // Change these options to whatever suits your needs
.outputOptions('-c:a aac')
.outputOptions('-b:a 160k')
.outputOptions('-f mp4')
.outputOptions('-preset fast')
.outputOptions('-movflags frag_keyframe+empty_moov')
// https://github.com/fluent-ffmpeg/node-fluent-ffmpeg/issues/346#issuecomment-67299526
.on('start', (cmdLine) => {
console.log('Started ffmpeg with command:', cmdLine);
})
.on('end', () => {
console.log('Successfully re-encoded video.');
callback();
})
.on('error', (err, stdout, stderr) => {
console.error('An error occurred during encoding', err.message);
console.error('stdout:', stdout);
console.error('stderr:', stderr);
callback(err);
})
.pipe(remoteWriteStream, { end: true }); // end: true, emit end event when readable stream ends
};
This version correctly runs and I can see this in logs:
2020-06-16 21:24:22.606 Function execution took 912 ms, finished with status: 'ok'
2020-06-16 21:24:52.902 Started ffmpeg with command: ffmpeg -i pipe:0 -c:v copy -c:a aac -b:a 160k -f mp4 -preset fast -movflags frag_keyframe+empty_moov pipe:1
It seems the function execution ends before the actual ffmpeg command, which then never finishes.
Is there a way to make the ffmpeg "synchronous" or "blocking" so that it finishes before the function execution?
From the Google Cloud documentation it seems the function should accept three arguments: (data, context, callback). Have you tried this, or do you know that context is optional? From the docs, a function that accepts three arguments is treated as a background function; if it accepts only two arguments, it is treated as a background function only if it returns a Promise.
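For reference, a minimal sketch of the two forms described there (doWork and the export names are hypothetical placeholders, not taken from your code):
// doWork stands in for your own async logic (e.g. the ffmpeg run) and must return a Promise.
const doWork = (data) => Promise.resolve(data);
// Three-argument form: signal completion by calling the callback.
exports.withCallback = (data, context, callback) => {
  doWork(data)
    .then(() => callback())
    .catch((err) => callback(err));
};
// Two-argument form: signal completion by returning a Promise.
exports.withPromise = (data, context) => {
  return doWork(data);
};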
Beyond that, a few other points:
1: Here no callback is called; if in your tests the function exited with that log line, that is another hint that calling the second argument as a callback is a required step to let the process finish:
if (event.name.indexOf('uploads/') === -1) {
console.log("File " + event.name + " is not to be processed.")
return;
}
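In other words, that branch would become (only the callback() call is added):
if (event.name.indexOf('uploads/') === -1) {
console.log("File " + event.name + " is not to be processed.")
callback();
return;
}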
2: I would suggest adding a few more console.log calls (or many more, if you prefer) to clarify the flow: in your question you pasted only one log line, which is not very helpful beyond showing that it appears after the system log line.
3: The link you used as a tutorial is almost three years old; Google Cloud may have changed its interface in the meantime.
That said, if accepting three arguments rather than only two doesn't solve your problem, you can try changing your function to return a Promise:
exports.transcodeVideo = (event, context) => new Promise((resolve, reject) => {
const bucket = storage.bucket(event.bucket);
console.log(event);
if (event.name.indexOf('uploads/') === -1) {
console.log("File " + event.name + " is not to be processed.")
return resolve(); // or reject if this is an error case
}
// ensure that you only proceed if the file is newly created
if (event.metageneration !== '1') {
return resolve(); // or reject if this is an error case
}
// Open write stream to new bucket, modify the filename as needed.
const targetName = event.name.replace("uploads/", "").replace(/[.][a-z0-9]+$/, "");
console.log("Target name will be: " + targetName);
const remoteWriteStream = bucket.file("processed/" + targetName + ".mp4")
.createWriteStream({
metadata: {
//metadata: event.metadata, // You may not need this, my uploads have associated metadata
contentType: 'video/mp4', // This could be whatever else you are transcoding to
},
});
// Open read stream to our uploaded file
const remoteReadStream = bucket.file(event.name).createReadStream();
// Transcode
ffmpeg()
.input(remoteReadStream)
.outputOptions('-c:v copy') // Change these options to whatever suits your needs
.outputOptions('-c:a aac')
.outputOptions('-b:a 160k')
.outputOptions('-f mp4')
.outputOptions('-preset fast')
.outputOptions('-movflags frag_keyframe+empty_moov')
// https://github.com/fluent-ffmpeg/node-fluent-ffmpeg/issues/346#issuecomment-67299526
.on('start', (cmdLine) => {
console.log('Started ffmpeg with command:', cmdLine);
})
.on('end', () => {
console.log('Successfully re-encoded video.');
resolve();
})
.on('error', (err, stdout, stderr) => {
console.error('An error occurred during encoding', err.message);
console.error('stdout:', stdout);
console.error('stderr:', stderr);
reject(err);
})
.pipe(remoteWriteStream, { end: true }); // end: true, emit end event when readable stream ends
});
Hope this helps.
Related
I have a function that creates a PDF file and sends it via email, using pdfkit and nodemailer. Every now and then I get a file I can't open. I can't figure out why this happens and why it works most of the time. I haven't noticed any particular situation in which it fails; there doesn't seem to be any pattern to it (text length etc.). Could someone point out if there is some obvious problem in my PDF creation code (like with async/await)?
exports.sendTranscriptionToEmail = async (req, res) => {
let finalText = [];
let nickColorsArray = [];
const doc = new PDFDocument();
let filename = req.body.sessionName;
let text = [];
if (!filename || typeof filename != "string") {
return res.status(400).send({ message: "Incorrect Information!" });
}
// here I get the data from the database
try {
const rows = await knex("transcriptions").select("*").where({
conversation_id: filename,
});
if (!rows) {
return res.status(400).send({ message: "Transcription not found" });
}
// Stripping special characters
filename = encodeURIComponent(filename) + ".pdf";
res.setHeader(
"Content-disposition",
'attachment; filename="' + filename + '"'
);
res.setHeader("Content-type", "application/pdf");
doc.pipe(fs.createWriteStream(filename));
doc.fontSize(18).fillColor("black").text("Participants:", {
width: 410,
align: "center",
});
doc.moveDown();
nickColorsArray.forEach((n) => {
doc.fontSize(14).fillColor(n.color).text(n.nick, {
width: 410,
align: "center",
});
});
doc.moveDown();
doc.moveDown();
doc.fontSize(18).fillColor("black").text("Transcription:", {
width: 410,
align: "center",
});
doc.moveDown();
finalText.forEach((f) => {
doc
.fontSize(14)
.fillColor(f.color)
.text(f.word + " ", {
width: 410,
continued: true,
});
});
doc.end();
} catch (err) {
console.log("Something went wrong: ", err.message);
}
};
I have experienced the same issue; after investigation I found my solution at https://github.com/foliojs/pdfkit/issues/265
PDFKit doesn't actually know when all of the data has been flushed to
whatever stream you're writing to (file, http response, etc.). Since
PDFKit has no access to the actual writable stream it is being piped
to (PDFKit itself is a readable stream, and you set up the writable
part), it only knows when it has finished pumping out chunks to
whoever might be reading. It may be some time later that the writable
stream actually flushes its internal buffers out to the actual
destination.
I believe it's not quite as simple as listening for the 'finish' event
on the write stream, specifically in the case of errors, so I
implemented the following function which returns a Promise.
import * as fs from 'fs';

function savePdfToFile(pdf: PDFKit.PDFDocument, fileName: string): Promise<void> {
return new Promise<void>((resolve, reject) => {
// To determine when the PDF has finished being written successfully
// we need to confirm the following 2 conditions:
//
// 1. The write stream has been closed
// 2. PDFDocument.end() was called synchronously without an error being thrown
let pendingStepCount = 2;
const stepFinished = () => {
if (--pendingStepCount == 0) {
resolve();
}
};
const writeStream = fs.createWriteStream(fileName);
writeStream.on('close', stepFinished);
pdf.pipe(writeStream);
pdf.end();
stepFinished();
});
}
Instead of calling .end() directly, you call this function and pass
the pdf document and filename.
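For example, inside the question's async route handler the ending could look something like this (a hypothetical adaptation: doc and filename are the same variables as above, and the explicit doc.pipe(...) and doc.end() calls are dropped because the helper does both):
// instead of doc.pipe(fs.createWriteStream(filename)); ... doc.end();
await savePdfToFile(doc, filename); // resolves only once the write stream has closed
// at this point the file on disk is complete and safe to attach to an email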
This should correctly handle the following situations:
1. The PDF is generated successfully.
2. An error is thrown inside pdf.end() before the write stream is closed.
3. An error is thrown inside pdf.end() after the write stream has been closed.
So in the end it seems that sometimes the server does not create the file fast enough for you. After implementing this solution my response time did increase by something like one second, but it got rid of the corruption behavior.
I need to wait for task A to complete before executing the task B code.
Task A converts an audio file, and task B uses the converted audio for further processing.
Because task A stores the new audio file in a particular directory and task B tries to access a file that does not exist yet, my code breaks.
How do I make sure the task B code executes only once the new audio file has been saved to the directory?
code
var track = fileURL;//your path to source file
ffmpeg(track)
.toFormat('flac')
.on('error', function (err) {
console.log('An error occurred: ' + err.message);
})
.on('progress', function (progress) {
// console.log(JSON.stringify(progress));
console.log('Processing: ' + progress.targetSize + ' KB converted');
})
.on('end', function () {
console.log('Processing finished !');
})
.save(path.join(__dirname, '/public/downloads/Test.flac'));//path where you want to save your file
The above part of the code takes a file from the uploads folder, converts it to a new format and saves it to the downloads directory.
As you can see below, I am then trying to access the file (Test.flac) in the downloads folder. There is a lot more code, but I need to execute this block only after the above task has completed.
const Speech = require('@google-cloud/speech');
const projectId = 'uliq-68823';
// Instantiates a client
const speechClient = Speech({
projectId: projectId
});
// The name of the audio file to transcribe
const fileName2 = path.join(__dirname, '/public/downloads/' + 'Test.flac');
// Reads a local audio file and converts it to base64
const file2 = fs.readFileSync(fileName2);
const audioBytes = file2.toString('base64');
The fluent-ffmpeg library uses streams to process your files. Therefore, if you want to execute code after the stream is done, run that code in the callback registered for the stream's 'end' event.
Example:
var track = fileURL;//your path to source file
ffmpeg(track)
.toFormat('flac')
.on('error', function (err) {
console.log('An error occurred: ' + err.message);
})
.on('progress', function (progress) {
// console.log(JSON.stringify(progress));
console.log('Processing: ' + progress.targetSize + ' KB converted');
})
.on('end', function () {
console.log('Processing finished !');
// USE THE FILE HERE
// <----------------
})
.save(path.join(__dirname, '/public/downloads/Test.flac'));
Use the waterfall function from the async package, which serializes functions so that the second one runs only after the first has finished. The package is available on npm as async.
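If you go that route, a minimal sketch with async.waterfall could look like this (ffmpeg, track and path are the same as in the question's snippet; the second task body is just a placeholder for the transcription code):
var async = require('async');
async.waterfall([
  function convert(done) {
    ffmpeg(track)
      .toFormat('flac')
      .on('error', function (err) { done(err); })
      .on('end', function () { done(null, path.join(__dirname, '/public/downloads/Test.flac')); })
      .save(path.join(__dirname, '/public/downloads/Test.flac'));
  },
  function transcribe(convertedPath, done) {
    // task B: read convertedPath and send it to the Speech API here
    done(null);
  }
], function (err) {
  if (err) console.log('Pipeline failed: ' + err.message);
  else console.log('All tasks finished.');
});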
When building complex stream pipelines, it often makes sense to pass errors from one stage to the next. (In the situation at hand, I'm considering Gulp pipelines.) But apparently the Node.js stream pipe method doesn't do that.
One can manually bind the error event listener of one stream to the error event emitter of the next. But that's pretty tedious.
Is there some easy way to build pipelines with error chaining? Or is there some reason why one shouldn't do this?
Example:
var through = require("through2");
var s1 = through(function(chunk, enc, cb) {
console.log("In 1: " + chunk.toString());
cb(null, chunk);
});
var s2 = through(function(chunk, enc, cb) {
console.log("In 2: " + chunk.toString());
if (chunk.toString() === "error")
cb(Error("Something broke"), null);
else
cb(null, chunk);
});
var s3 = through(function(chunk, enc, cb) {
console.log("In 3: " + chunk.toString());
cb(null, chunk);
});
s1.pipe(s2).pipe(s3)
.on("data", function(chunk, enc) {
console.log("Out: " + chunk.toString());
})
.on("error", function(err) {
console.log("Final error: " + err);
})
;
////////////////////////////////////////////////////
// The following two lines are the relevant part. //
s1.on("error", s2.emit.bind(s2, "error")); //
s2.on("error", s3.emit.bind(s3, "error")); //
////////////////////////////////////////////////////
s1.write("first");
s1.write("error");
s1.write("last");
Is there some easy way to build pipelines with error chaining?
bubble-stream-error provides a mechanism to do this.
Or is there some reason why one shouldn't do this?
Not that I'm aware of. This seems like something you would reasonably want to do in many cases.
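As a side note, on newer Node.js versions (10+) the built-in stream.pipeline utility also forwards errors from every stage to a single callback, which removes the need for the manual bind lines. A minimal sketch using the three through2 streams defined in the question, in place of the .pipe() chain and the manual error wiring:
var { pipeline } = require('stream');
s3.on('data', function (chunk) {
  console.log('Out: ' + chunk.toString());
});
pipeline(s1, s2, s3, function (err) {
  if (err) console.log('Final error: ' + err);
  else console.log('Pipeline finished without errors.');
});
s1.write('first');
s1.write('error');
s1.write('last');
s1.end();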
I'm just wondering whether it is at all possible to transfer a directory from a unix server to my local machine using the ssh2 module in node.js. I have connected to the remote host and can read the directory as well as transfer single files, but there are 28 folders in the directory which each contain files and sub directories. What I'd like to do is take an exact copy of the main directory from the server to my local machine.
I was using fastGet with single files, but transferring a directory gives Error: EISDIR, open __dirname/../localdirectory/, which I think implies I can't use fastGet to get an entire directory. I also tried using the exec command to scp it over, but I couldn't work out the syntax for the local directory:
// c is an active connection
c.exec('scp filethatexists.extension /../filepath/newname.extension', function(err, stream) {
if (err) {
console.log("error: " + err);
return;
}
stream.on('data', function(data, extended) {
console.log((extended === 'stderr' ? 'STDERR: ' : 'STDOUT: ') + data);
});
stream.on('end', function() {
console.log('Stream :: EOF');
});
stream.on('close', function() {
console.log('Stream :: close');
});
stream.on('exit', function(code, signal) {
console.log('Stream :: exit :: code: ' + code + ', signal: ' + signal);
c.end();
});
});
This just results in the EOF log being printed. This code was just me testing if I could get a single file transferring.
Can anyone provide me with any assistance? Thank you in advance.
A couple of solutions:
1. You could recursively traverse the directory (making directories and transferring files as needed) using the sftp methods; a rough sketch of this approach is included at the end of this answer.
2. Tar the directory (compress it too if you want) to stdout (e.g. tar cf - mydir) and then process that incoming stdout data with the tar module (and the built-in zlib module first if you end up compressing the directory). The code below takes this approach.
// Requires:
// * `npm install tar-fs`
// * `ssh2` v0.5.x or newer
var tar = require('tar-fs');
var zlib = require('zlib');
function transferDir(conn, remotePath, localPath, compression, cb) {
var cmd = 'tar cf - "' + remotePath + '" 2>/dev/null';
if (typeof compression === 'function')
cb = compression;
else if (compression === true)
compression = 6;
if (typeof compression === 'number'
&& compression >= 1
&& compression <= 9)
cmd += ' | gzip -' + compression + 'c 2>/dev/null';
else
compression = undefined;
conn.exec(cmd, function(err, stream) {
if (err)
return cb(err);
var exitErr;
var tarStream = tar.extract(localPath);
tarStream.on('finish', function() {
cb(exitErr);
});
stream.on('exit', function(code, signal) {
if (typeof code === 'number' && code !== 0) {
exitErr = new Error('Remote process exited with code '
+ code);
} else if (signal) {
exitErr = new Error('Remote process killed with signal '
+ signal);
}
}).stderr.resume();
if (compression)
stream = stream.pipe(zlib.createGunzip());
stream.pipe(tarStream);
});
}
// USAGE ===============================================================
var ssh = require('ssh2');
var conn = new ssh();
conn.on('ready', function() {
transferDir(conn,
'/home/foo',
__dirname + '/download',
true, // uses compression with default level of 6
function(err) {
if (err) throw err;
console.log('Done transferring');
conn.end();
});
}).connect({
host: '192.168.100.10',
port: 22,
username: 'foo',
password: 'bar'
});
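For completeness, here is a rough, untested sketch of the first option (walking the remote tree with the sftp methods). Treat it as an outline rather than production code: the directory check relies on the ls-style longname field returned by readdir, and fs.mkdir's recursive option needs Node 10+.
var fs = require('fs');
var path = require('path');
function downloadDir(sftp, remoteDir, localDir, cb) {
  fs.mkdir(localDir, { recursive: true }, function(err) {
    if (err) return cb(err);
    sftp.readdir(remoteDir, function(err, list) {
      if (err) return cb(err);
      var pending = list.length;
      if (pending === 0) return cb();
      list.forEach(function(entry) {
        var remote = remoteDir + '/' + entry.filename;
        var local = path.join(localDir, entry.filename);
        var done = function(err) {
          if (err) return cb(err);
          if (--pending === 0) cb();
        };
        // entry.longname is the `ls -l`-style line; a leading 'd' marks a directory
        if (entry.longname[0] === 'd')
          downloadDir(sftp, remote, local, done);
        else
          sftp.fastGet(remote, local, done);
      });
    });
  });
}
// usage, once the connection is ready:
// conn.sftp(function(err, sftp) {
//   if (err) throw err;
//   downloadDir(sftp, '/home/foo', __dirname + '/download', function(err) {
//     if (err) throw err;
//     console.log('Done transferring');
//     conn.end();
//   });
// });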
I am also trying to download folders using ssh. It took me more than 10 days and I'm still trying to get it working. In the meantime I found some other code that does the same thing for me: it downloads every folder and file inside a directory.
I need to pass in a text file in the terminal and then read the data from it. How can I do this?
node server.js file.txt
How do I pass in the path from the terminal, and how do I read it on the other side?
You'll want to use the process.argv array to access the command-line arguments to get the filename and the FileSystem module (fs) to read the file. For example:
// Make sure we got a filename on the command line.
if (process.argv.length < 3) {
console.log('Usage: node ' + process.argv[1] + ' FILENAME');
process.exit(1);
}
// Read the file and print its contents.
var fs = require('fs')
, filename = process.argv[2];
fs.readFile(filename, 'utf8', function(err, data) {
if (err) throw err;
console.log('OK: ' + filename);
console.log(data)
});
To break that down a little for you: process.argv will usually have length two, the zeroth item being the "node" interpreter and the first being the script that node is currently running; items after that were passed on the command line. Once you've pulled a filename from argv, you can use the filesystem functions to read the file and do whatever you want with its contents. Sample usage would look like this:
$ node ./cat.js file.txt
OK: file.txt
This is file.txt!
[Edit] As @wtfcoder mentions, using the "fs.readFile()" method might not be the best idea because it will buffer the entire contents of the file before yielding it to the callback function. This buffering could potentially use lots of memory but, more importantly, it does not take advantage of one of the core features of node.js - asynchronous, evented I/O.
The "node" way to process a large file (or any file, really) would be to use fs.read() and process each available chunk as it is available from the operating system. However, reading the file as such requires you to do your own (possibly) incremental parsing/processing of the file and some amount of buffering might be inevitable.
Using fs with node:
var fs = require('fs');
try {
var data = fs.readFileSync('file.txt', 'utf8');
console.log(data.toString());
} catch(e) {
console.log('Error:', e.stack);
}
IMHO, fs.readFile() should be avoided because it loads the whole file into memory and won't call the callback until the entire file has been read.
The easiest way to read a text file is to read it line by line. I recommend a BufferedReader:
new BufferedReader ("file", { encoding: "utf8" })
.on ("error", function (error){
console.log ("error: " + error);
})
.on ("line", function (line){
console.log ("line: " + line);
})
.on ("end", function (){
console.log ("EOF");
})
.read ();
For complex data structures like .properties or json files you need to use a parser (internally it should also use a buffered reader).
You can use a read stream and pipe to read the file line by line without reading the whole file into memory at once.
var fs = require('fs'),
es = require('event-stream'),
os = require('os');
var s = fs.createReadStream(path)
.pipe(es.split())
.pipe(es.mapSync(function(line) {
//pause the readstream
s.pause();
console.log("line:", line);
s.resume();
})
.on('error', function(err) {
console.log('Error:', err);
})
.on('end', function() {
console.log('Finish reading.');
})
);
I am posting a complete example which I finally got working. Here I am reading the file rooms/rooms.txt from a script rooms/rooms.js:
var fs = require('fs');
var path = require('path');
var readStream = fs.createReadStream(path.join(__dirname, '../rooms') + '/rooms.txt', 'utf8');
let data = ''
readStream.on('data', function(chunk) {
data += chunk;
}).on('end', function() {
console.log(data);
});
The async way of life:
#! /usr/bin/node
const fs = require('fs');
function readall (stream)
{
return new Promise ((resolve, reject) => {
const chunks = [];
stream.on ('error', (error) => reject (error));
stream.on ('data', (chunk) => chunk && chunks.push (chunk));
stream.on ('end', () => resolve (Buffer.concat (chunks)));
});
}
function readfile (filename)
{
return readall (fs.createReadStream (filename));
}
(async () => {
let content = await readfile ('/etc/ssh/moduli').catch ((e) => {})
if (content)
console.log ("size:", content.length,
"head:", content.slice (0, 46).toString ());
})();