I have a buffer that contains the voice of a person in a media stream, and I send it from JavaScript to NodeJS using socket.io.
I need to convert that buffer to text (speech to text, but the voice is stored as a buffer coming from a media stream).
There is a helper function I used (in NodeJS, see below) that converts between Buffer and ArrayBuffer,
and there is a package called node-blob that converts a buffer to an audio blob,
but I have searched a lot for how to convert audio (or even a buffer) to text, and I failed.
Any help, code, or package that could convert it to text?
JavaScript
navigator.mediaDevices
.getUserMedia({
video: true,
audio: true,
})
.then((stream) => {
setSrcVideo(stream);
const mediasStream = new MediaStream();
mediasStream.addTrack(stream.getVideoTracks()[0]);
mediasStream.addTrack(stream.getAudioTracks()[0]);
const mediaRecorder = new MediaRecorder(mediasStream);
socket.emit('ready');
mediaRecorder.addEventListener('dataavailable', (event) => {
if (event.data && event.data.size > 0) {
socket.emit('send-chunks', event.data);
}
});
socket.on('start-recording', () => {
mediaRecorder.start(1000);
});
});
and I receive that buffer by socket.on('send-chunks') in NodeJS like this:
NodeJS
// connection to socket.io
io.on('connection', (socket) => {
socket.on('ready', () => {
socket.emit('start-recording');
});
socket.on('send-chunks', (chunks) => {
// convert to text
});
});
// helper functions
const toArrayBuffer = (buffer) => {
const arrayBuffer = new ArrayBuffer(buffer.length);
const view = new Uint8Array(arrayBuffer);
for (let i = 0; i < buffer.length; ++i) {
view[i] = buffer[i];
}
return arrayBuffer;
};
const toBuffer = (arrayBuffer) => {
const buffer = Buffer.alloc(arrayBuffer.byteLength);
const view = new Uint8Array(arrayBuffer);
for (let i = 0; i < buffer.length; ++i) {
buffer[i] = view[i];
}
return buffer;
};
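One way to do this is to pipe the incoming chunks into a streaming speech-to-text service. Below is a minimal sketch assuming Google Cloud's @google-cloud/speech package (that package and its config values are my assumption, not part of the original setup); MediaRecorder typically emits WebM/Opus, which that API accepts as WEBM_OPUS:
// Minimal sketch, assuming `npm install @google-cloud/speech` and
// configured Google Cloud credentials (not part of the original setup).
const speech = require('@google-cloud/speech');
const speechClient = new speech.SpeechClient();

io.on('connection', (socket) => {
  // One streaming recognizer per connected client.
  const recognizeStream = speechClient
    .streamingRecognize({
      config: {
        encoding: 'WEBM_OPUS',  // MediaRecorder's usual container/codec
        sampleRateHertz: 48000, // typical Opus sample rate
        languageCode: 'en-US',
      },
      interimResults: true,
    })
    .on('data', (data) => {
      // Each response carries the best transcript so far.
      console.log(data.results[0]?.alternatives[0]?.transcript);
    })
    .on('error', console.error);

  socket.on('ready', () => socket.emit('start-recording'));

  socket.on('send-chunks', (chunks) => {
    // socket.io delivers the browser Blob as a Node Buffer;
    // write it straight into the recognizer stream.
    recognizeStream.write(chunks);
  });

  socket.on('disconnect', () => recognizeStream.end());
});
Note that Google's streaming recognizer caps a single stream at around five minutes, so long sessions need the stream re-opened periodically.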
I'm trying to parse the following serial input raw data:
[51,45,51,30,33,30,33,00,00,00,C0,50,17,FE,05,00,00,58,02,9A,0E,00,00,00,00,02,00,0D,0A]
It consists of some chars (0x51, 0x45, 0x51, ...) and some raw data (0x00, 0x00, 0xC0, ...), and it ends with '\r\n' (0x0D, 0x0A).
The end result I'm getting is ('\r\n' is not included):
['51','45','51','30','33','30','33','0','0','0','fffd','50','17','fffd','5','0','0','58','2','fffd','e','0','0','0','0','2','0']
The values at indices 10, 13, and 19 (C0, FE, 9A) are not displayed correctly (because they are not part of the ASCII table?).
To get the data I use TextDecoderStream, and I'm not sure it's the right decoder, although the data partially consists of ASCII chars and ends with '\r\n'.
Is there a more suitable decoder, or is there an error in my code?
Thanks in advance.
// request & open port here.
port = await navigator.serial.requestPort();
await port.open({
baudRate: 115200,
dataBits: 8,
stopBits: 1,
parity: "none",
flowControl: "none",
});
// setup the output stream here.
const encoder = new TextEncoderStream();
outputDone = encoder.readable.pipeTo(port.writable);
outputStream = encoder.writable;
/**********************************/
// code to read the stream here.
let decoder = new TextDecoderStream();
inputDone = port.readable.pipeTo(decoder.writable);
inputStream = decoder.readable.pipeThrough(
new TransformStream(new LineBreakTransformer())
);
reader = inputStream.getReader();
readLoop();
}
/**
* Reads data from the input stream and displays it on screen.
*/
async function readLoop() {
  while (true) {
    const { value, done } = await reader.read();
    if (value) {
      let arrValue = [...value];
      ascii_to_hexa(arrValue); // converts each char to its hex string in place
      console.log(arrValue);
    }
    // Check `done` outside the value check, otherwise the loop never exits
    // when the final read resolves without a value.
    if (done) {
      console.log("[readLoop] DONE", done);
      reader.releaseLock();
      break;
    }
  }
}
class LineBreakTransformer {
constructor() {
// A container for holding stream data until a new line.
this.container = "";
}
transform(chunk, controller) {
// Handle incoming chunk
this.container += chunk;
const lines = this.container.split("\r\n");
this.container = lines.pop();
lines.forEach((line) => controller.enqueue(line));
}
flush(controller) {
// Flush the stream.
controller.enqueue(this.container);
}
}
function ascii_to_hexa(arr) {
for (let i = 0; i < arr.length; i++) {
arr[i] = Number(arr[i].charCodeAt(0)).toString(16);
}
}
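For what it's worth, the 'fffd' entries look like U+FFFD replacement characters, which TextDecoder substitutes for bytes that are not valid UTF-8 (0xC0, 0xFE, 0x9A are not). A sketch of reading the raw bytes instead, skipping the text decoder entirely and framing on the trailing 0x0D 0x0A (assumes it runs inside an async function, after port.open and without piping port.readable anywhere else):
// Sketch: read raw Uint8Array chunks so bytes like 0xC0/0xFE/0x9A survive,
// and split frames on '\r\n' at the byte level instead of decoding text.
const rawReader = port.readable.getReader();
let pending = [];

while (true) {
  const { value, done } = await rawReader.read();
  if (done) {
    rawReader.releaseLock();
    break;
  }
  for (const byte of value) {
    pending.push(byte);
    const n = pending.length;
    if (n >= 2 && pending[n - 2] === 0x0d && pending[n - 1] === 0x0a) {
      // One complete frame, '\r\n' stripped; print it as hex strings.
      const frame = pending.slice(0, n - 2);
      console.log(frame.map((b) => b.toString(16).padStart(2, '0')));
      pending = [];
    }
  }
}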
I need to validate video resolution: if it is above 1280x720, I need to block the file before the user uploads it. How can I do this in the browser with JavaScript?
You can use the mp4box npm library.
import MP4Box from 'mp4box'
let reader = new FileReader()
const mp4boxfile = MP4Box.createFile();
function _base64ToArrayBuffer(base64) {
var binary_string = window.atob(base64);
var len = binary_string.length;
var bytes = new Uint8Array(len);
for (var i = 0; i < len; i++) {
bytes[i] = binary_string.charCodeAt(i);
}
return bytes.buffer;
}
reader.onload = function() {
mp4boxfile.onReady = function (info) {
console.log('Video info: ', info); // resolution info is available on info.videoTracks[0].video.width and .height
}
// Strip the "data:<mime>;base64," prefix by locating it, rather than assuming its length.
const base64 = reader.result.substring(reader.result.indexOf(';base64,') + ';base64,'.length);
const arrayBuffer = _base64ToArrayBuffer(base64);
arrayBuffer.fileStart = 0;
mp4boxfile.appendBuffer(arrayBuffer);
};
reader.readAsDataURL(videoFile); // videoFile comes from onChange event of input file element
Video codec details as well as the video duration are also available on the info object.
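If you don't need codec details, a library-free alternative is to let the browser itself parse the metadata through an off-screen video element (a sketch; this works for any container the browser can play, not only MP4):
function getVideoResolution(file) {
  return new Promise((resolve, reject) => {
    const url = URL.createObjectURL(file);
    const video = document.createElement('video');
    video.preload = 'metadata';
    video.onloadedmetadata = () => {
      URL.revokeObjectURL(url);
      resolve({ width: video.videoWidth, height: video.videoHeight });
    };
    video.onerror = () => {
      URL.revokeObjectURL(url);
      reject(new Error('Could not read video metadata'));
    };
    video.src = url;
  });
}

// Usage (inside an async change handler): block anything above 1280x720.
const { width, height } = await getVideoResolution(videoFile);
if (width > 1280 || height > 720) {
  alert('Maximum allowed resolution is 1280x720');
}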
MediaRecorder's ondataavailable works successfully only once.
I need to get a blob, convert it to base64, send it to my server, and decode that base64 back into an audio blob.
This is very strange.
For example, output:
blob1
blob2
blob3
blob4
blob5
blob6
blob7
blob8
blob9
....
I can hear just blob1; the other blobs are "disabled" (silent).
Try it!
This code record audio:
window.startRecord = function(cb){
navigator.mediaDevices.getUserMedia({ audio: true, video: false }).then(function(stream){
var options = {
audioBitsPerSecond: 128000,
mimeType: 'audio/webm;codecs=opus'
};
if(!MediaRecorder.isTypeSupported(options.mimeType)) options.mimeType = 'audio/ogg;codecs=opus';
window.voice = new MediaRecorder(stream, options);
voice.start(500);
voice.ondataavailable = function(data){
var reader = new FileReader();
var blob = data.data;
reader.readAsDataURL(blob);
reader.onloadend = function () {
var result = reader.result;
cb(result);
}
};
voice.onstop = function(){
console.log('stop audio call');
}
});
}
window.convertDataURIToBinary = function(dataURI) {
var BASE64_MARKER = ';base64,';
var base64Index = dataURI.indexOf(BASE64_MARKER) + BASE64_MARKER.length;
var base64 = dataURI.substring(base64Index);
var raw = window.atob(base64);
var rawLength = raw.length;
var array = new Uint8Array(new ArrayBuffer(rawLength));
for(var i = 0; i < rawLength; i++) {
array[i] = raw.charCodeAt(i);
}
return array;
}
<body>
<button onclick="startRecord(function(r){
var binary= convertDataURIToBinary(r);
var blob=new window.Blob([binary], {type : 'audio/webm'});
var blobUrl = window.URL.createObjectURL(blob);
console.log('URL : ' + blobUrl);
document.getElementById('data').append(blobUrl + ' | ');
})">Exec</button>
<div id="data">
</div>
</body>
I am not sure what problem you are trying to highlight, but:
The dataavailable event's data property contains only a chunk of the whole data that has been recorded.
For instance, only the first chunk will contain the metadata needed for the final recorded media.
It is therefore expected that you merge all these chunks together at the time you export them.
And this should be done only once, at the MediaRecorder's stop event.
const chunks = []; // store all the chunks in an array
recorder.ondataavailable = e => chunks.push(e.data);
// merge the chunks in a single Blob here
recorder.onstop = e => export_media(new Blob(chunks));
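Putting that together, a minimal sketch (sendToServer is a hypothetical upload helper) that records for five seconds and converts the merged Blob to base64 only once, after stop fires:
navigator.mediaDevices.getUserMedia({ audio: true }).then((stream) => {
  const recorder = new MediaRecorder(stream);
  const chunks = [];

  recorder.ondataavailable = (e) => chunks.push(e.data);
  recorder.onstop = () => {
    // Merge every chunk into one Blob, then encode the whole thing once.
    const merged = new Blob(chunks, { type: recorder.mimeType });
    const fileReader = new FileReader();
    fileReader.onloadend = () => sendToServer(fileReader.result); // hypothetical upload helper
    fileReader.readAsDataURL(merged);
  };

  recorder.start(500);                     // emit a chunk every 500 ms
  setTimeout(() => recorder.stop(), 5000); // stop after 5 s for the demo
});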
I am trying to stream an MP3 file from a NodeJS server using BinaryJS - http://binaryjs.com/
But when I decode the buffers on the client side, they seem to overlap: each new chunk of data starts playing a few milliseconds before the previous one has ended, which makes the audio lag.
Is there any way to make the client wait until the current buffer has finished before starting the next one?
Server:
var BinaryServer = require('binaryjs').BinaryServer;
var fs = require('fs');
var server = BinaryServer({port: 9000});
server.on('connection', function(client){
  var file = fs.createReadStream(__dirname + '/Song.mp3', {
    flags: 'r',
    bufferSize: 4 * 1024
  });
  client.send(file); // send the stream to the client over BinaryJS
});
Client:
var client = new BinaryClient('ws://localhost:9000');
window.AudioContext = window.AudioContext || window.webkitAudioContext;
var context = new AudioContext();
client.on('stream', function (stream, meta) {
var parts = [];
var last = 0;
stream.on('data', function (data) {
var source = context.createBufferSource();
context.decodeAudioData(data, function (buf) {
source.buffer = buf;
source.connect(context.destination);
source.loop = false;
source.start(last);
last += buf.duration;
source.onended = function() {
console.log('Your audio has finished playing');
};
},
function (e) {
  console.log('Error with decoding audio data: ' + e.err);
});
parts.push(data);
});
stream.on('end', function () {
console.log(parts);
});
});
Not sure about this, but instead of initializing last to 0, you might want to initialize it to context.currentTime.
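Roughly, the data handler would then become something like this (still with no guard against chunks finishing decoding out of order):
client.on('stream', function (stream, meta) {
  // Start the play cursor at "now", then advance it by each buffer's
  // duration so every chunk is scheduled right after the previous one.
  var last = context.currentTime;
  stream.on('data', function (data) {
    context.decodeAudioData(data, function (buf) {
      var source = context.createBufferSource();
      source.buffer = buf;
      source.connect(context.destination);
      source.start(last); // scheduled start, not immediate
      last += buf.duration;
    });
  });
});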