I am trying to record and edit my voice in javascript. Specifically, I am trying to record it in an array that looks like this for my boss [0,102, 301,...] where the values are samples of my voice.
When I record my voice in javascript, I get a Blob type. Is there any way to transform a Blob into the [x, y, z,...] array? Or how is javascript signal processing normally completed?
This is code from this medium article that is how we are doing things. I just can't share the actual company code.
/**
 * Sets up a microphone recorder.
 *
 * Resolves to { start, stop } controls. `stop()` resolves to
 * { audioBlob, audioURL, play } once the recorder has flushed its chunks.
 *
 * Fixes vs. original: drops the `new Promise(async resolve => ...)`
 * antipattern (an async function already returns a Promise), removes the
 * duplicated debug console.log calls, and registers the "stop" listener
 * with { once: true } so repeated stop() calls cannot stack listeners.
 */
const recordAudio = async () => {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const mediaRecorder = new MediaRecorder(stream);
  const audioChunks = [];

  // Collect every chunk the recorder emits.
  mediaRecorder.addEventListener("dataavailable", (event) => {
    audioChunks.push(event.data);
  });

  const start = () => mediaRecorder.start();

  const stop = () =>
    new Promise((resolve) => {
      mediaRecorder.addEventListener(
        "stop",
        () => {
          const audioBlob = new Blob(audioChunks);
          const audioURL = URL.createObjectURL(audioBlob);
          const audio = new Audio(audioURL);
          const play = () => audio.play();
          resolve({ audioBlob, audioURL, play });
        },
        { once: true } // don't accumulate listeners across calls
      );
      mediaRecorder.stop();
    });

  return { start, stop };
};
// Resolve after `time` milliseconds have elapsed.
const sleep = (time) =>
  new Promise((resolve) => {
    setTimeout(resolve, time);
  });
// Record three seconds of audio, play it back, and keep the action
// button disabled for the duration of recording plus playback.
const handleAction = async () => {
  const recorder = await recordAudio();
  const actionButton = document.getElementById('action');
  actionButton.disabled = true;

  recorder.start();
  await sleep(3000);

  const audio = await recorder.stop();
  audio.play();
  await sleep(3000);

  actionButton.disabled = false;
};
You can use an AudioContext and feed your getUserMedia stream into it; from an AnalyserNode you can then read out a Uint8Array containing either the raw time-domain signal or the already-transformed frequency-domain signal.
Here you can check more details.
https://developer.mozilla.org/en-US/docs/Web/API/AnalyserNode
// Initialize the signal-catching system: route microphone input into an
// AnalyserNode so the raw signal can be sampled on demand.
const audioContext = new AudioContext();
const analyser = audioContext.createAnalyser();
navigator.mediaDevices.getUserMedia({ audio: true }).then((stream) => {
  const source = audioContext.createMediaStreamSource(stream);
  source.connect(analyser);
});
// Then poll the analyser every millisecond.
// FIX vs. original: it wrote the time-domain samples and the frequency
// data into the SAME Uint8Array, so the waveform was overwritten before
// it could be used; it also allocated a fresh array on every tick.
const bufferLength = analyser.frequencyBinCount;
const timeData = new Uint8Array(bufferLength); // raw time-domain samples
const freqData = new Uint8Array(bufferLength); // FFT magnitude bins
setInterval(() => {
  // get time domain signal
  analyser.getByteTimeDomainData(timeData);
  // get frequency domain signal
  analyser.getByteFrequencyData(freqData);
  console.log(timeData, freqData);
}, 1);
For visualization this works fine. For recording, however, there can be problems: if you poll faster than the analyser updates you will capture repeated copies of the same signal, and if you poll too slowly there will be gaps in the data. I haven't figured out how to read samples directly from the stream.
Related
I am trying to implement real time voice changer, I have tried some methods but I could not get any results.
I have tried to implement my codes according to following tutorial link
According to the link above, instead of using bufferSource, I created the source from the stream with createMediaStreamSource and sent it to the demonBeastTransform method
here is my final codes:
const video = document.querySelector("video");
const range = document.querySelector("#gain");

navigator.mediaDevices
  .getUserMedia({ audio: true, video: true })
  .then((stream) => {
    video.srcObject = stream;
    video.onloadedmetadata = (e) => {
      video.play();
      video.muted = true;
    };
    // Create a MediaStreamAudioSourceNode from the live stream and feed
    // it into the voice-changer graph.
    const audioCtx = new AudioContext();
    const source = audioCtx.createMediaStreamSource(stream);
    demonBeastTransform(source, audioCtx);
  })
  .catch((err) => {
    console.error(`The following error occured: ${err}`);
  });
/**
 * Builds a "demon/beast" voice-changer graph on top of `ctx` and wires its
 * output to ctx.destination.
 *
 * @param {AudioNode} source - input node (e.g. a MediaStreamAudioSourceNode).
 * @param {BaseAudioContext} ctx - audio context that owns the graph.
 * @param {number} [distortionAmount=100] - currently unused; kept for
 *   interface compatibility.
 */
async function demonBeastTransform(source, ctx, distortionAmount = 100) {
  // Reverb: impulse response fetched remotely (may fail under CORS).
  let convolver = ctx.createConvolver();
  convolver.buffer = await ctx.decodeAudioData(await (await fetch("https://voicechanger.io/audio/impulse-responses/voxengo/Large Wide Echo Hall.wav")).arrayBuffer());

  // Fire: looping background crackle mixed under the voice.
  let fire = ctx.createBufferSource();
  fire.buffer = await ctx.decodeAudioData(await (await fetch("https://voicechanger.io/audio/backgrounds/brush_fire-Stephan_Schutze-55390065.mp3")).arrayBuffer());
  fire.loop = true;

  // Compressor: tames peaks from the summed effect paths.
  let compressor = ctx.createDynamicsCompressor();
  compressor.threshold.value = -50;
  compressor.ratio.value = 16;

  // Wobble: sawtooth oscillator modulating the delay time for a growl.
  let oscillator = ctx.createOscillator();
  oscillator.frequency.value = 50;
  oscillator.type = 'sawtooth';
  let oscillatorGain = ctx.createGain();
  oscillatorGain.gain.value = 0.004;
  let delay = ctx.createDelay();
  delay.delayTime.value = 0.01;
  let fireGain = ctx.createGain();
  fireGain.gain.value = 0.2;
  let convolverGain = ctx.createGain();
  convolverGain.gain.value = 2;

  // Filter: high-shelf boost above 1 kHz on the reverb path.
  let filter = ctx.createBiquadFilter();
  filter.type = "highshelf";
  filter.frequency.value = 1000;
  filter.gain.value = 10;

  // Create graph
  oscillator.connect(oscillatorGain);
  oscillatorGain.connect(delay.delayTime);
  source.connect(delay);
  delay.connect(convolver);
  //waveShaper.connect(convolver);
  fire.connect(fireGain);
  convolver.connect(convolverGain);
  convolverGain.connect(filter);
  filter.connect(compressor);
  fireGain.connect(ctx.destination);
  compressor.connect(ctx.destination);

  // Second path: low-passed delay output that bypasses the convolver.
  let filter2 = ctx.createBiquadFilter();
  filter2.type = "lowpass";
  filter2.frequency.value = 2000;
  let noConvGain = ctx.createGain();
  noConvGain.gain.value = 0.9;
  delay.connect(filter2);
  filter2.connect(filter);
  filter.connect(noConvGain);
  noConvGain.connect(compressor);

  // Start the scheduled sources.
  // BUG FIX: the original unconditionally called source.start(0), but a
  // MediaStreamAudioSourceNode has no start() method, so that line threw
  // a TypeError and killed the whole graph setup. Only call start() when
  // the source supports it (i.e. an AudioBufferSourceNode).
  oscillator.start(0);
  if (typeof source.start === "function") source.start(0);
  fire.start(0);
}
<h1>Web Audio API examples: MediaStreamAudioSourceNode</h1>
<video controls></video>
<br />
I took the liberty of tinkering a bit with your script and made the following modifications:
Stream
The biggest change is how the stream is handled. Now the stream object is passed directly into demonBeastTransform as its only argument.
The reason for this is that we need both the audio and video tracks from the stream. And the idea behind it is to split the audio and video tracks, modify the audio track and then combine them back together into a single stream that is passed as the srcObject of the video.
CORS
The first issue I ran into is that the fetch requests weren't working because of a Same Origin Policy error. However, I was able to download the files and fetch them from my local server, so I'd recommend that you do the same.
Add a try / catch block around any awaited requests to handle them better in case of an error.
const video = document.querySelector("video");
const range = document.querySelector("#gain");

// Transform the raw stream first, then hand the processed stream to the
// <video> element.
navigator.mediaDevices
  .getUserMedia({ audio: true, video: true })
  .then(demonBeastTransform)
  .then((stream) => {
    video.onloadedmetadata = () => video.play();
    video.srcObject = stream;
  })
  .catch((err) => {
    console.error(`The following error occured: ${err}`);
  });
/**
 * Routes the audio of `stream` through a "demon/beast" effect graph and
 * resolves with a new MediaStream carrying the processed audio plus the
 * original video tracks, ready to assign to video.srcObject.
 *
 * Rejects if fetching or decoding either remote asset fails (assets must
 * be served same-origin to avoid CORS failures).
 *
 * @param {MediaStream} stream - camera + microphone stream.
 * @returns {Promise<MediaStream>} processed stream.
 */
async function demonBeastTransform(stream) {
const audioCtx = new AudioContext();
const source = audioCtx.createMediaStreamSource(stream);
// All processed audio is collected here instead of audioCtx.destination,
// so it can be re-emitted as MediaStream tracks.
const streamDestination = audioCtx.createMediaStreamDestination();
// Get the video tracks and add them to the stream destination.
const videoTracks = stream.getVideoTracks();
for (const videoTrack of videoTracks) {
streamDestination.stream.addTrack(videoTrack);
}
// Reverb: convolver loaded from a local impulse-response file.
let convolver = audioCtx.createConvolver();
try {
const convolerResponse = await fetch("Large Wide Echo Hall.wav");
const convolverBuffer = await convolerResponse.arrayBuffer();
convolver.buffer = await audioCtx.decodeAudioData(convolverBuffer);
} catch (error) {
return Promise.reject(error);
}
// Fire: looping background crackle mixed under the voice.
let fire = audioCtx.createBufferSource();
try {
const fireResponse = await fetch("brush_fire-Stephan_Schutze-55390065.mp3");
const fireBuffer = await fireResponse.arrayBuffer();
fire.buffer = await audioCtx.decodeAudioData(fireBuffer);
fire.loop = true;
} catch (error) {
return Promise.reject(error);
}
// Compressor: tames peaks from the summed effect paths.
let compressor = audioCtx.createDynamicsCompressor();
compressor.threshold.value = -50;
compressor.ratio.value = 16;
// Wobble: sawtooth oscillator modulating the delay time for a growl.
let oscillator = audioCtx.createOscillator();
oscillator.frequency.value = 50;
oscillator.type = 'sawtooth';
// ---
let oscillatorGain = audioCtx.createGain();
oscillatorGain.gain.value = 0.004;
// ---
let delay = audioCtx.createDelay();
delay.delayTime.value = 0.01;
// ---
let fireGain = audioCtx.createGain();
fireGain.gain.value = 0.2;
// ---
let convolverGain = audioCtx.createGain();
convolverGain.gain.value = 2;
// Filter: high-shelf boost above 1 kHz on the reverb path.
let filter = audioCtx.createBiquadFilter();
filter.type = "highshelf";
filter.frequency.value = 1000;
filter.gain.value = 10;
// Create graph
oscillator.connect(oscillatorGain);
oscillatorGain.connect(delay.delayTime);
// ---
source.connect(delay)
delay.connect(convolver);
//waveShaper.connect(convolver);
fire.connect(fireGain);
convolver.connect(convolverGain);
convolverGain.connect(filter);
filter.connect(compressor);
// Instead of audioCtx.destination we pass the audio into the new stream.
fireGain.connect(streamDestination);
compressor.connect(streamDestination);
// Second path: low-passed delay output that bypasses the convolver.
let filter2 = audioCtx.createBiquadFilter();
filter2.type = "lowpass";
filter2.frequency.value = 2000;
let noConvGain = audioCtx.createGain();
noConvGain.gain.value = 0.9;
delay.connect(filter2);
filter2.connect(filter);
filter.connect(noConvGain);
noConvGain.connect(compressor);
// Start the scheduled sources (the mic source is live; it has no start()).
oscillator.start(0);
fire.start(0);
return streamDestination.stream;
}
There are similar questions for Java and iOS, but I'm wondering about detecting silence in javascript for audio recordings via getUserMedia(). So given:
navigator.mediaDevices.getUserMedia({ audio: true })
  .then((stream) => {
    const mediaRecorder = new MediaRecorder(stream);
    mediaRecorder.start();

    // Buffer every chunk the recorder hands us.
    const audioChunks = [];
    mediaRecorder.addEventListener("dataavailable", (event) => {
      audioChunks.push(event.data);
    });

    // On stop, assemble the chunks into a playable blob.
    mediaRecorder.addEventListener("stop", () => {
      const audioBlob = new Blob(audioChunks);
      const audioUrl = URL.createObjectURL(audioBlob);
      const audio = new Audio(audioUrl);
      audio.play();
    });
  });
I'm wondering if there is anything that can be checked on the Blob, URL, or Audio objects in the stop event for an absence of audio. In the case of a bad microphone or a virtual device selected - anything along those lines. I was previously checking the blob's size, but silent audio still has a filesize. I can do this on the backend via ffmpeg, but hoping there is a way in pure JS to simplify.
With this solution inspired by Visualizations with Web Audio API, you can set minimal required decibels and detect if anything was recorded.
const MIN_DECIBELS = -45;

navigator.mediaDevices.getUserMedia({ audio: true })
  .then(stream => {
    const mediaRecorder = new MediaRecorder(stream);
    mediaRecorder.start();

    const audioChunks = [];
    mediaRecorder.addEventListener("dataavailable", event => {
      audioChunks.push(event.data);
    });

    // Route the same stream into an analyser so levels can be watched
    // while the recorder runs.
    const audioContext = new AudioContext();
    const audioStreamSource = audioContext.createMediaStreamSource(stream);
    const analyser = audioContext.createAnalyser();
    // Bins below this level read as 0, so any non-zero bin means the
    // signal exceeded MIN_DECIBELS at least once.
    analyser.minDecibels = MIN_DECIBELS;
    audioStreamSource.connect(analyser);

    const bufferLength = analyser.frequencyBinCount;
    const domainData = new Uint8Array(bufferLength);

    let soundDetected = false;
    const detectSound = () => {
      if (soundDetected) {
        return; // stop polling once we have our answer
      }
      analyser.getByteFrequencyData(domainData);
      // FIX: the original declared an unused `value` local and kept
      // scanning the whole buffer after a hit; some() stops at the
      // first non-zero bin.
      soundDetected = domainData.some(level => level > 0);
      window.requestAnimationFrame(detectSound);
    };
    window.requestAnimationFrame(detectSound);

    mediaRecorder.addEventListener("stop", () => {
      const audioBlob = new Blob(audioChunks);
      const audioUrl = URL.createObjectURL(audioBlob);
      const audio = new Audio(audioUrl);
      audio.play();
      console.log({ soundDetected });
    });
  });
This code can run a function for every dialog it detects. It runs in a loop until the user stops it:
// Tunables for voice-activity-based dialog capture.
// FIX: the originals were implicit globals (no declaration keyword),
// which throws in strict mode / ES modules.
const VOICE_MIN_DECIBELS = -35;      // analyser floor: quieter bins read as 0
const DELAY_BETWEEN_DIALOGS = 400;   // ms of silence that ends a dialog
const DIALOG_MAX_LENGTH = 60 * 1000; // hard cap on one dialog, in ms

// Mutable recording state shared with record().
let MEDIA_RECORDER = null;
let IS_RECORDING = false;

//startRecording: begin the record/stop loop.
function startRecording() {
  IS_RECORDING = true;
  record();
}

//stopRecording: flag the loop to end and stop any active recorder.
function stopRecording() {
  IS_RECORDING = false;
  if (MEDIA_RECORDER !== null)
    MEDIA_RECORDER.stop();
}
//record: capture one dialog, hand it off, then loop until stopped.
function record() {
  navigator.mediaDevices.getUserMedia({ audio: true })
    .then(stream => {
      //start recording:
      MEDIA_RECORDER = new MediaRecorder(stream);
      MEDIA_RECORDER.start();

      //save audio chunks:
      const audioChunks = [];
      MEDIA_RECORDER.addEventListener("dataavailable", event => {
        audioChunks.push(event.data);
      });

      //analysis:
      const audioContext = new AudioContext();
      const audioStreamSource = audioContext.createMediaStreamSource(stream);
      const analyser = audioContext.createAnalyser();
      analyser.minDecibels = VOICE_MIN_DECIBELS;
      audioStreamSource.connect(analyser);
      const bufferLength = analyser.frequencyBinCount;
      const domainData = new Uint8Array(bufferLength);

      //loop state.
      //FIX: the original never initialized startTime (so the time-out
      //check compared against undefined/NaN and never fired) and
      //reassigned a `const time`, which throws a TypeError at runtime.
      const startTime = Date.now();
      let lastDetectedTime = startTime;
      let anySoundDetected = false;

      const detectSound = () => {
        //recording stopped by user:
        //FIX: original had `if (IS_RECORDING) return;` — inverted, so
        //the loop died immediately while recording was active.
        if (!IS_RECORDING)
          return;

        const currentTime = Date.now();

        //time out:
        if (currentTime > startTime + DIALOG_MAX_LENGTH) {
          MEDIA_RECORDER.stop();
          return;
        }

        //a dialog detected: some sound followed by enough silence.
        if (anySoundDetected &&
            currentTime > lastDetectedTime + DELAY_BETWEEN_DIALOGS) {
          MEDIA_RECORDER.stop();
          return;
        }

        //check for detection:
        analyser.getByteFrequencyData(domainData);
        for (let i = 0; i < bufferLength; i++) {
          if (domainData[i] > 0) {
            anySoundDetected = true;
            lastDetectedTime = Date.now();
          }
        }

        //continue the loop:
        window.requestAnimationFrame(detectSound);
      };
      window.requestAnimationFrame(detectSound);

      //stop event:
      MEDIA_RECORDER.addEventListener('stop', () => {
        //stop all the tracks:
        stream.getTracks().forEach(track => track.stop());
        if (!anySoundDetected) return;

        //send to server:
        const audioBlob = new Blob(audioChunks, { 'type': 'audio/mp3' });
        doWhateverWithAudio(audioBlob);

        //start recording again:
        record();
      });
    });
}
//doWhateverWithAudio: consumer hook invoked with each finished dialog
//recording as a Blob. Replace the body with your own handling.
function doWhateverWithAudio(audioBlob){
//.... send to server, downlod, etc.
}
I have a buffer that contains the voice of a person captured from a media stream, and I send it from JavaScript to Node.js using socket.io.
I need to convert that buffer to text (like speech to text, but the voice stored as buffer coming from media stream)
There is a helper function I used (in Node.js, see below) that converts between Buffer and ArrayBuffer,
and there is a package called node-blob that convert buffer to audio blob
but I search a lot how convert audio or even buffer to text, but I failed
any help, code or package that may help to convert it to text ?
JavaScript
navigator.mediaDevices
  .getUserMedia({ video: true, audio: true })
  .then((stream) => {
    setSrcVideo(stream);

    // Rebuild a stream out of the first video and audio tracks.
    const mediasStream = new MediaStream();
    mediasStream.addTrack(stream.getVideoTracks()[0]);
    mediasStream.addTrack(stream.getAudioTracks()[0]);

    const mediaRecorder = new MediaRecorder(mediasStream);
    socket.emit('ready');

    // Forward each non-empty chunk to the server.
    mediaRecorder.addEventListener('dataavailable', (event) => {
      if (event.data && event.data.size > 0) {
        socket.emit('send-chunks', event.data);
      }
    });

    // The server tells us when to begin; emit a chunk every second.
    socket.on('start-recording', () => {
      mediaRecorder.start(1000);
    });
  });
and I receive that buffer via socket.on('send-chunks') in Node.js like this:
NodeJS
// connection to socket.io
io.on('connection', (socket) => {
  // Client is ready: tell it to begin recording.
  socket.on('ready', () => {
    socket.emit('start-recording');
  });

  // Media chunks arrive here as Buffers.
  socket.on('send-chunks', (chunks) => {
    // convert to text
  });
});
// helper functions

/**
 * Copies a Node.js Buffer into a standalone ArrayBuffer.
 *
 * A copy is made (rather than returning buffer.buffer) because a Buffer
 * may be a view onto a larger shared pool.
 *
 * @param {Buffer} buffer - bytes to copy.
 * @returns {ArrayBuffer} a new ArrayBuffer holding the same bytes.
 */
const toArrayBuffer = (buffer) => {
  const arrayBuffer = new ArrayBuffer(buffer.length);
  // Bulk copy instead of the original byte-by-byte loop.
  new Uint8Array(arrayBuffer).set(buffer);
  return arrayBuffer;
};
/**
 * Copies an ArrayBuffer into a new Node.js Buffer.
 *
 * Buffer.from(arrayBuffer) would share memory with the source; copying
 * through a Uint8Array view preserves the original's copy semantics.
 *
 * @param {ArrayBuffer} arrayBuffer - bytes to copy.
 * @returns {Buffer} a freshly allocated Buffer holding the same bytes.
 */
const toBuffer = (arrayBuffer) => {
  const buffer = Buffer.alloc(arrayBuffer.byteLength);
  // Bulk copy instead of the original byte-by-byte loop.
  buffer.set(new Uint8Array(arrayBuffer));
  return buffer;
};
I am trying to use the azure api (speech to text), but when I execute the code it does not give me the audio result.
The audio is in the format requested (.WAV).
code example documentation
const fs = require('fs');
const sdk = require("microsoft-cognitiveservices-speech-sdk");

const speechConfig = sdk.SpeechConfig.fromSubscription("---", "eastus2");
// FIX: without an explicit recognition language the service can return
// an empty result; set it to match the audio.
speechConfig.speechRecognitionLanguage = "en-US";

// Streams a WAV file into the Speech SDK and logs the recognized text.
function fromFile() {
  let pushStream = sdk.AudioInputStream.createPushStream();

  // Push file chunks into the SDK stream, closing it at EOF.
  fs.createReadStream("audio/aboutSpeechSdk.wav").on('data', function (arrayBuffer) {
    pushStream.write(arrayBuffer.slice());
  }).on('end', function () {
    pushStream.close();
  });

  let audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);
  let recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);

  recognizer.recognizeOnceAsync(
    result => {
      console.log(`RECOGNIZED: Text=${result.text}`);
      recognizer.close();
    },
    // FIX: surface failures instead of hanging silently.
    err => {
      console.log(err);
      recognizer.close();
    }
  );
}
fromFile();
According to the code you provide, it seems that you do not configure the Speech Recognition Language. Please add the line speechConfig.speechRecognitionLanguage = "" to your sample. For more details about languages, please refer to here.
For example. You can download the video to do a test.
var sdk = require("microsoft-cognitiveservices-speech-sdk");
var fs = require("fs");

// Fill in your own subscription key and region.
var subscriptionKey = "";
var serviceRegion = "";
var language = "en-US";

// Opens `filename` and pipes its bytes into a Speech SDK push stream.
function openPushStream(filename) {
  // create the push stream we need for the speech sdk.
  var pushStream = sdk.AudioInputStream.createPushStream();
  // open the file and push it to the push stream.
  fs.createReadStream(filename)
    .on("data", function (arrayBuffer) {
      pushStream.write(arrayBuffer.slice());
    })
    .on("end", function () {
      pushStream.close();
    });
  return pushStream;
}

var audioConfig = sdk.AudioConfig.fromStreamInput(
  openPushStream("aboutSpeechSdk.wav")
);
var speechConfig = sdk.SpeechConfig.fromSubscription(
  subscriptionKey,
  serviceRegion
);
// The recognition language must be set explicitly.
speechConfig.speechRecognitionLanguage = language;

var recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
recognizer.recognizeOnceAsync(
  function (result) {
    console.log(result.text);
    recognizer.close();
    recognizer = undefined;
  },
  function (err) {
    console.log(err);
    recognizer.close();
    recognizer = undefined;
  }
); // FIX: the original snippet was missing this closing parenthesis.
For more details, please refer to the blog
I have a WAV file and I want to convert it to an AudioBuffer, because I have a function that only takes an AudioBuffer as input. How can I get an AudioBuffer from an audio file? I've already searched the internet, and all I found was converting from an AudioBuffer to WAV — the opposite of what I want. I also tried feeding the function Base64 data of the audio, with no luck.
I've already written this function, but it doesn't seem to give me what I want:
/**
 * Downloads `url` and returns its bytes as a Node.js Buffer, or
 * { error } on failure.
 *
 * NOTE(review): this yields raw encoded bytes, NOT a Web Audio
 * AudioBuffer; to feed alienSoundTransform the bytes must first go
 * through AudioContext#decodeAudioData.
 *
 * @param {string} url - location of the audio file.
 * @returns {Promise<Buffer|{error: Error}>}
 */
const getbuffer = async (url) => {
  try {
    // FIX: don't mix await with .then(), and don't call Buffer.from
    // with `new` — it is a factory function, not a constructor.
    const response = await axios.get(url, { responseType: 'arraybuffer' });
    return Buffer.from(response.data);
  } catch (e) {
    return { error: e };
  }
};
the buffer i want is to feed to this function so it can change the audio for me
/**
 * Renders `audioBuffer` through an "alien voice" graph (vibrato via an
 * oscillator-modulated delay) and returns the processed AudioBuffer.
 *
 * @param {AudioBuffer} audioBuffer - decoded input audio.
 * @returns {Promise<AudioBuffer>} the rendered output.
 */
async function alienSoundTransform(audioBuffer) {
  // FIX: the original called an undefined helper `offlineContext(...)`,
  // which threw a ReferenceError; construct a real OfflineAudioContext.
  let ctx = new OfflineAudioContext(audioBuffer.numberOfChannels, audioBuffer.length, audioBuffer.sampleRate);

  let source = ctx.createBufferSource();
  source.buffer = audioBuffer;

  // 5 Hz sine gently modulating the delay time -> pitch wobble.
  let oscillator = ctx.createOscillator();
  oscillator.frequency.value = 5;
  oscillator.type = 'sine';

  let oscillatorGain = ctx.createGain();
  oscillatorGain.gain.value = 0.05;

  let delay = ctx.createDelay();
  delay.delayTime.value = 0.05;

  // source --> delay --> ctx.destination
  // oscillator --> oscillatorGain --> delay.delayTime --> ctx.destination
  source.connect(delay);
  delay.connect(ctx.destination);
  oscillator.connect(oscillatorGain);
  oscillatorGain.connect(delay.delayTime);

  oscillator.start();
  source.start();

  let outputAudioBuffer = await ctx.startRendering();
  return outputAudioBuffer;
}