I am trying to add a voice message feature to my chat app, and I have a problem playing back the audio that I record.
recordAudio.js
import { onUnmounted } from 'vue'

const constraint = { audio: true }
let chunks = []

function record() {
  let mediaRecorder
  let stream

  function close() {
    // console.log(mediaRecorder?.state, 'state man')
    if (mediaRecorder && mediaRecorder?.state == 'recording') {
      mediaRecorder?.stop()
      stream && (
        stream.getTracks()
          .forEach(track => track.stop())
      )
    }
  }

  onUnmounted(() => {
    close()
  })

  async function start() {
    if (navigator.mediaDevices.getUserMedia) {
      const strm = await navigator.mediaDevices.getUserMedia(constraint)
      // console.log(strm)
      if (!strm) return false
      // console.log('media', mediaRecorder)
      stream = strm
      mediaRecorder = new MediaRecorder(strm)
      mediaRecorder.start(100)
      // console.log('listingin for audio')
      mediaRecorder.ondataavailable = (e) => {
        // console.log(e.data)
        chunks.push(e.data)
      }
    }
    return true
  }

  function stop() {
    close()
    const blob = new Blob(chunks, { type: 'audio/ogg; codecs=opus' });
    console.log(chunks)
    chunks = []
    // const audioURL = URL.createObjectURL(blob)
    return blob
  }

  return {
    start,
    stop
  }
}

export {
  record
}
This is a code fragment in a .vue file:
import { record } from '#util/recordAudio.js'

const { start, stop } = record()

async function recordAudio() {
  if (!recording.value) {
    let res = await start()
    res && (recording.value = true)
  } else {
    stop()
    recording.value = false
  }
}

function sendAudio() {
  let audioBlob = stop()
  recording.value = false
  console.log(audioBlob)
  messages.addMessage({
    id: props.chat.id,
    message: {
      id: 1,
      to: 2,
      from: 1,
      message: audioBlob,
      seen: true,
      type: 'audio',
      createdAt: new Date()
    }
  })
}

let url = {}

function getObjectUrl(id, message) {
  if (!url[id]) {
    url[id] = URL.createObjectURL(message)
    return url[id]
  }
  return url[id]
}
This is the template for the audio:
<div v-if='message.type == "audio"'>
  <audio controls :src='getObjectUrl(message.id, message.message)'></audio>
</div>
It seems to work the first time, but it doesn't work afterwards.
In Firefox I get the following warning/error message:
Media resource blob:http://localhost:5374/861d13c5-533f-4cd7-8608-68eecc7deb4e could not be decoded
Media resource blob:http://localhost:5374/861d13c5-533f-4cd7-8608-68eecc7deb4e could not be decoded, error: Error Code: NS_ERROR_DOM_MEDIA_METADATA_ERR (0x806e0006)
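A side note on the fragment above (an assumption, not a confirmed diagnosis of the decode error): MediaRecorder.stop() is asynchronous, so the last dataavailable event can fire after stop() returns, meaning the Blob may be assembled before the final chunk has arrived. A minimal sketch of a stop() that waits for the recorder's onstop event before building the Blob could look like this, reusing the mediaRecorder, chunks, and close() from the module above:

// Illustrative only: resolve with the Blob once MediaRecorder has actually
// stopped, so every chunk delivered by ondataavailable is included.
function stop() {
  return new Promise((resolve) => {
    mediaRecorder.onstop = () => {
      const blob = new Blob(chunks, { type: mediaRecorder.mimeType })
      chunks = []
      resolve(blob)
    }
    close() // stops the recorder and the tracks, as in the original close()
  })
}

Callers such as sendAudio() would then need to await stop() before passing the Blob on.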
I would like to record my voice together with some MP3 music that I choose via the video tag. When I change the element's src, the new music is not recorded; MediaRecorder only records the first track. I would like to record all of the music coming from the video tag's src.
let constraintObj = {
  audio: 1,
  video: 0
}

navigator.mediaDevices
  .getUserMedia(constraintObj)
  .then((mediaStreamObj) => {
    let start = document.getElementById("btnStart");
    let stop = document.getElementById("btnStop");
    let autostop = document.getElementById("autobtnStop");
    let playAudio = document.getElementById("pma");

    var playAudioStream = playAudio.captureStream();

    let audioContext = new AudioContext();
    const acsource = audioContext.createMediaElementSource(playAudio);
    acsource && acsource.connect(audioContext.destination);

    let dest = audioContext.createMediaStreamDestination();
    let audioIn_01 = audioContext.createMediaStreamSource(mediaStreamObj);
    let audioIn_02 = audioContext.createMediaStreamSource(playAudioStream);
    audioIn_01.connect(dest);
    audioIn_02 && audioIn_02.connect(dest);

    let mediaRecorder = new MediaRecorder(dest.stream);
    let chunks = [];

    start.addEventListener("click", () => {
      mediaRecorder.start();
      console.log(mediaRecorder.state);
    });

    stop.addEventListener("click", () => {
      mediaRecorder.stop();
    });

    autostop.addEventListener("click", () => {
      if (mediaRecorder.state !== "inactive") {
        mediaRecorder.stop();
      }
    });

    mediaRecorder.ondataavailable = (ev) => {
      chunks.push(ev.data);
      if (selectedEpisodes !== null && sameDate <= 0) {
        let blobFile = new Blob(chunks, { type: "audio/ogg; codecs=opus" });
        chunks = [];
        const datas = {
          name,
          episode_id,
          record_datetime: record_datetime,
          playtime_sec: playtime_sec,
          files: blobFile,
        };
        dispatch(editEpisodeStatus(epSelected_ID, { status: "finished" }));
        dispatch(createPodcast(datas));
      }
    };

    mediaRecorder.onstop = () => {
      let blobFile = new Blob(chunks, { type: "audio/ogg; codecs=opus" });
      chunks = [];
      const datas = {
        name,
        episode_id,
        record_datetime: record_datetime,
        playtime_sec: playtime_sec,
        files: blobFile
      };
      dispatch(editEpisodeStatus(epSelected_ID, { status: "finished" }));
      dispatch(createPodcast(datas));
    };
  })
  .catch((err) => {
    console.log("catch startLive", err.name, err.message);
  });
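One untested idea worth noting (an assumption, not a confirmed fix): instead of going through playAudio.captureStream(), the single MediaElementAudioSourceNode can be routed both to the speakers and to the MediaStreamAudioDestinationNode, so whatever the element plays, including after its src changes, is mixed with the microphone. Since a media element can typically be wired to only one MediaElementAudioSourceNode, this sketch replaces the acsource/captureStream() wiring above rather than adding to it; mediaStreamObj and playAudio are the same objects as in the question's code:

// Sketch: mix the microphone and the media element through the Web Audio
// graph, reusing one MediaElementAudioSourceNode instead of captureStream().
const audioContext = new AudioContext();
const dest = audioContext.createMediaStreamDestination();

const micSource = audioContext.createMediaStreamSource(mediaStreamObj);
const elementSource = audioContext.createMediaElementSource(playAudio);

elementSource.connect(audioContext.destination); // keep the music audible
elementSource.connect(dest);                     // and feed it to the recorder
micSource.connect(dest);

const mediaRecorder = new MediaRecorder(dest.stream);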
When I use chrome.tabCapture.capture() with the MediaRecorder API to record the stream, the original audio of the tab I am capturing gets muted, although the audio comes out fine in the recorded stream. I want the audio in the tab to keep playing normally.
class Recorder {
  constructor(onChunksAvailable) {
    this.chunks = [];
    this.active = false;
    this.callback = onChunksAvailable;
  }

  start(stream) {
    if (this.active) {
      throw new Error("recorder is already running");
    }
    this.recorder = new MediaRecorder(stream, {
      mimeType: "audio/webm",
    });
    this.recorder.onstop = () => {
      stream.getAudioTracks()[0].stop();
      this.callback([...this.chunks]);
      setTimeout(() => {
        this.chunks = [];
      });
      this.active = false;
    };
    this.recorder.ondataavailable = (event) => this.chunks.push(event.data);
    this.active = true;
    this.recorder.start();
  }

  stop() {
    if (!this.active) {
      throw new Error("recorder is already stopped");
    } else {
      this.recorder.stop();
    }
  }
}

let rec = new Recorder(async (chunks) => {
  // using chunks then to get the stream
});

chrome.tabCapture.capture(
  {
    audio: true,
    video: false,
  },
  function (stream) {
    rec.start(stream);
  }
);
Forgive the lack of documentation, as I last played with these APIs years ago, but MDN has some material.
In my case, adding these three lines to the start function fixed it:
this.context = new AudioContext();
this.stream = this.context.createMediaStreamSource(stream);
this.stream.connect(this.context.destination);
class Recorder {
  constructor(onChunksAvailable) {
    this.chunks = [];
    this.active = false;
    this.callback = onChunksAvailable;
    this.context = new AudioContext();
  }

  start(stream) {
    if (this.active) {
      throw new Error("recorder is already running");
    }

    // Reconnect the stream to the actual output
    this.stream = this.context.createMediaStreamSource(stream);
    this.stream.connect(this.context.destination);

    this.recorder = new MediaRecorder(stream, {
      mimeType: "audio/webm",
    });
    this.recorder.onstop = () => {
      stream.getAudioTracks()[0].stop();
      this.callback([...this.chunks]);
      setTimeout(() => {
        this.chunks = [];
      });
      this.active = false;
    };
    this.recorder.ondataavailable = (event) => this.chunks.push(event.data);
    this.active = true;
    this.recorder.start();
  }

  stop() {
    if (!this.active) {
      throw new Error("recorder is already stopped");
    } else {
      this.recorder.stop();
    }
  }
}

let rec = new Recorder(async (chunks) => {
  // using chunks then to get the stream
});

chrome.tabCapture.capture(
  {
    audio: true,
    video: false,
  },
  function (stream) {
    rec.start(stream);
  }
);
Sorry for the lack of details, but I believe that when you start an audio capture, the stream is disconnected from the default output (the speakers). By creating a secondary MediaStreamSource and connecting it to the default output (AudioContext.destination), you allow the stream to keep playing through the speakers while it is also fed into your recorder.
Sources
MDN: AudioContext
MDN: MediaStreamSource
Chrome extension I made 2 years ago
I am creating an application using Electron.
I want to output SpeechSynthesis voice to a WAV file.
The voice is Japanese only.
(So I can't use a library like meSpeak.)
I also want to use voices installed on the local PC (like 唄詠, Softalk, etc.).
With the following code, the output is not produced correctly:
the file itself is written, but its content is not what I intended.
The tested environment is macOS 10.13.1.
How should I do it?
Main Process:
// This is only part of the code.
// There are actually many other properties;
// this shows only the minimum necessary part.
let obj = {
  speech: "コンニチワ", // it was converted from sentence to reading katakana
  cast: {
    voice: {
      pitch: 100,
      speed: 100,
      volume: 50.0,
      name: ""
    }
  }
}

// mainWindow is BrowserWindow
mainWindow.webContents.send('record-speech', obj, "example.wav")
Renderer Process:
ipcRenderer.on('record-speech', (event, obj, path) => {
  // Setup utterance
  let voice = null
  speechSynthesis.onvoiceschanged = () => {
    voice = speechSynthesis.getVoices().find(({
      name: _name
    }) => _name === obj.cast.voice.name)
  }

  let utter = new SpeechSynthesisUtterance()
  utter.lang = "ja-JP"
  utter.text = obj.speech
  utter.pitch = obj.cast.voice.pitch / 100
  utter.rate = obj.cast.voice.speed / 20
  utter.voice = voice
  utter.volume = obj.cast.voice.volume / 100

  let audioContext = new AudioContext()
  navigator.getUserMedia({ audio: true }, (stream) => {
    let audioInput = audioContext.createMediaStreamSource(stream)

    // Using 'wave-recorder' package of npm
    let recorder = WaveRecorder(audioContext, {
      channels: 2,
      bitDepth: 32,
      silenceDuration: 0.1
    })
    audioInput.connect(recorder.input)

    let fileStream = fs.createWriteStream(path)
    recorder.pipe(fileStream)

    recorder.on('header', (header) => {
      let headerStream = fs.createWriteStream(path, {
        start: 0,
        flags: 'w'
      })
      headerStream.write(header)
      headerStream.end()
    })

    utter.onend = ((event) => {
      console.log("Speak ended");
    })

    speechSynthesis.speak(utter)
  }, (e) => { console.log(e); })
})
Previous questions have presented the same or a similar inquiry:
Can Web Speech API used in conjunction with Web Audio API?
How to access audio result from Speech Synthesis API?
Record HTML5 SpeechSynthesisUtterance generated speech to file
generate audio file with W3C Web Speech API
yet no workarounds appear to have been created using window.speechSynthesis(), though there are workarounds using eSpeak or meSpeak (How to create or convert text to audio at chromium browser?) or making requests to external servers.
How to capture and record audio output of window.speechSynthesis.speak() call and return result as a Blob, ArrayBuffer, AudioBuffer or other object type?
The Web Speech API Specification does not presently provide a means or hint on how to achieve returning or capturing and recording audio output of window.speechSynthesis.speak() call.
See also
MediaStream, ArrayBuffer, Blob audio result from speak() for recording?
Re: MediaStream, ArrayBuffer, Blob audio result from speak() for recording?
Re: MediaStream, ArrayBuffer, Blob audio result from speak() for recording?. In pertinent part, use cases include, but are not limited to
Persons who have issues speaking, e.g., persons who have suffered a
stroke or other communication-inhibiting afflictions. They could convert
text to an audio file and send the file to another individual or group.
This feature would help them communicate with other persons,
similar to the technologies which assisted Stephen Hawking in communicating;
Presently, the only person who can hear the audio output is the person
in front of the browser; in essence, not utilizing the full potential of
the text to speech functionality. The audio result can be used as an
attachment within an email; media stream; chat system; or other
communication application. That is, control over the generated audio output;
Another application would be to provide a free, libre, open source audio
dictionary and translation service - client to client and client to server,
server to client.
It is possible to capture the audio output of a window.speechSynthesis.speak() call utilizing navigator.mediaDevices.getUserMedia() and MediaRecorder(). The expected result is returned in the Chromium browser; the implementation in Firefox has issues. Select Monitor of Built-in Audio Analog Stereo at the navigator.mediaDevices.getUserMedia() prompt.
The workaround is cumbersome. We should be able to get generated audio, at least as a Blob, without navigator.mediaDevices.getUserMedia() and MediaRecorder().
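Distilled to its essentials, the workaround records whatever the selected input (the audio monitor device) plays while speak() runs; a minimal sketch follows, where speakToBlob is an illustrative helper name rather than part of the proof of concept below:

// Record the monitor device while an utterance is spoken and
// resolve with the recorded audio as a Blob.
async function speakToBlob(text) {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const recorder = new MediaRecorder(stream);
  const chunks = [];
  recorder.ondataavailable = (e) => chunks.push(e.data);
  const done = new Promise((resolve) => {
    recorder.onstop = () => {
      stream.getTracks().forEach((t) => t.stop());
      resolve(new Blob(chunks, { type: recorder.mimeType }));
    };
  });
  const utterance = new SpeechSynthesisUtterance(text);
  utterance.onend = () => recorder.stop();
  recorder.start();
  speechSynthesis.speak(utterance);
  return done;
}

The SpeechSynthesisRecorder class in the proof of concept below wraps the same idea and exposes additional output types.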
More interest is evidently needed from browser users, JavaScript and C++ developers, browser implementers, and specification authors to create a proper specification for the feature and a consistent implementation in browsers' source code; see How to implement option to return Blob, ArrayBuffer, or AudioBuffer from window.speechSynthesis.speak() call.
On Chromium, a speech dispatcher program should be installed and the instance launched with the --enable-speech-dispatcher flag set, as window.speechSynthesis.getVoices() otherwise returns an empty array; see How to use Web Speech API at chromium?.
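A quick way to check whether any voices are exposed to the page at all (a small illustrative snippet, not part of the proof of concept):

// If both logs stay empty, no synthesis voices are available to the page
// (on Chromium, see the speech dispatcher note above).
speechSynthesis.onvoiceschanged = () => console.log(speechSynthesis.getVoices());
console.log(speechSynthesis.getVoices());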
Proof of concept
// SpeechSynthesisRecorder.js guest271314 6-17-2017
// Motivation: Get audio output from `window.speechSynthesis.speak()` call
// as `ArrayBuffer`, `AudioBuffer`, `Blob`, `MediaSource`, `MediaStream`, `ReadableStream`, or other object or data types
// See https://lists.w3.org/Archives/Public/public-speech-api/2017Jun/0000.html
// https://github.com/guest271314/SpeechSynthesisRecorder
// Configuration: Analog Stereo Duplex
// Input Devices: Monitor of Built-in Audio Analog Stereo, Built-in Audio Analog Stereo
class SpeechSynthesisRecorder {
  constructor({ text = "", utteranceOptions = {}, recorderOptions = {}, dataType = "" }) {
    if (text === "") throw new Error("no words to synthesize");
    this.dataType = dataType;
    this.text = text;
    this.mimeType = MediaRecorder.isTypeSupported("audio/webm; codecs=opus")
      ? "audio/webm; codecs=opus" : "audio/ogg; codecs=opus";
    this.utterance = new SpeechSynthesisUtterance(this.text);
    this.speechSynthesis = window.speechSynthesis;
    this.mediaStream_ = new MediaStream();
    this.mediaSource_ = new MediaSource();
    this.mediaRecorder = new MediaRecorder(this.mediaStream_, {
      mimeType: this.mimeType,
      bitsPerSecond: 256 * 8 * 1024
    });
    this.audioContext = new AudioContext();
    this.audioNode = new Audio();
    this.chunks = Array();
    if (utteranceOptions) {
      if (utteranceOptions.voice) {
        this.speechSynthesis.onvoiceschanged = e => {
          const voice = this.speechSynthesis.getVoices().find(({
            name: _name
          }) => _name === utteranceOptions.voice);
          this.utterance.voice = voice;
          console.log(voice, this.utterance);
        }
        this.speechSynthesis.getVoices();
      }
      let {
        lang, rate, pitch
      } = utteranceOptions;
      Object.assign(this.utterance, {
        lang, rate, pitch
      });
    }
    this.audioNode.controls = "controls";
    document.body.appendChild(this.audioNode);
  }
  start(text = "") {
    if (text) this.text = text;
    if (this.text === "") throw new Error("no words to synthesize");
    return navigator.mediaDevices.getUserMedia({
      audio: true
    })
      .then(stream => new Promise(resolve => {
        const track = stream.getAudioTracks()[0];
        this.mediaStream_.addTrack(track);
        // return the current `MediaStream`
        if (this.dataType && this.dataType === "mediaStream") {
          resolve({ tts: this, data: this.mediaStream_ });
        }
        this.mediaRecorder.ondataavailable = event => {
          if (event.data.size > 0) {
            this.chunks.push(event.data);
          }
        };
        this.mediaRecorder.onstop = () => {
          track.stop();
          this.mediaStream_.getAudioTracks()[0].stop();
          this.mediaStream_.removeTrack(track);
          console.log(`Completed recording ${this.utterance.text}`, this.chunks);
          resolve(this);
        }
        this.mediaRecorder.start();
        this.utterance.onstart = () => {
          console.log(`Starting recording SpeechSynthesisUtterance ${this.utterance.text}`);
        }
        this.utterance.onend = () => {
          this.mediaRecorder.stop();
          console.log(`Ending recording SpeechSynthesisUtterance ${this.utterance.text}`);
        }
        this.speechSynthesis.speak(this.utterance);
      }));
  }
  blob() {
    if (!this.chunks.length) throw new Error("no data to return");
    return Promise.resolve({
      tts: this,
      data: this.chunks.length === 1 ? this.chunks[0] : new Blob(this.chunks, {
        type: this.mimeType
      })
    });
  }
  arrayBuffer(blob) {
    if (!this.chunks.length) throw new Error("no data to return");
    return new Promise(resolve => {
      const reader = new FileReader();
      reader.onload = e => resolve(({
        tts: this,
        data: reader.result
      }));
      reader.readAsArrayBuffer(blob ? new Blob(blob, {
        type: blob.type
      }) : this.chunks.length === 1 ? this.chunks[0] : new Blob(this.chunks, {
        type: this.mimeType
      }));
    });
  }
  audioBuffer() {
    if (!this.chunks.length) throw new Error("no data to return");
    return this.arrayBuffer()
      // arrayBuffer() resolves with { tts, data }
      .then(({ data: ab }) => this.audioContext.decodeAudioData(ab))
      .then(buffer => ({
        tts: this,
        data: buffer
      }))
  }
  mediaSource() {
    if (!this.chunks.length) throw new Error("no data to return");
    return this.arrayBuffer()
      .then(({
        data: ab
      }) => new Promise((resolve, reject) => {
        this.mediaSource_.onsourceended = () => resolve({
          tts: this,
          data: this.mediaSource_
        });
        this.mediaSource_.onsourceopen = () => {
          if (MediaSource.isTypeSupported(this.mimeType)) {
            const sourceBuffer = this.mediaSource_.addSourceBuffer(this.mimeType);
            sourceBuffer.mode = "sequence";
            sourceBuffer.onupdateend = () =>
              this.mediaSource_.endOfStream();
            sourceBuffer.appendBuffer(ab);
          } else {
            reject(`${this.mimeType} is not supported`)
          }
        }
        this.audioNode.src = URL.createObjectURL(this.mediaSource_);
      }));
  }
  readableStream({ size = 1024, controllerOptions = null, rsOptions = {} } = {}) {
    if (!this.chunks.length) throw new Error("no data to return");
    const src = this.chunks.slice(0);
    const chunk = size;
    return Promise.resolve({
      tts: this,
      data: new ReadableStream(controllerOptions || {
        start(controller) {
          console.log(src.length);
          controller.enqueue(src.splice(0, chunk))
        },
        pull(controller) {
          if (src.length === 0) {
            controller.close();
            return;
          }
          controller.enqueue(src.splice(0, chunk));
        }
      }, rsOptions)
    });
  }
}
Usage
let ttsRecorder = new SpeechSynthesisRecorder({
  text: "The revolution will not be televised",
  utteranceOptions: {
    voice: "english-us espeak",
    lang: "en-US",
    pitch: .75,
    rate: 1
  }
});
// ArrayBuffer
ttsRecorder.start()
  // `tts` : `SpeechSynthesisRecorder` instance, `data` : audio as `dataType` or method call result
  .then(tts => tts.arrayBuffer())
  .then(({ tts, data }) => {
    // do stuff with `ArrayBuffer`, `AudioBuffer`, `Blob`,
    // `MediaSource`, `MediaStream`, `ReadableStream`
    // `data` : `ArrayBuffer`
    tts.audioNode.src = URL.createObjectURL(new Blob([data], { type: tts.mimeType }));
    tts.audioNode.title = tts.utterance.text;
    tts.audioNode.onloadedmetadata = () => {
      console.log(tts.audioNode.duration);
      tts.audioNode.play();
    }
  })
// AudioBuffer
ttsRecorder.start()
  .then(tts => tts.audioBuffer())
  .then(({ tts, data }) => {
    // `data` : `AudioBuffer`
    let source = tts.audioContext.createBufferSource();
    source.buffer = data;
    source.connect(tts.audioContext.destination);
    source.start()
  })
// Blob
ttsRecorder.start()
  .then(tts => tts.blob())
  .then(({ tts, data }) => {
    // `data` : `Blob`
    tts.audioNode.src = URL.createObjectURL(data);
    tts.audioNode.title = tts.utterance.text;
    tts.audioNode.onloadedmetadata = () => {
      console.log(tts.audioNode.duration);
      tts.audioNode.play();
    }
  })
// ReadableStream
ttsRecorder.start()
  .then(tts => tts.readableStream())
  .then(({ tts, data }) => {
    // `data` : `ReadableStream`
    console.log(tts, data);
    data.getReader().read().then(({ value, done }) => {
      tts.audioNode.src = URL.createObjectURL(value[0]);
      tts.audioNode.title = tts.utterance.text;
      tts.audioNode.onloadedmetadata = () => {
        console.log(tts.audioNode.duration);
        tts.audioNode.play();
      }
    })
  })
// MediaSource
ttsRecorder.start()
  .then(tts => tts.mediaSource())
  .then(({ tts, data }) => {
    console.log(tts, data);
    // `data` : `MediaSource`
    tts.audioNode.srcObj = data;
    tts.audioNode.title = tts.utterance.text;
    tts.audioNode.onloadedmetadata = () => {
      console.log(tts.audioNode.duration);
      tts.audioNode.play();
    }
  })
// MediaStream
let ttsRecorder = new SpeechSynthesisRecorder({
  text: "The revolution will not be televised",
  utteranceOptions: {
    voice: "english-us espeak",
    lang: "en-US",
    pitch: .75,
    rate: 1
  },
  dataType: "mediaStream"
});
ttsRecorder.start()
  .then(({ tts, data }) => {
    // `data` : `MediaStream`
    // do stuff with active `MediaStream`
  })
  .catch(err => console.log(err))
plnkr
This is updated code from the previous answer, which works in Chrome 96:
Make sure to select the "Share system audio" checkbox in the "Choose what to share" window.
It won't run as an SO code snippet (save it to demo.html).
<script>
  (async () => {
    const text = "The revolution will not be televised";
    const blob = await new Promise(async resolve => {
      console.log("picking system audio");
      const stream = await navigator.mediaDevices.getDisplayMedia({ video: true, audio: true });
      const track = stream.getAudioTracks()[0];
      if (!track)
        throw "System audio not available";

      stream.getVideoTracks().forEach(track => track.stop());

      const mediaStream = new MediaStream();
      mediaStream.addTrack(track);

      const chunks = [];
      const mediaRecorder = new MediaRecorder(mediaStream, { bitsPerSecond: 128000 });
      mediaRecorder.ondataavailable = event => {
        if (event.data.size > 0)
          chunks.push(event.data);
      }
      mediaRecorder.onstop = () => {
        stream.getTracks().forEach(track => track.stop());
        mediaStream.removeTrack(track);
        resolve(new Blob(chunks));
      }
      mediaRecorder.start();

      const utterance = new SpeechSynthesisUtterance(text);
      utterance.onend = () => mediaRecorder.stop();
      window.speechSynthesis.speak(utterance);
      console.log("speaking...");
    });
    console.log("audio available", blob);

    const player = new Audio();
    player.src = URL.createObjectURL(blob);
    player.autoplay = true;
    player.controls = true;
  })()
</script>