Remove high frequency sound from streaming audio node js - javascript

I have a small app that accepts an incoming audio stream from the internet, and I'm trying to find the frequency of a tone or continuous beep. While the tone/beep is sounding, it is the only thing playing; the rest of the audio is either silence or talking. I'm using the node-pitchfinder npm module to find the tone, and when I use a sample audio clip I made of a 2,000 Hz tone the app prints out the frequency within one or two Hz. When I pull the audio stream from online, though, I keep getting results like 17,000 Hz. My guess is that there is some "noise" in the audio signal, and that's what the node-pitchfinder module is picking up.
Is there any way I can filter out that noise in real time to get an accurate frequency?
The streaming audio file is: http://relay.broadcastify.com/fq85hty701gnm4z.mp3
Code below:
const fs = require('fs');
const fsa = require('fs-extra');
const Lame = require('lame');
const Speaker = require('speaker');
const Volume = require('pcm-volume');
const Analyser = require('audio-analyser');
const request = require('request');
const Chunker = require('stream-chunker');
const { YIN } = require('node-pitchfinder');
const detectPitch = YIN({ sampleRate: 44100 });

//const BUFSIZE = 64;
const BUFSIZE = 500;

var decoder = new Lame.Decoder();
decoder.on('format', function(format) { onFormat(format); });

var chunker = Chunker(BUFSIZE);
chunker.pipe(decoder);

var options = {
  url: 'http://relay.broadcastify.com/fq85hty701gnm4z.mp3',
  headers: {
    "Upgrade-Insecure-Requests": 1,
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15"
  }
};

var audio_stream = request(options);
//var audio_stream = fs.createReadStream('./2000.mp3');
audio_stream.pipe(chunker);

function onFormat(format)
{
  //if (volume == "undefined")
  volume = 1.0;
  vol = new Volume(volume);
  speaker = new Speaker(format);
  analyser = createAnalyser(format);
  analyser.on('data', sample);
  console.log(format);
  vol.pipe(speaker);
  vol.pipe(analyser);
  decoder.pipe(vol);
  vol.setVolume(volume);
}

function createAnalyser(format)
{
  return new Analyser({
    fftSize: 8,
    bufferSize: BUFSIZE,
    'pcm-stream': {
      channels: format.channels,
      sampleRate: format.sampleRate,
      bitDepth: format.bitDepth
    }
  });
}

var logFile = 'log.txt';
var logOptions = { flag: 'a' };

function sample()
{
  if (analyser) {
    const frequency = detectPitch(analyser._data);
    console.log(frequency);
  }
}
My goal is to find the most dominant audio frequency in a chunk of data so I can figure out the tone.
I found some code that supposedly does this in Python:
def getFreq( pkt ):
    #Use FFT to determine the peak frequency of the last chunk
    thefreq = 0
    if len(pkt) == bufferSize*swidth:
        indata = np.array(wave.struct.unpack("%dh"%(len(pkt)/swidth), pkt))*window
        # filter out everything outside of our bandpass Hz
        bp = np.fft.rfft(indata)
        minFilterBin = (bandPass[0]/(sampleRate/bufferSize)) + 1
        maxFilterBin = (bandPass[1]/(sampleRate/bufferSize)) - 1
        for i in range(len(bp)):
            if i < minFilterBin:
                bp[i] = 0
            if i > maxFilterBin:
                bp[i] = 0
        # Take the fft and square each value
        fftData = abs(bp)**2
        # find the maximum
        which = fftData[1:].argmax() + 1
        # Compute the magnitude of the sample we found
        dB = 10*np.log10(1e-20+abs(bp[which]))
        #avgdB = 10*np.log10(1e-20+abs(bp[which - 10:which + 10].mean()))
        if dB >= minDbLevel:
            # use quadratic interpolation around the max
            if which != len(fftData)-1:
                warnings.simplefilter("error")
                try:
                    y0, y1, y2 = np.log(fftData[which-1:which+2:])
                    x1 = (y2 - y0) * .5 / (2 * y1 - y2 - y0)
                except RuntimeWarning:
                    return(-1)
                # find the frequency and output it
                warnings.simplefilter("always")
                thefreq = (which + x1) * sampleRate/bufferSize
            else:
                thefreq = which * sampleRate/bufferSize
        else:
            thefreq = -1
    return(thefreq)

Original answer:
I cannot provide you with a complete solution, but (hopefully) I can give you enough advice to solve the problem.
I would recommend that you save a part of the stream you want to analyze to a file and then take a look at the file with a spectrum analyzer (e.g. with Audacity). This allows you to determine whether the 17 kHz signal is actually present in the audio stream.
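A quick way to capture such a sample from your existing code might be the following sketch (the 10-second cutoff and the file name are arbitrary choices of mine):
// Save ~10 seconds of the incoming stream to a file you can open in Audacity.
const fs = require('fs');
const capture = fs.createWriteStream('capture.mp3');
audio_stream.pipe(capture);
setTimeout(() => {
  audio_stream.unpipe(capture);
  capture.end();
}, 10000);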
If the 17 kHz signal is present in the audio stream, then you can filter the audio stream with a low-pass filter (e.g. audio-biquad with type lowpass and a cutoff frequency somewhere above 2 kHz).
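Wiring that into your pipeline might look roughly like this untested sketch. I'm assuming here that audio-biquad is a transform stream constructed with type/frequency options, so check its README before relying on it:
const Biquad = require('audio-biquad');
const lowpass = new Biquad({ type: 'lowpass', frequency: 2500 }); // cutoff above the 2 kHz tone

// Inside onFormat(): filter the decoded PCM before it reaches the analyser
decoder.pipe(lowpass);
lowpass.pipe(vol);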
If the 17 kHz signal is not present in the audio then you could try to increase the buffer size BUFSIZE (currently set to 500 in your code). In the example on node-pitchfinder's GitHub page they use a complete audio file for pitch detection. Depending on how the pitch detection algorithm is implemented the result might be different for larger chunks of audio data (i.e. a few seconds) compared to very short chunks (500 samples is around 11 ms at sample rate 44100). Start with a large value for BUFSIZE (e.g. 44100 -> 1 second) and see if it makes a difference.
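For example, just changing the constant in your code:
const BUFSIZE = 44100; // one second of samples at 44.1 kHz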
Explanation of the Python code: The code uses the FFT (fast Fourier transform) to find out which frequencies are present in the audio signal and then searches for the frequency with the highest value. This usually works well for simple signals like a 2 kHz sine wave. If you want to implement it in JavaScript, you could use dsp.js, which provides an FFT implementation. However, it is quite a challenge to get this right without some knowledge of digital signal processing theory.
As a side note: the YIN algorithm does not use FFT, it is based on autocorrelation.
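To illustrate the autocorrelation idea (this is not YIN itself, which adds a difference function and thresholding on top; just my bare-bones sketch):
// Naive autocorrelation pitch estimate: find the lag with the strongest
// self-similarity and convert it to a frequency.
function autoCorrelatePitch(samples, sampleRate) {
  let bestLag = -1, best = 0;
  for (let lag = 20; lag <= 1000; lag++) { // ~44 Hz to ~2205 Hz at 44.1 kHz
    let sum = 0;
    for (let i = 0; i + lag < samples.length; i++) {
      sum += samples[i] * samples[i + lag];
    }
    if (sum > best) { best = sum; bestLag = lag; }
  }
  return bestLag > 0 ? sampleRate / bestLag : -1;
}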
Update
The following script uses the FFT data of audio-analyser and searches for the frequency bin with the maximum magnitude. This approach is very basic and only works well for signals where a single frequency is very dominant. The YIN algorithm is much better suited for pitch detection than this example.
const fs = require('fs');
const Lame = require('lame');
const Analyser = require('audio-analyser');
const Chunker = require('stream-chunker');

var analyser;
var fftSize = 4096;

var decoder = new Lame.Decoder();
decoder.on('format', format => {
  analyser = createAnalyser(format);
  decoder.pipe(analyser);
  analyser.on('data', processSamples);
  console.log(format);
});

var chunker = Chunker(fftSize);
var audio_stream = fs.createReadStream('./sine.mp3');
audio_stream.pipe(chunker);
chunker.pipe(decoder);

function createAnalyser(format) {
  return new Analyser({
    fftSize: fftSize,
    frequencyBinCount: fftSize / 2,
    sampleRate: format.sampleRate,
    channels: format.channels,
    bitDepth: format.bitDepth
  });
}

function processSamples() {
  if (analyser) {
    var fftData = new Uint8Array(analyser.frequencyBinCount);
    analyser.getByteFrequencyData(fftData);
    var maxBin = fftData.indexOf(Math.max(...fftData));
    var thefreq = maxBin * analyser.sampleRate / analyser.fftSize;
    console.log(maxBin + " " + thefreq);
  }
}

Related

How to Play RAW Audio Files?

I am currently working on a project that consists of a chart showing audio levels picked up by another device. The charts are made with the flot API, and I have zooming and selecting capabilities so I can select a time range on the chart and zoom into that region. My next step is to allow the user to listen to the audio that corresponds to that region of the chart. I have the audio files stored on a shared server, and all of the files are individual, minute-by-minute RAW data files. I have no experience with using audio in a webpage and am currently struggling with how to complete this task. As far as I have found, the <audio> HTML tag is incapable of processing RAW data files for playback. I have been looking into the Web Audio API but am confused about how it works and how to implement it.
My first question is how do I go about decoding RAW audio files from a server and displaying them on an HTML page for a client to listen to?
My second task is to grab all of the audio files corresponding to the selected range and combine them into one audio output. For example, if the client selected a time range of 1:00pm - 1:50pm, I would need to access 50 RAW data audio files each a minute in length. I would then want to combine them together to produce one single playback sound. Therefore, my second question is if anyone knows a way to accomplish this smoothly.
Thank you for any help anyone has to offer!
RAW files are already decoded PCM audio, but Audio elements can't play PCM directly. You'll need to prepend a RIFF/WAV header to the PCM bytes first. Multiple RAW files could be combined by setting the total sample/frame length in the header. Note that 50 minutes of decoded audio will take up a lot of memory in the browser, so keep an eye on that and measure/optimize accordingly.
initAudio()

async function initAudio() {
  // specify your file and its audio properties
  const url = 'https://dev.anthum.com/audio-worklet/audio/decoded-left.raw'
  const sampleRate = 48000
  const numChannels = 1 // mono or stereo
  const isFloat = true  // integer or floating point

  const buffer = await (await fetch(url)).arrayBuffer()

  // create WAV header
  const [type, format] = isFloat ? [Float32Array, 3] : [Uint8Array, 1]
  const wavHeader = new Uint8Array(buildWaveHeader({
    numFrames: buffer.byteLength / type.BYTES_PER_ELEMENT,
    bytesPerSample: type.BYTES_PER_ELEMENT,
    sampleRate,
    numChannels,
    format
  }))

  // create WAV file with header and downloaded PCM audio
  const wavBytes = new Uint8Array(wavHeader.length + buffer.byteLength)
  wavBytes.set(wavHeader, 0)
  wavBytes.set(new Uint8Array(buffer), wavHeader.length)

  // show audio player
  const audio = document.querySelector('audio')
  const blob = new Blob([wavBytes], { type: 'audio/wav' })
  audio.src = URL.createObjectURL(blob)
  document.querySelector('#loading').hidden = true
  audio.hidden = false
}
// adapted from https://gist.github.com/also/900023
function buildWaveHeader(opts) {
  const numFrames = opts.numFrames;
  const numChannels = opts.numChannels || 2;
  const sampleRate = opts.sampleRate || 44100;
  const bytesPerSample = opts.bytesPerSample || 2;
  const format = opts.format;
  const blockAlign = numChannels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;
  const dataSize = numFrames * blockAlign;
  const buffer = new ArrayBuffer(44);
  const dv = new DataView(buffer);
  let p = 0;

  function writeString(s) {
    for (let i = 0; i < s.length; i++) {
      dv.setUint8(p + i, s.charCodeAt(i));
    }
    p += s.length;
  }

  function writeUint32(d) {
    dv.setUint32(p, d, true);
    p += 4;
  }

  function writeUint16(d) {
    dv.setUint16(p, d, true);
    p += 2;
  }

  writeString('RIFF');              // ChunkID
  writeUint32(dataSize + 36);       // ChunkSize
  writeString('WAVE');              // Format
  writeString('fmt ');              // Subchunk1ID
  writeUint32(16);                  // Subchunk1Size
  writeUint16(format);              // AudioFormat
  writeUint16(numChannels);         // NumChannels
  writeUint32(sampleRate);          // SampleRate
  writeUint32(byteRate);            // ByteRate
  writeUint16(blockAlign);          // BlockAlign
  writeUint16(bytesPerSample * 8);  // BitsPerSample
  writeString('data');              // Subchunk2ID
  writeUint32(dataSize);            // Subchunk2Size

  return buffer;
}
body {
  text-align: center;
  padding-top: 1rem;
}
[hidden] {
  display: none;
}
audio {
  display: inline-block;
}
<div id="loading">Loading...</div>
<audio hidden controls></audio>
As an alternative that might be a bit easier, you can use Web Audio: do basically the same as above, but skip the Audio element. If necessary, convert the raw audio data to a float array, say f, and do something like this:
// Only need to do this once when setting up the page
let c = new AudioContext();
// Do this for each clip:
let b = new AudioBuffer({length: f.length, sampleRate: c.sampleRate});
b.copyToChannel(f, 0);
let s = new AudioBufferSourceNode(c, {buffer: b});
s.connect(c.destination);
s.start();
This is a rough sketch of how to use Web Audio for the playback. It can be refined to reuse AudioBuffers, and you have to take care of calling s.start() with the right time values. But I hope this is enough to get you started. If not, please ask more questions.
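If the raw data is 16-bit signed integer PCM rather than float, converting it to the float array f mentioned above could be sketched like this (assuming little-endian, mono input):
// Convert 16-bit signed PCM to Float32 samples in [-1, 1).
function int16ToFloat32(arrayBuffer) {
  const int16 = new Int16Array(arrayBuffer);
  const f = new Float32Array(int16.length);
  for (let i = 0; i < int16.length; i++) {
    f[i] = int16[i] / 32768;
  }
  return f;
}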

NodeJS: Capturing a stereo PCM wave stream into mono AudioBuffer

I'm recording audio from nodejs using node-microphone (which is just a javascript interface for arecord), and want to store the stream chunks in an AudioBuffer using web-audio-api (which is a nodejs implementation of the Web Audio API).
My audio source has two channels while my AudioBuffer has only one (in purpose).
This is my working configuration for recording audio with arecord through my USB sound card (I'm using a Raspberry pi 3 running on Raspbian buster):
arecord -D hw:1,0 -c 2 -f S16_LE -r 44100
Running this command with an output path and playing the resulting wav file with aplay works just fine. So node-microphone is able to record audio with these parameters, and at the end I get a Node.js readable stream carrying wave data.
But
I'm struggling to bridge the gap from the stream chunks (Buffer instances) to the AudioBuffer. More precisely: I'm not sure of the format of the incoming data, not sure of the destination format, and not sure how I would do the conversion between them:
The stream chunks are Buffers, so they are also Uint8Arrays. Given my configuration, I guess they are binary representations of 16-bit signed integers (little-endian, though I'm not sure what that implies).
The AudioBuffer holds multiple buffers (one per channel, so only one in my case) that I can access as Float32Arrays by calling AudioBuffer.prototype.getChannelData(). MDN also says:
The buffer contains data in the following format: non-interleaved IEEE754 32-bit linear PCM with a nominal range between -1 and +1, that is, 32bits floating point buffer, with each samples between -1.0 and 1.0.
The point is to find what I have to extract from the incoming Buffers and how I should transform it so it's suitable for the Float32Array destination (and remains valid wave data), knowing that the audio source is stereo and the AudioBuffer isn't.
My best contender so far was the Buffer.prototype.readFloatLE() method whose name looks like it would solve my problem, but this wasn't a success (just noise).
My first try (before doing research) was just to naively copy buffer data to Float32Array and interleaving indexes to handle stereo/mono conversion. Obviously it mostly produced noise but I could hear some of the sound I recorded (incredibly distorted but surely present) so I guess I should mention that.
This is a simplified version of my naive try (I'm aware this is not meant to work well, I just include it in my question as a base of discussion):
import { AudioBuffer } from 'web-audio-api'
import Microphone from 'node-microphone'

const rate = 44100
const channels = 2 // Number of source channels

const microphone = new Microphone({ // These parameters result in the arecord command above
  channels,
  rate,
  device: 'hw:1,0',
  bitwidth: 16,
  endian: 'little',
  encoding: 'signed-integer'
})

const audioBuffer = new AudioBuffer(
  1,         // 1 channel
  30 * rate, // 30 seconds buffer
  rate
)

const chunks = []
const data = audioBuffer.getChannelData(0) // This is the Float32Array
const stream = microphone.startRecording()

setTimeout(() => microphone.stopRecording(), 5000) // Recording for 5 seconds

stream.on('data', chunk => chunks.push(chunk))
stream.on('close', () => {
  chunks.reduce((offset, chunk) => {
    for (var index = 0; index < chunk.length; index += channels) {
      let value = 0
      for (var channel = 0; channel < channels; channel++) {
        value += chunk[index + channel]
      }
      data[(offset + index) / channels] = value / channels // Average value from the two channels
    }
    return offset + chunk.length // Since data comes as chunks, this offsets AudioBuffer's index
  }, 0)
})
I would be really grateful if you could help :)
So the input stereo signal comes as 16-bit signed integers, interleaving left and right channels, meaning that the corresponding Buffers (sequences of 8-bit unsigned integers) have this layout for a single stereo sample:
[LEFT ] 8 bits (LSB)
[LEFT ] 8 bits (MSB)
[RIGHT] 8 bits (LSB)
[RIGHT] 8 bits (MSB)
Since arecord is configured with the little-endian format, the Least Significant Byte (LSB) comes first and the Most Significant Byte (MSB) comes next.
The AudioBuffer's single channel buffer, represented by a Float32Array, expects values between -1 and 1 (one value per sample).
So to map values from the input Buffer to the destination Float32Array, I had to use the Buffer.prototype.readInt16LE(offset) method, incrementing the byte offset parameter by 4 for each sample (2 left bytes + 2 right bytes = 4 bytes), and scaling input values from the range [-32768, +32767] (the 16-bit signed integer range) to the range [-1, +1]:
import { AudioBuffer } from 'web-audio-api'
import Microphone from 'node-microphone'

const rate = 44100
const channels = 2 // 2 input channels

const microphone = new Microphone({
  channels,
  rate,
  device: 'hw:1,0',
  bitwidth: 16,
  endian: 'little',
  encoding: 'signed-integer'
})

const audioBuffer = new AudioBuffer(
  1,         // 1 channel
  30 * rate, // 30 seconds buffer
  rate
)

const chunks = []
const data = audioBuffer.getChannelData(0)
const stream = microphone.startRecording()

setTimeout(() => microphone.stopRecording(), 5000) // Recording for 5 seconds

stream.on('data', chunk => chunks.push(chunk))
stream.on('close', () => {
  chunks.reduce((offset, chunk) => {
    // Advance by 4 bytes per frame (2 bytes per channel sample)
    for (var index = 0; index < chunk.length; index += channels * 2) {
      let value = 0
      for (var channel = 0; channel < channels; channel++) {
        // Iterates through input channels and adds the values
        // of all the channels so we can compute the
        // average value later to reduce them into a mono signal.
        // Multiplies the channel index by 2 because
        // there are 2 bytes per channel sample.
        value += chunk.readInt16LE(index + channel * 2)
      }
      // Interpolates the index according to the number of input channels
      // (also divides it by 2 because there are 2 bytes per channel sample)
      // and computes the average value as well as the scaling
      // from range [-32768, +32767] to range [-1, +1]
      data[(offset + index) / channels / 2] = value / channels / 32768
    }
    return offset + chunk.length
  }, 0)
})

How to generate HTML5 video volume level chart?

Given a plain web video of say 30s:
<video src="my-video.mp4"></video>
How could I generate its volume level chart?
volume|
 level|    ******
      |   *      *        **
      |  *        *      *  *
      | **         *    *    *
      |*            *  *      *
      +--------------*-*------************------+--- time
      0                                         30s
           video is            and quiet
           loud here           here
Note:
Plain JavaScript, please. No libraries.
There are several ways to do this depending on what the usage is.
For accuracy you could measure in conventional volumes and units such as RMS, LUFS/LKFS (K-weighted, loudness), dBFS (full-scale dB) and so forth.
The simple naive approach is to just plot the peaks of the waveform. You would be interested in the positive values only. To get just the peaks, you would detect the direction between two points and log the first point when the direction changes from upward to downward (p0 > p1).
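A tiny sketch of that peak-picking idea (my own illustration, positive values only):
// Keep a sample when the direction flips from rising to falling.
function pickPeaks(samples) {
  const peaks = [];
  for (let i = 1; i < samples.length - 1; i++) {
    if (samples[i] > 0 && samples[i] >= samples[i - 1] && samples[i] > samples[i + 1]) {
      peaks.push({ index: i, value: samples[i] });
    }
  }
  return peaks;
}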
For all approaches you can finally apply some form of smoothing, such as a weighted moving average or a generic smoothing algorithm, to remove small peaks and changes; in the case of RMS, dB etc. you would use a window size, which can be combined with bin-smoothing (an average per segment).
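A sketch of such a moving average (the window size is something you would tune):
// Simple trailing moving average over a fixed window.
function movingAverage(values, windowSize) {
  const out = [];
  for (let i = 0; i < values.length; i++) {
    let sum = 0, n = 0;
    for (let j = Math.max(0, i - windowSize + 1); j <= i; j++) {
      sum += values[j];
      n++;
    }
    out.push(sum / n);
  }
  return out;
}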
To plot you will obtain the value for the current sample, assume it to be normalized and draw it as line or point to canvas scaled by plot area height.
Mini-discussion as to loading the source data
To address some of the questions in the comments; these are just off the top of my head to give some pointers -
Since the Web Audio API cannot do streaming on its own, you have to load the entire file into memory and decode the audio track into a buffer.
Pros: works (analysis part), fast analysis when data is eventually ready, works fine for smaller files, if cached the URL can be used without re-downloading
Cons: long initial load time/bad UX, possible memory hog/not good for large files, audio is "detached" from video sync-wise, forces reuse of URL*, if large and/or cache is not in place the file will have to be downloaded again/streamed, currently causes issues in some browsers/versions (see example below).
*: There is always the option of storing the downloaded video as blob in IndexedDB (with its implications) and use an Object-URL with that blob to stream in the video element (may require MSE to work properly, haven't tried myself).
Plotting while streaming:
Pros: Cheap on memory/resources
Cons: the plot cannot be shown in full until the entire file has been played through, the user may skip/jump parts, may not finish
Side-loading a low-quality mono audio-only file:
Pros: audio can be loaded into memory independent of video file, results in good enough approximation for level use
Cons: can delay initial loading of video, may not be ready in time before video starts, will require additional processing in advance
Server-side plotting:
Pros: can be plotted when uploaded, can store raw plot data that is provided as meta-data when video is requested, low bandwidth, data ready when video starts (assuming data is representing averages over time-segments).
Cons: require infrastructure on server that can separate, analyze and produce the plot-data, depending on how the data is stored may require database modification.
I might have left out or missed some points, but it should give the general idea...
Example
This example measures conventional dB for a given window size per sample. The bigger the window size, the smoother the result, but it will also take more time to calculate.
Note that, for simplicity in this example, the pixel position determines the dB window range. This may produce uneven gaps/overlaps depending on how the buffer size affects the current sample value, but it should work for the purpose demonstrated here. Also for simplicity I am scaling the dB reading by dividing it by 40, a somewhat arbitrary number here (the absolute value is just for the plotting and the way my brain worked (?) in the late night/early morning when I made this :) ).
I added bin/segment-smoothing in red on top to better show longer-term audio variations relevant to things such as auto-leveling.
I'm using an audio source here, but you can plug in a video source instead, as long as it contains an audio track format that can be decoded (aac, mp3, ogg etc.).
Besides from that, the example is just that, an example. It's not production code so take it for what it is worth. Make adjustments as needed.
(for some reason the audio won't play in Firefox v58beta, it will plot though. Audio plays in Chrome, FF58dev).
var ctx = c.getContext("2d"), ref, audio;
var actx = new (AudioContext || webkitAudioContext)();
var url = "//dl.dropboxusercontent.com/s/a6s1qq4lnwj46uj/testaudiobyk3n_lo.mp3";

ctx.font = "20px sans-serif";
ctx.fillText("Loading and processing...", 10, 50);
ctx.fillStyle = "#001730";

// Load audio
fetch(url, {mode: "cors"})
  .then(function(resp) {return resp.arrayBuffer()})
  .then(actx.decodeAudioData.bind(actx))
  .then(function(buffer) {
    // Get data from channel 0 (you will want to measure all/avg.)
    var channel = buffer.getChannelData(0);

    // dB per window + Plot
    var points = [0];
    ctx.clearRect(0, 0, c.width, c.height);
    ctx.moveTo(0, c.height);
    for(var x = 1, i, v; x < c.width; x++) {
      i = ((x / c.width) * channel.length)|0;  // get index in buffer based on x
      v = Math.abs(dB(channel, i, 8820)) / 40; // 200ms window, normalize
      ctx.lineTo(x, c.height * v);
      points.push(v);
    }
    ctx.fill();

    // smooth using bins
    var bins = 40; // segments
    var range = (c.width / bins)|0;
    var sum;
    ctx.beginPath();
    ctx.moveTo(0, c.height);
    for(x = 0, v; x < points.length; x++) {
      for(v = 0, i = 0; i < range; i++) {
        v += points[x++];
      }
      sum = v / range;
      ctx.lineTo(x - (range>>1), sum * c.height); // -r/2 to compensate visually
    }
    ctx.lineWidth = 2;
    ctx.strokeStyle = "#c00";
    ctx.stroke();

    // for audio / progressbar only
    c.style.backgroundImage = "url(" + c.toDataURL() + ")";
    c.width = c.width;
    ctx.fillStyle = "#c00";
    audio = document.querySelector("audio");
    audio.onplay = start;
    audio.onended = stop;
    audio.style.display = "block";
  });

// calculates RMS per window and returns dB
function dB(buffer, pos, winSize) {
  for(var rms, sum = 0, v, i = pos - winSize; i <= pos; i++) {
    v = i < 0 ? 0 : buffer[i];
    sum += v * v;
  }
  rms = Math.sqrt(sum / winSize); // corrected!
  return 20 * Math.log10(rms);
}

// for progress bar (audio)
function start() {if (!ref) ref = requestAnimationFrame(progress)}
function stop() {cancelAnimationFrame(ref); ref = null}
function progress() {
  var x = audio.currentTime / audio.duration * c.width;
  ctx.clearRect(0, 0, c.width, c.height);
  ctx.fillRect(x - 1, 0, 2, c.height);
  ref = requestAnimationFrame(progress)
}
body {background:#536375}
#c {border:1px solid;background:#7b8ca0}
<canvas id=c width=640 height=300></canvas><br>
<audio style="display:none" src="//dl.dropboxusercontent.com/s/a6s1qq4lnwj46uj/testaudiobyk3n_lo.mp3" controls></audio>

Web Audio API creating a Peak Meter with AnalyserNode

What is the correct way to implement a Peak Meter like those in Logic Pro with the Web Audio API AnalyserNode?
I know AnalyserNode.getFloatFrequencyData() returns decibel values, but how do you combine those values to get the one to be displayed in the meter? Do you just take the maximum value like in the following code sample (where analyserData comes from getFloatFrequencyData()):
let peak = -Infinity;
for (let i = 0; i < analyserData.length; i++) {
  const x = analyserData[i];
  if (x > peak) {
    peak = x;
  }
}
Inspecting some output from just taking the max makes it look like this is not the correct approach. Am I wrong?
Alternatively, would it be a better idea to use a ScriptProcessorNode instead? How would that approach differ?
If you take the maximum of getFloatFrequencyData()'s results in one frame, then what you are measuring is the audio power at a single frequency (whichever one has the most power). What you actually want to measure is the peak at any frequency — in other words, you want to not use the frequency data, but the unprocessed samples not separated into frequency bins.
The catch is that you'll have to compute the decibels power yourself. This is fairly simple arithmetic: you take some number of samples (one or more), square them, and average them. Note that even a “peak” meter may be doing averaging — just on a much shorter time scale.
Here's a complete example. (Warning: produces sound.)
document.getElementById('start').addEventListener('click', () => {
  const context = new (window.AudioContext || window.webkitAudioContext)();
  const oscillator = context.createOscillator();
  oscillator.type = 'square';
  oscillator.frequency.value = 440;
  oscillator.start();

  const gain1 = context.createGain();
  const analyser = context.createAnalyser();

  // Reduce output level to not hurt your ears.
  const gain2 = context.createGain();
  gain2.gain.value = 0.01;

  oscillator.connect(gain1);
  gain1.connect(analyser);
  analyser.connect(gain2);
  gain2.connect(context.destination);

  function displayNumber(id, value) {
    const meter = document.getElementById(id + '-level');
    const text = document.getElementById(id + '-level-text');
    text.textContent = value.toFixed(2);
    meter.value = isFinite(value) ? value : meter.min;
  }

  // Time domain samples are always provided with the count of
  // fftSize even though there is no FFT involved.
  // (Note that fftSize can only have particular values, not an
  // arbitrary integer.)
  analyser.fftSize = 2048;
  const sampleBuffer = new Float32Array(analyser.fftSize);

  function loop() {
    // Vary power of input to analyser. Linear in amplitude, so
    // nonlinear in dB power.
    gain1.gain.value = 0.5 * (1 + Math.sin(Date.now() / 4e2));

    analyser.getFloatTimeDomainData(sampleBuffer);

    // Compute average power over the interval.
    let sumOfSquares = 0;
    for (let i = 0; i < sampleBuffer.length; i++) {
      sumOfSquares += sampleBuffer[i] ** 2;
    }
    const avgPowerDecibels = 10 * Math.log10(sumOfSquares / sampleBuffer.length);

    // Compute peak instantaneous power over the interval.
    let peakInstantaneousPower = 0;
    for (let i = 0; i < sampleBuffer.length; i++) {
      const power = sampleBuffer[i] ** 2;
      peakInstantaneousPower = Math.max(power, peakInstantaneousPower);
    }
    const peakInstantaneousPowerDecibels = 10 * Math.log10(peakInstantaneousPower);

    // Note that you should then add or subtract as appropriate to
    // get the _reference level_ suitable for your application.

    // Display value.
    displayNumber('avg', avgPowerDecibels);
    displayNumber('inst', peakInstantaneousPowerDecibels);
    requestAnimationFrame(loop);
  }
  loop();
});
<button id="start">Start</button>
<p>
  Short average
  <meter id="avg-level" min="-100" max="10" value="-100"></meter>
  <span id="avg-level-text">—</span> dB
</p>
<p>
  Instantaneous
  <meter id="inst-level" min="-100" max="10" value="-100"></meter>
  <span id="inst-level-text">—</span> dB
</p>
Do you just take the maximum value
For a peak meter, yes. For a VU meter, there's all sorts of considerations in measuring the power, as well as the ballistics of an analog meter. There's also RMS power metering.
In digital land, you'll find a peak meter to be most useful for many tasks, and by far the easiest to compute.
A peak for any given set of samples is the highest absolute value in the set. First, though, you need that set of samples. If you call getFloatFrequencyData(), you're not getting sample values, you're getting the spectrum. What you want instead is getFloatTimeDomainData(). This data is a low-resolution representation of the samples. That is, you might have 4096 samples in your window, but your analyser might be configured with 256 buckets... so those 4096 samples will be resampled down to 256 samples. This is generally acceptable for a metering task.
From there, it's just Math.max(-Math.min(...samples), Math.max(...samples)) to get the max of the absolute value.
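As a minimal sketch of that per-frame computation (my illustration; spreading a typed array into Math.min/Math.max is fine at these buffer sizes):
// Grab one frame of time-domain samples and take the absolute peak.
const samples = new Float32Array(analyser.fftSize);
analyser.getFloatTimeDomainData(samples);
const peak = Math.max(-Math.min(...samples), Math.max(...samples));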
Suppose you wanted a higher resolution peak meter. For that, you need all the raw samples you can get. That's where a ScriptProcessorNode comes in handy. You get access to the actual sample data.
Basically, for this task, AnalyserNode is much faster, but slightly lower resolution. ScriptProcessorNode is much slower, but slightly higher resolution.

How to manipulate the contents of an audio tag and create derivative audio tags from it?

On my webpage, I have an audio file inside of an <audio> tag.
<!DOCTYPE html>
<html>
<audio src="myTrack.mp3" controls preload="auto"></audio>
</html>
I want to chop up this file stored in an <audio> tag into multiple 10-second audio files that I could then insert into the webpage as their own audio files in separate <audio> tags.
Is it possible to do this in javascript?
Yes, of course this is possible! :)
Make sure the audio fulfills CORS requirements so we can load it with AJAX (loading from the same origin as the page will of course fulfill this).
Load the file as ArrayBuffer and decode it with AudioContext
Calculate the number of segments and length of each (I use a time based length independent of channels below)
Split the main buffer into smaller buffers
Create a file-wrapper for the new buffer (below I made a simple WAVE wrapper for the demo)
Feed that as Blob via an Object-URL to a new instance of the Audio element
Keep track of the object URLs so you can free them up when they're no longer needed (revokeObjectURL()).
One drawback is of course that you would have to load the entire file into memory before processing it.
Example
Hopefully the file I'm using for the demo will be available through the current CDN that is used to allow CORS usage (I own the copyright, feel free to use it for testing, but only testing!! :) ). The loading and decoding can take some time depending on your system and connection, so please be patient...
Ideally you should use an asynchronous approach splitting the buffers, but the demo targets only the needed steps to make the buffer segments available as new file fragments.
Also note that I did not take into consideration that the last segment may be shorter than the others (I use floor; you should use ceil for the segment count and cut the last block length short). I'll leave that as an exercise for the reader (a sketch of that variant follows the example below)...
var actx = new (AudioContext || webkitAudioContext)(),
    url = "//dl.dropboxusercontent.com/s/7ttdz6xsoaqbzdl/war_demo.mp3";

// STEP 1: Load audio file using AJAX ----------------------------------
fetch(url).then(function(resp) {return resp.arrayBuffer()}).then(decode);

// STEP 2: Decode the audio file ---------------------------------------
function decode(buffer) {
  actx.decodeAudioData(buffer, split);
}

// STEP 3: Split the buffer --------------------------------------------
function split(abuffer) {
  // calc number of segments and segment length
  var channels = abuffer.numberOfChannels,
      duration = abuffer.duration,
      rate = abuffer.sampleRate,
      segmentLen = 10,
      count = Math.floor(duration / segmentLen),
      offset = 0,
      block = 10 * rate;

  while(count--) {
    var url = URL.createObjectURL(bufferToWave(abuffer, offset, block));
    var audio = new Audio(url);
    audio.controls = true;
    audio.volume = 0.75;
    document.body.appendChild(audio);
    offset += block;
  }
}

// Convert an audio-buffer segment to a Blob using WAVE representation
function bufferToWave(abuffer, offset, len) {
  var numOfChan = abuffer.numberOfChannels,
      length = len * numOfChan * 2 + 44,
      buffer = new ArrayBuffer(length),
      view = new DataView(buffer),
      channels = [], i, sample,
      pos = 0;

  // write WAVE header
  setUint32(0x46464952);                         // "RIFF"
  setUint32(length - 8);                         // file length - 8
  setUint32(0x45564157);                         // "WAVE"
  setUint32(0x20746d66);                         // "fmt " chunk
  setUint32(16);                                 // length = 16
  setUint16(1);                                  // PCM (uncompressed)
  setUint16(numOfChan);
  setUint32(abuffer.sampleRate);
  setUint32(abuffer.sampleRate * 2 * numOfChan); // avg. bytes/sec
  setUint16(numOfChan * 2);                      // block-align
  setUint16(16);                                 // 16-bit (hardcoded in this demo)
  setUint32(0x61746164);                         // "data" - chunk
  setUint32(length - pos - 4);                   // chunk length

  // write interleaved data
  for(i = 0; i < abuffer.numberOfChannels; i++)
    channels.push(abuffer.getChannelData(i));

  while(pos < length) {
    for(i = 0; i < numOfChan; i++) {             // interleave channels
      sample = Math.max(-1, Math.min(1, channels[i][offset]));          // clamp
      sample = (0.5 + sample < 0 ? sample * 32768 : sample * 32767)|0;  // scale to 16-bit signed int
      view.setInt16(pos, sample, true);          // update data chunk
      pos += 2;
    }
    offset++                                     // next source sample
  }

  // create Blob
  return new Blob([buffer], {type: "audio/wav"});

  function setUint16(data) {
    view.setUint16(pos, data, true);
    pos += 2;
  }

  function setUint32(data) {
    view.setUint32(pos, data, true);
    pos += 4;
  }
}
audio {display:block;margin-bottom:1px}
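For reference, the last-segment handling left as an exercise above might be sketched like this (untested; it reuses the variables from split()):
// Use ceil for the segment count and clamp the final block length.
var count = Math.ceil(duration / segmentLen);
while(count--) {
  var len = Math.min(block, abuffer.length - offset);
  var url = URL.createObjectURL(bufferToWave(abuffer, offset, len));
  // ...create and append the audio element exactly as before...
  offset += block;
}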
