I am trying to set up WebRTC's Acoustic Echo Cancellation (AEC). My code performs the following steps:
- Initializes the audio processing module.
- Disables every processing submodule (AEC, noise suppression, high-pass filter, AGC1/AGC2 and the pre-amplifier).
- Generates a sound wave as input data.
- Creates an empty (all-zero) speaker reference buffer.
- Passes the data to the APM (Audio Processing Module) for processing.
However, the output buffer contains only zeros (silence) after processing. Could there be an issue with my code?
#define RTC_LOG_LEVEL RTC_LS_INFO // Or RTC_LS_VERBOSE for more detailed logging
#include <algorithm> // For std::min / std::max
#include <cmath> // For std::sin and the floating-point std::abs overloads
#include <cstdint> // For int16_t
#include <iostream> // Include iostream for error messages
#include <sstream>
#include <vector> // Include vector header

#include "api/audio/builtin_audio_processing_builder.h"
#include "api/environment/environment.h"
#include "api/environment/environment_factory.h" // For creating Environment
#include "modules/audio_processing/include/audio_processing.h"
// Writes the leading samples of `buffer` to stdout as a bracketed,
// comma-separated list terminated by a newline.
//   buffer                 - samples to print.
//   length                 - number of valid samples in `buffer`.
//   num_elements_to_print  - upper bound on how many samples are shown.
// At most min(length, num_elements_to_print) samples are read.
void printBuffer(const int16_t* buffer, int length, int num_elements_to_print = 10) {
    const int shown = std::min(length, num_elements_to_print);
    const char* separator = "";

    std::cout << "[";
    for (int idx = 0; idx < shown; ++idx) {
        // The separator is emitted before every element except the first,
        // so the list never ends with a trailing comma.
        std::cout << separator << buffer[idx];
        separator = ", ";
    }
    std::cout << "]" << std::endl;
}
int main() {
int sample_rate = 16000;
int channels = 1;
int samples_per_channel = sample_rate * 0.01; //10ms
int buffer_size = samples_per_channel * channels;
std::cout << "buffer_size: " << buffer_size << std::endl;
// Configure AEC settings
webrtc::AudioProcessing::Config config;
config.echo_canceller.enabled = false;
config.echo_canceller.mobile_mode = false;
config.noise_suppression.enabled = false;
config.high_pass_filter.enabled = false;
config.gain_controller1.enabled = false; // Disable AGC1
config.gain_controller2.enabled = false; // Disable AGC2
config.pre_amplifier.enabled = false; // Disable Pre-amplifier
// Create environment using the factory method
webrtc::Environment env = webrtc::CreateEnvironment();
// Create the audio processing module using builder
webrtc::BuiltinAudioProcessingBuilder builder;
builder.SetConfig(config);
// Build and get a reference to the AudioProcessing instance
rtc::scoped_refptr<webrtc::AudioProcessing> apm = builder.Build(env);
// Configure processing streams
webrtc::ProcessingConfig processing_config;
processing_config.input_stream().set_sample_rate_hz(sample_rate);
processing_config.input_stream().set_num_channels(channels);
processing_config.output_stream().set_sample_rate_hz(sample_rate);
processing_config.output_stream().set_num_channels(channels);
processing_config.reverse_input_stream().set_sample_rate_hz(sample_rate);
processing_config.reverse_input_stream().set_num_channels(channels);
processing_config.reverse_output_stream().set_sample_rate_hz(sample_rate);
processing_config.reverse_output_stream().set_num_channels(channels);
// Initialize with proper config
int err = apm->Initialize(processing_config);
if (err != 0) {
std::cerr << "Error initializing AudioProcessing: " << err << std::endl;
return -1;
}
// // Set up stream configuration EXPLICITLY
// webrtc::ProcessingConfig processing_config;
// processing_config.input_stream().set_sample_rate_hz(sample_rate);
// processing_config.input_stream().set_num_channels(channels);
// processing_config.output_stream().set_sample_rate_hz(sample_rate);
// processing_config.output_stream().set_num_channels(channels);
webrtc::StreamConfig input_stream_config = processing_config.input_stream();
webrtc::StreamConfig output_stream_config = processing_config.output_stream();
// Create input and output buffers
std::vector<int16_t> mic_input(buffer_size, 0); // Initialize with zeros
std::vector<int16_t> speaker_output(buffer_size, 0); // Initialize with zeros
std::vector<int16_t> output_buffer(buffer_size, 0);
// Fill mic_input with a simple sine wave (for testing)
float frequency = 440.0f; // 440 Hz
for (int i = 0; i < buffer_size; ++i) {
float time = static_cast<float>(i) / sample_rate;
float sample = sinf(2.0f * M_PI * frequency * time);
// Convert float sample to int16_t (with clipping)
if (sample > 1.0f) sample = 1.0f;
if (sample < -1.0f) sample = -1.0f;
mic_input[i] = static_cast<int16_t>(sample * 32767.0f);
}
// *** Add Debugging: Print input buffer before processing ***
std::cout << "Mic Input Before Processing: ";
printBuffer(mic_input.data(), buffer_size, 20); // Print first 20 elements
err = apm->ProcessStream(mic_input.data(), input_stream_config, output_stream_config, output_buffer.data());
if (err != 0) {
std::cerr << "Error in ProcessStream: " << err << std::endl;
return err;
}
// *** Add Debugging: Print output buffer after processing ***
std::cout << "Output Buffer After Processing: ";
printBuffer(output_buffer.data(), buffer_size, 20); // Print first 20 elements
std::cout << "Mic Input: ";
printBuffer(mic_input.data(), buffer_size);
std::cout << "Output Buffer: ";
printBuffer(output_buffer.data(), buffer_size);
// *** TEST: Check if output is significantly changed ***
float max_diff = 0.0f;
for (int i = 0; i < buffer_size; ++i) {
// Scale int16_t back to float for comparison
float mic_sample = static_cast<float>(mic_input[i]) / 32767.0f;
float output_sample = static_cast<float>(output_buffer[i]) / 32767.0f;
float diff = std::abs(mic_sample - output_sample);
if (diff > max_diff) {
max_diff = diff;
}
}
float signal_power = 0.0f;
for (int i = 0; i < buffer_size; ++i) {
float mic_sample = static_cast<float>(mic_input[i]) / 32767.0f;
signal_power += mic_sample * mic_sample;
}
signal_power /= buffer_size;
float diff_threshold = 0.01f; // Adjust this threshold as needed
std::cout << "Max absolute difference between input and output: " << max_diff << std::endl;
std::cout << "Signal power: " << signal_power << std::endl;
// Determine if the output is significantly changed
bool output_changed_significantly = max_diff > diff_threshold;
if (output_changed_significantly) {
std::cout << "*** TEST FAILED: Output is significantly changed! ***" << std::endl;
} else {
std::cout << "*** TEST PASSED: Output is within acceptable range. ***" << std::endl;
}
return 0;
}
And here is the console output:
Mic Input Before Processing: [0, 5633, 11099, 16234, 20886, 24916, 28203, 30651, 32186, 32762, 32363, 31000, 28713, 25572, 21669, 17120, 12062, 6644, 1029, -4616]
Output Buffer After Processing: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Mic Input: [0, 5633, 11099, 16234, 20886, 24916, 28203, 30651, 32186, 32762]
Output Buffer: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Max absolute difference between input and output: 1
Signal power: 0.507413
*** TEST FAILED: Output is significantly changed! ***