I've developed a multi-user voice chat system using C++ with ALSA for audio capture/playback and TCP sockets for networking. The system works perfectly with two clients, but when the third client connects, the audio latency accumulates progressively.
What architectural flaws or implementation details could cause this progressive latency increase specifically in multi-client scenarios? How should I profile and optimize this system?
server.cpp
#include <iostream>
#include <vector>
#include <thread>
#include <mutex>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <algorithm>
// TCP port the server binds to and listens on.
#define PORT 8888
// Size in bytes of each per-connection receive buffer in handle_client().
#define BUFFER_SIZE 4096
// Socket descriptors of all currently connected clients.
// Shared between the accept loop and every handler thread; guarded by `mtx`.
std::vector<int> clients;
// Protects `clients`. broadcast() also holds it while sending, so relays
// from different handler threads are serialized.
std::mutex mtx;
/**
 * Relays one chunk of data from `sender` to every other connected client.
 *
 * @param sender  Descriptor the chunk was received from; it is skipped.
 * @param buffer  Start of the bytes to relay.
 * @param len     Number of valid bytes in `buffer`.
 *
 * The client list is locked for the whole relay, so only one handler thread
 * broadcasts at a time and a receiver whose socket buffer is full stalls
 * every other sender.
 * NOTE(review): this head-of-line blocking is inherent to sending on blocking
 * sockets under the lock; removing it needs non-blocking sockets with
 * per-client queues.
 */
void broadcast(int sender, const char* buffer, ssize_t len) {
    std::lock_guard<std::mutex> lock(mtx);
    for (int client : clients) {
        if (client == sender) {
            continue;
        }
        // send() may accept only part of the chunk; dropping the remainder
        // would shift the receiver's byte stream, so keep going until the
        // whole chunk is out or the socket fails.
        ssize_t sent_total = 0;
        while (sent_total < len) {
            // MSG_NOSIGNAL: a peer that already hung up must yield an error
            // here instead of raising SIGPIPE, which would kill the server.
            const ssize_t sent = send(client, buffer + sent_total,
                                      static_cast<size_t>(len - sent_total),
                                      MSG_NOSIGNAL);
            if (sent <= 0) {
                // Broken connection: that client's own handler thread
                // notices on its next recv() and unregisters the socket.
                break;
            }
            sent_total += sent;
        }
    }
}
/**
 * Per-connection worker: reads from `sock` until the peer disconnects or an
 * error occurs, relaying every received chunk to the other clients.
 *
 * @param sock  Connected client socket. This function takes over the
 *              descriptor and closes it before returning.
 */
void handle_client(int sock) {
    char buffer[BUFFER_SIZE];
    while (true) {
        const ssize_t len = recv(sock, buffer, sizeof(buffer), 0);
        if (len <= 0) {
            break;  // 0 = orderly shutdown by the peer, <0 = error
        }
        broadcast(sock, buffer, len);
    }

    // Unregister BEFORE closing. If the descriptor were closed first, another
    // thread could still broadcast to it, and the kernel could hand the same
    // number to a newly accepted client, which the erase below would then
    // remove from the list by mistake.
    {
        std::lock_guard<std::mutex> lock(mtx);
        clients.erase(std::remove(clients.begin(), clients.end(), sock), clients.end());
    }
    close(sock);
}
/**
 * Server entry point: listens on PORT and starts one detached handler thread
 * per accepted connection.
 *
 * @return 1 if the listening socket cannot be set up; otherwise the accept
 *         loop runs until the process is terminated.
 */
int main() {
    const int server_fd = socket(AF_INET, SOCK_STREAM, 0);
    if (server_fd < 0) {
        std::cerr << "socket() failed" << std::endl;
        return 1;
    }

    // SO_REUSEADDR takes an int flag; passing a null option pointer with
    // length 0 fails and leaves the option unset.
    const int reuse = 1;
    if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0) {
        std::cerr << "setsockopt(SO_REUSEADDR) failed" << std::endl;  // not fatal
    }

    sockaddr_in address{};
    address.sin_family = AF_INET;
    address.sin_port = htons(PORT);
    address.sin_addr.s_addr = htonl(INADDR_ANY);
    if (bind(server_fd, reinterpret_cast<sockaddr*>(&address), sizeof(address)) < 0 ||
        listen(server_fd, 5) < 0) {
        std::cerr << "bind()/listen() failed on port " << PORT << std::endl;
        close(server_fd);
        return 1;
    }

    while (true) {
        sockaddr_in client_addr{};
        socklen_t addr_len = sizeof(client_addr);
        const int client_sock =
            accept(server_fd, reinterpret_cast<sockaddr*>(&client_addr), &addr_len);
        if (client_sock < 0) {
            // Never register -1 as a client or start a handler for it.
            std::cerr << "accept() failed" << std::endl;
            continue;
        }
        {
            // Hold the lock only for the list update, not for thread creation.
            std::lock_guard<std::mutex> lock(mtx);
            clients.push_back(client_sock);
        }
        std::thread(handle_client, client_sock).detach();
    }
}
client.cpp
#include <iostream>
#include <thread>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <alsa/asoundlib.h>
// Address and TCP port of the relay server the client connects to.
#define SERVER_IP "127.0.0.1"
#define PORT 8888
// PCM configuration requested from ALSA for both capture and playback.
#define SAMPLE_RATE 44100
#define CHANNELS 1
// Number of samples in each capture/playback buffer (one network chunk).
#define BUFFER_FRAMES 512
#define FORMAT SND_PCM_FORMAT_S16_LE
// Socket connected to the server; used by both audio threads.
int sock;
// Loop flag for the audio threads; main() clears it on quit.
// NOTE(review): this is a plain bool written by one thread and read by
// others without synchronization; std::atomic<bool> would make that
// well-defined.
bool running = true;
// ALSA PCM handles, assigned in main() before the audio threads start.
snd_pcm_t *capture_handle;
snd_pcm_t *playback_handle;
/**
 * Opens the "default" ALSA capture device and configures it for interleaved
 * access with FORMAT samples, CHANNELS channels and a rate near SAMPLE_RATE.
 *
 * @return An open PCM handle that the caller must release with
 *         snd_pcm_close(), or nullptr if opening or configuring failed
 *         (the reason is written to std::cerr).
 */
snd_pcm_t* InitializeCaptureDevice() {
    snd_pcm_t *handle = nullptr;
    int err = snd_pcm_open(&handle, "default", SND_PCM_STREAM_CAPTURE, 0);
    if (err < 0) {
        std::cerr << "Capture open error: " << snd_strerror(err) << std::endl;
        return nullptr;
    }

    snd_pcm_hw_params_t *params = nullptr;
    if ((err = snd_pcm_hw_params_malloc(&params)) < 0) {
        std::cerr << "Capture params error: " << snd_strerror(err) << std::endl;
        snd_pcm_close(handle);
        return nullptr;
    }

    unsigned int rate = SAMPLE_RATE;
    // Stop at the first failing step so a rejected setting is reported
    // instead of being silently replaced by whatever the device picks.
    const bool failed =
        (err = snd_pcm_hw_params_any(handle, params)) < 0 ||
        (err = snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED)) < 0 ||
        (err = snd_pcm_hw_params_set_format(handle, params, FORMAT)) < 0 ||
        (err = snd_pcm_hw_params_set_channels(handle, params, CHANNELS)) < 0 ||
        (err = snd_pcm_hw_params_set_rate_near(handle, params, &rate, 0)) < 0 ||
        (err = snd_pcm_hw_params(handle, params)) < 0;

    // The parameter container is no longer needed on either path.
    snd_pcm_hw_params_free(params);

    if (failed) {
        std::cerr << "Capture params error: " << snd_strerror(err) << std::endl;
        snd_pcm_close(handle);
        return nullptr;
    }
    return handle;
}
/**
 * Opens the "default" ALSA playback device and configures it for interleaved
 * access with FORMAT samples, CHANNELS channels and a rate near SAMPLE_RATE.
 *
 * @return An open PCM handle that the caller must release with
 *         snd_pcm_close(), or nullptr if opening or configuring failed
 *         (the reason is written to std::cerr).
 */
snd_pcm_t* InitializePlaybackDevice() {
    snd_pcm_t *handle = nullptr;
    int err = snd_pcm_open(&handle, "default", SND_PCM_STREAM_PLAYBACK, 0);
    if (err < 0) {
        std::cerr << "Playback open error: " << snd_strerror(err) << std::endl;
        return nullptr;
    }

    snd_pcm_hw_params_t *params = nullptr;
    if ((err = snd_pcm_hw_params_malloc(&params)) < 0) {
        std::cerr << "Playback params error: " << snd_strerror(err) << std::endl;
        snd_pcm_close(handle);
        return nullptr;
    }

    unsigned int rate = SAMPLE_RATE;
    // Stop at the first failing step so a rejected setting is reported
    // instead of being silently replaced by whatever the device picks.
    const bool failed =
        (err = snd_pcm_hw_params_any(handle, params)) < 0 ||
        (err = snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED)) < 0 ||
        (err = snd_pcm_hw_params_set_format(handle, params, FORMAT)) < 0 ||
        (err = snd_pcm_hw_params_set_channels(handle, params, CHANNELS)) < 0 ||
        (err = snd_pcm_hw_params_set_rate_near(handle, params, &rate, 0)) < 0 ||
        (err = snd_pcm_hw_params(handle, params)) < 0;

    // The parameter container is no longer needed on either path.
    snd_pcm_hw_params_free(params);

    if (failed) {
        std::cerr << "Playback params error: " << snd_strerror(err) << std::endl;
        snd_pcm_close(handle);
        return nullptr;
    }
    return handle;
}
void audio_in_thread() {
short buffer[BUFFER_FRAMES];
while(running) {
snd_pcm_readi(capture_handle, buffer, BUFFER_FRAMES);
send(sock, buffer, BUFFER_FRAMES*sizeof(short), 0);
}
}
void audio_out_thread() {
short buffer[BUFFER_FRAMES];
while(running) {
ssize_t len = recv(sock, buffer, BUFFER_FRAMES*sizeof(short), 0);
if(len > 0) {
snd_pcm_writei(playback_handle, buffer, len/sizeof(short));
}
}
}
/**
 * Client entry point: connects to the server, opens the capture and playback
 * devices, runs the two audio workers and waits for 'q' (or end of input)
 * on stdin.
 *
 * @return 0 on normal shutdown, -1 if the connection or the audio devices
 *         could not be set up.
 */
int main() {
    sock = socket(AF_INET, SOCK_STREAM, 0);
    if (sock < 0) {
        std::cerr << "socket() failed" << std::endl;
        return -1;
    }

    sockaddr_in server_addr{};
    server_addr.sin_family = AF_INET;
    server_addr.sin_port = htons(PORT);
    if (inet_pton(AF_INET, SERVER_IP, &server_addr.sin_addr) != 1 ||
        connect(sock, reinterpret_cast<sockaddr*>(&server_addr), sizeof(server_addr)) < 0) {
        std::cerr << "Could not connect to server" << std::endl;
        close(sock);
        return -1;
    }

    capture_handle = InitializeCaptureDevice();
    playback_handle = capture_handle ? InitializePlaybackDevice() : nullptr;
    if (!capture_handle || !playback_handle) {
        std::cerr << "Audio device init failed" << std::endl;
        if (capture_handle) {
            snd_pcm_close(capture_handle);
        }
        close(sock);
        return -1;
    }

    // Keep the threads joinable: they use the socket and both PCM handles,
    // so those must stay open until the workers have actually stopped.
    std::thread capture_worker(audio_in_thread);
    std::thread playback_worker(audio_out_thread);

    std::cout << "Press 'q' to quit..." << std::endl;
    // Also stop on EOF; otherwise a closed stdin would spin here forever.
    for (int ch = getchar(); ch != 'q' && ch != EOF; ch = getchar()) {
    }

    running = false;
    // Wake a worker that is blocked in recv()/send() so it can see the flag.
    shutdown(sock, SHUT_RDWR);
    capture_worker.join();
    playback_worker.join();

    close(sock);
    snd_pcm_close(capture_handle);
    snd_pcm_close(playback_handle);
    return 0;
}
I tried to use epoll, but maybe my skill is too poor: the client couldn't communicate with the server, so I don't know what to do.
I've developed a multi-user voice chat system using C++ with ALSA for audio capture/playback and TCP sockets for networking. The system works perfectly with two clients, but when the third client connects, the audio latency accumulates progressively.
What architectural flaws or implementation details could cause this progressive latency increase specifically in multi-client scenarios? How should I profile and optimize this system?
server.cpp
#include <iostream>
#include <vector>
#include <thread>
#include <mutex>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <algorithm>
// TCP port the server binds to and listens on.
#define PORT 8888
// Size in bytes of each per-connection receive buffer in handle_client().
#define BUFFER_SIZE 4096
// Socket descriptors of all currently connected clients.
// Shared between the accept loop and every handler thread; guarded by `mtx`.
std::vector<int> clients;
// Protects `clients`. broadcast() also holds it while sending, so relays
// from different handler threads are serialized.
std::mutex mtx;
/**
 * Relays one chunk of data from `sender` to every other connected client.
 *
 * @param sender  Descriptor the chunk was received from; it is skipped.
 * @param buffer  Start of the bytes to relay.
 * @param len     Number of valid bytes in `buffer`.
 *
 * The client list is locked for the whole relay, so only one handler thread
 * broadcasts at a time and a receiver whose socket buffer is full stalls
 * every other sender.
 * NOTE(review): this head-of-line blocking is inherent to sending on blocking
 * sockets under the lock; removing it needs non-blocking sockets with
 * per-client queues.
 */
void broadcast(int sender, const char* buffer, ssize_t len) {
    std::lock_guard<std::mutex> lock(mtx);
    for (int client : clients) {
        if (client == sender) {
            continue;
        }
        // send() may accept only part of the chunk; dropping the remainder
        // would shift the receiver's byte stream, so keep going until the
        // whole chunk is out or the socket fails.
        ssize_t sent_total = 0;
        while (sent_total < len) {
            // MSG_NOSIGNAL: a peer that already hung up must yield an error
            // here instead of raising SIGPIPE, which would kill the server.
            const ssize_t sent = send(client, buffer + sent_total,
                                      static_cast<size_t>(len - sent_total),
                                      MSG_NOSIGNAL);
            if (sent <= 0) {
                // Broken connection: that client's own handler thread
                // notices on its next recv() and unregisters the socket.
                break;
            }
            sent_total += sent;
        }
    }
}
/**
 * Per-connection worker: reads from `sock` until the peer disconnects or an
 * error occurs, relaying every received chunk to the other clients.
 *
 * @param sock  Connected client socket. This function takes over the
 *              descriptor and closes it before returning.
 */
void handle_client(int sock) {
    char buffer[BUFFER_SIZE];
    while (true) {
        const ssize_t len = recv(sock, buffer, sizeof(buffer), 0);
        if (len <= 0) {
            break;  // 0 = orderly shutdown by the peer, <0 = error
        }
        broadcast(sock, buffer, len);
    }

    // Unregister BEFORE closing. If the descriptor were closed first, another
    // thread could still broadcast to it, and the kernel could hand the same
    // number to a newly accepted client, which the erase below would then
    // remove from the list by mistake.
    {
        std::lock_guard<std::mutex> lock(mtx);
        clients.erase(std::remove(clients.begin(), clients.end(), sock), clients.end());
    }
    close(sock);
}
/**
 * Server entry point: listens on PORT and starts one detached handler thread
 * per accepted connection.
 *
 * @return 1 if the listening socket cannot be set up; otherwise the accept
 *         loop runs until the process is terminated.
 */
int main() {
    const int server_fd = socket(AF_INET, SOCK_STREAM, 0);
    if (server_fd < 0) {
        std::cerr << "socket() failed" << std::endl;
        return 1;
    }

    // SO_REUSEADDR takes an int flag; passing a null option pointer with
    // length 0 fails and leaves the option unset.
    const int reuse = 1;
    if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0) {
        std::cerr << "setsockopt(SO_REUSEADDR) failed" << std::endl;  // not fatal
    }

    sockaddr_in address{};
    address.sin_family = AF_INET;
    address.sin_port = htons(PORT);
    address.sin_addr.s_addr = htonl(INADDR_ANY);
    if (bind(server_fd, reinterpret_cast<sockaddr*>(&address), sizeof(address)) < 0 ||
        listen(server_fd, 5) < 0) {
        std::cerr << "bind()/listen() failed on port " << PORT << std::endl;
        close(server_fd);
        return 1;
    }

    while (true) {
        sockaddr_in client_addr{};
        socklen_t addr_len = sizeof(client_addr);
        const int client_sock =
            accept(server_fd, reinterpret_cast<sockaddr*>(&client_addr), &addr_len);
        if (client_sock < 0) {
            // Never register -1 as a client or start a handler for it.
            std::cerr << "accept() failed" << std::endl;
            continue;
        }
        {
            // Hold the lock only for the list update, not for thread creation.
            std::lock_guard<std::mutex> lock(mtx);
            clients.push_back(client_sock);
        }
        std::thread(handle_client, client_sock).detach();
    }
}
client.cpp
#include <iostream>
#include <thread>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <alsa/asoundlib.h>
// Address and TCP port of the relay server the client connects to.
#define SERVER_IP "127.0.0.1"
#define PORT 8888
// PCM configuration requested from ALSA for both capture and playback.
#define SAMPLE_RATE 44100
#define CHANNELS 1
// Number of samples in each capture/playback buffer (one network chunk).
#define BUFFER_FRAMES 512
#define FORMAT SND_PCM_FORMAT_S16_LE
// Socket connected to the server; used by both audio threads.
int sock;
// Loop flag for the audio threads; main() clears it on quit.
// NOTE(review): this is a plain bool written by one thread and read by
// others without synchronization; std::atomic<bool> would make that
// well-defined.
bool running = true;
// ALSA PCM handles, assigned in main() before the audio threads start.
snd_pcm_t *capture_handle;
snd_pcm_t *playback_handle;
/**
 * Opens the "default" ALSA capture device and configures it for interleaved
 * access with FORMAT samples, CHANNELS channels and a rate near SAMPLE_RATE.
 *
 * @return An open PCM handle that the caller must release with
 *         snd_pcm_close(), or nullptr if opening or configuring failed
 *         (the reason is written to std::cerr).
 */
snd_pcm_t* InitializeCaptureDevice() {
    snd_pcm_t *handle = nullptr;
    int err = snd_pcm_open(&handle, "default", SND_PCM_STREAM_CAPTURE, 0);
    if (err < 0) {
        std::cerr << "Capture open error: " << snd_strerror(err) << std::endl;
        return nullptr;
    }

    snd_pcm_hw_params_t *params = nullptr;
    if ((err = snd_pcm_hw_params_malloc(&params)) < 0) {
        std::cerr << "Capture params error: " << snd_strerror(err) << std::endl;
        snd_pcm_close(handle);
        return nullptr;
    }

    unsigned int rate = SAMPLE_RATE;
    // Stop at the first failing step so a rejected setting is reported
    // instead of being silently replaced by whatever the device picks.
    const bool failed =
        (err = snd_pcm_hw_params_any(handle, params)) < 0 ||
        (err = snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED)) < 0 ||
        (err = snd_pcm_hw_params_set_format(handle, params, FORMAT)) < 0 ||
        (err = snd_pcm_hw_params_set_channels(handle, params, CHANNELS)) < 0 ||
        (err = snd_pcm_hw_params_set_rate_near(handle, params, &rate, 0)) < 0 ||
        (err = snd_pcm_hw_params(handle, params)) < 0;

    // The parameter container is no longer needed on either path.
    snd_pcm_hw_params_free(params);

    if (failed) {
        std::cerr << "Capture params error: " << snd_strerror(err) << std::endl;
        snd_pcm_close(handle);
        return nullptr;
    }
    return handle;
}
/**
 * Opens the "default" ALSA playback device and configures it for interleaved
 * access with FORMAT samples, CHANNELS channels and a rate near SAMPLE_RATE.
 *
 * @return An open PCM handle that the caller must release with
 *         snd_pcm_close(), or nullptr if opening or configuring failed
 *         (the reason is written to std::cerr).
 */
snd_pcm_t* InitializePlaybackDevice() {
    snd_pcm_t *handle = nullptr;
    int err = snd_pcm_open(&handle, "default", SND_PCM_STREAM_PLAYBACK, 0);
    if (err < 0) {
        std::cerr << "Playback open error: " << snd_strerror(err) << std::endl;
        return nullptr;
    }

    snd_pcm_hw_params_t *params = nullptr;
    if ((err = snd_pcm_hw_params_malloc(&params)) < 0) {
        std::cerr << "Playback params error: " << snd_strerror(err) << std::endl;
        snd_pcm_close(handle);
        return nullptr;
    }

    unsigned int rate = SAMPLE_RATE;
    // Stop at the first failing step so a rejected setting is reported
    // instead of being silently replaced by whatever the device picks.
    const bool failed =
        (err = snd_pcm_hw_params_any(handle, params)) < 0 ||
        (err = snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED)) < 0 ||
        (err = snd_pcm_hw_params_set_format(handle, params, FORMAT)) < 0 ||
        (err = snd_pcm_hw_params_set_channels(handle, params, CHANNELS)) < 0 ||
        (err = snd_pcm_hw_params_set_rate_near(handle, params, &rate, 0)) < 0 ||
        (err = snd_pcm_hw_params(handle, params)) < 0;

    // The parameter container is no longer needed on either path.
    snd_pcm_hw_params_free(params);

    if (failed) {
        std::cerr << "Playback params error: " << snd_strerror(err) << std::endl;
        snd_pcm_close(handle);
        return nullptr;
    }
    return handle;
}
void audio_in_thread() {
short buffer[BUFFER_FRAMES];
while(running) {
snd_pcm_readi(capture_handle, buffer, BUFFER_FRAMES);
send(sock, buffer, BUFFER_FRAMES*sizeof(short), 0);
}
}
void audio_out_thread() {
short buffer[BUFFER_FRAMES];
while(running) {
ssize_t len = recv(sock, buffer, BUFFER_FRAMES*sizeof(short), 0);
if(len > 0) {
snd_pcm_writei(playback_handle, buffer, len/sizeof(short));
}
}
}
/**
 * Client entry point: connects to the server, opens the capture and playback
 * devices, runs the two audio workers and waits for 'q' (or end of input)
 * on stdin.
 *
 * @return 0 on normal shutdown, -1 if the connection or the audio devices
 *         could not be set up.
 */
int main() {
    sock = socket(AF_INET, SOCK_STREAM, 0);
    if (sock < 0) {
        std::cerr << "socket() failed" << std::endl;
        return -1;
    }

    sockaddr_in server_addr{};
    server_addr.sin_family = AF_INET;
    server_addr.sin_port = htons(PORT);
    if (inet_pton(AF_INET, SERVER_IP, &server_addr.sin_addr) != 1 ||
        connect(sock, reinterpret_cast<sockaddr*>(&server_addr), sizeof(server_addr)) < 0) {
        std::cerr << "Could not connect to server" << std::endl;
        close(sock);
        return -1;
    }

    capture_handle = InitializeCaptureDevice();
    playback_handle = capture_handle ? InitializePlaybackDevice() : nullptr;
    if (!capture_handle || !playback_handle) {
        std::cerr << "Audio device init failed" << std::endl;
        if (capture_handle) {
            snd_pcm_close(capture_handle);
        }
        close(sock);
        return -1;
    }

    // Keep the threads joinable: they use the socket and both PCM handles,
    // so those must stay open until the workers have actually stopped.
    std::thread capture_worker(audio_in_thread);
    std::thread playback_worker(audio_out_thread);

    std::cout << "Press 'q' to quit..." << std::endl;
    // Also stop on EOF; otherwise a closed stdin would spin here forever.
    for (int ch = getchar(); ch != 'q' && ch != EOF; ch = getchar()) {
    }

    running = false;
    // Wake a worker that is blocked in recv()/send() so it can see the flag.
    shutdown(sock, SHUT_RDWR);
    capture_worker.join();
    playback_worker.join();

    close(sock);
    snd_pcm_close(capture_handle);
    snd_pcm_close(playback_handle);
    return 0;
}
I tried to use epoll, but maybe my skill is too poor: the client couldn't communicate with the server, so I don't know what to do.
Share Improve this question asked Mar 15 at 21:14 QingMengQingMeng 11 silver badge3 bronze badges1 Answer
Reset to default
3
Architectural Flaw
Currently only one thread can be holding the mutex in `broadcast` at a time, which means you have almost no concurrency. It's also doing a blocking `send` while holding this mutex, and that's potentially slow.
You have threads, but you're using them ineffectively, just to do blocking reads instead of learning how to write a proper `select`/`poll`/`epoll` loop.
Architectural Solution
You can write this in a single thread if you make your sockets non-blocking.
For that to work you need a collection of buffers, and you need to manage partially-consumed buffers when a non-blocking send can't send the whole thing in one go.
Since each buffer will be sent to multiple sockets and they may drain at different rates, you also need a reference counter to track when a given buffer is finally finished sending, and can be reused.
Finally, you need to use `epoll` (or whatever) to tell you both when a client socket becomes readable, and when a socket with an unfinished send buffer becomes writeable.
Finally, profiling. You should consider learning to use `perf`, but that's a whole topic in itself.
I'd expect to see someone at least add basic logging with high-resolution timestamps if they're trying to get a rough idea where their program's time is going, but `perf` will be more accurate, more flexible, and less intrusive.