Suppose I'm writing code for an imaginary CPU that supports plain atomic load and store operations, but has no support for compare-and-swap (CAS) or any other read-modify-write (RMW) instruction. Can the following non-blocking counter implementation be considered correct?
// Primitives assumed to be provided by the imaginary target: plain atomic
// load and store only. Both are declared here without a definition.
namespace magic {

// Atomically reads the value stored at `location` and returns it.
template <typename T>
T atomic_load(T & location); // atomically reads value from memory

// Atomically writes `value` to `location`.
template <typename T>
void atomic_store(T & location, T value); // atomically stores value in memory

} // namespace magic
// Counter built from plain atomic loads/stores (magic::atomic_load /
// magic::atomic_store) plus a mutex that is only ever acquired with a
// try-lock, so fetch_add() never waits for another thread.
//
// Every thread owns one slot in `_requests` holding the sum of the deltas it
// has applied locally but not yet written to `_val`. A thread writes its
// pending sum to `_val` only on a call where it wins the try-lock; until then
// its deferred deltas are not visible to any other thread.
//
// NOTE(review): nothing in the visible code inserts into `_requests`; the
// "pre-registered" assumption below must be satisfied elsewhere — confirm.
class Counter {
public:
    // Adds `val` to the counter as seen by the calling thread.
    //
    // Returns the shared value read from `_val` plus the calling thread's own
    // not-yet-published deltas, i.e. the counter as this thread saw it before
    // adding `val`. Deltas that OTHER threads have deferred are not included.
    //
    // Throws std::out_of_range (from unordered_map::at) if the calling
    // thread's id has no entry in `_requests`.
    int fetch_add(int val) {
        // assume all threads are pre-registered
        int & this_thread_request = _requests.at(std::this_thread::get_id());

        // RAII try-lock: never blocks, and releases the mutex on every exit
        // path if (and only if) it was acquired. try_lock may also fail
        // spuriously, in which case the deferred path below is taken.
        std::unique_lock<std::mutex> publish_lock(_mutex, std::try_to_lock);

        // if this thread takes the lock, it publishes all the modifications it has
        if (publish_lock.owns_lock()) {
            // The load must happen while the lock is held so that no other
            // publisher can store to `_val` between this load and the store.
            int old = magic::atomic_load(_val) + this_thread_request;
            magic::atomic_store(_val, old + val);
            this_thread_request = 0; // everything pending is now in `_val`
            return old;
        }

        // if the lock is already taken, modification is saved locally and published later
        int old = magic::atomic_load(_val) + this_thread_request;
        this_thread_request = this_thread_request + val;
        return old;
    }

private:
    // Per-thread sum of deltas not yet written to `_val`, keyed by thread id.
    // fetch_add() only looks entries up; it never inserts or erases.
    std::unordered_map<std::thread::id, int> _requests;
    // Held only while one thread publishes to `_val`; acquired via try-lock only.
    std::mutex _mutex;
    // Published counter value; accessed through magic::atomic_load/atomic_store.
    int _val = 0;
};