% Conference paper: "Asynchronous Prefix Recoverability for Fast Distributed
% Stores" (Li, Chandramouli, Faleiro, Kossmann, Madden), SIGMOD 2021.
% - month uses the predefined bare macro (jun) so the style controls rendering.
% - The acronym in booktitle is brace-protected against style recasing.
% NOTE(review): the url host below looks like a proxy/mirror rather than the
% publisher's own site -- confirm the canonical URL (or add a doi) before use.
@inproceedings{li2021asynchronous,
  author    = {Li, Tianyu and Chandramouli, Badrish and Faleiro, Jose and Kossmann, Donald and Madden, Sam},
  title     = {Asynchronous Prefix Recoverability for Fast Distributed Stores},
  booktitle = {{SIGMOD} 2021},
  year      = {2021},
  month     = jun,
  abstract  = {Accessing and updating data sharded across distributed machines safely and speedily in the face of failures remains a challenging problem. Most prominently, applications that share state across different nodes want their writes to quickly become visible to others, without giving up recoverability guarantees in case a failure occurs. Current solutions of a fast cache backed by storage cannot support this use case easily. In this work, we design a distributed protocol, called Distributed Prefix Recovery (DPR) that builds on top of a sharded cache-store architecture with single-key operations, to provide cross-shard recoverability guarantees. With DPR, many clients can read and update shared state at sub-millisecond latency, while receiving periodic prefix durability guarantees. On failure, DPR quickly restores the system to a prefix-consistent state with a novel non-blocking rollback scheme. We added DPR to a key-value store (FASTER) and cache (Redis) and show that we can get high throughput and low latency similar to in-memory systems, while lazily providing durability guarantees similar to persistent stores.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/asynchronous-prefix-recoverability-for-fast-distributed-stores/},
}