% Journal article describing Apache REEF (ACM TOCS, vol. 35, no. 2, 2017).
% The pages field holds the value given in the original record (5); it may be
% an article number rather than a page range -- TODO confirm.
% NOTE(review): the url host looks like a third-party mirror rather than the
% publisher's site -- confirm the canonical landing page (or add a doi field).
@article{chun2017apache,
  author   = {Chun, Byung-Gon
              and Condie, Tyson
              and Chen, Yingda
              and Cho, Brian
              and Chung, Andrew
              and Curino, Carlo
              and Douglas, Chris
              and Jeon, Beomyeol
              and Jeong, Joo Seong
              and Lee, Gyewon
              and Lee, Yunseong
              and Majestro, Tony
              and Malkhi, Dahlia
              and Matusevych, Sergiy
              and Myers, Brandon
              and Mykhailova, Mariia
              and Narayanamurthy, Shravan
              and Noor, Joseph
              and Ramakrishnan, Raghu
              and Rao, Sriram
              and Sears, Russell
              and Sezgin, Beysim
              and Um, Taegeon
              and Wang, Julia
              and Yang, Youngseok
              and Interlandi, Matteo},
  title    = {{Apache} {REEF}: Retainable Evaluator Execution Framework},
  journal  = {{ACM} Transactions on Computer Systems},
  volume   = {35},
  number   = {2},
  pages    = {5},
  year     = {2017},
  month    = oct,
  url      = {http://approjects.co.za/?big=en-us/research/publication/apache-reef-retainable-evaluator-execution-framework/},
  abstract = {Resource Managers like YARN and Mesos have emerged as a critical layer in the cloud computing system stack, but the developer abstractions for leasing cluster resources and instantiating application logic are very low level. This flexibility comes at a high cost in terms of developer effort, as each application must repeatedly tackle the same challenges (e.g., fault tolerance, task scheduling and coordination) and reimplement common mechanisms (e.g., caching, bulk-data transfers). This article presents REEF, a development framework that provides a control plane for scheduling and coordinating task-level (data-plane) work on cluster resources obtained from a Resource Manager. REEF provides mechanisms that facilitate resource reuse for data caching and state management abstractions that greatly ease the development of elastic data processing pipelines on cloud platforms that support a Resource Manager service. We illustrate the power of REEF by showing applications built atop: a distributed shell application, a machine-learning framework, a distributed in-memory caching system, and a port of the CORFU system. REEF is currently an Apache top-level project that has attracted contributors from several institutions and it is being used to develop several commercial offerings such as the Azure Stream Analytics service.},
}