@techreport{appuswamy2013nobody, author = {Appuswamy, Raja and Gkantsidis, Christos and Narayanan, Dushyanth and Hodson, Orion and Rowstron, Ant}, title = {Nobody ever got fired for buying a cluster}, year = {2013}, month = {January}, abstract = {In the last decade we have seen a huge deployment of cheap clusters to run data analytics workloads. The conventional wisdom in industry and academia is that scaling out using a cluster is better for these workloads than scaling up by adding more resources to a single server. Popular analytics infrastructures such as Hadoop are aimed at such a cluster scale-out environment, and in today's world nobody gets fired for adopting a cluster solution. Is this the right approach? Our measurements as well as other recent work shows that the majority of real-world analytic jobs process less than 100GB of input, but popular infrastructures such as Hadoop/MapReduce were originally designed for petascale processing. We claim that a single "scale-up" server can process each of these jobs and do as well or better than a cluster in terms of performance, cost, power, and server density. Is it time to consider the "common case" for "big data" analytics to be the single-server rather than the cluster case? If so, this has implications for data center hardware as well as software architectures. Unfortunately widely used platforms such as Hadoop perform poorly in a scale-up configuration. We describe several modifications to the Hadoop runtime to address this problem. These changes are transparent, do not require any changes to application code, and do not compromise scale-out performance. However they do significantly improve Hadoop's scale-up performance. We present a broad evaluation across 11 representative Hadoop jobs that shows scale-up to be competitive in all cases and significantly better in some cases, than scale-out. Our evaluation considers raw performance, as well as performance per dollar and per watt.}, publisher = {Microsoft Technical Report}, url = {http://approjects.co.za/?big=en-us/research/publication/nobody-ever-got-fired-for-buying-a-cluster/}, number = {MSR-TR-2013-2}, }