% Conference paper: "Hermes: Clustering Users in Large-Scale E-mail Services".
% Notes for maintainers:
%   - month uses the predefined macro (jun) so the style controls its rendering.
%   - Percent signs in the abstract are escaped so styles that print the
%     abstract do not treat them as TeX comment characters.
%   - The venue is recorded once, in booktitle.
@inproceedings{karagiannis2010hermes,
  author    = {Karagiannis, Thomas and Gkantsidis, Christos and Narayanan, Dushyanth and Rowstron, Ant},
  title     = {{Hermes}: Clustering Users in Large-Scale E-mail Services},
  booktitle = {ACM Symposium on Cloud Computing 2010 (ACM SOCC 2010)},
  publisher = {Association for Computing Machinery, Inc.},
  year      = {2010},
  month     = jun,
  url       = {http://approjects.co.za/?big=en-us/research/publication/hermes-clustering-users-in-large-scale-e-mail-services/},
  abstract  = {Hermes is an optimization engine for large-scale enterprise e-mail services. Such services could be hosted by a virtualized e-mail service provider, or by dedicated enterprise data centers. In both cases we observe that the pattern of e-mails between employees of an enterprise forms an implicit social graph. Hermes tracks this implicit social graph, periodically identifies clusters of strongly connected users within the graph, and co-locates such users on the same server. Co-locating the users reduces storage requirements: senders and recipients who reside on the same server can share a single copy of an e-mail. Co-location also reduces inter-server bandwidth usage. We evaluate Hermes using a trace of all e-mails within a major corporation over a five month period. The e-mail service supports over 120,000 users on 68 servers. Our evaluation shows that using Hermes results in storage savings of 37\% and bandwidth savings of 50\% compared to current approaches. The overheads are low: a single commodity server can run the optimization for the entire system.},
}