@inproceedings{mahabadi2023core-sets, author = {Mahabadi, Sepideh and Trajanovski, Stojan}, title = {Core-sets for Fair and Diverse Data Summarization}, booktitle = {NeurIPS 2023}, year = {2023}, month = {December}, abstract = {We study core-set construction algorithms for the task of Diversity Maximization under fairness constraint. Given a set of points in a metric space partitioned into groups, and given , the goal of this problem is to pick points from each group such that the overall diversity of the picked points is maximized. We consider two natural diversity measures: sum-of-pairwise distances and sum-of-nearest-neighbor distances, and show improved core-set construction algorithms with respect to these measures. More precisely, we show the first constant factor core-set w.r.t. sum-of-pairwise distances whose size is independent of the size of the dataset and the aspect ratio. Second, we show the first core-set w.r.t. the sum-of-nearest-neighbor distances. Finally, we run several experiments showing the effectiveness of our core-set approach. In particular, we apply constrained diversity maximization to summarize a set of timed messages that takes into account the messages' recency. Specifically, the summary should include more recent messages compared to older ones. This is a real task in one of the largest communication platforms, affecting the experience of hundreds of millions daily active users. By utilizing our core-set method for this task, we achieve a 100x speed-up while losing the diversity by only a few percent. Moreover, our approach allows us to improve the space usage of the algorithm in the streaming setting. poster (.pdf)}, url = {http://approjects.co.za/?big=en-us/research/publication/core-sets-for-fair-and-diverse-data-summarization/}, }