@inproceedings{wang2010learning, author = {Wang, Fei and Li, Ping and König, Arnd Christian and König, Arnd Christian}, title = {Learning a Bi-Stochastic Data Similarity Matrix}, booktitle = {The 10th International Conference on Data Mining (ICDM)}, year = {2010}, month = {December}, abstract = {An idealized clustering algorithm seeks to learn a cluster-adjacency matrix such that, if two data points belong to the same cluster, the corresponding entry would be 1; otherwise the entry would be 0. This integer (1/0) constraint makes it difficult to find the optimal solution. We propose a relaxation on the cluster-adjacency matrix, by deriving a bi-stochastic matrix from a data similarity (e.g., kernel) matrix according to the Bregman divergence. Our general method is named the Bregmanian Bi-Stochastication (BBS) algorithm. We focus on two popular choices of the Bregman divergence: the Euclidian distance and he KL divergence. Interestingly, the BBS algorithm using the KL divergence is equivalent to the Sinkhorn-Knopp (SK) algorithm for deriving bi-stochastic matrices. We show that the BBS algorithm using the Euclidian distance is closely related to the relaxed k-means clustering and can often produce noticeably superior clustering results than the SK algorithm (and other algorithms such as Normalized Cut), through extensive experiments on public data sets.}, publisher = {IEEE}, url = {http://approjects.co.za/?big=en-us/research/publication/learning-a-bi-stochastic-data-similarity-matrix/}, edition = {The 10th International Conference on Data Mining (ICDM)}, }