@inproceedings{dwivedi2021kernel,
  author    = {Dwivedi, Raaz and Mackey, Lester},
  title     = {Kernel Thinning},
  booktitle = {COLT 2021},
  year      = {2021},
  month     = {May},
  abstract  = {We introduce kernel thinning, a new procedure for compressing a distribution $\mathbb{P}$ more effectively than i.i.d. sampling or standard thinning. Given a suitable reproducing kernel $\mathbf{k}$ and $\mathcal{O}(n^2)$ time, kernel thinning compresses an $n$-point approximation to $\mathbb{P}$ into a $\sqrt{n}$-point approximation with comparable worst-case integration error in the associated reproducing kernel Hilbert space. With high probability, the maximum discrepancy in integration error is $\mathcal{O}_d(n^{-\frac{1}{2}}\sqrt{\log n})$ for compactly supported $\mathbb{P}$ and $\mathcal{O}_d(n^{-\frac{1}{2}}\sqrt{(\log n)^{d+1}\log\log n})$ for sub-exponential $\mathbb{P}$ on $\mathbb{R}^d$. In contrast, an equal-sized i.i.d. sample from $\mathbb{P}$ suffers $\Omega(n^{-\frac{1}{4}})$ integration error. Our sub-exponential guarantees resemble the classical quasi-Monte Carlo error rates for uniform $\mathbb{P}$ on $[0,1]^d$ but apply to general distributions on $\mathbb{R}^d$ and a wide range of common kernels. We use our results to derive explicit non-asymptotic maximum mean discrepancy bounds for Gaussian, Mat\'ern, and B-spline kernels and present two vignettes illustrating the practical benefits of kernel thinning over i.i.d. sampling and standard Markov chain Monte Carlo thinning.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/kernel-thinning/},
}