@inproceedings{lcanonne2021the, author = {L. Canonne, Clément and Jain, Ayush and Kamath, Gautam and Li, Jerry}, title = {The Price of Tolerance in Distribution Testing}, booktitle = {COLT 2022}, year = {2021}, month = {June}, abstract = {We revisit the problem of tolerant distribution testing. That is, given samples from an unknown distribution over , is it -close to or -far from a reference distribution (in total variation distance)? Despite significant interest over the past decade, this problem is well understood only in the extreme cases. In the noiseless setting (i.e., ) the sample complexity is , strongly sublinear in the domain size. At the other end of the spectrum, when , the sample complexity jumps to the barely sublinear . However, very little is known about the intermediate regime. We fully characterize the price of tolerance in distribution testing as a function of , , , up to a single factor. Specifically, we show the sample complexity to be \[\tilde \Theta\left(\frac[\sqrt[n]][\varepsilon_2^[2]] + \frac[n][\log n] \cdot \max \left\[\frac[\varepsilon_1][\varepsilon_2^2],\left(\frac[\varepsilon_1][\varepsilon_2^2]\right)^[\!\!2]\right\]\right),\] providing a smooth tradeoff between the two previously known cases. We also provide a similar characterization for the problem of tolerant equivalence testing, where both and are unknown. Surprisingly, in both cases, the main quantity dictating the sample complexity is the ratio , and not the more intuitive . Of particular technical interest is our lower bound framework, which involves novel approximation-theoretic tools required to handle the asymmetry between and , a challenge absent from previous works.}, url = {http://approjects.co.za/?big=en-us/research/publication/the-price-of-tolerance-in-distribution-testing/}, }