% Conference paper: HOBIT (International Conference on Machine Learning, 2026).
% NOTE(review): the url host was restored from a mirror-style rewrite
% (approjects.co.za/?big=...) to www.microsoft.com -- confirm that the link resolves.
@inproceedings{dutta2026hobit,
  author    = {Dutta, Himanshu and Nagalapatti, Lokesh and Prabhu, Yashoteja},
  title     = {{HOBIT}: Hardness Optimized Batch Sampling for {InfoNCE} Training},
  booktitle = {2026 International Conference on Machine Learning},
  year      = {2026},
  month     = jul,
  abstract  = {Contrastive training with InfoNCE loss and in-batch negatives is the standard approach for learning dual-encoder models. Its effectiveness, however, critically depends on the availability of hard negatives; in their absence, learning quickly saturates. Existing methods address this via explicit hard-negative mining, which is often costly or heuristic-driven. We introduce HOBIT, a principled mini-batch construction method that improves in-batch negative quality by reordering training examples at every epoch. HOBIT solves an optimization problem motivated by the InfoNCE objective to yield mini-batches such that each query in the batch is exposed to hard yet non-contradictory, informative negative examples. We show that the optimization objective is monotone and submodular which in turn leads us to a greedy algorithm that admits the standard O(1-1/e) approximation guarantee. Empirically, we show that HOBIT incurs negligible computational overhead while significantly outperforming state-of-the-art batching methods, and remains complementary to existing hard negative mining techniques.},
  url       = {https://www.microsoft.com/en-us/research/publication/hobit-hardness-optimized-batch-sampling-for-infonce-training/},
}