% Preprint entry: Bee et al., content-based similarity search in DNA data storage (bioRxiv, 2020).
% The abstract text is kept exactly as supplied in this file.
% NOTE(review): the url host was restored from a proxy-mirror form (approjects.co.za/?big=...)
% to www.microsoft.com with the path unchanged; confirm that it resolves before release.
@misc{bee2020content-based,
  author       = {Bee, Callista and Chen, Yuan-Jyue and Ward, David and Liu, Xiaomeng and Seelig, Georg and Strauss, Karin and Ceze, Luis},
  title        = {Content-Based Similarity Search in Large-Scale {DNA} Data Storage Systems},
  howpublished = {bioRxiv},
  year         = {2020},
  month        = may,
  abstract     = {Synthetic DNA has the potential to store the world’s continuously growing amount of data in an extremely dense and durable medium. Current proposals for DNA-based digital storage systems include the ability to retrieve individual files by their unique identifier, but not by their content. Here, we demonstrate content-based retrieval from a DNA database by learning a mapping from images to DNA sequences such that an encoded query image will retrieve visually similar images from the database via DNA hybridization. We encoded and synthesized a database of 1.6 million images and queried it with a variety of images, showing that each query retrieves a sample of the database containing visually similar images are retrieved at a rate much greater than chance. We compare our results with several algorithms for similarity search in electronic systems, and demonstrate that our molecular approach is competitive with state-of-the-art electronics.},
  url          = {https://www.microsoft.com/en-us/research/publication/content-based-similarity-search-in-large-scale-dna-data-storage-systems/},
}