% Conference paper entry for Jigsaw (Kennedy and Nath, 2011).
% NOTE(review): month is kept as exported (January); the conference itself is
%   believed to have taken place in June 2011 -- confirm against the publisher record.
% NOTE(review): the url host looks like a third-party mirror rather than the
%   publisher's own site -- confirm, and prefer adding a doi field once verified.
@inproceedings{kennedy2011jigsaw,
  author    = {Kennedy, Oliver and Nath, Suman},
  title     = {{Jigsaw}: Efficient optimization over uncertain enterprise data},
  booktitle = {{SIGMOD}'11: Proceedings of the 2011 {ACM} {SIGMOD} international conference on Management of data},
  publisher = {ACM},
  year      = {2011},
  month     = jan,
  url       = {http://approjects.co.za/?big=en-us/research/publication/jigsaw-efficient-optimization-over-uncertain-enterprise-data/},
  abstract  = {Probabilistic databases, in particular ones that allow users to externally define models or probability distributions -- so called VG-Functions -- are an ideal tool for constructing, simulating and analyzing hypothetical business scenarios. Enterprises often use such tools with parameterized models and need to explore a large parameter space in order to discover parameter values that optimize for a given goal. Parameter space is usually very large, making such exploration extremely expensive. We present Jigsaw, a probabilistic database-based simulation framework that addresses this performance problem. In Jigsaw, users define what-if style scenarios as parameterized probabilistic database queries and identify parameter values that achieve desired properties. Jigsaw uses a novel ``fingerprinting'' technique that efficiently identifies correlations between a query's output distribution for different parameter values. Using fingerprints, Jigsaw is able to reuse work performed for different parameter values, and obtain speedups of as much as 2 orders of magnitude for several real business scenarios.},
}