{"id":688089,"date":"2020-08-28T10:30:03","date_gmt":"2020-08-28T17:30:03","guid":{"rendered":"https:\/\/www.microsoft.com\/en-us\/research\/?post_type=msr-research-item&#038;p=688089"},"modified":"2020-10-26T11:42:12","modified_gmt":"2020-10-26T18:42:12","slug":"efficiently-approximating-selectivity-functions-using-low-overhead-regression-models","status":"publish","type":"msr-research-item","link":"https:\/\/www.microsoft.com\/en-us\/research\/publication\/efficiently-approximating-selectivity-functions-using-low-overhead-regression-models\/","title":{"rendered":"Efficiently Approximating Selectivity Functions using Low Overhead Regression Models"},"content":{"rendered":"<p>Today\u2019s query optimizers use fast selectivity estimation techniques but are known<br \/>\nto be susceptible to large estimation errors. Recent work on supervised learned<br \/>\nmodels for selectivity estimation significantly improves accuracy while ensuring<br \/>\nrelatively low estimation overhead. However, these models impose significant model<br \/>\nconstruction cost as they need large numbers of training examples and computing<br \/>\nselectivity labels is costly for large datasets. We propose a novel model construction<br \/>\nmethod that incrementally generates training data and uses approximate selectivity labels,<br \/>\nthat reduces total construction cost by an order of magnitude while preserving most of<br \/>\nthe accuracy gains. The proposed method is particularly attractive for model designs<br \/>\nthat are faster-to-train for a given number of training examples, but such models are<br \/>\nknown to support a limited class of query expressions. We broaden the applicability of such<br \/>\nsupervised models to the class of select-project-join query expressions with range predicates<br \/>\nand IN clauses. Our extensive evaluation on synthetic benchmark and real-world queries shows<br \/>\nthat the 95th-percentile error of our proposed models is 10-100\u00d7 better than traditional selectivity<br \/>\nestimators. We also demonstrate significant gains in plan quality as a result of improved<br \/>\nselectivity estimates.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Today\u2019s query optimizers use fast selectivity estimation techniques but are known to be susceptible to large estimation errors. Recent work on supervised learned models for selectivity estimation significantly improves accuracy while ensuring relatively low estimation overhead. However, these models impose significant model construction cost as they need large numbers of training examples and computing selectivity [&hellip;]<\/p>\n","protected":false},"featured_media":0,"template":"","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"footnotes":""},"msr-content-type":[3],"msr-research-highlight":[],"research-area":[13556,13563],"msr-publication-type":[193716],"msr-product-type":[],"msr-focus-area":[],"msr-platform":[],"msr-download-source":[],"msr-locale":[268875],"msr-field-of-study":[],"msr-conference":[],"msr-journal":[],"msr-impact-theme":[],"msr-pillar":[],"class_list":["post-688089","msr-research-item","type-msr-research-item","status-publish","hentry","msr-research-area-artificial-intelligence","msr-research-area-data-platform-analytics","msr-locale-en_us"],"msr_publishername":"","msr_edition":"","msr_affiliation":"","msr_published_date":"2020-8-31","msr_host":"","msr_duration":"","msr_version":"","msr_speaker":"","msr_other_contributors":"","msr_booktitle":"","msr_pages_string":"","msr_chapter":"","msr_isbn":"","msr_journal":"","msr_volume":"","msr_number":"","msr_editors":"","msr_series":"","msr_issue":"","msr_organization":"","msr_how_published":"","msr_notes":"","msr_highlight_text":"","msr_release_tracker_id":"","msr_original_fields_of_study":"","msr_download_urls":"","msr_external_url":"","msr_secondary_video_url":"","msr_longbiography":"","msr_microsoftintellectualproperty":1,"msr_main_download":"","msr_publicationurl":"","msr_doi":"","msr_publication_uploader":[{"type":"url","viewUrl":"false","id":"false","title":"http:\/\/www.vldb.org\/pvldb\/vol13\/p2215-dutt.pdf","label_id":"243109","label":0}],"msr_related_uploader":"","msr_attachments":[],"msr-author-ordering":[{"type":"user_nicename","value":"Anshuman Dutt","user_id":35537,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Anshuman Dutt"},{"type":"user_nicename","value":"Chi Wang","user_id":31406,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Chi Wang"},{"type":"user_nicename","value":"Vivek Narasayya","user_id":34602,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Vivek Narasayya"},{"type":"user_nicename","value":"Surajit Chaudhuri","user_id":33764,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Surajit Chaudhuri"}],"msr_impact_theme":[],"msr_research_lab":[],"msr_event":[],"msr_group":[957177],"msr_project":[967236],"publication":[],"video":[],"download":[],"msr_publication_type":"inproceedings","_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/688089"}],"collection":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item"}],"about":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/types\/msr-research-item"}],"version-history":[{"count":3,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/688089\/revisions"}],"predecessor-version":[{"id":688287,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/688089\/revisions\/688287"}],"wp:attachment":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media?parent=688089"}],"wp:term":[{"taxonomy":"msr-content-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-content-type?post=688089"},{"taxonomy":"msr-research-highlight","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-highlight?post=688089"},{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=688089"},{"taxonomy":"msr-publication-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-publication-type?post=688089"},{"taxonomy":"msr-product-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-product-type?post=688089"},{"taxonomy":"msr-focus-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-focus-area?post=688089"},{"taxonomy":"msr-platform","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-platform?post=688089"},{"taxonomy":"msr-download-source","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-download-source?post=688089"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=688089"},{"taxonomy":"msr-field-of-study","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-field-of-study?post=688089"},{"taxonomy":"msr-conference","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-conference?post=688089"},{"taxonomy":"msr-journal","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-journal?post=688089"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=688089"},{"taxonomy":"msr-pillar","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-pillar?post=688089"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}