% LaSEWeb paper (Polozov and Gulwani), presented at KDD'14.
% The event dates are stored in ISO form in `eventdate` (ignored by classic
% BibTeX styles, used by biblatex) instead of being hard-coded in `booktitle`
% or misfiled under `edition`.
% NOTE(review): the url host does not look like the publisher's own domain
% (possibly a mirror/proxy of the original publication page) -- confirm and
% replace with the canonical URL or a DOI.
@inproceedings{polozov2014laseweb,
  author    = {Polozov, Oleksandr and Gulwani, Sumit},
  title     = {{LaSEWeb}: Automating Search Strategies Over Semi-structured {Web} Data},
  booktitle = {{KDD}'14, New York, NY, USA},
  year      = {2014},
  month     = aug,
  eventdate = {2014-08-24/2014-08-27},
  abstract  = {We show how to programmatically model processes that humans use when extracting answers to queries (e.g., ``Who invented typewriter?'', ``List of Washington national parks'') from semi-structured Web pages returned by a search engine. This modeling enables various applications including automating repetitive search tasks, and helping search engine developers design micro-segments of factoid questions. We describe the design and implementation of a domain-specific language that enables extracting data from a webpage based on its structure, visual layout, and linguistic patterns. We also describe an algorithm to rank multiple answers extracted from multiple webpages. On 100,000+ queries (across 7 micro-segments) obtained from Bing logs, our system LaSEWeb answered queries with an average recall of 71\%. Also, the desired answer(s) were present in top-3 suggestions for 95\%+ cases.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/laseweb-automating-search-strategies-semi-structured-web-data/},
}