% Conference paper by Crestan and Pantel on classifying HTML tables (WSDM 2011).
% The venue name is stored only in booktitle; the edition field is reserved for
% an ordinal edition and is therefore not used to repeat the proceedings title.
% NOTE(review): the url host looks like a mirror/proxy rewrite rather than the
% publisher's own site -- confirm and replace with the canonical link or a DOI.
@inproceedings{crestan2011web-scale,
  author    = {Crestan, Eric and Pantel, Patrick},
  title     = {Web-Scale Table Census and Classification},
  booktitle = {Proceedings of Web Search and Data Mining ({WSDM-11})},
  year      = {2011},
  month     = jan,
  pages     = {545--554},
  url       = {http://approjects.co.za/?big=en-us/research/publication/web-scale-table-census-and-classification/},
  abstract  = {We report on a census of the types of HTML tables on the Web according to a fine-grained classification taxonomy describing the semantics that they express. For each relational table type, we describe open challenges for extracting from them semantic triples, i.e., knowledge. We also present TabEx, a supervised framework for web-scale HTML table classification and apply it to the task of classifying HTML tables into our taxonomy. We show empirical evidence, through a large-scale experimental analysis over a crawl of the Web, that classification accuracy significantly outperforms several baselines. We present a detailed feature analysis and outline the most salient features for each table type.},
}