% Poster paper listed in the WWW2004 Poster Proceedings. The `note` field
% carries a direct link to the PDF and needs \url (url or hyperref package).
% NOTE(review): `month = jan` is kept from the original export; it may be a
% placeholder default -- confirm against the actual conference dates.
% NOTE(review): `url` host restored to www.microsoft.com from a proxy-mangled
% domain in the export -- confirm the link resolves.
@inproceedings{yamada2004testbed,
  author    = {Yamada, Yasuhiro and Craswell, Nick and Nakatoh, Tetsuya and Hirokawa, Sachio},
  title     = {Testbed for Information Extraction from {Deep Web}},
  booktitle = {{WWW2004} Poster Proceedings},
  year      = {2004},
  month     = jan,
  abstract  = {Search results generated by searchable databases are served dynamically and far larger than the static documents on the Web. These results pages have been referred to as the Deep Web [1]. We need to extract the target data in results pages to integrate them on different searchable databases. We propose a testbed for information extraction from search results. We chose 100 databases randomly from 114,540 pages with search forms. Therefore, these databases have a good variety. We selected 51 databases which include URLs in a results page and manually identify target information to be extracted. We also suggest evaluation measures for comparing extraction methods and methods for extending the target data.},
  url       = {https://www.microsoft.com/en-us/research/publication/testbed-for-information-extraction-from-deep-web/},
  note      = {\url{http://research.microsoft.com/users/nickcr/pubs/yamada_www2004poster.pdf}},
}