% Poster paper: Yamada, Craswell, Nakatoh, Hirokawa -- WWW2004 Poster Proceedings.
% The `note` field relies on the \url command (provided by the `url` or
% `hyperref` package) to typeset the direct link to the PDF.
% NOTE(review): `month` is recorded as January in the source data -- confirm
% against the actual conference date before relying on it.
% NOTE(review): the `url` host looks like a proxy/mirror rewrite rather than
% the publisher's canonical address -- verify and replace if so.
@inproceedings{yamada2004testbed,
  author    = {Yamada, Yasuhiro and Craswell, Nick and Nakatoh, Tetsuya and Hirokawa, Sachio},
  title     = {Testbed for Information Extraction from {Deep Web}},
  booktitle = {{WWW2004} Poster Proceedings},
  year      = {2004},
  month     = jan,
  abstract  = {Search results generated by searchable databases are served dynamically and far larger than the static documents on the Web. These results pages have been referred to as the Deep Web [1]. We need to extract the target data in results pages to integrate them on different searchable databases. We propose a testbed for information extraction from search results. We chose 100 databases randomly from 114,540 pages with search forms. Therefore, these databases have a good variety. We selected 51 databases which include URLs in a results page and manually identify target information to be extracted. We also suggest evaluation measures for comparing extraction methods and methods for extending the target data.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/testbed-for-information-extraction-from-deep-web/},
  note      = {\url{http://research.microsoft.com/users/nickcr/pubs/yamada_www2004poster.pdf}},
}