% Journal article on Phrasal Rank-Encoding (PR-Enc) for phrase-table compression.
% The month uses the standard BibTeX macro and the page range uses an en-dash (--).
@article{junczys-dowmunt2012phrasal,
  author   = {Junczys-Dowmunt, Marcin},
  title    = {Phrasal rank-encoding: Exploiting phrase redundancy and translational relations for phrase table compression},
  journal  = {The Prague Bulletin of Mathematical Linguistics},
  volume   = {98},
  pages    = {63--74},
  year     = {2012},
  month    = oct,
  abstract = {We describe Phrasal Rank-Encoding (PR-Enc), a novel method for the compression of word-aligned target language data in phrase tables as used in phrase-based SMT. This method reduces the redundancy in phrase tables which is a direct effect of the phrase-based approach. A combination of PR-Enc with Huffman coding allows to reduce the size of an aggressively compressed phrase table by another 39 percent. Using this and other methods for space reduction in a new binary phrase table implementation, a size reduction by an order of magnitude is achieved when comparing to the Moses on-disk phrase table implementation. Concerning decoding speed, all variants of the new phrase table are faster than the Moses binary phrase table implementation while the PR-Enc encoded variant outperforms all other methods.},
  url      = {https://www.microsoft.com/en-us/research/publication/phrasal-rank-encoding-exploiting-phrase-redundancy-translational-relations-phrase-table-compression/},
}