% Technical report TR-09-33 (November 2009): Compiler-Assisted Hybrid Operand Communication.
% The issuing organisation is stored in `institution`, the field @techreport requires.
@techreport{li2009compiler-assisted,
  author      = {Li, Dong and Govindan, Madhu Saravana Sibi and Smith, Aaron and Burger, Doug and Keckler, Steve},
  title       = {Compiler-Assisted Hybrid Operand Communication},
  institution = {UT-Austin, Department of Computer Science},
  number      = {TR-09-33},
  year        = {2009},
  month       = nov,
  url         = {http://approjects.co.za/?big=en-us/research/publication/compiler-assisted-hybrid-operand-communication/},
  abstract    = {Communication of operands among in-flight instructions can be power intensive, especially in superscalar processors where all result tags are broadcast to a small number of consumers through a multi-entry CAM. Token-based point-to-point communication of operands in dataflow architectures is highly efficient when each produced token has only one consumer, but inefficient when there are many consumers due to the construction of software fanout trees. Placing operands in registers is efficient for broadcasting the values which have consumers spread over a long lifetime, but inefficient for shorter-lived operations. This paper evaluates a compiler-assisted hybrid instruction communication model that combines tokens instruction communication with statically assigned broadcast tags. Each fixed-size block of code is given a small number of architectural broadcast identifiers, which the compiler can assign to producers that have many consumers. Producers with few consumers rely on point-to-point communication through tokens. Producers whose result is live past the instruction block communicate with distant consumers through a register. Selecting the mechanism statically by the compiler relieves the hardware from categorizing instructions at runtime. At the same time, a compiler can categorize instructions better than dynamic selection does because the compiler analyzes a larger range of instructions. Furthermore, compiler could perform complex optimizations without hardware cost and execution-time penalty. We propose a compiler optimization to reuse broadcast tags for instructions with non-overlapping broadcast live ranges, the speedup is further improved without spending more power.
The results show that this compiler-assisted hybrid token/broadcast model requires only eight architectural broadcasts per block, enabling highly efficient CAMs. This hybrid model reduces instruction communication energy by 28\% compared to a strictly token-based dataflow model (and by over 2.7X compared to a hybrid model without compiler support), while simultaneously increasing performance by 8\% on average across the SPECINT and EEMBC benchmarks, running as single threads on 16 composed, dual-issue EDGE cores.},
}