Chetan Bansal<\/a>.<\/p>\n","protected":false},"excerpt":{"rendered":" Expanded LLM use creates new demands on cloud GPU capacity. Splitwise presents an efficient solution by separating the two essential phases of LLM inference, achieving higher throughput within a limited power budget.<\/p>\n","protected":false},"author":42735,"featured_media":996810,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"footnotes":""},"categories":[1],"tags":[],"research-area":[13556],"msr-region":[],"msr-event-type":[],"msr-locale":[268875],"msr-post-option":[243984],"msr-impact-theme":[264846],"msr-promo-type":[],"msr-podcast-series":[],"class_list":["post-995736","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-research-blog","msr-research-area-artificial-intelligence","msr-locale-en_us","msr-post-option-blog-homepage-featured"],"msr_event_details":{"start":"","end":"","location":""},"podcast_url":"","podcast_episode":"","msr_research_lab":[],"msr_impact_theme":["Computing foundations"],"related-publications":[],"related-downloads":[],"related-videos":[],"related-academic-programs":[],"related-groups":[],"related-projects":[],"related-events":[],"related-researchers":[{"type":"user_nicename","value":"Esha Choukse","user_id":40417,"display_name":"Esha Choukse","author_link":"Esha Choukse<\/a>","is_active":false,"last_first":"Choukse, Esha","people_section":0,"alias":"eschouks"},{"type":"user_nicename","value":"Chaojie Zhang","user_id":42705,"display_name":"Chaojie Zhang","author_link":"Chaojie Zhang<\/a>","is_active":false,"last_first":"Zhang, Chaojie","people_section":0,"alias":"chaojiezhang"},{"type":"user_nicename","value":"\u00cd\u00f1igo Goiri","user_id":32102,"display_name":"Íñigo Goiri","author_link":"Íñigo Goiri<\/a>","is_active":false,"last_first":"Goiri, \u00cd\u00f1igo","people_section":0,"alias":"inigog"},{"type":"user_nicename","value":"Aashaka Shah","user_id":43056,"display_name":"Aashaka Shah","author_link":"Aashaka Shah<\/a>","is_active":false,"last_first":"Shah, Aashaka","people_section":0,"alias":"aashakashah"},{"type":"user_nicename","value":"Rodrigo Fonseca","user_id":40429,"display_name":"Rodrigo Fonseca","author_link":"Rodrigo Fonseca<\/a>","is_active":false,"last_first":"Fonseca, Rodrigo","people_section":0,"alias":"rofons"},{"type":"user_nicename","value":"Ricardo Bianchini","user_id":33393,"display_name":"Ricardo Bianchini","author_link":"Ricardo Bianchini<\/a>","is_active":false,"last_first":"Bianchini, Ricardo","people_section":0,"alias":"ricardob"}],"msr_type":"Post","featured_image_thumbnail":"","byline":"","formattedDate":"January 4, 2024","formattedExcerpt":"Expanded LLM use creates new demands on cloud GPU capacity. Splitwise presents an efficient solution by separating the two essential phases of LLM inference, achieving higher throughput within a limited power budget.","locale":{"slug":"en_us","name":"English","native":"","english":"English"},"_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/posts\/995736"}],"collection":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/users\/42735"}],"replies":[{"embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/comments?post=995736"}],"version-history":[{"count":35,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/posts\/995736\/revisions"}],"predecessor-version":[{"id":997956,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/posts\/995736\/revisions\/997956"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media\/996810"}],"wp:attachment":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media?parent=995736"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/categories?post=995736"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/tags?post=995736"},{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=995736"},{"taxonomy":"msr-region","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-region?post=995736"},{"taxonomy":"msr-event-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-event-type?post=995736"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=995736"},{"taxonomy":"msr-post-option","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-post-option?post=995736"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=995736"},{"taxonomy":"msr-promo-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-promo-type?post=995736"},{"taxonomy":"msr-podcast-series","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-podcast-series?post=995736"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}