{"id":1041966,"date":"2024-05-31T18:59:19","date_gmt":"2024-06-01T01:59:19","guid":{"rendered":"https:\/\/www.microsoft.com\/en-us\/research\/?post_type=msr-research-item&#038;p=1041966"},"modified":"2025-10-21T11:07:07","modified_gmt":"2025-10-21T18:07:07","slug":"retrospective-dark-silicon-and-the-end-of-multicore-scaling","status":"publish","type":"msr-research-item","link":"https:\/\/www.microsoft.com\/en-us\/research\/publication\/retrospective-dark-silicon-and-the-end-of-multicore-scaling\/","title":{"rendered":"RETROSPECTIVE: Dark Silicon and the End of Multicore Scaling"},"content":{"rendered":"<p>An invited author retrospective on &#8220;Dark Silicon and the End of Multicore Scaling&#8221; originally published at ISCA 2014.<\/p>\n<p>Included in the ISCA@50 25-Year Retrospective: 1996-2020.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>An invited author retrospective on &#8220;Dark Silicon and the End of Multicore Scaling&#8221; originally published at ISCA 2014. Included in the ISCA@50 25-Year Retrospective: 1996-2020.<\/p>\n","protected":false},"featured_media":0,"template":"","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"_classifai_error":"","msr-author-ordering":null,"msr_publishername":"ACM SIGARCH and IEEE TCCA","msr_publisher_other":"","msr_booktitle":"ISCA@50 25-Year Retrospective: 1996-2020","msr_chapter":"","msr_edition":"","msr_editors":"","msr_how_published":"","msr_isbn":"","msr_issue":"","msr_journal":"","msr_number":"","msr_organization":"","msr_pages_string":"","msr_page_range_start":"","msr_page_range_end":"","msr_series":"","msr_volume":"","msr_copyright":"","msr_conference_name":"","msr_doi":"","msr_arxiv_id":"","msr_s2_paper_id":"","msr_mag_id":"","msr_pubmed_id":"","msr_other_authors":"","msr_other_contributors":"","msr_speaker":"","msr_award":"","msr_affiliation":"","msr_institution":"","msr_host":"","msr_version":"","msr_duration":"","msr_original_fields_of_study":"","msr_release_tracker_id":"","msr_s2_match_type":"","msr_citation_count_updated":"","msr_published_date":"2023-6-1","msr_highlight_text":"","msr_notes":"","msr_longbiography":"","msr_publicationurl":"","msr_external_url":"","msr_secondary_video_url":"","msr_conference_url":"","msr_journal_url":"","msr_s2_pdf_url":"","msr_year":0,"msr_citation_count":0,"msr_influential_citations":0,"msr_reference_count":0,"msr_s2_match_confidence":0,"msr_microsoftintellectualproperty":false,"msr_s2_open_access":false,"msr_s2_author_ids":[],"msr_pub_ids":[],"msr_hide_image_in_river":null,"footnotes":""},"msr-research-highlight":[],"research-area":[13552],"msr-publication-type":[193721],"msr-publisher":[],"msr-focus-area":[],"msr-locale":[268875],"msr-post-option":[],"msr-field-of-study":[249619,246691],"msr-conference":[],"msr-journal":[],"msr-impact-theme":[],"msr-pillar":[],"class_list":["post-1041966","msr-research-item","type-msr-research-item","status-publish","hentry","msr-research-area-hardware-devices","msr-locale-en_us","msr-field-of-study-computer-architecture","msr-field-of-study-computer-science"],"msr_publishername":"ACM SIGARCH and IEEE TCCA","msr_edition":"","msr_affiliation":"","msr_published_date":"2023-6-1","msr_host":"","msr_duration":"","msr_version":"","msr_speaker":"","msr_other_contributors":"","msr_booktitle":"ISCA@50 25-Year Retrospective: 1996-2020","msr_pages_string":"","msr_chapter":"","msr_isbn":"","msr_journal":"","msr_volume":"","msr_number":"","msr_editors":"","msr_series":"","msr_issue":"","msr_organization":"","msr_how_published":"","msr_notes":"","msr_highlight_text":"","msr_release_tracker_id":"","msr_original_fields_of_study":"","msr_download_urls":"","msr_external_url":"","msr_secondary_video_url":"","msr_longbiography":"","msr_microsoftintellectualproperty":0,"msr_main_download":"","msr_publicationurl":"","msr_doi":"","msr_publication_uploader":[{"type":"url","viewUrl":"false","id":"false","title":"https:\/\/bpb-us-w2.wpmucdn.com\/sites.coecis.cornell.edu\/dist\/7\/587\/files\/2023\/06\/ESMAEILZADEH_2011_DARK.pdf","label_id":"243109","label":0}],"msr_related_uploader":"","msr_citation_count":0,"msr_citation_count_updated":"","msr_s2_paper_id":"","msr_influential_citations":0,"msr_reference_count":0,"msr_arxiv_id":"","msr_s2_author_ids":[],"msr_s2_open_access":false,"msr_s2_pdf_url":null,"msr_attachments":[],"msr-author-ordering":[{"type":"text","value":"Hadi Esmaeilzadeh","user_id":0,"rest_url":false},{"type":"text","value":"Emily Blem","user_id":0,"rest_url":false},{"type":"user_nicename","value":"Renee St. Amant","user_id":43080,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Renee St. Amant"},{"type":"text","value":"Karthikeyan Sankaralingam","user_id":0,"rest_url":false},{"type":"user_nicename","value":"Doug Burger","user_id":31582,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=Doug Burger"}],"msr_impact_theme":[],"msr_research_lab":[],"msr_event":[],"msr_group":[793670],"msr_project":[1150284,1150288],"publication":[],"video":[],"msr-tool":[],"msr_publication_type":"inbook","related_content":{"projects":[{"ID":1150284,"post_title":"Kernel\u2011level innovation and hardware\u2011aware modeling\u00a0","post_name":"kernel%e2%80%91level-innovation-and-hardware%e2%80%91aware-modeling","post_type":"msr-project","post_date":"2025-10-22 14:31:38","post_modified":"2025-10-22 14:31:41","post_status":"publish","permalink":"https:\/\/www.microsoft.com\/en-us\/research\/project\/kernel%e2%80%91level-innovation-and-hardware%e2%80%91aware-modeling\/","post_excerpt":"We design and optimize GPU kernels and model\u2011execution strategies to maximize throughput and minimize latency for real\u2011world LLM workloads. Interactive enterprise scenarios often run at low batch sizes, interleave very long contexts, and have strict latency targets\u2014exposing different bottlenecks than training. Our work includes attention\u2011kernel optimization for both prefill and decode, sampling and logit\u2011processing improvements, and auto\u2011tuning at the PTX level to balance occupancy, register usage, and memory traffic. We also explore dynamic kernel selection&hellip;","_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project\/1150284"}]}},{"ID":1150288,"post_title":"System\u2011level innovation for inference at scale\u00a0","post_name":"system%e2%80%91level-innovation-for-inference-at-scale","post_type":"msr-project","post_date":"2025-10-22 10:22:38","post_modified":"2025-11-07 05:24:36","post_status":"publish","permalink":"https:\/\/www.microsoft.com\/en-us\/research\/project\/system%e2%80%91level-innovation-for-inference-at-scale\/","post_excerpt":"We reimagine the AI inference stack to be workload-aware, cost-aware, and resilient at a global scale. Our research explores innovative resource allocation, request scheduling, batching, routing, and KV caching techniques, which directly benefit Microsoft's inference infrastructure. Our goal is to bridge the gap between deployed AI models and underlying hardware through a holistic, full-stack approach. We leverage not only the diversity across workloads (e.g., agentic vs. non-agentic, stringent vs. relaxed latency requirements), model architectures and&hellip;","_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project\/1150288"}]}}]},"_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1041966","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item"}],"about":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/types\/msr-research-item"}],"version-history":[{"count":3,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1041966\/revisions"}],"predecessor-version":[{"id":1041975,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/1041966\/revisions\/1041975"}],"wp:attachment":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media?parent=1041966"}],"wp:term":[{"taxonomy":"msr-research-highlight","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-highlight?post=1041966"},{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=1041966"},{"taxonomy":"msr-publication-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-publication-type?post=1041966"},{"taxonomy":"msr-publisher","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-publisher?post=1041966"},{"taxonomy":"msr-focus-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-focus-area?post=1041966"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=1041966"},{"taxonomy":"msr-post-option","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-post-option?post=1041966"},{"taxonomy":"msr-field-of-study","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-field-of-study?post=1041966"},{"taxonomy":"msr-conference","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-conference?post=1041966"},{"taxonomy":"msr-journal","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-journal?post=1041966"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=1041966"},{"taxonomy":"msr-pillar","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-pillar?post=1041966"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}