A real-world experiment deployed in the Microsoft image recommendation product Windows Spotlight showcases that the proposed method outperforms the hand-engineered reward baseline and succeeds in a practical application serving millions of people.<\/li>\n<\/ul>\n","protected":false},"excerpt":{"rendered":"In reinforcement learning, handcrafting reward functions is difficult and can yield algorithms that don\u2019t generalize well. IGL-P, an interaction-grounded learning strategy, learns personalized rewards for different people in recommender system scenarios.<\/p>\n","protected":false},"author":42183,"featured_media":937641,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"_classifai_error":"","footnotes":""},"categories":[1],"tags":[],"research-area":[13556],"msr-region":[],"msr-event-type":[],"msr-locale":[268875],"msr-post-option":[243984],"msr-impact-theme":[],"msr-promo-type":[],"msr-podcast-series":[],"class_list":["post-937629","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-research-blog","msr-research-area-artificial-intelligence","msr-locale-en_us","msr-post-option-blog-homepage-featured"],"msr_event_details":{"start":"","end":"","location":""},"podcast_url":"","podcast_episode":"","msr_research_lab":[199571],"msr_impact_theme":[],"related-publications":[],"related-downloads":[],"related-videos":[],"related-academic-programs":[],"related-groups":[144902],"related-projects":[],"related-events":[],"related-researchers":[{"type":"guest","value":"jessica-maghakian","user_id":"935250","display_name":"Jessica Maghakian","author_link":"Jessica Maghakian<\/a>","is_active":true,"last_first":"Maghakian, Jessica","people_section":0,"alias":"jessica-maghakian"},{"type":"user_nicename","value":"Cheng Tan","user_id":37953,"display_name":"Cheng Tan","author_link":"Cheng Tan<\/a>","is_active":false,"last_first":"Tan, Cheng","people_section":0,"alias":"chetan"},{"type":"user_nicename","value":"Paul Mineiro","user_id":33272,"display_name":"Paul Mineiro","author_link":"Paul Mineiro<\/a>","is_active":false,"last_first":"Mineiro, Paul","people_section":0,"alias":"pmineiro"}],"msr_type":"Post","featured_image_thumbnail":"","byline":"Jessica Maghakian<\/a>, Akanksha Saran, Cheng Tan<\/a>, and Paul Mineiro<\/a>","formattedDate":"May 4, 2023","formattedExcerpt":"In reinforcement learning, handcrafting reward functions is difficult and can yield algorithms that don\u2019t generalize well. IGL-P, an interaction-grounded learning strategy, learns personalized rewards for different people in recommender system scenarios.","locale":{"slug":"en_us","name":"English","native":"","english":"English"},"_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/posts\/937629"}],"collection":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/users\/42183"}],"replies":[{"embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/comments?post=937629"}],"version-history":[{"count":24,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/posts\/937629\/revisions"}],"predecessor-version":[{"id":938997,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/posts\/937629\/revisions\/938997"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media\/937641"}],"wp:attachment":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media?parent=937629"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/categories?post=937629"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/tags?post=937629"},{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=937629"},{"taxonomy":"msr-region","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-region?post=937629"},{"taxonomy":"msr-event-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-event-type?post=937629"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=937629"},{"taxonomy":"msr-post-option","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-post-option?post=937629"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=937629"},{"taxonomy":"msr-promo-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-promo-type?post=937629"},{"taxonomy":"msr-podcast-series","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-podcast-series?post=937629"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}