{"id":166142,"date":"2014-06-01T00:00:00","date_gmt":"2014-06-01T00:00:00","guid":{"rendered":"https:\/\/www.microsoft.com\/en-us\/research\/msr-research-item\/structured-generative-models-of-natural-source-code\/"},"modified":"2022-03-01T12:52:07","modified_gmt":"2022-03-01T20:52:07","slug":"structured-generative-models-of-natural-source-code","status":"publish","type":"msr-research-item","link":"https:\/\/www.microsoft.com\/en-us\/research\/publication\/structured-generative-models-of-natural-source-code\/","title":{"rendered":"Structured Generative Models of Natural Source Code"},"content":{"rendered":"
We study the problem of building generative models of natural source code (NSC); that is, source code written by humans and meant to be understood by humans. Our primary contribution is to describe new generative models that are tailored to NSC. The models are based on probabilistic context free grammars (PCFGs) and neuro-probabilistic language models (Mnih & Teh, 2012), which are extended to incorporate additional source code-specific structure. These models can be efficiently trained on a corpus of source code and outperform a variety of less structured baselines in terms of predictive log likelihoods on held-out data.<\/p>\n","protected":false},"excerpt":{"rendered":"
We study the problem of building generative models of natural source code (NSC); that is, source code written by humans and meant to be understood by humans. Our primary contribution is to describe new generative models that are tailored to NSC. The models are based on probabilistic context free grammars (PCFGs) and neuro-probabilistic language models […]<\/p>\n","protected":false},"featured_media":0,"template":"","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"_classifai_error":"","msr-author-ordering":null,"msr_publishername":"JMLR.org","msr_publisher_other":"","msr_booktitle":"","msr_chapter":"","msr_edition":"","msr_editors":"","msr_how_published":"","msr_isbn":"","msr_issue":"","msr_journal":"","msr_number":"","msr_organization":"","msr_pages_string":"","msr_page_range_start":"649","msr_page_range_end":"657","msr_series":"","msr_volume":"","msr_copyright":"","msr_conference_name":"The 31st International Conference on Machine Learning (ICML)","msr_doi":"","msr_arxiv_id":"","msr_s2_paper_id":"","msr_mag_id":"2962725091","msr_pubmed_id":"","msr_other_authors":"Chris J Maddison","msr_other_contributors":"","msr_speaker":"","msr_award":"","msr_affiliation":"","msr_institution":"","msr_host":"","msr_version":"","msr_duration":"","msr_original_fields_of_study":null,"msr_release_tracker_id":"","msr_s2_match_type":"","msr_citation_count_updated":"","msr_published_date":"2014-6-20","msr_highlight_text":"","msr_notes":"","msr_longbiography":"","msr_publicationurl":"","msr_external_url":"","msr_secondary_video_url":"","msr_conference_url":"","msr_journal_url":"","msr_s2_pdf_url":"","msr_year":0,"msr_citation_count":0,"msr_influential_citations":0,"msr_reference_count":0,"msr_s2_match_confidence":0,"msr_microsoftintellectualproperty":true,"msr_s2_open_access":false,"msr_s2_author_ids":[],"msr_pub_ids":[],"msr_hide_image_in_river":0,"footnotes":""},"msr-research-highlight":[],"research-area":[13556],"msr-publication-type":[193716],"msr-publisher":[],"msr-focus-area":[],"msr-locale":[268875],"msr-post-option":[],"msr-field-of-study":[246694,249193,246691,263395,246748,248353,246808,248296,249181,255244,263392],"msr-conference":[260284],"msr-journal":[],"msr-impact-theme":[],"msr-pillar":[],"class_list":["post-166142","msr-research-item","type-msr-research-item","status-publish","hentry","msr-research-area-artificial-intelligence","msr-locale-en_us","msr-field-of-study-artificial-intelligence","msr-field-of-study-code-cryptography","msr-field-of-study-computer-science","msr-field-of-study-context-free-grammar","msr-field-of-study-generative-grammar","msr-field-of-study-language-model","msr-field-of-study-natural-language-processing","msr-field-of-study-probabilistic-logic","msr-field-of-study-source-code","msr-field-of-study-structure-mathematical-logic","msr-field-of-study-variety-linguistics"],"msr_publishername":"JMLR.org","msr_edition":"","msr_affiliation":"","msr_published_date":"2014-6-20","msr_host":"","msr_duration":"","msr_version":"","msr_speaker":"","msr_other_contributors":"","msr_booktitle":"","msr_pages_string":"","msr_chapter":"","msr_isbn":"","msr_journal":"","msr_volume":"","msr_number":"","msr_editors":"","msr_series":"","msr_issue":"","msr_organization":"","msr_how_published":"","msr_notes":"","msr_highlight_text":"","msr_release_tracker_id":"","msr_original_fields_of_study":"","msr_download_urls":"","msr_external_url":"","msr_secondary_video_url":"","msr_longbiography":"","msr_microsoftintellectualproperty":1,"msr_main_download":"","msr_publicationurl":"","msr_doi":"","msr_publication_uploader":[{"type":"url","viewUrl":"false","id":"false","title":"http:\/\/proceedings.mlr.press\/v32\/maddison14.pdf","label_id":"243132","label":0},{"type":"url","viewUrl":"false","id":"false","title":"http:\/\/jmlr.org\/proceedings\/papers\/v32\/maddison14.pdf","label_id":"243109","label":0}],"msr_related_uploader":"","msr_citation_count":0,"msr_citation_count_updated":"","msr_s2_paper_id":"","msr_influential_citations":0,"msr_reference_count":0,"msr_arxiv_id":"","msr_s2_author_ids":[],"msr_s2_open_access":false,"msr_s2_pdf_url":null,"msr_attachments":[],"msr-author-ordering":[{"type":"guest","value":"chris-j-maddison","user_id":823009,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=chris-j-maddison"},{"type":"guest","value":"daniel-tarlow","user_id":823012,"rest_url":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/microsoft-research\/v1\/researchers?person=daniel-tarlow"}],"msr_impact_theme":[],"msr_research_lab":[],"msr_event":[],"msr_group":[],"msr_project":[323543],"publication":[],"video":[],"msr-tool":[],"msr_publication_type":"inproceedings","related_content":{"projects":[{"ID":323543,"post_title":"Deep Program Understanding","post_name":"deep-program-understanding","post_type":"msr-project","post_date":"2017-05-02 03:55:00","post_modified":"2022-03-03 01:28:05","post_status":"publish","permalink":"https:\/\/www.microsoft.com\/en-us\/research\/project\/deep-program-understanding\/","post_excerpt":"The Deep Program Understanding project aims to teach machines to understand complex algorithms, combining methods from the programming languages, software engineering and the machine learning communities.","_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-project\/323543"}]}}]},"_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/166142","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item"}],"about":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/types\/msr-research-item"}],"version-history":[{"count":3,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/166142\/revisions"}],"predecessor-version":[{"id":823018,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-item\/166142\/revisions\/823018"}],"wp:attachment":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media?parent=166142"}],"wp:term":[{"taxonomy":"msr-research-highlight","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-research-highlight?post=166142"},{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=166142"},{"taxonomy":"msr-publication-type","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-publication-type?post=166142"},{"taxonomy":"msr-publisher","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-publisher?post=166142"},{"taxonomy":"msr-focus-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-focus-area?post=166142"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=166142"},{"taxonomy":"msr-post-option","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-post-option?post=166142"},{"taxonomy":"msr-field-of-study","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-field-of-study?post=166142"},{"taxonomy":"msr-conference","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-conference?post=166142"},{"taxonomy":"msr-journal","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-journal?post=166142"},{"taxonomy":"msr-impact-theme","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-impact-theme?post=166142"},{"taxonomy":"msr-pillar","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-pillar?post=166142"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}