3. \u4f18\u4e8e\u9ed1\u7bb1\u7684\u5b9e\u4f8b\u7ea7\u6027\u80fd\u9884\u6d4b\u6a21\u578b<\/strong><\/p>\n\n\n\n\u7814\u7a76\u5458\u4eec\u4ee5\u9700\u6c42\u5c42\u7ea7\u5411\u91cf\u4e3a\u8f93\u5165\u7279\u5f81\uff0c\u8bad\u7ec3\u4e86\u968f\u673a\u68ee\u6797\u5206\u7c7b\u5668\u4f5c\u4e3a\u8bc4\u4f30\u5458\u53bb\u9884\u6d4b LLMs \u5728\u65b0\u4efb\u52a1\u5b9e\u4f8b\u4e2d\u7684\u8868\u73b0\u3002\u5b9e\u9a8c\u8868\u660e\uff0c\u8be5\u6a21\u578b\u5728\u5206\u5e03\u5185\u5916\u6570\u636e\u4e2d\u5747\u5b9e\u73b0\u4e86\u4f18\u5f02\u7684\u9884\u6d4b\u6027\u80fd\uff1a\u5728\u5bf9\u524d\u6cbf\u6a21\u578b\u7684\u9884\u6d4b\u4e2d\uff0cAUROC\uff08Area Under the Receiver Operating Characteristic Curve\uff09\u503c\u6700\u9ad8\u8fbe0.88\uff0c\u4e14\u6821\u51c6\u8bef\u5dee\u63a5\u8fd1\u5b8c\u7f8e\uff0c\u663e\u8457\u4f18\u4e8e\u57fa\u4e8e GloVe \u8bcd\u5411\u91cf\u4e0e\u5fae\u8c03 LLaMA-3.1-8B \u7684\u9ed1\u7bb1\u57fa\u7ebf\u65b9\u6cd5\uff0c\u5c24\u5176\u662f\u5728\u5206\u5e03\u5916\u6570\u636e\u4e0a\u3002\u8fd9\u8fdb\u4e00\u6b65\u9a8c\u8bc1\u4e86\u8be5\u65b0\u8303\u5f0f\u7684\u79d1\u5b66\u6027\u3002<\/p>\n\n\n\n
\u76ee\u524d\uff0c\u8be5\u65b9\u6cd5\u5df2\u6210\u529f\u5e94\u7528\u4e8e15\u4e2a\u4e3b\u6d41 LLMs \u7684\u8bc4\u4f30\u3002\u7814\u7a76\u56e2\u961f\u8ba1\u5212\u5c06\u5176\u6269\u5c55\u81f3\u591a\u6a21\u6001\u3001\u5177\u8eab\u667a\u80fd\u7b49\u573a\u666f\uff0c\u4e3a\u4eba\u5de5\u667a\u80fd\u7814\u53d1\u3001\u653f\u7b56\u5236\u5b9a\u4e0e\u5b89\u5168\u5ba1\u8ba1\u63d0\u4f9b\u79d1\u5b66\u5316\u3001\u6807\u51c6\u5316\u7684\u8bc4\u4f30\u57fa\u7840\u8bbe\u65bd\u3002<\/p>\n\n\n\n
\u6b64\u9879\u5de5\u4f5c\u9996\u6b21\u5b9e\u73b0\u4e86\u89e3\u91ca\u529b\u4e0e\u9884\u6d4b\u529b\u7684\u534f\u540c\u7a81\u7834\uff0c\u6807\u5fd7\u7740\u4eba\u5de5\u667a\u80fd\u8bc4\u4f30\u79d1\u5b66\u7684\u4e00\u4e2a\u91cd\u8981\u8fdb\u5c55\u3002\u901a\u8fc7\u6784\u5efa\u53ef\u6269\u5c55\u7684\u534f\u4f5c\u793e\u533a\uff0c\u8be5\u65b9\u6cd5\u5c06\u6301\u7eed\u63a8\u52a8\u4eba\u5de5\u667a\u80fd\u7cfb\u7edf\u6027\u80fd\u4e0e\u5b89\u5168\u6027\u7684\u53ef\u89e3\u91ca\u6027\u4e0e\u53ef\u9884\u6d4b\u6027\uff0c\u4e3a\u5e94\u5bf9\u901a\u7528\u578b\u4eba\u5de5\u667a\u80fd\u5feb\u901f\u53d1\u5c55\u5e26\u6765\u7684\u8bc4\u4f30\u6311\u6218\u63d0\u4f9b\u5173\u952e\u7684\u65b9\u6cd5\u8bba\u652f\u6491\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"
\u4f5c\u8005\uff1a\u5468\u4e50\u946b \u968f\u7740\u4eba\u5de5\u667a\u80fd\u7684\u5feb\u901f\u53d1\u5c55\uff0c\u901a\u7528\u578b\u4eba\u5de5\u667a\u80fd\uff08\u5982\u5927\u8bed\u8a00\u6a21\u578b\uff09\u5728\u8bf8\u591a\u9886\u57df\u90fd\u8868\u73b0\u51fa\u8272\uff0c\u5305\u62ec\u53ef\u4ee5\u89e3\u51b3\u590d\u6742\u7684\u6570\u5b66\u95ee\u9898\uff0c\u4f46\u7531\u4e8e\u5176\u5b58\u5728\u4e0d\u53ef\u89e3\u91ca\u6027\u4ee5\u53ca\u4e0d\u53ef\u9884\u6d4b\u6027\uff0c\u6240\u4ee5\u5728\u57fa\u7840\u7b97\u672f\u7b49\u7b80\u5355\u4efb\u52a1\u4e0a\u4ecd\u53ef\u80fd\u51fa\u73b0\u5931\u8bef\u3002\u8fd9\u5bf9\u4eba\u5de5\u667a\u80fd\u7684\u8bc4\u4f30\u63d0\u51fa\u4e86\u91cd\u5927\u6311\u6218\u2014\u2014\u4e9f\u9700\u53d1\u5c55\u53ef\u89e3\u91ca\u4e0e\u53ef\u9884\u6d4b\u7684\u8bc4\u4f30\u65b9\u6cd5\uff0c\u4ee5\u660e\u786e\u7cfb\u7edf\u5931\u8d25\u7684\u539f\u56e0\uff0c\u5e76\u6307\u5bfc\u8fdb\u884c\u53ef\u9760\u90e8\u7f72\u3002\u7136\u800c\uff0c\u76ee\u524d\u5c1a\u65e0\u53ef\u4ee5\u540c\u65f6\u6ee1\u8db3\u8fd9\u4e24\u70b9\u8981\u6c42\u7684\u8bc4\u4f30\u8303\u5f0f\u3002 \u4f20\u7edf\u4ee5\u6027\u80fd\u4e3a\u5bfc\u5411\u7684\u8bc4\u4f30\u65b9\u6cd5\u5728\u4e2a\u4f53\u4efb\u52a1\u5b9e\u4f8b\u5c42\u9762\u5747\u7f3a\u4e4f\u89e3\u91ca\u529b\u4e0e\u9884\u6d4b\u529b\u3002\u4f8b\u5982\uff0c\u67d0\u4e00\u6a21\u578b\u5728 AIME (American Invitational Mathematics Examination) 
\u7b49\u6d41\u884c\u7684\u6570\u5b66\u57fa\u51c6\u6d4b\u8bd5\u4e0a\u53d6\u5f97\u4e8679.8%\u7684\u5e73\u5747\u6027\u80fd\uff0c\u4f46\u8fd9\u4e00\u6570\u636e\u65e0\u6cd5\u9884\u6d4b\u6216\u89e3\u91ca\u5176\u5bf9\u5355\u4e2a\u4efb\u52a1\u7684\u8868\u73b0\uff0c\u4e5f\u96be\u4ee5\u63a8\u6d4b\u5176\u5728\u5176\u4ed6\u6d4b\u8bd5\u4e2d\u7684\u80fd\u529b\u3002\u76f8\u6bd4\u5355\u7eaf\u7684\u5206\u6570\u6c47\u603b\uff0c\u5b66\u672f\u754c\u4e5f\u66fe\u63a2\u7d22\u5982\u5fc3\u7406\u6d4b\u91cf\u5b66\u7b49\u65b9\u6cd5\u6765\u523b\u753b\u4eba\u5de5\u667a\u80fd\u7684\u80fd\u529b\uff0c\u4f46\u8fd9\u4e9b\u65b9\u6cd5\u4ecd\u672a\u80fd\u517c\u987e\u53ef\u89e3\u91ca\u6027\u548c\u9884\u6d4b\u6027\u3002 \u7531\u5251\u6865\u5927\u5b66\u3001\u5fae\u8f6f\u4e9a\u6d32\u7814\u7a76\u9662\u3001\u74e6\u4f26\u897f\u4e9a\u7406\u5de5\u5927\u5b66\u3001\u7f8e\u56fd\u6559\u80b2\u8003\u8bd5\u670d\u52a1\u4e2d\u5fc3\u3001\u5361\u5185\u57fa\u6885\u9686\u5927\u5b66\u3001\u666e\u6797\u65af\u987f\u5927\u5b66\u7b49\u673a\u6784\u7ec4\u6210\u7684\u8de8\u5b66\u79d1\u7814\u7a76\u56e2\u961f\uff0c\u8fd1\u671f\u63d0\u51fa\u4e86\u4e00\u79cd\u521b\u65b0\u6027\u7684\u4eba\u5de5\u667a\u80fd\u8bc4\u4f30\u8303\u5f0f\uff1a\u901a\u8fc7\u5236\u5b9a\u901a\u7528\u80fd\u529b\u91cf\u8868\u6765\u8be6\u7ec6\u523b\u753b\u57fa\u51c6\u548c\u5927\u6a21\u578b\uff0c\u4ee5\u5b9e\u73b0\u89e3\u91ca\u548c\u9884\u6d4b\u3002\u8be5\u7814\u7a76\u7a81\u7834\u4e86\u4f20\u7edf\u8bc4\u4f30\u65b9\u6cd5\u7684\u5c40\u9650\uff0c\u4e3a\u4eba\u5de5\u667a\u80fd\u7684\u53ef\u9760\u90e8\u7f72\u5960\u5b9a\u4e86\u575a\u5b9e\u7684\u57fa\u7840\u3002 
\u7814\u7a76\u5458\u4eec\u9996\u5148\u6784\u5efa\u4e8618\u4e2a\u4eba\u7c7b\u53ef\u7406\u89e3\u7684\u901a\u7528\u80fd\u529b\u91cf\u8868\uff0c\u5176\u4e2d\u6db5\u76d611\u9879\u57fa\u7840\u8ba4\u77e5\u80fd\u529b\u30015\u7c7b\u77e5\u8bc6\u9886\u57df\u53ca2\u9879\u5916\u90e8\u5e72\u6270\u56e0\u7d20\uff08\u8be6\u89c1\u88681\uff09\u3002\u6bcf\u4e2a\u91cf\u8868\u5b9a\u4e49\u4e86\u4ece\u5c42\u7ea70\u5230\u5c42\u7ea75\u7684\u9012\u8fdb\u5f0f\u9700\u6c42\u6807\u51c6\uff0c\u5c42\u7ea7\u8d8a\u9ad8\u8868\u660e\u4efb\u52a1\u5bf9\u8be5\u80fd\u529b\u7684\u9ad8\u9636\u8981\u6c42\u8d8a\u5f3a\u3002\u4f8b\u5982\uff0c\u5728\u201c\u5f62\u5f0f\u79d1\u5b66\u77e5\u8bc6\uff08KNf\uff09\u201d\u91cf\u8868\u4e2d\uff0c\u5c42\u7ea70\u8868\u793a\u4efb\u52a1\u65e0\u9700\u5f62\u5f0f\u79d1\u5b66\u77e5\u8bc6\u5373\u53ef\u89e3\u51b3\uff0c\u800c\u5c42\u7ea75\u5219\u8981\u6c42\u5177\u5907\u7814\u7a76\u751f\u53ca\u4ee5\u4e0a\u6c34\u5e73\u7684\u4e13\u4e1a\u77e5\u8bc6\u3002 \u57fa\u4e8e\u4e0a\u8ff0\u6846\u67b6\uff0c\u7814\u7a76\u5458\u4eec\u91c7\u7528 GPT-4o \u5bf9\u6765\u81ea20\u4e2a\u57fa\u51c6\u6d4b\u8bd5\u768463\u9879\u4e0b\u6e38\u4efb\u52a1\u5171\u8ba11.6\u4e07\u4e2a\u5b9e\u4f8b\u8fdb\u884c\u5168\u7ef4\u5ea6\u9700\u6c42\u5c42\u7ea7\u6807\u6ce8\uff0c\u6784\u5efa\u4e86 ADeLe\uff08Annotated-Demand-Levels\uff09v1.0 \u6570\u636e\u96c6\uff0c\u5176\u5305\u542b\u6240\u67091.6\u4e07\u4e2a\u4efb\u52a1\u5b9e\u4f8b\u548c\u9700\u6c42\u6807\u6ce8\u3002ADeLe \u6570\u636e\u96c6\u5de7\u5999\u5730\u5c06\u5927\u91cf\u4e0d\u540c\u57fa\u51c6\u7684\u4efb\u52a1\u5b9e\u4f8b\u653e\u7f6e\u5728\u540c\u4e00\u4e2a\u53ef\u6bd4\u7a7a\u95f4\u4e2d\uff0c\u4f7f\u79d1\u7814\u4eba\u5458\u80fd\u591f\u5728\u8bc4\u4f30\u4efb\u4f55\u5927\u8bed\u8a00\u6a21\u578b\u7684\u80fd\u529b\u548c\u5c40\u9650\u6027\u65f6\uff0c\u89e3\u9501\u89e3\u91ca\u529b\u548c\u9884\u6d4b\u529b\u3002\u56fe2\u76f4\u89c2\u5c55\u793a\u4e86 ADeLe \u6570\u636e\u96c6\u4e2d\u4e94\u4e2a\u5b9e\u4f8b\u548c\u5b83\u4eec\u7684\u6807\u6ce8\u3002 \u57fa\u4e8e ADeLe 
\u6d4b\u8bd5\u96c6\uff0c\u7814\u7a76\u56e2\u961f\u5f00\u5c55\u4e86\u4e09\u9879\u6838\u5fc3\u5206\u6790\uff0c\u63ed\u793a\u4e86\u82e5\u5e72\u91cd\u8981\u53d1\u73b0\uff1a 1. \u901a\u8fc7\u4efb\u52a1\u9700\u6c42\u6982\u51b5\u63ed\u793aAI\u57fa\u51c6\u6d4b\u8bd5\u7684\u56fa\u6709\u7f3a\u9677 \u901a\u8fc7\u5bf920\u4e2a\u57fa\u51c6\u6d4b\u8bd5\u7684\u9700\u6c42\u5c42\u7ea7\u8fdb\u884c\u5206\u6790\uff0c\u7814\u7a76\u53d1\u73b0\u6240\u6709\u57fa\u51c6\u5747\u5b58\u5728\u7ed3\u6784\u6548\u5ea6\u7f3a\u5931\u95ee\u9898\u2014\u2014\u65e2\u4e0d\u80fd\u6709\u6548\u6d4b\u91cf\u5176\u5ba3\u79f0\u7684\u76ee\u6807\u80fd\u529b\uff08\u7279\u5f02\u6027\u4e0d\u8db3\uff09\uff0c\u4e5f\u672a\u80fd\u5728\u76ee\u6807\u80fd\u529b\u7ef4\u5ea6\u4e0a\u8986\u76d6\u8db3\u591f\u7684\u96be\u5ea6\u8303\u56f4\uff08\u7075\u654f\u5ea6\u4e0d\u8db3\uff09\u3002\u4f8b\u5982\uff0c\u201c\u516c\u52a1\u5458\u8003\u8bd5\u201d\u57fa\u51c6 \uff08Civil Service Examination\uff09\u58f0\u79f0\u80fd\u591f\u6d4b\u91cf\u903b\u8f91\u63a8\u7406\u80fd\u529b\uff0c\u4f46\u4ece\u4efb\u52a1\u9700\u6c42\u6982\u51b5\uff08\u56fe3\uff09\u6765\u770b\uff0c\u4efb\u52a1\u7684\u6210\u529f\u5b8c\u6210\u540c\u65f6\u9ad8\u5ea6\u4f9d\u8d56\u77e5\u8bc6\u50a8\u5907\u3001\u5143\u8ba4\u77e5\u7b49\u5176\u4ed6\u80fd\u529b\u3002\u53e6\u5916\u4e00\u4e2a\u4f8b\u5b50\u662f\u201c\u65f6\u95f4\u63a8\u7406\u201d\u57fa\u51c6 \uff08TimeQA\uff09\uff0c\u5176\u63a8\u7406\u80fd\u529b\u9700\u6c42\u5c42\u7ea7\u5206\u5e03\u8fc7\u4e8e\u96c6\u4e2d\uff0c\u65e0\u6cd5\u6709\u6548\u533a\u5206\u4e0d\u540c\u4efb\u52a1\u9700\u6c42\u5c42\u7ea7\u6216\u96be\u5ea6\u6c34\u5e73\u3002\u91c7\u7528 ADeLe \u65b9\u6cd5\u8bba\u4f18\u5316\u57fa\u51c6\u8bbe\u8ba1\uff0c\u53ef\u4ee5\u901a\u8fc7\u6784\u5efa\u7cbe\u786e\u7684\u4efb\u52a1\u9700\u6c42\u6982\u51b5\u786e\u4fdd\u57fa\u51c6\u6d4b\u8bd5\u7684\u7ed3\u6784\u6548\u5ea6\uff0c\u660e\u786e\u754c\u5b9a\u5176\u6d4b\u91cf\u76ee\u6807\u5e76\u8bc4\u4f30\u5176\u9002\u7528\u8fb9\u754c\u3002 2. 
\u5927\u8bed\u8a00\u6a21\u578b\u80fd\u529b\u6982\u51b5\u7684\u6df1\u5ea6\u89e3\u6790 \u7814\u7a76\u5458\u4eec\u7ed8\u5236\u4e8615\u4e2a\u4e3b\u6d41\u5927\u8bed\u8a00\u6a21\u578b\u572818\u4e2a\u80fd\u529b\u7ef4\u5ea6\u7684\u53d7\u8bd5\u8005\u7279\u5f81\u66f2\u7ebf\uff08SCC\uff09\uff0c\u6b64\u66f2\u7ebf\u63cf\u8ff0\u4e86\u5927\u6a21\u578b\u5728\u4e0d\u540c\u80fd\u529b\u9700\u6c42\u5c42\u7ea7\u4e0a\u7684\u6b63\u786e\u7387\u5e76\u62df\u5408\u4e86\u903b\u8f91\u51fd\u6570\u3002\u8fd9\u7c7b\u66f2\u7ebf\u53ef\u4ee5\u4f7f15\u4e2a\u4e0d\u540c LLMs \u7684\u80fd\u529b\u4f18\u7f3a\u70b9\u5f97\u5230\u5168\u9762\u63cf\u8ff0\uff08\u56fe4\uff09\u3002 \u6b64\u5916\uff0c\u901a\u8fc7\u8ba1\u7b97\u6bcf\u4e2a\u7ef4\u5ea6\u7684\u80fd\u529b\u5f97\u5206\u603b\u7ed3 SCC\uff0c\u518d\u6309\u7167\u5fc3\u7406\u6d4b\u91cf\u5b66\u7684\u4f20\u7edf\uff0c\u5c06\u5176\u5b9a\u4e49\u4e3a SCC \u4e2d\u6210\u529f\u6982\u7387\u4e3a0.5\u7684 x \u503c\uff08\u659c\u7387\/\u4fe1\u606f\u91cf\u6700\u5927\u7684\u70b9\uff09\uff0c\u5c31\u4ea7\u751f\u4e86\u8bb8\u591a\u89c1\u89e3\u3002\u8fd9\u4e9b\u89c1\u89e3\u4e3b\u8981\u6709\uff1a1. \u65b0\u7684 LLMs \u6574\u4f53\u80fd\u529b\u4f18\u4e8e\u65e7\u7684 LLMs\uff0c\u4f46\u5bf9\u4e8e\u6240\u6709\u80fd\u529b\uff0c\u8fd9\u4e00\u7ed3\u8bba\u5e76\u975e\u603b\u662f\u6210\u7acb\uff1b2. \u77e5\u8bc6\u80fd\u529b\u4e3b\u8981\u7531\u6a21\u578b\u5927\u5c0f\u548c\u84b8\u998f\u8fc7\u7a0b\u7684\u53d8\u5316\u51b3\u5b9a\u548c\u9650\u5236\uff1b3. \u63a8\u7406\u3001\u5b66\u4e60\u548c\u62bd\u8c61\u4ee5\u53ca\u793e\u4ea4\u80fd\u529b\u5728\u201c\u63a8\u7406\u201d\u6a21\u578b\u4e2d\u5f97\u5230\u4e86\u6539\u5584\uff1b4. \u89c4\u6a21\u6cd5\u5219\uff08Scaling Law\uff09\u5bf9\u975e\u63a8\u7406\u7c7b\u6a21\u578b\u7684\u8fb9\u9645\u6548\u76ca\u5448\u9012\u51cf\u8d8b\u52bf\u3002 3. 
[…]<\/p>\n","protected":false},"author":34512,"featured_media":1135355,"template":"","meta":{"msr-url-field":"","msr-podcast-episode":"","msrModifiedDate":"","msrModifiedDateEnabled":false,"ep_exclude_from_search":false,"_classifai_error":"","msr-content-parent":1012650,"footnotes":""},"research-area":[13556],"msr-locale":[268881],"msr-post-option":[],"class_list":["post-1135280","msr-blog-post","type-msr-blog-post","status-publish","has-post-thumbnail","hentry","msr-research-area-artificial-intelligence","msr-locale-zh_cn"],"msr_assoc_parent":{"id":1012650,"type":"lab"},"_links":{"self":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-blog-post\/1135280","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-blog-post"}],"about":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/types\/msr-blog-post"}],"author":[{"embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/users\/34512"}],"version-history":[{"count":3,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-blog-post\/1135280\/revisions"}],"predecessor-version":[{"id":1135356,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-blog-post\/1135280\/revisions\/1135356"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media\/1135355"}],"wp:attachment":[{"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/media?parent=1135280"}],"wp:term":[{"taxonomy":"msr-research-area","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/research-area?post=1135280"},{"taxonomy":"msr-locale","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v2\/msr-locale?post=1135280"},{"taxonomy":"msr-post-option","embeddable":true,"href":"https:\/\/www.microsoft.com\/en-us\/research\/wp-json\/wp\/v
2\/msr-post-option?post=1135280"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}