{"id":6213,"date":"2024-07-02T15:38:03","date_gmt":"2024-07-02T07:38:03","guid":{"rendered":"https:\/\/t.n-years.com\/?p=6213"},"modified":"2024-07-02T15:38:07","modified_gmt":"2024-07-02T07:38:07","slug":"%e6%a8%a1%e5%9e%8b%e8%ae%ad%e7%bb%83-post-pretrain%e3%80%81sft%e3%80%81dpo-%e5%ae%9a%e4%b9%89%e5%92%8c%e9%80%82%e7%94%a8%e5%9c%ba%e6%99%af%e6%98%af%e4%bb%80%e4%b9%88","status":"publish","type":"post","link":"https:\/\/t.n-years.com\/?p=6213","title":{"rendered":"\u6a21\u578b\u8bad\u7ec3 Post-pretrain\u3001SFT\u3001DPO \u5b9a\u4e49\u548c\u9002\u7528\u573a\u666f\u662f\u4ec0\u4e48"},"content":{"rendered":"\n<p class=\"wp-block-paragraph\">\u975e\u539f\u521b\u3001ChatGPT \u95ee\u7b54\u751f\u6210<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u5728\u8bad\u7ec3\u5927\u578b\u8bed\u8a00\u6a21\u578b\uff08LLMs\uff09\u65f6\uff0cPost-pretrain\u3001SFT\uff08Supervised Fine-Tuning\uff09\u548c DPO\uff08Direct Preference Optimization\uff09\u662f\u4e09\u79cd\u4e0d\u540c\u7684\u8bad\u7ec3\u9636\u6bb5\u548c\u65b9\u6cd5\uff0c\u6bcf\u79cd\u65b9\u6cd5\u5728\u4e0d\u540c\u7684\u9636\u6bb5\u548c\u573a\u666f\u4e2d\u90fd\u6709\u5176\u72ec\u7279\u7684\u4f5c\u7528\u3002\u4ee5\u4e0b\u662f\u5b83\u4eec\u7684\u5b9a\u4e49\u548c\u9002\u7528\u573a\u666f\uff1a<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">1. Post-pretrain<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">\u5b9a\u4e49\uff1a<\/h4>\n\n\n\n<p class=\"wp-block-paragraph\">Post-pretrain \u662f\u6307\u5728\u9884\u8bad\u7ec3\uff08Pretraining\uff09\u4e4b\u540e\uff0c\u8fdb\u884c\u7684\u4e00\u7cfb\u5217\u8fdb\u4e00\u6b65\u8bad\u7ec3\u6b65\u9aa4\uff0c\u4ee5\u63d0\u9ad8\u6a21\u578b\u5728\u7279\u5b9a\u9886\u57df\u6216\u4efb\u52a1\u4e0a\u7684\u8868\u73b0\u3002\u9884\u8bad\u7ec3\u901a\u5e38\u5728\u5927\u89c4\u6a21\u7684\u901a\u7528\u6587\u672c\u6570\u636e\u4e0a\u8fdb\u884c\uff0c\u800c Post-pretrain \u5219\u5728\u66f4\u5177\u9488\u5bf9\u6027\u7684\u6570\u636e\u4e0a\u8fdb\u884c\u8bad\u7ec3\u3002<\/p>\n\n\n\n<h4 class=\"wp-block-heading\">\u9002\u7528\u573a\u666f\uff1a<\/h4>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u9886\u57df\u9002\u5e94<\/strong>\uff1a\u5f53\u9700\u8981\u8ba9\u6a21\u578b\u9002\u5e94\u67d0\u4e2a\u7279\u5b9a\u9886\u57df\uff08\u5982\u533b\u5b66\u3001\u6cd5\u5f8b\u3001\u91d1\u878d\u7b49\uff09\u65f6\uff0c\u53ef\u4ee5\u4f7f\u7528\u9886\u57df\u7279\u5b9a\u7684\u6570\u636e\u8fdb\u884c Post-pretrain\u3002<\/li>\n\n\n\n<li><strong>\u8bed\u8a00\u9002\u5e94<\/strong>\uff1a\u5f53\u9700\u8981\u8ba9\u6a21\u578b\u66f4\u597d\u5730\u7406\u89e3\u67d0\u79cd\u7279\u5b9a\u8bed\u8a00\u6216\u65b9\u8a00\u65f6\uff0c\u4f7f\u7528\u8be5\u8bed\u8a00\u7684\u6570\u636e\u8fdb\u884c Post-pretrain\u3002<\/li>\n\n\n\n<li><strong>\u63d0\u9ad8\u57fa\u7840\u8868\u73b0<\/strong>\uff1a\u5728\u9884\u8bad\u7ec3\u4e4b\u540e\uff0c\u8fdb\u4e00\u6b65\u63d0\u5347\u6a21\u578b\u7684\u57fa\u7840\u6027\u80fd\uff0c\u4f7f\u5176\u5728\u4e0b\u6e38\u4efb\u52a1\u4e2d\u7684\u8868\u73b0\u66f4\u4f73\u3002<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">2. SFT\uff08Supervised Fine-Tuning\uff09<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">\u5b9a\u4e49\uff1a<\/h4>\n\n\n\n<p class=\"wp-block-paragraph\">SFT \u662f\u6307\u5728\u6709\u76d1\u7763\u6570\u636e\u4e0a\u5bf9\u6a21\u578b\u8fdb\u884c\u5fae\u8c03\uff0c\u4ee5\u63d0\u5347\u6a21\u578b\u5728\u7279\u5b9a\u4efb\u52a1\u4e0a\u7684\u8868\u73b0\u3002\u8fd9\u4e2a\u8fc7\u7a0b\u4f7f\u7528\u6807\u6ce8\u597d\u7684\u6570\u636e\u8fdb\u884c\u8bad\u7ec3\uff0c\u4f7f\u6a21\u578b\u5b66\u4f1a\u5b8c\u6210\u7279\u5b9a\u7684\u4efb\u52a1\u3002<\/p>\n\n\n\n<h4 class=\"wp-block-heading\">\u9002\u7528\u573a\u666f\uff1a<\/h4>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u7279\u5b9a\u4efb\u52a1\u4f18\u5316<\/strong>\uff1a\u5982\u6587\u672c\u5206\u7c7b\u3001\u547d\u540d\u5b9e\u4f53\u8bc6\u522b\u3001\u673a\u5668\u7ffb\u8bd1\u7b49\u4efb\u52a1\uff0c\u5728\u8fd9\u4e9b\u4efb\u52a1\u4e0a\u6709\u5927\u91cf\u6807\u6ce8\u597d\u7684\u6570\u636e\u3002<\/li>\n\n\n\n<li><strong>\u6027\u80fd\u63d0\u5347<\/strong>\uff1a\u5728\u7279\u5b9a\u4efb\u52a1\u4e0a\u901a\u8fc7\u76d1\u7763\u5b66\u4e60\u8fdb\u4e00\u6b65\u63d0\u5347\u6a21\u578b\u6027\u80fd\u3002<\/li>\n\n\n\n<li><strong>\u4efb\u52a1\u8fc1\u79fb<\/strong>\uff1a\u5c06\u9884\u8bad\u7ec3\u6a21\u578b\u5e94\u7528\u4e8e\u5177\u4f53\u7684\u4e0b\u6e38\u4efb\u52a1\uff0c\u4f7f\u5176\u80fd\u591f\u6839\u636e\u4efb\u52a1\u8981\u6c42\u8fdb\u884c\u4f18\u5316\u3002<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">3. DPO\uff08Direct Preference Optimization\uff09<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\">\u5b9a\u4e49\uff1a<\/h4>\n\n\n\n<p class=\"wp-block-paragraph\">DPO \u662f\u4e00\u79cd\u4f18\u5316\u65b9\u6cd5\uff0c\u57fa\u4e8e\u7528\u6237\u504f\u597d\u6216\u53cd\u9988\u5bf9\u6a21\u578b\u8fdb\u884c\u5fae\u8c03\uff0c\u4f7f\u6a21\u578b\u7684\u8f93\u51fa\u66f4\u7b26\u5408\u7528\u6237\u7684\u671f\u671b\u548c\u9700\u6c42\u3002<\/p>\n\n\n\n<h4 class=\"wp-block-heading\">\u9002\u7528\u573a\u666f\uff1a<\/h4>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u4e2a\u6027\u5316\u63a8\u8350<\/strong>\uff1a\u5982\u4e2a\u6027\u5316\u804a\u5929\u673a\u5668\u4eba\u3001\u63a8\u8350\u7cfb\u7edf\u7b49\uff0c\u9700\u8981\u6839\u636e\u7528\u6237\u7684\u5177\u4f53\u504f\u597d\u8fdb\u884c\u8c03\u6574\u3002<\/li>\n\n\n\n<li><strong>\u7528\u6237\u4ea4\u4e92\u4f18\u5316<\/strong>\uff1a\u901a\u8fc7\u7528\u6237\u53cd\u9988\u4e0d\u65ad\u8c03\u6574\u6a21\u578b\uff0c\u4f7f\u5176\u66f4\u597d\u5730\u6ee1\u8db3\u7528\u6237\u9700\u6c42\u3002<\/li>\n\n\n\n<li><strong>\u52a8\u6001\u8c03\u6574<\/strong>\uff1a\u5728\u7528\u6237\u9700\u6c42\u548c\u504f\u597d\u4e0d\u65ad\u53d8\u5316\u7684\u573a\u666f\u4e0b\uff0cDPO \u53ef\u4ee5\u901a\u8fc7\u5b9e\u65f6\u53cd\u9988\u8fdb\u884c\u52a8\u6001\u4f18\u5316\u3002<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">\u9002\u7528\u573a\u666f\u603b\u7ed3<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>Post-pretrain<\/strong> \u9002\u7528\u4e8e\u9700\u8981\u8ba9\u6a21\u578b\u9002\u5e94\u7279\u5b9a\u9886\u57df\u6216\u8bed\u8a00\u7684\u573a\u666f\uff0c\u901a\u8fc7\u8fdb\u4e00\u6b65\u8bad\u7ec3\u63d0\u5347\u57fa\u7840\u6027\u80fd\u3002<\/li>\n\n\n\n<li><strong>SFT<\/strong> \u9002\u7528\u4e8e\u6709\u76d1\u7763\u6570\u636e\u5145\u8db3\u7684\u5177\u4f53\u4efb\u52a1\uff0c\u901a\u8fc7\u76d1\u7763\u5b66\u4e60\u63d0\u5347\u4efb\u52a1\u6027\u80fd\u3002<\/li>\n\n\n\n<li><strong>DPO<\/strong> \u9002\u7528\u4e8e\u4e2a\u6027\u5316\u548c\u7528\u6237\u53cd\u9988\u9a71\u52a8\u7684\u573a\u666f\uff0c\u901a\u8fc7\u7528\u6237\u504f\u597d\u8fdb\u884c\u52a8\u6001\u4f18\u5316\u3002<\/li>\n<\/ul>\n\n\n\n<p class=\"wp-block-paragraph\">\u8fd9\u4e09\u79cd\u65b9\u6cd5\u53ef\u4ee5\u5728\u4e0d\u540c\u7684\u9636\u6bb5\u548c\u573a\u666f\u4e2d\u7ed3\u5408\u4f7f\u7528\uff0c\u5f62\u6210\u4e00\u4e2a\u5b8c\u6574\u7684\u8bad\u7ec3\u6d41\u7a0b\uff0c\u4ece\u800c\u6253\u9020\u6027\u80fd\u5f3a\u5927\u4e14\u7b26\u5408\u5b9e\u9645\u5e94\u7528\u9700\u6c42\u7684\u5927\u578b\u8bed\u8a00\u6a21\u578b\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u975e\u539f\u521b\u3001ChatGPT \u95ee\u7b54\u751f\u6210 \u5728\u8bad\u7ec3\u5927\u578b\u8bed\u8a00\u6a21\u578b\uff08LLMs\uff09\u65f6\uff0cPost-pretrain\u3001SFT\uff08Sup&#8230; <\/p>\n<div class=\"read-more navbutton\"><a href=\"https:\/\/t.n-years.com\/?p=6213\">\u9605\u8bfb\u66f4\u591a<i class=\"fa fa-angle-double-right\"><\/i><\/a><\/div>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[5],"tags":[],"class_list":["post-6213","post","type-post","status-publish","format-standard","hentry","category-5"],"_links":{"self":[{"href":"https:\/\/t.n-years.com\/index.php?rest_route=\/wp\/v2\/posts\/6213","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/t.n-years.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/t.n-years.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/t.n-years.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/t.n-years.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=6213"}],"version-history":[{"count":1,"href":"https:\/\/t.n-years.com\/index.php?rest_route=\/wp\/v2\/posts\/6213\/revisions"}],"predecessor-version":[{"id":6214,"href":"https:\/\/t.n-years.com\/index.php?rest_route=\/wp\/v2\/posts\/6213\/revisions\/6214"}],"wp:attachment":[{"href":"https:\/\/t.n-years.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=6213"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/t.n-years.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=6213"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/t.n-years.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=6213"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}