{"id":3163,"date":"2025-07-09T18:54:45","date_gmt":"2025-07-09T10:54:45","guid":{"rendered":"https:\/\/www.gnn.club\/?p=3163"},"modified":"2025-07-14T14:48:47","modified_gmt":"2025-07-14T06:48:47","slug":"non-local-neural-networks","status":"publish","type":"post","link":"http:\/\/www.gnn.club\/?p=3163","title":{"rendered":"Non-local Neural Networks"},"content":{"rendered":"<h1>\u57fa\u672c\u4fe1\u606f<\/h1>\n<ul>\n<li>\ud83d\udcf0\u6807\u9898: \u975e\u5c40\u90e8\u795e\u7ecf\u7f51\u7edc\uff08Non-local Neural Networks\uff09<\/li>\n<li>\ud83d\udd8b\ufe0f\u4f5c\u8005: Xiaolong Wang<\/li>\n<li>\ud83c\udfdb\ufe0f\u673a\u6784: Carnegie Mellon University\uff08\u5361\u5185\u57fa\u6885\u9686\u5927\u5b66\uff09<\/li>\n<li>\ud83d\udd25\u5173\u952e\u8bcd: Non-local operations, self-attention, video recognition, image classification<\/li>\n<\/ul>\n<h2>\u6458\u8981\u6982\u8ff0<\/h2>\n<table style=\"border-collapse: collapse; width: 100%; max-width: 800px; margin: 0 auto; font-family: Arial, sans-serif; box-shadow: 0 2px 3px rgba(0,0,0,0.1);\">\n<thead>\n<tr>\n<th style=\"border: 1px solid #ddd; padding: 12px; text-align: left; background-color: #f2f2f2;\">\u9879\u76ee<\/th>\n<th style=\"border: 1px solid #ddd; padding: 12px; text-align: left; background-color: #f2f2f2;\">\u5185\u5bb9<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr style=\"border: 1px solid #ddd;\">\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\ud83d\udcd6 \u7814\u7a76\u80cc\u666f<\/td>\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\u4f20\u7edf\u6df1\u5ea6\u5377\u79ef\u7f51\u7edc\u96be\u4ee5\u5efa\u6a21\u957f\u8ddd\u79bb\u4f9d\u8d56\u5173\u7cfb<\/td>\n<\/tr>\n<tr style=\"border: 1px solid #ddd;\">\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\ud83c\udfaf \u7814\u7a76\u76ee\u7684<\/td>\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\u63d0\u51fa\u901a\u7528\u975e\u5c40\u90e8\u8fd0\u7b97\u6a21\u5757\uff0c\u6355\u83b7\u65f6\u7a7a\u8fdc\u7a0b\u4f9d\u8d56<\/td>\n<\/tr>\n<tr style=\"border: 1px 
solid #ddd;\">\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\u270d\ufe0f \u7814\u7a76\u65b9\u6cd5<\/td>\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\u8bbe\u8ba1\u975e\u5c40\u90e8\u7b97\u5b50\uff08\u542b4\u79cd\u53d8\u4f53\uff09\uff0c\u7ed3\u5408\u6b8b\u5dee\u8fde\u63a5<\/td>\n<\/tr>\n<tr style=\"border: 1px solid #ddd;\">\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\ud83d\udd4a\ufe0f \u7814\u7a76\u5bf9\u8c61<\/td>\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\u89c6\u9891\u52a8\u4f5c\u8bc6\u522b\uff08Kinetics\uff09\u4e0e\u56fe\u50cf\u5206\u7c7b\uff08ImageNet\uff09<\/td>\n<\/tr>\n<tr style=\"border: 1px solid #ddd;\">\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\ud83d\udd0d \u7814\u7a76\u7ed3\u8bba<\/td>\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\u5728\u89c6\u9891\u5206\u7c7b\u4efb\u52a1\u4e2d\u63d0\u53471.5%\u51c6\u786e\u7387\uff08\u4f18\u4e8e3D\u5377\u79ef\uff09<\/td>\n<\/tr>\n<tr style=\"border: 1px solid #ddd;\">\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\u2b50 \u521b\u65b0\u70b9<\/td>\n<td style=\"border: 1px solid #ddd; padding: 
12px;\">\u9996\u6b21\u5c06\u975e\u5c40\u90e8\u5747\u503c\u601d\u60f3\u6269\u5c55\u4e3a\u53ef\u5fae\u5206\u795e\u7ecf\u7f51\u7edc\u7ec4\u4ef6<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h1>\u80cc\u666f<\/h1>\n<ul>\n<li>\n<p><strong>\u7814\u7a76\u80cc\u666f<\/strong>\uff1a\u6df1\u5ea6\u795e\u7ecf\u7f51\u7edc\u4e2d\u957f\u7a0b\u4f9d\u8d56\u5efa\u6a21\u662f\u5173\u952e\u6311\u6218\uff0c\u73b0\u6709\u65b9\u6cd5\u5728\u65f6\u7a7a\u6570\u636e\uff08\u5982\u89c6\u9891\u3001\u56fe\u50cf\uff09\u4e2d\u5b58\u5728\u663e\u8457\u5c40\u9650\u6027\u3002<\/p>\n<\/li>\n<li>\n<p><strong>\u8fc7\u53bb\u65b9\u6848<\/strong>\uff1a<\/p>\n<ul>\n<li>\n<p><strong>RNN<\/strong>\uff1a\u9002\u7528\u4e8e\u5e8f\u5217\u6570\u636e\u4f46\u5b58\u5728\u68af\u5ea6\u6d88\u5931\/\u7206\u70b8\u95ee\u9898<\/p>\n<\/li>\n<li>\n<p><strong>CNN<\/strong>\uff1a\u4f9d\u8d56\u6df1\u5c42\u5806\u53e0\u6269\u5927\u611f\u53d7\u91ce\uff0c\u8ba1\u7b97\u6548\u7387\u4f4e\u4e14\u4f18\u5316\u56f0\u96be<\/p>\n<\/li>\n<li>\n<p><strong>\u6838\u5fc3\u7f3a\u9677<\/strong>\uff1a\u5c40\u90e8\u64cd\u4f5c\u7684\u91cd\u590d\u7d2f\u79ef\u5bfc\u81f4\u591a\u8df3\u4f9d\u8d56\u5efa\u6a21\u5931\u6548\uff08\u5982\u8fdc\u8ddd\u79bb\u50cf\u7d20\u95f4\u53cc\u5411\u4ea4\u4e92\uff09<\/p>\n<\/li>\n<\/ul>\n<\/li>\n<li>\n<p><strong>\u7814\u7a76\u52a8\u673a<\/strong>\uff1a<\/p>\n<ul>\n<li>\n<p>\u63d0\u51fa\u53ef\u5fae\u5206<strong>non-local 
operations<\/strong>\uff0c\u76f4\u63a5\u5efa\u6a21\u4efb\u610f\u4f4d\u7f6e\u95f4\u5168\u5c40\u4f9d\u8d56<\/p>\n<\/li>\n<li>\n<p>\u7a81\u7834\u4f20\u7edf\u5377\u79ef\/\u5faa\u73af\u7ed3\u6784\u7684\u6e10\u8fdb\u5f0f\u7279\u5f81\u4f20\u64ad\u5c40\u9650\uff0c\u5efa\u7acb\u8de8\u65f6\u7a7a\u7684\u4e00\u6b65\u4ea4\u4e92\u673a\u5236<\/p>\n<\/li>\n<li>\n<p>\u901a\u8fc7\u7edf\u4e00\u6846\u67b6\u517c\u5bb9\u56fe\u50cf\/\u89c6\u9891\/\u5e8f\u5217\u4efb\u52a1\uff0c\u9a8c\u8bc1\u5176\u5728\u89c6\u9891\u5206\u7c7b\uff08Kinetics\uff09\u548cCOCO\u591a\u4efb\u52a1\u4e2d\u7684\u6cdb\u5316\u6027<\/p>\n<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<h1>\u65b9\u6cd5<\/h1>\n<ul>\n<li>\n<p><strong>\u7406\u8bba\u80cc\u666f<\/strong>\uff1a<br \/>\n\u57fa\u4e8e\u4f20\u7edf\u5377\u79ef\u795e\u7ecf\u7f51\u7edc\u5728\u957f\u7a0b\u4f9d\u8d56\u5efa\u6a21\u4e2d\u7684\u5c40\u9650\u6027\uff0c\u7814\u7a76\u501f\u9274\u975e\u5c40\u90e8\u5747\u503c\u6ee4\u6ce2\uff08non-local means\uff09\u601d\u60f3\uff0c\u5c06\u56fe\u50cf\u5904\u7406\u4e2d\u7684\u975e\u5c40\u90e8\u76f8\u4f3c\u6027\u8ba1\u7b97\u6269\u5c55\u4e3a\u53ef\u5fae\u5206\u795e\u7ecf\u7f51\u7edc\u7b97\u5b50\u3002\u8be5\u5de5\u4f5c\u53d7self-attention\u673a\u5236\u542f\u53d1\uff0c\u4f46\u7a81\u7834\u5176\u5e8f\u5217\u5efa\u6a21\u9650\u5236\uff0c\u63d0\u51fa\u9002\u7528\u4e8e\u65f6\u7a7a\u6570\u636e\u7684\u901a\u7528\u4ea4\u4e92\u8303\u5f0f\u3002<\/p>\n<\/li>\n<li>\n<p><strong>\u6280\u672f\u8def\u7ebf<\/strong>\uff1a<\/p>\n<ol>\n<li>\n<p><strong>\u7b97\u5b50\u8bbe\u8ba1<\/strong>\uff1a\u6784\u5efa\u975e\u5c40\u90e8\u8fd0\u7b97\u6a21\u5757\uff08 $\\mathcal{NL}(\u00b7)$ \uff09\uff0c\u901a\u8fc7\u9ad8\u65af\u6838\u51fd\u6570\u8ba1\u7b97\u4efb\u610f\u4e24\u70b9\u95f4\u5173\u8054\u6743\u91cd\uff0c\u652f\u6301Embedded Gaussian\/Dot Product\/Concatenation\u7b494\u79cd\u76f8\u4f3c\u5ea6\u8ba1\u7b97\u53d8\u4f53<\/p>\n<\/li>\n<li>\n<p><strong>\u7ed3\u6784\u5b9e\u73b0<\/strong>\uff1a<\/p>\n<ul>\n<li>\n<p>\u91c7\u7528\u6b8b\u5dee\u8fde\u63a5\u5f62\u5f0f\uff08 
$\\mathbf{z}_i = \\mathcal{NL}(\\mathbf{x}_i) + \\mathbf{x}_i$ \uff09\u907f\u514d\u68af\u5ea6\u6d88\u5931<\/p>\n<\/li>\n<li>\n<p>\u901a\u8fc71\u00d71\u5377\u79ef\u538b\u7f29\u7279\u5f81\u7ef4\u5ea6\u964d\u4f4e\u8ba1\u7b97\u590d\u6742\u5ea6<\/p>\n<\/li>\n<\/ul>\n<\/li>\n<li>\n<p><strong>\u4efb\u52a1\u9002\u914d<\/strong>\uff1a<\/p>\n<ul>\n<li>\u89c6\u9891\u4efb\u52a1\u4e2d\u76f4\u63a5\u5904\u74063D\u7279\u5f81\u56fe\uff08T\u00d7H\u00d7W\uff09<\/li>\n<li>\u56fe\u50cf\u4efb\u52a1\u4e2d\u901a\u8fc7\u7a7a\u95f4\u4f4d\u7f6e\u6620\u5c04\u5b9e\u73b0\u5168\u5c40\u4e0a\u4e0b\u6587\u5efa\u6a21<\/li>\n<\/ul>\n<\/li>\n<\/ol>\n<\/li>\n<li>\n<p><strong>\u8be6\u7ec6\u89e3\u8bfb<\/strong>\uff1a<br \/>\n\u975e\u5c40\u90e8\u795e\u7ecf\u7f51\u7edc\uff08Non\uff0dlocal Neural Networks\uff09\u7684\u56fe\u89e3\u5982\u4e0b\uff1a<\/p>\n<\/li>\n<\/ul>\n<p><img decoding=\"async\" src=\"https:\/\/gnnclub-1311496010.cos.ap-beijing.myqcloud.com\/wp-content\/uploads\/2025\/07\/20250709181951251.png\" width=\"400\" style=\"display: block; margin: 0 auto;\" \/><\/p>\n<p>\u6838\u5fc3\u516c\u5f0f\u8868\u793a\u5982\u4e0b\uff1a<\/p>\n<p>$$<br \/>\ny_i=\\frac{1}{C(x)} \\sum_{\\forall j} f\\left(x_i, x_j\\right) g\\left(x_j\\right)<br \/>\n$$<\/p>\n<p>\u516c\u5f0f\u7ec4\u6210\u89e3\u6790<br \/>\n1\uff0e\u8f93\u5165\u4e0e\u8f93\u51fa<\/p>\n<ul>\n<li>$x$ \uff1a\u8f93\u5165\u7279\u5f81\uff08\u56fe\u50cf\uff0f\u89c6\u9891\uff0f\u5e8f\u5217\uff09\uff0c$y$ \uff1a\u4e0e $x$ \u540c\u5c3a\u5bf8\u7684\u8f93\u51fa<\/li>\n<li>$i$ \uff1a\u76ee\u6807\u4f4d\u7f6e\u7d22\u5f15\uff08\u7a7a\u95f4\uff0f\u65f6\u95f4\uff0f\u65f6\u7a7a\uff09\uff0c$j:$ \u679a\u4e3e\u6240\u6709\u53ef\u80fd\u4f4d\u7f6e\u7684\u7d22\u5f15<\/li>\n<\/ul>\n<p>2\uff0e\u6838\u5fc3\u51fd\u6570<\/p>\n<ul>\n<li>\u76f8\u4f3c\u6027\u51fd\u6570 $f\\left(x_i, x_j\\right)$ \uff1a\u8ba1\u7b97\u4f4d\u7f6e $i$ \u4e0e\u6240\u6709\u4f4d\u7f6e $j$ 
\u7684\u5173\u8054\u6743\u91cd\uff08\u6807\u91cf\uff09\uff0c\u652f\u6301\u56db\u79cd\u53d8\u4f53\uff1a<\/li>\n<li>\u9ad8\u65af\u51fd\u6570\uff1a$f\\left(x_i, x_j\\right)=e^{x_i^T x_j}$\uff08\u7ecf\u5178\u975e\u5c40\u90e8\u5747\u503c\u6ee4\u6ce2\u6269\u5c55\uff09<\/li>\n<li>\u5d4c\u5165\u9ad8\u65af\uff1a$f\\left(x_i, x_j\\right)=e^{\\theta\\left(x_i\\right)^T \\phi\\left(x_j\\right)}$\uff08$\\theta$\u3001$\\phi$ \u4e3a\u7ebf\u6027\u5d4c\u5165\uff0c\u4e0e\u81ea\u6ce8\u610f\u529b\u673a\u5236\u7b49\u6548\uff09<\/li>\n<li>\u70b9\u79ef\uff1a$f\\left(x_i, x_j\\right)=\\theta\\left(x_i\\right)^T \\phi\\left(x_j\\right)$\uff08\u65e0softmax\uff0c\u5f52\u4e00\u5316\u56e0\u5b50 $C(x)=N$ \uff09<\/li>\n<li>\u62fc\u63a5\uff1a$f\\left(x_i, x_j\\right)=\\operatorname{ReLU}\\left(w_f^T\\left[\\theta\\left(x_i\\right), \\phi\\left(x_j\\right)\\right]\\right)$.<\/li>\n<li>\u7279\u5f81\u53d8\u6362\u51fd\u6570 $g\\left(x_j\\right)$ \uff1a\u7ebf\u6027\u5d4c\u5165 $g\\left(x_j\\right)=W_g x_j \\quad$\uff08\u901a\u8fc7 $1 \\times 1$ \u5377\u79ef\u5b9e\u73b0\uff09<\/li>\n<li>\u5f52\u4e00\u5316\u56e0\u5b50 $C(x)$ \uff1a\u901a\u5e38\u53d6 $\\sum_{\\forall j} f\\left(x_i, x_j\\right)$ \u6216\u6837\u672c\u6570 $N$<\/li>\n<\/ul>\n<p>3\uff0e\u6b8b\u5dee\u8fde\u63a5\u8bbe\u8ba1<br \/>\n\u975e\u5c40\u90e8\u5757\uff08Non\uff0dlocal Block\uff09\u901a\u8fc7\u6b8b\u5dee\u8fde\u63a5\u6574\u5408\u8f93\u51fa\uff1a$z_i=W_z y_i+x_i$<\/p>\n<ul>\n<li>$W_z$ \u4e3a\u53ef\u5b66\u4e60\u6743\u91cd\u77e9\u9635\uff0c\u521d\u59cb\u5316\u4e3a\u96f6\u4ee5\u4fdd\u6301\u521d\u59cb\u884c\u4e3a<\/li>\n<li>\u6b8b\u5dee\u7ed3\u6784\u5141\u8bb8\u76f4\u63a5\u63d2\u5165\u9884\u8bad\u7ec3\u6a21\u578b\u800c\u4e0d\u7834\u574f\u5176\u6027\u80fd<\/li>\n<\/ul>\n<h1>\u7ed3\u8bba<\/h1>\n<ul>\n<li>\n<p>\u63d0\u51fa\u975e\u5c40\u90e8\u8fd0\u7b97\uff08non-local 
operations\uff09\u4f5c\u4e3a\u5efa\u6a21\u957f\u7a0b\u4f9d\u8d56\u7684\u65b0\u8303\u5f0f\uff0c\u4e3a\u795e\u7ecf\u7f51\u7edc\u67b6\u6784\u8bbe\u8ba1\u63d0\u4f9b\u901a\u7528\u7ec4\u4ef6\uff0c\u5728\u89c6\u9891\u7406\u89e3\u4e0e\u56fe\u50cf\u5206\u6790\u591a\u4efb\u52a1\u4e2d\u9a8c\u8bc1\u5176\u666e\u9002\u4ef7\u503c<\/p>\n<\/li>\n<li>\n<p><strong>\u4f18\u70b9<\/strong>\uff1a<\/p>\n<ul>\n<li>\n<p>\u6a21\u5757\u5316\u8bbe\u8ba1\u517c\u5bb9\u73b0\u6709\u67b6\u6784\uff08\u5982CNN\/RNN\uff09<\/p>\n<\/li>\n<li>\n<p>\u5728\u89c6\u9891\u5206\u7c7b\u3001\u76ee\u6807\u68c0\u6d4b\u3001\u59ff\u6001\u4f30\u8ba1\u7b49\u4efb\u52a1\u4e2d\u5747\u5e26\u6765\u7a33\u5b9a\u6027\u80fd\u63d0\u5347<br \/>\n<strong>\u5c40\u9650<\/strong>\uff1a<\/p>\n<\/li>\n<li>\n<p>\u672a\u8ba8\u8bba\u8ba1\u7b97\u590d\u6742\u5ea6\u4e0e\u5b9e\u65f6\u6027\u6743\u8861<\/p>\n<\/li>\n<li>\n<p>\u591a\u4efb\u52a1\u6cdb\u5316\u6027\u9700\u66f4\u591a\u57fa\u51c6\u6d4b\u8bd5\u9a8c\u8bc1<\/p>\n<\/li>\n<\/ul>\n<\/li>\n<li>\n<p>\u4e3b\u8981\u7ed3\u8bba\uff1a<br \/>\n(1) \u975e\u5c40\u90e8\u6a21\u5757\u901a\u8fc7\u76f4\u63a5\u5efa\u6a21\u4efb\u610f\u4f4d\u7f6e\u95f4\u4f9d\u8d56\u5173\u7cfb\uff0c\u663e\u8457\u4f18\u4e8e\u4f20\u7edf\u5c40\u90e8\u8fd0\u7b97\u7684\u7d2f\u79ef\u6548\u5e94<br \/>\n(2) \u8be5\u7ec4\u4ef6\u4ee5\u5373\u63d2\u5373\u7528\u65b9\u5f0f\u63d0\u5347\u89c6\u9891\u5206\u7c7b\u7b49\u4efb\u52a1\u7684baseline\u6027\u80fd<br \/>\n(3) \u9884\u793a\u975e\u5c40\u90e8\u5c42\uff08non-local layers\uff09\u5c06\u6210\u4e3a\u672a\u6765\u7f51\u7edc\u67b6\u6784\u7684\u6838\u5fc3\u8981\u7d20<\/p>\n<\/li>\n<\/ul>\n<h1>Non-local Networks VS Transformer<\/h1>\n<p>\u975e\u5c40\u90e8\u795e\u7ecf\u7f51\u7edc\uff08Non-local 
Networks\uff09\u786e\u5b9e\u53ef\u4ee5\u89c6\u4e3aTransformer\u7684\u201c\u8fd1\u4eb2\u201d\uff0c\u4e24\u8005\u5728\u6838\u5fc3\u601d\u60f3\u548c\u6280\u672f\u8def\u7ebf\u4e0a\u9ad8\u5ea6\u76f8\u4f3c\uff0c\u4f46\u5b58\u5728\u4e00\u4e9b\u5173\u952e\u5dee\u5f02\u3002\u4ee5\u4e0b\u662f\u8be6\u7ec6\u5bf9\u6bd4\u5206\u6790\uff1a<\/p>\n<table style=\"border-collapse: collapse; width: 100%; max-width: 800px; margin: 0 auto; font-family: Arial, sans-serif;\">\n<thead>\n<tr>\n<th style=\"border: 1px solid #ddd; padding: 12px; text-align: center; background-color: #f2f2f2;\">\u7ef4\u5ea6<\/th>\n<th style=\"border: 1px solid #ddd; padding: 12px; text-align: center; background-color: #f2f2f2;\">\u975e\u5c40\u90e8\u7f51\u7edc\uff082018\uff09<\/th>\n<th style=\"border: 1px solid #ddd; padding: 12px; text-align: center; background-color: #f2f2f2;\">Transformer\uff082017\uff09<\/th>\n<\/tr>\n<\/thead>\n<tbody>\n<tr>\n<td style=\"border: 1px solid #ddd; padding: 12px;\"><strong>\u57fa\u7840\u601d\u60f3<\/strong><\/td>\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\u901a\u8fc7\u5168\u5c40\u4ea4\u4e92\u5efa\u6a21\u957f\u7a0b\u4f9d\u8d56<\/td>\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\u901a\u8fc7\u81ea\u6ce8\u610f\u529b\u673a\u5236\u6355\u83b7\u5e8f\u5217\u5185\u4efb\u610f\u4f4d\u7f6e\u7684\u5173\u8054<\/td>\n<\/tr>\n<tr>\n<td style=\"border: 1px solid #ddd; padding: 12px;\"><strong>\u6570\u5b66\u5f62\u5f0f<\/strong><\/td>\n<td style=\"border: 1px solid #ddd; padding: 12px; font-family: 'Times New Roman', serif;\">\\(y_i=\\sum_j \\operatorname{softmax}\\left(x_i^T x_j\\right) g\\left(x_j\\right)\\)<\/td>\n<td style=\"border: 1px solid #ddd; padding: 12px; font-family: 'Times New Roman', serif;\">\\(y_i=\\sum_j \\operatorname{softmax}\\left(\\frac{q_i^T k_j}{\\sqrt{d_k}}\\right) v_j\\)<\/td>\n<\/tr>\n<tr>\n<td style=\"border: 1px solid #ddd; padding: 12px;\"><strong>\u5f52\u4e00\u5316\u65b9\u5f0f<\/strong><\/td>\n<td style=\"border: 1px solid #ddd; padding: 
12px;\">\u53ef\u9009softmax\u6216\u76f4\u63a5\u70b9\u79ef<\/td>\n<td style=\"border: 1px solid #ddd; padding: 12px;\">\u5fc5\u987b\u4f7f\u7528softmax<\/td>\n<\/tr>\n<tr>\n<td style=\"border: 1px solid #ddd; padding: 12px;\"><strong>\u6b8b\u5dee\u8bbe\u8ba1<\/strong><\/td>\n<td style=\"border: 1px solid #ddd; padding: 12px; font-family: 'Times New Roman', serif;\">\\(z_i=W_z y_i+x_i\\)<\/td>\n<td style=\"border: 1px solid #ddd; padding: 12px; font-family: 'Times New Roman', serif;\">LayerNorm\uff08\\(x+\\operatorname{Attention}(x)\\)\uff09<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<h3>\u5386\u53f2\u8109\u7edc<\/h3>\n<ul>\n<li>2017\u5e74\uff1aVaswani\u7b49\u63d0\u51faTransformer\uff0c\u4f46\u6700\u521d\u4ec5\u7528\u4e8eNLP<\/li>\n<li>2018\u5e74\uff1aWang\u7b49\u63d0\u51fa\u975e\u5c40\u90e8\u7f51\u7edc\uff0c\u9996\u6b21\u5c06\u6ce8\u610f\u529b\u5f0f\u64cd\u4f5c\u7cfb\u7edf\u5f15\u5165\u89c6\u89c9\u4efb\u52a1<\/li>\n<li>2020\u5e74\u540e\uff1aViT\u8bc1\u660e\u7eafTransformer\u5728\u89c6\u89c9\u4e2d\u53ef\u884c\uff0c\u975e\u5c40\u90e8\u7f51\u7edc\u7684\u601d\u60f3\u88abTransformer\u67b6\u6784\u5438\u6536<\/li>\n<\/ul>\n<h3>\u5982\u4f55\u7406\u89e3\u4e24\u8005\u7684\u5173\u7cfb\uff1f<\/h3>\n<ul>\n<li>\u975e\u5c40\u90e8\u7f51\u7edc \u2248 \u89c6\u89c9\u9886\u57df\u7684\u201c\u5355\u5934\u81ea\u6ce8\u610f\u529b\u201d<br \/>\n\u53ef\u4ee5\u770b\u4f5c\u662f\u4e3aCNN\u5b9a\u5236\u7684\u7b80\u5316\u7248Transformer\u6a21\u5757\uff0c\u4fdd\u7559\u4e86\u7a7a\u95f4\u7ed3\u6784 inductive bias\u3002<\/li>\n<li>Transformer \u2192 \u66f4\u901a\u7528\u7684\u975e\u5c40\u90e8\u64cd\u4f5c<br \/>\n\u901a\u8fc7\u591a\u5934\u673a\u5236\u3001\u5c42\u5f52\u4e00\u5316\u7b49\u8bbe\u8ba1\uff0c\u5f62\u6210\u4e86\u66f4\u5f3a\u5927\u7684\u901a\u7528\u67b6\u6784\u3002<\/li>\n<\/ul>\n<h3>\u53ef\u89c6\u5316\u7406\u89e3\uff08\u4ee5\u89c6\u9891\u4efb\u52a1\u4e3a\u4f8b\uff09<\/h3>\n<p><img decoding=\"async\" 
src=\"https:\/\/gnnclub-1311496010.cos.ap-beijing.myqcloud.com\/wp-content\/uploads\/2025\/07\/20250709185336765.png\" alt=\"file\" \/><\/p>\n<ul>\n<li>\u8f93\u5165\uff1a\u89c6\u9891\u5e27\u5e8f\u5217\uff08T\u00d7H\u00d7W\u00d7C\uff09<\/li>\n<li>\u975e\u5c40\u90e8\u64cd\u4f5c\uff1a<\/li>\n<li>\u8ba1\u7b97\u7b2c $t$ \u5e27\u67d0\u50cf\u7d20\u4e0e\u6240\u6709\u5e27\u6240\u6709\u50cf\u7d20\u7684\u5173\u8054\u6743\u91cd\uff08\u65f6\u7a7a\u5168\u5c40\uff09<\/li>\n<li>\u8f93\u51fa\u805a\u5408\u540e\u7684\u7279\u5f81\uff08\u5982\u70b9$x_i$\u4f4d\u7f6e\u589e\u5f3a\u4e0e\u8fdc\u5904\u7403$x_j$\u7684\u5173\u8054\uff09<\/li>\n<li>CNN\u540e\u7eed\u5904\u7406\uff1a\u5377\u79ef\u5c42\u8fdb\u4e00\u6b65\u878d\u5408\u5c40\u90e8\uff0d\u5168\u5c40\u4fe1\u606f\u3002<\/li>\n<\/ul>\n<h1>Pytorch code<\/h1>\n<pre><code class=\"language-python\">import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nclass NonLocalBlock(nn.Module):\n    def __init__(self, in_channels, inter_channels=None, sub_sample=True, mode=&#039;embedded&#039;):\n        &quot;&quot;&quot;\n        Non-local Block \u5b9e\u73b0\n        Args:\n            in_channels: \u8f93\u5165\u7279\u5f81\u56fe\u7684\u901a\u9053\u6570\n            inter_channels: \u4e2d\u95f4\u7279\u5f81\u901a\u9053\u6570\uff08\u9ed8\u8ba4\u4e3ain_channels\/\/2\uff09\n            sub_sample: \u662f\u5426\u5728\u5206\u652f\u4e2d\u4f7f\u7528\u6700\u5927\u6c60\u5316\u8fdb\u884c\u4e0b\u91c7\u6837\n            mode: \u975e\u5c40\u90e8\u64cd\u4f5c\u7c7b\u578b\uff0c\u53ef\u9009 &#039;embedded&#039;\/&#039;dot&#039;\/&#039;concatenate&#039;\n        &quot;&quot;&quot;\n        super(NonLocalBlock, self).__init__()\n        self.mode = mode\n        self.in_channels = in_channels\n        self.inter_channels = inter_channels if inter_channels is not None else in_channels \/\/ 2\n\n        # \u5b9a\u4e49\u5404\u5206\u652f\u7684\u5377\u79ef\u5c42\n        self.g = nn.Conv2d(in_channels, self.inter_channels, kernel_size=1)\n        self.theta = 
nn.Conv2d(in_channels, self.inter_channels, kernel_size=1)\n        self.phi = nn.Conv2d(in_channels, self.inter_channels, kernel_size=1)\n\n        # \u8f93\u51fa\u8f6c\u6362\u5c42\n        self.W = nn.Conv2d(self.inter_channels, in_channels, kernel_size=1)\n        nn.init.constant_(self.W.weight, 0)\n        nn.init.constant_(self.W.bias, 0)\n\n        # \u4e0b\u91c7\u6837\u76f8\u5173\n        self.sub_sample = sub_sample\n        if sub_sample:\n            self.g = nn.Sequential(self.g, nn.MaxPool2d(kernel_size=2))\n            self.phi = nn.Sequential(self.phi, nn.MaxPool2d(kernel_size=2))\n\n    def forward(self, x):\n        &quot;&quot;&quot;\n        \u524d\u5411\u4f20\u64ad\n        Args:\n            x: \u8f93\u5165\u7279\u5f81\u56fe [batch, channels, height, width]\n        Returns:\n            \u7ecf\u8fc7non-local\u64cd\u4f5c\u7684\u7279\u5f81\u56fe [batch, channels, height, width]\n        &quot;&quot;&quot;\n        batch_size = x.size(0)\n\n        # g\u5206\u652f\n        g_x = self.g(x).view(batch_size, self.inter_channels, -1)\n        g_x = g_x.permute(0, 2, 1)\n\n        # theta\u5206\u652f\n        theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)\n        theta_x = theta_x.permute(0, 2, 1)\n\n        # phi\u5206\u652f\n        phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)\n\n        # \u8ba1\u7b97\u6ce8\u610f\u529b\u56fe\n        f = torch.matmul(theta_x, phi_x)\n        f = F.softmax(f, dim=-1)\n\n        # \u6ce8\u610f\u529b\u52a0\u6743\n        y = torch.matmul(f, g_x)\n        y = y.permute(0, 2, 1).contiguous()\n        y = y.view(batch_size, self.inter_channels, *x.size()[2:])\n\n        # \u8f93\u51fa\u8f6c\u6362\n        z = self.W(y)\n        return z + x  # \u6b8b\u5dee\u8fde\u63a5\n\n# ------------------- \u7528\u6cd5\u793a\u4f8b -------------------\nif __name__ == &quot;__main__&quot;:\n    # 1. 
\u521d\u59cb\u5316Non-local Block\uff08\u8f93\u5165\u901a\u9053\u6570\u4e3a256\uff09\n    nl_block = NonLocalBlock(in_channels=256, mode=&#039;embedded&#039;)\n\n    # 2. \u6a21\u62df\u8f93\u5165\u6570\u636e\uff08batch_size=4, \u901a\u9053=256, \u7279\u5f81\u56fe\u5c3a\u5bf8=56x56\uff09\n    dummy_input = torch.randn(4, 256, 56, 56)\n\n    # 3. \u524d\u5411\u4f20\u64ad\n    output = nl_block(dummy_input)\n\n    print(f&quot;\u8f93\u5165\u5f62\u72b6: {dummy_input.shape}&quot;)\n    print(f&quot;\u8f93\u51fa\u5f62\u72b6: {output.shape}&quot;)  # \u5e94\u4e0e\u8f93\u5165\u5f62\u72b6\u4e00\u81f4<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u57fa\u672c\u4fe1\u606f \ud83d\udcf0\u6807\u9898: \u975e\u5c40\u90e8\u795e\u7ecf\u7f51\u7edc\uff08Non-local Neural Networks\uff09 \ud83d\udd8b\ufe0f\u4f5c\u8005: Xiao [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":3168,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[30,18],"tags":[],"class_list":["post-3163","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-30","category-18"],"_links":{"self":[{"href":"http:\/\/www.gnn.club\/index.php?rest_route=\/wp\/v2\/posts\/3163","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/www.gnn.club\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.gnn.club\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.gnn.club\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.gnn.club\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=3163"}],"version-history":[{"count":16,"href":"http:\/\/www.gnn.club\/index.php?rest_route=\/wp\/v2\/posts\/3163\/revisions"}],"predecessor-version":[{"id":3229,"href":"http:\/\/www.gnn.club\/index.php?rest_route=\/wp\/v2\/posts\/3163\/revisions\/3229"}],"wp:featuredmedia":[{"embeddable":true,"hre
f":"http:\/\/www.gnn.club\/index.php?rest_route=\/wp\/v2\/media\/3168"}],"wp:attachment":[{"href":"http:\/\/www.gnn.club\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=3163"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.gnn.club\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=3163"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.gnn.club\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=3163"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}