关键词:
摘要
这篇文章告诉大家如何在DBNet中加入新的主干网络。通过这篇文章你可以学到如何将现有的主干网络加入到DBNet中,提高DBNet的检测能力。
主干网络
我加入的网络是ConvNext。代码详见:
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
import torch.nn as nn
import torch.nn.functional as F
from timm.models.layers import trunc_normal_, DropPath
from timm.models.registry import register_model
class Block(nn.Module):
    r"""ConvNeXt Block. There are two equivalent implementations:

    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back

    We use (2) as we find it slightly faster in PyTorch.

    Args:
        dim (int): Number of input channels.
        drop_path (float): Stochastic depth rate. Default: 0.0
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
            A value <= 0 disables layer scale (``gamma`` is then ``None``).
    """

    def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
        super().__init__()
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv
        self.norm = LayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(dim, 4 * dim)  # pointwise/1x1 convs, implemented with linear layers
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(4 * dim, dim)
        # Learnable per-channel scale applied to the residual branch.
        self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
                                  requires_grad=True) if layer_scale_init_value > 0 else None
        # Stochastic depth is only instantiated when a positive rate is requested.
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()

    def forward(self, x):
        """Apply the block to ``x`` of shape (N, C, H, W) and return the same shape."""
        shortcut = x  # kept for the residual connection (avoids shadowing builtin ``input``)
        x = self.dwconv(x)
        x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
        x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            x = self.gamma * x
        x = x.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

        return shortcut + self.drop_path(x)
class ConvNeXt(nn.Module):
    r"""ConvNeXt

    A PyTorch impl of : `A ConvNet for the 2020s` -
    https://arxiv.org/pdf/2201.03545.pdf

    Args:
        in_chans (int): Number of input image channels. Default: 3
        num_classes (int): Number of classes for classification head. Default: 1000
        depths (tuple(int)): Number of blocks at each stage. Default: (3, 3, 9, 3)
        dims (tuple(int)): Feature dimension at each stage. Default: (96, 192, 384, 768)
        drop_path_rate (float): Stochastic depth rate. Default: 0.
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
        head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
    """

    def __init__(self, in_chans=3, num_classes=1000,
                 depths=(3, 3, 9, 3), dims=(96, 192, 384, 768), drop_path_rate=0.,
                 layer_scale_init_value=1e-6, head_init_scale=1.,
                 ):
        # NOTE: defaults are tuples rather than lists so no mutable object is
        # shared between calls; lists passed by callers still work because the
        # sequences are only indexed and summed.
        super().__init__()

        # Stem and 3 intermediate downsampling conv layers.
        self.downsample_layers = nn.ModuleList()
        stem = nn.Sequential(
            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
            LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
        )
        self.downsample_layers.append(stem)
        for i in range(3):
            downsample_layer = nn.Sequential(
                LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
                nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2),
            )
            self.downsample_layers.append(downsample_layer)

        # 4 feature resolution stages, each consisting of multiple residual blocks.
        self.stages = nn.ModuleList()
        # One drop-path rate per block, increasing linearly from 0 to drop_path_rate.
        dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
        cur = 0  # index of the first block of the current stage within dp_rates
        for i in range(4):
            stage = nn.Sequential(
                *[Block(dim=dims[i], drop_path=dp_rates[cur + j],
                        layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])]
            )
            self.stages.append(stage)
            cur += depths[i]

        self.norm = nn.LayerNorm(dims[-1], eps=1e-6)  # final norm layer
        self.head = nn.Linear(dims[-1], num_classes)

        self.apply(self._init_weights)
        self.head.weight.data.mul_(head_init_scale)
        self.head.bias.data.mul_(head_init_scale)

    def _init_weights(self, m):
        """Truncated-normal init for conv/linear weights, zeros for their biases."""
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            trunc_normal_(m.weight, std=.02)
            nn.init.constant_(m.bias, 0)

    def forward_features(self, x):
        """Run all four downsample+stage pairs and return pooled, normalized features."""
        for i in range(4):
            x = self.downsample_layers[i](x)
            x = self.stages[i](x)
        return self.norm(x.mean([-2, -1]))  # global average pooling, (N, C, H, W) -> (N, C)

    def forward(self, x):
        """Return classification logits for input ``x``."""
        x = self.forward_features(x)
        x = self.head(x)
        return x
class LayerNorm(nn.Module):
    r"""LayerNorm that supports two data formats: channels_last (default) or channels_first.

    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
    shape (batch_size, height, width, channels) while channels_first corresponds to inputs
    with shape (batch_size, channels, height, width).

    Args:
        normalized_shape (int): Size of the channel dimension being normalized.
        eps (float): Value added to the variance for numerical stability. Default: 1e-6
        data_format (str): Either "channels_last" or "channels_first".

    Raises:
        NotImplementedError: If ``data_format`` is not one of the two supported values.
    """

    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError(
                f"unsupported data_format: {self.data_format!r}"
            )
        # F.layer_norm expects a shape tuple, not a bare int.
        self.normalized_shape = (normalized_shape, )

    def forward(self, x):
        """Normalize ``x`` over its channel dimension according to ``data_format``."""
        if self.data_format == "channels_last":
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":
            # Manual layer norm over dim 1 (channels), using the biased variance.
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            # Broadcast the per-channel affine parameters over H and W.
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x
# Download locations of the pretrained checkpoints, keyed by "<variant>_<dataset tag>".
model_urls = {
    "convnext_tiny_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_tiny_1k_224_ema.pth",
    "convnext_small_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_small_1k_224_ema.pth",
    "convnext_base_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_base_1k_224_ema.pth",
    "convnext_large_1k": "https://dl.fbaipublicfiles.com/convnext/convnext_large_1k_224_ema.pth",
    "convnext_base_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_base_22k_224.pth",
    "convnext_large_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_large_22k_224.pth",
    "convnext_xlarge_22k": "https://dl.fbaipublicfiles.com/convnext/convnext_xlarge_22k_224.pth",
}
@register_model
def convnext_tiny(pretrained=False, **kwargs):
    """Build ConvNeXt with depths [3, 3, 9, 3] and dims [96, 192, 384, 768].

    Args:
        pretrained (bool): If True, download the ``convnext_tiny_1k`` checkpoint
            and load its ``"model"`` state dict.
        **kwargs: Forwarded to the ``ConvNeXt`` constructor.

    Returns:
        The constructed ``ConvNeXt`` model.
    """
    model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs)
    if pretrained:
        url = model_urls['convnext_tiny_1k']
        checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
        model.load_state_dict(checkpoint["model"])
    return model
@register_model
def convnext_small(pretrained=False, **kwargs):
    """Build ConvNeXt with depths [3, 3, 27, 3] and dims [96, 192, 384, 768].

    Args:
        pretrained (bool): If True, download the ``convnext_small_1k`` checkpoint
            and load its ``"model"`` state dict.
        **kwargs: Forwarded to the ``ConvNeXt`` constructor.

    Returns:
        The constructed ``ConvNeXt`` model.
    """
    model = ConvNeXt(depths=[3, 3, 27, 3], dims=[96, 192, 384, 768], **kwargs)
    if pretrained:
        url = model_urls['convnext_small_1k']
        checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
        model.load_state_dict(checkpoint["model"])
    return model
@register_model
def convnext_base(pretrained=False, in_22k=False, **kwargs):
    """Build ConvNeXt with depths [3, 3, 27, 3] and dims [128, 256, 512, 1024].

    Args:
        pretrained (bool): If True, download a checkpoint and load its
            ``"model"`` state dict.
        in_22k (bool): Selects the ``convnext_base_22k`` checkpoint instead of
            ``convnext_base_1k`` when ``pretrained`` is True.
        **kwargs: Forwarded to the ``ConvNeXt`` constructor.

    Returns:
        The constructed ``ConvNeXt`` model.
    """
    model = ConvNeXt(depths=[3, 3, 27, 3], dims=[128, 256, 512, 1024], **kwargs)
    if pretrained:
        url = model_urls['convnext_base_22k'] if in_22k else model_urls['convnext_base_1k']
        checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
        model.load_state_dict(checkpoint["model"])
    return model
@register_model
def convnext_large(pretrained=False, in_22k=False, **kwargs):
    """Build ConvNeXt with depths [3, 3, 27, 3] and dims [192, 384, 768, 1536].

    Args:
        pretrained (bool): If True, download a checkpoint and load its
            ``"model"`` state dict.
        in_22k (bool): Selects the ``convnext_large_22k`` checkpoint instead of
            ``convnext_large_1k`` when ``pretrained`` is True.
        **kwargs: Forwarded to the ``ConvNeXt`` constructor.

    Returns:
        The constructed ``ConvNeXt`` model.
    """
    model = ConvNeXt(depths=[3, 3, 27, 3], dims=[192, 384, 768, 1536], **kwargs)
    if pretrained:
        url = model_urls['convnext_large_22k'] if in_22k else model_urls['convnext_large_1k']
        checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
        model.load_state_dict(checkpoint["model"])
    return model
@register_model
def convnext_xlarge(pretrained=False, in_22k=False, **kwargs):
    """Build ConvNeXt with depths [3, 3, 27, 3] and dims [256, 512, 1024, 2048].

    Args:
        pretrained (bool): If True, download the ``convnext_xlarge_22k``
            checkpoint and load its ``"model"`` state dict.
        in_22k (bool): Must be True when ``pretrained`` is True, because the
            URL table only lists a ``convnext_xlarge_22k`` entry.
        **kwargs: Forwarded to the ``ConvNeXt`` constructor.

    Returns:
        The constructed ``ConvNeXt`` model.

    Raises:
        ValueError: If ``pretrained`` is True but ``in_22k`` is False.
    """
    # Fail fast, before building the model: there is no "convnext_xlarge_1k"
    # URL, so the old lookup raised a bare KeyError after construction.
    if pretrained and not in_22k:
        raise ValueError(
            "only the 22k pre-trained ConvNeXt-XL checkpoint is available; "
            "please set in_22k=True"
        )
    model = ConvNeXt(depths=[3, 3, 27, 3], dims=[256, 512, 1024, 2048], **kwargs)
    if pretrained:
        url = model_urls['convnext_xlarge_22k']
        checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
        model.load_state_dict(checkpoint["model"])
    return model
操作过程
修改主干网络
我们把上面的代码改造成DBNet的主干网络。
DBNet的代码:https://github.com/WenmuZhou/DBNet.pytorch
将其下载下来。
然后在./models/backbone下面新建convnext.py脚本。
将上面的代码,插入进来。
然后修改ConvNeXt的 forward()函数,修改为:
def forward(self, x):
    """Return the feature maps of all four stages instead of class logits.

    Each stage output is taken after its downsampling layer and residual
    blocks, so the four tensors have progressively smaller spatial size.

    Returns:
        tuple: ``(x2, x3, x4, x5)``, the outputs of stages 0 to 3 in order.
    """
    x = self.downsample_layers[0](x)
    x2 = self.stages[0](x)

    x = self.downsample_layers[1](x2)
    x3 = self.stages[1](x)

    x = self.downsample_layers[2](x3)
    x4 = self.stages[2](x)

    x = self.downsample_layers[3](x4)
    x5 = self.stages[3](x)

    return x2, x3, x4, x5
和网络图对应起来。
在 __init__ 函数中增加 out_channels 属性:
self.out_channels=dims
将in_chans改为in_channels,这样就能和配置文件的字段对上了。
删除@register_model
接下来修改创建模型的部分,用convnext_tiny举例
def convnext_tiny(pretrained=False, **kwargs):
    """Build a reduced-depth ConvNeXt-Tiny (depths [2, 2, 3, 3]) for use as a backbone.

    Args:
        pretrained (bool): If True, download the ``convnext_tiny_1k`` checkpoint
            and load its ``"model"`` state dict non-strictly.
        **kwargs: Forwarded to the ``ConvNeXt`` constructor.

    Returns:
        The constructed ``ConvNeXt`` model.
    """
    model = ConvNeXt(depths=[2, 2, 3, 3], dims=[96, 192, 384, 768], **kwargs)
    if pretrained:
        url = model_urls['convnext_tiny_1k']
        checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", check_hash=True)
        # strict=False: the checkpoint was saved with depths [3, 3, 9, 3], so it
        # holds keys for blocks this shallower model does not have.
        model.load_state_dict(checkpoint["model"], strict=False)
    return model
原始的convnext_tiny很大,我在这里把depths做了修改,
将depths=[3, 3, 9, 3]改为depths=[2, 2, 3, 3],设置strict为False,防止预训练权重对不上报错。
在代码的开始部分增加:
__all__ = ['convnext_tiny', 'convnext_small', 'convnext_base']
这样通过 from convnext import * 导出的方法就只有这三个了(__all__ 只影响 import *,其他函数仍然可以通过显式导入访问)。
修改完成后测一下:
if __name__ == '__main__':
    # Smoke test: run a dummy batch through the backbone and print the shape of
    # every returned feature map, then the advertised per-stage channel counts.
    import torch

    x = torch.zeros(2, 3, 640, 640)
    net = convnext_tiny(pretrained=False)
    y = net(x)
    for u in y:
        print(u.shape)
    print(net.out_channels)
完整代码:
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import torch
import torch.nn as nn
import torch.nn.functional as F
from timm.models.layers import trunc_normal_, DropPath
__all__ = ['convnext_tiny', 'convnext_small', 'convnext_base']
class Block(nn.Module):
r""" ConvNeXt Block. There are two equivalent implementations:
(1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
(2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
We use (2) as we find it slightly faster in PyTorch
Args:
dim (int): Number of input channels.
drop_path (float): Stochastic depth rate. Default: 0.0
layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
"""
def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
super().__init__()
self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv
self.norm = LayerNorm(dim, eps=1e-6)
self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers
self.act = nn.GELU()
self.pwconv2 = nn.Linear(4 * dim, dim)
self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
requires_grad=True) if layer_scale_init_value > 0 else None
self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
def forward(self, x):
input = x
x = self.dwconv(x)
x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C)
x = self
(原文的完整代码在此处被截断。)
dbnet实战:详解dbnet训练与测试(pytorch)(代码片段)
...b链接:github.com网络结构首先,图像输入特征提取主干,提取特征;其次,特征金字塔上采样到相同的尺寸,并进行特征级联得到特征F;然后,特征F用于预测概率图(pr 查看详情
tar(代码片段)
...案(备份文件),也可以在档案中改变文件,或者向档案中加入新的文件。tar最初被用来在磁带上创建档案,现在,用户可以在任何设备上创建档案。利用tar命令,可以把一大堆的文件和目录全部打包成一个文件,这对于备份文... 查看详情
论文笔记系列:经典主干网络--vgg(代码片段)
✨写在前面:强烈推荐给大家一个优秀的人工智能学习网站,内容包括人工智能基础、机器学习、深度学习神经网络等,详细介绍各部分概念及实战教程,通俗易懂,非常适合人工智能领域初学者及研究者学... 查看详情
目标检测算法-yolo-v4代码详解(代码片段)
...arknet53,其中是将激活函数改为Mish激活函数,并且在网络中加入了CSP结构。(2)对特征提取过程的加强,添加了SPP,PANet结构。(3)在数据预处理阶段加入Mosaic方法。(4)在损失函数中做了改 查看详情
数据框中的基本操作
1.加列名2.subset某一列subset函数的应用:http://blog.163.com/jiaqiang_wang/blog/static/11889615320158300180642/3.数据框中加入新的一列(有三种方法)(1)(2)(3)4.数据框中加入新的一行 查看详情
我们如何确保维护分支上的错误修复合并回主干?
】我们如何确保维护分支上的错误修复合并回主干?【英文标题】:Howdoweensurethatbugfixesonmaintenancebrancharemergedbacktotrunk?【发布时间】:2013-08-2216:59:30【问题描述】:我们经营基于主干的开发。我们最新最好的代码不断地集成到我... 查看详情
手把手带你yolov5(v6.1)添加注意力机制(二)(在c3模块中加入注意力机制)(代码片段)
...上30多种顶会Attention原理图)》文章中已经介绍过了如何在主干网络里添加单独的注意力层,今天这篇将会介绍如何在C3模块里面加入注意力层。文章目录1.添加方式介绍1.1C3SE1.2C3CA1.3C3CBAM1.4C3ECA1.添加方式介绍1.1C3SE第一步&... 查看详情
基础网络知识(代码片段)
...不堪重负。 现有的网络带宽是金字塔结构,城际省际主干带宽仅仅相当于其所有用户带宽之和的5%。如果全部使用单播协议,将造成网络主干不堪重负。现在的P2P应用就已经使主干经常阻塞,只要有5%的客户在全速使用网... 查看详情
php如何在wordpress中加载fontawesome5(代码片段)
javascript数组,数组中加入新元素push(),unshift()相当于add()
<1>var a = []; //建立数组 push 方法 将新元素加入到一个数组中,并返回数组的新长度值。 查看详情
求助,求向access2003版本数据库中加入新纪录的程序代码(vc6.0,mfc,ado)
...他地方需要添加的内容,比如连接数据库时需要在头文件中加入#import.....好的话再加分,非常感谢!try myAccess.OnInitADOConn();//连接到数据库 ... 查看详情
svn将主干的代码合并到分支上
...题:开发有了项目主干,再次基础上起了一个分支,开发新的功能;因为业务需要,在上个分支还在测试阶段,还没有和主干合并; 但是新的业务又急着开发,又在上个分支的基础上又打了一个分支 这样我们会遇到... 查看详情
javascript--插入dom(代码片段)
...个DOM节点,想在这个DOM节点内插入新的DOM,应该如何做?1.1innerHTML如果这个DOM节点是空的如果这个DOM节点是空的,直接使用innerHTML='child'就可以修改DOM节点的内容,相当于“插入”了新的DOM节点如果这... 查看详情
jupyter常用技巧总结(代码片段)
...现的一些方便之处,便决定从pycharm转战到Jupyter。进入新的环境在Anaconda里面可能存在多个环境。通过下面的操作,可以在Jupyter中进入新的环境。这里以我的环境pytorch38为例。1.在命令行激活环境activatemxn 查看详情
php通过代码在wordpress中创建一个新的管理员用户。将此文件放在mu-plugins目录中并更新变量,然后在wor中加载页面(代码片段)
openharmony主干代码如何获取(代码片段)
OpenHarmony主干代码获取方式一(推荐):通过repo+ssh下载(需注册公钥,请参考码云帮助中心)。repoinit-ugit@gitee.com:openharmony/manifest.git-bmaster--no-repo-verifyreposync-crepoforall-c'gitlfspull'方式二:通... 查看详情
pytorch不同层设置不同学习率(代码片段)
...其是当我们在使用预训练的模型时,需要对一些除了主干网络以外的分支进行单独修改并进行初始化,其他主干网络层的参数采用预训练的模型参数进行初始化,这个时候我们希望在进行训练过程中,除主干网络... 查看详情
tar命令
...案(备份文件),也可以在档案中改变文件,或者向档案中加入新的文件。tar最初被用来在磁带上创建档案,现在,用户可以在任何设备上创建档案。利用tar命令,可以把一大堆的文件和目录全部打包成一个文件,这对于备份文... 查看详情