怎么仅加载一部分内容的预训练模型参数

合集下载

1、下载文档前请自行甄别文档内容的完整性，平台不提供额外的编辑、内容补充、找答案等附加服务。
2、"仅部分预览"的文档,不可在线预览部分如存在完整性等问题,可反馈申请退款(可完整预览的文档不适用该条件!)。
3、如文档侵犯您的权益，请联系客服反馈,我们会尽快为您处理(人工客服工作时间：9:00-18:30)。

怎么仅加载⼀部分内容的预训练模型参数
在pytorch中提供了很多预训练好的模型，以分类为例，基本上都是⽤ImageNet数据集来训练的，分为1000类。

但是很多时候我们要实现的分类项⽬可能并没有这么简单，⽐如我们可能并不仅仅只是实现单分类，可能想实现双分类或者是多分类，这个时候就需要对模型进⾏⼀定的修改
修改的同时还希望该修改后的模型中与预训练模型相同的部分仍能够使⽤预训练的参数来初始化，这时候应该怎么做？
1.单分类
这是最简单的情况，就是将1000类更改为⾃⼰想要分的类别数即可。

⽐如你想要对性别分类，分两类，使⽤pytorch中的预训练模型resnet18
#coding:utf-8
import torch
from torchvision import models
from torch import nn
device = torch.device("cuda:0"if torch.cuda.is_available() else"cpu")
# 然后选择使⽤的模型
model_conv = models.resnet18(pretrained=True)
# resnet18仅有⼀个全连接层
# 得到该全连接层输⼊神经元数.in_features
fc_features = model_conv.fc.in_features
# 默认的输出神经元数为1000
# 这⾥修改为⾃⼰想进⾏的⼆分类，类别为2,即man和woman
model_conv.fc = nn.Linear(fc_features, 2)
model_conv.to(device)
这样模型就设置成功了
2.双分类或多分类
这⾥以双分类为例，在上⾯的单分类中，我们仅是在原有的模型上修改了参数值，并没有改变整个模型的结构
但是单我们要实现双分类，如同时进⾏性别和⼈种分类，这个时候就需要在原来代码的基础上添加⼀些新的层，构造⼀个新的模型
如下⾯代码：
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None):
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None):
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1 self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(self, block, layers, zero_init_residual=False,
groups=1, width_per_group=64, replace_stride_with_dilation=None,
norm_layer=None ,gender_classes=2, race_classes=4):
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
yer1 = self._make_layer(block, 64, layers[0])
yer2 = self._make_layer(block, 128, layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
yer3 = self._make_layer(block, 256, layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
yer4 = self._make_layer(block, 512, layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
# 注释掉之前的全连接层
# self.fc = nn.Linear(512 * block.expansion, num_classes)
# 变成两个并⾏的全连接层
self.gen_fc = nn.Linear(512 * block.expansion, gender_classes)
self.race_fc = nn.Linear(512 * block.expansion, race_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity. # This improves the model by 0.2~0.3% according to https:///abs/1706.02677 if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0)
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0)
def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = yer1(x)
x = yer2(x)
x = yer3(x)
x = yer4(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
#变成两个并⾏的全连接层
gender = F.softmax(self.gen_fc(x), 1)
race = F.softmax(self.race_fc(x), 1)
return gender, race
def resnet18Owned(**kwargs):
"""Constructs a ResNet-18 model.
"""
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
return model
def test():
net = resnet18Owned(gender_classes=2,race_classes=4)
gender, race = net(Variable(torch.randn(2,3,224,224)))
print('gender :', gender.size(),gender)
print('race :', race.size(), race)
if __name__ == '__main__':
test()
这⾥举的是⼀个⽐较简单的例⼦，仅是将⼀个全连接层的resnet18更改为了两个并⾏全连接层的resnet18，那么这个时候怎么使⽤之前训练的resnet18模型参数呢？
#coding:utf-8
import torch
from torchvision import models
from torch import nn
device = torch.device("cuda:0"if torch.cuda.is_available() else"cpu")
#导⼊预训练模型，得到结构和参数
pretrained_resnet18 = models.resnet18(pretrained=True)
pretrained_resnet18_dict = pretrained_resnet18.state_dict()
#调⽤⾃⼰设置的模型，也得到结构即相应参数
model_conv = resnet18Owned(gender_classes=2, race_classes=3)
model_conv_dict = model_conv.state_dict()
#当模型中的某层是同时在两个模型中共有时才取出，即得到除了全连接层以外的所有层次对应的参数
pretrained_resnet18_dict = {k:v for k,v in pretrained_resnet18_dict.items() if k in model_conv_dict}
#然后⽤该新参数的值取更新你⾃⼰的模型的参数
#这样，除了你修改的全连接层外，其他层次的参数就都是预训练模型的参数了
model_conv_dict.update(pretrained_resnet18_dict)
#然后将参数导⼊你的模型即可
model_conv.load_state_dict(model_conv_dict)
model_conv.to(device)
后⾯了解到有⼀种更简单的⽅法：
就是当你设置好你⾃⼰的模型后，如果仅想使⽤预训练模型相同结构处的参数，即在加载的时候将参数strict设置为False即可。

该参数值默认为True，表⽰预训练模型的层和⾃⼰定义的⽹络结构层严格对应相等（⽐如层名和维度），否则⽆法加载，实现如下：
model_conv.load_state_dict(torch.utils.model_zoo.load_url('https:///models/resnet18-5c106cde.pth'), strict=False)
看看是否仅将strict设置为False即可
#coding:utf-8
import torch
from torchvision import models
from torch import nn
device = torch.device("cuda:0"if torch.cuda.is_available() else"cpu")
# 调⽤⾃⼰设置的模型，也得到结构即相应参数
model_conv = resnet18Owned(gender_classes=2, race_classes=3)
model_conv.load_state_dict(torch.utils.model_zoo.load_url('https:///models/resnet18-5c106cde.pth'), strict=False)
model_conv.to(device)
这个是官⽅给的预训练模型的下载地址https:///models/resnet18-5c106cde.pth
⚠ 如果你的torch版本是1.0.1及以下，那就使⽤torch.utils.model_zoo.load_url()；如果是1.1.0及以上，可以使⽤新⽅法
torch.hub.load_state_dict_from_url()。