文章目录
1. train 训练模式
- 将模型设置成训练模式,本质上是将 self.training 设置为 True
- 启用 Dropout 类和 BatchNorm 类模块的训练行为
def train(self: 'T', mode: bool = True) -> 'T':
    r"""Sets the module in training mode.

    This has any effect only on certain modules. See documentations of
    particular modules for details of their behaviors in training/evaluation
    mode, if they are affected, e.g. :class:`Dropout`, :class:`BatchNorm`,
    etc.

    Args:
        mode (bool): whether to set training mode (``True``) or evaluation
                     mode (``False``). Default: ``True``.

    Returns:
        Module: self

    Raises:
        ValueError: if ``mode`` is not a ``bool``.
    """
    # Only a real bool is accepted; any other type raises instead of being
    # coerced by truthiness.
    if not isinstance(mode, bool):
        raise ValueError("training mode is expected to be boolean")
    # Record the mode on this module.
    self.training = mode
    # Propagate the same mode to every direct child by calling its train().
    for module in self.children():
        module.train(mode)
    return self
2. eval 推理模式
- 将模型设置成推理(评估)模式,本质上是将 self.training 设置为 False
- 关闭 Dropout 类和 BatchNorm 类模块的训练行为(它们改用推理时的行为)
def eval(self: 'T') -> 'T':
    r"""Sets the module in evaluation mode.

    This has any effect only on certain modules. See documentations of
    particular modules for details of their behaviors in training/evaluation
    mode, if they are affected, e.g. :class:`Dropout`, :class:`BatchNorm`,
    etc.

    This is equivalent with :meth:`self.train(False) <torch.nn.Module.train>`.

    See :ref:`locally-disable-grad-doc` for a comparison between
    `.eval()` and several similar mechanisms that may be confused with it.

    Returns:
        Module: self
    """
    # Evaluation mode is simply training mode switched off.
    return self.train(False)
3. requires_grad_
- 作用:对模型中的所有的参数设置是否需要计算梯度
def requires_grad_(self: 'T', requires_grad: bool = True) -> 'T':
    r"""Change if autograd should record operations on parameters in this
    module.

    This method sets the parameters' :attr:`requires_grad` attributes
    in-place.

    This method is helpful for freezing part of the module for finetuning
    or training parts of a model individually (e.g., GAN training).

    See :ref:`locally-disable-grad-doc` for a comparison between
    `.requires_grad_()` and several similar mechanisms that may be confused with it.

    Args:
        requires_grad (bool): whether autograd should record operations on
                              parameters in this module. Default: ``True``.

    Returns:
        Module: self
    """
    # Walk every parameter yielded by self.parameters() and set its
    # requires_grad flag in place, one parameter at a time.
    for p in self.parameters():
        p.requires_grad_(requires_grad)
    return self
- 案例代码
# 1. Import the required libraries
import torch
from torch import nn


# 2. Define the model
class MyTest(nn.Module):
    """Small demo module: two linear layers and one batch-norm layer."""

    def __init__(self):
        super(MyTest, self).__init__()
        self.linear1 = nn.Linear(2, 3)
        self.linear2 = nn.Linear(3, 4)
        self.batchnorm = nn.BatchNorm2d(4)


# 3. Instantiate the network
mymodel = MyTest()
# 4. Print every parameter as created
for param in mymodel.parameters():
    print(f"original_param={param}")
print("*"*100)
# 5. Freeze: set requires_grad=False on every parameter
mymodel.requires_grad_(False)
# 6. Print every (name, parameter) pair after freezing
for param in mymodel.named_parameters():
    print(f"False:param={param}")
print("*"*100)
# 7. Unfreeze: set requires_grad=True on every parameter
mymodel.requires_grad_(True)
# 8. Print every (name, parameter) pair after unfreezing
for param in mymodel.named_parameters():
    print(f"True:param={param}")
- 结果
original_param=Parameter containing:
tensor([[-0.5034, -0.6599],
[ 0.4451, 0.1687],
[-0.6548, -0.6644]], requires_grad=True)
original_param=Parameter containing:
tensor([-0.4746, 0.4611, 0.0340], requires_grad=True)
original_param=Parameter containing:
tensor([[ 0.2367, -0.4441, 0.4117],
[-0.4328, -0.5242, -0.4260],
[ 0.0096, -0.3758, 0.2389],
[ 0.4779, -0.5718, 0.4700]], requires_grad=True)
original_param=Parameter containing:
tensor([ 0.4026, -0.1612, -0.2042, -0.5499], requires_grad=True)
original_param=Parameter containing:
tensor([1., 1., 1., 1.], requires_grad=True)
original_param=Parameter containing:
tensor([0., 0., 0., 0.], requires_grad=True)
****************************************************************************************************
False:param=('linear1.weight', Parameter containing:
tensor([[-0.5034, -0.6599],
[ 0.4451, 0.1687],
[-0.6548, -0.6644]]))
False:param=('linear1.bias', Parameter containing:
tensor([-0.4746, 0.4611, 0.0340]))
False:param=('linear2.weight', Parameter containing:
tensor([[ 0.2367, -0.4441, 0.4117],
[-0.4328, -0.5242, -0.4260],
[ 0.0096, -0.3758, 0.2389],
[ 0.4779, -0.5718, 0.4700]]))
False:param=('linear2.bias', Parameter containing:
tensor([ 0.4026, -0.1612, -0.2042, -0.5499]))
False:param=('batchnorm.weight', Parameter containing:
tensor([1., 1., 1., 1.]))
False:param=('batchnorm.bias', Parameter containing:
tensor([0., 0., 0., 0.]))
****************************************************************************************************
True:param=('linear1.weight', Parameter containing:
tensor([[-0.5034, -0.6599],
[ 0.4451, 0.1687],
[-0.6548, -0.6644]], requires_grad=True))
True:param=('linear1.bias', Parameter containing:
tensor([-0.4746, 0.4611, 0.0340], requires_grad=True))
True:param=('linear2.weight', Parameter containing:
tensor([[ 0.2367, -0.4441, 0.4117],
[-0.4328, -0.5242, -0.4260],
[ 0.0096, -0.3758, 0.2389],
[ 0.4779, -0.5718, 0.4700]], requires_grad=True))
True:param=('linear2.bias', Parameter containing:
tensor([ 0.4026, -0.1612, -0.2042, -0.5499], requires_grad=True))
True:param=('batchnorm.weight', Parameter containing:
tensor([1., 1., 1., 1.], requires_grad=True))
True:param=('batchnorm.bias', Parameter containing:
tensor([0., 0., 0., 0.], requires_grad=True))
4. zero_grad
- 作用:将模型所有参数的梯度清零(set_to_none=True 时则置为 None),作用与优化器的 zero_grad 类似
def zero_grad(self, set_to_none: bool = False) -> None:
    r"""Sets gradients of all model parameters to zero. See similar function
    under :class:`torch.optim.Optimizer` for more context.

    Args:
        set_to_none (bool): instead of setting to zero, set the grads to None.
            See :meth:`torch.optim.Optimizer.zero_grad` for details.
    """
    # Only warn for replicas; the loop below still runs afterwards.
    if getattr(self, '_is_replica', False):
        warnings.warn(
            "Calling .zero_grad() from a module created with nn.DataParallel() has no effect. "
            "The parameters are copied (in a differentiable manner) from the original module. "
            "This means they are not leaf nodes in autograd and so don't accumulate gradients. "
            "If you need gradients in your forward method, consider using autograd.grad instead.")

    for p in self.parameters():
        # Parameters that have no gradient yet are skipped.
        if p.grad is not None:
            if set_to_none:
                p.grad = None
            else:
                # Detach a gradient that has a grad_fn; otherwise just turn
                # off requires_grad on it. Either way it is zeroed in place.
                if p.grad.grad_fn is not None:
                    p.grad.detach_()
                else:
                    p.grad.requires_grad_(False)
                p.grad.zero_()
5. str(mymodel)
通过nn.Module中的魔法方法__repr__我们可以简单的通过str(mymodel)来查看模型
def __repr__(self):
    """Build the printable description of the module.

    The result is ``Name(...)``. A module with a single line of
    ``extra_repr`` and no children is rendered on one line; otherwise each
    ``extra_repr`` line and each ``(key): child`` entry goes on its own
    indented line.
    """
    # We treat the extra repr like the sub-module, one item per line
    extra_lines = []
    extra_repr = self.extra_repr()
    # empty string will be split into list ['']
    if extra_repr:
        extra_lines = extra_repr.split('\n')
    child_lines = []
    for key, module in self._modules.items():
        mod_str = repr(module)
        mod_str = _addindent(mod_str, 2)
        child_lines.append('(' + key + '): ' + mod_str)
    lines = extra_lines + child_lines

    main_str = self._get_name() + '('
    if lines:
        # simple one-liner info, which most builtin Modules will use
        if len(extra_lines) == 1 and not child_lines:
            main_str += extra_lines[0]
        else:
            # Two-space indent, matching the width passed to _addindent.
            main_str += '\n  ' + '\n  '.join(lines) + '\n'

    main_str += ')'
    return main_str
- 案例
# 1. Import the required libraries
import torch
from torch import nn


# 2. Define the model
class MyTest(nn.Module):
    """Small demo module: two linear layers and one batch-norm layer."""

    def __init__(self):
        super(MyTest, self).__init__()
        self.linear1 = nn.Linear(2, 3)
        self.linear2 = nn.Linear(3, 4)
        self.batchnorm = nn.BatchNorm2d(4)


# 3. Instantiate the network
mymodel = MyTest()
# 4. Inspect the model structure directly through str()
print(str(mymodel))
- 结果
MyTest(
(linear1): Linear(in_features=2, out_features=3, bias=True)
(linear2): Linear(in_features=3, out_features=4, bias=True)
(batchnorm): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
6. nn.Sequential
- 作用:提供一个有序的模型序列,方便将各个模块依次进行组合。是一个有序的容器
class Sequential(Module):
    r"""A sequential container.
    Modules will be added to it in the order they are passed in the
    constructor. Alternatively, an ``OrderedDict`` of modules can be
    passed in. The ``forward()`` method of ``Sequential`` accepts any
    input and forwards it to the first module it contains. It then
    "chains" outputs to inputs sequentially for each subsequent module,
    finally returning the output of the last module.

    The value a ``Sequential`` provides over manually calling a sequence
    of modules is that it allows treating the whole container as a
    single module, such that performing a transformation on the
    ``Sequential`` applies to each of the modules it stores (which are
    each a registered submodule of the ``Sequential``).

    What's the difference between a ``Sequential`` and a
    :class:`torch.nn.ModuleList`? A ``ModuleList`` is exactly what it
    sounds like--a list for storing ``Module`` s! On the other hand,
    the layers in a ``Sequential`` are connected in a cascading way.

    Example::

        # Using Sequential to create a small model. When `model` is run,
        # input will first be passed to `Conv2d(1,20,5)`. The output of
        # `Conv2d(1,20,5)` will be used as the input to the first
        # `ReLU`; the output of the first `ReLU` will become the input
        # for `Conv2d(20,64,5)`. Finally, the output of
        # `Conv2d(20,64,5)` will be used as input to the second `ReLU`
        model = nn.Sequential(
            nn.Conv2d(1,20,5),
            nn.ReLU(),
            nn.Conv2d(20,64,5),
            nn.ReLU()
        )

        # Using Sequential with OrderedDict. This is functionally the
        # same as the above code
        model = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(1,20,5)),
            ('relu1', nn.ReLU()),
            ('conv2', nn.Conv2d(20,64,5)),
            ('relu2', nn.ReLU())
        ]))
    """

    @overload
    def __init__(self, *args: Module) -> None:
        ...

    @overload
    def __init__(self, arg: 'OrderedDict[str, Module]') -> None:
        ...

    def __init__(self, *args):
        super(Sequential, self).__init__()
        # A single OrderedDict supplies explicit names; otherwise each
        # positional module is registered under its index as a string.
        if len(args) == 1 and isinstance(args[0], OrderedDict):
            for key, module in args[0].items():
                self.add_module(key, module)
        else:
            for idx, module in enumerate(args):
                self.add_module(str(idx), module)

    def _get_item_by_idx(self, iterator, idx) -> T:
        """Get the idx-th item of the iterator"""
        size = len(self)
        idx = operator.index(idx)
        if not -size <= idx < size:
            raise IndexError('index {} is out of range'.format(idx))
        # Map a negative index onto its non-negative equivalent.
        idx %= size
        return next(islice(iterator, idx, None))

    @_copy_to_script_wrapper
    def __getitem__(self, idx) -> Union['Sequential', T]:
        # A slice yields a new container of the same class; an integer
        # yields the single module at that position.
        if isinstance(idx, slice):
            return self.__class__(OrderedDict(list(self._modules.items())[idx]))
        else:
            return self._get_item_by_idx(self._modules.values(), idx)

    def __setitem__(self, idx: int, module: Module) -> None:
        # Resolve the position to its registered key, then replace by name.
        key: str = self._get_item_by_idx(self._modules.keys(), idx)
        return setattr(self, key, module)

    def __delitem__(self, idx: Union[slice, int]) -> None:
        if isinstance(idx, slice):
            for key in list(self._modules.keys())[idx]:
                delattr(self, key)
        else:
            key = self._get_item_by_idx(self._modules.keys(), idx)
            delattr(self, key)

    @_copy_to_script_wrapper
    def __len__(self) -> int:
        return len(self._modules)

    @_copy_to_script_wrapper
    def __dir__(self):
        # Drop the purely numeric keys from the attribute listing.
        keys = super(Sequential, self).__dir__()
        keys = [key for key in keys if not key.isdigit()]
        return keys

    @_copy_to_script_wrapper
    def __iter__(self) -> Iterator[Module]:
        return iter(self._modules.values())

    # NB: We can't really type check this function as the type of input
    # may change dynamically (as is tested in
    # TestScript.test_sequential_intermediary_types).  Cannot annotate
    # with Any as TorchScript expects a more precise type
    def forward(self, input):
        # The forward pass is a plain loop: each module's output becomes
        # the next module's input, in registration order.
        for module in self:
            input = module(input)
        return input
7. nn.ModuleList
(1)为什么有ModuleList,为啥不用python自带的List
nn.ModuleList 是一个能储存每个module的容器,并且能够将ModuleList里面的每个模块的parameters自动注入到网络模型中;而python自带的List不会
# Define a network whose sub-modules are held in an nn.ModuleList
class MyModel(nn.Module):
    """Demo module that stores its layers in an ``nn.ModuleList``."""

    def __init__(self):
        super(MyModel, self).__init__()
        # Layers combined through ModuleList
        self.module_list = nn.ModuleList([nn.Linear(3, 5), nn.ReLU()])

    def forward(self, x):
        """Apply the stored layers to ``x`` in order and return the result."""
        for m in self.module_list:
            x = m(x)
        return x


print("*"*100)
print("ModelList"*10)
my_ModelList = MyModel()
print(f"my_ModelList={my_ModelList}")
# Inspect the parameters
print("ModelList for param")
for param in my_ModelList.parameters():
    print(f"my_modellist_param={param}")
print("*"*100)
print("PythonList"*10)


# Define a network whose sub-modules are held in a plain Python list
class PythonList(nn.Module):
    """Demo module that stores its layers in a built-in ``list``."""

    def __init__(self):
        super(PythonList, self).__init__()
        self.list = [nn.Linear(3, 5), nn.ReLU()]

    def forward(self, x):
        """Apply the stored layers to ``x`` in order and return the result."""
        for m in self.list:
            x = m(x)
        # Return the result, as MyModel.forward does.
        return x


my_python_list = PythonList()
print(f"my_python_list={my_python_list}")
print("pythonlist for param")
for param in my_python_list.parameters():
    print(f"param_python={param}")
- 结果:
****************************************************************************************************
ModelListModelListModelListModelListModelListModelListModelListModelListModelListModelList
my_ModelList=MyModel(
(module_list): ModuleList(
(0): Linear(in_features=3, out_features=5, bias=True)
(1): ReLU()
)
)
ModelList for param
my_modellist_param=Parameter containing:
tensor([[ 0.5168, 0.5762, 0.1003],
[-0.4560, -0.0956, 0.4347],
[-0.1871, -0.3226, 0.0094],
[ 0.3742, 0.1279, -0.3885],
[ 0.5290, -0.3003, -0.1993]], requires_grad=True)
my_modellist_param=Parameter containing:
tensor([ 0.4282, 0.1626, 0.0872, -0.5053, 0.4860], requires_grad=True)
****************************************************************************************************
PythonListPythonListPythonListPythonListPythonListPythonListPythonListPythonListPythonListPythonList
my_python_list=PythonList()
pythonlist for param
8. nn.ModuleDict
nn.ModuleDict 是 nn.Module 的容器,用于包装一组网络层,并以键(key)的方式索引、调用其中的网络层
我们可以用它包装一组候选网络层,在前向传播时根据键自主选择要使用的网络层
- 主要方法:
(1)clear():清空ModuleDict
(2)items():返回可迭代的键值对(key-value pairs)
(3)keys():返回字典的键(key)
(4)values():返回字典的值(value)
(5)pop(key):返回键 key 对应的模块,并将其从 ModuleDict 中删除
class ModuleDict(nn.Module):
    """Demo module that picks its layer and activation by key at call time."""

    def __init__(self):
        super(ModuleDict, self).__init__()
        self.choices = nn.ModuleDict({
            'conv': nn.Conv2d(10, 10, 3),
            'pool': nn.MaxPool2d(3)
        })
        self.activation = nn.ModuleDict({
            'relu': nn.ReLU(),
            'prelu': nn.PReLU()
        })

    def forward(self, x, choice, act):
        """Run ``x`` through ``self.choices[choice]`` then ``self.activation[act]``."""
        # Example: choice='conv', act='relu' selects
        #   self.choices[choice]  -> nn.Conv2d(10, 10, 3)
        #   self.activation[act]  -> nn.ReLU()
        x = self.choices[choice](x)
        x = self.activation[act](x)
        return x


# Instantiate the network (runs __init__, which builds the sub-modules)
net = ModuleDict()
# Input data
fake_img = torch.randn((4, 10, 32, 32))
# Call the network, selecting the layers by key
output = net(fake_img, 'conv', 'relu')
9. nn.ParameterList
- 以列表(list)的形式保存一组 Parameter,即 ParameterList
class ParameterList(Module):
    r"""Holds parameters in a list.

    :class:`~torch.nn.ParameterList` can be indexed like a regular Python
    list, but parameters it contains are properly registered, and will be
    visible by all :class:`~torch.nn.Module` methods.

    Args:
        parameters (iterable, optional): an iterable of :class:`~torch.nn.Parameter` to add

    Example::

        class MyModule(nn.Module):
            def __init__(self):
                super(MyModule, self).__init__()
                self.params = nn.ParameterList([nn.Parameter(torch.randn(10, 10)) for i in range(10)])

            def forward(self, x):
                # ParameterList can act as an iterable, or be indexed using ints
                for i, p in enumerate(self.params):
                    x = self.params[i // 2].mm(x) + p.mm(x)
                return x
    """

    def __init__(self, parameters: Optional[Iterable['Parameter']] = None) -> None:
        super(ParameterList, self).__init__()
        # From here on __setattr__ warns about unsupported attributes.
        self._initialized = True
        if parameters is not None:
            self += parameters

    def __setstate__(self, state):
        # Switch the __setattr__ warning off while state is being restored.
        state['_initialized'] = False
        super(ParameterList, self).__setstate__(state)
        self._initialized = True

    def _get_abs_string_index(self, idx):
        """Get the absolute index for the list of modules"""
        idx = operator.index(idx)
        if not (-len(self) <= idx < len(self)):
            raise IndexError('index {} is out of range'.format(idx))
        if idx < 0:
            idx += len(self)
        return str(idx)

    @overload
    def __getitem__(self, idx: int) -> 'Parameter':
        ...

    @overload
    def __getitem__(self: T, idx: slice) -> T:
        ...

    def __getitem__(self, idx):
        # A slice yields a new container of the same class; an integer
        # yields the single parameter stored under that index.
        if isinstance(idx, slice):
            return self.__class__(list(self._parameters.values())[idx])
        else:
            idx = self._get_abs_string_index(idx)
            return self._parameters[str(idx)]

    def __setitem__(self, idx: int, param: 'Parameter') -> None:
        idx = self._get_abs_string_index(idx)
        return self.register_parameter(str(idx), param)

    def __setattr__(self, key: Any, value: Any) -> None:
        # Warn, but still assign, when a new non-Parameter attribute is set
        # after initialization.
        if getattr(self, "_initialized", False):
            if not hasattr(self, key) and not isinstance(value, torch.nn.Parameter):
                warnings.warn("Setting attributes on ParameterList is not supported.")
        super(ParameterList, self).__setattr__(key, value)

    def __len__(self) -> int:
        return len(self._parameters)

    def __iter__(self) -> Iterator['Parameter']:
        return iter(self._parameters.values())

    def __iadd__(self, parameters: Iterable['Parameter']) -> 'ParameterList':
        return self.extend(parameters)

    def __dir__(self):
        # Drop the purely numeric keys from the attribute listing.
        keys = super(ParameterList, self).__dir__()
        keys = [key for key in keys if not key.isdigit()]
        return keys

    def append(self, parameter: 'Parameter') -> 'ParameterList':
        """Appends a given parameter at the end of the list.

        Args:
            parameter (nn.Parameter): parameter to append
        """
        self.register_parameter(str(len(self)), parameter)
        return self

    def extend(self, parameters: Iterable['Parameter']) -> 'ParameterList':
        """Appends parameters from a Python iterable to the end of the list.

        Args:
            parameters (iterable): iterable of parameters to append
        """
        if not isinstance(parameters, container_abcs.Iterable):
            raise TypeError("ParameterList.extend should be called with an "
                            "iterable, but got " + type(parameters).__name__)
        # New entries are keyed by position, continuing after the current length.
        offset = len(self)
        for i, param in enumerate(parameters):
            self.register_parameter(str(offset + i), param)
        return self

    def extra_repr(self) -> str:
        # One "(key): Parameter containing: [...]" line per stored parameter.
        child_lines = []
        for k, p in self._parameters.items():
            size_str = 'x'.join(str(size) for size in p.size())
            device_str = '' if not p.is_cuda else ' (GPU {})'.format(p.get_device())
            parastr = 'Parameter containing: [{} of size {}{}]'.format(
                torch.typename(p), size_str, device_str)
            child_lines.append('  (' + str(k) + '): ' + parastr)
        tmpstr = '\n'.join(child_lines)
        return tmpstr

    def __call__(self, input):
        raise RuntimeError('ParameterList should not be called.')

    def _replicate_for_data_parallel(self):
        warnings.warn("nn.ParameterList is being used with DataParallel but this is not "
                      "supported. This list will appear empty for the models replicated "
                      "on each GPU except the original one.")

        return super(ParameterList, self)._replicate_for_data_parallel()
- 案例
import torch
from torch import nn


class MyModule(nn.Module):
    """Demo module holding five 3x3 parameters in an ``nn.ParameterList``."""

    def __init__(self):
        super(MyModule, self).__init__()
        self.params = nn.ParameterList([nn.Parameter(torch.randn(3, 3)) for i in range(5)])

    def forward(self, x):
        """Fold ``x`` through every stored parameter and return the result."""
        # ParameterList can act as an iterable, or be indexed using ints
        for i, p in enumerate(self.params):
            x = self.params[i // 2].mm(x) + p.mm(x)
        return x


mymodel = MyModule()
# Each item is a (name, parameter) pair
for param in mymodel.named_parameters():
    print(param)
- 结果
('params.0', Parameter containing:
tensor([[ 0.4520, -0.7078, 1.5575],
[ 1.2892, -1.3094, -1.1212],
[-1.2048, 1.3236, 1.6908]], requires_grad=True))
('params.1', Parameter containing:
tensor([[-0.7633, -1.3577, 0.4661],
[-0.1935, -0.1821, 0.0935],
[-0.3414, -0.2055, 2.2441]], requires_grad=True))
('params.2', Parameter containing:
tensor([[-1.3977, -1.2781, 0.3150],
[-0.0079, -0.3423, -0.0806],
[ 0.4114, 0.2381, -1.7208]], requires_grad=True))
('params.3', Parameter containing:
tensor([[-1.7887, -0.4023, -0.0706],
[ 0.1060, -1.3700, -0.0148],
[ 0.0578, -0.0219, 1.1389]], requires_grad=True))
('params.4', Parameter containing:
tensor([[ 0.3666, -0.3358, -0.1044],
[ 0.3157, -1.0280, -0.1464],
[ 0.7625, -1.9047, 0.2317]], requires_grad=True))
10. nn.ParameterDict
以字典的形式保存参数;ParameterDict 可以像普通的 Python 字典一样被索引,
但它所包含的参数会被正确注册,并且对所有 Module 方法可见。
import torch
from torch import nn


class MyModule(nn.Module):
    """Demo module holding two 5x10 parameters in an ``nn.ParameterDict``."""

    def __init__(self):
        super(MyModule, self).__init__()
        # Define a ParameterDict keyed by 'left' and 'right'
        self.params = nn.ParameterDict({
            'left': nn.Parameter(torch.randn(5, 10)),
            'right': nn.Parameter(torch.randn(5, 10))
        })

    def forward(self, x, choice):
        """Matrix-multiply the parameter stored under ``choice`` with ``x``."""
        x = self.params[choice].mm(x)
        return x

本文详细介绍了PyTorch中模型的训练模式(train和eval)、参数梯度控制(requires_grad_)、梯度清零(zero_grad)以及模型结构可视化(str和nn.Sequential)。通过实例展示了如何在训练过程中切换模型状态,控制参数梯度,以及如何构建和理解模型的内部结构。

3602

被折叠的 条评论
为什么被折叠?



