Day1


torch数组基本操作

1:3表示左闭右开,::3表示三个一跳
要想改变一个张量的形状而不改变元素数量和元素值,可以调用reshape函数
X = x.reshape(3,4)
X
tensor([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
使用全0、全1、其他常量,或者从特定分布中随机采样的数字
torch.zeros(3, 4)
tensor([[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]])
torch.ones((2, 3, 4))
tensor([[[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]],
[[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]]])
torch.randn(3, 4)  # 从标准正态分布(均值为0、标准差为1)中随机采样
tensor([[ 0.1720, 0.7953, 0.7613, -0.2457],
[ 0.0556, 0.0223, -0.3805, 0.0315],
[ 0.3078, 1.3836, 0.1121, -0.2739]])
常见的标准算术运算符(+、-、*、/和**)都可以被升级为按元素运算
x = torch.tensor([1.0, 2, 4, 8])
y = torch.tensor([2, 2, 2, 2])
x + y, x - y, x * y, x / y, x ** y
(tensor([ 3., 4., 6., 10.]), tensor([-1., 0., 2., 6.]), tensor([ 2., 4., 8., 16.]), tensor([0.5000, 1.0000, 2.0000, 4.0000]), tensor([ 1., 4., 16., 64.]))

拼接张量(如 torch.cat)时,dim=0 表示沿第0维拼接(按行,行数增加),dim=1 表示沿第1维拼接(按列,列数增加)
通过逻辑运算符构建二元张量
X == Y
tensor([[False, True, False, True],
[False, False, False, False],
[False, False, False, False]])
对张量中的所有元素进行求和,会产生一个单元素张量
X.sum()
tensor(66.)
即使形状不同,我们仍然可以通过调用 广播机制(broadcasting mechanism)来执行按元素操作
a = torch.arange(3).reshape((3, 1))
b = torch.arange(2).reshape((1, 2))
a, b
(tensor([[0],
[1],
[2]]),
tensor([[0, 1]]))
a + b
tensor([[0, 1],
[1, 2],
[2, 3]])
pandas
从创建的CSV文件中加载原始数据集
import pandas as pd
data = pd.read_csv(data_file)
为了处理缺失的数据,典型的方法包括插值法和删除法, 这里,我们将考虑插值法
# Split the frame by position: the first two columns are the features,
# the third column is the target.
inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]
# Fill missing numeric values with the column mean. numeric_only=True is
# required on pandas >= 2.0, where mean() raises a TypeError if the frame
# still contains a string column such as "Alley".
inputs = inputs.fillna(inputs.mean(numeric_only=True))
print(inputs)
   NumRooms Alley
0       3.0  Pave
1       2.0   NaN
2       4.0   NaN
3       3.0   NaN
这里 data.iloc 表示按整数位置(integer location)索引;inputs.fillna 用于填充 NaN,这里用各数值列的平均值(mean)填充
inputs = pd.get_dummies(inputs, dummy_na=True)
print(inputs)
pd.get_dummies:对分类字符串列做独热编码,把文本类别转为 0/1 指示列(pandas 2.0 起默认输出 True/False 布尔列,传入 dtype=int 可得到 0/1);
dummy_na=True:缺失值 NaN 也单独生成一列标识,避免缺失信息丢失。
线性代数






A = torch.arange(20, dtype=torch.float32).reshape(5, 4)
tensor([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.],
[12., 13., 14., 15.],
[16., 17., 18., 19.]])
A.shape, A.sum()
(torch.Size([5, 4]), tensor(190.))
指定张量沿哪一个轴来通过求和降低维度

A_sum_axis0 = A.sum(axis=0)  # 沿第0维求和:第0维(行)被消去,得到每一列的和
A_sum_axis0, A_sum_axis0.shape
(tensor([40., 45., 50., 55.]), torch.Size([4]))
A_sum_axis1 = A.sum(axis=1)
A_sum_axis1, A_sum_axis1.shape
(tensor([ 6., 22., 38., 54., 70.]), torch.Size([5]))
A.sum(axis=[0, 1])
tensor(190.)
通过某个维度求和,并且不把维度丢掉,计算总和或均值时保持轴数不变
sum_A = A.sum(axis=1, keepdims=True)
sum_A
tensor([[ 6.],
[22.],
[38.],
[54.],
[70.]])
| 函数 | 全称 | 作用 | 输入要求 |
|---|---|---|---|
| torch.mv(A, x) | matrix vector | 矩阵 × 一维向量 | A(2D), x(1D) |
| torch.mm(A, B) | matrix matrix | 矩阵 × 矩阵 | A(2D), B(2D) |
| torch.matmul(a, b) / @ | general matmul | 通用乘法 | 兼容 1D/2D/高维,支持广播 |
torch.norm()求范数
导数


标量对标量求导,得到标量;
标量对向量求导,得到向量;
向量对向量求导,得到矩阵。

链式法则:




优化方法


线性回归

Day2
张量基本创建:
import torch
import numpy as np
def dm01():
    """Create tensors with ``torch.tensor`` from a scalar, a nested list and an ndarray.

    Prints each tensor together with its Python type; returns nothing.
    """
    # A scalar input gives a zero-dimensional tensor.
    t1 = torch.tensor(10)
    print(f't1:{t1}',type(t1))
    # A nested list gives a two-dimensional tensor.
    data = [[1,2,3],[4,5,6]]
    t2 = torch.tensor(data)
    print(f't2:{t2}',type(t2))
    # A NumPy ndarray is accepted as well; dtype= converts the random
    # integers to float while building the tensor.
    data = np.random.randint(0,10,size=(2,3))
    t3 = torch.tensor(data,dtype=torch.float)
    print(f't3:{t3}',type(t3))
    # Note: torch.tensor(2, 3) raises an error, because torch.tensor()
    # expects the data itself rather than a shape.
def dm02():
    """Show that the ``torch.Tensor`` constructor accepts a shape and does not raise."""
    # Unlike torch.tensor(2, 3), torch.Tensor(2, 3) treats its arguments as
    # a shape and builds a 2x3 tensor directly. The contents are whatever was
    # in memory (uninitialised), so the printed values are not meaningful.
    t4 = torch.Tensor(2,3)
    print(t4)
if __name__ == '__main__':
    # Run one demo at a time; uncomment the one to try.
    # dm01()
    dm02()
创建01张量
import torch
# Each print label now names the variable it shows; the transcript recorded
# below this snippet was produced when all three labels still read "t1".

# All-ones tensors: by explicit shape, then by copying the shape/dtype of t2.
t1 = torch.ones(2,3)
print(f't1:{t1}',type(t1))
t2 = torch.tensor([[1,2],[3,4],[5,6]])
print(f't2:{t2}',type(t2))
t3 = torch.ones_like(t2)
print(f't3:{t3}',type(t3))
print('*'*30)
# All-zeros tensors, same two construction styles.
t1 = torch.zeros(2,3)
print(f't1:{t1}',type(t1))
t2 = torch.tensor([[1,2],[3,4],[5,6]])
print(f't2:{t2}',type(t2))
t3 = torch.zeros_like(t2)
print(f't3:{t3}',type(t3))
print('*'*30)
# Tensors filled with an arbitrary constant (6 here).
t1 = torch.full((2,3),6)
print(f't1:{t1}',type(t1))
t2 = torch.tensor([[1,2],[3,4],[5,6]])
print(f't2:{t2}',type(t2))
t3 = torch.full_like(t2,6)
print(f't3:{t3}',type(t3))
t1:tensor([[1., 1., 1.],
[1., 1., 1.]]) <class 'torch.Tensor'>
t1:tensor([[1, 2],
[3, 4],
[5, 6]]) <class 'torch.Tensor'>
t1:tensor([[1, 1],
[1, 1],
[1, 1]]) <class 'torch.Tensor'>
******************************
t1:tensor([[0., 0., 0.],
[0., 0., 0.]]) <class 'torch.Tensor'>
t1:tensor([[1, 2],
[3, 4],
[5, 6]]) <class 'torch.Tensor'>
t1:tensor([[0, 0],
[0, 0],
[0, 0]]) <class 'torch.Tensor'>
******************************
t1:tensor([[6, 6, 6],
[6, 6, 6]]) <class 'torch.Tensor'>
t1:tensor([[1, 2],
[3, 4],
[5, 6]]) <class 'torch.Tensor'>
t1:tensor([[6, 6],
[6, 6],
[6, 6]]) <class 'torch.Tensor'>
创建线性和随机张量
import torch
def dm01():
    """Create evenly spaced tensors with ``torch.arange`` and ``torch.linspace``."""
    # arange(start, end, step): end is exclusive.
    t1 = torch.arange(0,10,2)
    print(f't1: {t1}, type: {type(t1)}')
    # linspace(start, end, steps): the third argument is the number of
    # elements, not the step size, and end is included.
    t2 = torch.linspace(1,10,6)
    print(f't2: {t2}, type: {type(t2)}')
def dm02():
    """Create random tensors (uniform, normal, integer) under a fixed seed."""
    # Fix the global seed so every run produces the same random numbers.
    torch.manual_seed(1)
    # Uniform distribution.
    t1 = torch.rand((2,3))
    print(f't1: {t1}, type: {type(t1)}')
    # Normal distribution.
    t2 = torch.randn((2,3))
    print(f't2: {t2}, type: {type(t2)}')
    # Random integers drawn from [0, 10).
    t3 = torch.randint(0,10,(2,3))
    print(f't3:{t3},type:{type(t3)}')
if __name__ == '__main__':
    # Run one demo at a time; uncomment the one to try.
    # dm01()
    dm02()
张量类型转换方法
import torch
# Create a tensor with an explicit element dtype.
t1 = torch.tensor([1,2,3,4,5],dtype=torch.float)
# The element type is t1.dtype and the tensor type is type(t1); the labels
# are now paired with the matching values, as in the print for t2.
print(f't1:{t1},元素类型:{t1.dtype},张量类型:{type(t1)}')
# Convert an existing tensor to another dtype with .type().
t2 = t1.type(torch.int16)
print(f't2:{t2},元素类型:{t2.dtype},张量类型:{type(t2)}')
# Shorthand conversion methods, one per target dtype.
print(t2.half())    # float16
print(t2.float())   # float32
print(t2.double())  # float64
print(t2.short())   # int16
print(t2.int())     # int32
print(t2.long())    # int64
---------------------------
import torch
import numpy as np
def dm01():
    """Convert a tensor to a NumPy array, with and without shared memory."""
    t1 = torch.tensor([1,2,3,4,5])
    print(f't1:{t1},type:{type(t1)}')
    # Tensor -> ndarray. The array returned by .numpy() shares memory with
    # the tensor, so e.g. ``n1[0] = 100`` would change t1 as well.
    n1 = t1.numpy()
    print(f'n1:{n1},type:{type(n1)}')
    # .copy() produces an independent array that does not share memory.
    n2 = t1.numpy().copy()
def dm02():
    """Convert a NumPy array to a tensor, with and without shared memory."""
    n1 = np.array([1,2,3])
    print(f'n1:{n1},type:{type(n1)}')
    # ndarray -> tensor. from_numpy() keeps the array's dtype and shares
    # memory with the array.
    t1 = torch.from_numpy(n1)
    print(f't1:{t1},type:{type(t1)},{t1.dtype}')
    # Chain .type() to convert the dtype in the same expression.
    t1 = torch.from_numpy(n1).type(torch.float32)
    print(f't1:{t1},type:{type(t1)},{t1.dtype}')
    # torch.tensor() copies the data, so t2 does not share memory with n1.
    t2 = torch.tensor(n1)
def dm03():
    """Extract the Python number held by a single-element tensor with ``item()``."""
    # item() only works on tensors that hold exactly one value.
    t1 = torch.tensor(100)
    print(f'value:{t1},type:{type(t1)}')
    a = t1.item()
    print(f'value:{a},type:{type(a)}')
if __name__ == '__main__':
    # Run one demo at a time; uncomment the one to try.
    # dm01()
    # dm02()
    dm03()
张量的基本运算
import torch
# APIs covered in this section:
#   add(), sub(), mul(), div(), neg()      -> add, subtract, multiply, divide, negate
#   add_(), sub_(), mul_(), div_(), neg_() -> same operations, but they modify
#                                             the source tensor in place
#                                             (comparable to inplace=True in pandas)
# 1. The operators +, -, *, / can be used instead of the functions above.
# 2. When a tensor is combined with a plain number, the number is applied to
#    every element of the tensor.
t1 = torch.tensor([1,2,3])
# Out-of-place add: t1 is left untouched and the result is a new tensor.
t2 = torch.add(t1, 10)
print(f't1:{t1}')
print(f't2:{t2}')
张量的点乘和矩阵乘法
import torch
def dm01():
    """Multiply two tensors element by element."""
    # Element-wise product: the two tensors here have identical shapes
    # (more generally the shapes must be broadcastable).
    t1 = torch.tensor([[1,2,3],[4,5,6]])
    print(f't1:{t1}')
    t2 = torch.tensor([[1, 2, 3], [4, 5, 6]])
    print(f't2: {t2}')
    # Equivalent spelling: t1.mul(t2)
    t3 = t1 * t2
    print(f't3:{t3}')
def dm02():
    """Multiply a 2x3 matrix by a 3x2 matrix with the ``@`` operator."""
    t1 = torch.tensor([[1, 2, 3], [4, 5, 6]])
    print(f't1: {t1}')
    t2 = torch.tensor([[1, 2], [3, 4], [5, 6]])
    print(f't2: {t2}')
    # Matrix product; equivalent spelling: t1.matmul(t2)
    t3 = t1 @ t2
    print(f't3: {t3}')
if __name__ == '__main__':
    # Run one demo at a time; uncomment the one to try.
    # dm01()
    dm02()
张量常用运算函数
import torch
t1 = torch.tensor([[1,2,3],[4,5,6]])
print(f't1:{t1}')
divider = '*'*30
# Sums: dim=0 gives one value per column, dim=1 one value per row,
# and no dim sums every element.
for total in (t1.sum(dim=0), t1.sum(dim=1), t1.sum()):
    print(total)
print(divider)
# Maxima along the same three choices of dimension.
for largest in (t1.max(dim=0), t1.max(dim=1), t1.max()):
    print(largest)
# mean() needs a floating-point tensor, so convert before averaging.
t2 = t1.float()
print(divider)
for average in (t2.mean(dim=0), t2.mean(dim=1), t2.mean()):
    print(average)
print(divider)
# Element-wise math: square (two spellings), square root, exp and log.
for transformed in (t1.pow(2), t1 ** 2, t1.sqrt(), t1.exp(), t1.log()):
    print(transformed)
张量索引操作
import torch
from torch.distributed.flight_recorder.components.utils import just_print_entries
# Indexing patterns demonstrated below:
#   1. plain row / column indexing
#   2. list indexing
#   3. range (slice) indexing
#   4. boolean indexing
#   5. multi-dimensional indexing
separator = '-'*30
torch.manual_seed(24)
t1 = torch.randint(1,10,(5,5))
print(t1)

# Case 1: plain row / column indexing.
# Second row, written two equivalent ways.
second_row = t1[1]
print(second_row)
print(t1[1,:])
# Third column.
third_column = t1[:,2]
print(third_column)
print(separator)

# Case 2: list indexing. Row and column lists are paired position by position.
# Elements at (0,1) and (1,2).
rows, cols = [0,1], [1,2]
print(t1[rows,cols])
# Elements at (1,2) and (3,4).
rows, cols = [1,3], [2,4]
print(t1[rows,cols])
# Elements at (1,1), (2,2) and (3,3).
diagonal_ids = [1,2,3]
print(t1[diagonal_ids,diagonal_ids])
# The four elements in rows 0-1 and columns 1-2, first as a flat list of pairs...
rows, cols = [0,0,1,1], [1,2,1,2]
print(t1[rows,cols])
# ...then as a 2x2 block, by giving the row index a column shape so it broadcasts.
rows, cols = [[0],[1]], [1,2]
print(t1[rows,cols])
print(separator)

# Case 3: range indexing.
# First three rows, first two columns.
print(t1[:3,:2])
# From the second row to the end, first two columns.
print(t1[1:,:2])
# Rows at indices 1, 3, ... and columns at indices 0, 2, ...
print(t1[1::2,::2])
print(separator)

# Case 4: boolean indexing.
# Every row whose third-column value is greater than 5.
row_mask = t1[:,2]>5
print(t1[row_mask])
print(t1[:,2])
print(separator)

# Case 5: multi-dimensional indexing.
t2 = torch.randint(1,10,(2,3,4))
print(t2)
# First entry along axis 0.
print(t2[0])
# First entry along axis 1.
print(t2[:,0])
# First entry along axis 2.
print(t2[...,0])
张量形状操作
import torch
def dm01():
    """Reshape a 2x3 tensor to 3x2, 1x6 and 6x1 with ``reshape``."""
    t1 = torch.randint(1,10,size=(2,3))
    print(f't1:{t1},shape:{t1.shape},row:{t1.shape[0]},columns:{t1.shape[1]},')
    # reshape() keeps the six elements and only changes how they are laid out:
    # 3 rows x 2 columns, 1 row x 6 columns, 6 rows x 1 column.
    t2 = t1.reshape(3,2)
    print(t2)
    t3 = t1.reshape(1,6)
    print(t3)
    t4 = t1.reshape(6,1)
    print(t4)
def dm02():
    """Add size-1 dimensions with ``unsqueeze`` and remove them with ``squeeze``."""
    # unsqueeze(d) inserts a dimension of size 1 at position d (raises the rank).
    # squeeze() with no argument drops every dimension of size 1 (lowers the rank).
    t1 = torch.randint(1, 10, size=(2, 3))
    print(f't1: {t1}, shape: {t1.shape}')
    # New dimension in front: (2, 3) -> (1, 2, 3).
    t2 = t1.unsqueeze(0)
    print(f't2: {t2}, shape: {t2.shape}')
    # New dimension in the middle: (2, 3) -> (2, 1, 3).
    t3 = t1.unsqueeze(1)
    print(f't3: {t3}, shape: {t3.shape}')
    # New dimension at the end: (2, 3) -> (2, 3, 1).
    t4 = t1.unsqueeze(2)
    print(f't4: {t4}, shape: {t4.shape}')
    # Drop all size-1 dimensions: (2, 1, 1, 3, 1) -> (2, 3).
    t6 = torch.randint(1,10,size=(2,1,1,3,1))
    print(f't6: {t6}, shape: {t6.shape}')
    t7 = t6.squeeze()
    print(f't7: {t7}, shape: {t7.shape}')
def dm03():
    """Reorder tensor dimensions with ``transpose`` and ``permute``."""
    # transpose() swaps exactly two dimensions per call.
    # permute() reorders any number of dimensions in one call.
    t1 = torch.randint(1, 10, size=(2, 3,4))
    print(f't1: {t1}, shape: {t1.shape}')
    # Swap dims 0 and 1: (2, 3, 4) -> (3, 2, 4).
    t2 = t1.transpose(0,1)
    print(f't2: {t2}, shape: {t2.shape}')
    # Reorder all three dims: (2, 3, 4) -> (4, 2, 3).
    t3 = t1.permute(2,0,1)
    print(f't3: {t3}, shape: {t3.shape}')
def dm04():
    """Show how ``view`` depends on contiguity, using ``is_contiguous`` and ``contiguous``."""
    # view() can only reshape a contiguous tensor, i.e. one whose element
    # order in memory matches its logical order.
    # contiguous() returns a tensor whose memory layout follows the logical order.
    # is_contiguous() reports whether the two orders already agree.
    t1 = torch.randint(1, 10, size=(2, 3))
    print(f't1: {t1}, shape: {t1.shape}')
    print(t1.is_contiguous())
    t2 = t1.view(3, 2)
    print(f't2: {t2}, shape: {t2.shape}')
    # After transpose() the tensor is no longer contiguous...
    t3 = t1.transpose(0,1)
    print(f't3: {t3}, shape: {t3.shape}')
    print(t3.is_contiguous())
    # ...so view() cannot be applied to t3 directly; make it contiguous first.
    t4 = t3.contiguous()
    print(t4.is_contiguous())
    # view() returns a new tensor rather than reshaping in place, so the
    # result has to be captured for the reshaped tensor to be shown.
    t5 = t4.view(2,3)
    print(f't5: {t5}, shape: {t5.shape}')
if __name__ == '__main__':
    # Run one demo at a time; uncomment the one to try.
    # dm01()
    # dm02()
    # dm03()
    dm04()
张量拼接
import torch
# Two random 2x3 integer tensors to join.
t1 = torch.randint(1,10,(2,3))
print(f't1:{t1}, shape: {t1.shape}')
t2 = torch.randint(1,10,(2,3))
print(f't2:{t2}, shape: {t2.shape}')
pair = (t1, t2)

# cat() joins along an existing dimension, so the rank stays at 2.
t3 = torch.cat(pair, 0)
print(f't3: {t3}, shape: {t3.shape}')
t4 = torch.cat(pair, 1)
print(f't4: {t4}, shape: {t4.shape}')

# stack() inserts a new dimension at the given position, so the rank becomes 3.
t5 = torch.stack(pair, 0)
print(f't5: {t5}, shape: {t5.shape}')
t6 = torch.stack(pair, 1)
print(f't6: {t6}, shape: {t6.shape}')
t7 = torch.stack(pair, 2)
print(f't7: {t7}, shape: {t7.shape}')
Day3
参数更新一次
import torch
# Initial weight. Arguments: initial value, whether autograd tracks it, dtype.
w = torch.tensor(10,requires_grad=True,dtype=torch.float)
lr = 0.01
# Loss function: loss = 2 * w**2.
loss = 2 *w **2
print(f'梯度函数类型:{type(loss.grad_fn)}')
print(loss.sum())
# Backpropagate: the derivative of the loss with respect to w is stored in w.grad.
loss.backward()
# Update rule: w_new = w_old - learning_rate * gradient.
# The update runs under no_grad() so that it is not recorded by autograd;
# this replaces assigning to w.data, which bypasses autograd's safety checks.
with torch.no_grad():
    w -= lr * w.grad
print(f'更新后的权重:{w.data}')
自动微分简单案例
"""
Find the minimum of y = w**2 + 20 by gradient descent and print the value of
w at which y is smallest.

Steps:
    1. Start from w = 10.
    2. Define the loss function.
    3. Run NUM_STEPS gradient-descent iterations; in each one:
        3.1 forward pass
        3.2 zero the gradient
        3.3 backward pass
        3.4 update the weight: w <- w - lr * w.grad
"""
import torch
w = torch.tensor(10,requires_grad=True,dtype=torch.float)
lr = 0.01
# Number of iterations actually run by the loop below.
NUM_STEPS = 458
loss = w**2 + 20
print(f'开始时权重初始值:{w},loss:{loss}')
for i in range(1,NUM_STEPS + 1):
    # Forward pass.
    loss = w**2 + 20
    # Gradients accumulate across backward() calls, so clear them every
    # iteration. Before the first backward() w.grad is None, hence the guard.
    if w.grad is not None:
        w.grad.zero_()
    # Backward pass; the loss is already a scalar, so no reduction is needed.
    loss.backward()
    # Weight update, kept out of the autograd graph.
    with torch.no_grad():
        w -= lr * w.grad
    # Report every 10th iteration: the updated weight, the step just taken
    # and the loss computed before the update.
    if i%10 == 0:
        print(f'第{i}次,权重初始值:{w},(0.01 * w.grad):{lr*w.grad:.5f},loss:{loss:.5f}')
print(f'final res: weight:{w},gradient:{w.grad},loss:{loss}')
开始时权重初始值:10.0,loss:120.0
第10次,权重初始值:8.170727729797363,(0.01 * w.grad):0.16675,loss:89.51353
第20次,权重初始值:6.676079273223877,(0.01 * w.grad):0.13625,loss:66.40778
第30次,权重初始值:5.454843521118164,(0.01 * w.grad):0.11132,loss:50.98222
第40次,权重初始值:4.457004547119141,(0.01 * w.grad):0.09096,loss:40.68398
第50次,权重初始值:3.641697406768799,(0.01 * w.grad):0.07432,loss:33.80879
第60次,权重初始值:2.975531578063965,(0.01 * w.grad):0.06073,loss:29.21885
第70次,权重初始值:2.4312260150909424,(0.01 * w.grad):0.04962,loss:26.15458
第80次,权重初始值:1.986488699913025,(0.01 * w.grad):0.04054,loss:24.10885
第90次,权重初始值:1.6231060028076172,(0.01 * w.grad):0.03312,loss:22.74310
第100次,权重初始值:1.3261957168579102,(0.01 * w.grad):0.02707,loss:21.83132
第110次,权重初始值:1.083598256111145,(0.01 * w.grad):0.02211,loss:21.22260
第120次,权重初始值:0.8853786587715149,(0.01 * w.grad):0.01807,loss:20.81622
第130次,权重初始值:0.7234188318252563,(0.01 * w.grad):0.01476,loss:20.54491
第140次,权重初始值:0.5910858511924744,(0.01 * w.grad):0.01206,loss:20.36379
第150次,权重初始值:0.48296019434928894,(0.01 * w.grad):0.00986,loss:20.24287
第160次,权重初始值:0.394613653421402,(0.01 * w.grad):0.00805,loss:20.16214
第170次,权重初始值:0.3224280774593353,(0.01 * w.grad):0.00658,loss:20.10825
第180次,权重初始值:0.26344722509384155,(0.01 * w.grad):0.00538,loss:20.07227
第190次,权重初始值:0.21525554358959198,(0.01 * w.grad):0.00439,loss:20.04825
第200次,权重初始值:0.17587944865226746,(0.01 * w.grad):0.00359,loss:20.03221
第210次,权重初始值:0.1437063217163086,(0.01 * w.grad):0.00293,loss:20.02150
第220次,权重初始值:0.11741852760314941,(0.01 * w.grad):0.00240,loss:20.01435
第230次,权重初始值:0.09593949466943741,(0.01 * w.grad):0.00196,loss:20.00958
第240次,权重初始值:0.07838954776525497,(0.01 * w.grad):0.00160,loss:20.00640
第250次,权重初始值:0.06404995173215866,(0.01 * w.grad):0.00131,loss:20.00427
第260次,权重初始值:0.05233347415924072,(0.01 * w.grad):0.00107,loss:20.00285
第270次,权重初始值:0.04276026412844658,(0.01 * w.grad):0.00087,loss:20.00190
第280次,权重初始值:0.03493824973702431,(0.01 * w.grad):0.00071,loss:20.00127
第290次,权重初始值:0.028547091409564018,(0.01 * w.grad):0.00058,loss:20.00085
第300次,权重初始值:0.023325050249695778,(0.01 * w.grad):0.00048,loss:20.00057
第310次,权重初始值:0.019058262929320335,(0.01 * w.grad):0.00039,loss:20.00038
第320次,权重初始值:0.015571989119052887,(0.01 * w.grad):0.00032,loss:20.00025
第330次,权重初始值:0.012723450548946857,(0.01 * w.grad):0.00026,loss:20.00017
第340次,权重初始值:0.010395986959338188,(0.01 * w.grad):0.00021,loss:20.00011
第350次,权重初始值:0.00849427841603756,(0.01 * w.grad):0.00017,loss:20.00007
第360次,权重初始值:0.006940444000065327,(0.01 * w.grad):0.00014,loss:20.00005
第370次,权重初始值:0.005670847371220589,(0.01 * w.grad):0.00012,loss:20.00003
第380次,权重初始值:0.0046334946528077126,(0.01 * w.grad):0.00009,loss:20.00002
第390次,权重初始值:0.003785902401432395,(0.01 * w.grad):0.00008,loss:20.00002
第400次,权重初始值:0.0030933578964322805,(0.01 * w.grad):0.00006,loss:20.00001
第410次,权重初始值:0.0025274986401200294,(0.01 * w.grad):0.00005,loss:20.00001
第420次,权重初始值:0.002065150300040841,(0.01 * w.grad):0.00004,loss:20.00000
第430次,权重初始值:0.0016873783897608519,(0.01 * w.grad):0.00003,loss:20.00000
第440次,权重初始值:0.0013787109637632966,(0.01 * w.grad):0.00003,loss:20.00000
第450次,权重初始值:0.0011265072971582413,(0.01 * w.grad):0.00002,loss:20.00000
final res: weight:0.0009583908249624074,gradient:0.001955899642780423,loss:20.000001907348633
detach函数
import torch
# A tensor that tracks gradients cannot be converted to a NumPy ndarray
# directly (t1.numpy() would fail); it has to go through detach() first.
t1 = torch.tensor(10, dtype=torch.float, requires_grad=True)
print(f't1: {t1}, type: {type(t1)}')
# detach() returns a tensor that is cut off from the autograd graph but
# still shares memory with t1; that detached tensor can be converted.
t2 = t1.detach()
n2 = t2.numpy()
print(f't2: {t2}, type: {type(t2)}')
print(f'n2: {n2}, type: {type(n2)}')
自动微分应用场景
import torch
# x: the features (input data), 2 samples with 5 features each.
x = torch.ones((2, 5))
print(f'x:{x}')
# y: the labels (ground truth), 2 samples with 3 outputs each.
y = torch.zeros((2, 3))
print(f'y:{y}')
# Randomly initialised weight and bias, both tracked by autograd.
w = torch.randn(5, 3, requires_grad=True)
print(f'w:{w}')
b = torch.randn(3, requires_grad=True)
print(f'b:{b}')
# Forward pass: compute the predictions.
z = x @ w + b
print(f'z:{z}')
# Mean-squared-error loss between predictions and labels.
criterion = torch.nn.MSELoss()
loss = criterion(z, y)
# Backward pass: fills w.grad and b.grad (no parameter is updated here).
loss.backward()
print(f'loss:{loss}')
print(f'w的梯度:{w.grad}')
print(f'b的梯度:{b.grad}')
x:tensor([[1., 1., 1., 1., 1.],
[1., 1., 1., 1., 1.]])
y:tensor([[0., 0., 0.],
[0., 0., 0.]])
w:tensor([[-1.1037, -0.3975, -1.1825],
[ 0.9740, -0.6369, 0.7307],
[-0.2071, -1.2326, -1.1484],
[-1.1996, -0.2710, -0.5757],
[-0.3005, -2.7490, 0.4913]], requires_grad=True)
b:tensor([ 0.8225, 0.1361, -0.7192], requires_grad=True)
z:tensor([[-1.0144, -5.1509, -2.4038],
[-1.0144, -5.1509, -2.4038]], grad_fn=<AddBackward0>)
loss:11.113075256347656
w的梯度:tensor([[-0.6763, -3.4339, -1.6026],
[-0.6763, -3.4339, -1.6026],
[-0.6763, -3.4339, -1.6026],
[-0.6763, -3.4339, -1.6026],
[-0.6763, -3.4339, -1.6026]])
b的梯度:tensor([-0.6763, -3.4339, -1.6026])
模拟线性回归
from cProfile import label
import torch
from pydantic import color
from torch.utils.data import TensorDataset #构造数据集对象
from torch.utils.data import DataLoader #数据加载器
from torch import nn #nn模块有平方损失函数和假设函数
from torch import optim #opti 优化器函数
from sklearn.datasets import make_regression #创建线性回归模型数据集
import matplotlib.pyplot as plt #可视化
plt.rcParams['font.sans-serif'] = ['SimHei']#正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False#正常显示负号
#numpy对象->张量Tensor->数据集对象TensorDataset->数据加载器DataLoader
#创建数据集对象
def create_dataset():
    """Generate a noisy single-feature linear-regression data set as tensors.

    Returns:
        A tuple ``(x, y, coef)``: the feature and target values as float
        tensors, and the true coefficient reported by ``make_regression``.
    """
    x,y,coef = make_regression(
        n_samples=100,
        n_features=1,
        noise=10,
        coef=True,
        bias=14.5,
        random_state=3
    )
    print(type(x))
    # Wrap the generated arrays as float tensors.
    x = torch.tensor(x,dtype=torch.float)
    y = torch.tensor(y,dtype=torch.float)
    return x,y,coef
#定义训练函数
def train(x,y,coef,bias=14.5):
    """Fit a one-feature linear model with mini-batch SGD and plot the results.

    Args:
        x: feature tensor; batches of it are fed to ``nn.Linear(1, 1)``.
        y: target tensor; each batch is reshaped to a column to match the
            model output.
        coef: true slope of the generating line, used for the reference plot.
        bias: true intercept of the generating line. Defaults to 14.5, the
            value ``create_dataset`` passes to ``make_regression``.

    Prints the mean batch loss of every epoch and the learned parameters,
    then shows the loss curve and a plot of the samples with the fitted and
    true lines. Returns nothing.
    """
    # 1. tensors -> dataset -> data loader.
    dataset = TensorDataset(x,y)
    # Batches of 16, shuffled each epoch (shuffling is for training data;
    # it serves no purpose for a test set).
    dataloader = DataLoader(dataset,batch_size=16,shuffle=True)
    # 2. Linear model with 1 input feature and 1 output feature.
    model = nn.Linear(1,1)
    # 3. Mean-squared-error loss.
    criterion = nn.MSELoss()
    # 4. SGD optimizer over the model parameters.
    optimizer = optim.SGD(model.parameters(),lr=0.01)
    # 5. Training loop; loss_list collects the mean batch loss of each epoch.
    epochs,loss_list = 100,[]
    for epoch in range(epochs):
        # Running sum of batch losses and number of batches in this epoch.
        total_loss,batch_count = 0.0,0
        for train_x,train_y in dataloader:
            # Forward pass.
            y_pred = model(train_x)
            # The predictions form a column, so the targets are reshaped
            # to n rows x 1 column before computing the batch-mean loss.
            loss = criterion(y_pred,train_y.reshape(-1,1))
            # item() extracts the single value held by the loss tensor.
            total_loss += loss.item()
            batch_count += 1
            # Zero the gradients, backpropagate, update the parameters.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        epoch_loss = total_loss/batch_count
        loss_list.append(epoch_loss)
        print(f'轮数:{epoch+1},平均损失值:{epoch_loss}')
    # 6. Final report.
    print(f'最终{epochs}平均损失分别为{loss_list}')
    print(f'模型参数,权重:{model.weight},偏置:{model.bias}')
    # 7. Loss curve.
    plt.plot(range(epochs),loss_list)
    plt.title('损失曲线变化图')
    plt.grid(True)
    plt.show()
    # 8. Samples together with the fitted and the true line.
    plt.scatter(x,y)
    # Evaluate the model on all samples at once, without tracking gradients,
    # so the result can be handed to matplotlib directly.
    with torch.no_grad():
        y_pred = model(x).reshape(-1)
    # True line from the known slope and intercept.
    y_true = (x * float(coef) + bias).reshape(-1)
    x_flat = x.reshape(-1)
    plt.plot(x_flat,y_pred,color='red',label='预测值')
    plt.plot(x_flat,y_true,color='green',label='真实值')
    # Without legend() the labels set above are never displayed.
    plt.legend()
    plt.grid(True)
    plt.show()
if __name__ == '__main__':
    # Build the data set, show it, then train and plot.
    x,y,coef = create_dataset()
    print(f'x:{x}, y:{y}, coef:{coef}')
    train(x,y,coef)
<class 'numpy.ndarray'>
x:tensor([[ 0.1506],
[-0.8887],
[-1.8609],
[ 0.1529],
[ 0.8993],
[-0.8038],
[ 1.7746],
[ 0.6467],
[-1.7431],
[-0.0827],
[-0.1605],
[-0.1319],
[ 0.6141],
[-1.3707],
[-1.5465],
[-2.2483],
[ 0.2491],
[ 1.0481],
[-0.5747],
[ 0.8528],
[ 1.7696],
[-0.5454],
[ 0.0500],
[-0.6747],
[-0.5886],
[-1.0642],
[ 0.2386],
[-0.8379],
[-1.1011],
[ 0.6762],
[ 1.0132],
[ 0.7451],
[ 0.0965],
[-0.0241],
[ 0.0297],
[-0.1095],
[-0.5911],
[-0.7688],
[ 1.7096],
[-0.4047],
[-1.0249],
[ 0.3773],
[-0.9600],
[ 1.4861],
[-1.8635],
[ 1.1082],
[ 0.6791],
[-0.6270],
[-0.3563],
[-0.5164],
[-0.1545],
[-0.2300],
[-0.1974],
[-0.6029],
[-2.9157],
[ 1.1194],
[ 1.1679],
[-1.6233],
[ 1.3917],
[ 0.4365],
[ 0.8813],
[-1.2441],
[-2.4191],
[-0.7130],
[-0.0438],
[-0.2678],
[-0.2056],
[ 1.9390],
[-0.8739],
[ 0.4379],
[-0.6264],
[ 1.1240],
[ 1.3337],
[-0.8554],
[-0.4772],
[-1.9145],
[-0.3548],
[-1.3139],
[-1.1183],
[-1.1850],
[ 2.1581],
[ 0.8458],
[-0.2774],
[ 0.6252],
[-1.0238],
[ 0.8846],
[ 0.6432],
[ 1.9761],
[-0.5966],
[ 0.4838],
[ 0.9824],
[ 0.1451],
[ 0.2367],
[-0.9718],
[-0.3002],
[-1.0239],
[-1.3958],
[ 1.4875],
[-0.9238],
[ 1.7886]]), y:tensor([ 22.5139, -11.2928, -65.3266, 11.4912, 34.3622, -4.7371, 55.7981,
34.1948, -21.1198, 8.8681, 4.8401, -12.6738, 27.7491, -10.2920,
-20.5800, -34.3397, 37.0163, 57.3231, 6.9397, 29.9672, 53.9756,
4.4474, 20.7192, -8.5406, 0.7447, -22.5050, 4.0683, -13.3933,
-24.1197, 46.6569, 41.9460, 35.2172, 9.7509, 20.5186, -3.9652,
1.3670, 4.0815, -25.0052, 52.6946, -5.1169, -14.0601, 26.0171,
-16.5974, 65.3023, -37.8152, 37.5357, 31.6372, 0.7058, -2.9643,
17.8506, 15.1214, 6.6686, 4.1857, 16.1612, -72.6875, 44.3273,
54.4379, -14.8631, 46.8485, 27.7344, 48.8398, -20.5679, -49.2582,
-10.4936, 22.0268, 7.2506, 0.7285, 67.7584, -7.8556, 26.0243,
-3.9790, 58.8114, 36.1283, -12.2570, -9.5121, -25.5052, 10.3903,
-24.5829, -24.2417, -22.2563, 72.4724, 36.7423, 7.3779, 38.3146,
-25.6796, 46.2081, 23.3987, 69.0189, -0.8138, 31.3881, 54.9516,
12.6113, 18.8799, -16.6771, 23.5101, -23.1808, -24.6395, 56.1088,
-22.2519, 67.3660]), coef:27.478050549563925
轮数:1,平均损失值:1005.3584856305804
轮数:2,平均损失值:753.6461879185268
轮数:3,平均损失值:550.310786655971
轮数:4,平均损失值:436.4559064592634
轮数:5,平均损失值:353.28519330705916
轮数:6,平均损失值:265.3744386945452
轮数:7,平均损失值:233.3917192731585
轮数:8,平均损失值:204.02883802141463
轮数:9,平均损失值:165.04705374581474
轮数:10,平均损失值:145.10754067557198
轮数:11,平均损失值:130.88368334089006
轮数:12,平均损失值:133.30799647739954
轮数:13,平均损失值:110.22104317801339
轮数:14,平均损失值:95.59957449776786
轮数:15,平均损失值:94.76232310703823
轮数:16,平均损失值:87.888185773577
轮数:17,平均损失值:83.23655101231166
轮数:18,平均损失值:81.2883665902274
轮数:19,平均损失值:102.31726782662528
轮数:20,平均损失值:79.96381786891392
轮数:21,平均损失值:82.31244386945453
轮数:22,平均损失值:86.40313502720424
轮数:23,平均损失值:84.2410272870745
轮数:24,平均损失值:74.07170976911273
轮数:25,平均损失值:76.87670789446149
轮数:26,平均损失值:79.66712079729352
轮数:27,平均损失值:87.17890930175781
轮数:28,平均损失值:75.19281959533691
轮数:29,平均损失值:84.16963740757534
轮数:30,平均损失值:74.6390517098563
轮数:31,平均损失值:74.08153424944196
轮数:32,平均损失值:77.41675513131278
轮数:33,平均损失值:78.16011810302734
轮数:34,平均损失值:84.90502057756696
轮数:35,平均损失值:102.24421855381557
轮数:36,平均损失值:86.36869212559291
轮数:37,平均损失值:79.79138837541852
轮数:38,平均损失值:87.80255835396903
轮数:39,平均损失值:89.73381532941546
轮数:40,平均损失值:82.25183595929828
轮数:41,平均损失值:77.3248405456543
轮数:42,平均损失值:85.60394178118024
轮数:43,平均损失值:76.80496161324638
轮数:44,平均损失值:80.82711356026786
轮数:45,平均损失值:75.08517074584961
轮数:46,平均损失值:73.23149612971714
轮数:47,平均损失值:96.66055951799665
轮数:48,平均损失值:80.27226148332868
轮数:49,平均损失值:82.68898282732282
轮数:50,平均损失值:84.77420752389091
轮数:51,平均损失值:80.39404296875
轮数:52,平均损失值:74.19170079912458
轮数:53,平均损失值:76.09908894130162
轮数:54,平均损失值:87.979918888637
轮数:55,平均损失值:83.72765677315849
轮数:56,平均损失值:76.26428331647601
轮数:57,平均损失值:78.05144664219448
轮数:58,平均损失值:74.45630427769252
轮数:59,平均损失值:83.54700415475028
轮数:60,平均损失值:84.32203183855329
轮数:61,平均损失值:87.54211534772601
轮数:62,平均损失值:85.28771427699498
轮数:63,平均损失值:77.27156121390206
轮数:64,平均损失值:94.7388300214495
轮数:65,平均损失值:75.22668184552874
轮数:66,平均损失值:75.99233627319336
轮数:67,平均损失值:74.61290386744908
轮数:68,平均损失值:91.01730510166713
轮数:69,平均损失值:79.44541440691266
轮数:70,平均损失值:79.60392161778041
轮数:71,平均损失值:78.99691717965263
轮数:72,平均损失值:78.45030702863421
轮数:73,平均损失值:78.58223724365234
轮数:74,平均损失值:79.4729254586356
轮数:75,平均损失值:81.47309330531529
轮数:76,平均损失值:78.3311151776995
轮数:77,平均损失值:80.62224905831474
轮数:78,平均损失值:78.88288933890206
轮数:79,平均损失值:89.45929718017578
轮数:80,平均损失值:79.45879200526646
轮数:81,平均损失值:81.08362524850028
轮数:82,平均损失值:75.64321981157575
轮数:83,平均损失值:88.05703135899135
轮数:84,平均损失值:78.62890951974052
轮数:85,平均损失值:75.98532322474888
轮数:86,平均损失值:83.39856610979352
轮数:87,平均损失值:77.05607005528041
轮数:88,平均损失值:98.51185389927456
轮数:89,平均损失值:74.06922067914691
轮数:90,平均损失值:74.0155884878976
轮数:91,平均损失值:85.84842463902065
轮数:92,平均损失值:83.3331663949149
轮数:93,平均损失值:75.9749401637486
轮数:94,平均损失值:74.29283142089844
轮数:95,平均损失值:84.82763344900948
轮数:96,平均损失值:75.32686342511859
轮数:97,平均损失值:87.81455067225865
轮数:98,平均损失值:78.42629623413086
轮数:99,平均损失值:79.29963084629604
轮数:100,平均损失值:79.70837020874023
最终100平均损失分别为[1005.3584856305804, 753.6461879185268, 550.310786655971, 436.4559064592634, 353.28519330705916, 265.3744386945452, 233.3917192731585, 204.02883802141463, 165.04705374581474, 145.10754067557198, 130.88368334089006, 133.30799647739954, 110.22104317801339, 95.59957449776786, 94.76232310703823, 87.888185773577, 83.23655101231166, 81.2883665902274, 102.31726782662528, 79.96381786891392, 82.31244386945453, 86.40313502720424, 84.2410272870745, 74.07170976911273, 76.87670789446149, 79.66712079729352, 87.17890930175781, 75.19281959533691, 84.16963740757534, 74.6390517098563, 74.08153424944196, 77.41675513131278, 78.16011810302734, 84.90502057756696, 102.24421855381557, 86.36869212559291, 79.79138837541852, 87.80255835396903, 89.73381532941546, 82.25183595929828, 77.3248405456543, 85.60394178118024, 76.80496161324638, 80.82711356026786, 75.08517074584961, 73.23149612971714, 96.66055951799665, 80.27226148332868, 82.68898282732282, 84.77420752389091, 80.39404296875, 74.19170079912458, 76.09908894130162, 87.979918888637, 83.72765677315849, 76.26428331647601, 78.05144664219448, 74.45630427769252, 83.54700415475028, 84.32203183855329, 87.54211534772601, 85.28771427699498, 77.27156121390206, 94.7388300214495, 75.22668184552874, 75.99233627319336, 74.61290386744908, 91.01730510166713, 79.44541440691266, 79.60392161778041, 78.99691717965263, 78.45030702863421, 78.58223724365234, 79.4729254586356, 81.47309330531529, 78.3311151776995, 80.62224905831474, 78.88288933890206, 89.45929718017578, 79.45879200526646, 81.08362524850028, 75.64321981157575, 88.05703135899135, 78.62890951974052, 75.98532322474888, 83.39856610979352, 77.05607005528041, 98.51185389927456, 74.06922067914691, 74.0155884878976, 85.84842463902065, 83.3331663949149, 75.9749401637486, 74.29283142089844, 84.82763344900948, 75.32686342511859, 87.81455067225865, 78.42629623413086, 79.29963084629604, 79.70837020874023]
模型参数,权重:Parameter containing:
tensor([[27.5763]], requires_grad=True),偏置:Parameter containing:
tensor([13.9042], requires_grad=True)


预测折线和真实折线基本重叠,说明模型学习得很好
Day4
激活函数
sigmoid

tanh

与 Sigmoid 相比,它是以 0 为中心的,且梯度相对于sigmoid大,使得其收敛速度要比 Sigmoid 快,减少迭代次数。然而,从图中可以看出,Tanh 两侧的导数也为 0,同样会造成梯度消失。若使用时可在隐藏层使用tanh函数,在输出层使用sigmoid函数。
ReLU
与sigmoid相比,RELU的优势是:
采用sigmoid函数,计算量大(指数运算),反向传播求误差梯度时,计算量相对大,而采用Relu激活函数,整个过程的计算量节省很多。 sigmoid函数反向传播时,很容易就会出现梯度消失的情况,从而无法完成深层网络的训练。 Relu会使一部分神经元的输出为0,这样就造成了网络的稀疏性,并且减少了参数的相互依存关系,缓解了过拟合问题的发生。
ReLU 能够在x>0时保持梯度不衰减,从而缓解梯度消失问题。然而,随着训练的推进,部分输入会落入小于0区域,导致对应权重无法更新。这种现象被称为“神经元死亡”。
softmax

其他常见的激活函数


7150

被折叠的 条评论
为什么被折叠?



