基于深度强化学习的绘画智能体 代码分析(五)

本文深入剖析了 PyTorch 中 autograd.grad() 函数的使用,详细解释了该函数的各个参数(如 outputs、inputs、grad_outputs 等),并结合代码实例进行分析,帮助理解深度学习中的自动求导过程。

GitHub源码

  1. wgan.py
import torch
import torch.nn as nn
import numpy as np
from torch.optim import Adam, SGD
from torch import autograd
from torch.autograd import Variable
import torch.nn.functional as F
from torch.autograd import grad as torch_grad
import torch.nn.utils.weight_norm as weightNorm
from utils.util import *

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Spatial side length used when cal_gradient_penalty reshapes tensors to
# (batch_size, 6, dim, dim).
dim = 128
LAMBDA = 10 # Gradient penalty lambda hyperparameter

class TReLU(nn.Module):
    """ReLU with a learnable threshold: ``relu(x - alpha) + alpha``.

    ``alpha`` is a one-element trainable parameter registered on the module.
    It starts at zero, so a freshly built layer behaves like a plain ReLU.
    """

    def __init__(self):
        super(TReLU, self).__init__()
        # One float32 element, zero-initialised, optimised with the model.
        initial_threshold = torch.zeros(1, dtype=torch.float)
        self.alpha = nn.Parameter(initial_threshold, requires_grad=True)

    def forward(self, x):
        """Clamp ``x`` from below at ``alpha`` (broadcast over all elements)."""
        shifted = x - self.alpha
        return F.relu(shifted) + self.alpha

class Discriminator(nn.Module):
    """Weight-normalised convolutional critic over 6-channel inputs.

    Four 5x5 stride-2 convolutions (6 -> 16 -> 32 -> 64 -> 128 channels), each
    followed by a TReLU, then a 1x1 convolution down to a single channel.
    The result is flattened to rows of 64 values; for 128x128 inputs the four
    stride-2 stages leave an 8x8 map, i.e. 64 patch scores per sample.
    """

    def __init__(self):
        super(Discriminator, self).__init__()

        # Attribute names and registration order are kept stable so that
        # saved state_dict keys keep matching.
        self.conv0 = weightNorm(nn.Conv2d(6, 16, kernel_size=5, stride=2, padding=2))
        self.conv1 = weightNorm(nn.Conv2d(16, 32, kernel_size=5, stride=2, padding=2))
        self.conv2 = weightNorm(nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=2))
        self.conv3 = weightNorm(nn.Conv2d(64, 128, kernel_size=5, stride=2, padding=2))
        self.conv4 = weightNorm(nn.Conv2d(128, 1, kernel_size=1, stride=1, padding=0))
        self.relu0 = TReLU()
        self.relu1 = TReLU()
        self.relu2 = TReLU()
        self.relu3 = TReLU()

    def forward(self, x):
        """Return the patch scores for ``x`` as a tensor with 64 columns."""
        stages = (
            (self.conv0, self.relu0),
            (self.conv1, self.relu1),
            (self.conv2, self.relu2),
            (self.conv3, self.relu3),
        )
        for conv, activation in stages:
            x = activation(conv(x))
        scores = self.conv4(x)
        return scores.view(-1, 64)  # Patch Q

# Two discriminators with the same architecture but independent weights:
# netD is the one being trained, target_netD is the one used by cal_reward.
netD = Discriminator()  
target_netD = Discriminator()
netD = netD.to(device)
target_netD = target_netD.to(device) # move both networks to the selected device (GPU or CPU)
# hard_update comes from utils.util; presumably it copies netD's parameters
# into target_netD so both start from identical weights -- confirm in utils.
hard_update(target_netD, netD)

# Adam optimiser over netD's parameters only (target_netD is not optimised here).
# lr: learning rate, i.e. the step size used when applying gradient updates.
# betas: coefficients for the running averages of the gradient and of its square.
optimizerD = Adam(netD.parameters(), lr=3e-4, betas=(0.5, 0.999))


def cal_gradient_penalty(netD, real_data, fake_data, batch_size):
    """Compute the gradient penalty of ``netD`` on random real/fake blends.

    One mixing weight is drawn per sample from U[0, 1) and shared by every
    element of that sample. The discriminator is evaluated on
    ``w * real + (1 - w) * fake`` and the penalty is
    ``LAMBDA * mean((||d netD / d input||_2 - 1) ** 2)``, where the norm is
    taken per sample over all input elements.

    Args:
        netD: discriminator to differentiate through.
        real_data: real batch; must hold ``batch_size * 6 * dim * dim``
            elements (assumed to be laid out as (batch_size, 6, dim, dim)).
        fake_data: fake batch; reshaped to (batch_size, 6, dim, dim).
        batch_size: number of samples in the batch.

    Returns:
        Scalar tensor that stays attached to the graph (``create_graph=True``),
        so it can be added to a loss and back-propagated.
    """
    elements_per_sample = int(real_data.nelement() / batch_size)

    # Draw the weights on the CPU first (one per sample), replicate each one
    # across its sample, then reshape to the image layout and move to device.
    mix = torch.rand(batch_size, 1)
    mix = mix.expand(batch_size, elements_per_sample).contiguous()
    mix = mix.view(batch_size, 6, dim, dim).to(device)

    fake_data = fake_data.view(batch_size, 6, dim, dim)

    # .data drops any autograd history, so the blend is a fresh leaf tensor
    # that we can differentiate with respect to.
    blended = mix * real_data.data + (1 - mix) * fake_data.data
    interpolates = blended.requires_grad_(True)

    disc_interpolates = netD(interpolates)
    # The output is not a scalar, so a tensor of ones is supplied as the
    # upstream gradient (equivalent to differentiating the sum of outputs).
    upstream = torch.ones(disc_interpolates.size()).to(device)
    (gradients,) = autograd.grad(
        outputs=disc_interpolates,
        inputs=interpolates,
        grad_outputs=upstream,
        create_graph=True,
        retain_graph=True,
    )

    per_sample = gradients.view(gradients.size(0), -1)
    grad_norm = per_sample.norm(2, dim=1)
    gradient_penalty = ((grad_norm - 1) ** 2).mean() * LAMBDA  # .mean() averages over the batch
    return gradient_penalty
详解 pytorch 中的 autograd.grad() 函数

autograd.grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=False, only_inputs=True, allow_unused=False)

  • outputs: 求导的因变量(需要求导的函数)
  • inputs: 求导的自变量
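  • grad_outputs: 如果 outputs 为标量,则 grad_outputs 可以取默认值 None,即不用写;如果 outputs 不是标量(向量或更高维张量),则必须传入与 outputs 形状相同的张量,作为求导时的权重
  • retain_graph: True 则保留计算图,False 则在求导后释放计算图;默认与 create_graph 的取值相同
  • create_graph: 若要计算高阶导数(即对得到的梯度继续求导),则必须设为 True
  • allow_unused: 设为 True 时,允许 inputs 中存在未参与 outputs 计算的张量(其梯度返回 None);默认为 False,此时会报错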
def cal_reward(fake_data, real_data):
    """Score a (real, fake) pair with the target discriminator.

    The two tensors are concatenated along dimension 1, real first, and the
    result is passed through the module-level ``target_netD``.
    """
    paired = torch.cat((real_data, fake_data), dim=1)
    return target_netD(paired)

def save_gan(path):
    """Write netD's state_dict to ``<path>/wgan.pkl``.

    The network is moved to the CPU before saving and moved back to
    ``device`` afterwards.
    """
    checkpoint_file = '{}/wgan.pkl'.format(path)
    netD.cpu()
    weights = netD.state_dict()
    torch.save(weights, checkpoint_file)
    netD.to(device)

def load_gan(path):
    """Load netD's weights from ``<path>/wgan.pkl`` (the file save_gan writes)."""
    checkpoint_file = '{}/wgan.pkl'.format(path)
    weights = torch.load(checkpoint_file)
    netD.load_state_dict(weights)

def update(fake_data, real_data):
    """Run one optimisation step of the discriminator ``netD``.

    The loss is ``mean(D(fake pair)) - mean(D(real pair)) + gradient penalty``,
    where the fake pair is ``cat([real, fake], 1)`` and the real pair is
    ``cat([real, real], 1)``.

    Returns:
        Tuple ``(mean fake score, mean real score, gradient penalty)``.
    """
    # Detach both inputs so no gradient flows back into whatever produced them.
    fake_data = fake_data.detach()
    real_data = real_data.detach()

    fake_pair = torch.cat((real_data, fake_data), dim=1)
    real_pair = torch.cat((real_data, real_data), dim=1)

    score_real = netD(real_pair)
    score_fake = netD(fake_pair)
    penalty = cal_gradient_penalty(netD, real_pair, fake_pair, real_pair.shape[0])

    # Clear stale gradients, then back-propagate the scalar loss and step.
    optimizerD.zero_grad()
    loss = score_fake.mean() - score_real.mean() + penalty
    loss.backward()
    optimizerD.step()

    # soft_update comes from utils.util; presumably it moves target_netD a
    # small fraction (0.001) towards netD -- confirm in utils.
    soft_update(target_netD, netD, 0.001)
    return score_fake.mean(), score_real.mean(), penalty
  1. evaluator.py
import numpy as np
from utils.util import *

class Evaluator(object):
    """Runs validation episodes of a policy against an environment.

    Calling the evaluator resets the environment once per episode, steps it
    with the policy's actions, asks the environment to save an image after
    every step, and returns the accumulated reward and the distance reported
    by the environment for the last episode.
    """

    def __init__(self, args, writer):
        """Copy the evaluation settings from ``args``.

        Args:
            args: object exposing ``validate_episodes``, ``max_step`` and
                ``env_batch`` attributes.
            writer: stored on the instance; not used by this class itself.
        """
        self.validate_episodes = args.validate_episodes
        self.max_step = args.max_step
        self.env_batch = args.env_batch
        self.writer = writer
        # Counts finished validation episodes; passed to env.save_image.
        self.log = 0

    @staticmethod
    def _all_done(done):
        """Return True when every entry of the environment's done flag is set."""
        # NOTE(review): assumes `done` is a bool or a NumPy-convertible array
        # of per-environment flags -- confirm against the environment.
        return bool(np.all(done))

    def __call__(self, env, policy, debug=False):
        """Evaluate ``policy`` on ``env`` for ``validate_episodes`` episodes.

        Args:
            env: environment providing ``reset(test=, episode=)``,
                ``step(action)``, ``save_image(log, step)`` and ``get_dist()``.
            policy: callable mapping an observation to an action.
            debug: accepted for interface compatibility; unused.

        Returns:
            ``(episode_reward, dist)`` of the LAST episode, where
            ``episode_reward`` is an array of length ``env_batch``.

        Raises:
            ValueError: if ``validate_episodes`` is less than 1, because there
                would be nothing to return.
        """
        if self.validate_episodes < 1:
            raise ValueError('validate_episodes must be at least 1')

        # A falsy max_step means "no step limit". In that mode the episode
        # ends when the environment reports done; previously the loop either
        # never terminated (max_step == 0) or raised TypeError (None).
        unlimited = not self.max_step

        for episode in range(self.validate_episodes):
            # reset at the start of episode
            observation = env.reset(test=True, episode=episode)
            assert observation is not None  # sanity check on the environment

            episode_steps = 0
            episode_reward = np.zeros(self.env_batch)
            while unlimited or episode_steps < self.max_step:
                action = policy(observation)
                observation, reward, done, _ = env.step(action)
                episode_reward += reward
                episode_steps += 1
                env.save_image(self.log, episode_steps)
                if unlimited and self._all_done(done):
                    break
            dist = env.get_dist()
            self.log += 1
        return episode_reward, dist
  1. multi.py
import cv2
import torch
import numpy as np
from env import Paint
from utils.util import *
from DRL.ddpg import decode
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class fastenv():
    """Batched wrapper around a ``Paint`` environment with writer logging.

    The wrapper forwards ``step``/``reset`` to the underlying environment,
    logs per-environment distances during training, and can dump canvases
    and targets through ``writer.add_image``.
    """

    def __init__(self, max_episode_length=10, env_batch=64, writer=None):
        """Build the underlying Paint environment and load its data.

        Args:
            max_episode_length: episode length passed to ``Paint``; also the
                step at which save_image writes the final canvas and target.
            env_batch: number of environments handled together.
            writer: object providing ``add_image`` and ``add_scalar``.
        """
        self.max_episode_length = max_episode_length
        self.env_batch = env_batch
        self.writer = writer
        self.test = False
        self.log = 0  # running index used as the step of 'train/dist' scalars

        self.env = Paint(self.env_batch, self.max_episode_length)
        self.env.load_data()
        self.observation_space = self.env.observation_space
        self.action_space = self.env.action_space

    def _as_rgb(self, image):
        """Convert one image tensor for logging.

        The first axis is moved last (presumably CHW -> HWC), the tensor is
        converted with ``to_numpy`` and the channel order is swapped with
        ``cv2.COLOR_BGR2RGB``.
        """
        array = to_numpy(image.permute(1, 2, 0))
        return cv2.cvtColor(array, cv2.COLOR_BGR2RGB)

    def save_image(self, log, step):
        """Log canvases for this step and, on the last step, final results.

        Every step: the canvas of each environment whose image id is <= 10.
        When ``step == max_episode_length``: the target and the canvas of
        each environment whose image id is < 50.
        """
        for idx in range(self.env_batch):
            image_id = self.env.imgid[idx]
            if image_id <= 10:
                canvas = self._as_rgb(self.env.canvas[idx])
                tag = '{}/canvas_{}.png'.format(str(image_id), str(step))
                self.writer.add_image(tag, canvas, log)

        if step != self.max_episode_length:
            return

        for idx in range(self.env_batch):
            image_id = self.env.imgid[idx]
            if image_id < 50:
                gt = self._as_rgb(self.env.gt[idx])
                canvas = self._as_rgb(self.env.canvas[idx])
                self.writer.add_image(str(image_id) + '/_target.png', gt, log)
                self.writer.add_image(str(image_id) + '/_canvas.png', canvas, log)

    def step(self, action):
        """Advance the underlying environment by one action (no gradients).

        When the first done flag is set and the wrapper is not in test mode,
        the distance of every environment is written as a 'train/dist'
        scalar, each under its own consecutive ``self.log`` index.

        Returns:
            The 4-tuple returned by the underlying environment's ``step``.
        """
        with torch.no_grad():
            ob, r, d, _ = self.env.step(torch.tensor(action).to(device))

        # Only the first flag is inspected; presumably all environments in
        # the batch finish together -- confirm in Paint.
        if d[0] and not self.test:
            self.dist = self.get_dist()
            for idx in range(self.env_batch):
                self.writer.add_scalar('train/dist', self.dist[idx], self.log)
                self.log += 1
        return ob, r, d, _

    def get_dist(self):
        """Return the per-environment mean squared target/canvas difference.

        Both images are cast to float, their difference is divided by 255
        and squared, then averaged over dimension 1 three times in a row.
        """
        diff = (self.env.gt.float() - self.env.canvas.float()) / 255
        squared = diff ** 2
        return to_numpy(squared.mean(1).mean(1).mean(1))

    def reset(self, test=False, episode=0):
        """Reset the underlying environment and remember the test flag.

        The environment receives ``episode * env_batch`` as its second
        argument.
        """
        self.test = test
        return self.env.reset(self.test, episode * self.env_batch)
  1. rpm.py
import numpy as np
import random
import torch
import pickle as pickle

class rpm(object):
    """Replay memory: keeps the most recent samples for repeated training.

    Items are appended until ``buffer_size`` is reached; after that, new
    items overwrite existing slots in ring-buffer order.
    """

    def __init__(self, buffer_size):
        """Create an empty memory holding at most ``buffer_size`` items."""
        self.buffer_size = buffer_size
        self.buffer = []
        # Next slot to overwrite once the buffer is full.
        self.index = 0

    def append(self, obj):
        """Store ``obj``, overwriting an existing slot when the memory is full.

        If the buffer holds more than ``buffer_size`` items (e.g. the limit
        was lowered after filling), it is first trimmed to its last
        ``buffer_size`` items.
        """
        if self.size() > self.buffer_size:
            print('buffer size larger than set value, trimming...')
            self.buffer = self.buffer[(self.size() - self.buffer_size):]
            # The old write position may now be out of range; restart the
            # ring at slot 0. Previously the stale index could raise
            # IndexError later, and `obj` itself was silently dropped here.
            self.index = 0

        if self.size() < self.buffer_size:
            self.buffer.append(obj)
            return

        self.buffer[self.index] = obj
        self.index = (self.index + 1) % self.buffer_size

    def size(self):
        """Return the number of items currently stored."""
        return len(self.buffer)

    def sample_batch(self, batch_size, device, only_state=False):
        """Draw a random batch without replacement and stack it per field.

        Args:
            batch_size: requested number of items; capped at the number of
                stored items.
            device: device the stacked tensors are moved to.
            only_state: when True, stack and return only field 3 of each
                item (presumably a state tensor -- confirm with the caller).

        Returns:
            A single tensor when ``only_state`` is True; otherwise a tuple of
            five tensors, one per item field 0..4. Each tensor has a new
            leading batch dimension created by ``torch.stack``.
        """
        sample_count = min(batch_size, self.size())
        batch = random.sample(self.buffer, sample_count)

        if only_state:
            states = torch.stack(tuple(item[3] for item in batch), dim=0)
            return states.to(device)

        item_count = 5
        return tuple(
            torch.stack(tuple(item[i] for item in batch), dim=0).to(device)
            for i in range(item_count)
        )
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值