目录
单模型的训练技巧——CosineAnnealingWarmRestarts
任务和数据集
任务
Objective - Image Classification
1. Solve image classification with convolutional neural networks.
2. Improve the performance with data augmentations.
3. Understand popular image model techniques such as residual connections.
数据集
● The images are collected from the food-11 dataset classified into 11 classes.
● Training set: 9866 labeled images
● Validation set: 3430 labeled images
● Testing set: 3347 images
下载地址:ML2022Spring-HW3 | Kaggle
Baseline
Simple : 0.50099
Medium : 0.73207 Training Augmentation + Train Longer
Strong : 0.81872 Training Augmentation + Model Design + Train Looonger (+
Cross Validation + Ensemble)
Boss : 0.88446 Training Augmentation + Model Design + Test Time
Augmentation + Train Looonger (+ Cross Validation + Ensemble)
导包
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
# This is for the progress bar.
from tqdm.auto import tqdm
from d2l import torch as d2l
import random
def same_seeds(seed):
    """Seed every random number generator used by this file.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and, when CUDA is
    available, the CUDA generators), then puts cuDNN into deterministic,
    non-benchmark mode.

    Args:
        seed: Integer seed applied to every generator.
    """
    # `random` is imported by this file; without seeding it, any code that
    # draws from Python's own RNG would still differ between runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
数据处理
Transforms
# Evaluation pipeline (validation and testing): no augmentation is applied.
# The PIL image is only resized to 128x128 and converted to a tensor.
test_tfm = transforms.Compose(
    [transforms.Resize((128, 128)), transforms.ToTensor()]
)

# Training pipeline. Augmentation can also be used at test time: run
# train_tfm several times to produce image variants and combine the
# predictions with an ensemble method.
train_tfm = transforms.Compose(
    [
        # Bring every image to a fixed shape (height = width = 128).
        transforms.Resize((128, 128)),
        # Extra augmentation transforms may be inserted here.
        # ToTensor() must stay the last transform in the list.
        transforms.ToTensor(),
    ]
)
Datasets
class FoodDataset(Dataset):
    """Image dataset whose integer label is the file-name prefix before "_".

    Args:
        path: Directory scanned for ``.jpg`` files. When falsy, ``files`` is
            used instead.
        tfm: Callable applied to each opened PIL image.
        files: Explicit list of image paths, used only when ``path`` is
            falsy. ``None`` yields an empty dataset.
    """

    def __init__(self, path=None, tfm=test_tfm, files=None):
        # The one-argument form super(FoodDataset) never dispatched to
        # Dataset.__init__; the zero-argument form does.
        super().__init__()
        self.path = path
        if path:
            self.files = sorted(
                os.path.join(path, x)
                for x in os.listdir(path)
                if x.endswith(".jpg")
            )
        else:
            # Keep len() and indexing well-defined when nothing was supplied.
            self.files = files if files is not None else []
        self.transform = tfm

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the file at ``idx``.

        The label is -1 when the file-name prefix is not an integer, which
        is how unlabeled test images are represented.
        """
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        # os.path.basename uses the platform's path separator, so the same
        # code works on Windows without switching "/" to "\\" by hand.
        prefix = os.path.basename(fname).split("_")[0]
        try:
            label = int(prefix)
        except ValueError:
            label = -1  # test images have no label
        return im, label
数据加载函数
def loadData(dataset_dir, batch_size, num_workers, train_tfm, test_tfm):
    """Build the training and validation data loaders.

    Args:
        dataset_dir: Directory containing the "training" and "validation"
            sub-directories.
        batch_size: Number of samples per batch for both loaders.
        num_workers: Number of DataLoader worker processes.
        train_tfm: Transform applied to training images.
        test_tfm: Transform applied to validation images.

    Returns:
        A ``(train_loader, valid_loader)`` tuple.
    """
    train_set = FoodDataset(os.path.join(dataset_dir, "training"), tfm=train_tfm)
    train_loader = DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )

    valid_set = FoodDataset(os.path.join(dataset_dir, "validation"), tfm=test_tfm)
    # Validation is not shuffled: combined with drop_last=True, shuffling
    # would score a different random subset every epoch, so accuracies used
    # for checkpoint selection would not be comparable between epochs.
    # drop_last stays True because the training loop in this file averages
    # accuracy using the size of the last batch it saw.
    valid_loader = DataLoader(
        valid_set,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )

    print('训练集总长度是 {:d}, batch数量是 {:.2f}'.format(len(train_set), len(train_set)/ batch_size))
    print('验证集总长度是 {:d}, batch数量是 {:.2f}'.format(len(valid_set), len(valid_set)/ batch_size))
    return train_loader, valid_loader
分类模型
使用的模型有点像VGG,共同点是使用3X3的卷积核、使用池化、维度持续X2、使用线性层,不同点在于这里的模型更浅,使用了BatchNorm

class Classifier(nn.Module):
    """VGG-like CNN: five conv stages followed by a three-layer MLP head.

    Takes input of shape [3, 128, 128] per image and produces 11 outputs.
    """

    def __init__(self):
        super().__init__()
        # Every stage is
        #   Conv2d(in, out, kernel_size=3, stride=1, padding=1)
        #   -> BatchNorm2d(out) -> ReLU -> MaxPool2d(kernel_size=2, stride=2, padding=0)
        # The convolution keeps the spatial size and the pooling halves it:
        #   [3, 128, 128] -> [64, 64, 64] -> [128, 32, 32] -> [256, 16, 16]
        #   -> [512, 8, 8] -> [512, 4, 4]
        channel_plan = [(3, 64), (64, 128), (128, 256), (256, 512), (512, 512)]
        stages = []
        for c_in, c_out in channel_plan:
            stages.append(nn.Conv2d(c_in, c_out, 3, 1, 1))
            stages.append(nn.BatchNorm2d(c_out))
            stages.append(nn.ReLU())
            stages.append(nn.MaxPool2d(2, 2, 0))
        self.cnn = nn.Sequential(*stages)

        # Head operating on the flattened [512, 4, 4] feature map.
        head = [
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        ]
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        """Run the conv stack, flatten per sample, and apply the head."""
        features = self.cnn(x)
        batch = features.size(0)
        flat = features.view(batch, -1)
        return self.fc(flat)
训练
训练函数
def trainer(train_loader, val_loader, model, config, devices):
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=config['learning_rate'], weight_decay=config['weight_decay'])
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer,
T_0=config['T_0'], T_mult=config['T_mult'],
eta_min=config['learning_rate']/config['eta_min_ratio'])
n_epochs, patience = config['num_epoch'], config['patience']
num_batches = len(train_loader)
show_batches = num_batches // config['show_num']
if not os.path.isdir('./' + config['model_path'].split('/')[1]):
os.mkdir('./' + config['model_path'].split('/')[1]) # Create directory of saving models.
legend = ['train loss', 'train acc']
if val_loader is not None:
legend.append('valid loss')
legend.append('valid acc')
animator = d2l.Animator(xlabel='epoch', xlim=[0, n_epochs], legend=legend)
for epoch in range(n_epochs):
train_acc, train_loss = 0.0, 0.0
# training
model.train() # set the model to training mode
for i, (data, labels) in enumerate(train_loader):
data, labels = data.to(devices[0]), labels.to(devices[0])
optimizer.zero_grad()
outputs = model(data)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
_, train_pred = torch.max(outputs, 1) # get the index of the class with the highest probability
train_acc += (train_pred.detach() == labels.detach()).sum().item()
train_loss += loss.item()
if (i + 1) % show_batches == 0:
train_acc = train_acc / show_batches / len(data)
train_loss = train_loss / show_batches
print('train_acc {:.3f}, train_loss {:.3f}'.format(train_acc, train_loss))
animator.add(epoch + (i + 1) / num_batches, (train_loss, train_acc, None, None))
train_acc, train_loss = 0.0, 0.0
scheduler.step()
# validation
if val_loader != None:
model.eval() # set the model to evaluation mode
val_acc, val_loss = 0.0, 0.0
with torch.no_grad():
for i, (data, labels) in enumerate(val_loader):
data, labels = data.to(devices[0]), labels.to(devices[0])
outputs = model(data)
loss = criterion(outputs, labels)
_, val_pred = torch.max(outputs, 1)
val_acc += (val_pred.cpu() == labels.cpu()).sum().item() # get the index of the class with the highest probability
val_loss += loss.item()
val_acc = val_acc / len(val_loader) / len(data)
val_loss = val_loss / len(val_loader)
print('val_acc {:.3f}, val_loss {:.3f} '.format(val_acc, val_loss))
animator.add(epoch + 1, (None, None, val_loss, val_acc))
# if the model improves, save a checkpoint at this epoch
if val_acc > config['best_acc']:
conf

本文介绍了在图像分类任务中应用深度学习的方法,包括数据集介绍、模型训练、数据增广、模型集成(Ensemble)、Test Time Augmentation(TTA)以及交叉验证。通过这些技术,作者提高了模型性能,逼近Boss Line标准。讨论部分强调了数据增广在防止过拟合中的作用,以及不同训练技巧如CosineAnnealingWarmRestarts的影响。

2万+

被折叠的 条评论
为什么被折叠?



