李宏毅机器学习作业3——Image Classification，模型集成，交叉验证，TTA

原创

已于 2022-12-27 17:15:22 修改 · 2.8k 阅读

标签

#人工智能 #深度学习

于 2022-11-17 15:05:57 首次发布

本文介绍了在图像分类任务中应用深度学习的方法，包括数据集介绍、模型训练、数据增广、模型集成（Ensemble）、Test Time Augmentation（TTA）以及交叉验证。通过这些技术，作者提高了模型性能，逼近Boss Line标准。讨论部分强调了数据增广在防止过拟合中的作用，以及不同训练技巧如CosineAnnealingWarmRestarts的影响。

Test Time Augmentation

单模型的训练技巧——CosineAnnealingWarmRestarts

任务和数据集

任务

Objective - Image Classification
1. Solve image classification with convolutional neural networks.
2. Improve the performance with data augmentations.
3. Understand popular image model techniques such as residual connections.

数据集

● The images are collected from the food-11 dataset classified into 11 classes.
● Training set: 9866 labeled images
● Validation set: 3430 labeled images
● Testing set: 3347 images

下载地址：ML2022Spring-HW3 | Kaggle

Baseline

Simple : 0.50099
Medium : 0.73207 Training Augmentation + Train Longer
Strong : 0.81872 Training Augmentation + Model Design + Train Looonger (+
Cross Validation + Ensemble)
Boss : 0.88446 Training Augmentation + Model Design + Test Time
Augmentation + Train Looonger (+ Cross Validation + Ensemble)

导包

import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm.auto import tqdm
from d2l import torch as d2l
import random

def same_seeds(seed):
    """Seed every random number generator this script relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU, plus every CUDA device when CUDA is available), and switches cuDNN
    to deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    # The stdlib generator has to be seeded as well; otherwise any code path
    # that draws from `random` stays non-deterministic across runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    # Disable cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

数据处理

Transforms

# Evaluation-time pipeline (validation and testing): no augmentation is
# applied. Each PIL image is only resized to 128x128 and converted to a Tensor.
test_tfm = transforms.Compose(
    [transforms.Resize(size=(128, 128)), transforms.ToTensor()]
)

# Training-time pipeline. Augmentation can also be used at test time: apply
# train_tfm to produce several variants of an image and ensemble the results.
train_tfm = transforms.Compose(
    [
        # Bring every image to a fixed shape (height = width = 128).
        transforms.Resize(size=(128, 128)),
        # Extra augmentation transforms belong here, before ToTensor(),
        # which should stay the last step of the pipeline.
        transforms.ToTensor(),
    ]
)

Datasets

class FoodDataset(Dataset):
    """Image dataset whose class label is encoded in each file name.

    File names are expected to look like ``<label>_<rest>.jpg``. When the part
    before the first underscore is not an integer, the sample gets the label
    ``-1`` (this is how the unlabeled test split is represented).

    Args:
        path: Directory to scan for ``.jpg`` files. When given, the sorted
            list of matching files becomes the dataset content.
        tfm: Callable applied to each PIL image before it is returned.
        files: Explicit list of image paths, used only when ``path`` is falsy.
    """

    def __init__(self,path=None,tfm=test_tfm,files=None):
        # `super(FoodDataset)` with a single argument creates an unbound super
        # object and never runs the parent initializer; use the zero-argument
        # form so the base class is initialized properly.
        super().__init__()
        self.path = path
        if path:
            self.files = sorted([os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")])
        else:
            self.files = files
        self.transform = tfm

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.files)

    def __getitem__(self,idx):
        """Return ``(transformed_image, label)`` for the file at ``idx``."""
        fname = self.files[idx]
        # Open inside a context manager so the file handle is released, and
        # force 3 channels so grayscale/RGBA/CMYK files produce the same
        # tensor layout as ordinary RGB images.
        with Image.open(fname) as im:
            im = self.transform(im.convert("RGB"))
        try:
            # basename() honours the platform's path separator, so no manual
            # "/" vs "\\" switch is needed.
            label = int(os.path.basename(fname).split("_")[0])
        except ValueError:
            label = -1 # test has no label
        return im,label

数据加载函数

def loadData(dataset_dir, batch_size, num_workers, train_tfm, test_tfm):
    """Build the training and validation data loaders.

    Images are read from ``<dataset_dir>/training`` (transformed with
    ``train_tfm``) and ``<dataset_dir>/validation`` (transformed with
    ``test_tfm``). Both loaders shuffle, pin memory and drop the last
    incomplete batch. The size of each split is printed.

    Returns:
        A ``(train_loader, valid_loader)`` tuple.
    """
    # Both loaders are configured identically; only the dataset differs.
    loader_options = dict(
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )
    summary_lines = (
        '训练集总长度是 {:d}, batch数量是 {:.2f}',
        '验证集总长度是 {:d}, batch数量是 {:.2f}',
    )

    train_set = FoodDataset(os.path.join(dataset_dir, "training"), tfm=train_tfm)
    train_loader = DataLoader(train_set, **loader_options)
    valid_set = FoodDataset(os.path.join(dataset_dir, "validation"), tfm=test_tfm)
    valid_loader = DataLoader(valid_set, **loader_options)

    for template, split in zip(summary_lines, (train_set, valid_set)):
        n_samples = len(split)
        print(template.format(n_samples, n_samples / batch_size))
    return train_loader, valid_loader

分类模型

使用的模型有点像VGG，共同点是使用3X3的卷积核、使用池化、维度持续X2、使用线性层，不同点在于这里的模型更浅，使用了BatchNorm

class Classifier(nn.Module):
    """VGG-like CNN with five conv stages and a three-layer classifier head.

    Every stage is Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2x2, stride 2), so the spatial size halves at each stage. The
    head's first Linear layer takes 512*4*4 features, which corresponds to a
    3x128x128 input, and the last layer emits 11 class logits.
    """

    def __init__(self):
        super().__init__()
        # (in_channels, out_channels) per stage. For a [3, 128, 128] input the
        # feature maps go [64, 64, 64] -> [128, 32, 32] -> [256, 16, 16]
        # -> [512, 8, 8] -> [512, 4, 4] after each stage's pooling.
        channel_plan = [(3, 64), (64, 128), (128, 256), (256, 512), (512, 512)]
        stages = []
        for c_in, c_out in channel_plan:
            stages.extend([
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2, padding=0),
            ])
        self.cnn = nn.Sequential(*stages)

        # Fully connected head: flattened features -> 1024 -> 512 -> 11.
        head = [
            nn.Linear(512 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        ]
        self.fc = nn.Sequential(*head)

    def forward(self, x):
        """Run the conv stack, flatten per sample, and return the logits."""
        features = self.cnn(x)
        batch = features.size(0)
        flat = features.view(batch, -1)
        return self.fc(flat)

训练

训练函数

def trainer(train_loader, val_loader, model, config, devices):  
    
    criterion = nn.CrossEntropyLoss() 
    optimizer = torch.optim.AdamW(model.parameters(), lr=config['learning_rate'], weight_decay=config['weight_decay'])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 
                T_0=config['T_0'], T_mult=config['T_mult'], 
                eta_min=config['learning_rate']/config['eta_min_ratio'])
    n_epochs, patience = config['num_epoch'], config['patience']
    num_batches = len(train_loader)
    show_batches = num_batches // config['show_num']
    
    if not os.path.isdir('./' + config['model_path'].split('/')[1]):
        os.mkdir('./' + config['model_path'].split('/')[1]) # Create directory of saving models.
    legend = ['train loss', 'train acc']
    
    if val_loader is not None:
        legend.append('valid loss')  
        legend.append('valid acc')  
    animator = d2l.Animator(xlabel='epoch', xlim=[0, n_epochs], legend=legend)       
        
    for epoch in range(n_epochs):
        train_acc, train_loss = 0.0, 0.0        
        
        # training
        model.train() # set the model to training mode
        for i, (data, labels) in enumerate(train_loader):
            data, labels = data.to(devices[0]), labels.to(devices[0])         

            optimizer.zero_grad() 
            outputs = model(data)             
            
            loss = criterion(outputs, labels)
            loss.backward() 
            optimizer.step() 

            _, train_pred = torch.max(outputs, 1) # get the index of the class with the highest probability
            train_acc += (train_pred.detach() == labels.detach()).sum().item()
            train_loss += loss.item()            
        
            if (i + 1) % show_batches == 0:
                train_acc = train_acc / show_batches / len(data)
                train_loss = train_loss / show_batches
                print('train_acc {:.3f}, train_loss {:.3f}'.format(train_acc, train_loss))
                animator.add(epoch  + (i + 1) / num_batches, (train_loss, train_acc, None, None)) 
                train_acc, train_loss = 0.0, 0.0               
                
        scheduler.step()
        # validation
        if val_loader != None:
            model.eval() # set the model to evaluation mode
            val_acc, val_loss = 0.0, 0.0  
            with torch.no_grad():
                for i, (data, labels) in enumerate(val_loader):
                    data, labels = data.to(devices[0]), labels.to(devices[0])
                    outputs = model(data)
                                        
                    loss = criterion(outputs, labels) 

                    _, val_pred = torch.max(outputs, 1) 
                    val_acc += (val_pred.cpu() == labels.cpu()).sum().item() # get the index of the class with the highest probability
                    val_loss += loss.item()                

                val_acc = val_acc / len(val_loader) / len(data)
                val_loss = val_loss / len(val_loader)
                print('val_acc {:.3f}, val_loss {:.3f} '.format(val_acc, val_loss))
                animator.add(epoch + 1, (None, None, val_loss, val_acc))
                
                # if the model improves, save a checkpoint at this epoch
                if val_acc > config['best_acc']:
                    conf

最低0.47元/天解锁文章