【数据集】YOLO的labelme标注的样本进行样本增强，图像和label同时增强

原创已于 2025-05-21 10:29:19 修改 · 557 阅读

12 ·

本内容遵循CC 4.0 BY-SA版权协议

标签

#YOLO

于 2025-04-18 11:37:43 首次发布

[识别] 专栏收录该内容

7 篇文章

订阅专栏

CLIP-GmP-ViT-L-14编码模型

CLIP-GmP-ViT-L-14编码模型是一个图文双塔编码模型，适合做图文匹配、零样本分类和跨模态检索演示。本镜像已经完成 Web 部署，打开页面即可上传图片并测试图文表征能力。

YOLO的labelme标注的样本进行样本增强，图像和label同时增强

参考：json格式 labelme标注的样本进行样本增强（图像和label同时增强）
如果你是json格式就跳转吧。

他站在学姐肩膀上
在这里插入图片描述
我站在他学姐和他和AI的肩膀上（YHJ学姐和lemon_tttea无敌）（AI ？go work！！）

在这里插入图片描述

label如下：
在这里插入图片描述

准备文件夹和文件：首先，确保你的工作目录中有以下结构：

你的工作目录/
├── script_name.py  # 保存的脚本文件
├── data/           # 存放原始图片和标注文件的文件夹
│   ├── image1.jpg
│   ├── image1.txt
│   ├── image2.jpg
│   ├── image2.txt
│   └── ...

直接在命令行运行：

python script_name.py --source_img_txt_path data --save_img_txt_path data2

• script_name.py 是你的 Python 脚本文件。
• data 文件夹中包含原始的图片文件（如 .jpg 或 .png ）和对应的 YOLO 格式标注文件（如 .txt ）。
• data 是增强前的原始文件夹，data2 是保存增强结果的文件夹；记得换成你自己的文件夹名。

复制完代码后，运行时提示缺哪个模块，就用 pip install 安装对应的包（注意包名可能与模块名不同，见下例）。

例如：
ModuleNotFoundError: No module named 'skimage'

pip install scikit-image

下面是具体代码，你感觉不满意再自己调整吧，或者让AI go work!!
(YHJ学姐和lemon_tttea无敌)
再次鸣谢。

import time
import random
import cv2
import os
import numpy as np
from skimage.util import random_noise
import base64
import json
import re
from copy import deepcopy
import argparse

# 图像均为cv2读取
class DataAugmentForObjectDetection():
    """Randomly augment an image together with its YOLO-format boxes.

    Each box is a mutable list ``[class_id, x_center, y_center, width,
    height]`` whose last four values are normalised to the image size
    (the layout produced by ``ToolHelper.parse_yolo_txt``).

    Images are expected to be arrays as returned by ``cv2.imread``.
    """

    def __init__(self, change_light_rate=0.5,
                 add_noise_rate=0.5, random_point=0.5, flip_rate=0.5, shift_rate=0.5, rand_point_percent=0.03,
                 is_addNoise=True, is_changeLight=True, is_random_point=True, is_shift_pic_bboxes=True,
                 is_filp_pic_bboxes=True):
        """Store the per-operation probabilities and on/off switches.

        Args:
            change_light_rate: probability of applying the brightness change.
            add_noise_rate: probability of adding noise.
            random_point: probability of painting random black/white pixels.
            flip_rate: probability of flipping the image.
            shift_rate: probability of translating the image.
            rand_point_percent: fraction of pixels overwritten by
                ``_addRandPoint``.
            is_addNoise, is_changeLight, is_random_point,
            is_shift_pic_bboxes, is_filp_pic_bboxes: enable or disable the
                corresponding operation entirely.
        """
        # Probability of each operation being applied in one pass.
        self.change_light_rate = change_light_rate
        self.add_noise_rate = add_noise_rate
        self.random_point = random_point
        self.flip_rate = flip_rate
        self.shift_rate = shift_rate

        self.rand_point_percent = rand_point_percent

        # Switches selecting which augmentations may be used at all.
        self.is_addNoise = is_addNoise
        self.is_changeLight = is_changeLight
        self.is_random_point = is_random_point
        self.is_filp_pic_bboxes = is_filp_pic_bboxes
        self.is_shift_pic_bboxes = is_shift_pic_bboxes

    def _addNoise(self, img):
        """Return a noisy uint8 copy of ``img`` (skimage default noise mode).

        ``random_noise`` returns a float image, so the result is scaled back
        to 0..255 and converted to uint8 so that later steps and the saved
        file see the same dtype as the other augmentations produce.
        Assumes ``img`` is an 8-bit image.
        """
        noisy = random_noise(img) * 255
        return np.clip(np.rint(noisy), 0, 255).astype(np.uint8)

    def _changeLight(self, img):
        """Return ``img`` darkened by a random factor in [0.8, 1]."""
        alpha = random.uniform(0.8, 1)
        blank = np.zeros(img.shape, img.dtype)
        # Blending with an all-zero image scales every pixel by alpha.
        return cv2.addWeighted(img, alpha, blank, 1 - alpha, 0)

    def _addRandPoint(self, img):
        """Overwrite random pixels with 0 or 255, in place, and return ``img``.

        The number of pixels drawn is ``rand_point_percent`` of the image
        area; positions may repeat.
        """
        rows, cols = img.shape[0], img.shape[1]
        num = int(self.rand_point_percent * rows * cols)
        for _ in range(num):
            row = random.randint(0, rows - 1)
            col = random.randint(0, cols - 1)
            img[row, col] = 0 if random.randint(0, 1) == 0 else 255
        return img

    @staticmethod
    def _shift_limits(bboxes, w, h):
        """Return the pixel margins (left, right, top, bottom) around all boxes.

        Boxes are centre-based, so each edge is the centre plus or minus half
        the box size. With no boxes the margins are the full image size.
        """
        x_min, x_max = w, 0
        y_min, y_max = h, 0
        for bbox in bboxes:
            half_w = bbox[3] / 2
            half_h = bbox[4] / 2
            x_min = min(x_min, (bbox[1] - half_w) * w)
            y_min = min(y_min, (bbox[2] - half_h) * h)
            x_max = max(x_max, (bbox[1] + half_w) * w)
            y_max = max(y_max, (bbox[2] + half_h) * h)
        return x_min, w - x_max, y_min, h - y_max

    def _shift_pic_bboxes(self, img, bboxes):
        """Translate the image by a random offset that keeps every box inside.

        Box centres are updated in place by the same normalised offset.

        Returns:
            ``(shifted_image, bboxes)``.
        """
        h, w = img.shape[0], img.shape[1]
        d_to_left, d_to_right, d_to_top, d_to_bottom = self._shift_limits(bboxes, w, h)

        # Keep a one-pixel safety margin, and never let a limit turn negative
        # (a box touching the border must not be pushed further out).
        max_left = max(0.0, d_to_left - 1)
        max_right = max(0.0, d_to_right - 1)
        max_up = max(0.0, d_to_top - 1)
        max_down = max(0.0, d_to_bottom - 1)

        # Normalised offsets: positive x moves right, positive y moves down.
        x = random.uniform(-max_left / w, max_right / w)
        y = random.uniform(-max_up / h, max_down / h)

        M = np.float32([[1, 0, x * w], [0, 1, y * h]])
        shift_img = cv2.warpAffine(img, M, (w, h))

        for bbox in bboxes:
            # Clamp as a last line of defence against rounding.
            bbox[1] = max(0, min(1, bbox[1] + x))
            bbox[2] = max(0, min(1, bbox[2] + y))

        return shift_img, bboxes

    def _filp_pic_bboxes(self, img, bboxes):
        """Flip the image vertically, horizontally or both (equal chance).

        Box centres are mirrored in place to match; widths and heights are
        unaffected by a flip.

        Returns:
            ``(flipped_image, bboxes)``.
        """
        # cv2.flip codes: 0 = vertical (around the x-axis),
        # 1 = horizontal (around the y-axis), -1 = both.
        flip_code = random.choice((0, 1, -1))
        flip_img = cv2.flip(img, flip_code)

        for bbox in bboxes:
            if flip_code in (1, -1):
                bbox[1] = 1 - bbox[1]
            if flip_code in (0, -1):
                bbox[2] = 1 - bbox[2]

        return flip_img, bboxes

    def dataAugment(self, img, bboxes):
        """Apply the enabled augmentations at random until at least one fires.

        ``img`` and the lists inside ``bboxes`` may be modified in place, so
        pass copies if the originals must be preserved.

        If no augmentation can ever fire (all disabled or all rates <= 0) the
        inputs are returned unchanged instead of looping forever.

        Returns:
            ``(augmented_image, bboxes)``.
        """
        can_fire = (
            (self.is_changeLight and self.change_light_rate > 0)
            or (self.is_addNoise and self.add_noise_rate > 0)
            or (self.is_random_point and self.random_point > 0)
            or (self.is_shift_pic_bboxes and self.shift_rate > 0)
            or (self.is_filp_pic_bboxes and self.flip_rate > 0)
        )
        if not can_fire:
            return img, bboxes

        change_num = 0  # number of augmentations applied so far
        while change_num < 1:
            # Every gate uses "< rate" so that each rate is the probability
            # of the operation being applied.
            if self.is_changeLight and random.random() < self.change_light_rate:
                change_num += 1
                img = self._changeLight(img)

            if self.is_addNoise and random.random() < self.add_noise_rate:
                change_num += 1
                img = self._addNoise(img)

            if self.is_random_point and random.random() < self.random_point:
                change_num += 1
                img = self._addRandPoint(img)

            if self.is_shift_pic_bboxes and random.random() < self.shift_rate:
                change_num += 1
                img, bboxes = self._shift_pic_bboxes(img, bboxes)

            if self.is_filp_pic_bboxes and random.random() < self.flip_rate:
                change_num += 1
                img, bboxes = self._filp_pic_bboxes(img, bboxes)

        return img, bboxes


# 工具类
class ToolHelper():
    """File helpers for reading/writing YOLO label files and saving images."""

    def parse_yolo_txt(self, path):
        """Read a YOLO label file into a list of boxes.

        Each non-blank line contributes
        ``[class_id, x_center, y_center, width, height]`` built from its
        first five whitespace-separated fields. Blank lines (for example a
        trailing newline at the end of the file) are skipped.

        Raises:
            IndexError: a non-blank line has fewer than five fields.
            ValueError: a field cannot be converted to a number.
        """
        bboxes = []
        with open(path, 'r') as f:
            for line in f:
                parts = line.split()
                if not parts:
                    continue  # tolerate empty lines instead of crashing
                class_id = int(parts[0])
                x_center = float(parts[1])
                y_center = float(parts[2])
                width = float(parts[3])
                height = float(parts[4])
                bboxes.append([class_id, x_center, y_center, width, height])
        return bboxes

    def save_yolo_txt(self, file_name, save_folder, bboxes):
        """Write ``bboxes`` to ``save_folder/file_name``, one box per line.

        Coordinates are written with six decimal places.
        """
        with open(os.path.join(save_folder, file_name), 'w') as f:
            f.writelines(
                f"{int(bbox[0])} {bbox[1]:.6f} {bbox[2]:.6f} {bbox[3]:.6f} {bbox[4]:.6f}\n"
                for bbox in bboxes
            )

    def save_img(self, save_path, img):
        """Write ``img`` to ``save_path`` and return ``cv2.imwrite``'s result.

        The return value lets callers detect a failed write; callers that
        ignore it behave as before.
        """
        return cv2.imwrite(save_path, img)


if __name__ == '__main__':
    # Command-line options: source folder, output folder, and how many
    # augmented copies to generate per image (default 5, as before).
    parser = argparse.ArgumentParser()
    parser.add_argument('--source_img_txt_path', type=str, default='data')
    parser.add_argument('--save_img_txt_path', type=str, default='data2')
    parser.add_argument('--need_aug_num', type=int, default=5)
    args = parser.parse_args()
    source_img_txt_path = args.source_img_txt_path  # folder with images and .txt labels
    save_img_txt_path = args.save_img_txt_path  # folder receiving the augmented results
    need_aug_num = args.need_aug_num  # augmented copies per image

    toolhelper = ToolHelper()  # file I/O helpers
    dataAug = DataAugmentForObjectDetection()  # augmentation engine

    # Create the output folder (including missing parents) if needed.
    os.makedirs(save_img_txt_path, exist_ok=True)
    save_abs_path = os.path.abspath(save_img_txt_path)

    # Extend this tuple to support further image formats.
    image_exts = ('.jpg', '.jpeg', '.png')

    for parent, dirs, files in os.walk(source_img_txt_path):
        # Do not descend into the output folder when it lives inside the
        # source tree, otherwise augmented files would be augmented again.
        dirs[:] = [d for d in dirs
                   if os.path.abspath(os.path.join(parent, d)) != save_abs_path]
        files.sort()  # deterministic processing order
        for file in files:
            stem, ext = os.path.splitext(file)
            if ext.lower() not in image_exts:
                continue

            pic_path = os.path.join(parent, file)
            txt_path = os.path.join(parent, stem + '.txt')
            if not os.path.exists(txt_path):
                print(f"标注文件 {txt_path} 不存在，跳过该图片")
                continue

            bboxes = toolhelper.parse_yolo_txt(txt_path)  # YOLO-format labels
            img = cv2.imread(pic_path)
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                print(f"图片 {pic_path} 读取失败，跳过该图片")
                continue

            for idx in range(1, need_aug_num + 1):
                # Augment copies so every round starts from the original data.
                auged_img, auged_bboxes = dataAug.dataAugment(deepcopy(img), deepcopy(bboxes))

                img_name = '{}_{}{}'.format(stem, idx, ext)
                img_save_path = os.path.join(save_img_txt_path, img_name)
                toolhelper.save_img(img_save_path, auged_img)

                txt_name = f"{stem}_{idx}.txt"
                toolhelper.save_yolo_txt(txt_name, save_img_txt_path, auged_bboxes)
                print(f"保存增强图片和标注文件：{img_name} 和 {txt_name}")