怎么用yolo识别旋转的图标？

原创于 2025-10-20 10:57:36 发布 · 717 阅读

20 ·

本内容遵循CC 4.0 BY-SA版权协议

GEO检测

标签

#yolo #图像处理 #人工智能

CLIP-GmP-ViT-L-14编码模型

CLIP-GmP-ViT-L-14编码模型是一个图文双塔编码模型，适合做图文匹配、零样本分类和跨模态检索演示。本镜像已经完成 Web 部署，打开页面即可上传图片并测试图文表征能力。

在学习yolo的过程中，我突发奇想，想去识别一个旋转了不同角度的图标。经过亲手试验，证明这是非常可行的。

思路：准备一个待识别的图标，要求图标背景最好是透明色；生成一个640X640的图像，在中心位置放置待识别的图标，并计算图标在图像中的坐标位置；通过python生成旋转后的图标及其四个角点的坐标，并保存下来；使用yolo工程训练；测试。

准备一个待识别的图标
用画图生成一个640X640的图像，并记录图标四个角点的坐标。如果以图像左上角为原点，图标的四个角点坐标是（288,286），（288,354），（344,354），（344,286）。如果以图像中心点（320,320）为原点，四个角点坐标是（-32，-34），（-32，34），（24，34），（24，-34）。注意：图标自身的中心是（316,320），并不与图像中心重合。

用python脚本旋转图标，以图标中心点为旋转中心。图像旋转的计算公式为 $i' = i\cos\theta - j\sin\theta$，$j' = i\sin\theta + j\cos\theta$，转化成代码：

def calc_coordinate(x, y, angle):
    """Rotate the point (x, y) about the origin and shift it by (320, 320).

    Args:
        x: Horizontal coordinate of the point, relative to the rotation origin.
        y: Vertical coordinate of the point, relative to the rotation origin.
        angle: Rotation angle in degrees.

    Returns:
        A tuple (xp, yp): the rotated point with 320 added to each coordinate.
    """
    radians = angle * math.pi / 180.0
    cos_a = math.cos(radians)
    sin_a = math.sin(radians)

    # Standard 2-D rotation, followed by the fixed +320 offset on both axes.
    rotated_x = x * cos_a - y * sin_a + 320
    rotated_y = x * sin_a + y * cos_a + 320
    return rotated_x, rotated_y

通过随机数位移图标，让图标出现在图像中的不同位置

def move(image, x0, y0, x1, y1, angle):
    """Paste the icon patch at 10 random positions and save image/label pairs.

    The patch image[y0:y0 + height, x0:x0 + width] is copied onto a fresh
    white 640x640 canvas at a random offset. Each sample is written as a JPEG
    plus a YOLO label file; it goes to qie_images_train with probability 0.9
    and to qie_images_val otherwise.

    Args:
        image: Source image array the patch is cut from (indexed [row, col]).
        x0: Left edge of the patch in the source image, in pixels.
        y0: Top edge of the patch in the source image, in pixels.
        x1: Opposite x edge; only abs(x0 - x1) is used, as the patch width.
        y1: Opposite y edge; only abs(y0 - y1) is used, as the patch height.
        angle: Value embedded in the output file names.
    """
    canvas_size = 640
    width = abs(x0 - x1)
    height = abs(y0 - y1)
    patch = image[y0: y0 + height, x0: x0 + width]

    # Files are written into these directories; create them up front so the
    # script also works when they have not been prepared by hand.
    for out_dir in ("qie_images_train", "qie_images_val"):
        os.makedirs(out_dir, exist_ok=True)

    for i in range(10):
        # Bound the offset by the patch size so the patch always fits on the
        # canvas. A fixed upper bound of 553 only fits patches up to 87 px;
        # a larger patch made the slice assignment below fail with a shape
        # mismatch whenever a large offset was drawn.
        rx = random.randint(0, canvas_size - width)
        ry = random.randint(0, canvas_size - height)

        # Plain white background.
        new_matrix = np.zeros((canvas_size, canvas_size, 3), np.uint8)
        new_matrix[:, :] = (255, 255, 255)

        print("move {} {} {} {}".format(rx, rx + width, ry, ry + height))
        print("width {} height {}".format(width, height))
        print("-- {} {} {} {}".format(y0, y0 + height, x0, x0 + width))
        print("== {} {} {} {}".format(ry, ry + height, rx, rx + width))
        new_matrix[ry: ry + height, rx: rx + width] = patch

        # Roughly 90% of the samples go to the training set, the rest to
        # the validation set.
        choice = random.random()
        if choice < 0.9:
            image_file_name = "qie_images_train/img_{}_{}.jpg".format(angle, i)
            anno_file_name = "qie_images_train/img_{}_{}.txt".format(angle, i)
        else:
            image_file_name = "qie_images_val/img_{}_{}.jpg".format(angle, i)
            anno_file_name = "qie_images_val/img_{}_{}.txt".format(angle, i)

        cv2.imwrite(image_file_name, new_matrix)
        # The label box is exactly the area the patch was pasted into.
        convert_yolo_anno(anno_file_name, rx, ry, rx + width, ry + height)

通过循环生成不同角度的图像

def rotate(xlist, ylist):
    """Generate rotated, randomly placed samples for every whole degree.

    Reads pic/qie.png, rotates it by 0..359 degrees in 1-degree steps,
    computes the axis-aligned bounding box of the rotated icon corners with
    calc_coordinate_list and hands the rotated image plus that box to move().
    The most recent rotated image is also written to rotated_qie.jpg.

    Args:
        xlist: x coordinates of the four icon corners, relative to (320, 320).
        ylist: y coordinates of the four icon corners, relative to (320, 320).

    Raises:
        FileNotFoundError: If pic/qie.png cannot be read.
    """
    image = cv2.imread("pic/qie.png")
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: pic/qie.png")
    (h, w) = image.shape[:2]
    print("width {}, height {}".format(w//2, h//2))

    # calc_coordinate rotates the corners about (320, 320), so the image has
    # to be rotated about that same point; otherwise the computed box drifts
    # away from the icon as the angle grows.
    center = (320, 320)
    scale = 1.0  # no scaling

    for angle in range(360):
        rotation_matrix = cv2.getRotationMatrix2D(center, angle, scale)
        rotated_image = cv2.warpAffine(image, rotation_matrix, (w, h))

        # OpenCV treats a positive angle as counter-clockwise in image
        # coordinates (origin at the top-left, y pointing down), whereas the
        # formula in calc_coordinate turns the other way in that coordinate
        # system. Negating the angle makes the corners follow the image.
        xmin, ymin, xmax, ymax = calc_coordinate_list(xlist, ylist, -angle)
        print("{} {} {} {}".format(xmin, ymin, xmax, ymax))
        print("--{} {} {} {}".format(int(xmin), int(ymin), int(xmax), int(ymax)))

        move(rotated_image, int(xmin), int(ymin), int(xmax), int(ymax), angle)

        # Overwritten on every iteration; keeps the latest rotation on disk
        # for visual inspection.
        cv2.imwrite("rotated_qie.jpg", rotated_image)

将第三点旋转得到的坐标转化成yolo标签

def convert_yolo_anno(file_path, x0, y0, x1, y1):
    """Write a one-line YOLO label for a box given by two corner points.

    The box (x0, y0)-(x1, y1) is taken in pixels on a 640x640 image and
    converted to "class x_center y_center width height", with all four
    numbers divided by the image size. The line is saved via write_file.

    Args:
        file_path: Destination path of the label file.
        x0: x coordinate of the first corner, in pixels.
        y0: y coordinate of the first corner, in pixels.
        x1: x coordinate of the opposite corner, in pixels.
        y1: y coordinate of the opposite corner, in pixels.
    """
    canvas_w = float(640)
    canvas_h = float(640)
    left, top = float(x0), float(y0)
    right, bottom = float(x1), float(y1)

    # Box centre and size, each normalised by the image dimensions.
    norm_cx = (left + right) / 2.0 / canvas_w
    norm_cy = (top + bottom) / 2.0 / canvas_h
    norm_w = abs(right - left) / canvas_w
    norm_h = abs(bottom - top) / canvas_h

    class_id = 0  # the only class index used by this script
    label_line = "{} {} {} {} {}".format(class_id, norm_cx, norm_cy, norm_w, norm_h)
    write_file(file_path, label_line)

新建两个目录，qie_images_train和qie_images_val, 以便存放图像

整个脚本代码

import cv2
import math
import random
import os
import numpy as np

def convert_yolo_anno(file_path, x0, y0, x1, y1):
    """Write a one-line YOLO label for a box given by two corner points.

    The box (x0, y0)-(x1, y1) is taken in pixels on a 640x640 image and
    converted to "class x_center y_center width height", with all four
    numbers divided by the image size. The line is saved via write_file.

    Args:
        file_path: Destination path of the label file.
        x0: x coordinate of the first corner, in pixels.
        y0: y coordinate of the first corner, in pixels.
        x1: x coordinate of the opposite corner, in pixels.
        y1: y coordinate of the opposite corner, in pixels.
    """
    canvas_w = float(640)
    canvas_h = float(640)
    left, top = float(x0), float(y0)
    right, bottom = float(x1), float(y1)

    # Box centre and size, each normalised by the image dimensions.
    norm_cx = (left + right) / 2.0 / canvas_w
    norm_cy = (top + bottom) / 2.0 / canvas_h
    norm_w = abs(right - left) / canvas_w
    norm_h = abs(bottom - top) / canvas_h

    class_id = 0  # the only class index used by this script
    label_line = "{} {} {} {} {}".format(class_id, norm_cx, norm_cy, norm_w, norm_h)
    write_file(file_path, label_line)


def write_file(file_path, qie_label):
    """Write qie_label to file_path as UTF-8, replacing any existing content.

    Args:
        file_path: Path of the file to create or overwrite.
        qie_label: Text to store in the file.
    """
    with open(file_path, "w", encoding="utf-8") as label_file:
        label_file.write(qie_label)


def calc_coordinate(x, y, angle):
    """Rotate the point (x, y) about the origin and shift it by (320, 320).

    Args:
        x: Horizontal coordinate of the point, relative to the rotation origin.
        y: Vertical coordinate of the point, relative to the rotation origin.
        angle: Rotation angle in degrees.

    Returns:
        A tuple (xp, yp): the rotated point with 320 added to each coordinate.
    """
    radians = angle * math.pi / 180.0
    cos_a = math.cos(radians)
    sin_a = math.sin(radians)

    # Standard 2-D rotation, followed by the fixed +320 offset on both axes.
    rotated_x = x * cos_a - y * sin_a + 320
    rotated_y = x * sin_a + y * cos_a + 320
    return rotated_x, rotated_y

def calc_coordinate_list(xlist, ylist, angle):
    """Rotate the first four points and return their axis-aligned bounds.

    Each point (xlist[i], ylist[i]) for i in 0..3 is passed through
    calc_coordinate; the rotated position is printed for debugging.

    Args:
        xlist: x coordinates of the points; the first four are used.
        ylist: y coordinates of the points; the first four are used.
        angle: Rotation angle in degrees, forwarded to calc_coordinate.

    Returns:
        A tuple (xmin, ymin, xmax, ymax) over the four rotated points.
    """
    rotated_xs = []
    rotated_ys = []
    for idx in range(4):
        px, py = calc_coordinate(xlist[idx], ylist[idx], angle)
        print("i:{} {}, {}".format(idx, px, py))
        rotated_xs.append(px)
        rotated_ys.append(py)

    return (min(rotated_xs), min(rotated_ys), max(rotated_xs), max(rotated_ys))
def move(image, x0, y0, x1, y1, angle):
    """Paste the icon patch at 10 random positions and save image/label pairs.

    The patch image[y0:y0 + height, x0:x0 + width] is copied onto a fresh
    white 640x640 canvas at a random offset. Each sample is written as a JPEG
    plus a YOLO label file; it goes to qie_images_train with probability 0.9
    and to qie_images_val otherwise.

    Args:
        image: Source image array the patch is cut from (indexed [row, col]).
        x0: Left edge of the patch in the source image, in pixels.
        y0: Top edge of the patch in the source image, in pixels.
        x1: Opposite x edge; only abs(x0 - x1) is used, as the patch width.
        y1: Opposite y edge; only abs(y0 - y1) is used, as the patch height.
        angle: Value embedded in the output file names.
    """
    canvas_size = 640
    width = abs(x0 - x1)
    height = abs(y0 - y1)
    patch = image[y0: y0 + height, x0: x0 + width]

    # Files are written into these directories; create them up front so the
    # script also works when they have not been prepared by hand.
    for out_dir in ("qie_images_train", "qie_images_val"):
        os.makedirs(out_dir, exist_ok=True)

    for i in range(10):
        # Bound the offset by the patch size so the patch always fits on the
        # canvas. A fixed upper bound of 553 only fits patches up to 87 px;
        # a larger patch made the slice assignment below fail with a shape
        # mismatch whenever a large offset was drawn.
        rx = random.randint(0, canvas_size - width)
        ry = random.randint(0, canvas_size - height)

        # Plain white background.
        new_matrix = np.zeros((canvas_size, canvas_size, 3), np.uint8)
        new_matrix[:, :] = (255, 255, 255)

        print("move {} {} {} {}".format(rx, rx + width, ry, ry + height))
        print("width {} height {}".format(width, height))
        print("-- {} {} {} {}".format(y0, y0 + height, x0, x0 + width))
        print("== {} {} {} {}".format(ry, ry + height, rx, rx + width))
        new_matrix[ry: ry + height, rx: rx + width] = patch

        # Roughly 90% of the samples go to the training set, the rest to
        # the validation set.
        choice = random.random()
        if choice < 0.9:
            image_file_name = "qie_images_train/img_{}_{}.jpg".format(angle, i)
            anno_file_name = "qie_images_train/img_{}_{}.txt".format(angle, i)
        else:
            image_file_name = "qie_images_val/img_{}_{}.jpg".format(angle, i)
            anno_file_name = "qie_images_val/img_{}_{}.txt".format(angle, i)

        cv2.imwrite(image_file_name, new_matrix)
        # The label box is exactly the area the patch was pasted into.
        convert_yolo_anno(anno_file_name, rx, ry, rx + width, ry + height)



def rotate(xlist, ylist):
    """Generate rotated, randomly placed samples for every whole degree.

    Reads pic/qie.png, rotates it by 0..359 degrees in 1-degree steps,
    computes the axis-aligned bounding box of the rotated icon corners with
    calc_coordinate_list and hands the rotated image plus that box to move().
    The most recent rotated image is also written to rotated_qie.jpg.

    Args:
        xlist: x coordinates of the four icon corners, relative to (320, 320).
        ylist: y coordinates of the four icon corners, relative to (320, 320).

    Raises:
        FileNotFoundError: If pic/qie.png cannot be read.
    """
    image = cv2.imread("pic/qie.png")
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: pic/qie.png")
    (h, w) = image.shape[:2]
    print("width {}, height {}".format(w//2, h//2))

    # calc_coordinate rotates the corners about (320, 320), so the image has
    # to be rotated about that same point; otherwise the computed box drifts
    # away from the icon as the angle grows.
    center = (320, 320)
    scale = 1.0  # no scaling

    for angle in range(360):
        rotation_matrix = cv2.getRotationMatrix2D(center, angle, scale)
        rotated_image = cv2.warpAffine(image, rotation_matrix, (w, h))

        # OpenCV treats a positive angle as counter-clockwise in image
        # coordinates (origin at the top-left, y pointing down), whereas the
        # formula in calc_coordinate turns the other way in that coordinate
        # system. Negating the angle makes the corners follow the image.
        xmin, ymin, xmax, ymax = calc_coordinate_list(xlist, ylist, -angle)
        print("{} {} {} {}".format(xmin, ymin, xmax, ymax))
        print("--{} {} {} {}".format(int(xmin), int(ymin), int(xmax), int(ymax)))

        move(rotated_image, int(xmin), int(ymin), int(xmax), int(ymax), angle)

        # Overwritten on every iteration; keeps the latest rotation on disk
        # for visual inspection.
        cv2.imwrite("rotated_qie.jpg", rotated_image)

if __name__ == "__main__":
    # Icon corners as offsets, in matching order:
    # (-32, -34), (-32, 34), (24, 34), (24, -34).
    corner_xs = [-32, -32, 24, 24]
    corner_ys = [-34, 34, 34, -34]
    # Alternative corner set kept from the original script:
    # x = [-28, -28, 28, 28] with the same y values.
    rotate(corner_xs, corner_ys)

运行该脚本可以得到3600张不同旋转、位移的图像，可用于yolo训练

搭建yolo工程和环境

yolo训练

import os
import sys

# Root of the local YOLOv8 checkout that provides the `ultralytics` package.
YOLO_ROOT = "/home/yangkm/studio/yolov8"

# Make the checkout importable in this process. It is also exported through
# PYTHONPATH, presumably so that child processes can import it too; confirm.
os.environ["PYTHONPATH"] = YOLO_ROOT
sys.path.insert(0, YOLO_ROOT)

# The YOLO class can be imported now that sys.path has been extended.
from ultralytics import YOLO

# Start from the yolov8n.pt weights.
model = YOLO("yolov8n.pt")

# Training options; add resume=True to continue an interrupted run.
train_options = dict(
    data="./ultralytics/cfg/datasets/dm.yaml",
    batch=280,
    epochs=320,
    imgsz=640,
    device=[0, 1, 2, 3, 4, 5, 6, 7],
    workers=4,
    val=False,
    task="detect",
    patience=0,
)

# Train the model
model.train(**train_options)

./ultralytics/cfg/datasets/dm.yaml指定训练数据的目录：

# Dataset config for the single-class "qie" icon detector.
# The commented-out entries below appear to be dataset paths from other
# experiments, kept for reference.
# path: ../datasets/dota8 # dataset root dir
#train: /home/yangkm/plate_face_dataset/new_train_data  # train images (relative to 'path') 4 images
#train:['/home/yangkm/plate_face_dataset/new_train_data','/home/yangkm/plate_face_dataset/ccpd_challenge_train']
#val: /home/yangkm/plate_face_dataset/new_val_data   # val images (relative to 'path') 4 images
#val:['/home/yangkm/plate_face_dataset/new_val_data','/home/yangkm/plate_face_dataset/ccpd_challenge_val']

# Training image directories
train:
  #- /home/yangkm/plate_face_dataset/wider_face_train_data
- /home/yangkm/dm_script/test/qie_images_train

# Validation image directories
val:
- /home/yangkm/dm_script/test/qie_images_val

# Class index -> class name (one class only)
names:
  0: qie

测试

yolo predict model=qie_10_08.pt source=qie_test save_txt conf=0.2

您可能感兴趣的与本文相关的镜像

CLIP-GmP-ViT-L-14编码模型

图像识别

CLIP