怎么用yolo识别旋转的图标?

CLIP-GmP-ViT-L-14编码模型

`CLIP-GmP-ViT-L-14编码模型` 是一个图文双塔编码模型,适合做图文匹配、零样本分类和跨模态检索演示。本镜像已经完成 Web 部署,打开页面即可上传图片并测试图文表征能力。

在学习yolo过程中,我有一个突发奇想,想去识别一个旋转不同角度的图标。并且新手试验,证明是非常可行的。

思路:准备一个待识别的图标,要求图标背景最好是透明色;生成一个640X640的图像,在中心位置放置待识别的图标,并计算图标在图像中的坐标位置;通过python生成旋转后的图标及其四个角点的坐标,并保存下来;使用yolo工程训练;测试。

  1. 准备一个待识别的图标
  2. 用画图生成一个640X640的图像,并记录图标四个角点的坐标。如果以图像左上角为原点,图标的四个角点坐标是(288,286),(288,354),(344,354),(344,286)。如果以图像中心点(320,320)为原点,四个角点坐标是(-32,-34),(-32,34),(24,34),(24,-34)
  3. python脚本旋转,以图标中心点旋转。图像旋转的计算公式:i′=icosθ−jsinθ, j′=isinθ+jcosθ,转化成代码
    def calc_coordinate(x, y, angle):
        """Rotate the offset (x, y) by ``angle`` degrees and shift it by 320.

        Applies x' = x*cos(a) - y*sin(a), y' = x*sin(a) + y*cos(a) and then
        adds 320 to both components.

        Args:
            x: horizontal offset of the point.
            y: vertical offset of the point.
            angle: rotation angle in degrees.

        Returns:
            The tuple ``(xp, yp)`` of the rotated, shifted point.
        """
        theta = angle * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        return (x * cos_t - y * sin_t + 320, x * sin_t + y * cos_t + 320)
    

  4. 通过随机数位移图标,让图标出现在图像中的不同位置
    def move(image, x0, y0, x1, y1, angle):
        """Paste the icon crop at 10 random places on a white canvas and save each one.

        For every sample a 640x640 white image is created, the region
        ``image[y0:y0 + height, x0:x0 + width]`` is copied to a random
        position, the picture is written as JPEG and a YOLO label describing
        the pasted box is written next to it. A random draw sends about 90% of
        the samples to ``qie_images_train`` and the rest to ``qie_images_val``.

        Args:
            image: source image that contains the icon.
            x0, y0, x1, y1: integer pixel bounds of the icon inside ``image``.
            angle: rotation angle of ``image``; only used in the file names.

        Raises:
            ValueError: if the icon box is larger than the 640 px canvas.
        """
        canvas_size = 640
        width = abs(x0 - x1)
        height = abs(y0 - y1)

        # Largest top-left offset that keeps the whole patch on the canvas.
        # A fixed bound (previously 553) lets boxes wider than 87 px run past
        # the canvas edge, which makes the slice assignment below fail.
        max_rx = canvas_size - width
        max_ry = canvas_size - height
        if max_rx < 0 or max_ry < 0:
            raise ValueError(
                "icon box {}x{} does not fit on a {} px canvas".format(width, height, canvas_size))

        # The crop does not change between samples, so take it once.
        patch = image[y0: y0 + height, x0: x0 + width]

        for i in range(10):
            rx = random.randint(0, max_rx)
            ry = random.randint(0, max_ry)
            new_matrix = np.full((canvas_size, canvas_size, 3), 255, np.uint8)

            print("move {} {} {} {}".format(rx, rx + width, ry, ry + height))
            print("width {} height {}".format(width, height))
            print("-- {} {} {} {}".format(y0, y0 + height, x0, x0 + width))
            print("== {} {} {} {}".format(ry, ry + height, rx, rx + width))
            new_matrix[ry: ry + height, rx: rx + width] = patch

            # Random train/validation split, decided per sample
            if random.random() < 0.9:
                image_file_name = "qie_images_train/img_{}_{}.jpg".format(angle, i)
                anno_file_name = "qie_images_train/img_{}_{}.txt".format(angle, i)
            else:
                image_file_name = "qie_images_val/img_{}_{}.jpg".format(angle, i)
                anno_file_name = "qie_images_val/img_{}_{}.txt".format(angle, i)

            cv2.imwrite(image_file_name, new_matrix)
            convert_yolo_anno(anno_file_name, rx, ry, rx + width, ry + height)
    
    

  5. 通过循环生成不同角度的图像
    def rotate(xlist, ylist):
        """Generate samples of the icon for every whole-degree rotation.

        Reads ``pic/qie.png``, rotates it by 0..359 degrees around the pixel
        ``(w // 2 - 4, h // 2)`` and passes each rotated image, together with
        the axis-aligned bounding box of the rotated icon corners, to ``move``.
        The last rotated image is also kept in ``rotated_qie.jpg``.

        Args:
            xlist: x offsets of the icon corners, measured from pixel 320
                (the same origin ``calc_coordinate`` adds back).
            ylist: y offsets of the icon corners, measured from pixel 320.

        Raises:
            FileNotFoundError: if ``pic/qie.png`` cannot be read.
        """
        image = cv2.imread("pic/qie.png")
        if image is None:
            # cv2.imread reports failure by returning None instead of raising
            raise FileNotFoundError("pic/qie.png")
        (h, w) = image.shape[:2]
        print("width {}, height {}".format(w//2, h//2))

        # Rotation centre and scale factor
        center = (w//2 - 4, h // 2 )
        scale = 1.0

        # Absolute corner positions in homogeneous form, one row per corner
        corners = np.array([[x + 320.0, y + 320.0, 1.0] for x, y in zip(xlist, ylist)])

        for angle in range(360):
            # Build the rotation matrix and warp the image with it
            rotation_matrix = cv2.getRotationMatrix2D(center, angle, scale)
            rotated_image = cv2.warpAffine(image, rotation_matrix, (w, h))

            # Map the corners with the very matrix used for the warp. Rotating
            # them about a different centre, or in the opposite direction,
            # makes the box drift away from the rotated icon.
            moved = np.dot(corners, rotation_matrix.T)
            xmin, ymin = (float(v) for v in moved.min(axis=0))
            xmax, ymax = (float(v) for v in moved.max(axis=0))
            print("{} {} {} {}".format(xmin, ymin, xmax, ymax))
            print("--{} {} {} {}".format(int(xmin), int(ymin), int(xmax), int(ymax)))
            move(rotated_image, int(xmin), int(ymin), int(xmax), int(ymax), angle)
            # Keep the most recent rotated image for inspection
            cv2.imwrite("rotated_qie.jpg", rotated_image)
    

  6. 将第三点旋转得到的坐标转化成yolo标签
    def convert_yolo_anno(file_path, x0, y0, x1, y1):
        """Write a one-line YOLO label for the box (x0, y0)-(x1, y1).

        The box is normalised against a 640x640 image and stored as
        ``class x_center y_center width height`` with class id 0.

        Args:
            file_path: destination of the label file (passed to ``write_file``).
            x0, y0: one corner of the box in pixels.
            x1, y1: the opposite corner of the box in pixels.
        """
        frame_w = float(640)
        frame_h = float(640)
        left, top, right, bottom = float(x0), float(y0), float(x1), float(y1)

        # Box centre and size, each divided by the image dimension
        cx = ((left + right) / 2.0) / frame_w
        cy = ((top + bottom) / 2.0) / frame_h
        box_w = abs(right - left) / frame_w
        box_h = abs(bottom - top) / frame_h

        class_id = 0
        fields = (class_id, cx, cy, box_w, box_h)
        write_file(file_path, " ".join(str(value) for value in fields))
    
    

  7. 新建两个目录,qie_images_train和qie_images_val, 以便存放图像
  8. 整个脚本代码
    import cv2
    import math
    import random
    import os
    import numpy as np
    
    def convert_yolo_anno(file_path, x0, y0, x1, y1):
        """Write a one-line YOLO label for the box (x0, y0)-(x1, y1).

        The box is normalised against a 640x640 image and stored as
        ``class x_center y_center width height`` with class id 0.

        Args:
            file_path: destination of the label file (passed to ``write_file``).
            x0, y0: one corner of the box in pixels.
            x1, y1: the opposite corner of the box in pixels.
        """
        frame_w = float(640)
        frame_h = float(640)
        left, top, right, bottom = float(x0), float(y0), float(x1), float(y1)

        # Box centre and size, each divided by the image dimension
        cx = ((left + right) / 2.0) / frame_w
        cy = ((top + bottom) / 2.0) / frame_h
        box_w = abs(right - left) / frame_w
        box_h = abs(bottom - top) / frame_h

        class_id = 0
        fields = (class_id, cx, cy, box_w, box_h)
        write_file(file_path, " ".join(str(value) for value in fields))
    
    
    def write_file(file_path,  qie_label):
        """Overwrite ``file_path`` with the text ``qie_label`` encoded as UTF-8."""
        with open(file_path, 'w', encoding='utf-8') as label_file:
            label_file.write(qie_label)
    
    
    def calc_coordinate(x, y, angle):
        """Rotate the offset (x, y) by ``angle`` degrees and shift it by 320.

        Applies x' = x*cos(a) - y*sin(a), y' = x*sin(a) + y*cos(a) and then
        adds 320 to both components.

        Args:
            x: horizontal offset of the point.
            y: vertical offset of the point.
            angle: rotation angle in degrees.

        Returns:
            The tuple ``(xp, yp)`` of the rotated, shifted point.
        """
        theta = angle * math.pi / 180.0
        cos_t = math.cos(theta)
        sin_t = math.sin(theta)
        return (x * cos_t - y * sin_t + 320, x * sin_t + y * cos_t + 320)
    
    def calc_coordinate_list(xlist, ylist, angle):
        """Return the axis-aligned bounds of four points after rotation.

        The first four ``(xlist[i], ylist[i])`` pairs are passed through
        ``calc_coordinate``; every result is printed for debugging.

        Args:
            xlist: x offsets of the points (at least four entries).
            ylist: y offsets of the points (at least four entries).
            angle: rotation angle in degrees.

        Returns:
            The tuple ``(xmin, ymin, xmax, ymax)`` over the rotated points.
        """
        rotated = []
        for idx in range(4):
            point = calc_coordinate(xlist[idx], ylist[idx], angle)
            print("i:{} {}, {}".format(idx, point[0], point[1]))
            rotated.append(point)

        xs = [p[0] for p in rotated]
        ys = [p[1] for p in rotated]
        return (min(xs), min(ys), max(xs), max(ys))
    def move(image, x0, y0, x1, y1, angle):
        """Paste the icon crop at 10 random places on a white canvas and save each one.

        For every sample a 640x640 white image is created, the region
        ``image[y0:y0 + height, x0:x0 + width]`` is copied to a random
        position, the picture is written as JPEG and a YOLO label describing
        the pasted box is written next to it. A random draw sends about 90% of
        the samples to ``qie_images_train`` and the rest to ``qie_images_val``.

        Args:
            image: source image that contains the icon.
            x0, y0, x1, y1: integer pixel bounds of the icon inside ``image``.
            angle: rotation angle of ``image``; only used in the file names.

        Raises:
            ValueError: if the icon box is larger than the 640 px canvas.
        """
        canvas_size = 640
        width = abs(x0 - x1)
        height = abs(y0 - y1)

        # Largest top-left offset that keeps the whole patch on the canvas.
        # A fixed bound (previously 553) lets boxes wider than 87 px run past
        # the canvas edge, which makes the slice assignment below fail.
        max_rx = canvas_size - width
        max_ry = canvas_size - height
        if max_rx < 0 or max_ry < 0:
            raise ValueError(
                "icon box {}x{} does not fit on a {} px canvas".format(width, height, canvas_size))

        # The crop does not change between samples, so take it once.
        patch = image[y0: y0 + height, x0: x0 + width]

        for i in range(10):
            rx = random.randint(0, max_rx)
            ry = random.randint(0, max_ry)
            new_matrix = np.full((canvas_size, canvas_size, 3), 255, np.uint8)

            print("move {} {} {} {}".format(rx, rx + width, ry, ry + height))
            print("width {} height {}".format(width, height))
            print("-- {} {} {} {}".format(y0, y0 + height, x0, x0 + width))
            print("== {} {} {} {}".format(ry, ry + height, rx, rx + width))
            new_matrix[ry: ry + height, rx: rx + width] = patch

            # Random train/validation split, decided per sample
            if random.random() < 0.9:
                image_file_name = "qie_images_train/img_{}_{}.jpg".format(angle, i)
                anno_file_name = "qie_images_train/img_{}_{}.txt".format(angle, i)
            else:
                image_file_name = "qie_images_val/img_{}_{}.jpg".format(angle, i)
                anno_file_name = "qie_images_val/img_{}_{}.txt".format(angle, i)

            cv2.imwrite(image_file_name, new_matrix)
            convert_yolo_anno(anno_file_name, rx, ry, rx + width, ry + height)
    
    
    
    def rotate(xlist, ylist):
        """Generate samples of the icon for every whole-degree rotation.

        Reads ``pic/qie.png``, rotates it by 0..359 degrees around the pixel
        ``(w // 2 - 4, h // 2)`` and passes each rotated image, together with
        the axis-aligned bounding box of the rotated icon corners, to ``move``.
        The last rotated image is also kept in ``rotated_qie.jpg``.

        Args:
            xlist: x offsets of the icon corners, measured from pixel 320
                (the same origin ``calc_coordinate`` adds back).
            ylist: y offsets of the icon corners, measured from pixel 320.

        Raises:
            FileNotFoundError: if ``pic/qie.png`` cannot be read.
        """
        image = cv2.imread("pic/qie.png")
        if image is None:
            # cv2.imread reports failure by returning None instead of raising
            raise FileNotFoundError("pic/qie.png")
        (h, w) = image.shape[:2]
        print("width {}, height {}".format(w//2, h//2))

        # Rotation centre and scale factor
        center = (w//2 - 4, h // 2 )
        scale = 1.0

        # Absolute corner positions in homogeneous form, one row per corner
        corners = np.array([[x + 320.0, y + 320.0, 1.0] for x, y in zip(xlist, ylist)])

        for angle in range(360):
            # Build the rotation matrix and warp the image with it
            rotation_matrix = cv2.getRotationMatrix2D(center, angle, scale)
            rotated_image = cv2.warpAffine(image, rotation_matrix, (w, h))

            # Map the corners with the very matrix used for the warp. Rotating
            # them about a different centre, or in the opposite direction,
            # makes the box drift away from the rotated icon.
            moved = np.dot(corners, rotation_matrix.T)
            xmin, ymin = (float(v) for v in moved.min(axis=0))
            xmax, ymax = (float(v) for v in moved.max(axis=0))
            print("{} {} {} {}".format(xmin, ymin, xmax, ymax))
            print("--{} {} {} {}".format(int(xmin), int(ymin), int(xmax), int(ymax)))
            move(rotated_image, int(xmin), int(ymin), int(xmax), int(ymax), angle)
            # Keep the most recent rotated image for inspection
            cv2.imwrite("rotated_qie.jpg", rotated_image)
    
    if __name__ == "__main__":
        # The generated images and labels are written into these two folders;
        # create them up front so that opening the label files cannot fail on
        # a missing directory.
        for out_dir in ("qie_images_train", "qie_images_val"):
            os.makedirs(out_dir, exist_ok=True)

        # Corner offsets of the icon, measured from pixel (320, 320)
        Xlist = [-32, -32, 24, 24]
        Ylist = [-34, 34, 34, -34]
        # Alternative corner set kept from the original script:
        #Xlist = [-28, -28, 28, 28]
        #Ylist = [-34, 34, 34, -34]
        rotate(Xlist, Ylist)
    
    

    运行该脚本可以得到3600张不同旋转、位移的图像,可用于yolo训练

  9. 搭建yolo工程和环境
  10. yolo训练
    # Training entry script: loads a YOLO model and trains it on the dataset
    # described by dm.yaml.
    import os
    import sys
    
    # Point PYTHONPATH at the YOLOv8 checkout and put it first on sys.path.
    # NOTE(review): presumably so that `ultralytics` below resolves to this
    # checkout rather than an installed package — confirm.
    os.environ['PYTHONPATH']='/home/yangkm/studio/yolov8'
    sys.path.insert(0, "/home/yangkm/studio/yolov8")
    
    # The YOLO class can be imported now
    from ultralytics import YOLO
    
    model = YOLO('yolov8n.pt') # new model / network
    
    # Train the model
    model.train(
        data='./ultralytics/cfg/datasets/dm.yaml',
        batch=280, epochs=320, imgsz=640, device=[0,1,2,3,4,5,6,7], workers=4,
        val=False, task='detect', patience=0
        #,resume=True
    )
    

    ./ultralytics/cfg/datasets/dm.yaml指定训练数据的目录:

    # path: ../datasets/dota8 # dataset root dir
    #train: /home/yangkm/plate_face_dataset/new_train_data  # train images (relative to 'path') 4 images
    #train:['/home/yangkm/plate_face_dataset/new_train_data','/home/yangkm/plate_face_dataset/ccpd_challenge_train']
    #val: /home/yangkm/plate_face_dataset/new_val_data   # val images (relative to 'path') 4 images
    #val:['/home/yangkm/plate_face_dataset/new_val_data','/home/yangkm/plate_face_dataset/ccpd_challenge_val']
    
    train:
      #- /home/yangkm/plate_face_dataset/wider_face_train_data
    - /home/yangkm/dm_script/test/qie_images_train
    
    val:
    - /home/yangkm/dm_script/test/qie_images_val
    
    # Classes (a single class: the qie icon)
    names:
      0: qie
    
    

  11. 测试
    yolo predict model=qie_10_08.pt source=qie_test save_txt conf=0.2

您可能感兴趣的与本文相关的镜像

CLIP-GmP-ViT-L-14编码模型

CLIP-GmP-ViT-L-14编码模型

图像识别
CLIP

`CLIP-GmP-ViT-L-14编码模型` 是一个图文双塔编码模型,适合做图文匹配、零样本分类和跨模态检索演示。本镜像已经完成 Web 部署,打开页面即可上传图片并测试图文表征能力。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值