在学习yolo过程中,我有一个突发奇想,想去识别一个旋转不同角度的图标。并且新手试验,证明是非常可行的。
思路:准备一个待识别的图标,要求图标背景最好是透明色;生成一个640X640的图像,在中心位置放置待识别的图标,并计算图标在图像中的坐标位置;通过python生成旋转后的图像及图标四个角点的坐标,并保存下来;使用yolo工程训练;测试。
- 准备一个待识别的图标
- 用画图生成一个640X640的图像,并记录图标四个角点的坐标。如果以图像左上角为原点,图标的四个角点坐标是(288,286),(288,354),(344,354),(344,286)。如果以图像中心点(320,320)为原点,四个角点坐标是(-32,-34),(-32,34),(24,34),(24,-34)
- python脚本旋转,以图标中心点旋转。图像旋转的计算公式:$i' = i\cos\theta - j\sin\theta$,$j' = i\sin\theta + j\cos\theta$,转化成代码
def calc_coordinate(x, y, angle, center_x=320, center_y=320):
    """Rotate an offset by ``angle`` degrees and shift it by a centre point.

    Applies x' = x*cos(a) - y*sin(a) and y' = x*sin(a) + y*cos(a), then adds
    the centre so the result is an absolute coordinate.

    Args:
        x: Horizontal offset of the point from the centre.
        y: Vertical offset of the point from the centre.
        angle: Rotation angle in degrees.
        center_x: X coordinate added after rotating. Defaults to 320, the
            value that used to be hard-coded.
        center_y: Y coordinate added after rotating. Defaults to 320.

    Returns:
        A tuple ``(xp, yp)`` with the rotated, shifted coordinates.
    """
    a = angle * math.pi / 180.0
    cos_a = math.cos(a)
    sin_a = math.sin(a)
    xp = x * cos_a - y * sin_a + center_x
    yp = x * sin_a + y * cos_a + center_y
    return (xp, yp)
# Next step: shift the icon by random offsets so that it lands at different
# positions in the image.
def move(image, x0, y0, x1, y1, angle):
    """Paste one icon crop at random positions on white canvases and save them.

    The ``width x height`` region of ``image`` whose top-left corner is
    ``(x0, y0)`` is copied onto ten fresh 640x640 white canvases, each time at
    a random position. Every canvas is written as a JPEG next to a YOLO label
    file; a sample goes to ``qie_images_train`` when ``random.random() < 0.9``
    and to ``qie_images_val`` otherwise.

    Args:
        image: Source image array that contains the (rotated) icon.
        x0: Left edge of the crop in ``image``.
        y0: Top edge of the crop in ``image``.
        x1: Right edge of the crop in ``image``.
        y1: Bottom edge of the crop in ``image``.
        angle: Rotation angle of ``image``; only used in the file names.

    Raises:
        ValueError: If the crop is larger than the canvas.
    """
    canvas_size = 640
    width = abs(x0 - x1)
    height = abs(y0 - y1)
    if width > canvas_size or height > canvas_size:
        raise ValueError(
            "crop {}x{} does not fit in a {}x{} canvas".format(
                width, height, canvas_size, canvas_size
            )
        )

    # Writing into a missing directory fails, so make sure both targets exist.
    for out_dir in ("qie_images_train", "qie_images_val"):
        os.makedirs(out_dir, exist_ok=True)

    for i in range(10):
        # Bound the offset by the crop size so the pasted region always fits;
        # a fixed upper bound of 553 overflows the canvas for crops > 87 px.
        rx = random.randint(0, canvas_size - width)
        ry = random.randint(0, canvas_size - height)

        new_matrix = np.zeros((canvas_size, canvas_size, 3), np.uint8)
        new_matrix[:, :] = (255, 255, 255)

        print("move {} {} {} {}".format(rx, rx + width, ry, ry + height))
        print("width {} height {}".format(width, height))
        print("-- {} {} {} {}".format(y0, y0 + height, x0, x0 + width))
        print("== {} {} {} {}".format(ry, ry + height, rx, rx + width))

        new_matrix[ry: ry + height, rx: rx + width] = image[y0: y0 + height, x0: x0 + width]

        choice = random.random()
        if choice < 0.9:
            image_file_name = "qie_images_train/img_{}_{}.jpg".format(angle, i)
            anno_file_name = "qie_images_train/img_{}_{}.txt".format(angle, i)
        else:
            image_file_name = "qie_images_val/img_{}_{}.jpg".format(angle, i)
            anno_file_name = "qie_images_val/img_{}_{}.txt".format(angle, i)

        cv2.imwrite(image_file_name, new_matrix)
        convert_yolo_anno(anno_file_name, rx, ry, rx + width, ry + height)
# Next step: loop over the angles to generate an image for each one.
def rotate(xlist, ylist):
    """Rotate the icon image through 0..359 degrees and emit samples per angle.

    For every whole-degree angle the source image is rotated, the axis-aligned
    bounding box of the rotated icon is computed, and ``move`` is called to
    generate the shifted training samples for that angle.

    Args:
        xlist: X offsets of the icon corners, measured from (320, 320).
        ylist: Y offsets of the icon corners, measured from (320, 320).

    Raises:
        FileNotFoundError: If ``pic/qie.png`` cannot be read.
    """
    image = cv2.imread("pic/qie.png")
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: pic/qie.png")

    (h, w) = image.shape[:2]
    print("width {}, height {}".format(w//2, h//2))

    # Rotation centre: 4 px left of the image centre.
    # NOTE(review): presumably the icon's own centre - confirm against the
    # corner coordinates supplied by the caller.
    center = (w//2 - 4, h // 2)
    scale = 1.0

    # Corners as homogeneous absolute pixel coordinates, one row per corner.
    corners = np.array(
        [[320 + x, 320 + y, 1.0] for x, y in zip(xlist, ylist)]
    )

    for angle in range(360):
        rotation_matrix = cv2.getRotationMatrix2D(center, angle, scale)
        rotated_image = cv2.warpAffine(image, rotation_matrix, (w, h))

        # Transform the corners with the very matrix used to warp the image.
        # Rotating them separately about (320, 320) with the textbook formula
        # turns the opposite way and about a different centre, which moves the
        # box up to 8 px off the icon.
        rotated_corners = np.round(corners @ rotation_matrix.T, 6)
        xmin, ymin = rotated_corners.min(axis=0)
        xmax, ymax = rotated_corners.max(axis=0)
        print("{} {} {} {}".format(xmin, ymin, xmax, ymax))

        # Round outwards so the crop fully contains the rotated icon.
        left = int(np.floor(xmin))
        top = int(np.floor(ymin))
        right = int(np.ceil(xmax))
        bottom = int(np.ceil(ymax))
        print("--{} {} {} {}".format(left, top, right, bottom))

        move(rotated_image, left, top, right, bottom, angle)

        # Preview of the most recent rotation; overwritten on every pass.
        cv2.imwrite("rotated_qie.jpg", rotated_image)
# Next step: convert the coordinates obtained from the rotation step into a
# YOLO label.
def convert_yolo_anno(file_path, x0, y0, x1, y1, image_width=640, image_height=640):
    """Write a one-line YOLO label for the box ``(x0, y0)``-``(x1, y1)``.

    The label has the form ``class x_center y_center width height``, with the
    four box values divided by the image size. The class index is always 0.

    Args:
        file_path: Path of the label file handed to ``write_file``.
        x0: X coordinate of one box corner, in pixels.
        y0: Y coordinate of one box corner, in pixels.
        x1: X coordinate of the opposite box corner, in pixels.
        y1: Y coordinate of the opposite box corner, in pixels.
        image_width: Image width in pixels. Defaults to 640, the value that
            used to be hard-coded.
        image_height: Image height in pixels. Defaults to 640.
    """
    x_center = (float(x0) + float(x1)) / 2.0
    y_center = (float(y0) + float(y1)) / 2.0

    x = x_center / float(image_width)
    y = y_center / float(image_height)
    w = abs(float(x1) - float(x0)) / float(image_width)
    h = abs(float(y1) - float(y0)) / float(image_height)

    dm_type = 0
    qie_label = "{} {} {} {} {}".format(dm_type, x, y, w, h)
    write_file(file_path, qie_label)
# Next step: create two directories, qie_images_train and qie_images_val, to
# hold the generated images.
- 整个脚本代码
import math
import os
import random

import cv2
import numpy as np

# Side length, in pixels, of the source image and of every generated canvas.
IMAGE_SIZE = 640
# Corner offsets given to rotate() are measured from this point.
ORIGIN = 320
TRAIN_DIR = "qie_images_train"
VAL_DIR = "qie_images_val"
SAMPLES_PER_ANGLE = 10


def convert_yolo_anno(file_path, x0, y0, x1, y1, image_width=IMAGE_SIZE, image_height=IMAGE_SIZE):
    """Write a one-line YOLO label for the box ``(x0, y0)``-``(x1, y1)``.

    The label has the form ``class x_center y_center width height``, with the
    four box values divided by the image size. The class index is always 0.

    Args:
        file_path: Path of the label file to write.
        x0: X coordinate of one box corner, in pixels.
        y0: Y coordinate of one box corner, in pixels.
        x1: X coordinate of the opposite box corner, in pixels.
        y1: Y coordinate of the opposite box corner, in pixels.
        image_width: Image width in pixels (default 640).
        image_height: Image height in pixels (default 640).
    """
    x_center = (float(x0) + float(x1)) / 2.0
    y_center = (float(y0) + float(y1)) / 2.0

    x = x_center / float(image_width)
    y = y_center / float(image_height)
    w = abs(float(x1) - float(x0)) / float(image_width)
    h = abs(float(y1) - float(y0)) / float(image_height)

    dm_type = 0
    qie_label = "{} {} {} {} {}".format(dm_type, x, y, w, h)
    write_file(file_path, qie_label)


def write_file(file_path, qie_label):
    """Write ``qie_label`` to ``file_path`` as UTF-8, replacing any old content."""
    with open(file_path, mode='w', encoding='utf-8') as file:
        file.write(qie_label)


def calc_coordinate(x, y, angle, center_x=ORIGIN, center_y=ORIGIN):
    """Rotate an offset by ``angle`` degrees and shift it by a centre point.

    Applies x' = x*cos(a) - y*sin(a) and y' = x*sin(a) + y*cos(a), then adds
    the centre so the result is an absolute coordinate.

    Args:
        x: Horizontal offset of the point from the centre.
        y: Vertical offset of the point from the centre.
        angle: Rotation angle in degrees.
        center_x: X coordinate added after rotating (default 320).
        center_y: Y coordinate added after rotating (default 320).

    Returns:
        A tuple ``(xp, yp)`` with the rotated, shifted coordinates.
    """
    a = angle * math.pi / 180.0
    cos_a = math.cos(a)
    sin_a = math.sin(a)
    xp = x * cos_a - y * sin_a + center_x
    yp = x * sin_a + y * cos_a + center_y
    return (xp, yp)


def calc_coordinate_list(xlist, ylist, angle):
    """Return the bounding box of four corners rotated with ``calc_coordinate``.

    Kept for callers that rely on it; ``rotate`` derives its box from the
    affine matrix instead so that box and warped image always agree.

    Args:
        xlist: X offsets of the four corners.
        ylist: Y offsets of the four corners.
        angle: Rotation angle in degrees.

    Returns:
        A tuple ``(xmin, ymin, xmax, ymax)``.
    """
    xp_list = []
    yp_list = []
    for i in range(4):
        xp, yp = calc_coordinate(xlist[i], ylist[i], angle)
        print("i:{} {}, {}".format(i, xp, yp))
        xp_list.append(xp)
        yp_list.append(yp)

    return (min(xp_list), min(yp_list), max(xp_list), max(yp_list))


def move(image, x0, y0, x1, y1, angle):
    """Paste one icon crop at random positions on white canvases and save them.

    The ``width x height`` region of ``image`` whose top-left corner is
    ``(x0, y0)`` is copied onto ten fresh white canvases, each time at a
    random position. Every canvas is written as a JPEG next to a YOLO label
    file; a sample goes to the training directory when
    ``random.random() < 0.9`` and to the validation directory otherwise.

    Args:
        image: Source image array that contains the (rotated) icon.
        x0: Left edge of the crop in ``image``.
        y0: Top edge of the crop in ``image``.
        x1: Right edge of the crop in ``image``.
        y1: Bottom edge of the crop in ``image``.
        angle: Rotation angle of ``image``; only used in the file names.

    Raises:
        ValueError: If the crop is larger than the canvas.
    """
    width = abs(x0 - x1)
    height = abs(y0 - y1)
    if width > IMAGE_SIZE or height > IMAGE_SIZE:
        raise ValueError(
            "crop {}x{} does not fit in a {}x{} canvas".format(
                width, height, IMAGE_SIZE, IMAGE_SIZE
            )
        )

    # Writing into a missing directory fails, so make sure both targets exist.
    for out_dir in (TRAIN_DIR, VAL_DIR):
        os.makedirs(out_dir, exist_ok=True)

    for i in range(SAMPLES_PER_ANGLE):
        # Bound the offset by the crop size so the pasted region always fits;
        # a fixed upper bound of 553 overflows the canvas for crops > 87 px.
        rx = random.randint(0, IMAGE_SIZE - width)
        ry = random.randint(0, IMAGE_SIZE - height)

        new_matrix = np.zeros((IMAGE_SIZE, IMAGE_SIZE, 3), np.uint8)
        new_matrix[:, :] = (255, 255, 255)

        print("move {} {} {} {}".format(rx, rx + width, ry, ry + height))
        print("width {} height {}".format(width, height))
        print("-- {} {} {} {}".format(y0, y0 + height, x0, x0 + width))
        print("== {} {} {} {}".format(ry, ry + height, rx, rx + width))

        new_matrix[ry: ry + height, rx: rx + width] = image[y0: y0 + height, x0: x0 + width]

        choice = random.random()
        out_dir = TRAIN_DIR if choice < 0.9 else VAL_DIR
        image_file_name = "{}/img_{}_{}.jpg".format(out_dir, angle, i)
        anno_file_name = "{}/img_{}_{}.txt".format(out_dir, angle, i)

        cv2.imwrite(image_file_name, new_matrix)
        convert_yolo_anno(anno_file_name, rx, ry, rx + width, ry + height)


def rotate(xlist, ylist):
    """Rotate the icon image through 0..359 degrees and emit samples per angle.

    For every whole-degree angle the source image is rotated, the axis-aligned
    bounding box of the rotated icon is computed, and ``move`` is called to
    generate the shifted training samples for that angle.

    Args:
        xlist: X offsets of the icon corners, measured from (320, 320).
        ylist: Y offsets of the icon corners, measured from (320, 320).

    Raises:
        FileNotFoundError: If ``pic/qie.png`` cannot be read.
    """
    image = cv2.imread("pic/qie.png")
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: pic/qie.png")

    (h, w) = image.shape[:2]
    print("width {}, height {}".format(w//2, h//2))

    # Rotation centre: 4 px left of the image centre. With the corner offsets
    # used below (x from -32 to 24) that is the icon's own centre.
    center = (w//2 - 4, h // 2)
    scale = 1.0

    # Corners as homogeneous absolute pixel coordinates, one row per corner.
    corners = np.array(
        [[ORIGIN + x, ORIGIN + y, 1.0] for x, y in zip(xlist, ylist)]
    )

    for angle in range(360):
        rotation_matrix = cv2.getRotationMatrix2D(center, angle, scale)
        rotated_image = cv2.warpAffine(image, rotation_matrix, (w, h))

        # Transform the corners with the very matrix used to warp the image.
        # Rotating them separately about (320, 320) with the textbook formula
        # turns the opposite way and about a different centre, which moves the
        # box up to 8 px off the icon.
        rotated_corners = np.round(corners @ rotation_matrix.T, 6)
        xmin, ymin = rotated_corners.min(axis=0)
        xmax, ymax = rotated_corners.max(axis=0)
        print("{} {} {} {}".format(xmin, ymin, xmax, ymax))

        # Round outwards so the crop fully contains the rotated icon.
        left = int(math.floor(xmin))
        top = int(math.floor(ymin))
        right = int(math.ceil(xmax))
        bottom = int(math.ceil(ymax))
        print("--{} {} {} {}".format(left, top, right, bottom))

        move(rotated_image, left, top, right, bottom, angle)

        # Preview of the most recent rotation; overwritten on every pass.
        cv2.imwrite("rotated_qie.jpg", rotated_image)


if __name__ == "__main__":
    # Icon corners relative to the image centre (320, 320).
    Xlist = [-32, -32, 24, 24]
    Ylist = [-34, 34, 34, -34]
    rotate(Xlist, Ylist)
# Running this script yields 3600 images with different rotations and offsets,
# which can be used for YOLO training.
- 搭建yolo工程和环境
- yolo训练
import os
import sys

# Location put on both PYTHONPATH and sys.path before ultralytics is imported.
# NOTE(review): presumably a local YOLOv8 checkout - confirm.
YOLO_ROOT = "/home/yangkm/studio/yolov8"
os.environ["PYTHONPATH"] = YOLO_ROOT
sys.path.insert(0, YOLO_ROOT)

# The import must come after the path setup above.
from ultralytics import YOLO  # noqa: E402

# Keyword arguments handed to model.train(); a commented-out resume=True was
# left next to them in the original.
train_options = {
    "data": "./ultralytics/cfg/datasets/dm.yaml",
    "batch": 280,
    "epochs": 320,
    "imgsz": 640,
    "device": [0, 1, 2, 3, 4, 5, 6, 7],
    "workers": 4,
    "val": False,
    "task": "detect",
    "patience": 0,
}

# Start from the yolov8n.pt weights file.
model = YOLO("yolov8n.pt")

# Train the model
model.train(**train_options)
# ./ultralytics/cfg/datasets/dm.yaml specifies the training-data directories:
# Dataset definition for the "qie" icon detector.

# Earlier settings, left disabled:
# path: ../datasets/dota8  # dataset root dir
# train: /home/yangkm/plate_face_dataset/new_train_data
# train: ['/home/yangkm/plate_face_dataset/new_train_data', '/home/yangkm/plate_face_dataset/ccpd_challenge_train']
# val: /home/yangkm/plate_face_dataset/new_val_data
# val: ['/home/yangkm/plate_face_dataset/new_val_data', '/home/yangkm/plate_face_dataset/ccpd_challenge_val']

# Training image directories.
train:
  # - /home/yangkm/plate_face_dataset/wider_face_train_data
  - /home/yangkm/dm_script/test/qie_images_train

# Validation image directories.
val:
  - /home/yangkm/dm_script/test/qie_images_val

# Class index -> class name.
names:
  0: qie
# Next step: testing.
# Run `yolo predict` with the weights file qie_10_08.pt on the source qie_test,
# passing the save_txt flag and conf=0.2.
yolo predict model=qie_10_08.pt source=qie_test save_txt conf=0.2

9625

被折叠的 条评论
为什么被折叠?



