Anylabeling自动标记、转YOLO的label格式和按比例分数据集
·
1. Anylabeling自动标记
使用以下代码
import numpy as np
import torch
import matplotlib.pyplot as plt
import cv2
import json
import sys
import os
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
def segment(imgPath):
    """Run SAM automatic mask generation on one image and export the result.

    The image is read with OpenCV, converted from BGR to RGB, segmented with
    ``SamAutomaticMaskGenerator`` and the resulting masks are handed to
    ``show_anns`` together with the image path.

    Args:
        imgPath: Path of the image file to segment.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``imgPath``.
    """
    # Read the image with OpenCV.
    print("开始读图像")
    image = cv2.imread(imgPath)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an opaque error in cvtColor.
        raise FileNotFoundError(f"cannot read image: {imgPath}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Make the parent directory importable; only add it once so repeated
    # calls do not keep growing sys.path.
    if ".." not in sys.path:
        sys.path.append("..")
    sam_checkpoint = "sam_vit_l_0b3195.pth"
    model_type = "vit_l"

    # Prefer the GPU, but fall back to the CPU when CUDA is unavailable.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Build the SAM model and the automatic mask generator.
    sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
    sam.to(device=device)
    mask_generator = SamAutomaticMaskGenerator(sam)
    masks = mask_generator.generate(image)

    # Export the masks as annotations and draw the coloured overlay.
    show_anns(masks, imgPath)
    if device == "cuda":
        torch.cuda.empty_cache()
def show_anns(anns, imgPath, label="air"):
    """Write the masks as a labelme-style JSON file and draw a colour overlay.

    For every annotation whose ``'area'`` is at least 10, the largest external
    contour of its ``'segmentation'`` mask is sub-sampled and stored as one
    polygon shape. The JSON file is written next to the image, using the
    image path with its extension replaced by ``.json``. Nothing is written
    when ``anns`` is empty.

    Args:
        anns: Sequence of dicts with ``'area'`` and ``'segmentation'`` keys.
            ``'segmentation'`` is used as an index into an image-sized array,
            so it is expected to be a 2-D boolean mask.
        imgPath: Path of the image the masks belong to.
        label: Label name stored on every generated shape.
    """
    if len(anns) == 0:
        return

    # Largest objects first, so smaller ones are painted on top of them.
    sorted_anns = sorted(anns, key=lambda ann: ann['area'], reverse=True)
    ax = plt.gca()
    ax.set_autoscale_on(False)

    height, width = sorted_anns[0]['segmentation'].shape[:2]
    img = np.ones((height, width, 4))
    img[:, :, 3] = 0  # fully transparent until a mask is painted

    shapes = []
    for ann in sorted_anns:
        # Skip very small objects.
        if ann['area'] < 10:
            continue

        m = ann['segmentation']
        objImg = np.zeros((m.shape[0], m.shape[1], 1), np.uint8)
        objImg[m] = 255
        contours, _ = cv2.findContours(
            objImg, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            continue

        # Keep only the contour enclosing the largest area.
        largest = max(contours, key=cv2.contourArea)

        # Sub-sample long contours; contours with fewer than 30 points are
        # kept whole (step 1), longer ones end up with fewer than 60 points.
        step = max(1, len(largest) // 30)
        # Cast to plain int: NumPy integers are not JSON serialisable.
        points = [[int(pt[0][0]), int(pt[0][1])] for pt in largest[::step]]

        shapes.append({
            "label": label,
            "points": points,
            "group_id": None,
            "shape_type": "polygon",
            "flags": {},
        })

        # Paint the object with a random opaque colour.
        img[m] = np.concatenate([np.random.random(3), [1]])

    # Derive the output path from the extension only. A plain
    # str.replace(".bmp", ".json") returns the image path unchanged for any
    # other extension, and the JSON would then overwrite the image itself.
    jsonPath = os.path.splitext(imgPath)[0] + ".json"
    print(jsonPath)

    # File name without its directory, stored as "imagePath" in the JSON.
    title = os.path.basename(imgPath)
    print("title为", title)

    jsonData = {
        "version": "5.2.1",
        "flags": {},
        "shapes": shapes,
        "imagePath": title,
        "imageData": None,
        "imageHeight": height,
        "imageWidth": width,
    }
    with open(jsonPath, "w") as file_out:
        file_out.write(json.dumps(jsonData, indent=4))

    ax.imshow(img)
if __name__ == '__main__':
    # Image directory, relative to this script; edit it to point at the
    # folder to annotate (for example r"20230922//aggregate//").
    dir_path = r"20230927//air//"
    for image_name in os.listdir(dir_path):
        img_path = os.path.join(dir_path, image_name)
        print(img_path)
        # Only .bmp files are segmented; every other entry is skipped.
        if not img_path.endswith('.bmp'):
            continue
        segment(img_path)
2.使用Anylabeling或者labelme打开核对标记
1.用anylabeling打开
在Python的终端输入anylabeling
即可打开
模型加载不出来,可参考这篇博客。
打开文件夹之后如图所示。
但是发现删除图层的话,对于很小的图层会出现选不中的问题,可使用放大图片再选中的方式。但是创建图层很方便。
1.1 创建图层

2.用labelme打开
终端输入labelme,打开labelme工具,选择OpenDir,选择所对应的文件目录。


点击红色图层,按键盘上的delete可删除。会有部分图像没有图层,点击Create Polygons创建图层。
但是这种创建图层的方式又太麻烦,没有anylabeling创建图层的方式简单。
3.修改标签label名称(可选)

4. JSON文件格式转txt格式
标记结束后就会出现下述情况:图片和json文件在同一个文件夹。
将json文件转换为txt文件,使用以下代码
import os
import json
import numpy as np
# Class names; a label's position in this list is the class index written to the txt file.
CLASSES=["brick","aggregate","air","other"]
def convert_2(size, box):
    """Convert a pixel bounding box to normalised centre/size form.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` of the box in pixels.

    Returns:
        A tuple ``(x, y, w, h)``: the box centre and the box size, each
        divided by the matching image dimension.

    Raises:
        ValueError: If the image width or height is not positive.
    """
    img_w, img_h = size[0], size[1]
    if img_w <= 0 or img_h <= 0:
        raise ValueError(f"image size must be positive, got {size!r}")
    w = (box[1] - box[0]) / img_w
    h = (box[3] - box[2]) / img_h
    x = (box[0] + box[1]) / 2 / img_w
    y = (box[2] + box[3]) / 2 / img_h
    return (x, y, w, h)
def json2txt(path_json, path_txt):
    """Convert one annotation JSON file into a YOLO-style txt label file.

    For every entry in ``"shapes"`` the axis-aligned bounding box of its
    points is computed, normalised with ``convert_2`` and written as one
    line: ``<class index> <x> <y> <w> <h>``.

    Args:
        path_json: Path of the JSON file to read. It must provide
            ``"imageWidth"``, ``"imageHeight"`` and ``"shapes"``.
        path_txt: Path of the txt file to create (overwritten if it exists).

    Raises:
        ValueError: If a shape's label is not listed in ``CLASSES``.
    """
    with open(path_json, "r", encoding="utf-8") as fjson:
        jsonx = json.load(fjson)

    width = int(jsonx["imageWidth"])    # width of the source image
    height = int(jsonx["imageHeight"])  # height of the source image

    with open(path_txt, "w", encoding="utf-8") as ftxt:
        for shape in jsonx["shapes"]:
            points = shape["points"]
            if not points:
                # A shape without points has no bounding box; skip it.
                continue
            cls_id = CLASSES.index(str(shape["label"]))

            # Take min/max over the raw coordinates. Truncating the first
            # point with int() would shift the lower bound for float points.
            xs = [point[0] for point in points]
            ys = [point[1] for point in points]
            bb = convert_2((width, height),
                           (min(xs), max(xs), min(ys), max(ys)))
            ftxt.write(str(cls_id) + " " + " ".join(str(a) for a in bb) + "\n")
if __name__=="__main__":
    # Directory holding the JSON annotation files.
    dir_json = "E:\\dataset\\BrickConcreteDataSet\\20230927\\air\\"
    # Directory receiving the txt files. The "lables_air" spelling is kept
    # as is, because it names a directory on disk. The backslash before
    # "BrickConcreteDataSet" is now escaped; "\B" is an invalid escape.
    dir_txt = "E:\\dataset\\BrickConcreteDataSet\\20230927\\lables_air\\"
    os.makedirs(dir_txt, exist_ok=True)

    # Convert every .json file in dir_json; other files are ignored.
    for json_name in os.listdir(dir_json):
        stem, fileext = os.path.splitext(json_name)
        if fileext != ".json":
            continue
        path_json = os.path.join(dir_json, json_name)
        path_txt = os.path.join(dir_txt, stem + ".txt")
        json2txt(path_json, path_txt)
运行后可生成txt文件。
5. 按比例分数据集
将标记的数据集按比例分成训练集,验证集和测试集。
将图片和标签分别存放在不同的文件夹内。运行以下代码,可生成不同比例的数据集。
import os
import shutil
import random
# Fixed seed so the random.shuffle call in split_data gives the same order on every run.
random.seed(0)
def split_data(file_path, xml_path, new_file_path, train_rate, val_rate, test_rate):
    """Copy image/label pairs into train, val and test sub-directories.

    Images and labels are paired by file name without extension, shuffled
    with ``random.shuffle`` and copied to
    ``<new_file_path>/<train|val|test>/<images|labels>``. Images without a
    matching label file are reported and skipped.

    Args:
        file_path: Directory containing the images.
        xml_path: Directory containing the label files.
        new_file_path: Root directory of the generated dataset.
        train_rate: Fraction of the pairs copied to ``train``.
        val_rate: Fraction of the pairs copied to ``val``.
        test_rate: Kept for interface compatibility and not read; the
            ``test`` split receives every pair left after train and val.
    """
    # Pair by stem instead of zipping two os.listdir() results, whose order
    # and length are not guaranteed to line up.
    labels_by_stem = {
        os.path.splitext(name)[0]: name for name in os.listdir(xml_path)
    }
    pairs = []
    for image in sorted(os.listdir(file_path)):
        label = labels_by_stem.get(os.path.splitext(image)[0])
        if label is None:
            print(f"skipping {image}: no matching label file")
            continue
        pairs.append((image, label))

    random.shuffle(pairs)
    total = len(pairs)

    def _cut(rate):
        # The small epsilon absorbs float noise: (0.7 + 0.2) * 10 evaluates
        # to 8.999999999999998 and would otherwise truncate to 8.
        return int(rate * total + 1e-9)

    train_end = _cut(train_rate)
    val_end = _cut(train_rate + val_rate)
    subsets = (
        ('train', pairs[:train_end]),
        ('val', pairs[train_end:val_end]),
        ('test', pairs[val_end:]),
    )

    for subset_name, subset in subsets:
        if not subset:
            # No directories are created for an empty split.
            continue
        image_dir = os.path.join(new_file_path, subset_name, 'images')
        label_dir = os.path.join(new_file_path, subset_name, 'labels')
        os.makedirs(image_dir, exist_ok=True)
        os.makedirs(label_dir, exist_ok=True)
        for image, label in subset:
            shutil.copy(os.path.join(file_path, image),
                        os.path.join(image_dir, image))
            shutil.copy(os.path.join(xml_path, label),
                        os.path.join(label_dir, label))
        print(f"{subset_name}: {len(subset)} samples")
if __name__ == '__main__':
    # The original used "//" to start comments, which is a syntax error in
    # Python; they are "#" comments now.
    file_path = "E:/VOCdata/image"  # directory holding the images
    xml_path = 'E:/VOCdata/labels'  # directory holding the label files
    new_file_path = "E:/VOCdevkit"  # root directory of the generated dataset
    split_data(file_path, xml_path, new_file_path, train_rate=0.7, val_rate=0.2, test_rate=0.1)
魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。
更多推荐


所有评论(0)