- 使用labelme打标,得到json文件
- 把所有json文件放到一个单独的文件夹,里面只有json文件
- 使用脚本,把json里面的label以及标注框的中心坐标、宽、高提取出来。注意这里的4个值都按图像的宽、高做了归一化(分别除以图像宽度或高度,取值在0~1之间)。
脚本如下:
import json
import os
def read_json(json_file):
    """Read a JSON file and return its decoded content.

    Args:
        json_file: Path of the JSON file to load.

    Returns:
        The object produced by ``json.load`` for that file.
    """
    # The ``with`` statement closes the file on exit.
    with open(json_file, 'r') as handle:
        return json.load(handle)
def json2txt(json_path, txt_path):
    """Convert every labelme JSON file in a folder to a YOLO-style txt file.

    For each ``*.json`` entry in ``json_path`` a txt file with the same base
    name is written into ``txt_path``. Every shape becomes one line::

        <class index> <x_center> <y_center> <width> <height>

    where the four box values are divided by the ``imageWidth`` /
    ``imageHeight`` stored in the JSON. The class index is 0 for the label
    ``'Lesions'`` and 1 for any other label.

    Args:
        json_path: Folder containing the labelme JSON files.
        txt_path: Folder the txt files are written to; created if missing.
    """
    # An empty txt_path means "current directory"; os.makedirs('') would fail.
    if txt_path:
        os.makedirs(txt_path, exist_ok=True)

    for json_file in os.listdir(json_path):
        # Skip anything that is not a JSON file instead of slicing a fixed
        # number of characters off an arbitrary file name.
        stem, ext = os.path.splitext(json_file)
        if ext.lower() != '.json':
            continue

        json_data = read_json(os.path.join(json_path, json_file))
        image_width = json_data['imageWidth']
        image_height = json_data['imageHeight']

        # os.path.join works whether or not txt_path ends with a separator.
        txt_name = os.path.join(txt_path, stem + '.txt')
        # ``with`` guarantees the output file is flushed and closed.
        with open(txt_name, 'w') as txt_file:
            for shape in json_data['shapes']:
                label = shape['label']
                index = 0 if label == 'Lesions' else 1

                # Only the first two points are used (the two corners of a
                # rectangle annotation).
                first, second = shape['points'][0], shape['points'][1]
                x1, y1 = first[0], first[1]
                x2, y2 = second[0], second[1]

                # Normalise the box by the image size. abs() keeps width and
                # height positive when the corners are stored in reverse
                # order (box drawn from bottom-right to top-left).
                x_center = (x1 + x2) / 2 / image_width
                y_center = (y1 + y2) / 2 / image_height
                bbox_w = abs(x2 - x1) / image_width
                bbox_h = abs(y2 - y1) / image_height
                bbox = (x_center, y_center, bbox_w, bbox_h)

                txt_file.write(str(index) + " " + " ".join(str(a) for a in bbox) + '\n')
                print(label)
if __name__ == "__main__":
    # Raw string: '\i' is not a valid escape sequence, so the plain literal
    # triggers a DeprecationWarning (SyntaxWarning on Python 3.12+). The
    # runtime value of the path is unchanged.
    json_path = r'H:\images_json'  # folder that holds the labelme JSON files
    txt_path = 'data/'  # folder the converted txt files are written to
    json2txt(json_path, txt_path)
json_path是你放置json文件的文件夹路径
txt_path是转换得到的txt文件的保存路径
脚本里对label的判断要改成你自己打标的类别;我这里这么写是因为我一张图里有两个类别('Lesions'记为0,其余都记为1)
划分数据集的代码如下,只有img_dir和label_dir要改,改成你自己放图片和txt的路径就行
# -*- coding: utf-8 -*-
"""
将数据集划分为训练集,验证集,测试集
"""
import os
import random
import shutil
def makedir(new_dir):
    """Create the folder ``new_dir`` (and any missing parents) if needed.

    Args:
        new_dir: Path of the directory that should exist afterwards.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the same directory in between.
    os.makedirs(new_dir, exist_ok=True)
def split_data(img_dir, label_dir, img_ext='.tif', train_pct=0.8,
               valid_pct=0.1, split_dir=os.path.join(".", "dataset")):
    """Split images and their txt labels into train / valid / test folders.

    Images in ``img_dir`` whose name ends with ``img_ext`` are shuffled with
    a fixed seed and copied, together with the same-named ``.txt`` file from
    ``label_dir``, into ``<split_dir>/{train,valid,test}/{images,labels}``.

    Args:
        img_dir: Folder containing the original images.
        label_dir: Folder containing one ``<image base name>.txt`` per image.
        img_ext: Image file extension to pick up (compared case-insensitively).
        train_pct: Fraction of the images copied to the training set.
        valid_pct: Fraction copied to the validation set; whatever remains
            goes to the test set.
        split_dir: Root folder of the generated dataset.

    Raises:
        FileNotFoundError: If the label file of an image is missing
            (raised by ``shutil.copy``).
    """
    random.seed(1)  # fixed seed so the split is repeatable

    # Create <split_dir>/<subset>/{images,labels} for every subset.
    subset_dirs = {
        subset: os.path.join(split_dir, subset)
        for subset in ('train', 'valid', 'test')
    }
    for subset_dir in subset_dirs.values():
        for kind in ('images', 'labels'):
            os.makedirs(os.path.join(subset_dir, kind), exist_ok=True)

    # Keep only files with the requested image extension. Sorting first makes
    # the seeded shuffle reproducible: os.listdir returns names in arbitrary
    # order, which can differ between machines.
    wanted_ext = img_ext.lower()
    imgs = sorted(
        name for name in os.listdir(img_dir)
        if name.lower().endswith(wanted_ext)
    )
    random.shuffle(imgs)

    img_count = len(imgs)
    train_point = int(img_count * train_pct)  # [0, train_point) -> train
    valid_point = int(img_count * (train_pct + valid_pct))  # [train_point, valid_point) -> valid

    for i, img_name in enumerate(imgs):
        if i < train_point:
            subset_dir = subset_dirs['train']
        elif i < valid_point:
            subset_dir = subset_dirs['valid']
        else:  # everything from valid_point onwards is the test set
            subset_dir = subset_dirs['test']

        # splitext handles extensions of any length ('.jpeg', '.tiff', ...).
        label_name = os.path.splitext(img_name)[0] + '.txt'

        shutil.copy(os.path.join(img_dir, img_name),
                    os.path.join(subset_dir, 'images', img_name))
        shutil.copy(os.path.join(label_dir, label_name),
                    os.path.join(subset_dir, 'labels', label_name))

    print('train:{}, valid:{}, test:{}'.format(train_point, valid_point-train_point,
                                               img_count-valid_point))
if __name__ == "__main__":
    # Source folders: the original images and their txt label files.
    img_dir, label_dir = './images', './data'
    split_data(img_dir, label_dir)
img_dir是原始图片路径,label_dir是原始txt路径。只需要改这俩就能直接划分数据集了,嘎嘎好用。
划分后效果如图所示: