700字范文 > 【目标检测】TT100K数据集使用提取标注信息并转换成VOC格式的xml文件或yolo格式的txt文件

【目标检测】TT100K数据集使用提取标注信息并转换成VOC格式的xml文件或yolo格式的txt文件

时间：2022-09-03 06:09:03

1 TT100K 官网

TT100K官网

1.1 数据集介绍

本人下载的是 2021 版的数据集，训练集 6105 张图片，测试集 3071 张图片，每张图片的分辨率为 2048 * 2048，共有 232 种标志类别（有221种类别）

下载数据集时请保证有 100 G 的存储空间

2 下载数据集

可以点击使用教程查看官方教程文档：

点击 Tsinghua-Tencent 100K Annotations 2021 (with more classification) 下载数据集压缩包（.zip 文件）并解压

3.1 与 3.2 分别是生成 xml 与 txt 格式标注文件的代码，读者可根据需要跳转对应部分。

3.1 在train、test 同级目录生成同名xml文件

3.1.1 代码

"""Convert TT100K annotations into one VOC-style XML file per image.

Author: 悠悠青青.
Date: 03-27 09:05:43 (the year is missing in the original header)
LastEditTime: 03-27 09:20:01
"""
import os
import json

# Hand-built XML templates, filled in with %-formatting.
# out0 opens the document and carries the per-image fields, out1 is emitted
# once per object, out2 closes the document.  The pieces are concatenated
# without separators, so the produced text is exactly the templates in order.
out0 = (
    '<annotation>'
    '<filename>%(name)s</filename>'
    '<source><database>None</database></source>'
    '<size>'
    '<width>%(width)d</width>'
    '<height>%(height)d</height>'
    '<depth>3</depth>'
    '</size>'
    '<segmented>0</segmented>'
)
out1 = (
    ' <object>'
    '<name>%(class)s</name>'
    '<pose>Unspecified</pose>'
    '<truncated>0</truncated>'
    '<difficult>0</difficult>'
    '<bndbox>'
    '<xmin>%(xmin)d</xmin>'
    '<ymin>%(ymin)d</ymin>'
    '<xmax>%(xmax)d</xmax>'
    '<ymax>%(ymax)d</ymax>'
    '</bndbox>'
    '</object>'
)
out2 = '</annotation>'


def annos2xml(annos, out_dir, width=2048, height=2048):
    """Write the XML annotation file for a single image.

    Args:
        annos: one entry of ``annotations["imgs"]``; the keys read here are
            ``"path"`` (e.g. ``"test/10056.jpg"``) and ``"objects"``, where
            each object has ``"category"`` and a ``"bbox"`` dict with
            ``xmin``/``ymin``/``xmax``/``ymax``.
        out_dir: root directory for the output; the relative folder found in
            ``annos["path"]`` is kept underneath it.
        width: image width written to ``<size>`` (defaults to 2048).
        height: image height written to ``<size>`` (defaults to 2048).
    """
    image_name = annos["path"]
    # splitext instead of [:-4] so extensions of any length are handled.
    out_xml = os.path.join(out_dir, os.path.splitext(image_name)[0] + '.xml')

    os.makedirs(out_dir, exist_ok=True)
    # The image path carries a sub-folder ("train/", "test/"); make sure it
    # exists below out_dir, otherwise open() fails for a fresh output root.
    os.makedirs(os.path.dirname(out_xml) or '.', exist_ok=True)

    source = {'name': image_name, 'width': width, 'height': height}
    pieces = [out0 % source]
    for obj in annos['objects']:
        bbox = obj['bbox']
        label = {
            'class': obj['category'],
            # %d in the template truncates the float coordinates to integers.
            'xmin': bbox['xmin'],
            'ymin': bbox['ymin'],
            'xmax': bbox['xmax'],
            'ymax': bbox['ymax'],
        }
        pieces.append(out1 % label)
    pieces.append(out2)

    # The context manager guarantees the file is flushed and closed.
    with open(out_xml, 'w', encoding='utf-8') as fxml:
        fxml.write(''.join(pieces))


def main(filedir="./annotations_all.json", ids_file="./train/ids.txt",
         outdir='./'):
    """Generate an XML file for every listed image id that has annotations.

    Args:
        filedir: path of the annotation JSON file.
        ids_file: text file with one image id per line; point it at the
            train or the test split.
        outdir: root directory in which the XML files are written.
    """
    with open(ids_file, encoding='utf-8') as f:
        ids = f.read().splitlines()
    with open(filedir, encoding='utf-8') as f:
        annos = json.load(f)

    img_num = len(ids)
    for cnt_id, imgid in enumerate(ids, start=1):
        print('\rprocessing :[{} / {}]'.format(cnt_id, img_num), end="")
        # Ids without an entry in the annotation file are skipped.
        if imgid in annos['imgs']:
            annos2xml(annos['imgs'][imgid], outdir)

    # Example of annos['imgs'][imgid] (values as printed in the original
    # write-up; the last 'xmin' appears with its leading digits missing):
    # {'path': 'test/10056.jpg', 'id': 10056, 'objects': [
    #   {'bbox': {'xmin': 452.475, 'ymin': 886.139, 'xmax': 468.3168, 'ymax': 916.8317}, 'category': 'i5'},
    #   {'bbox': {'xmin': 1274.26, 'ymin': 927.723, 'xmax': 1294.0594, 'ymax': 949.505}, 'category': 'i5'},
    #   {'bbox': {'xmin': 414.851, 'ymin': 877.228, 'xmax': 431.6832, 'ymax': 909.901}, 'category': 'pne'},
    #   {'bbox': {'xmin': 1215.84, 'ymin': 928.713, 'xmax': 1237.6237999999998, 'ymax': 950.495}, 'category': 'pne'},
    #   {'bbox': {'xmin': .83, 'ymin': 910.891, 'xmax': 2032.6733, 'ymax': 934.6535}, 'category': 'pne'}]}


if __name__ == "__main__":
    main()

3.1.2 生成 xml 如图所示

3.1.3 打开 labelImg 检查是否无误

如图所示，标注正确

笔者第一次读边框数据时没有读对，导致边框乱飞，故此步骤非常有必要

3.2 在train、test 同级目录生成同名 txt 文件

修改参数：

- classes: 需要转换的类别，默认为 train/test 中的所有类别，共 184 类；
- outdir: txt 生成目录，默认在 train/test 下，可根据需要指定。

"""Convert TT100K annotations into one YOLO-style txt label file per image.

Author: 悠悠青青.
Date: 03-27 09:05:43 (the year is missing in the original header)
LastEditTime: 05-17 16:44:15
"""
import os
import json

# Categories to export.  The position of a name in this list is the class id
# written to the label files; objects whose category is not listed are dropped.
classes = [
    'ph5', 'p26', 'pl40', 'pl60', 'pn', 'i5', 'p11', 'pne', 'pcl', 'pl50',
    'pcr', 'w55', 'pl5', 'ph4.5', 'pl80', 'pg', 'w28', 'w30', 'pl30', 'p19',
    'i4l', 'i2r', 'pw3.2', 'pm20', 'pbp', 'p5', 'pl120', 'w24', 'p13', 'w57',
    'ip', 'p10', 'il100', 'il60', 'il90', 'pb', 'pl110', 'w59', 'il80', 'pl100',
    'ph4', 'pmb', 'p14', 'pl15', 'i4', 'p16', 'p3', 'pl70', 'pdd', 'pr70',
    'w13', 'w32', 'i2', 'pr40', 'pm30', 'w63', 'p12', 'p17', 'p18', 'im',
    'pl20', 'p6', 'pw3.5', 'p27', 'pcd', 'i14', 'p2', 'p1', 'i12', 'wc',
    'i10', 'p23', 'w58', 'p25', 'ph3', 'pl90', 'pbm', 'w5', 'pl10', 'pss',
    'pm55', 'phclr', 'i13', 'i1', 'ph2.2', 'w47', 'pr60', 'w38', 'il50', 'w16',
    'w22', 'p20', 'pn-2', 'iz', 'p9', 'p1n', 'ph4.3', 'ps', 'pm8', 'w3',
    'w21', 'p29', 'w18', 'pa10', 'pa14', 'pa13', 'il70', 'ph2', 'pr100', 'pr80',
    'pm5', 'w45', 'pmr', 'w12', 'ph2.9', 'pr50', 'il110', 'w42', 'p8', 'pt',
    'pm35', 'pa12', 'w41', 'p28', 'ph3.5', 'pw4', 'pm2.5', 'w37', 'ph5.3', 'ph5.5',
    'ph2.8', 'i15', 'w10', 'pmblr', 'p21', 'ph4.2', 'pm15', 'pr30', 'pctl', 'w66',
    'w46', 'ph1.8', 'pm50', 'w20', 'w15', 'pl25', 'pm40', 'pa18', 'pa6', 'pw4.5',
    'p15', 'ph2.5', 'p4', 'w35', 'pm10', 'pr20', 'i3', 'ph3.2', 'pw3', 'ph2.4',
    'ph4.8', 'pw4.2', 'phcs', 'ph2.1', 'w34', 'pc', 'pr45', 'pm2', 'pl35', 'pcs',
    'pw2.5', 'i11', 'w60', 'pr10', 'pa8', 'p24', 'w8', 'w14', 'pm13', 'pnlc',
    'pclr', 'w56', 'w43', 'ph3.8',
]


def convert(size, box):
    """Turn corner coordinates into a normalised centre/size box.

    Args:
        size: ``(w, h)``, the image width and height.
        box: ``(xmin, xmax, ymin, ymax)``, the corners of the annotated box.

    Returns:
        ``(x_center, y_center, w, h)``, each divided by the matching image
        dimension.
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    return (x * dw, y * dh, w * dw, h * dh)


def annos2txt(annos, out_dir, width=2048, height=2048):
    """Write the txt label file for a single image.

    Each kept object becomes one line: the class id followed by the four
    values returned by ``convert``, separated by single spaces.

    Args:
        annos: one entry of ``annotations["imgs"]``; the keys read here are
            ``"path"`` (e.g. ``"test/10056.jpg"``) and ``"objects"``, where
            each object has ``"category"`` and a ``"bbox"`` dict with
            ``xmin``/``ymin``/``xmax``/``ymax``.
        out_dir: root directory for the output; the relative folder found in
            ``annos["path"]`` is kept underneath it.
        width: image width used for normalisation (defaults to 2048).
        height: image height used for normalisation (defaults to 2048).
    """
    image_name = annos["path"]
    # splitext instead of [:-4] so extensions of any length are handled.
    out_txt = os.path.join(out_dir, os.path.splitext(image_name)[0] + '.txt')

    os.makedirs(out_dir, exist_ok=True)
    # The image path carries a sub-folder ("train/", "test/"); make sure it
    # exists below out_dir, otherwise open() fails for a fresh output root.
    os.makedirs(os.path.dirname(out_txt) or '.', exist_ok=True)

    # One-pass lookup table; setdefault keeps the first position of a name,
    # which is what list.index() would have returned.
    class_ids = {}
    for idx, name in enumerate(classes):
        class_ids.setdefault(name, idx)

    lines = []
    for obj in annos['objects']:
        cls_id = class_ids.get(obj['category'])
        if cls_id is None:
            continue
        bbox = obj['bbox']
        bndbox = convert(
            (width, height),
            (bbox['xmin'], bbox['xmax'], bbox['ymin'], bbox['ymax']),
        )
        lines.append(str(cls_id) + " " + " ".join(str(a) for a in bndbox) + '\n')

    # 'w' rather than 'a': running the conversion again must replace the
    # labels of an image instead of appending duplicates to them.
    with open(out_txt, 'w', encoding='utf-8') as f:
        f.writelines(lines)


def main(filedir="./annotations_all.json", ids_file="./train/ids.txt",
         outdir='./'):
    """Generate a txt file for every listed image id that has annotations.

    Args:
        filedir: path of the annotation JSON file.
        ids_file: text file with one image id per line; point it at the
            train or the test split.
        outdir: root directory in which the txt files are written.
    """
    with open(ids_file, encoding='utf-8') as f:
        ids = f.read().splitlines()
    with open(filedir, encoding='utf-8') as f:
        annos = json.load(f)

    img_num = len(ids)
    for cnt_id, imgid in enumerate(ids, start=1):
        print('\rprocessing :[{} / {}]'.format(cnt_id, img_num), end="")
        # Ids without an entry in the annotation file are skipped.
        if imgid in annos['imgs']:
            annos2txt(annos['imgs'][imgid], outdir)

    # Example of annos['imgs'][imgid] (values as printed in the original
    # write-up; the last 'xmin' appears with its leading digits missing):
    # {'path': 'test/10056.jpg', 'id': 10056, 'objects': [
    #   {'bbox': {'xmin': 452.475, 'ymin': 886.139, 'xmax': 468.3168, 'ymax': 916.8317}, 'category': 'i5'},
    #   {'bbox': {'xmin': 1274.26, 'ymin': 927.723, 'xmax': 1294.0594, 'ymax': 949.505}, 'category': 'i5'},
    #   {'bbox': {'xmin': 414.851, 'ymin': 877.228, 'xmax': 431.6832, 'ymax': 909.901}, 'category': 'pne'},
    #   {'bbox': {'xmin': 1215.84, 'ymin': 928.713, 'xmax': 1237.6237999999998, 'ymax': 950.495}, 'category': 'pne'},
    #   {'bbox': {'xmin': .83, 'ymin': 910.891, 'xmax': 2032.6733, 'ymax': 934.6535}, 'category': 'pne'}]}


if __name__ == "__main__":
    main()

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。