1、VOC标签格式说明
VOC数据格式,会直接把每张图片标注的标签信息保存到一个xml文件中。
xml中的信息如下:
<annotation>
<folder>矿区图像</folder>
<filename>0066.jpg</filename>
<path>/home/zhy/Documents/智能驾驶项目/标注/矿区图像/0066.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>1280</width>
<height>720</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>car</name>
<pose>Unspecified</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>812</xmin>
<ymin>223</ymin>
<xmax>1280</xmax>
<ymax>557</ymax>
</bndbox>
</object>
</annotation>
xml文件中的关键信息说明:
0066.jpg: 是图片名称,则xml文件名为0066.xml;
/home/zhy/Documents/智能驾驶项目/标注/矿区图像/0066.jpg: 是存放该图片的绝对路径;
1280*720: 是图片分辨率,3代表三通道图片;
car: 是类别名;
xmin,ymin,xmax,ymax,定义了每个目标的标定框坐标:即左上角的坐标和右下角的坐标;
2、YOLO标签格式说明
YOLO标签格式,会直接把每张图片标注的标签信息保存到一个txt文件中。
图片名称为1.jpg,则对应的txt文件名称为1.txt。
txt中的信息如下:
3 0.286328 0.475694 0.132031 0.123611
txt文件中的关键信息说明:
每一行代表标注的一个目标,这张图中只标注了一个目标,所以只有一行;
第一个数字代表标注目标的类别编号class_id(本例为3);
后面四个数字代表标注框的中心坐标和标注框的相对宽和高(进行了归一化处理);
五个数据从左到右依次为:(class_id, x_center, y_center, width, height)
同时还会生成一个classes.txt,其中每行一个类别名,行序号(从0开始)即为class_id,例如:
car
3、VOC转YOLO
voctoyolo.py的目的就是把voc数据格式转换为yolo格式:
voc格式标签:图片的实际宽高,标注框的左上角和右下角坐标;
yolo格式标签:标注框的中心坐标(归一化),标注框的宽和高(归一化)。
voc格式转换为yolo格式计算公式:
框中心的实际像素坐标(x_center,y_center);有些实现(包括下面的voctoyolo.py)还会在此结果上再减1:
x_center=(xmax+xmin)/2
y_center=(ymax+ymin)/2
归一化以后的中心坐标(x,y)
x=x_center/width
y=y_center/height
框的宽和高(归一化后)
w=(xmax-xmin)/width
h=(ymax-ymin)/height
voc格式的xml标签文件转化yolo格式的txt标签文件代码:voctoyolo.py
# -*- coding:utf8 -*-
import os
import xml.etree.ElementTree as ET
import io
find_path = '/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/label/3/' # directory that holds the VOC xml files
savepath = '/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/label/4/' # directory that receives the generated txt files
# Known class names; the position of a name in this list is used as its class id.
classes = ['car','Truck','person','bicycle','bus']
class Voc_Yolo(object):
    """Convert Pascal VOC xml annotations into YOLO txt label files.

    Every ``*.xml`` file found under ``find_path`` (sub-directories included)
    produces one ``<stem>.txt`` file in the output directory, with one line per
    object: ``class_id x_center y_center width height`` (all but the id are
    normalised by the image size read from the xml ``<size>`` element).
    """

    def __init__(self, find_path, save_path=None, class_names=None):
        """Store the conversion settings.

        Args:
            find_path: directory that is walked for xml annotation files.
            save_path: output directory; defaults to the module-level
                ``savepath`` so ``Voc_Yolo(find_path)`` keeps working.
            class_names: ordered class names (index == class id); defaults to
                the module-level ``classes`` list.
        """
        self.find_path = find_path
        # The module-level settings are only looked up when no override is given.
        self.save_path = savepath if save_path is None else save_path
        self.class_names = classes if class_names is None else class_names

    def Make_txt(self, outfile):
        """Create (or truncate) ``outfile`` and return the open file handle.

        The caller is responsible for closing the returned handle.
        """
        out = open(outfile, 'w')
        print("创建成功:{}".format(outfile))
        return out

    def Work(self, count):
        """Convert all xml files and return ``count`` plus the number converted.

        Objects whose class name is not in ``self.class_names`` are skipped.
        The ``<difficult>`` flag is not consulted, so difficult objects are
        written like any other object.

        Args:
            count: starting value of the processed-file counter.

        Returns:
            ``count`` increased by one for every xml file that was converted.
        """
        if self.save_path and not os.path.isdir(self.save_path):
            os.makedirs(self.save_path)

        for dir_path, _dirs, files in os.walk(self.find_path):
            for file in files:
                stem, ext = os.path.splitext(file)
                # Only annotation files are converted; anything else is ignored.
                if ext.lower() != '.xml':
                    continue
                count += 1

                # Use the directory reported by os.walk so nested folders work.
                input_file = os.path.join(dir_path, file)
                outfile = os.path.join(self.save_path, stem + '.txt')

                # Parse first so a malformed xml does not leave an open handle.
                xml_root = ET.parse(input_file).getroot()
                size = xml_root.find('size')
                w_image = float(size.find('width').text)
                h_image = float(size.find('height').text)

                # The with-block closes the txt file even if an object is malformed.
                with self.Make_txt(outfile) as out:
                    for obj in xml_root.iter('object'):
                        classname = obj.find('name').text
                        if classname not in self.class_names:
                            continue
                        cls_id = self.class_names.index(classname)

                        xmlbox = obj.find('bndbox')
                        x_min = float(xmlbox.find('xmin').text)
                        x_max = float(xmlbox.find('xmax').text)
                        y_min = float(xmlbox.find('ymin').text)
                        y_max = float(xmlbox.find('ymax').text)

                        # Box centre (shifted by one pixel, as in the original
                        # formula) and box size, normalised by the image size.
                        x_center = ((x_min + x_max) / 2 - 1) / w_image
                        y_center = ((y_min + y_max) / 2 - 1) / h_image
                        w = (x_max - x_min) / w_image
                        h = (y_max - y_min) / h_image

                        out.write("{} {} {} {} {}\n".format(
                            cls_id, x_center, y_center, w, h))
        return count
if __name__ == "__main__":
    # Convert everything under find_path, starting the counter at zero,
    # then print the counter value that Work() returns.
    converter = Voc_Yolo(find_path)
    processed = converter.Work(0)
    print(processed)
4、yolo格式转化为voc格式
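voc格式中保存的信息为:xmin,ymin,xmax,ymax,所以只要根据上面的公式,就可以推导出这四个值:
xmin=(x-w/2)*width,xmax=(x+w/2)*width,ymin=(y-h/2)*height,ymax=(y+h/2)*height(下面的脚本还会先给中心的像素坐标加1,以抵消voc转yolo时减去的1)。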
yolo格式的txt标签文件转化voc格式的xml标签文件代码:yolotovoc.py
# -*- coding:utf8 -*-
from xml.dom.minidom import Document
import os
import cv2
def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def makexml(picPath, txtPath, xmlPath, class_names=None):
    """Convert YOLO txt label files into Pascal VOC xml annotation files.

    For every ``*.txt`` file in ``txtPath`` (except ``classes.txt``) the image
    ``<stem>.jpg`` is read from ``picPath`` to obtain its size, and
    ``<stem>.xml`` is written to ``xmlPath``. Label files whose image cannot be
    read are reported and skipped.

    Args:
        picPath: directory containing the ``.jpg`` images.
        txtPath: directory containing the YOLO ``.txt`` label files.
        xmlPath: directory that receives the generated ``.xml`` files.
        class_names: optional class table. Either a mapping from class-id
            string (``'0'``, ``'1'``, ...) to class name, or a sequence of
            names whose index is the class id. Defaults to the built-in table.

    Raises:
        KeyError: if a label line uses a class id missing from the table.
    """
    if class_names is None:
        dic = {'0': "car",
               '1': "lightTruck",
               '2': "person",
               '3': "tipperTruck",
               '4': "construction",
               '5': "tricycle",
               '6': "train",
               '7': "bicycle",
               }
    elif isinstance(class_names, dict):
        dic = class_names
    else:
        dic = {str(index): label for index, label in enumerate(class_names)}

    for name in os.listdir(txtPath):
        stem, ext = os.path.splitext(name)
        # classes.txt lists class names, not boxes, and has no matching image.
        if ext.lower() != '.txt' or name == 'classes.txt':
            continue
        print(name)

        image_name = stem + ".jpg"
        img = cv2.imread(os.path.join(picPath, image_name))
        if img is None:
            # imread signals failure with None instead of raising.
            print("skipped {}: cannot read image {}".format(name, image_name))
            continue
        Pheight, Pwidth, Pdepth = img.shape

        with open(os.path.join(txtPath, name)) as txtFile:
            txtList = txtFile.readlines()

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)
        _append_text_element(xmlBuilder, annotation, "folder", "driving_annotation_dataset")
        _append_text_element(xmlBuilder, annotation, "filename", image_name)

        size = xmlBuilder.createElement("size")
        _append_text_element(xmlBuilder, size, "width", Pwidth)
        _append_text_element(xmlBuilder, size, "height", Pheight)
        _append_text_element(xmlBuilder, size, "depth", Pdepth)
        annotation.appendChild(size)

        for line in txtList:
            oneline = line.split()  # tolerant of tabs and repeated spaces
            if not oneline:
                continue  # blank line, e.g. a trailing newline

            obj_node = xmlBuilder.createElement("object")
            _append_text_element(xmlBuilder, obj_node, "name", dic[oneline[0]])
            _append_text_element(xmlBuilder, obj_node, "pose", "Unspecified")
            _append_text_element(xmlBuilder, obj_node, "truncated", "0")
            _append_text_element(xmlBuilder, obj_node, "difficult", "0")

            # Centre in pixels (plus the one-pixel offset used by this script)
            # and half of the box extent in pixels.
            x_center = float(oneline[1]) * Pwidth + 1
            y_center = float(oneline[2]) * Pheight + 1
            half_w = float(oneline[3]) * 0.5 * Pwidth
            half_h = float(oneline[4]) * 0.5 * Pheight

            bndbox = xmlBuilder.createElement("bndbox")
            _append_text_element(xmlBuilder, bndbox, "xmin", int(x_center - half_w))
            _append_text_element(xmlBuilder, bndbox, "ymin", int(y_center - half_h))
            _append_text_element(xmlBuilder, bndbox, "xmax", int(x_center + half_w))
            _append_text_element(xmlBuilder, bndbox, "ymax", int(y_center + half_h))
            obj_node.appendChild(bndbox)
            annotation.appendChild(obj_node)

        # Open as utf-8 so the bytes on disk match the declared xml encoding.
        with open(os.path.join(xmlPath, stem + ".xml"), 'w', encoding='utf-8') as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')
if __name__ == "__main__":
    # Directory settings for the conversion; keep the trailing "/" on each path.
    picPath = "/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/deepsort_data/image/"  # folder with the images
    txtPath = "/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/deepsort_data/labels/"  # folder with the txt labels
    xmlPath = "/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/deepsort_data/xml/"  # folder where the xml files are saved
    makexml(picPath=picPath, txtPath=txtPath, xmlPath=xmlPath)