淘先锋技术网

首页 1 2 3 4 5 6 7

1、VOC标签格式说明

VOC数据格式,会直接把每张图片标注的标签信息保存到一个xml文件中。

xml中的信息如下:

<annotation>
	<folder>矿区图像</folder>
	<filename>0066.jpg</filename>
	<path>/home/zhy/Documents/智能驾驶项目/标注/矿区图像/0066.jpg</path>
	<source>
		<database>Unknown</database>
	</source>
	<size>
		<width>1280</width>
		<height>720</height>
		<depth>3</depth>
	</size>
	<segmented>0</segmented>
	<object>
		<name>car</name>
		<pose>Unspecified</pose>
		<truncated>1</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>812</xmin>
			<ymin>223</ymin>
			<xmax>1280</xmax>
			<ymax>557</ymax>
		</bndbox>
	</object>
</annotation>

xml文件中的关键信息说明:

 0066.jpg: 是图片名称,则xml文件名为0066.xml;

 /home/zhy/Documents/智能驾驶项目/标注/矿区图像/0066.jpg: 是存放该图片的绝对路径;

 1280*720: 是图片分辨率,3代表三通道图片;

car: 是类别名;

 xmin,ymin,xmax,ymax,定义了每个目标的标定框坐标:即左上角的坐标和右下角的坐标;

2、YOLO标签格式说明

YOLO标签格式,会直接把每张图片标注的标签信息保存到一个txt文件中。

图片名称为1.jpg,则对应的txt文件名称为1.txt。

txt中的信息如下:
 

3 0.286328 0.475694 0.132031 0.123611

txt文件中的关键信息说明:

    每一行代表标注的一个目标,这张图中只标注了一个目标,所以只有一行;
    第一个数字(本例中为3)代表标注目标的类别编号;
    后面四个数字代表标注框的中心坐标和标注框的相对宽和高(进行了归一化处理);
    五个数据从左到右依次为:(class_id,  x_center,  y_center,  width,  height)

同时还会生成一个classes.txt,里面内容如下:

car

3、VOC转YOLO

voctoyolo.py的目的就是把voc数据格式转换为yolo格式:

    voc格式标签:图片的实际宽高,标注框的左上角和右下角坐标;
    yolo格式标签:标注框的中心坐标(归一化),标注框的宽和高(归一化)。

voc格式转换为yolo格式计算公式:

   框中心的实际坐标(x,y);有些实现(如下文的voctoyolo.py)还会在中心坐标上再减1

      x_center=(xmax+xmin)/2

      y_center=(ymax+ymin)/2

  

    归一化以后的中心坐标(x,y)

     x=x_center/width

     y=y_center/height

 

    框的高和宽(归一化后)

    w=(xmax-xmin)/width

    h=(ymax-ymin)/height

 voc格式的xml标签文件转化yolo格式的txt标签文件代码:voctoyolo.py

# -*- coding:utf8 -*-
import os
import xml.etree.ElementTree as ET
import io

# Directory that holds the VOC xml annotation files to convert.
find_path = '/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/label/3/'
# Directory that receives the generated YOLO txt label files.
savepath = '/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/label/4/'

# Known class names; an entry's position in this list is used as its class id.
classes = [
    'car',
    'Truck',
    'person',
    'bicycle',
    'bus',
]


class Voc_Yolo(object):
    """Convert Pascal VOC xml annotations into YOLO txt label files.

    For every ``.xml`` file found under ``find_path`` one ``.txt`` file with
    the same base name is written to the output directory.  Each kept object
    becomes one line: ``class_id x_center y_center width height``, with all
    four geometry values divided by the image width/height read from the
    xml ``<size>`` element.

    Args:
        find_path: Directory that is walked (recursively) for xml files.
        save_path: Output directory.  When ``None`` the module-level
            ``savepath`` is used, which keeps ``Voc_Yolo(find_path)`` working.
        class_names: Sequence of class names; the index of a name is its
            class id.  When ``None`` the module-level ``classes`` is used.
    """

    def __init__(self, find_path, save_path=None, class_names=None):
        self.find_path = find_path
        self.save_path = save_path
        self.class_names = class_names

    def Make_txt(self, outfile):
        """Create (or truncate) ``outfile`` for writing and return the handle.

        The caller is responsible for closing the returned file object.
        """
        out = open(outfile, 'w')
        print("创建成功:{}".format(outfile))
        return out

    def Work(self, count):
        """Convert every xml file under ``self.find_path``.

        Objects whose class name is not in the class list, or whose
        ``<difficult>`` flag is ``1``, are skipped.  Files without an ``.xml``
        extension are ignored and not counted.  Output files are written
        flat into the output directory, so two xml files with the same base
        name in different sub-directories overwrite each other.

        Args:
            count: Starting value of the processed-file counter.

        Returns:
            ``count`` plus the number of xml files that were converted.

        Raises:
            xml.etree.ElementTree.ParseError: If an xml file is malformed.
            AttributeError: If a required element (``size``, ``name``,
                ``bndbox`` ...) is missing from an annotation.
        """
        # Resolve the optional settings lazily so that the module-level
        # defaults are only touched when the caller did not override them.
        out_dir = savepath if self.save_path is None else self.save_path
        names = classes if self.class_names is None else self.class_names

        for dirpath, _dirnames, filenames in os.walk(self.find_path):
            for filename in filenames:
                stem, ext = os.path.splitext(filename)
                if ext.lower() != '.xml':
                    continue
                count += 1
                # Join with the directory currently being walked so files in
                # sub-directories resolve and no trailing '/' is required.
                input_file = os.path.join(dirpath, filename)
                outfile = os.path.join(out_dir, stem + '.txt')

                annotation = ET.parse(input_file).getroot()
                size = annotation.find('size')
                w_image = float(size.find('width').text)
                h_image = float(size.find('height').text)

                lines = []
                for obj in annotation.iter('object'):
                    classname = obj.find('name').text
                    difficult = obj.find('difficult')
                    is_difficult = (
                        difficult is not None
                        and (difficult.text or '').strip() == '1'
                    )
                    if classname not in names or is_difficult:
                        continue
                    cls_id = names.index(classname)
                    xmlbox = obj.find('bndbox')
                    x_min = float(xmlbox.find('xmin').text)
                    x_max = float(xmlbox.find('xmax').text)
                    y_min = float(xmlbox.find('ymin').text)
                    y_max = float(xmlbox.find('ymax').text)
                    # The pixel centre is shifted by -1 before normalising;
                    # this offset is kept unchanged from the original script.
                    x_center = ((x_min + x_max) / 2 - 1) / w_image
                    y_center = ((y_min + y_max) / 2 - 1) / h_image
                    w = (x_max - x_min) / w_image
                    h = (y_max - y_min) / h_image
                    lines.append(
                        str(cls_id) + " " + str(x_center) + " " + str(y_center)
                        + " " + str(w) + " " + str(h) + '\n')

                # Parse first, write afterwards: a malformed xml no longer
                # leaves an open handle behind, and the handle is always closed.
                out = self.Make_txt(outfile)
                try:
                    out.writelines(lines)
                finally:
                    out.close()
        return count


if __name__ == "__main__":
    # Convert everything under find_path, starting the counter at 0, and
    # print the value returned by Work.
    print(Voc_Yolo(find_path).Work(0))

4、yolo格式转化为voc格式

voc格式中保存的信息为:xmin,ymin,xmax,ymax,所以只要根据上面的公式,就可以推导出这四个值。

yolo格式的txt标签文件转化voc格式的xml标签文件代码:yolotovoc.py

# -*- coding:utf8 -*-
from xml.dom.minidom import Document
import os
import cv2


def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def makexml(picPath, txtPath, xmlPath):
    """Convert YOLO txt label files into Pascal VOC style xml files.

    For every ``.txt`` file in ``txtPath`` the image with the same base name
    and a ``.jpg`` extension is read from ``picPath`` to obtain its height,
    width and channel count, and an xml file with the same base name is
    written to ``xmlPath``.

    Args:
        picPath: Directory containing the ``.jpg`` images.
        txtPath: Directory containing the YOLO ``.txt`` label files.
        xmlPath: Directory that receives the generated ``.xml`` files.

    Raises:
        KeyError: If a label line uses a class id that is not in ``dic``.
        ValueError: If a label line has fewer than five fields or a
            non-numeric coordinate.
    """
    dic = {'0': "car",
           '1': "lightTruck",
           '2': "person",
           '3': "tipperTruck",
           '4': "construction",
           '5': "tricycle",
           '6': "train",
           '7': "bicycle",
           }
    for name in os.listdir(txtPath):
        stem, ext = os.path.splitext(name)
        if ext.lower() != ".txt":
            continue
        print(name)

        # cv2.imread returns None instead of raising when the image is missing
        # or unreadable (for example a classes.txt file has no image).
        img = cv2.imread(os.path.join(picPath, stem + ".jpg"))
        if img is None:
            print("skipped {}: cannot read image {}.jpg".format(name, stem))
            continue
        Pheight, Pwidth, Pdepth = img.shape

        with open(os.path.join(txtPath, name)) as txtFile:
            txtList = txtFile.readlines()

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)

        _append_text_element(xmlBuilder, annotation, "folder", "driving_annotation_dataset")
        _append_text_element(xmlBuilder, annotation, "filename", stem + ".jpg")

        size = xmlBuilder.createElement("size")
        for tag, value in (("width", Pwidth), ("height", Pheight), ("depth", Pdepth)):
            _append_text_element(xmlBuilder, size, tag, str(value))
        annotation.appendChild(size)

        for line in txtList:
            # split() without arguments tolerates tabs and repeated spaces;
            # blank lines produce no fields and are skipped.
            oneline = line.split()
            if not oneline:
                continue
            x_c, y_c, box_w, box_h = (float(value) for value in oneline[1:5])

            obj = xmlBuilder.createElement("object")
            _append_text_element(xmlBuilder, obj, "name", dic[oneline[0]])
            _append_text_element(xmlBuilder, obj, "pose", "Unspecified")
            _append_text_element(xmlBuilder, obj, "truncated", "0")
            _append_text_element(xmlBuilder, obj, "difficult", "0")

            # Centre in pixels, shifted by +1 as in the original script, and
            # the half extents of the box.  int() truncates toward zero.
            x_mid = x_c * Pwidth + 1
            y_mid = y_c * Pheight + 1
            half_w = box_w * 0.5 * Pwidth
            half_h = box_h * 0.5 * Pheight

            bndbox = xmlBuilder.createElement("bndbox")
            _append_text_element(xmlBuilder, bndbox, "xmin", str(int(x_mid - half_w)))
            _append_text_element(xmlBuilder, bndbox, "ymin", str(int(y_mid - half_h)))
            _append_text_element(xmlBuilder, bndbox, "xmax", str(int(x_mid + half_w)))
            _append_text_element(xmlBuilder, bndbox, "ymax", str(int(y_mid + half_h)))
            obj.appendChild(bndbox)

            annotation.appendChild(obj)

        # The xml declaration says utf-8, so the file is opened as utf-8 too
        # rather than with the platform default encoding.
        with open(os.path.join(xmlPath, stem + ".xml"), 'w', encoding='utf-8') as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')


if __name__ == "__main__":
    # Dataset locations; every directory path keeps its trailing '/'.
    makexml(
        # folder that contains the images
        picPath="/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/deepsort_data/image/",
        # folder that contains the txt label files
        txtPath="/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/deepsort_data/labels/",
        # folder where the xml files are saved
        xmlPath="/home/zhy/Documents/Perception/camera_data/mine_obstacle_image/deepsort_data/xml/",
    )