python dom解析xml实例

本文介绍了一个使用Python的dom模块解析XML文件的例子,该程序能够读取XML文件中关于不同对象的位置信息,并根据特定条件筛选和转换数据。

python dom解析xml实例

#python version :Python3.4(64-bit)

#['car', 'bus', 'truck', 'layon', 'backmirror', 'motorbike', 'windglass', 'entrylisence', 'carrier', 'bicycle', 'cat', 'carlight', 'paperbox', 'hungs', 'safebelt', 'newersign', 'wheel', 'tricar', 'person', 'dog', 'horse', 'cartopwindow', 'lightcover', 'anusigns', 'plate', 'filepath', 'singerFrame', 'unitPrice']
#标签类型

import os
import xml.dom.minidom
import datetime
import time

# Width and height of the image described by the XML file currently being
# processed. handle() overwrites both from the file's <width>/<height>
# elements; judge(), windowdata() and lightdata() read them.
__width = 0.0
__height = 0.0

class bndbox(object):
    """Axis-aligned rectangle stored as four float attributes.

    Every constructor argument is passed through ``float()``, so numeric
    strings are accepted as well as numbers.
    """

    def __init__(self,a,b,c,d):
        # a, b become (xmin, ymin); c, d become (xmax, ymax).
        corners = [float(value) for value in (a, b, c, d)]
        self.xmin, self.ymin, self.xmax, self.ymax = corners

def _tag_text(node, tag):
    """Return the text inside the first ``tag`` element found under ``node``."""
    return node.getElementsByTagName(tag)[0].firstChild.data


def _read_bndbox(node):
    """Build a bndbox from the xmin/ymin/xmax/ymax elements under ``node``."""
    return bndbox(_tag_text(node, 'xmin'), _tag_text(node, 'ymin'),
                  _tag_text(node, 'xmax'), _tag_text(node, 'ymax'))


def handle(filename):
    """Convert one annotation XML file into a label ``.txt`` file beside it.

    The image size is read from the ``<width>``/``<height>`` elements and
    stored in the module globals ``__width``/``__height`` (the geometry
    helpers read them).  Every ``<object>`` is then classified by its
    ``<name>``:

    * ``car``/``bus``/``truck``: the box is kept; if judge() accepts it, one
      line ``1 xmin ymin xmax ymax <windowdata> <lightdata>`` is written,
      with x values divided by the width and y values by the height.  Boxes
      that judge() rejects produce no line at all.
    * ``bicycle``/``motorbike``/``tricar``: one ``1`` line padded with -1.
    * anything else (including ``windglass`` and ``carlight``, whose boxes
      are also kept for matching against cars): one ``0`` line padded with -1.

    The output path is ``filename`` with its extension replaced by ``.txt``.
    """
    global __width
    global __height
    car = []
    window = []
    light = []
    otherCar = 0
    notcar = 0

    # Parse the whole document into a DOM tree and take its root element.
    root = xml.dom.minidom.parse(filename).documentElement

    # Image size; shared with judge()/windowdata()/lightdata() via globals.
    __width = float(_tag_text(root, 'width'))
    __height = float(_tag_text(root, 'height'))

    # Sort every annotated object into its category by name.
    for node in root.getElementsByTagName('object'):
        name = _tag_text(node, 'name')
        if name in ('car', 'bus', 'truck'):
            car.append(_read_bndbox(node))
        elif name in ('bicycle', 'motorbike', 'tricar'):
            otherCar += 1
        else:
            # Windows and lights are remembered for matching, but they are
            # still counted as non-car objects like everything else here.
            if name == 'windglass':
                window.append(_read_bndbox(node))
            elif name == 'carlight':
                light.append(_read_bndbox(node))
            notcar += 1

    fname = os.path.splitext(filename)[0]
    with open(fname+'.txt','w') as f:
        for box in car:
            if judge(box):
                fields = [
                    '1',
                    str(box.xmin/__width),
                    str(box.ymin/__height),
                    str(box.xmax/__width),
                    str(box.ymax/__height),
                    windowdata(box, window),
                    lightdata(box, light),
                ]
                f.write(' '.join(fields)+'\n')
        f.write('1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n' * otherCar)
        f.write('0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n' * notcar)

def judge(x):
    """Return True when box ``x`` is usable, False otherwise.

    A box is rejected when its min corner exceeds its max corner on either
    axis, when a min coordinate is negative, or when a max coordinate
    exceeds the current image size held in ``__width``/``__height``.
    """
    misordered = x.xmin > x.xmax or x.ymin > x.ymax
    outside = x.xmin < 0 or x.ymin < 0 or x.xmax > __width or x.ymax > __height
    return not (misordered or outside)
# The "IOU" here is the author's own redefinition, based on the IoU idea: it
# measures how strongly a small box is related to a large box.
def getIOU(a,b):
    """Return the fraction of box ``b`` that is covered by box ``a``.

    Despite the name this is not the symmetric intersection-over-union: the
    overlap area is divided by the area of ``b`` alone, so the result is 1.0
    when ``b`` lies completely inside ``a``.

    ``a`` and ``b`` are objects with ``xmin``/``ymin``/``xmax``/``ymax``
    attributes.  Returns 0.0 when the boxes do not overlap (touching edges
    count as no overlap).
    """
    w = min(a.xmax,b.xmax) - max(a.xmin,b.xmin)
    h = min(a.ymax,b.ymax) - max(a.ymin,b.ymin)
    if w <= 0 or h <= 0:
        return 0.0
    # A positive overlap implies b has positive width and height, so the
    # divisor below cannot be zero.
    sb = (b.xmax - b.xmin) * (b.ymax - b.ymin)
    return float(w * h)/sb


def windowdata(c,w):
    """Return the four corners of the window that best matches car ``c``.

    ``w`` is an iterable of window boxes.  The window with the highest
    getIOU(c, window) score is chosen (the first one wins on ties).  If that
    score is at least 0.7, i.e. 70% or more of the window lies inside the
    car box, the result is eight space-separated numbers: the top-left,
    top-right, bottom-left and bottom-right corners, x divided by
    ``__width`` and y divided by ``__height``.  Otherwise the result is
    eight ``-1`` fields.
    """
    # Find the window most strongly related to the car.
    best_score = 0.0
    best_window = None
    for candidate in w:
        score = getIOU(c, candidate)
        if best_score < score:
            best_score = score
            best_window = candidate
    # Require 70% or more of the window to be inside the car box.  A score of
    # exactly 0.7 is accepted, matching the threshold used by lightdata().
    # best_window can only be None while best_score is still 0.0.
    if best_score < 0.7:
        return '-1 -1 -1 -1 -1 -1 -1 -1'
    left = str(best_window.xmin/__width)
    right = str(best_window.xmax/__width)
    top = str(best_window.ymin/__height)
    bottom = str(best_window.ymax/__height)
    return ' '.join((left, top, right, top, left, bottom, right, bottom))

def lightdata(c,l):
    """Return the normalised midpoint of the two lights best matching car ``c``.

    ``l`` is an iterable of light boxes.  The two boxes with the highest
    getIOU(c, light) scores are tracked.  When both scores are at least 0.7
    the result is ``"x y"``: the mean of the four x coordinates divided by
    ``__width`` and the mean of the four y coordinates divided by
    ``__height``.  Otherwise the result is ``'-1 -1'``.
    """
    # Track the best and second-best lights seen so far.
    runner_up_score, top_score = 0.0, 0.0
    runner_up, top = None, None
    for lamp in l:
        score = getIOU(c, lamp)
        if not runner_up_score < score:
            continue
        if top_score < score:
            # New best: the previous best drops to second place.
            runner_up_score, runner_up = top_score, top
            top_score, top = score, lamp
        else:
            runner_up_score, runner_up = score, lamp
    # Both lights need 70% or more of their box inside the car box.
    if not (runner_up_score >= 0.7 and top_score >= 0.7):
        return '-1 -1'
    center_x = ((runner_up.xmin+runner_up.xmax+top.xmin+top.xmax)/4)/__width
    center_y = ((runner_up.ymin+runner_up.ymax+top.ymin+top.ymax)/4)/__height
    return str(center_x)+' '+str(center_y)

# Usage banner. The text is shown to the user at runtime, so it is kept as is.
print('*************使用说明*************')
print('*    打开储存.xml文件的文件夹    *')
print('*     直接在地址栏上复制地址     *')
print('*           粘贴到下面           *')
print('*     若路径不对提示重新输入     *')
print('*输出的同名txt文件仍在该文件夹下 *')
print('**********************************\n\n')
print('请输入文件路径:')

filepath = input()
print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
# Keep asking until the folder can actually be listed.  OSError covers a
# missing path (FileNotFoundError) as well as a path that is a file, is
# malformed, or is not readable, so none of those crash the script.
while True:
    try:
        fileName = os.listdir(filepath)
        break
    except OSError:
        print('请重新输入文件路径:')
        filepath = input()
for x in fileName:
    # Only names that end in .xml are annotation files; a substring match
    # would also pick up names such as "a.xml.bak".
    if x.endswith('.xml'):
        handle(os.path.join(filepath, x))
print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

xml文件

<?xml version="1.0" encoding="UTF-8"?>
<annotation>
  <folder>VOC2007</folder>
  <filename>test</filename>
  <source>
    <database></database>
    <annotation></annotation>
    <image></image>
    <flickrid></flickrid>
  </source>
  <owner>
    <flickrid></flickrid>
    <name></name>
  </owner>
  <size>
    <width>1600</width>
    <height>1200</height>
    <depth>3</depth>
  </size>
  <segmented></segmented>
  <object>
    <name>carlight</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>1369</xmin>
      <ymin>884</ymin>
      <xmax>1414</xmax>
      <ymax>1042</ymax>
    </bndbox>
  </object>
  <object>
    <name>carlight</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>927</xmin>
      <ymin>881</ymin>
      <xmax>968</xmax>
      <ymax>1037</ymax>
    </bndbox>
  </object>
  <object>
    <name>carlight</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>559</xmin>
      <ymin>716</ymin>
      <xmax>635</xmax>
      <ymax>760</ymax>
    </bndbox>
  </object>
  <object>
    <name>carlight</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>362</xmin>
      <ymin>714</ymin>
      <xmax>427</xmax>
      <ymax>760</ymax>
    </bndbox>
  </object>
  <object>
    <name>plate</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>1118</xmin>
      <ymin>1071</ymin>
      <xmax>1213</xmax>
      <ymax>1119</ymax>
    </bndbox>
  </object>
  <object>
    <name>plate</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>455</xmin>
      <ymin>787</ymin>
      <xmax>529</xmax>
      <ymax>808</ymax>
    </bndbox>
  </object>
  <object>
    <name>backmirror</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>581</xmin>
      <ymin>944</ymin>
      <xmax>633</xmax>
      <ymax>993</ymax>
    </bndbox>
  </object>
  <object>
    <name>backmirror</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>191</xmin>
      <ymin>944</ymin>
      <xmax>249</xmax>
      <ymax>986</ymax>
    </bndbox>
  </object>
  <object>
    <name>backmirror</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>631</xmin>
      <ymin>612</ymin>
      <xmax>672</xmax>
      <ymax>646</ymax>
    </bndbox>
  </object>
  <object>
    <name>backmirror</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>377</xmin>
      <ymin>612</ymin>
      <xmax>408</xmax>
      <ymax>641</ymax>
    </bndbox>
  </object>
  <object>
    <name>backmirror</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>828</xmin>
      <ymin>425</ymin>
      <xmax>875</xmax>
      <ymax>510</ymax>
    </bndbox>
  </object>
  <object>
    <name>windglass</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>983</xmin>
      <ymin>692</ymin>
      <xmax>1380</xmax>
      <ymax>845</ymax>
    </bndbox>
  </object>
  <object>
    <name>windglass</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>189</xmin>
      <ymin>1027</ymin>
      <xmax>501</xmax>
      <ymax>1141</ymax>
    </bndbox>
  </object>
  <object>
    <name>windglass</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>386</xmin>
      <ymin>651</ymin>
      <xmax>603</xmax>
      <ymax>726</ymax>
    </bndbox>
  </object>
  <object>
    <name>cartopwindow</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>288</xmin>
      <ymin>910</ymin>
      <xmax>492</xmax>
      <ymax>954</ymax>
    </bndbox>
  </object>
  <object>
    <name>cartopwindow</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>438</xmin>
      <ymin>590</ymin>
      <xmax>587</xmax>
      <ymax>619</ymax>
    </bndbox>
  </object>
  <object>
    <name>car</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>133</xmin>
      <ymin>898</ymin>
      <xmax>631</xmax>
      <ymax>1204</ymax>
    </bndbox>
  </object>
  <object>
    <name>car</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>356</xmin>
      <ymin>553</ymin>
      <xmax>668</xmax>
      <ymax>864</ymax>
    </bndbox>
  </object>
  <object>
    <name>bus</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>826</xmin>
      <ymin>366</ymin>
      <xmax>1455</xmax>
      <ymax>1119</ymax>
    </bndbox>
  </object>
  <object>
    <name>person</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>475</xmin>
      <ymin>376</ymin>
      <xmax>537</xmax>
      <ymax>536</ymax>
    </bndbox>
    <attribute>
      <clousecolor>0</clousecolor>
      <trousecolor>1</trousecolor>
      <hairtype>1</hairtype>
      <clousetype>1</clousetype>
      <trousetype>1</trousetype>
      <bagtype>2</bagtype>
      <agetype>1</agetype>
      <sextype>0</sextype>
      <racetype>0</racetype>
      <haptype>0</haptype>
    </attribute>
  </object>
  <object>
    <name>person</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>628</xmin>
      <ymin>374</ymin>
      <xmax>715</xmax>
      <ymax>536</ymax>
    </bndbox>
    <attribute>
      <clousecolor>7</clousecolor>
      <trousecolor>1</trousecolor>
      <hairtype>1</hairtype>
      <clousetype>1</clousetype>
      <trousetype>0</trousetype>
      <bagtype>1</bagtype>
      <agetype>1</agetype>
      <sextype>0</sextype>
      <racetype>0</racetype>
      <haptype>0</haptype>
    </attribute>
  </object>
  <object>
    <name>person</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>735</xmin>
      <ymin>405</ymin>
      <xmax>797</xmax>
      <ymax>573</ymax>
    </bndbox>
    <attribute>
      <clousecolor>0</clousecolor>
      <trousecolor>4</trousecolor>
      <hairtype>0</hairtype>
      <clousetype>1</clousetype>
      <trousetype>1</trousetype>
      <bagtype>3</bagtype>
      <agetype>1</agetype>
      <sextype>1</sextype>
      <racetype>0</racetype>
      <haptype>0</haptype>
    </attribute>
  </object>
  <object>
    <name>person</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>715</xmin>
      <ymin>361</ymin>
      <xmax>761</xmax>
      <ymax>522</ymax>
    </bndbox>
    <attribute>
      <clousecolor>0</clousecolor>
      <trousecolor>1</trousecolor>
      <hairtype>1</hairtype>
      <clousetype>1</clousetype>
      <trousetype>0</trousetype>
      <bagtype>0</bagtype>
      <agetype>2</agetype>
      <sextype>0</sextype>
      <racetype>0</racetype>
      <haptype>0</haptype>
    </attribute>
  </object>
  <object>
    <name>motorbike</name>
    <pose>Unspecified</pose>
    <type></type>
    <truncated>0</truncated>
    <difficult>0</difficult>
    <bndbox>
      <xmin>782</xmin>
      <ymin>184</ymin>
      <xmax>869</xmax>
      <ymax>245</ymax>
    </bndbox>
  </object>
</annotation>

输出结果

1 0.2225 0.4608333333333333 0.4175 0.72 0.24125 0.5425 0.376875 0.5425 0.24125 0.605 0.376875 0.605 0.30984375 0.6145833333333334
1 0.51625 0.305 0.909375 0.9325 0.614375 0.5766666666666667 0.8625 0.5766666666666667 0.614375 0.7041666666666667 0.8625 0.7041666666666667 0.7309375 0.8008333333333333
1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值