Skip to content

Python操作xml文件

811字约3分钟

PythonIPython

2023-10-28

Python操作xml文件

1.读取xml文件

import xmltodict
# xmltodict
from lxml import etree
# Parse the fair-scheduler XML file into an lxml element tree
xml_path = '/e3base/apps/fair-scheduler.xml'
tree = etree.parse(xml_path)
# Serialize the parsed tree back into a single XML document and keep the
# result in `config` for the dict conversion that follows
config = etree.tostring(tree)

image-20231027101759809

# Convert the serialized XML into nested dictionaries that Python can work with.
# attr_prefix="" stores XML attributes under their bare names (no "@" prefix);
# cdata_key="" stores element text under the "" key instead of "#text".
parse_options = {"attr_prefix": "", "cdata_key": ""}
queue_config = xmltodict.parse(config, **parse_options)

image-20231027102136973

# The YARN fair-scheduler configuration file is wrapped in an <allocations>
# element; that wrapper is not needed for parsing the queues, so step inside it
queues = queue_config['allocations']

image-20231027102613094

image-20231027102556911

# Extract the queue information of the current configuration file from the queue dict.
# Parsing starts at the root queue, so nested child queues have to be handled.
# The function recurses into every child queue, so all queues in the file are collected.
def parse_queue(queue_obj):
    """Convert one xmltodict queue mapping into a normalized queue dict.

    Args:
        queue_obj: Mapping for a single <queue> element. It must contain a
            "name" entry and may contain a "queue" entry holding its child
            queue(s); every other entry is treated as a queue setting.

    Returns:
        A dict with three keys: "queueName" (the queue's name), "config"
        (all entries except "name" and "queue"), and "childQueue" (a list of
        dicts of this same shape, one per child queue; empty for a leaf).

    Raises:
        KeyError: If a queue mapping has no "name" entry.
    """
    children = queue_obj.get("queue") or []
    # xmltodict yields a list only when an element has several <queue>
    # children; a single child arrives as a bare mapping. Wrap it so the
    # recursion below always walks queue mappings rather than a dict's keys.
    if not isinstance(children, list):
        children = [children]
    return {
        "queueName": queue_obj["name"],
        "config": {
            key: value
            for key, value in queue_obj.items()
            if key not in ("queue", "name")
        },
        "childQueue": [parse_queue(child) for child in children],
    }

image-20231027105328765

2.修改xml文件

2.1 新建xml节点

import xmltodict
# xmltodict
from lxml import etree
# Load the XML file
tree = etree.parse('/e3base/apps/fair-scheduler.xml')
# Before creating a node, first check whether it already exists
queue_path = "/allocations/queue[@name='root']/queue[@name='default']"
# xpath() returns a list of matching nodes: a non-empty list means the node
# already exists and must not be created again
node_list = tree.xpath(queue_path)

image-20231027110227396

# An empty list means the queue does not exist yet, so it can be created
# Locate the parent node that the new queue will be attached to
parent_queue_path = "/allocations/queue[@name='root']"
parent_queue_list = tree.xpath(parent_queue_path)
# Take the first match (raises IndexError if no root queue was found)
parent_queue = parent_queue_list[0]

image-20231027110725124

# Settings for the new queue: each key becomes a child element of <queue>
# and each value becomes that element's text
queue_config_dic = {
    "schedulingPolicy": "fair",
    "weight": "1",
    "maxResources": "1024 mb,1 vcores",
    "minResources": "1024 mb,1 vcores",
    "maxRunningApps": "10",
    "aclSubmitApps": "euansu euansu",
    "aclAdministerApps": "euansu euansu",
}
# Build the <queue name="euansu"> element
new_queue = etree.Element("queue", name="euansu")

# Attach one child element per setting; SubElement creates the element and
# appends it to new_queue in a single call
for config, value in queue_config_dic.items():
    config_element = etree.SubElement(new_queue, config)
    config_element.text = value
# Attach the finished queue beneath its parent queue
parent_queue.append(new_queue)
# Verify the addition: the query should now return the new queue node
queue_path = "/allocations/queue[@name='root']/queue[@name='euansu']"
node_list = tree.xpath(queue_path)

image-20231027112424221

# Write the modified XML tree out to a new XML file
tree.write("/e3base/apps/fair-scheduler-updated.xml", pretty_print=True, encoding="utf-8")
# Opening the generated file shows the queue was added, but the formatting is a bit messy and needs tidying up

image-20231027112611234

# lxml's pretty_print leaves existing whitespace-only text between elements
# untouched, so a tree that mixes the file's original indentation with newly
# appended nodes is written out unevenly. Re-parse the written file with
# remove_blank_text=True to drop that whitespace, then write it back with
# pretty_print=True so the whole document is indented uniformly.
filename = "/e3base/apps/fair-scheduler-updated.xml"
parser = etree.XMLParser(remove_blank_text=True)
root = etree.parse(filename, parser).getroot()
etree.ElementTree(root).write(filename, pretty_print=True, encoding="utf-8")

2.2 修改xml节点

import xmltodict
# xmltodict
from lxml import etree
# Load the XML file
tree = etree.parse('/e3base/apps/fair-scheduler-updated.xml')
# Look up the XML node to modify
queue_node = tree.xpath("/allocations/queue[@name='root']/queue[@name='cmi_1026']")
# xpath() returned a list of matches; keep the first one
queue_node = queue_node[0]

image-20231027113635439

# Find the queue's <maxResources> child element and replace its text
max_resources_element = queue_node.find("maxResources")
max_resources_element.text = "2048 mb, 2 vcores"
# Save the modified tree to fair-scheduler-updated.xml
tree.write("/e3base/apps/fair-scheduler-updated.xml", pretty_print=True, encoding="utf-8")

image-20231027114836438

2.3 删除xml节点

import xmltodict
# xmltodict
from lxml import etree
# Load the XML file
tree = etree.parse('/e3base/apps/fair-scheduler-updated.xml')
# Look up the XML node to delete
queue_node = tree.xpath("/allocations/queue[@name='root']/queue[@name='cmi_1026']")
queue_node = queue_node[0]
# Look up the parent of the node to delete
parent_node = tree.xpath("/allocations/queue[@name='root']")
parent_node = parent_node[0]
# Remove the child queue from its parent
parent_node.remove(queue_node)
# Write the XML back to the configuration file after the deletion
tree.write("/e3base/apps/fair-scheduler-updated.xml", encoding="utf-8", xml_declaration=True)

image-20231027115432869