Suppressing namespace prefixes in ElementTree 1.2
Question:
In python 2.7 (with etree 1.3), I can suppress the XML prefixes on elements like this:
Python 2.7.1 (r271:86832, Jun 16 2011, 16:59:05)
[GCC 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2335.15.00)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import xml.etree.ElementTree as etree
>>> etree.VERSION
'1.3.0'
>>> something = etree.Element('{http://some.namespace}token')
>>> etree.tostring(something)
'<ns0:token />'
>>> etree.register_namespace('', 'http://some.namespace')
>>> etree.tostring(something)
'<token />'
The `register_namespace` function was added in ElementTree 1.3. I’m trying to remove the prefix in a way that is compatible with Python 2.6’s ElementTree, which is version 1.2.6. Here’s what I’ve tried:
Python 2.6.7 (r267:88850, Jul 31 2011, 19:30:54)
[GCC 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2335.15.00)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import xml.etree.ElementTree as etree
>>> etree.VERSION
'1.2.6'
>>> something = etree.Element('{http://some.namespace}token')
>>> etree.tostring(something)
'<ns0:token />'
>>> etree._namespace_map['http://some.namespace'] = ''
>>> etree.tostring(something)
'<:token />'
This is not what I want: the prefix is now blank, but the `:` separator is still emitted. Is there any way to remove the prefix completely?
Answers:
After looking at the ElementTree source code in Python 2.6, I found that the `:` separator is hard-coded in the `fixtag` function. As a workaround, here’s what I did:
from xml.etree import ElementTree as etree

if etree.VERSION[0:3] == '1.2':
    # ElementTree 1.2 has no register_namespace() and hard-codes the ":"
    # separator in fixtag(), so install a replacement that tolerates a
    # missing (None) prefix.
    # NOTE(review): the original snippet lost several lines in extraction;
    # the xmlns handling and the statements that install the patch are
    # reconstructed -- verify against ElementTree 1.2.6 before relying on them.

    def fixtag(tag, namespaces):
        """Return ``(serialized_tag, xmlns)`` for a decorated ``{uri}tag``.

        ``namespaces`` maps the URIs already declared during this
        serialization to their prefixes and is updated in place.  ``xmlns``
        is an ``(attribute_name, uri)`` pair when a namespace declaration
        has to be emitted, otherwise ``None``.
        """
        if isinstance(tag, etree.QName):
            tag = tag.text
        # str.split replaces the Python-2-only string.split helper.
        namespace_uri, tag = tag[1:].split("}", 1)
        prefix = namespaces.get(namespace_uri)
        # Membership tests (rather than "is None") allow a URI to be mapped
        # to None on purpose, meaning "serialize without a prefix".
        if namespace_uri not in namespaces:
            prefix = etree._namespace_map.get(namespace_uri)
            if namespace_uri not in etree._namespace_map:
                prefix = "ns%d" % len(namespaces)
            namespaces[namespace_uri] = prefix
            if prefix == "xml":
                # The "xml" prefix is predefined and is never declared.
                xmlns = None
            else:
                if prefix is not None:
                    nsprefix = ':' + prefix
                else:
                    nsprefix = ''
                xmlns = ("xmlns%s" % nsprefix, namespace_uri)
        else:
            # Already declared on an enclosing element.
            xmlns = None
        # Only emit the ":" separator when there is a prefix to separate.
        if prefix is not None:
            prefix += ":"
        else:
            prefix = ''
        return "%s%s" % (prefix, tag), xmlns

    # Replace the module-level helper and mark the namespace as prefix-less.
    etree.fixtag = fixtag
    etree._namespace_map['http://some.namespace'] = None
else:
    # ElementTree 1.3+ supports an empty prefix directly.
    etree.register_namespace('', 'http://some.namespace')
I created a function at the top of my file and simply call it wherever a tag string is needed. I named it 'ns_tag', but you could name it whatever you want.
def ns_tag(tag, namespace='http://some.namespace/api/4/'):
    """Return *tag* qualified with *namespace* in ``{uri}tag`` form.

    The result can be passed to ``find``/``findall`` wherever a plain tag
    string would be used.

    :param tag: local (unqualified) element name.
    :param namespace: namespace URI to qualify the tag with; defaults to
        the API namespace used throughout this example.
    :returns: the qualified name as a string.
    """
    return str(ElementTree.QName(namespace, tag))
Example:
root = ElementTree.fromstring(xml)
success = root.find(ns_tag('success'))
# find() returns None when the element is absent, so guard before reading .text.
if success is not None and success.text == 'true':
    items = root.find(ns_tag('items'))
    # Compare against None explicitly: an element without children is falsy.
    # Iterating an element yields its direct children.
    for node in (items if items is not None else []):
        # Named item_id rather than id to avoid shadowing the builtin;
        # findtext() yields None instead of raising when <id> is missing.
        item_id = node.findtext(ns_tag('id'))
        # ... process item_id here
It's a hack, but this worked well for me on Jython 2.5.2
# Workaround: map the root's namespace to a sentinel prefix, serialize, then
# delete the sentinel prefix from the serialized text.
root = mydoc.getroot()
ns_match = re.match('^{([^}]+)}', root.tag)
if ns_match is None:
    # The root tag is not namespace-qualified, so there is nothing to strip.
    stripped_xml = etree.tostring(root)
else:
    ns = ns_match.group(1)
    etree._namespace_map[ns] = 'STRIPME'
    # Keep the result; the original expression discarded it.
    stripped_xml = etree.tostring(root).replace('STRIPME:', '')
To remove namespaces from tags, I use this code, where the parser's target object is a subclass of TreeBuilder. In both the start and end methods, we strip the unwanted namespace from the tag.
from xml.etree.ElementTree import XML, XMLParser, tostring, TreeBuilder
class StripNamespace(TreeBuilder):
    """TreeBuilder that drops the ``{uri}`` part of every element tag.

    Only element tags are rewritten; the attribute dictionary is passed
    through to the base class unchanged.
    """

    @staticmethod
    def _local_name(tag):
        """Return *tag* without its leading ``{uri}`` qualifier, if any."""
        _, brace, local = tag.partition('}')
        # No closing brace means the tag was never namespace-qualified.
        return local if brace else tag

    def start(self, tag, attrib):
        """Open an element named by the local part of *tag*.

        Returns whatever the base class returns (the opened element).
        """
        return super(StripNamespace, self).start(self._local_name(tag), attrib)

    def end(self, tag):
        """Close the current element, using the local part of *tag*.

        Returns whatever the base class returns (the closed element).
        """
        return super(StripNamespace, self).end(self._local_name(tag))
# Parse DmsDesc.xml through the namespace-stripping builder and print the result.
target = StripNamespace()
parser = XMLParser(target=target)
with open("DmsDesc.xml") as f:
    content = f.read()
root = XML(content, parser=parser)
# Parenthesized so the line is valid both as a Python 2 print statement and
# as a Python 3 print() call.
print(tostring(root))
Simple and pretty.
In python 2.7 (with etree 1.3), I can suppress the XML prefixes on elements like this:
Python 2.7.1 (r271:86832, Jun 16 2011, 16:59:05)
[GCC 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2335.15.00)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import xml.etree.ElementTree as etree
>>> etree.VERSION
'1.3.0'
>>> something = etree.Element('{http://some.namespace}token')
>>> etree.tostring(something)
'<ns0:token />'
>>> etree.register_namespace('', 'http://some.namespace')
>>> etree.tostring(something)
'<token />'
The `register_namespace` function was added in ElementTree 1.3. I’m trying to remove the prefix in a way that is compatible with Python 2.6’s ElementTree, which is version 1.2.6. Here’s what I’ve tried:
Python 2.6.7 (r267:88850, Jul 31 2011, 19:30:54)
[GCC 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2335.15.00)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import xml.etree.ElementTree as etree
>>> etree.VERSION
'1.2.6'
>>> something = etree.Element('{http://some.namespace}token')
>>> etree.tostring(something)
'<ns0:token />'
>>> etree._namespace_map['http://some.namespace'] = ''
>>> etree.tostring(something)
'<:token />'
This is not what I want: the prefix is now blank, but the `:` separator is still emitted. Is there any way to remove the prefix completely?
After looking at the ElementTree source code in Python 2.6, I found that the `:` separator is hard-coded in the `fixtag` function. As a workaround, here’s what I did:
from xml.etree import ElementTree as etree

if etree.VERSION[0:3] == '1.2':
    # ElementTree 1.2 has no register_namespace() and hard-codes the ":"
    # separator in fixtag(), so install a replacement that tolerates a
    # missing (None) prefix.
    # NOTE(review): the original snippet lost several lines in extraction;
    # the xmlns handling and the statements that install the patch are
    # reconstructed -- verify against ElementTree 1.2.6 before relying on them.

    def fixtag(tag, namespaces):
        """Return ``(serialized_tag, xmlns)`` for a decorated ``{uri}tag``.

        ``namespaces`` maps the URIs already declared during this
        serialization to their prefixes and is updated in place.  ``xmlns``
        is an ``(attribute_name, uri)`` pair when a namespace declaration
        has to be emitted, otherwise ``None``.
        """
        if isinstance(tag, etree.QName):
            tag = tag.text
        # str.split replaces the Python-2-only string.split helper.
        namespace_uri, tag = tag[1:].split("}", 1)
        prefix = namespaces.get(namespace_uri)
        # Membership tests (rather than "is None") allow a URI to be mapped
        # to None on purpose, meaning "serialize without a prefix".
        if namespace_uri not in namespaces:
            prefix = etree._namespace_map.get(namespace_uri)
            if namespace_uri not in etree._namespace_map:
                prefix = "ns%d" % len(namespaces)
            namespaces[namespace_uri] = prefix
            if prefix == "xml":
                # The "xml" prefix is predefined and is never declared.
                xmlns = None
            else:
                if prefix is not None:
                    nsprefix = ':' + prefix
                else:
                    nsprefix = ''
                xmlns = ("xmlns%s" % nsprefix, namespace_uri)
        else:
            # Already declared on an enclosing element.
            xmlns = None
        # Only emit the ":" separator when there is a prefix to separate.
        if prefix is not None:
            prefix += ":"
        else:
            prefix = ''
        return "%s%s" % (prefix, tag), xmlns

    # Replace the module-level helper and mark the namespace as prefix-less.
    etree.fixtag = fixtag
    etree._namespace_map['http://some.namespace'] = None
else:
    # ElementTree 1.3+ supports an empty prefix directly.
    etree.register_namespace('', 'http://some.namespace')
I created a function at the top of my file and simply call it wherever a tag string is needed. I named it 'ns_tag', but you could name it whatever you want.
def ns_tag(tag, namespace='http://some.namespace/api/4/'):
    """Return *tag* qualified with *namespace* in ``{uri}tag`` form.

    The result can be passed to ``find``/``findall`` wherever a plain tag
    string would be used.

    :param tag: local (unqualified) element name.
    :param namespace: namespace URI to qualify the tag with; defaults to
        the API namespace used throughout this example.
    :returns: the qualified name as a string.
    """
    return str(ElementTree.QName(namespace, tag))
Example:
root = ElementTree.fromstring(xml)
success = root.find(ns_tag('success'))
# find() returns None when the element is absent, so guard before reading .text.
if success is not None and success.text == 'true':
    items = root.find(ns_tag('items'))
    # Compare against None explicitly: an element without children is falsy.
    # Iterating an element yields its direct children.
    for node in (items if items is not None else []):
        # Named item_id rather than id to avoid shadowing the builtin;
        # findtext() yields None instead of raising when <id> is missing.
        item_id = node.findtext(ns_tag('id'))
        # ... process item_id here
It's a hack, but this worked well for me on Jython 2.5.2
# Workaround: map the root's namespace to a sentinel prefix, serialize, then
# delete the sentinel prefix from the serialized text.
root = mydoc.getroot()
ns_match = re.match('^{([^}]+)}', root.tag)
if ns_match is None:
    # The root tag is not namespace-qualified, so there is nothing to strip.
    stripped_xml = etree.tostring(root)
else:
    ns = ns_match.group(1)
    etree._namespace_map[ns] = 'STRIPME'
    # Keep the result; the original expression discarded it.
    stripped_xml = etree.tostring(root).replace('STRIPME:', '')
To remove namespaces from tags, I use this code, where the parser's target object is a subclass of TreeBuilder. In both the start and end methods, we strip the unwanted namespace from the tag.
from xml.etree.ElementTree import XML, XMLParser, tostring, TreeBuilder
class StripNamespace(TreeBuilder):
    """TreeBuilder that drops the ``{uri}`` part of every element tag.

    Only element tags are rewritten; the attribute dictionary is passed
    through to the base class unchanged.
    """

    @staticmethod
    def _local_name(tag):
        """Return *tag* without its leading ``{uri}`` qualifier, if any."""
        _, brace, local = tag.partition('}')
        # No closing brace means the tag was never namespace-qualified.
        return local if brace else tag

    def start(self, tag, attrib):
        """Open an element named by the local part of *tag*.

        Returns whatever the base class returns (the opened element).
        """
        return super(StripNamespace, self).start(self._local_name(tag), attrib)

    def end(self, tag):
        """Close the current element, using the local part of *tag*.

        Returns whatever the base class returns (the closed element).
        """
        return super(StripNamespace, self).end(self._local_name(tag))
# Parse DmsDesc.xml through the namespace-stripping builder and print the result.
target = StripNamespace()
parser = XMLParser(target=target)
with open("DmsDesc.xml") as f:
    content = f.read()
root = XML(content, parser=parser)
# Parenthesized so the line is valid both as a Python 2 print statement and
# as a Python 3 print() call.
print(tostring(root))
Simple and pretty.