jens hat geschrieben:
Aber ich kann nicht wirklich glauben, dass es nicht schon sowas in der Art irgendwo fertig rumschwirrt...
Ich habe heute nebenbei mal was gebastelt. Ist nicht wirklich schön, ziemlich unterdokumentiert, aber scheint zu funktionieren:
Code: Alles auswählen
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""XML objects.
The following types can be converted to XML and back:
* None, True and False
* int, long and float
* str
* unicode (as long as it contains only printable characters below U+0020)
* tuple, list, set, frozenset and dict
The container types must not contain cycles, and if an object is referenced
more than once its content will be expanded for every occurrence.
>>> import xmlobjects
>>> a = (1, 2, 3)
>>> b = [a, a]
>>> xml = xmlobjects.encode(b)
>>> print xml
<objects version="0">
<object type="list">
<object type="tuple">
<object type="int">1</object>
<object type="int">2</object>
<object type="int">3</object>
</object>
<object type="tuple">
<object type="int">1</object>
<object type="int">2</object>
<object type="int">3</object>
</object>
</object>
</objects>
If that XML is decoded, both elements of the list are different objects:
>>> c = xmlobjects.decode(xml)
>>> c
[(1, 2, 3), (1, 2, 3)]
>>> c[0] is c[1]
False
"""
from __future__ import division
from itertools import imap
from elementtree.ElementTree import TreeBuilder, XML, tostring
__author__ = "Marc 'BlackJack' Rintsch"
__version__ = '0.0.1'
__date__ = '$Date: 2006-03-16 21:02:28 +0100 (Thu, 16 Mar 2006) $'
__revision__ = '$Rev: 825 $'
# Tag name used for every serialized object; the type name is stored in the
# element's ``type`` attribute.
OBJECT = 'object'
# Tag name of the root element that wraps the serialized object.
OBJECTS = 'objects'
# Maps a type's ``__name__`` to its encoder function ``f(builder, obj)``.
encoders = dict()
# Maps a serialized type name to its decoder function ``f(decoder, element)``.
decoders = dict()
def encode_none(builder, obj):
    """Emit an empty object element of type ``None``.

    *obj* is accepted for signature compatibility with the other encoders
    but is not inspected.
    """
    builder.start_object('None')
    builder.end_object()
def encode_bool(builder, obj):
    """Emit a ``bool`` object element whose text is ``str(obj)``."""
    text = str(obj)
    builder.start_object('bool')
    builder.data(text)
    builder.end_object()
def decode_bool(decoder, element):
    """Return the boolean spelled out in the text of *element*.

    Raises ``Exception`` if the text is neither ``'True'`` nor ``'False'``.
    *decoder* is unused.
    """
    text = element.text
    if text == 'True':
        return True
    if text == 'False':
        return False
    raise Exception('expected True or False, got %r instead' % text)
def encode_complex(builder, obj):
    """Emit a ``complex`` object element.

    The real and the imaginary part are fed back into *builder*, in that
    order, so they end up as two child objects.
    """
    builder.start_object('complex')
    for part in (obj.real, obj.imag):
        builder.feed(part)
    builder.end_object()
def decode_complex(decoder, element):
    """Rebuild a complex number from the first two children of *element*.

    The first child is decoded as the real part, the second one as the
    imaginary part; both are decoded through ``decoder.dispatch``.
    """
    real = decoder.dispatch(element[0])
    imag = decoder.dispatch(element[1])
    return complex(real, imag)
def encode_str(builder, obj):
    """Emit the byte string *obj* as a ``str`` or a ``binstr`` object.

    Pure ASCII strings whose only control characters are tab and line feed
    are stored verbatim with the type name ``str``.  Every other string is
    stored Base64 encoded with the type name ``binstr``.
    """
    try:
        value = obj.decode('ascii')
    except UnicodeDecodeError:
        needs_base64 = True
    else:
        # Most characters below U+0020 are not allowed in XML at all.  A
        # carriage return is allowed, but XML parsers normalize '\r' and
        # '\r\n' to '\n', so a string containing one would not survive a
        # round trip as plain text.  Only tab and line feed are safe.
        control_chars = set(char for char in value if char < ' ')
        control_chars -= set('\t\n')
        needs_base64 = bool(control_chars)
    if needs_base64:
        typename = 'binstr'
        value = obj.encode('base64')
    else:
        typename = 'str'
    builder.start_object(typename)
    builder.data(value)
    builder.end_object()
def decode_str(decoder, element):
    """Return the text of a ``str`` element as a string.

    An element without any text has ``element.text`` set to ``None``; that
    is how an empty string comes back from the parser, so it is decoded as
    ``''`` instead of the literal string ``'None'``.  *decoder* is unused.
    """
    return str(element.text or '')
def decode_binstr(decoder, element):
    """Return the Base64 decoded text of a ``binstr`` element.

    *decoder* is unused.
    """
    encoded = element.text
    return encoded.decode('base64')
def make_encode_object(typename, encode_func):
    """Create an encoder for objects that serialize to a single text node.

    The returned function emits an object element of type *typename* whose
    text is ``encode_func(obj)``.
    """
    def encode_object(builder, obj):
        builder.start_object(typename)
        text = encode_func(obj)
        builder.data(text)
        builder.end_object()

    return encode_object
def make_decode_object(decode_func):
    """Create a decoder for objects that were serialized as a text node.

    The returned function passes the text of the element to *decode_func*
    and returns the result.  An element without any text has
    ``element.text`` set to ``None``; it is passed on as ``''`` so that,
    for example, an empty unicode string does not come back as ``u'None'``.
    """
    def decode_object(decoder, element):
        return decode_func(element.text or '')

    return decode_object
def make_iterable_encoder(typename):
    """Create an encoder that serializes a container as type *typename*.

    The returned function hands the container to
    ``builder.encode_iterable`` together with *typename*.
    """
    def encode_iterable(builder, obj):
        builder.encode_iterable(typename, obj)

    return encode_iterable
def make_iterable_decoder(type_):
    """Create a decoder that rebuilds a container of type *type_*.

    The returned function decodes every child of the element through
    ``decoder.dispatch`` and passes the results to *type_*.
    """
    def decode_iterable(decoder, element):
        dispatch = decoder.dispatch
        return type_(dispatch(child) for child in element)

    return decode_iterable
def register(type_, encode_func, decode_func, serialized_name=None):
    """Register the encoder and decoder function for *type_*.

    The encoder is stored under ``type_.__name__``.  The decoder is stored
    under *serialized_name*, or under ``type_.__name__`` if no (or an
    empty) serialized name is given.
    """
    typename = type_.__name__
    encoders[typename] = encode_func
    decoders[serialized_name or typename] = decode_func
def _register_default_types():
    """Register the encoders and decoders for the built-in types."""
    # Types with hand-written encode/decode functions.
    register(type(None), encode_none, lambda d, e: None, 'None')
    register(bool, encode_bool, decode_bool)
    register(complex, encode_complex, decode_complex)
    register(str, encode_str, decode_str)
    # ``encode_str`` may emit ``binstr`` objects, which need their own decoder.
    decoders['binstr'] = decode_binstr

    # Types that serialize to a single text node and are decoded by calling
    # the type itself on that text.
    for type_, to_text in ((int, str), (float, repr), (unicode, unicode)):
        name = type_.__name__
        register(type_,
                 make_encode_object(name, to_text),
                 make_decode_object(type_))

    # Container types; their items become child objects.
    for container in (list, tuple, set, frozenset, dict):
        register(container,
                 make_iterable_encoder(container.__name__),
                 make_iterable_decoder(container))
    # A dict is serialized as its (key, value) pairs, not just its keys.
    encoders['dict'] = lambda b, o: b.encode_iterable('dict', o.iteritems())

    # ``long`` shares the functions registered for ``int``.
    encoders['long'] = encoders['int']
    decoders['long'] = decoders['int']
# Fill the encoder and decoder tables when the module is imported.
_register_default_types()
class ObjectTreeBuilder(TreeBuilder):
    """Tree builder that turns Python objects into ``object`` elements.

    Creating the builder opens the root element, ``close()`` ends it and
    returns the finished tree.  Objects are added with ``feed()``.
    """

    def __init__(self):
        TreeBuilder.__init__(self)
        self.start(OBJECTS, {'version': '0'})
        self.encoders = encoders

    def start_object(self, typename):
        """Open an object element whose ``type`` attribute is *typename*."""
        self.start(OBJECT, {'type': typename})

    def end_object(self):
        """Close the object element that was opened last."""
        self.end(OBJECT)

    def feed(self, obj):
        """Encode *obj* with the encoder registered for its class name.

        Raises ``Exception`` if no encoder is known for that name.
        """
        typename = obj.__class__.__name__
        if typename not in self.encoders:
            raise Exception('unknown type %r' % typename)
        self.encoders[typename](self, obj)

    def encode_iterable(self, typename, iterable):
        """Emit an object of type *typename* with one child per item."""
        self.start_object(typename)
        for item in iterable:
            self.feed(item)
        self.end_object()

    def close(self):
        """End the root element and return the finished tree."""
        self.end(OBJECTS)
        return TreeBuilder.close(self)
class Decoder(object):
    """Decode the object elements of a serialized XML document.

    *source* is parsed on creation; ``read()`` decodes the next child of
    the root element.
    """

    def __init__(self, source):
        root = XML(source)
        self.elements = iter(root)
        self.decoders = decoders

    def read(self):
        """Decode and return the next top-level object."""
        element = self.elements.next()
        return self.dispatch(element)

    def dispatch(self, element):
        """Decode *element* with the decoder for its ``type`` attribute.

        Raises ``Exception`` if no decoder is known for that type name.
        """
        typename = element.attrib['type']
        if typename not in self.decoders:
            raise Exception('unknown type %r' % typename)
        return self.decoders[typename](self, element)
def indent(element, level=0):
    """Add line breaks and indentation to *element* and its descendants.

    The tree is modified in place.  Text and tails that are already set are
    left alone, except for the tail of the last child, which is always
    overwritten so that the closing tag of *element* lines up with its
    opening tag.  *level* is the nesting depth of *element*.
    """
    if not element.tail:
        element.tail = '\n' + (' ' * level)
    # Ask for the number of children explicitly.  The truth value of an
    # element is not a reliable "has children" test: ``xml.etree`` has
    # deprecated it.
    if len(element):
        if not element.text:
            element.text = '\n' + (' ' * (level + 1))
        for child in element:
            indent(child, level + 1)
        element[-1].tail = '\n' + (' ' * level)
def encode(obj, encoding='utf-8', pretty_print=True):
    """Serialize *obj* and return the XML document as a string.

    *encoding* is passed on to ``tostring``.  If *pretty_print* is true the
    tree is run through ``indent`` before it is serialized.
    """
    builder = ObjectTreeBuilder()
    builder.feed(obj)
    root = builder.close()
    if pretty_print:
        indent(root)
    return tostring(root, encoding)
def decode(text):
    """Return the first object stored in the XML document *text*."""
    decoder = Decoder(text)
    return decoder.read()
def test():
objects = ([None, True, False, (1, 0.1, (42+23j)), '<test>', u'hällø'],
{'name': 'Methusalem', 'age': 1000000000000000000000},
set('Mississippi'),
frozenset('Mississippi'))
xml_string = encode(objects)
print xml_string
print decode(xml_string)
if __name__ == '__main__':
test()
Dein Beispiel sieht dann so aus:
Code: Alles auswählen
In [527]:t = [
.527.: "beispiel", 123, "noch was",
.527.: {'status':'GM', 'rating':2700},
.527.: {'status':'Computer', 'rating':2700},
.527.: {'status':'Amateur', 'rating':1400},
.527.: {
.527.: "eins": {"a":1, "b":2},
.527.: "zwei": {"c":2, "d":3},
.527.: },
.527.:]
In [528]:print xmlobjects.encode(t)
<objects version="0">
<object type="list">
<object type="str">beispiel</object>
<object type="int">123</object>
<object type="str">noch was</object>
<object type="dict">
<object type="tuple">
<object type="str">status</object>
<object type="str">GM</object>
</object>
<object type="tuple">
<object type="str">rating</object>
<object type="int">2700</object>
</object>
</object>
<object type="dict">
<object type="tuple">
<object type="str">status</object>
<object type="str">Computer</object>
</object>
<object type="tuple">
<object type="str">rating</object>
<object type="int">2700</object>
</object>
</object>
<object type="dict">
<object type="tuple">
<object type="str">status</object>
<object type="str">Amateur</object>
</object>
<object type="tuple">
<object type="str">rating</object>
<object type="int">1400</object>
</object>
</object>
<object type="dict">
<object type="tuple">
<object type="str">eins</object>
<object type="dict">
<object type="tuple">
<object type="str">a</object>
<object type="int">1</object>
</object>
<object type="tuple">
<object type="str">b</object>
<object type="int">2</object>
</object>
</object>
</object>
<object type="tuple">
<object type="str">zwei</object>
<object type="dict">
<object type="tuple">
<object type="str">c</object>
<object type="int">2</object>
</object>
<object type="tuple">
<object type="str">d</object>
<object type="int">3</object>
</object>
</object>
</object>
</object>
</object>
</objects>
Man kann natürlich sagen, dass es auch ein wenig aufgebläht ist, weil ich nur ein Tag verwende, aber ich wollte es erstmal einfach und erweiterbar halten, so dass man auch Typnamen verwenden kann, die keine gültigen Tagnamen sind.
Man kann auch (De)Kodierfunktionen für andere Datentypen registrieren.
Wenn Zeichenketten "binäre" Daten enthalten, dann werden sie in Base64 kodiert gespeichert.
Und das Ganze kommt nicht mit zyklischen Datenstrukturen klar, und Referenzen auf das gleiche Objekt werden jedes Mal neu kodiert; beim Wiedereinlesen hat man dann zwei gleiche Objekte, die aber nicht dieselben sind.