Code: Alles auswählen
from xml.dom.minidom import parse as makeDomObjFromFile, parseString as makeDomObjFromString
def _first_element_text(item, tag):
    """Return the concatenated text of the first ``tag`` element below ``item``.

    Only direct child nodes of type TEXT_NODE are concatenated; other child
    nodes (e.g. nested elements) are skipped.  Returns an empty string when
    no such element exists or when it has no text children.
    """
    elements = item.getElementsByTagName(tag)
    if not elements:
        return ""
    return "".join(
        node.data
        for node in elements[0].childNodes
        if node.nodeType == node.TEXT_NODE
    )


def parseRDFobj(dom_obj,extracttags={"title":"no title","link":None,"description":"no description"},encoding=None):
    """Yield one dict per ``<item>`` element found in ``dom_obj``.

    dom_obj     -- a DOM object offering ``getElementsByTagName`` (for
                   example the result of ``xml.dom.minidom.parse``).
    extracttags -- mapping of tag name -> fallback value.  For every key the
                   text of the first matching element inside the item is
                   extracted; if the element is missing or its text is
                   empty, the fallback value is used instead.  The mapping
                   is only read, never modified.
    encoding    -- if truthy, extracted text is encoded with ``.encode()``
                   using this codec.  Fallback values are passed through
                   unencoded.

    Each yielded dict has exactly the keys of ``extracttags``.
    """
    for item in dom_obj.getElementsByTagName("item"):
        extracted_item = {}
        for tag in extracttags:
            text = _first_element_text(item, tag)
            # Explicit emptiness test instead of ``assert``: assertions are
            # removed under ``python -O``, which would silently disable the
            # fallback to the default value.
            if not text:
                extracted_item[tag] = extracttags[tag]
                continue
            if encoding:
                text = text.encode(encoding)
            extracted_item[tag] = text
        yield extracted_item
if __name__ == "__main__":
    # Demo: fetch the tagesschau news ticker feed and print every item.
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    # closing() makes sure the HTTP response is released even if parsing
    # raises; the response is fully consumed by the parser inside the block.
    with closing(urlopen("http://www.tagesschau.de/newsticker.rdf")) as response:
        dom_obj = makeDomObjFromFile(response)
    for item in parseRDFobj(dom_obj, encoding="latin-1"):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(item)
