Verfasst: Montag 15. August 2005, 14:11
Code: Alles auswählen
# Walk the fixed-width data file line by line. The context manager closes
# the handle even when processing a line raises, and iterating the file
# object avoids loading every line into memory the way readlines() does.
with open("Datendatei") as datafile:
    for line in datafile:
        # Drop the leading SKIPBYTES characters that precede the fields.
        line = line[SKIPBYTES:]
        feld1 = line[:4]  # first field: the next four characters
        feld2 = line[4:]  # second field: rest of the line (newline included)
Seit 2002 Diskussionen rund um die Programmiersprache Python
https://www.python-forum.de/
Code: Alles auswählen
# Walk the fixed-width data file line by line. The context manager closes
# the handle even when processing a line raises, and iterating the file
# object avoids loading every line into memory the way readlines() does.
with open("Datendatei") as datafile:
    for line in datafile:
        # Drop the leading SKIPBYTES characters that precede the fields.
        line = line[SKIPBYTES:]
        feld1 = line[:4]  # first field: the next four characters
        feld2 = line[4:]  # second field: rest of the line (newline included)
Bin zwar kein Experte, was SAX angeht, aber parseString findest du folgendermaßen: Anonymous hat geschrieben: Ich versuche gerade, SAX zu verwenden, leider gibt es ein Problem:
Im Buch gibt es ein Beispiel, wie man make_parser verwenden kann. Aber für meinen Fall ist parseString etwas sinnvoller (aus der Python-Dokumentation). Mit make_parser wird als Default der ExpatParser verwendet, bei dem kein parseString vorhanden ist.
Meine Frage: Wie kann man einen String (aus XML-Tags) parsen?
Code: Alles auswählen
>>> import xml.sax
>>> xml.sax.parseString
<function parseString at 0x502d99f0>
Code: Alles auswählen
<?xml version="1.0"?>
<Media>
<Name>CD Nummer 1</Name>
<Table>
<URL>Test.ASC</URL>
<Name>daten</Name>
<Description>...</Description>
<FixedColumn>
<Name>NUMMER</Name>
<Numeric/>
<FixedRange>
<From>97</From>
<Length>4</Length>
</FixedRange>
</FixedColumn>
<FixedColumn>
<Name>Name</Name>
<AlphaNumeric/>
<FixedRange>
<From>101</From>
<Length>12</Length>
</FixedRange>
</FixedColumn>
</Table>
</Media>
Code: Alles auswählen
#!/usr/bin/env python
# -*- encoding: latin-1 -*-
import re
import xml.sax, xml.sax.handler
def parsedata_re(filename):
    """Parse the XML file line by line using regular expressions.

    Only lines between ``<FixedColumn>`` and ``</FixedColumn>`` are
    inspected. For every ``<Name>`` found there a new entry is created,
    and the following ``<Length>``, ``<Numeric/>`` and ``<AlphaNumeric/>``
    tags fill in that entry.

    Args:
        filename: path of the XML description to read.

    Returns:
        dict mapping column name -> dict with the keys ``'length'`` (the
        text of the Length tag, as a string) and ``'type'`` (``'INTEGER'``
        or ``'VARCHAR'``), as far as they were present.

    Raises:
        IOError/OSError: if the file cannot be opened.
    """
    # Non-greedy groups so that two tags on one line are not merged.
    name_rex = re.compile(r'<Name>(.*?)</Name>')
    len_rex = re.compile(r'<Length>(.*?)</Length>')
    values = {}
    current = None  # name of the column being filled, None before a Name
    parse_enabled = False
    # The context manager closes the file even if parsing raises.
    with open(filename, 'r') as f:
        for line in f:
            opened = line.rfind('<FixedColumn>')
            closed = line.rfind('</FixedColumn>')
            if opened != -1:
                # only parse inside FixedColumn
                parse_enabled = True
                # a new column must not write into the previous one
                current = None
            elif closed != -1:
                parse_enabled = False
            if parse_enabled:
                names = name_rex.findall(line)
                if names:
                    current = names[0]
                    values[current] = {}
                if current is not None:
                    lengths = len_rex.findall(line)
                    if lengths:
                        values[current]['length'] = lengths[0]
                    if '<Numeric/>' in line:
                        values[current]['type'] = 'INTEGER'
                    elif '<AlphaNumeric/>' in line:
                        values[current]['type'] = 'VARCHAR'
            # Column opened and closed on the same line: stop parsing
            # after the line has been processed.
            if opened != -1 and closed > opened:
                parse_enabled = False
    return values
def create_sql(values):
    """Build a ``CREATE TABLE Daten`` statement from parsed column specs.

    Args:
        values: mapping column name -> dict providing the keys ``'type'``
            and ``'length'``.

    Returns:
        The SQL text. Column definitions are separated by commas; a
        statement with several columns is not valid SQL without them.

    Raises:
        KeyError: if a column dict lacks ``'type'`` or ``'length'``.
    """
    columns = [
        '%(name)s %(type)s(%(length)s)' % {
            'name': key,
            'type': item['type'],
            'length': item['length'],
        }
        for key, item in values.items()
    ]
    body = ',\n'.join(columns)
    if body:
        # keep the closing parenthesis on its own line
        body += '\n'
    return 'CREATE TABLE Daten\n(\n' + body + ');'
class TablHandler(xml.sax.handler.ContentHandler):
    """SAX handler that collects name, length and type of each FixedColumn.

    After parsing, ``values`` maps every column name to a dict with the
    keys ``'length'`` (text of the Length tag) and ``'type'``
    (``'INTEGER'`` for ``<Numeric/>``, ``'VARCHAR'`` for
    ``<AlphaNumeric/>``), as far as those tags were present.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.in_name = False
        self.in_fc = False
        self.in_len = False
        self.values = {}
        self.currentname = ''
        # Text chunks of the Name/Length element currently being read.
        self._text = []

    def _set(self, key, value):
        """Store *key* for the current column; ignore tags outside one."""
        if self.in_fc and self.currentname in self.values:
            self.values[self.currentname][key] = value

    def startElement(self, name, attrs):
        """Track which relevant tag was opened."""
        if name == 'FixedColumn':
            # we're in FixedColumn, so we are permitted to parse Name-tags
            self.in_fc = True
        elif name == 'Name' and self.in_fc:
            # only permit parsing if we are in FixedColumn
            self.in_name = True
            self._text = []
        elif name == 'Length':
            # we're in Length tag
            self.in_len = True
            self._text = []
        elif name == 'Numeric':
            # the empty tag Numeric occurred
            self._set('type', 'INTEGER')
        elif name == 'AlphaNumeric':
            self._set('type', 'VARCHAR')

    def characters(self, data):
        """Buffer text; the parser may deliver one text node in chunks."""
        if self.in_name or self.in_len:
            self._text.append(data)

    def endElement(self, name):
        """Commit buffered text once the element is complete."""
        if name == 'FixedColumn':
            self.in_fc = False
            # later tags must not be attributed to this column
            self.currentname = ''
        elif name == 'Name':
            if self.in_name:
                self.currentname = ''.join(self._text)
                self.values[self.currentname] = {}
            self.in_name = False
        elif name == 'Length':
            if self.in_len:
                self._set('length', ''.join(self._text))
            self.in_len = False
def parsedata_sax(filename):
    """Parse the XML file *filename* with SAX and return the column dict.

    A fresh TablHandler is attached to a default SAX parser; the handler's
    ``values`` mapping is returned after the parse has finished.
    """
    collector = TablHandler()
    reader = xml.sax.make_parser()
    reader.setContentHandler(collector)
    reader.parse(filename)
    return collector.values
def main():
    """Parse 'tabl.xml' with the SAX variant and print the CREATE TABLE SQL."""
    source = 'tabl.xml'
    # parsedata_re(source) is the regex-based alternative to the SAX parser.
    columns = parsedata_sax(source)
    print(create_sql(columns))


if __name__ == '__main__':
    main()
Könntest du uns vielleicht einfache Daten posten und was dann rauskommen soll? So kann man das etwas einfacher nachvollziehen, denke ich. TripleH hat geschrieben: Ich werd mal versuchen dahinterzusteigen. Kann jemand noch zu meinem vorherigen Posting schauen?
Code: Alles auswählen
# Sample record: 97 filler characters, then a 4-character and a 12-character field.
datei = 'a' * 97 + 'b' * 4 + 'c' * 12
Code: Alles auswählen
{'NUMMER': {'from': '97', 'length': '4', 'type': 'INTEGER'}, 'Name': {'from': '101', 'length': '12', 'type': 'VARCHAR'}}
Code: Alles auswählen
ersten 97 zeichen ueberlesen 1234 blablub blubbla tralala naan zeta erle tuut flufluf tataa arghDATAende
149 2001 hal 9000 ugs
Code: Alles auswählen
#!/usr/bin/env python
# -*- encoding: latin-1 -*-
# Column layout: one [name, {'from': 1-based start, 'length': width,
# 'type': SQL type}] entry per field.
spec = [
    ['NUMMER', {'from': 97, 'length': 4, 'type': 'INTEGER'}],
    ['Name', {'from': 10, 'length': 8, 'type': 'VARCHAR'}],
]

print('Opening file')
f = open('tabl.txt', 'r')
print('Iterating through lines')
# zip() pairs the n-th line of the file with the n-th spec entry and stops
# as soon as either of them is exhausted.
for raw_line, specline in zip(f, spec):
    line = raw_line.splitlines()[0]  # the line without its line ending
    print('Line: %s' % line)
    print('Spec: %s' % (specline,))
    layout = specline[1]
    start = layout['from'] - 1  # 'from' counts from 1, slices from 0
    data = line[start:start + layout['length']]
    print('Data: %s' % data)
print('Closing file. EOP')
f.close()
Code: Alles auswählen
import xml.parsers.expat
import MySQLdb
class Reader:
    """Turn an XML column description into a list-literal string via expat.

    While parsing, the text of every
    ``Media/Table/FixedLength/FixedColumn`` ``Name``, ``FixedRange/From``
    and ``FixedRange/Length`` element is appended to ``query``, producing
    text of the form ``[['NAME', {'from': N,'length': M}], ...]``.
    """

    def read(self, filename):
        """Parse the XML file *filename* and build the query string.

        Args:
            filename: path of the XML file. It is opened in binary mode
                because expat expects bytes from ``read()`` on Python 3.

        Raises:
            IOError/OSError: if the file cannot be opened.
            xml.parsers.expat.ExpatError: if the XML is not well-formed.
        """
        self.query = '['
        self.path = []
        # Initialised up front so the handlers never hit a missing attribute.
        self.cur_data = ''
        self.first_col = True
        p = xml.parsers.expat.ParserCreate()
        p.StartElementHandler = self.start_element
        p.EndElementHandler = self.end_element
        p.CharacterDataHandler = self.char_data
        # The context manager closes the file even if parsing fails.
        with open(filename, 'rb') as f:
            p.ParseFile(f)

    def add_seperator(self):
        """Append ", " before every column entry except the first."""
        if not self.first_col:
            self.query += ", "
        else:
            self.first_col = False

    def start_element(self, name, attrs):
        """Push *name* onto the element path and reset the text buffer."""
        path = "/".join(self.path)
        if path == 'Media':
            if name == 'Table':
                # a new table starts: its first column needs no separator
                self.first_col = True
        self.path.append(name)
        self.cur_data = ''

    def end_element(self, name):
        """Emit the query fragment that belongs to the element being closed."""
        path = "/".join(self.path)
        if path == 'Media/Table/FixedLength/FixedColumn/Name':
            self.add_seperator()
            self.query += "['" + self.cur_data + "',"
        if path == 'Media/Table/FixedLength/FixedColumn/FixedRange/Length':
            self.query += "'length': " + self.cur_data + "}]"
        if path == 'Media/Table/FixedLength/FixedColumn/FixedRange/From':
            self.query += " {'from': " + self.cur_data + ","
        if path == 'Media/Table':
            self.query += "]"
        del self.path[-1]

    def char_data(self, data):
        """Accumulate character data; expat may deliver it in pieces."""
        self.cur_data += data

    def get_query(self):
        """Return the query string built by the last ``read`` call."""
        return self.query
import ast

r = Reader()
# Pass the path of the XML file explicitly (the class above opens
# "C:\\test.xml").
r.read("C:\\test.xml")
spec_text = r.get_query()
print(spec_text)
# get_query() returns the *text* of a list literal. Iterating over a string
# yields single characters, so it has to be turned into real Python objects
# before it can be indexed like [name, {'from': ..., 'length': ...}].
# literal_eval only accepts literals, unlike eval().
spec = ast.literal_eval(spec_text)
# The context manager closes the data file even if a line fails.
with open("C:\\test.txt", 'r') as f:
    for line in f:
        line = line.splitlines()[0]  # the line without its line ending
        print('Line: %s' % line)
        for specline in spec:
            from_point = specline[1]['from']
            to_point = from_point + specline[1]['length']
            data = line[from_point:to_point]
            name = specline[0]
            print('name: %s' % name)
            print('Data: %s' % data)
    print('Closing file. EOP')
Dein Problem ist, dass specline ein String ist. Du kannst den nicht wie ein Dict behandeln. Ich würde dir vorschlagen, dass du die Informationen erstmal als Dict abspeicherst ... TripleH hat geschrieben: Code: Alles auswählen
[...] r = Reader() r.read('test.xml' ) spec = r.get_query().encode('latin-1') line = line.splitlines()[0] print 'Line: %s' % line for specline in spec: from_point = specline[1]['from']
Code: Alles auswählen
self.query += ...
Code: Alles auswählen
# Column layout: one [name, {'from': start, 'length': width}] entry per field.
spec = [
    ['NUMMER', {'from': 97, 'length': 4}],
    ['Name', {'from': 10, 'length': 8}],
]
Code: Alles auswählen
def end_element( self, name):
    """Emit the query fragment for the element being closed.

    The slash-joined element path decides what is appended to
    ``self.query``; the closed element is then popped from ``self.path``.
    """
    path = "/".join(self.path)
    if path == 'Media/Table/FixedLength/FixedColumn/Name':
        self.add_seperator()
        self.query += "['" + self.cur_data + "',"
        # Register the column with placeholder offsets. The original call
        # had unbalanced brackets ("[[ ... ])") and could not be parsed.
        # NOTE(review): `list` is expected to be a module-level list object
        # defined elsewhere in the script (it shadows the builtin).
        list.append([self.cur_data.encode('latin-1'), {'from': 0, 'length': 0}])
    if path == 'Media/Table/FixedLength/FixedColumn/FixedRange/Length':
        self.query += "'length': " + self.cur_data + "}]"
    if path == 'Media/Table/FixedLength/FixedColumn/FixedRange/From':
        self.query += " {'from': " + self.cur_data + ","
    if path == 'Media/Table':
        self.query += "]"
    del self.path[-1]
Code: Alles auswählen
def end_element( self, name):
    """Record name, start offset and length of the column being closed.

    ``From`` and ``Length`` are converted to ``int`` here, so the collected
    entries can be used directly for slicing. Stored as strings,
    ``from + length`` would concatenate and slicing would fail.

    Raises:
        ValueError: if a From/Length element does not contain an integer.
    """
    path = "/".join(self.path)
    if path == 'Media/Table/FixedLength/FixedColumn/Name':
        self.add_seperator()
        self.n = self.cur_data.encode('latin-1')
    if path == 'Media/Table/FixedLength/FixedColumn/FixedRange/From':
        self.f = int(self.cur_data)
    if path == 'Media/Table/FixedLength/FixedColumn/FixedRange/Length':
        # NOTE(review): `list` is expected to be a module-level list object
        # defined elsewhere in the script (it shadows the builtin).
        list.append([self.n, {'from': self.f, 'length': int(self.cur_data)}])
    if path == 'Media/Table':
        self.query += "]"
    # Debug output. NOTE(review): the original indentation was lost; this
    # print is assumed to run for every closing tag - confirm.
    print(self.cur_data)
    del self.path[-1]
def char_data( self, data):
    """Append *data* to the text collected for the current element."""
    self.cur_data = self.cur_data + data
def get_query( self ):
    """Return the query text accumulated so far."""
    built = self.query
    return built
r = Reader()
r.read('test.xml' )
spec = r.get_query().encode('latin-1')
print(spec)
print(list)
# The context manager closes the data file even if a line fails.
with open("C:\\test.txt", 'r') as f:
    for line in f:
        line = line.splitlines()[0]  # the line without its line ending
        print('Line: %s' % line)
        for listline in list:
            print(listline)
            # The collected offsets may be strings (see the printed list),
            # so convert before doing arithmetic and slicing; int() is a
            # no-op for values that already are integers.
            from_point = int(listline[1]['from'])
            to_point = from_point + int(listline[1]['length'])
            data = line[from_point:to_point]
            name = listline[0]
            print('name: %s' % name)
            print('Data: %s' % data)
    print('Closing file. EOP')
Code: Alles auswählen
[['NUMMER', {'length': '5', 'from': '99'}], ['NAME', {'length': '8', 'from': '104'}], ['SKZSICH', {'length': '3', 'from': '112'}], ['DATABR', {'length': '8', 'from': '115'}]]
Code: Alles auswählen
# Best-effort conversion: keep `item` unchanged when it is not a valid
# integer, but let unrelated errors (KeyboardInterrupt, NameError, ...)
# propagate instead of being swallowed by a bare except.
try:
    item = int(item)
except (TypeError, ValueError):
    pass
Code: Alles auswählen
import xml.parsers.expat
import MySQLdb
# Module-level accumulators. `list` receives one
# [name, {'from': int, 'length': int}] entry per column while parsing.
# NOTE: the name shadows the builtin `list`; it is kept because the script
# code below reads it under this name.
list = []
l = []


class Reader:
    """Read an XML column description with expat.

    While parsing, the ``Name``, ``FixedRange/From`` and
    ``FixedRange/Length`` elements below
    ``Media/Table/FixedLength/FixedColumn`` are rendered into the text
    ``query`` and collected as real Python objects in the module-level
    ``list``.
    """

    def read(self, filename):
        """Parse the XML file *filename*.

        Args:
            filename: path of the XML file. It is opened in binary mode
                because expat expects bytes from ``read()`` on Python 3.

        Raises:
            IOError/OSError: if the file cannot be opened.
            xml.parsers.expat.ExpatError: if the XML is not well-formed.
            ValueError: if a From/Length element is not an integer.
        """
        self.query = '['
        self.path = []
        # Initialised up front so the handlers never hit a missing attribute.
        self.cur_data = ''
        self.first_col = True
        p = xml.parsers.expat.ParserCreate()
        p.StartElementHandler = self.start_element
        p.EndElementHandler = self.end_element
        p.CharacterDataHandler = self.char_data
        # The context manager closes the file even if parsing fails.
        with open(filename, 'rb') as f:
            p.ParseFile(f)

    def add_seperator(self):
        """Append ",\\n " before every column entry except the first."""
        if not self.first_col:
            self.query += ",\n "
        else:
            self.first_col = False

    def start_element(self, name, attrs):
        """Push *name* onto the element path and reset the text buffer."""
        path = "/".join(self.path)
        if path == 'Media':
            if name == 'Table':
                # a new table starts: its first column needs no separator
                self.first_col = True
        self.path.append(name)
        self.cur_data = ''

    def end_element(self, name):
        """Record the data of the element being closed."""
        path = "/".join(self.path)
        if path == 'Media/Table/FixedLength/FixedColumn/Name':
            self.add_seperator()
            self.query += "['" + self.cur_data + "',"
            self.n = self.cur_data.encode('latin-1')
        if path == 'Media/Table/FixedLength/FixedColumn/FixedRange/From':
            self.query += " {'from': " + self.cur_data + ","
            self.f = self.cur_data.encode('latin-1')
        if path == 'Media/Table/FixedLength/FixedColumn/FixedRange/Length':
            self.query += "'length': " + self.cur_data + "}]"
            # debug output of the values about to be stored
            print(self.cur_data)
            print('hier')
            print(self.f)
            # Store integers so the entries can be used for slicing.
            list.append([self.n, {'from': int(self.f),
                                  'length': int(self.cur_data.encode('latin-1'))}])
        if path == 'Media/Table':
            self.query += "]"
        del self.path[-1]

    def char_data(self, data):
        """Accumulate character data; expat may deliver it in pieces."""
        self.cur_data += data

    def get_query(self):
        """Return the query string built by the last ``read`` call."""
        return self.query
r = Reader()
r.read('test.xml' )
spec = r.get_query().encode('latin-1')
print(spec)
print(list)
print(l)
# The context manager closes the data file even if a line fails.
with open("C:\\test.txt", 'r') as f:
    for line in f:
        line = line.splitlines()[0]  # the line without its line ending
        print('Line: %s' % line)
        quoted = []
        for listline in list:
            print(listline)
            from_point = listline[1]['from']
            to_point = from_point + listline[1]['length']
            # Double embedded single quotes so a value cannot terminate the
            # SQL string literal early.
            # NOTE: if the statement is ever executed rather than printed,
            # pass the values as parameters to cursor.execute() instead of
            # building the SQL text by hand.
            quoted.append("'" + line[from_point:to_point].replace("'", "''") + "'")
        # join() puts commas only *between* the values; the previous
        # concatenation left a trailing comma, which is invalid SQL.
        query = 'INSERT INTO der VALUES (' + ','.join(quoted) + ')'
        print(query)
    print('Closing file. EOP')