Re: Logfile in eine MSSQL Datenbank einlesen
Verfasst: Mittwoch 26. Juni 2019, 14:27
stimmt, das ist besser. Wobei insert_entry auch noch umgeschrieben ist 

Seit 2002 Diskussionen rund um die Programmiersprache Python
https://www.python-forum.de/
Was hab ich übersehen bzw. falsch gemacht?Traceback (most recent call last):
File "./python/SQL/test.py", line 105, in <module>
main()
File "./python/SQL/test.py", line 100, in main
process_file(log_filename)
File "./python/SQL/test.py", line 86, in process_file
for entry in parse_log(lines):
File "./python/SQL/test.py", line 61, in parse_log
for line in lines:
File "/usr/lib/python3.5/codecs.py", line 321, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xdc in position 3939: invalid continuation byte
Code: Alles auswählen
#!/usr/bin/env python3
#
import os
import sys
import re
from collections import namedtuple
from contextlib import closing
from datetime import datetime as DateTime
from datetime import date
import fnmatch
import pymssql
LOG_PATH = '/var/log/ESI/ESI2'
LOG_PATH2 = '/mnt/ESI2/ESI/F1/Log'
LOG_FILENAME_TEMPLATE = os.path.join(LOG_PATH, 'F1log_{}.txt')
LOG_FILENAME_TEMPLATE2 = os.path.join(LOG_PATH2, 'F1log_{}.txt')
COMMON_PATTERN = r'^(\d+) (?P<zeit>\d{2}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}) '
MAIN_RE = re.compile(COMMON_PATTERN + r'(?P<prom>\d{8}) (?P<message>.{14}) .+ N00 (?P<did>\d{4}) (?P<rufnummer>\d{8,12})? .+ ?SITE=(?P<site>\d{8,10})')
ERROR_RE = re.compile(COMMON_PATTERN + r'ISDN Fehlanruf C.00 (?P<did>\d{4}) (?P<rufnummer>\d{8,12})? ?Diag:(?P<message>[A-Z ]{14,18})')
Main = namedtuple('Main', 'zeit prom message did rufnummer site')
Error = namedtuple('Error', 'zeit prom message did rufnummer site')
server = 'localhost'
user = 'sa'
password = 'XXXXXXXXX'
database = 'F1'
def select_file():
files = []
for file in os.listdir(LOG_PATH):
if fnmatch.fnmatch(file, 'F1log_*.txt'):
files.append(file[6:16])
data = sorted(files)
return data
def get_common(match):
zeit = DateTime.strptime(match.group('zeit'), '%d/%m/%y %H:%M:%S')
did = int(match.group('did'))
rufnummer = match.group('rufnummer')
return (zeit, did, rufnummer)
def parse_log(lines):
for line in lines:
match = MAIN_RE.match(line) or ERROR_RE.match(line)
if match:
groups = match.groupdict()
zeit, did, rufnummer = get_common(match)
message = groups.get('message')
prom = groups.get('prom')
site = groups.get('site')
yield Main(zeit, prom, message, did, rufnummer, site)
def insert_entry(connection, entry):
with closing(connection.cursor()) as cursor:
cursor.execute(
'INSERT INTO main (zeit, prom, message, did, rufnummer, site)'
'VALUES (%s, %s, %s, %s, %s, %s)',
entry
)
def process_file(log_filename):
with open (log_filename) as lines:
connection = pymssql.connect(
server, user, password, database
)
with closing(connection):
for entry in parse_log(lines):
try:
insert_entry(connection, entry)
except pymssql.IntegrityError:
pass
except:
connection.rollback()
raise
else:
connection.commit()
def main():
if len(sys.argv) >= 2 and sys.argv[1] == 'archive':
for Date in select_file():
log_filename = LOG_FILENAME_TEMPLATE.format(Date)
print(log_filename)
process_file(log_filename)
elif len(sys.argv) >= 2:
Date = sys.argv[1]
log_filename = LOG_FILENAME_TEMPLATE.format(Date)
print(log_filename)
process_file(log_filename)
else:
Date = date.today()
log_filename = LOG_FILENAME_TEMPLATE2.format(Date)
print(log_filename)
process_file(log_filename)
if __name__ == '__main__':
main()