Vor allem brauche ich 3 Funktionen.
select_columns( Spaltennamen )
select_rows( Bedingung)
uniq( Spaltennamen ) // soll alle vorkommenden Wertekombinationen der angegebenen Spalten ausgeben
Ich habe hier meine erste, rein sequentielle Implementierung so weit fertig und würde gerne nette Kritik hören
(ich lerne Python gerade erst und weiß, dass weder Design noch Implementierung perfekt sind, deswegen bitte keinen Herzinfarkt bekommen und nett bleiben ^^)
Das Ganze lässt sich folgendermaßen bedienen:
Code: Alles auswählen
# Usage example: filter the rows first, then reduce the columns, then write
# the result to a new file.
# The order matters: select_rows() looks the names used in the condition up
# in the header of the handler it is called on, and after
# select_columns("x,y,z") the columns "time" and "mode" no longer exist
# (the lookup would fail with a KeyError).
from pm_utils import pmd_handler
pmd = pmd_handler(Dateipfad)
pmd.select_rows("time > 10 and mode == 'speed'").select_columns("x,y,z").dump(neuerName)
Code: Alles auswählen
# -*- coding: utf-8 -*-
import os
import re
import ast
import tempfile
# Line patterns of the pmd text format. All regex literals are raw strings so
# that sequences such as "\w" and "\." reach the regex engine unchanged and do
# not trigger Python's "invalid escape sequence" warning.

# Header line that carries the number of columns, e.g. "# cols=3".
pattern_number_of_cols = re.compile(r"^# cols=")
# Header line that carries the column names. Because the character class may
# match zero characters, this matches every line starting with "# " (including
# the "# cols=" line, which callers therefore have to exclude themselves).
pattern_names_of_cols = re.compile(r"^# [\w \t]*")
# Any comment line, i.e. any line starting with "#".
pattern_simple_comment = re.compile(r"^#")
# A field that is converted to float before a row condition is evaluated:
# optional sign, optional integer part, optional ".", digits, optional exponent.
pattern_number = re.compile(r"^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$")
# Separator written after every field of generated files.
use_separator = "\t"
class pmd_handler:
    """Sequential, file-based access to a "pmd" text table.

    The format, as far as this class relies on it:

    * a line starting with ``# cols=`` holds the number of columns,
    * the first other line starting with ``# `` holds the
      whitespace-separated column names,
    * every line starting with ``#`` is skipped when data is read,
    * all remaining non-blank lines are rows of whitespace-separated fields.

    ``select_columns`` and ``select_rows`` never modify their source. Each one
    writes its result to a temporary file and returns a
    ``temporary_pmd_handler`` for it, so calls can be chained and finished
    with ``dump``.
    """

    def __init__(self, pmd=None):
        """Create a handler and, if *pmd* is given, load the file at that path."""
        self.col_names = None
        self.number_of_cols = None
        self.cols = None
        self.map_name_id = None
        self.pmd = None
        if pmd is not None:
            self.set_pmd(pmd)

    def set_pmd(self, path):
        """Read the header of the file at *path* and make it the data source.

        If *path* is not an existing file the call does nothing and all
        attributes keep their previous values.

        Raises:
            ValueError: the column count or the column names are missing, or
                the number of names differs from the declared count.
        """
        if not os.path.isfile(path):
            return
        with open(path) as source:
            self._apply_header(source, path)

    def _scan_header(self, lines):
        """Return ``(number_of_cols, names_of_cols)`` found in *lines*.

        Either element is None when the corresponding header line is missing.
        Only the first occurrence of each header line is used.
        """
        number_of_cols = None
        names_of_cols = None
        for line in lines:
            if pattern_number_of_cols.match(line):
                if number_of_cols is None:
                    # len("# cols=") == 7; the rest of the line is the count.
                    number_of_cols = int(line[7:].strip())
            elif names_of_cols is None and pattern_names_of_cols.match(line):
                names_of_cols = line[1:].split()
            if number_of_cols is not None and names_of_cols is not None:
                break
        return number_of_cols, names_of_cols

    def _apply_header(self, lines, source):
        """Parse the header from *lines*, validate it and store it on self.

        *source* is stored as ``self.pmd`` (a path for this class, an open
        file object for ``temporary_pmd_handler``).
        """
        number_of_cols, names_of_cols = self._scan_header(lines)
        if (number_of_cols is None or names_of_cols is None
                or len(names_of_cols) != number_of_cols):
            raise ValueError("Keine gültige pmd-Datei")
        self.col_names = names_of_cols
        self.number_of_cols = number_of_cols
        self.cols = tuple(enumerate(names_of_cols))
        # With duplicate names the last column wins.
        self.map_name_id = dict(
            (name, index) for index, name in enumerate(names_of_cols))
        self.pmd = source

    def _enter(self):
        """Open the data source and return a file object positioned at the start."""
        return open(self.pmd)

    def _exit(self, file):
        """Release a file object obtained from ``_enter``."""
        file.close()

    def _resolve_columns(self, colnames):
        """Return ``(names, indices)`` for a column specification.

        *colnames* is either a comma-separated string or an iterable of
        names; surrounding whitespace of each name is ignored.

        Raises:
            KeyError: a name is not a column of this handler.
        """
        if isinstance(colnames, str):
            colnames = colnames.split(",")
        names = [name.strip() for name in colnames]
        return names, [self.map_name_id[name] for name in names]

    def uniq(self, colnames):
        """Return the distinct value combinations of the given columns.

        The result is a list of lists of strings, in order of first
        appearance. Comment lines and blank lines are skipped.
        """
        _, indices = self._resolve_columns(colnames)
        seen = set()  # O(1) membership test instead of scanning the result
        combinations = []
        source = self._enter()
        try:
            for line in source:
                if pattern_simple_comment.match(line):
                    continue
                fields = line.split()
                if not fields:
                    continue
                key = tuple(fields[i] for i in indices)
                if key not in seen:
                    seen.add(key)
                    combinations.append(list(key))
        finally:
            self._exit(source)
        return combinations

    def select_columns(self, colnames):
        """Return a handler over a copy that contains only the given columns.

        The columns appear in the order given. A fresh header is written;
        comment lines of the source are not copied, blank lines are skipped.

        Raises:
            KeyError: a name is not a column of this handler.
        """
        names, indices = self._resolve_columns(colnames)
        # Text mode: the default "w+b" would reject str on Python 3 and hand
        # bytes to the str regex patterns when the file is read back.
        tmp = tempfile.TemporaryFile(mode="w+")
        try:
            tmp.write("# cols= {0}\n".format(len(names)))
            tmp.write(
                "# " + "".join(name + use_separator for name in names) + "\n")
            source = self._enter()
            try:
                for line in source:
                    if pattern_simple_comment.match(line):
                        continue
                    fields = line.split()
                    if not fields:
                        continue
                    # Every field is followed by the separator (also the last
                    # one) to keep the established output format.
                    tmp.write(
                        "".join(fields[i] + use_separator for i in indices)
                        + "\n")
            finally:
                self._exit(source)
        except Exception:
            tmp.close()
            raise
        return temporary_pmd_handler(tmp)

    def select_rows(self, condition):
        """Return a handler over a copy that contains only the matching rows.

        *condition* is a Python expression in which every bare name refers to
        a column, e.g. ``"time > 10 and mode == 'speed'"``. For each row,
        fields that look like numbers are converted to float, all others stay
        strings; the row is kept when the expression compares equal to True.
        The ``# cols=`` line and all comment lines starting with ``# `` are
        copied in front of the selected rows.

        Raises:
            SyntaxError: *condition* is not a valid expression.
            KeyError: the condition uses a name that is not a column.
        """
        expression = ast.parse(condition.strip(), mode="eval")
        names = sorted(set(
            node.id for node in ast.walk(expression)
            if isinstance(node, ast.Name)))
        columns = [(name, self.map_name_id[name]) for name in names]
        code = compile(expression, "<string>", mode="eval")
        # NOTE(security): the condition is executed with eval(). Builtins are
        # disabled, but this is not a sandbox - never pass untrusted text.
        restricted_globals = {"__builtins__": {}}

        tmp = tempfile.TemporaryFile(mode="w+")
        try:
            source = self._enter()
            try:
                # First pass: header lines, so they precede all data rows.
                for line in source:
                    if (pattern_number_of_cols.match(line)
                            or pattern_names_of_cols.match(line)):
                        tmp.write(line)
                # Second pass: rows for which the condition holds.
                source.seek(0)
                for line in source:
                    if pattern_simple_comment.match(line):
                        continue
                    fields = line.split()
                    if not fields:
                        continue
                    values = {}
                    for name, index in columns:
                        item = fields[index]
                        if pattern_number.match(item):
                            values[name] = float(item)
                        else:
                            values[name] = item
                    # "== True" on purpose: a non-boolean result such as 5.0
                    # or a non-empty string does not select the row.
                    if eval(code, restricted_globals, values) == True:
                        tmp.write(line)
            finally:
                self._exit(source)
        except Exception:
            tmp.close()
            raise
        return temporary_pmd_handler(tmp)

    def dump(self, path):
        """Write the complete content to *path* and return a handler for it."""
        source = self._enter()
        try:
            with open(path, "w") as out:
                out.writelines(source)
        finally:
            self._exit(source)
        return pmd_handler(path)


class temporary_pmd_handler(pmd_handler):
    """A pmd_handler whose data source is an open, seekable file object.

    Instances are returned by ``select_columns`` and ``select_rows``. The
    file object is rewound before every read and never closed by the
    handler, so the intermediate result lives as long as the handler does.
    """

    def __init__(self, tmpfile=None):
        """Create a handler and, if *tmpfile* is given, read its header."""
        pmd_handler.__init__(self)
        if tmpfile is not None:
            self.set_pmd(tmpfile)

    def set_pmd(self, tmpfile):
        """Read the header from the file object *tmpfile* and use it as source.

        Raises:
            ValueError: the column count or the column names are missing, or
                the number of names differs from the declared count.
        """
        tmpfile.seek(0)
        self._apply_header(tmpfile, tmpfile)

    def _enter(self):
        """Rewind the underlying file object and return it."""
        self.pmd.seek(0)
        return self.pmd

    def _exit(self, file):
        """Keep the file open; it is still needed for later reads."""
        pass
""" Nicht in Klasse """
def is_valid_pmd_file(file):
    """Return True if *file* is the path of a well-formed pmd file.

    A file is considered well-formed when it exists, contains a
    ``# cols=<n>`` line with an integer ``n`` and a column-name line
    (the first other line starting with ``# ``), and the number of
    whitespace-separated names equals ``n``. Only the first occurrence of
    each header line is taken into account.
    """
    if not os.path.isfile(file):
        return False
    number_of_cols = None
    names_of_cols = None
    with open(file) as source:
        for line in source:
            if pattern_number_of_cols.match(line):
                if number_of_cols is None:
                    try:
                        # len("# cols=") == 7; the rest is the count.
                        number_of_cols = int(line[7:].strip())
                    except ValueError:
                        # A non-integer count makes the file invalid; a
                        # predicate should report that instead of raising.
                        return False
            elif names_of_cols is None and pattern_names_of_cols.match(line):
                names_of_cols = line[1:].split()
            if number_of_cols is not None and names_of_cols is not None:
                break
    if number_of_cols is None or names_of_cols is None:
        return False
    return len(names_of_cols) == number_of_cols
(Es werden noch einige Fragen dazu kommen ^^)
Aber im Moment werde ich wohl damit arbeiten