Wie gesagt, ich bin Python-Neuling und bin mit Version 3 gestartet. Auf dem Computer, auf dem der Code laufen soll, ist aber nur Python 2.4 vorhanden. Außerdem habe ich mir PEP 8 angesehen und nach den bisherigen Rückmeldungen, dass mein Code ziemlich viel Kauderwelsch sei, versucht, ihn etwas besser zu gestalten.
Danke an die Experten, die mal kurz drüberschauen, ob ich für Python 2.4 noch etwas ändern sollte – außer der ersten Zeile: Dort muss aus python3 ein python werden, wenn ich richtig liege.
Code: Alles auswählen
#!/usr/bin/python3
import os
import shutil
import sys
################################################################################
# Script that copies the files of the finished jobs into the tempstore.
################################################################################
print("Start ...")
################################################################################
# Configuration.
# The root data prefix carries no trailing slash (the sub-paths appended to it
# start with "/"); the python directory prefix ends with a slash.
# Local version:
PREFIX_PATH_TO_PYTHON_DIRECTORY = "/home/joba/jobascripts/python/"
PREFIX_PATH_TO_ROOT_DATA_DIRECTORY = "/home/joba/Desktop"
# University version (swap the comment markers to activate it):
#PREFIX_PATH_TO_ROOT_DATA_DIRECTORY = ""
#PREFIX_PATH_TO_PYTHON_DIRECTORY = "/merkl/bauer/python/"
################################################################################
# Copy the relevant data files into the tempstore.
# Every entry of the analyses directory whose name contains
# "fas.uwertcutofftxt" triggers the copy (presumably this file marks a finished
# job -- confirm).  The first six characters of that entry are used as the MSA
# name from which the three file names are rebuilt.
#
# The paths are built by plain concatenation on purpose: with an empty root
# prefix they must still start with "/", which os.path.join would not give.
path = PREFIX_PATH_TO_ROOT_DATA_DIRECTORY + "/merkl/bauer/h2r/cfa500/analyses/"
tempstore_path = PREFIX_PATH_TO_ROOT_DATA_DIRECTORY + "/merkl/bauer/tempstore/"
suffixes_of_files_to_copy = (
    ".fas.uwertcutofftxt",
    ".fas-curvefitmsaanalyse.txt",
    ".fas-curvefitcolanalyse.csv",
)
directory_list = os.listdir(path)
for current_directory_list_item in directory_list:
    if "fas.uwertcutofftxt" not in current_directory_list_item:
        continue
    current_msa_name = current_directory_list_item[0:6]
    for current_suffix in suffixes_of_files_to_copy:
        current_source_file = path + current_msa_name + current_suffix
        # shutil.copy needs no shell, so unusual characters in a file name
        # cannot break (or inject into) a command line.
        try:
            shutil.copy(current_source_file, tempstore_path)
        except (IOError, OSError):
            # Stay best-effort like the former "cp" call, but make the
            # failure visible instead of dropping it silently.
            print("Kopieren fehlgeschlagen: " + current_source_file)
################################################################################
# Read the key-value relation between MSA name and job index.
# Each line of the mapping file is split at ";": the first field is the job
# id, the second field is the MSA name.  The result maps MSA name -> job id.
path = PREFIX_PATH_TO_PYTHON_DIRECTORY + "tupel-index-msaname.txt"
try:
    file_with_msa_name_to_job_id_relation = open(path)
except IOError:
    print("Problem beim Lesen der Zuordnungsdatei!")
    # An unreadable mapping file is a failure, so exit with a non-zero status.
    sys.exit(1)
dictionary_of_msa_name_job_id_relation = {}
try:
    for current_line in file_with_msa_name_to_job_id_relation:
        line_tokens = current_line.split(";")
        if len(line_tokens) < 2:
            # Blank or malformed line: there is nothing to map.
            continue
        msa_job_id = line_tokens[0].strip()
        # Strip the line terminator: if the name is the last field of the
        # line it would otherwise end in "\n" and never equal the
        # six-character names cut out of the file names.
        msa_name = line_tokens[1].strip()
        dictionary_of_msa_name_job_id_relation[msa_name] = msa_job_id
finally:
    # try/finally (instead of "with") keeps this runnable on Python 2.4.
    file_with_msa_name_to_job_id_relation.close()
################################################################################
# Extract the data encoded in the tempstore file names.
# For every entry containing "uwertcutofftxt" the first six characters are
# taken as MSA name; its job id comes from the name -> job id mapping, with
# "FEHLER" as placeholder when the name is not in the mapping.
list_of_msa_job_ids = []
list_of_msa_names = []
path = PREFIX_PATH_TO_ROOT_DATA_DIRECTORY + "/merkl/bauer/tempstore/"
directory_list = os.listdir(path)
for file_name in directory_list:
    if "uwertcutofftxt" not in file_name:
        continue
    current_msa_name = file_name[:6]
    list_of_msa_job_ids.append(
        dictionary_of_msa_name_job_id_relation.get(current_msa_name, "FEHLER"))
    list_of_msa_names.append(current_msa_name)
################################################################################
# Collect the data of the data files.
# dictionary_for_all_msas maps "<msa name>-<TAG>" to the raw (unstripped)
# string value read from the files.
dictionary_for_all_msas = {}
analyses_path = (PREFIX_PATH_TO_ROOT_DATA_DIRECTORY
                 + "/merkl/bauer/h2r/cfa500/analyses/")
for current_msa_name in list_of_msa_names:
    # MSA summary file: store the token that follows the P025DROPCOUNT tag.
    current_path_to_msa_data_file = (
        analyses_path + current_msa_name + ".fas-curvefitmsaanalyse.txt")
    current_msa_data_file_pointer = open(current_path_to_msa_data_file)
    try:
        for current_line in current_msa_data_file_pointer:
            current_tokens_of_line = current_line.split(" ")
            if current_tokens_of_line[0] == "P025DROPCOUNT":
                current_tag_for_storage = current_msa_name + "-P025DROPCOUNT"
                dictionary_for_all_msas[current_tag_for_storage] = (
                    current_tokens_of_line[1])
    finally:
        # Close the file even if a line cannot be processed.
        current_msa_data_file_pointer.close()

    # Column analysis file (big): only its first line is evaluated, so read
    # just that line instead of iterating over the whole file.
    current_path_to_msa_columns_data_file = (
        analyses_path + current_msa_name + ".fas-curvefitcolanalyse.csv")
    current_msa_columns_data_file_pointer = open(
        current_path_to_msa_columns_data_file)
    try:
        first_line_of_columns_file = (
            current_msa_columns_data_file_pointer.readline())
    finally:
        current_msa_columns_data_file_pointer.close()
    if first_line_of_columns_file:
        inner_current_tokens_of_line = first_line_of_columns_file.split(",")
        # The tag is built from the name found inside the file (first six
        # characters of the first field), not from the file name.
        inner_current_msa_name = inner_current_tokens_of_line[0][0:6]
        inner_current_tag_for_storage = (
            inner_current_msa_name + "-MSANUMBEROFSEQS")
        dictionary_for_all_msas[inner_current_tag_for_storage] = (
            inner_current_tokens_of_line[1])
################################################################################
# Build the data structure for the output matrix.
# Row layout: running index; MSA name; job id; number of sequences;
# P025 drop count.  The two values read from the data files are stripped of
# surrounding whitespace.
output_data_matrix = []
for row_number, current_msa_job_id in enumerate(list_of_msa_job_ids):
    current_msa_name = list_of_msa_names[row_number]
    number_of_sequences = dictionary_for_all_msas[
        current_msa_name + "-MSANUMBEROFSEQS"]
    number_of_p025_drops = dictionary_for_all_msas[
        current_msa_name + "-P025DROPCOUNT"]
    output_data_matrix.append([
        row_number,
        current_msa_name,
        current_msa_job_id,
        number_of_sequences.strip(),
        number_of_p025_drops.strip(),
    ])
print("Matrix ....\n")
print(output_data_matrix)
################################################################################
# Write the output: one matrix row per line, every value followed by ";".
try:
    # Plain "w" is sufficient because the file is never read back.
    output_file_pointer = open(
        PREFIX_PATH_TO_ROOT_DATA_DIRECTORY
        + "/merkl/bauer/tempstore/result.txt", "w")
except IOError:
    print("Oeffnen zum Schreiben schlug fehl!")
    # Report the failure to the caller with a non-zero exit status.
    sys.exit(1)
try:
    for output_data_row in output_data_matrix:
        output_file_pointer.write(
            "".join([str(matrix_line_token) + ";"
                     for matrix_line_token in output_data_row]) + "\n")
finally:
    # Close (and thereby flush) the file even if writing a row fails;
    # try/finally instead of "with" keeps this runnable on Python 2.4.
    output_file_pointer.close()
print("... Ende!")