Das deutsche Python-Forum

Hallo,
folgendes. Ich habe einen Timestring und ändere ihn wie folgt.

time=time.replace("Jan", "01")
time=time.replace("Feb", "02")
time=time.replace("Mar", "03")
time=time.replace("Apr", "04")
time=time.replace("May", "05")
...
dat64=pandas.to_datetime(time, format='%d-%b-%Y %H:%M:%S')

Beim ersten durchlauf im Interaktiven Compilor läuft alles ohne Probleme.
Führe ich allerdings das Skribt ein zweites Mal durch, so folgt dieser Fehler:

Code: Alles auswählen

<ipython-input-2-8b0ae46ec74e> in plot_xy_2017()
    106                 time=time.replace("Dec", "12")
    107 
--> 108                 dat64=pandas.to_datetime(time, format='%d-%b-%Y %H:%M:%S')
    109                 timenew=dat64.dt.to_pydatetime()
    110 

/home/samothkociok/.conda/envs/thomypy/lib/python3.4/site-packages/pandas/core/tools/datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin)
    507     elif isinstance(arg, ABCSeries):
    508         from pandas import Series
--> 509         values = _convert_listlike(arg._values, False, format)
    510         result = Series(values, index=arg.index, name=arg.name)
    511     elif isinstance(arg, (ABCDataFrame, MutableMapping)):

/home/samothkociok/.conda/envs/thomypy/lib/python3.4/site-packages/pandas/core/tools/datetimes.py in _convert_listlike(arg, box, format, name, tz)
    445                 return DatetimeIndex._simple_new(values, name=name, tz=tz)
    446             except (ValueError, TypeError):
--> 447                 raise e
    448 
    449     if arg is None:

/home/samothkociok/.conda/envs/thomypy/lib/python3.4/site-packages/pandas/core/tools/datetimes.py in _convert_listlike(arg, box, format, name, tz)
    412                     try:
    413                         result = tslib.array_strptime(arg, format, exact=exact,
--> 414                                                       errors=errors)
    415                     except tslib.OutOfBoundsDatetime:
    416                         if errors == 'raise':

pandas/_libs/tslib.pyx in pandas._libs.tslib.array_strptime (pandas/_libs/tslib.c:63619)()

ValueError: time data '01-Mar-2017 06:00:00' does not match format '%d-%b-%Y %H:%M:%S' (match)

Woran liegt das?

@Samoth: was heißt "beim Zweiten mal"? Kannst Du ein minimales vollständiges Beispiel mit Daten zeigen, die das Problem reproduziert?

Der Fehler entsteht beim zweiten Aufruf der Schleife. Im Grunde ganz unten wenn die Funktion plot_XY das zweite Mal aufgerufen wird.

Gruß

Code: Alles auswählen

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Dieses Programm plottet die Bereits vorsortierten METAR Daten
# Die Metardaten wurden bereits ausgewertet und als Tabelle abgespeichert.
# Eingelesen werden .csv FIles.
##########################################################################

#Definition der verwendeten Packete
import glob
import pandas
from os.path import join
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime 
import os
import numpy as np
from matplotlib.pyplot import cm 

##############################################################################
#Definitionsblock, Veränderung des Save and Rootpath
##############################################################################
#ROOTpfad
ROOT = "/home/samothkociok/Desktop/Plots/METAR"
SAVEPATH = '/home/samothkociok/Desktop/Plots/METAR/XY_PLOT/'






pattern=".csv" #unterstützte FIleart
##############################################################################
#Programmstart
##############################################################################
print('##############################################################################')
print('#Start script METAR plot')
print('##############################################################################')
print('Datapath: ',ROOT)

##############################################################################
#Extrahieren der Ordnerstrukur aus METAR/.... (Eine Unterebene erlaubt)
##############################################################################
def folder_objects(ROOT, otype = "all"):
    if (os.path.exists(ROOT) == False or
        os.path.isdir(ROOT) == False or
        os.access(ROOT, os.R_OK) == False):
        return False
    else:
        objects = os.listdir(ROOT)
        result = []
        for objectname in objects:
            objectpath = ROOT + "/" + objectname
            if (otype == "all" or
                (otype == "dir"  and os.path.isdir(objectpath)  == True) or
                (otype == "file" and os.path.isfile(objectpath) == True) or
                (otype == "link" and os.path.islink(objectpath) == True)):
                result.append(objectname)
        result.sort()
        return result

##############################################################################
#Plotfunktion
##############################################################################
def plot_xy_2017():
	#Einlesen aller gefunenen Files -> Einspeichern in Liste
	fns = glob.glob(join(landordner, "*.csv"))[:]
	#print(fns[:])

	anzahl_fall=0
	panzahl=1
	daten_anzahl=0
	for xx in range(len(fns)):
		data_str =fns[xx]
		if xx==0:
			print('The following data has been found:')
		print(data_str)
		daten_anzahl+=1

	print('Founded stationumber: ',daten_anzahl)
	del daten_anzahl

	for xx in range(len(fns)):
		#Header Extrahieren
		temp=pandas.read_csv(fns[xx], delimiter=',')
		
		readfile=fns[xx]
		index=readfile.find(pattern)
		index=index-10				#länge des Namens DAAD_Metar = 10
		airp_name=readfile[index:index+4]
		print('Plot:', airp_name)	
	
		print('Replace month string with value.')
		time=temp['Var1']
		time=time.replace("Jan", "01")
		time=time.replace("Feb", "02")
		time=time.replace("Mar", "03")
		time=time.replace("Apr", "04")
		time=time.replace("May", "05")
		time=time.replace("Jun", "06")
		time=time.replace("Jul", "07")
		time=time.replace("Aug", "08")
		time=time.replace("Sep", "09")
		time=time.replace("Oct", "10")
		time=time.replace("Nov", "11")
		time=time.replace("Dec", "12")
	
		dat64=pandas.to_datetime(time, format='%d-%b-%Y %H:%M:%S')
		timenew=dat64.dt.to_pydatetime()
	
		wind_dir=temp['Daten_1']
		wind_spee=temp['Daten_2']
		view=temp['Daten_3']
		temp_dry=temp['Daten_4']
		temp_wet=temp['Daten_5']
		pressure=temp['Daten_6']
		cover=temp['Daten_7']
		cl_base=temp['Daten_8']
		cl_art=temp['Daten_9']		#Wert 1 = CB, WErt 0 Towering C
		#view[view == 9999] = 10000

		Metar=[wind_dir, wind_spee, view,temp_dry, temp_wet, pressure, cover, cl_base, cl_art]

		print('Creat subplot pattern.')

		years = mdates.YearLocator()   # every year
		months = mdates.MonthLocator()  # every month
		days = mdates.DayLocator()
		monthFmt = mdates.DateFormatter('%m')

		fig = plt.figure(figsize=(18, 16))
		st = fig.suptitle('Airport: '+airp_name +', Year: 2017', fontsize="x-large")
		ax1 = fig.add_subplot(321)
		ax1.set_title('Visibility')
		ax1.plot(timenew, view, marker='+')
		#plt.gcf().autofmt_xdate()
		plt.ylabel('m')
		plt.xlabel('Months in 2017')
		ax1.xaxis.set_major_locator(months)
		ax1.xaxis.set_major_formatter(monthFmt)
		ax1.xaxis.set_minor_locator(days)

		ax4 = fig.add_subplot(322)
		ax4.set_title('Temperature')
		ax4.plot(timenew, temp_dry , color='red')
		ax4.plot(timenew, temp_wet, color='blue')
		plt.ylabel('°C')
		plt.xlabel('Months in 2017')
		plt.legend(['Dry', 'Wet'])
		ax4.xaxis.set_major_locator(months)
		ax4.xaxis.set_major_formatter(monthFmt)
		ax4.xaxis.set_minor_locator(days)
		
		ax2 = fig.add_subplot(323)
		ax2.set_title('Winddirection')
		ax2.scatter(timenew, wind_dir, marker='o')
		plt.ylabel('Degree')
		plt.xlabel('Months in 2017')
		plt.ylim(ymax=360, ymin=0)
		ax2.xaxis.set_major_locator(months)
		ax2.xaxis.set_major_formatter(monthFmt)
		ax2.xaxis.set_minor_locator(days)

		ax3 = fig.add_subplot(324)
		ax3.set_title('Windspeed')
		ax3.plot(timenew, wind_spee, marker='+')
		plt.ylabel('m/s')
		plt.xlabel('Months in 2017')
		ax3.xaxis.set_major_locator(months)
		ax3.xaxis.set_major_formatter(monthFmt)
		ax3.xaxis.set_minor_locator(days)

		ax5 = fig.add_subplot(325)
		ax5.set_title('Cloud cover')
		ax5.scatter(timenew, cover, marker='+')
		plt.ylabel('Fraction')
		plt.xlabel('Months in 2017')
		plt.ylim(ymax=1.0, ymin=0.0)	
		ax5.xaxis.set_major_locator(months)
		ax5.xaxis.set_major_formatter(monthFmt)
		ax5.xaxis.set_minor_locator(days)

		ax6 = fig.add_subplot(326)
		ax6.set_title('Cloud base')
		ax6.plot(timenew, cl_base, marker='+')
		plt.ylabel('m')
		plt.xlabel('Months in 2017')
		ax6.xaxis.set_major_locator(months)
		ax6.xaxis.set_major_formatter(monthFmt)
		ax6.xaxis.set_minor_locator(days)

		plt.subplots_adjust(top=0.92, bottom=0.08, left=0.10, right=0.95, hspace=0.25, wspace=0.15)

		print('DONE.')

		if not os.path.exists(SAVEPATH):
			print('Creat new save folder.')
			os.makedirs(SAVEPATH)
			print('DONE.')	

		print('The plots are saved in ', SAVEPATH)
	
		plt.savefig(SAVEPATH+airp_name+'_overview_2017.jpeg')

		print('DONE.')
		plt.clf()   # Clear figure
		

	        del panzahl, time

##############################################################################
#Aufruf der Funktion, von oben, Durchsuchen aller Verzeichnisse und Plotten
##############################################################################
#dirname = os.environ['HOME'] + "/Python"
print("All files: ", folder_objects(ROOT))
print("Folder:    ", folder_objects(ROOT, "dir"))
folder=folder_objects(ROOT, "dir")
print("Datafiles: ", folder_objects(ROOT, "file"))
#print("Symlinks:", folder_objects(ROOT, "link"))
#for Ordnerzahl in range(len(folder)):
for Ordnerzahl in range(1):
	landordner = ROOT+'/'+folder[Ordnerzahl]
	plot_xy_2017()

print('------------------------------------------')
print('-------------Skript finished--------------')
print('------------------------------------------')

@Samoth: reduzier bitte das Programm auf das Nötigste, so dass der Fehler noch auftritt. Da sind bestimmt die ganzen Plots nicht wichtig?

Generell wird immer mit 4 Leerzeichen pro Ebene eingerückt, Pfade setzt man mit os.path.join zusammen (wird zwar importiert, aber nicht konsequent benutzt), über Listen iteriert man direkt, `del` auf Variablen ist immer unsinnig, Lebensdauern von Variablen regelt man über Funktionskontexte. Apropos Funktionen, plot_xy_2017 ist viel zu lang, und sollte in mehrere Funktionen aufgeteilt werden, zumal die Plots sehr viele Wiederholungen haben. `Founded` heißt `gegründet`.

Ok, danke für die Inspiration, werde das gleich mal machen.
Ich habe den Fehler auf diesen Bereich eingrenzen können.

Lösst man das nun über eine Schleife laufen:

Code: Alles auswählen

fns = glob.glob(join(landordner, "*.csv"))[:]
for xx in range(len(fns)):
	#Header Extrahieren
	temp=pandas.read_csv(fns[xx], delimiter=',')
	
	print('Replace month string with value.')
	time=temp['Var1']
	time=time.replace("Jan", "01")
	time=time.replace("Feb", "02")
	time=time.replace("Mar", "03")
	time=time.replace("Apr", "04")
	time=time.replace("May", "05")
	time=time.replace("Jun", "06")
	time=time.replace("Jul", "07")
	time=time.replace("Aug", "08")
	time=time.replace("Sep", "09")
	time=time.replace("Oct", "10")
	time=time.replace("Nov", "11")
	time=time.replace("Dec", "12")
	
	dat64=pandas.to_datetime(time, format='%d-%b-%Y %H:%M:%S')

n=0: es wird alles wie gedacht gemacht.
bei n=1:
TypeError: Unrecognized value type: <class 'str'>

Gruß

@Samoth: Mal abgesehen von so einigen Anti-Pattern: wo tritt der Fehler genau auf, in der letzten Zeile? Kann es sein, dass die gelesenen Dateien keinen einheitlichen Aufbau, bzw. Inhalt haben? Wenn Dein Programm die erste Datei bearbeiten kann (xx gleich 0, und nicht n!), die zweite aber nicht, dann ist das ein Indiz dafür. Oder meinst Du tatsächlich n und zeigst nicht passenden Code?

also gut... nochmals genauer.. vieles konnte ich nun rausstreichen. Da ich das so nicht brauche.
Das problem liegt nicht an der gedachten Stelle, sondern hat etwas mit dem plotten zu tun.
Führe ich das den makierten Code immer wieder aus (doch kopie und paste in der shell) so bekomme ich keinen fehler.
Auch wenn ich den range auf die gewünschte größe ändere funktioniert alles.

WIrd anschließsend das ax1.plot ausgeführt, so kann es nur einmalig ausegführt werden. Beim zweiten mal (ohne zuvor die shell zu schließen)
bekomme ich den bereits oben diskutierten Fehler. Ist es nun ein wenig präziser?

Code: Alles auswählen

#HIER---------------------------------------------------------------
for Ordnerzahl in range(1):
	landordner = ROOT+'/'+folder[Ordnerzahl]
	land=folder[Ordnerzahl]
	fns = glob.glob(join(landordner, "*.csv"))[:]
	for xx in range(len(fns)):
		temp=pandas.read_csv(fns[xx], delimiter=',')
		dat64=pandas.to_datetime(temp['Var1'], format='%d-%b-%Y %H:%M:%S')
		timenew=dat64.dt.to_pydatetime()

		wind_dir=temp['Daten_1']
		wind_spee=temp['Daten_2']
		view=temp['Daten_3']
		temp_dry=temp['Daten_4']
		temp_wet=temp['Daten_5']
		pressure=temp['Daten_6']
		cover=temp['Daten_7']
		cl_base=temp['Daten_8']
		cl_art=temp['Daten_9']		#Wert 1 = CB, WErt 0 Towering C
		#view[view == 9999] = 10000

		print('Creat subplot pattern.')

		years = mdates.YearLocator()   # every year
		months = mdates.MonthLocator()  # every month
		days = mdates.DayLocator()
		monthFmt = mdates.DateFormatter('%m')[/b]
#HIER---------------------------------------------------------------
		fig = plt.figure(figsize=(18, 16))
		st = fig.suptitle('Airport: '+'test' +', Year: 2017', fontsize="x-large")
		ax1 = fig.add_subplot(321)
		ax1.set_title('Visibility')
		ax1.plot(timenew, view, marker='+')

Nach rumprobieren... habe ich rausgefunden, dass der Loop nach Beendigung nicht komplett freigegeben wird... kann das sein?

Gruß

So fehler gefunden:
#dat64=pandas.to_datetime(temp['Var1'], format='%d-%b-%Y %H:%M:%S')
dat64=pandas.to_datetime(temp['Var1'])

Ohne die Angabe des Formats gehts.

Das deutsche Python-Forum

Fehler beim erneuten ausführen eines Skribtes

Fehler beim erneuten ausführen eines Skribtes

Re: Fehler beim erneuten ausführen eines Skribtes

Re: Fehler beim erneuten ausführen eines Skribtes

Re: Fehler beim erneuten ausführen eines Skribtes

Re: Fehler beim erneuten ausführen eines Skribtes

Re: Fehler beim erneuten ausführen eines Skribtes

Re: Fehler beim erneuten ausführen eines Skribtes

Re: Fehler beim erneuten ausführen eines Skribtes

Re: Fehler beim erneuten ausführen eines Skribtes