Code: Alles auswählen
def generate_filename(symbol, year, week):
return os.path.join("data", symbol, str(year), '{}.csv.gz'.format(week))
Code: Alles auswählen
def generate_filename(symbol, year, week):
return os.path.join("data", symbol, str(year), '{}.csv.gz'.format(week))
Code: Alles auswählen
# Start date (Year, Month, Day)
start_date = datetime.date(2015, 1, 4)
# End date (Year, Month, Day)
end_date = datetime.date(2016, 2, 9)
Die Codezeilen oben stören mich. Geht es vielleicht innerhalb der Funktion?
def fetch_time_period(symbol, start_date, end_date):
for week in range(start_date.isocalendar()[1], end_date.isocalendar()[1]):
year = str(start_date.isocalendar()[0])
if not exists_file(symbol, year, week):
data = pull_file(symbol, year, week)
print_data_length(data)
save_file(symbol, year, week, data)
else:
print("File for {}/{}/{} already fetched.".format(symbol, year, week))
Code: Alles auswählen
def fetch_time_period(symbol, start_date, end_date):
for week in range(datetime.date(start_date).isocalendar()[1], datetime.date(end_date).isocalendar()[1]):
year = str(start_date.isocalendar()[0])
if not exists_file(symbol, year, week):
data = pull_file(symbol, year, week)
print_data_length(data)
save_file(symbol, year, week, data)
else:
print("File for {}/{}/{} already fetched.".format(symbol, year, week))
def main():
# "symbol", "year, month, day", "year, month, day"
fetch_time_period("symbol", "2015,1,4", "2016, 3, 4")
if __name__ == '__main__':
main()
Code: Alles auswählen
import datetime as dt
def daterange(from_, to, step):
current = from_
while current < to:
yield current
current += step
def main():
for point in daterange(dt.datetime.now(), dt.datetime.now() + dt.timedelta(days=10), dt.timedelta(days=1)):
print(point)
if __name__ == '__main__':
main()
Code: Alles auswählen
datetime.date(start_date).isocalendar()[1]
Code: Alles auswählen
python TickDataCsv27.py
133135278
data/Alpha/2017/1.csv.gz
161435294
data/Alpha/2017/2.csv.gz
145706620
data/Alpha/2017/3.csv.gz
124924322
data/Alpha/2017/4.csv.gz
135177334
data/Alpha/2017/5.csv.gz
95338134
data/Alpha/2017/6.csv.gz
125369326
data/Alpha/2017/7.csv.gz
122297816
data/Alpha/2017/8.csv.gz
138437782
data/Alpha/2017/9.csv.gz
122760250
data/Alpha/2017/10.csv.gz
Traceback (most recent call last):
File "TickDataCsv27.py", line 88, in <module>
main()
File "TickDataCsv27.py", line 81, in main
fetch_whole_year("Alpha", 2017)
File "TickDataCsv27.py", line 60, in fetch_whole_year
data = pull_file(symbol, year, week)
File "TickDataCsv27.py", line 19, in pull_file
response = urllib2.urlopen(url)
File "/usr/lib64/python2.7/urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib64/python2.7/urllib2.py", line 437, in open
response = meth(req, response)
File "/usr/lib64/python2.7/urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib64/python2.7/urllib2.py", line 475, in error
return self._call_chain(*args)
File "/usr/lib64/python2.7/urllib2.py", line 409, in _call_chain
result = func(*args)
File "/usr/lib64/python2.7/urllib2.py", line 558, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 404: Not Found
Code: Alles auswählen
def fetch_time_period(symbol, start_date, end_date):
for week in rrule.rrule(rrule.WEEKLY, dtstart = datetime.start_date, until = datetime.end_date):
if not exists_file(symbol, year, week):
data = pull_file(symbol, year, week)
print_data_length(data)
save_file(symbol, year, week, data)
else:
print("File for {}/{}/{} already fetched.".format(symbol, year, week))
def main():
fetch_time_period("Alpha", "2015,1,4", "2016, 3, 4")
if __name__ == '__main__':
main()
Code: Alles auswählen
Traceback (most recent call last):
File "TickDataCsv27.py", line 92, in <module>
main()
File "TickDataCsv27.py", line 89, in main
fetch_time_period("Alpha", "2015,1,4", "2016, 3, 4")
File "TickDataCsv27.py", line 60, in fetch_time_period
for week in rrule.rrule(rrule.WEEKLY, dtstart = datetime.start_date, until = datetime.end_date):
AttributeError: 'module' object has no attribute 'start_date'
Code: Alles auswählen
# Funktion noch nicht funktionsfähig.
def fetch_time_period(symbol, start_date, end_date):
for week in rrule.rrule(rrule.WEEKLY, dtstart = start_date, until = end_date):
if not exists_file(symbol, year, week):
data = pull_file(symbol, year, week)
print_data_length(data)
save_file(symbol, year, week, data)
else:
print("File for {}/{}/{} already fetched.".format(symbol, year, week))
Code: Alles auswählen
Traceback (most recent call last):
File "TickDataCsv27.py", line 92, in <module>
main()
File "TickDataCsv27.py", line 89, in main
fetch_time_period("Alpha", "2015,1, 4", "2016, 3, 4")
File "TickDataCsv27.py", line 60, in fetch_time_period
for week in rrule.rrule(rrule.WEEKLY, dtstart = start_date, until = end_date):
File "/usr/lib/python2.7/site-packages/dateutil/rrule.py", line 240, in __init__
dtstart = datetime.datetime.fromordinal(dtstart.toordinal())
AttributeError: 'str' object has no attribute 'toordinal'
Code: Alles auswählen
def fetch_time_period(symbol, start_date, end_date):
# start_date sei: "2015, 1, 4"
start_date = datetime.date(*map(int, start_date.split(',')))
# selbiges mit end_date:
...
for week in rrule.rrule(rrule.WEEKLY, dtstart=start_date, until=end_date):
...
Code: Alles auswählen
datetime.date(start_date).isocalendar()[1]
Code: Alles auswählen
Traceback (most recent call last):
File "TickDataCsv27.py", line 81, in <module>
main()
File "TickDataCsv27.py", line 76, in main
fetch_whole_year(symbol, 2015)
File "TickDataCsv27.py", line 53, in fetch_whole_year
print_data_length(data)
File "TickDataCsv27.py", line 28, in print_data_length
data = f.read()
File "/usr/lib/python2.7/gzip.py", line 261, in read
self._read(readsize)
File "/usr/lib/python2.7/gzip.py", line 320, in _read
self._add_read_data( uncompress )
File "/usr/lib/python2.7/gzip.py", line 338, in _add_read_data
self.extrabuf = self.extrabuf[offset:] + data
MemoryError
Ich habe ein Gitlab Repo angelegt. Hilfe, Tipps, Tricks und konstruktive Kritik sind sehr willkommen.@aaron: Pause machen hilft nicht wenn man den Speicher voll müllt und nach der Pause damit weiter macht. Es sieht so aus als wenn Du mehr als eine dieser Dateien gleichzeitig entpackt im Speicher hältst, das ist/wird dann halt problematisch. 5 Dateien entpackt sind dann 500 bis 650 MiB, plus vielleicht die ungepackten Daten — wir wissen ja nicht was Du da genau machst.