Hallo @__deets__ & all nochmal
es funktioniert jetzt mit dem Download.
Allerdings sehe ich, dass der Dateiname nicht 1 zu 1 der selbe ist.
Die Originaldatei heißen Beispielsweise "490---1.JPG" "490---2.JPG" usw
Heruntergeladen heißen sie "1-490---1.JPG" "2-490---2.JPG"
Kann man da noch etwas ändern, so dass die Dateinamen unverändert runtergeladen werden?
LG
Bilder einer Liste mit URL automatisch downloaden
Ich habe das geändert und einige Bilder werden heruntergeladen.
Allerdings kommt nach einiger Zeit eine Fehlermeldung:
Traceback (most recent call last):
File "/Users/brittabaumgart/Desktop/Bildersicherung/Tool1.py", line 19, in <module>
urllib.request.urlretrieve(fileurl, os.path.join(bilder_path, filename))
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 247, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 502, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1346, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1321, in do_open
r = h.getresponse()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 1322, in getresponse
response.begin()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 303, in begin
version, status, reason = self._read_status()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 264, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/socket.py", line 669, in readinto
return self._sock.recv_into(b)
TimeoutError: [Errno 60] Operation timed out
Brittas-iMac:~ brittabaumgart$
Allerdings kommt nach einiger Zeit eine Fehlermeldung:
Traceback (most recent call last):
File "/Users/brittabaumgart/Desktop/Bildersicherung/Tool1.py", line 19, in <module>
urllib.request.urlretrieve(fileurl, os.path.join(bilder_path, filename))
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 247, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 502, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1346, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1321, in do_open
r = h.getresponse()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 1322, in getresponse
response.begin()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 303, in begin
version, status, reason = self._read_status()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 264, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/socket.py", line 669, in readinto
return self._sock.recv_into(b)
TimeoutError: [Errno 60] Operation timed out
Brittas-iMac:~ brittabaumgart$
Ich habe das geändert und einige Bilder werden heruntergeladen.
Allerdings kommt nach einiger Zeit eine Fehlermeldung:
Traceback (most recent call last):
File "/Users/brittabaumgart/Desktop/Bildersicherung/Tool1.py", line 19, in <module>
urllib.request.urlretrieve(fileurl, os.path.join(bilder_path, filename))
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 247, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 502, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1346, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1321, in do_open
r = h.getresponse()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 1322, in getresponse
response.begin()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 303, in begin
version, status, reason = self._read_status()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 264, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/socket.py", line 669, in readinto
return self._sock.recv_into(b)
TimeoutError: [Errno 60] Operation timed out
Brittas-iMac:~ brittabaumgart$
Allerdings kommt nach einiger Zeit eine Fehlermeldung:
Traceback (most recent call last):
File "/Users/brittabaumgart/Desktop/Bildersicherung/Tool1.py", line 19, in <module>
urllib.request.urlretrieve(fileurl, os.path.join(bilder_path, filename))
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 247, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 502, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1346, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1321, in do_open
r = h.getresponse()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 1322, in getresponse
response.begin()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 303, in begin
version, status, reason = self._read_status()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 264, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/socket.py", line 669, in readinto
return self._sock.recv_into(b)
TimeoutError: [Errno 60] Operation timed out
Brittas-iMac:~ brittabaumgart$
Da wird wahrscheinlich gedrosselt. Ich habe das mal so umgearbeitet, dass jetzt schon geladene Bilder ignoriert werden. Dann kann man das Skript immer wieder starten, bis es durchgelaufen ist. Dazwischen sollte man aber ne Weile warten.
Code: Alles auswählen
import csv
import urllib.request
from urllib.request import urlopen
import cgi
import sys
import os
with open(sys.argv[1]) as csvfile:
bilder_path = sys.argv[2]
reader = csv.reader(csvfile, delimiter=';', quotechar='|')
for row in reader:
fileurl = row[0] #Link zum Herunterladen (Muss http, nicht https)
filepos = row[1] #erstes zweites oder drittes bild
remotefile = urlopen(fileurl)
info = remotefile.info()
filename = fileurl.split("/")[-1]
full_path = os.path.join(bilder_path, filename)
if not os.path.exists(full_path):
urllib.request.urlretrieve(fileurl, full_path)
print("downloaded", filename)
else:
print("already have", filename)
Hallo @__deets__
dankeschön
Ich habe das eben mal geändert.
Jetzt werden immer zwischen 2 und ca 15 Bilder heruntergeladen, dann kommt wieder die Fehlermeldung:
Traceback (most recent call last):
File "/Users/brittabaumgart/Desktop/Bildersicherung/Tool1.py", line 16, in <module>
remotefile = urlopen(fileurl)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 502, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1346, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1321, in do_open
r = h.getresponse()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 1322, in getresponse
response.begin()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 303, in begin
version, status, reason = self._read_status()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 264, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/socket.py", line 669, in readinto
return self._sock.recv_into(b)
TimeoutError: [Errno 60] Operation timed out
Kann man da vielleicht noch etwas ändern oder einstellen?
Insgesamt werden es in der Datei nämlich um die 60.000 Bilder sein.....
VlG
dankeschön

Jetzt werden immer zwischen 2 und ca 15 Bilder heruntergeladen, dann kommt wieder die Fehlermeldung:
Traceback (most recent call last):
File "/Users/brittabaumgart/Desktop/Bildersicherung/Tool1.py", line 16, in <module>
remotefile = urlopen(fileurl)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 525, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 502, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1346, in http_open
return self.do_open(http.client.HTTPConnection, req)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 1321, in do_open
r = h.getresponse()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 1322, in getresponse
response.begin()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 303, in begin
version, status, reason = self._read_status()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py", line 264, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/socket.py", line 669, in readinto
return self._sock.recv_into(b)
TimeoutError: [Errno 60] Operation timed out
Kann man da vielleicht noch etwas ändern oder einstellen?
Insgesamt werden es in der Datei nämlich um die 60.000 Bilder sein.....
VlG
Ich weiss nicht was da los ist, aber mein Versuch das zu fixen funktioniert nicht, weil die Downloads einfach ewig haengen bleiben. Ich weiss nicht was das ist, aber wir kommen damit dann auch in Bereiche, wo meine Bereitschaft Auftragsentwicklung zu machen ihre Grenze erreicht hat.
Hallo @__deets__
verstehe, kein Problem
Ich habe es momentan mit der vorherigen Version des Scripts recht stabil am laufen. Bei einem notwendigen Neustart lösche ich vorher in der Export-Datei die bereits heruntergeladenen
Datensätze fix raus.....
Bricht jetzt zumindest erst nach immer 500 - 1000 ab
Danke Dir nochmals
Liebe Grüße
verstehe, kein Problem

Datensätze fix raus.....
Bricht jetzt zumindest erst nach immer 500 - 1000 ab
Danke Dir nochmals
Liebe Grüße