Vielen Dank! Wie bekomme ich das in meinen Code eingebaut? Mit deinem Vorschlag funktioniert das leider nicht.
Code: Alles auswählen
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd
from time import sleep
def connect():
    """Check whether the internet is reachable.

    Sends an HTTP GET request to http://google.com.

    Returns:
        bool: True when the request completes without a requests-level
        error, False otherwise.
    """
    try:
        # A timeout keeps the probe from blocking forever on a stalled
        # connection; a timeout counts as "not connected".
        requests.get('http://google.com', timeout=10)
    except requests.RequestException:
        # Only network/HTTP errors mean "offline". KeyboardInterrupt and
        # SystemExit are deliberately not swallowed, so Ctrl+C still works.
        return False
    return True
def gernate_file(filename='Results.csv'):
    """Create (or overwrite) a CSV file that contains only the header row.

    The column names are listed in the same order as the values in the list
    returned by profileScraper: index, title, manufacturer, quantity, price,
    category, link, image URL, article number.

    Args:
        filename: Path of the CSV file to write. Defaults to 'Results.csv'.
    """
    headers_text = [
        'Index',
        'Title',
        'hersteller',
        'menge',
        'Price',
        'Categorie',
        'Link',
        'Image Url',
        'Article Number',
    ]
    df = pd.DataFrame([], columns=headers_text)
    # utf-8-sig writes a BOM at the start of the file.
    df.to_csv(filename, index=False, encoding='utf-8-sig')
def hasNumbers(inputString):
    """Report whether inputString contains at least one digit character."""
    for character in inputString:
        if character.isdigit():
            return True
    return False
# url='https://produkte.migros.ch/milch-extra-12x100g'
# url='https://produkte.migros.ch/les-adorables-marc-de-champagne'
def _text_or_default(tag, default='N/A'):
    """Return the stripped text of a BeautifulSoup tag, or default if tag is None."""
    if tag is None:
        return default
    return tag.text.strip()


def profileScraper(url,index):
    """Download one product page and extract its fields.

    Blocks (retrying every 5 seconds) until connect() reports a working
    internet connection, then fetches url and parses it with BeautifulSoup.

    Args:
        url: Address of the product page to scrape.
        index: Row number that is passed through as the first list element.

    Returns:
        0 when the page has no title element (span.hidden-xs); otherwise the
        list [index, title, '`' + manufacturer, quantity, price, category,
        url, image URL, article number]. Fields that cannot be found are
        reported as 'N/A'.
    """
    while not connect():
        print('no internet')
        sleep(5)

    res = requests.get(url)
    soup = BeautifulSoup(res.content, features='html.parser')

    title = soup.find('span', class_='hidden-xs')
    if title is None:
        # Without a title the page is treated as not being a product page.
        return 0
    title = title.text.strip()
    print(title)

    # NOTE(review): Hersteller, menge and article_no below all search for
    # 'div.col-xs-8', so they will presumably end up with the same text.
    # More specific selectors are needed to tell the fields apart - verify
    # against the real page markup.
    Hersteller = _text_or_default(soup.find('div', class_='col-xs-8'))
    print(Hersteller)

    # NOTE(review): class_='' is an unusual filter - confirm which <span>
    # it is meant to select.
    Price = _text_or_default(soup.find('span', class_=''))
    print(Price)

    Categories = soup.find_all('a', class_='overflow-hidden')
    # An empty result list must not be indexed (the original else-branch
    # raised IndexError here).
    Categories = Categories[0].text.strip() if Categories else 'N/A'
    print(url)

    menge = _text_or_default(soup.find_next('div', class_='col-xs-8'))
    print(menge)

    # Default first, so the name is always bound when the list is built.
    img_url_text = 'N/A'
    img_tag1 = soup.find('div', class_='details_image')
    if img_tag1 is not None:
        img_tag = img_tag1.find('img', class_='img-responsive')
        if img_tag is not None:
            img_url = img_tag.get('src')
            if img_url:
                # Drop only a leading protocol-relative '//'; replacing every
                # '//' would break absolute URLs such as 'https://...'.
                img_url_text = img_url[2:] if img_url.startswith('//') else img_url
                print(img_url_text)

    article_no = soup.find('div', class_='col-xs-8')
    if article_no is not None:
        article_no = article_no.text.strip()
        print(article_no)
    else:
        article_no = 'N/A'

    return [index, title, '`' + Hersteller, menge, Price, Categories, url, img_url_text, article_no]
#Migrolist.csv
# Read the input list; the first line is treated as a header and skipped.
# newline='' is what the csv module expects for files passed to csv.reader.
with open('Ikea.csv', 'r', encoding='utf-8', newline='') as readFile:
    file_lines = list(csv.reader(readFile))

# NOTE(review): gernate_file() is called without arguments and writes its
# header row to 'Results.csv', while the scraped rows below are appended to
# 'Ikea1.csv' - confirm which output file is intended.
gernate_file()

for index, row in enumerate(file_lines[1:]):
    print(index)
    # Column 7 (row[6]) holds the product URL; shorter rows (e.g. blank
    # lines) are skipped instead of raising IndexError.
    if len(row) > 6:
        record = profileScraper(row[6], index + 1)
        # profileScraper returns 0 when the page has no title; such a result
        # must not be written out as a data row.
        if record:
            df = pd.DataFrame([record])
            df.to_csv('Ikea1.csv', index=False, mode='a', encoding='utf-8-sig', header=False)
    print()
    print()