Ich möchte von einer Online-Apotheke (DocMorris) bestimmte Daten scrapen.
Leider bin ich im Schreiben von Code nicht sehr geübt.
Das ist mein Python-Code:
Code: Alles auswählen
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd
from time import sleep
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
def connect():
    """Report whether the internet appears to be reachable.

    Sends a GET request to http://google.com and treats any failure raised
    by ``requests`` (DNS error, refused connection, timeout, ...) as
    "offline".

    Returns:
        bool: True if the request completed, False if ``requests`` raised.
    """
    try:
        # The timeout keeps a stalled connection from blocking the caller's
        # retry loop forever.
        requests.get('http://google.com', timeout=10)
    except requests.RequestException:
        # Only network-level failures count as "offline"; KeyboardInterrupt
        # and programming errors are deliberately not swallowed.
        return False
    return True
def gernate_file():
    """Create (or overwrite) ``Results.csv`` with only the header row.

    The column order matches the record list returned by ``profileScraper``:
    running index, title, amount, weight, price, category, link, image URL
    and article number. The first column is labelled 'Index' because it
    carries the running index, not the article number; this also avoids two
    columns sharing the name 'Article Number'.
    """
    headers_text = [
        'Index',
        'Title',
        'Amount',
        'Weight',
        'Price',
        'Categorie',
        'Link',
        'Image Url',
        'Article Number',
    ]
    df = pd.DataFrame([], columns=headers_text)
    # utf-8-sig writes a BOM in front of the header line.
    df.to_csv('Results.csv', index=False, encoding='utf-8-sig')
def hasNumbers(inputString: str) -> bool:
    """Return True if ``inputString`` contains at least one digit character.

    An empty string yields False.
    """
    return any(char.isdigit() for char in inputString)
# url='https://www.docmorris.de/optifast-drink-kaffee/10267833'
# url='https://www.docmorris.de/orlistat-hexal-60-mg/08982497'
def _text_or_default(node, default='N/A'):
    """Return the stripped text of a BeautifulSoup node, or ``default`` for None."""
    return node.text.strip() if node is not None else default


def _extract_weight(soup):
    """Walk the nested containers down to the product detail table text.

    Returns 'N/A' as soon as any level of the chain is missing, so the
    result is always a string.
    """
    # NOTE(review): the last two steps look up the same table class twice,
    # exactly as the original chain did. The second lookup only matches a
    # table nested inside the first one - confirm against the real page.
    lookup_chain = (
        ('div', 'hidden-md'),
        ('ul', 'usps checklist'),
        ('table', 'product-detail-table'),
        ('table', 'product-detail-table'),
    )
    node = soup
    for tag_name, css_class in lookup_chain:
        node = node.find(tag_name, class_=css_class)
        if node is None:
            return 'N/A'
    return node.text.strip()


def _extract_amount(title, soup):
    """Derive the amount from the title, falling back to the grammage div.

    Takes the last whitespace-separated token containing a digit from the
    part of the title before 'Stück'. Without such a token, the text between
    ':' and 'x' of the 'pdr-Grammage' div is used, and '1 Pack' if that is
    unavailable too.
    """
    amount = title.split('Stück')[0]
    for token in amount.split(' '):
        if hasNumbers(token):
            amount = token
    if hasNumbers(amount):
        return amount

    grammage = soup.find('div', class_='pdr-Grammage')
    if grammage is None:
        return '1 Pack'
    parts = grammage.text.split(':')
    if len(parts) < 2:
        # No ':' in the text, so there is nothing to take the amount from.
        return '1 Pack'
    return parts[1].split('x')[0].strip()


def _fetch_image_srcset(url):
    """Open ``url`` in Chrome and return the product picture's srcset.

    Returns 'N/D' when the picture element is not found or has no srcset.
    The browser session is always shut down, including on errors.
    """
    from selenium.common.exceptions import NoSuchElementException

    # NOTE(review): the driver path is passed positionally as in the original;
    # newer Selenium releases may expect a Service object - confirm against
    # the installed version.
    driver = webdriver.Chrome(ChromeDriverManager().install())
    try:
        driver.get(url)
        # Short pause before querying the page, as in the original flow.
        sleep(2)
        try:
            # The ('xpath', value) form replaces find_element_by_xpath,
            # which newer Selenium versions no longer provide.
            picture = driver.find_element(
                'xpath',
                "//picture[@class ='pdr-product-details-image-link']",
            )
        except NoSuchElementException:
            return 'N/D'
        return picture.get_attribute('srcset') or 'N/D'
    finally:
        # quit() ends the whole browser session; the original close() ran
        # only on success and left the session open otherwise.
        driver.quit()


def profileScraper(url, index):
    """Scrape one product page and return its data as a CSV record.

    Waits until ``connect()`` reports a connection, downloads the page with
    ``requests`` and parses it with BeautifulSoup, then loads the same URL in
    Chrome through Selenium to read the product image ``srcset``.

    Args:
        url: Address of the product page.
        index: Running number that becomes the first field of the record.

    Returns:
        ``0`` if the page has no ``h1`` with class ``name``; otherwise the
        list ``[index, title, amount, weight, price, category, url,
        image srcset, article number]``. The amount is prefixed with a
        backtick. Fields that cannot be found are 'N/A' ('N/D' for the image,
        '1 Pack' for the amount).
    """
    while not connect():
        print('no internet')
        sleep(5)

    res = requests.get(url, timeout=30)
    soup = BeautifulSoup(res.content, features='html.parser')

    title_tag = soup.find('h1', class_='name')
    if title_tag is None:
        # Nothing usable on this page; return before starting a browser.
        return 0
    title = title_tag.text.strip()
    print(title)

    weight = _extract_weight(soup)
    print(weight)

    price = _text_or_default(soup.find('mark', class_='rs-qa-price'))
    print(price)

    # The original indexed [-2] on an empty result list, which always raised
    # IndexError; a missing category is now reported as 'N/A'.
    category_nodes = soup.find_all('div', class_='lr-arrow-right')
    category = category_nodes[0].text.strip() if category_nodes else 'N/A'

    amount = _extract_amount(title, soup)
    print(amount)

    img_url_text = _fetch_image_srcset(url)
    print(img_url_text)

    # The article number is searched in the parsed page; the original used
    # an undefined name here and left article_no unbound on a miss.
    article_no = _text_or_default(
        soup.find('span', class_='pdr-CallToAction__articleNumber')
    )
    print(article_no)

    return [index, title, '`' + amount, weight, price, category, url,
            img_url_text, article_no]
# Read the input list; the product URL is taken from the 7th column of every
# row after the header line.
# newline='' is what the csv module documentation recommends for files
# handed to csv.reader.
with open('rewe.csv', 'r', encoding='utf-8', newline='') as readFile:
    file_lines = list(csv.reader(readFile))

# Start with a fresh Results.csv that contains only the header row.
gernate_file()

for index, row in enumerate(file_lines[1:]):
    print(index)
    if len(row) <= 6:
        # Row is too short to contain the URL column.
        print('row has no link column, skipped')
        continue
    record = profileScraper(row[6], index + 1)
    if not record:
        # profileScraper returns 0 when the page has no title; writing that
        # value would put a bogus one-cell row into the CSV.
        print()
        continue
    # Append row by row so results already scraped survive a later crash.
    pd.DataFrame([record]).to_csv(
        'Results.csv',
        index=False,
        mode='a',
        encoding='utf-8-sig',
        header=False,
    )
    print()
Ich möchte die Daten, wie oben im Code beschrieben, von dieser Seite scrapen: https://www.docmorris.de/optifast-drink-kaffee/10267833
Kommen wir zu meinem Problem:
Leider ist der HTML-Code sehr verschachtelt, und ich weiß nicht, wie ich die Elemente ansprechen soll, um die gewünschte Ausgabe zu erzeugen.
Eventuell kann mir jemand helfen, wie ich die Packungsgröße ansprechen kann.
Code: Alles auswählen
# Look up the pack size by walking the nested containers step by step.
# The default is set first so that Weight is always bound, even when one of
# the outer lookups finds nothing.
Weight = 'N/A'
Weight_3 = soup.find('div', class_='hidden-md')
if Weight_3 is not None:
    Weight_2 = Weight_3.find('ul', class_='usps checklist')
    if Weight_2 is not None:
        Weight_1 = Weight_2.find('table', class_='product-detail-table')
        if Weight_1 is not None:
            # NOTE(review): this searches for the same table class again
            # inside the table just found, so it only matches a nested
            # table - confirm against the real page markup.
            weight_table = Weight_1.find('table', class_='product-detail-table')
            if weight_table is not None:
                Weight = weight_table.text.strip()
print(Weight)
lg Marcel