Hilfe bei Script

Code-Stücke können hier veröffentlicht werden.
Antworten
nexuz89
User
Beiträge: 18
Registriert: Montag 27. Juli 2020, 10:59

Sonntag 2. August 2020, 10:04

Hallo zusammen, ich benötige eure Hilfe bezüglich eines Python-Skripts.
Ich möchte von einer Online-Apotheke (DocMorris) bestimmte Daten scrapen.
Leider bin ich im Schreiben von Code nicht sehr geübt.

Das ist mein Python-Code:

Code: Alles auswählen

import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd
from time import sleep
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys


def connect():
    """Report whether an outbound HTTP request currently succeeds.

    Sends a GET request to http://google.com as a reachability probe.

    Returns:
        bool: True if the request completed, False if ``requests`` raised a
        request-level error (connection failure, timeout, ...).
    """
    try:
        # The timeout keeps the probe from blocking forever on a stalled link.
        requests.get('http://google.com', timeout=10)
    except requests.RequestException:
        # Only request-level failures mean "offline"; KeyboardInterrupt and
        # programming errors must still propagate to the caller.
        return False
    return True
def gernate_file():
    """Create (or overwrite) Results.csv so that it holds only the header row.

    The column order mirrors the row list returned by ``profileScraper``.
    The first column carries the running row index, so it is labeled
    'Index'; 'Article Number' appears once, as the last column.
    """
    headers_text = [
        'Index',
        'Title',
        'Amount',
        'Weight',
        'Price',
        'Categorie',
        'Link',
        'Image Url',
        'Article Number',
    ]

    # An empty frame with named columns writes just the header line.
    df = pd.DataFrame([], columns=headers_text)
    df.to_csv('Results.csv', index=False, encoding='utf-8-sig')
def hasNumbers(inputString):
    """Return True if at least one character of ``inputString`` is a digit."""
    for character in inputString:
        if character.isdigit():
            return True
    return False
# url='https://www.docmorris.de/optifast-drink-kaffee/10267833'
# url='https://www.docmorris.de/orlistat-hexal-60-mg/08982497'
def _node_text(node, default='N/A'):
    """Return the stripped text of a BeautifulSoup node, or ``default`` for None."""
    return default if node is None else node.text.strip()


def _find_nested(root, steps):
    """Follow a chain of (tag, css_class) lookups; return None once one misses."""
    node = root
    for tag, css_class in steps:
        if node is None:
            return None
        node = node.find(tag, class_=css_class)
    return node


def profileScraper(url, index):
    """Scrape one product page and return its fields as a CSV row.

    The page is fetched with ``requests`` and parsed with BeautifulSoup for
    the text fields. It is then opened in Chrome through Selenium to read
    the product image ``srcset``.

    Args:
        url: Address of the product page.
        index: Row number to store as the first element of the result.

    Returns:
        ``[index, title, amount, weight, price, category, url, image_srcset,
        article_number]``, or ``0`` when the page has no ``h1.name`` title.
        Fields that cannot be found are filled with 'N/A' ('N/D' for the
        image, '1 Pack' for the amount).
    """
    # Local import: only the image lookup below needs this exception type.
    from selenium.common.exceptions import WebDriverException

    # Block until the connectivity probe succeeds.
    while not connect():
        print('no internet')
        sleep(5)

    res = requests.get(url)
    soup = BeautifulSoup(res.content, features='html.parser')

    # Without a title there is nothing to scrape. Bail out before a browser
    # is started so no Chrome instance is left behind.
    title_tag = soup.find('h1', class_='name')
    if title_tag is None:
        return 0
    title = title_tag.text.strip()
    print(title)

    # Any missing level of the nesting yields 'N/A' instead of an unbound name.
    # NOTE(review): the table lookup is repeated one level deeper exactly as
    # in the original chain - confirm against the real page markup.
    Weight = _node_text(_find_nested(soup, (
        ('div', 'hidden-md'),
        ('ul', 'usps checklist'),
        ('table', 'product-detail-table'),
        ('table', 'product-detail-table'),
    )))
    print(Weight)

    Price = _node_text(soup.find('mark', class_='rs-qa-price'))
    print(Price)

    # Take the first breadcrumb-like entry. An empty result list used to be
    # indexed with [-2], which always raised IndexError.
    category_tags = soup.find_all('div', class_='lr-arrow-right')
    Categories = category_tags[0].text.strip() if category_tags else 'N/A'

    # Amount: the last whitespace-separated token containing a digit in the
    # part of the title that precedes 'Stück'.
    title_before_unit = title.split('Stück')[0]
    amount = title_before_unit
    for token in title_before_unit.split(' '):
        if hasNumbers(token):
            amount = token
    if not hasNumbers(amount):
        # Fall back to the grammage element: text between the first ':' and
        # the following 'x'. Default to '1 Pack' when that is unavailable.
        amount = '1 Pack'
        grammage = soup.find('div', class_='pdr-Grammage')
        if grammage is not None:
            grammage_parts = grammage.text.split(':')
            if len(grammage_parts) > 1:
                amount = grammage_parts[1].split('x')[0].strip()
    print(amount)

    img_url_text = 'N/D'
    driver = webdriver.Chrome(ChromeDriverManager().install())
    try:
        driver.get(url)
        sleep(2)  # give the page time to render the image element
        # NOTE(review): find_element_by_xpath is the Selenium 3 spelling used
        # by this script; newer Selenium releases expect
        # find_element(By.XPATH, ...) - confirm the installed version.
        picture = driver.find_element_by_xpath(
            "//picture[@class ='pdr-product-details-image-link']")
        img_url_text = picture.get_attribute('srcset') or 'N/D'
    except WebDriverException:
        # The image is best-effort: keep the 'N/D' placeholder.
        pass
    finally:
        # Always shut the browser down, also when the lookup failed.
        driver.quit()
    print(img_url_text)

    article_no = _node_text(
        soup.find('span', class_='pdr-CallToAction__articleNumber'))
    print(article_no)

    # The amount is prefixed with a backtick exactly as before.
    # NOTE(review): presumably to keep spreadsheet tools from reinterpreting
    # the value - confirm.
    return [index, title, '`' + amount, Weight, Price, Categories, url,
            img_url_text, article_no]

# Input list: rewe.csv with a header row; column index 6 of each data row is
# passed to profileScraper as the product URL.
with open('rewe.csv', 'r', encoding='utf-8', newline='') as readFile:
    file_lines = list(csv.reader(readFile))

# Start from a fresh Results.csv that contains only the header row.
gernate_file()
for index, row in enumerate(file_lines[1:]):
    print(index)
    if len(row) <= 6:
        # Blank or truncated input line: there is no URL column to scrape.
        continue
    record = profileScraper(row[6], index + 1)
    if not record:
        # profileScraper returns 0 when the page has no title; do not write
        # that sentinel into the results as if it were a data row.
        continue
    # Append row by row so that earlier results survive a later crash.
    df = pd.DataFrame([record])
    df.to_csv('Results.csv', index=False, mode='a', encoding='utf-8-sig', header=False)
print()

Ich möchte gerne die Daten, wie oben im Code beschrieben, scrapen (Beispiel: https://www.docmorris.de/optifast-drink-kaffee/10267833).

Kommen wir zu meinem Problem:

Leider ist der HTML-Code stark verschachtelt, und ich weiß nicht, wie ich die Elemente ansprechen soll, um die Ausgabe zu erzeugen.

Eventuell kann mir jemand helfen, wie ich die Packungsgröße ansprechen kann:

Code: Alles auswählen

    # Walk down the nested containers. 'N/A' stays in place unless every
    # level is found, so Weight is always bound when it is printed.
    # NOTE(review): the table lookup is repeated one level deeper exactly as
    # in the original - confirm against the real page markup.
    Weight = 'N/A'
    Weight_3 = soup.find('div', class_='hidden-md')
    if Weight_3 is not None:
        Weight_2 = Weight_3.find('ul', class_='usps checklist')
        if Weight_2 is not None:
            Weight_1 = Weight_2.find('table', class_='product-detail-table')
            if Weight_1 is not None:
                Weight_0 = Weight_1.find('table', class_='product-detail-table')
                if Weight_0 is not None:
                    Weight = Weight_0.text.strip()
    print(Weight)
Vielen Dank schon mal für eure Hilfe!

lg Marcel
Antworten