Seite 1 von 1

Parsen vom Daten aus Script

Verfasst: Montag 9. Dezember 2019, 14:40
von Jankie
Hey,

ich Versuche Daten von Lieferando zu Crawlen, das hat auch alles funktioniert, bis Lieferando ihren HTML Code geändert hat und die PLZ nicht mehr direkt auslesbar ist. Allerdings habe ich im Code folgende Struktur gefunden:

Musste hier leider ein Bild nehmen, da es sonst zu viele Zeichen sind.

https://www.bilder-upload.eu/bild-fb904 ... 5.jpg.html


Dort sind die Geo-Coordinaten drin aufgeführt, diese hätte ich gerne irgendwie da ausgelesen, ich weiß nur nicht wie. Anfangs habe ich angedacht beim Komma zu splitten.Die Anzahl Variert aber zu stark, hat da jemand eine Idee?
Also am besten wäre ein Dictionary mit allen IDS und den dazugehörigen Geo-Koordinaten.
Die Zuordnung geht über die Restaurant ID, welche ich für jedes Restaurant schon mit im Dict gespeichert habe, aber wie füge ich anhand dieser ID die richtigen Geo-Koordinaten zu dem richtigen Eintrag zu?


Hier mal mein kompletter Code bis jetzt:

Code: Alles auswählen

from bs4 import BeautifulSoup
import requests

HEADERS = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0"}

def get_cityname_from_postcode(postcode):
    try:
        url = f"https://www.google.com/search?q={postcode}"
        page = requests.get(url, headers=HEADERS)
        soup = BeautifulSoup(page.content, "html.parser")
        cityname = soup.find('span',{'class':"LrzXr kno-fv"}).find('a',{'class':'fl'}).get_text()
        return cityname
    except AttributeError:
        return None

def show_results(restaurants_with_information):
    for key in restaurants_with_information.keys():
        print(f"Name:               {key}")
        print(f"Adresse:            {restaurants_with_information[key]['Adress']}")
        print(f"Essen:              {restaurants_with_information[key]['Food']}")
        print(f"Bewertung:          {restaurants_with_information[key]['Rating']}" if restaurants_with_information[key]['Rating'] == "Keine Angaben" else f"Bewertung:          {restaurants_with_information[key]['Rating']}/5")
        print(f"Lieferkosten:       {restaurants_with_information[key]['Delivery cost']}")
        print(f"Bestellwert:        {restaurants_with_information[key]['Min order']}")
        print(f"Geöffnet ab:        {restaurants_with_information[key]['Open time']}")
        print(f"Restaurant ID:      {restaurants_with_information[key]['Restaurant ID']}\n")
    print(f"Es wurden {len(restaurants_with_information)} Restaurants gefunden\n")

def save_results_in_file(restaurants_with_information):
    output_filename = "Lieferando_output.txt"
    with open(output_filename, 'w', encoding="UTF-8") as outputfile:
        for key in restaurants_with_information.keys():
            outputfile.write(f"Name:               {key}\n")
            outputfile.write(f"Adresse:            {restaurants_with_information[key]['Adress']}\n")
            outputfile.write(f"Essen:              {restaurants_with_information[key]['Food']}\n")
            outputfile.write(f"Bewertung:          {restaurants_with_information[key]['Rating']}\n" if restaurants_with_information[key]['Rating'] == "Keine Angaben" else f"Bewertung:          {restaurants_with_information[key]['Rating']}/5\n")
            outputfile.write(f"Lieferkosten:       {restaurants_with_information[key]['Delivery cost']}\n")
            outputfile.write(f"Bestellwert:        {restaurants_with_information[key]['Min order']}\n")
            outputfile.write(f"Geöffnet ab:        {restaurants_with_information[key]['Open time']}\n")
            outputfile.write(f"Restaurant ID:      {restaurants_with_information[key]['Restaurant ID']}\n\n")
    print(f"Ergebnisse wurden in {output_filename} gespeichert.\n")


def fill_dictionary(restaurants):
    restaurants_with_information = {}
    for restaurant in restaurants[:-1]:
        restaurant_name = get_restaurant_name(restaurant)
        restaurant_adress = get_restaurant_adress(restaurant)
        restaurant_food = get_restaurant_food(restaurant)
        restaurant_rating = get_restaurant_rating(restaurant)
        delivery_cost = get_delivery_cost(restaurant)
        minimal_order_value = get_minimal_order_value(restaurant)
        open_time = get_open_time(restaurant)
        restaurant_id = get_restaurant_id(restaurant)
        restaurants_with_information[restaurant_name] = {"Adress":restaurant_adress, "Food":restaurant_food, "Rating":restaurant_rating, "Delivery cost": delivery_cost, "Min order": minimal_order_value, "Open time":open_time, "Restaurant ID":restaurant_id}
    return restaurants_with_information

def get_restaurant_rating(restaurant):
    rating_block = restaurant.find(itemtype='http://schema.org/Rating')
    if rating_block.find(itemprop='reviewCount').get('content') != "0":
        rating = rating_block.find(itemprop='ratingValue').get('content')
    else:
        rating = "Keine Angaben"
    return rating

def get_restaurant_name(restaurant):
    try:
        restaurant_name =  restaurant.find('a', {'class':'restaurantname'}).get_text().strip()
    except AttributeError:
        restaurant_name =  "Fehler"
    return restaurant_name

def get_restaurant_adress(restaurant):
    try:
        restaurant_adress =  restaurant.find('div', {'class':'address'}).get_text().strip()
    except AttributeError:
        restaurant_adress = "Keine Angaben"
    return restaurant_adress

def get_restaurant_food(restaurant):
    try:
        restaurant_food = restaurant.find('div',{'class':'kitchens'}).get_text().strip()
    except AttributeError:
        restaurant_food =  "Keine Angaben"
    return restaurant_food

def get_delivery_cost(restaurant):
    try:
        delivery_cost = restaurant.find('div',{'class':'delivery-cost js-delivery-cost'}).get_text().strip()
    except AttributeError:
        delivery_cost = "Keine Angaben"
    return delivery_cost

def get_minimal_order_value(restaurant):
    try:
        minimal_order_value = restaurant.find('div',{'class':'min-order'}).get_text().strip()
    except AttributeError:
        minimal_order_value = "Keine Angaben"
    return minimal_order_value

def get_open_time(restaurant):
    try:
        open_time =  restaurant.find('div',{'class':'avgdeliverytime avgdeliverytimefull open'}).get_text().strip()
    except AttributeError:
        open_time = "Keine Angaben"
    return open_time

def get_restaurant_id(restaurant):
    try:
        restaurant_id = restaurant.get("id").replace("irestaurant", "")
    except AttributeError:
        restaurant_id = "Keine Angaben"
    return restaurant_id

def make_urls(postcode, cityname):
    return [f"https://www.lieferando.de/lieferservice-{cityname}-{postcode}", f"https://www.lieferando.de/lieferservice-{postcode}"]

def cityname_without_umlaut(cityname):
    chars = {'ö':'oe','ä':'ae','ü':'ue'}
    if cityname != None:
        if any(char in cityname for char in chars.keys()):
            for char in chars:
                cityname = cityname.replace(char,chars[char])
        return cityname

def get_restaurantlist(url):
    page = requests.get(url, headers=HEADERS)
    soup = BeautifulSoup(page.content, "html.parser")
    all_restaurant_infoboxes = soup.find('div',id = 'irestaurantlist')
    return all_restaurant_infoboxes


def main():
    postcode = input("Bitte gib deine Postleitzahl ein: ")
    print("Bitte warte einige Sekunden...\n")
    cityname = cityname_without_umlaut(get_cityname_from_postcode(postcode))
    url = make_urls(postcode, cityname)[1]
    all_restaurant_infoboxes = get_restaurantlist(url)
    if all_restaurant_infoboxes == None:
        url = make_urls(postcode, cityname)[0]
        all_restaurant_infoboxes = get_restaurantlist(url)
    restaurants = all_restaurant_infoboxes.find_all(itemtype="http://schema.org/Restaurant")
    result = fill_dictionary(restaurants)
    show_results(result)
    save_results = True if input("Willst du die Ergebnisse in einer Textdatei speichern? (Y/N): ") == "Y" else False
    if save_results:
        save_results_in_file(result)
    print("Programmende")


if __name__ == "__main__":
    main()

Re: Parsen vom Daten aus Script

Verfasst: Montag 9. Dezember 2019, 18:46
von Sirius3
Das mit dem Bild ist kein Argument, hier gibt es keine Längenbeschränkung, zumal Dein Python-Code länger ist.
Das was Du da zeigst, ist Javascript-Code, das kann man mit ein bißchen zuschneiden mit ›ast.literal_eval‹ parsen.

In `get_cityname_from_postcode` solltest Du statt den AttributeError abzufangen, abfragen, ob `find` None liefert. In `show_results` und `save_results_in_file` solltest Du statt `.key()` `.values()` benutzen, dann werden die Format-Strings auch deutlich einfacher.

Die ganzen get_xxx-Funktion sind fast identisch und könnten in (fast) einer zusammengefasst werden, dann mußt Du auch nicht an so vielen Stellen das mit dem AttributeError reparieren.

`cityname_without_umlaut` ist fehlerhaft. Da stimmt die Einrückung nicht. Das `if any...` ist überflüssig. Und wann benutzt Du `.keys()` und wann nicht?

Die Zeile `save_results = True if input("Willst du die Ergebnisse in einer Textdatei speichern? (Y/N): ") == "Y" else False` läßt sich auch deutlich einfacher schreiben. Was liefert denn die if-Bedingung?

Re: Parsen vom Daten aus Script

Verfasst: Mittwoch 11. Dezember 2019, 11:45
von Jankie
Hey, erst mal danke für das Feedback.

Beim Thema erstellen kam aber eine Meldung, dass nur XXXXX Zeichen erlaubt sind, vielleicht ist bei den Beiträgen keine Limitierung.

Bei der Funktion weiß ich nicht genau was ich machen soll, da der AttributeError kommt, wenn er die Class "LrzXr kno-fv" nicht findet und darin keine Class "fl" findet.

Code: Alles auswählen

def get_cityname_from_postcode(postcode):
    try:
        url = f"https://www.google.com/search?q={postcode}"
        page = requests.get(url, headers=HEADERS)
        soup = BeautifulSoup(page.content, "html.parser")
        cityname = soup.find('span',{'class':"LrzXr kno-fv"}).find('a',{'class':'fl'}).get_text()
        return cityname
    except AttributeError:
        return None
mit ast.literal_eval() komm ich nicht ganz klar, da ich nicht weiß wie ich die Daten vorher anpassen muss.

Hier noch mal der komplette Ausschnitt, bei dem ich die Geo-Koordinaten parsen will.

Code: Alles auswählen

  var restaurants = [
      ['P0PP3P', [1, 2, 4], [21, 71, 271], 1, 'Zum Rana','', 0,9,20,9.76,30,0,1,0,0,1,49.5064761,10.8875973,2,45,[[{"starttime":"14:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:45:00"}],[{"starttime":"14:00:00","endtime":"22:45:00"}]],[{"date":"2019-12-24","starttime":1577142000,"endtime":1577142000},{"date":"2019-12-25","starttime":1577228400,"endtime":1577228400},{"date":"2019-12-26","starttime":1577364300,"endtime":1577395800},{"date":"2019-12-31","starttime":1577786400,"endtime":1577826000},{"date":"2020-01-01","starttime":1577872800,"endtime":1577914200},{"date":"2020-01-06","starttime":1578314700,"endtime":1578346200}],[[{"starttime":"14:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:45:00"}],[{"starttime":"14:00:00","endtime":"22:45:00"}]],[{"date":"2019-12-24","starttime":1577142000,"endtime":1577142000},{"date":"2019-12-25","starttime":1577228400,"endtime":1577228400},{"date":"2019-12-26","starttime":1577364300,"endtime":1577395800},{"date":"2019-12-31","starttime":1577786400,"endtime":1577826000},{"date":"2020-01-01","starttime":1577872800,"endtime":1577914200},{"date":"2020-01-06","starttime":1578314700,"endtime":1578346200}],43.518518518519,false,false,0,1,[9, 389],{"name":"Zum Rana","url":"\/zum-rana","branch":"","logo":"\/\/static.lieferando.de\/images\/restaurants\/de\/P0PP3P\/logo_465x320.png","categories":"Italienisch, Indisch, Italienische Pizza"},0,0,0,false],
['PN3R1R', [1, 4, 5], [61, 221, 271], 1, 'Pizza Service Neustadt','', 0,8,8,10.22,30,1,1,0,0,0,49.57973,10.61047,2,65,[[{"starttime":"11:30:00","endtime":"21:55:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:30:00"}],[{"starttime":"00:00:00","endtime":"00:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:30:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:30:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:30:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:30:00"}]],[{"date":"2019-12-24","starttime":1577142000,"endtime":1577142000},{"date":"2019-12-25","starttime":1577284200,"endtime":1577307600},{"date":"2019-12-26","starttime":1577356200,"endtime":1577394000},{"date":"2019-12-31","starttime":1577746800,"endtime":1577746800},{"date":"2020-01-01","starttime":1577874600,"endtime":1577912400},{"date":"2020-01-06","starttime":1578306600,"endtime":1578344400}],[[{"starttime":"11:30:00","endtime":"21:55:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:30:00"}],[{"starttime":"00:00:00","endtime":"00:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:30:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:30:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:30:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:30:00"}]],[{"date":"2019-12-24","starttime":1577142000,"endtime":1577142000},{"date":"2019-12-25","starttime":1577284200,"endtime":1577307600},{"date":"2019-12-26","starttime":1577356200,"endtime":1577394000},{"date":"2019-12-31","starttime":1577746800,"endtime":1577746800},{"date":"2020-01-01","starttime":1577874600,"endtime":1577912400},{"date":"2020-01-06","starttime":1578306600,"endtime":1578344400}],29.907407407407,false,false,0,1,[8, 495],{"name":"Pizza Service Neustadt","url":"\/pizza-service-neustadt","branch":"","logo":"\/\/static.lieferando.de\/images\/restaurants\/de\/PN3R1R\/logo_465x320.png","categories":"Thail\u00e4ndisch, Mexikanisch, Italienische Pizza"},0,0,0,false],
['NOQN1NN', [1, 2, 5], [21, 201, 271], 1, 'Zum Pizza Hannes Kegelbahn Tuchenbach','', 0,8,9,9.16,30,2,1,0,0,1,49.52807,10.86215,2,40,[[{"starttime":"14:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:45:00"}],[{"starttime":"14:00:00","endtime":"22:45:00"}]],[{"date":"2019-12-24","starttime":1577142000,"endtime":1577142000},{"date":"2019-12-25","starttime":1577228400,"endtime":1577228400},{"date":"2019-12-26","starttime":1577364300,"endtime":1577395800},{"date":"2019-12-31","starttime":1577796300,"endtime":1577827800},{"date":"2020-01-01","starttime":1577882700,"endtime":1577914200},{"date":"2020-01-06","starttime":1578314700,"endtime":1578346200}],[[{"starttime":"14:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:45:00"}],[{"starttime":"14:00:00","endtime":"22:45:00"}]],[{"date":"2019-12-24","starttime":1577142000,"endtime":1577142000},{"date":"2019-12-25","starttime":1577228400,"endtime":1577228400},{"date":"2019-12-26","starttime":1577364300,"endtime":1577395800},{"date":"2019-12-31","starttime":1577796300,"endtime":1577827800},{"date":"2020-01-01","starttime":1577882700,"endtime":1577914200},{"date":"2020-01-06","starttime":1578314700,"endtime":1578346200}],30.833333333333,false,false,0,1,[8, 107],{"name":"Zum Pizza Hannes Kegelbahn Tuchenbach","url":"\/zum-pizza-hannes-kegelbahn-tuchenbach-birkenstr","branch":"","logo":"\/\/static.lieferando.de\/images\/restaurants\/de\/NOQN1NN\/logo_465x320.png","categories":"Italienisch, Snacks, Italienische Pizza"},0,0,0,false],
['5ROQRQN', [2, 5], [21, 221, 441], 1, 'Pizza Service','Untere Ringstrasse', 0,8,25,11.69,25,3,2,0,2,1,49.4948717,10.7977354,2,45,[[{"starttime":"11:30:00","endtime":"13:45:00"},{"starttime":"15:30:00","endtime":"22:00:00"}],[{"starttime":"16:00:00","endtime":"22:00:00"}],[{"starttime":"16:00:00","endtime":"22:00:00"}],[{"starttime":"16:00:00","endtime":"22:00:00"}],[{"starttime":"16:00:00","endtime":"22:00:00"}],[{"starttime":"16:00:00","endtime":"22:00:00"}],[{"starttime":"16:00:00","endtime":"22:00:00"}]],false,[[{"starttime":"11:30:00","endtime":"13:45:00"},{"starttime":"15:30:00","endtime":"22:00:00"}],[{"starttime":"16:00:00","endtime":"22:00:00"}],[{"starttime":"16:00:00","endtime":"22:00:00"}],[{"starttime":"16:00:00","endtime":"22:00:00"}],[{"starttime":"16:00:00","endtime":"22:00:00"}],[{"starttime":"16:00:00","endtime":"22:00:00"}],[{"starttime":"16:00:00","endtime":"22:00:00"}]],false,45.648148148148,false,false,0,1,[8, 963],{"name":"Pizza Service","url":"\/pizza-service-untere-ringstrasse-untereringstrasse","branch":"Untere Ringstrasse","logo":"\/\/static.lieferando.de\/images\/restaurants\/de\/5ROQRQN\/logo_465x320.png","categories":"Italienisch, Mexikanisch, Deutsche Gerichte"},0,0,0,false],
['5O5QN03', [1, 4, 5], [71, 231, 271], 1, 'Pizza Roma','Tuchenbach', 0,8,5,9.80,30,4,1,0,0,1,49.52807,10.86215,2,50,[[{"starttime":"14:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:45:00"}],[{"starttime":"14:00:00","endtime":"22:45:00"}]],false,[[{"starttime":"14:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"22:45:00"}],[{"starttime":"14:00:00","endtime":"22:45:00"}]],false,27.12962962963,false,false,0,1,[8, 68],{"name":"Pizza Roma","url":"\/pizza-roma-tuchenbach","branch":"Tuchenbach","logo":"\/\/static.lieferando.de\/images\/restaurants\/de\/5O5QN03\/logo_465x320.png","categories":"Indisch, H\u00e4hnchen, Italienische Pizza"},2,2,2,false],
['N055Q3O', [1, 2, 4], [21, 71, 271], 1, 'Bombay Tandoori','Veitsbronn', 0,9,19,10.18,50,5,2,0,1,1,49.50805,10.88041,2,45,[[{"starttime":"11:30:00","endtime":"21:30:00"}],[{"starttime":"10:00:00","endtime":"14:00:00"},{"starttime":"16:00:00","endtime":"21:30:00"}],[{"starttime":"10:00:00","endtime":"14:00:00"},{"starttime":"16:00:00","endtime":"22:00:00"}],[{"starttime":"17:00:00","endtime":"21:00:00"}],[{"starttime":"10:00:00","endtime":"14:00:00"},{"starttime":"16:00:00","endtime":"21:30:00"}],[{"starttime":"10:00:00","endtime":"14:00:00"},{"starttime":"16:00:00","endtime":"22:00:00"}],[{"starttime":"11:00:00","endtime":"14:00:00"},{"starttime":"16:00:00","endtime":"22:00:00"}]],[{"date":"2019-12-24","starttime":1577142000,"endtime":1577142000},{"date":"2019-12-25","starttime":1577282400,"endtime":1577304000},{"date":"2020-01-01","starttime":1577876400,"endtime":1577908800}],[[{"starttime":"11:30:00","endtime":"22:00:00"}],[{"starttime":"10:00:00","endtime":"14:00:00"},{"starttime":"16:00:00","endtime":"21:00:00"}],[{"starttime":"10:00:00","endtime":"14:00:00"},{"starttime":"16:00:00","endtime":"21:30:00"}],[{"starttime":"17:00:00","endtime":"21:00:00"}],[{"starttime":"10:00:00","endtime":"14:00:00"},{"starttime":"16:00:00","endtime":"21:00:00"}],[{"starttime":"10:00:00","endtime":"14:00:00"},{"starttime":"16:00:00","endtime":"21:30:00"}],[{"starttime":"11:00:00","endtime":"14:00:00"},{"starttime":"16:00:00","endtime":"22:00:00"}]],false,42.592592592593,false,false,0,1,[9, 1271],{"name":"Bombay Tandoori","url":"\/bombay-tandoori-fuerther-strasse","branch":"Veitsbronn","logo":"\/\/static.lieferando.de\/images\/restaurants\/de\/N055Q3O\/logo_465x320.png","categories":"Italienisch, Indisch, Italienische Pizza"},1,1,1,false],
['O3R3RR5', [1, 2, 3], [271, 821, 1614], 1, 'Deniz Döner & Pizza','Herzo', 0,8,1,11.40,35,6,1,0,0,1,49.5704598,10.8777599,2,50,[[{"starttime":"17:00:00","endtime":"20:30:00"}],[{"starttime":"11:00:00","endtime":"13:30:00"},{"starttime":"17:00:00","endtime":"20:30:00"}],[{"starttime":"11:00:00","endtime":"13:30:00"},{"starttime":"17:00:00","endtime":"20:30:00"}],[{"starttime":"11:00:00","endtime":"13:30:00"},{"starttime":"17:00:00","endtime":"20:30:00"}],[{"starttime":"11:00:00","endtime":"13:30:00"},{"starttime":"17:00:00","endtime":"20:30:00"}],[{"starttime":"11:00:00","endtime":"13:30:00"},{"starttime":"17:00:00","endtime":"20:30:00"}],[{"starttime":"17:00:00","endtime":"20:30:00"}]],false,[[{"starttime":"17:00:00","endtime":"20:30:00"}],[{"starttime":"11:00:00","endtime":"13:30:00"},{"starttime":"17:00:00","endtime":"20:30:00"}],[{"starttime":"11:00:00","endtime":"13:30:00"},{"starttime":"17:00:00","endtime":"20:30:00"}],[{"starttime":"11:00:00","endtime":"13:30:00"},{"starttime":"17:00:00","endtime":"20:30:00"}],[{"starttime":"11:00:00","endtime":"13:30:00"},{"starttime":"17:00:00","endtime":"20:30:00"}],[{"starttime":"11:00:00","endtime":"13:30:00"},{"starttime":"17:00:00","endtime":"20:30:00"}],[{"starttime":"17:00:00","endtime":"20:30:00"}]],false,23.425925925926,false,false,0,1,[8, 52],{"name":"Deniz D\u00f6ner & Pizza","url":"\/deniz-doener-pizzeria","branch":"Herzo","logo":"\/\/static.lieferando.de\/images\/restaurants\/de\/O3R3RR5\/logo_465x320.png","categories":"Italienische Pizza, D\u00f6ner, Pasta"},0,0,0,false],
['OO571N01', [1, 2, 5], [271, 1614, 1654], 1, 'Enzo\'s Pizza','', 0,9,1,8.10,21,7,2,0,0,1,49.4937769,10.7935631,2,55,[[{"starttime":"17:00:00","endtime":"22:00:00"}],[{"starttime":"00:00:00","endtime":"00:00:00"}],[{"starttime":"17:00:00","endtime":"22:00:00"}],[{"starttime":"17:00:00","endtime":"22:00:00"}],[{"starttime":"17:00:00","endtime":"22:00:00"}],[{"starttime":"17:00:00","endtime":"22:00:00"}],[{"starttime":"17:00:00","endtime":"22:00:00"}]],false,[[{"starttime":"17:00:00","endtime":"22:00:00"}],[{"starttime":"00:00:00","endtime":"00:00:00"}],[{"starttime":"17:00:00","endtime":"22:00:00"}],[{"starttime":"17:00:00","endtime":"22:00:00"}],[{"starttime":"17:00:00","endtime":"22:00:00"}],[{"starttime":"17:00:00","endtime":"22:00:00"}],[{"starttime":"17:00:00","endtime":"22:00:00"}]],false,25.925925925926,false,false,0,1,[9, 18],{"name":"Enzo's Pizza","url":"\/enzos-pizza","branch":"","logo":"\/\/static.lieferando.de\/images\/restaurants\/de\/OO571N01\/logo_465x320.png","categories":"Italienische Pizza, Pasta, Schnitzel"},0,0,0,false],
['030OPP11', [1, 2, 5], [21, 271, 1456], 1, 'Restaurant Pizzeria Roma','Oberschweinach', 0,8,27,13.33,19,8,1,0,0,1,49.55581,10.6143,2,55,[[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"23:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"23:00:00"}]],[{"date":"2019-12-24","starttime":1577142000,"endtime":1577142000},{"date":"2019-12-25","starttime":1577266200,"endtime":1577278800},{"date":"2019-12-25","starttime":1577287800,"endtime":1577307600},{"date":"2019-12-26","starttime":1577352600,"endtime":1577365200},{"date":"2019-12-26","starttime":1577374200,"endtime":1577394000},{"date":"2019-12-31","starttime":1577746800,"endtime":1577746800},{"date":"2020-01-01","starttime":1577871000,"endtime":1577883600},{"date":"2020-01-01","starttime":1577892600,"endtime":1577912400},{"date":"2020-01-06","starttime":1578303000,"endtime":1578315600},{"date":"2020-01-06","starttime":1578324600,"endtime":1578344400}],[[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"22:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"23:00:00"}],[{"starttime":"10:30:00","endtime":"14:00:00"},{"starttime":"16:30:00","endtime":"23:00:00"}]],[{"date":"2019-12-24","starttime":1577142000,"endtime":1577142000},{"date":"2019-12-25","starttime":1577266200,"endtime":1577278800},{"date":"2019-12-25","starttime":1577287800,"endtime":1577307600},{"date":"2019-12-26","starttime":1577352600,"endtime":1577365200},{"date":"2019-12-26","starttime":1577374200,"endtime":1577394000},{"date":"2019-12-31","starttime":1577746800,"endtime":1577746800},{"date":"2020-01-01","starttime":1577871000,"endtime":1577883600},{"date":"2020-01-01","starttime":1577892600,"endtime":1577912400},{"date":"2020-01-06","starttime":1578303000,"endtime":1578315600},{"date":"2020-01-06","starttime":1578324600,"endtime":1578344400}],47.5,false,false,0,1,[8, 40],{"name":"Restaurant Pizzeria Roma","url":"\/restaurant-colosseum-pizzeria","branch":"Oberschweinach","logo":"\/\/static.lieferando.de\/images\/restaurants\/de\/030OPP11\/logo_465x320.png","categories":"Italienisch, Italienische Pizza, Salate"},0,0,0,false]
  ];

  


und hier der aktuellste Stand meines Codes:

Code: Alles auswählen

from bs4 import BeautifulSoup
import requests

HEADERS = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0"}

def get_cityname_from_postcode(postcode):
    url = f"https://www.google.com/search?q={postcode}"
    page = requests.get(url, headers=HEADERS)
    soup = BeautifulSoup(page.content, "html.parser")
    cityname = soup.find('a',{'class':'fl'}).get_text()
    return cityname
    
def get_restaurant_rating(restaurant):
    rating_block = restaurant.find(itemtype='http://schema.org/Rating')
    if rating_block.find(itemprop='reviewCount').get('content') != "0":
        rating = rating_block.find(itemprop='ratingValue').get('content')
    else:
        rating = "Keine Bewertungen"
    return rating

def get_restaurant_id(restaurant):
    try:
        restaurant_id = restaurant.get("id").replace("irestaurant", "")
    except AttributeError:
        restaurant_id = "Keine Angaben"
    return restaurant_id
    
def get_text_from_class(restaurant, containertyp, classname):
    try:
        text =  restaurant.find(containertyp,{'class':classname}).get_text().strip()
    except AttributeError:
        text = "Keine Angaben"
    return text

def show_results(restaurants_with_information):
    for value in restaurants_with_information.values():
        print(f"Name:               {value['Restaurantname']}")
        print(f"Adresse:            {value['Adress']}")
        print(f"Essen:              {value['Food']}")
        print(f"Bewertung:          {value['Rating']}" if value['Rating'] == "Keine Angaben" else f"Bewertung:          {value['Rating']}/5")
        print(f"Lieferkosten:       {value['Delivery cost']}")
        print(f"Bestellwert:        {value['Min order']}")
        print(f"Geöffnet ab:        {value['Open time']}")
        print(f"Restaurant ID:      {value['Restaurant ID']}\n")
    print(f"Es wurden {len(restaurants_with_information)} Restaurants gefunden\n")

def save_results_in_file(restaurants_with_information):
    output_filename = "Lieferando_output.txt"
    with open(output_filename, 'w', encoding="UTF-8") as outputfile:
        for value in restaurants_with_information.values():
            outputfile.write(f"Name:               {value['Restaurantname']}\n")
            outputfile.write(f"Adresse:            {value['Adress']}\n")
            outputfile.write(f"Essen:              {value['Food']}\n")
            outputfile.write(f"Bewertung:          {value['Rating']}\n" if value['Rating'] == "Keine Bewertungen" else f"Bewertung:          {value['Rating']}/5\n")
            outputfile.write(f"Lieferkosten:       {value['Delivery cost']}\n")
            outputfile.write(f"Bestellwert:        {value['Min order']}\n")
            outputfile.write(f"Geöffnet ab:        {value['Open time']}\n")
            outputfile.write(f"Restaurant ID:      {value['Restaurant ID']}\n\n")
    print(f"Ergebnisse wurden in {output_filename} gespeichert.\n")

def fill_dictionary(restaurants):
    restaurants_with_information = {}
    for restaurant in restaurants[:-1]:
        restaurant_name = get_text_from_class(restaurant, "a", "restaurantname")
        restaurant_adress = get_text_from_class(restaurant, "div", "address")
        restaurant_food = get_text_from_class(restaurant, "div", "kitchens")
        restaurant_rating = get_restaurant_rating(restaurant)
        delivery_cost = get_text_from_class(restaurant, "div", "delivery-cost js-delivery-cost")
        minimal_order_value = get_text_from_class(restaurant, "div", "min-order")
        open_time = get_text_from_class(restaurant, "div", "avgdeliverytime avgdeliverytimefull open")
        restaurant_id = get_restaurant_id(restaurant)
        restaurants_with_information[restaurant_id] = {"Restaurantname": restaurant_name, "Adress":restaurant_adress, "Food":restaurant_food, "Rating":restaurant_rating, "Delivery cost": delivery_cost, "Min order": minimal_order_value, "Open time":open_time, "Restaurant ID":restaurant_id}
    return restaurants_with_information

def make_urls(postcode, cityname):
    return [f"https://www.lieferando.de/lieferservice-{cityname}-{postcode}", f"https://www.lieferando.de/lieferservice-{postcode}"]

def cityname_without_umlaut(cityname):
    chars = {'ö':'oe','ä':'ae','ü':'ue'}
    if cityname != None:
        for char in chars:
            cityname = cityname.replace(char,chars[char])
    return cityname

def get_restaurantlist(url):
    page = requests.get(url, headers=HEADERS)
    soup = BeautifulSoup(page.content, "html.parser")
    all_restaurant_infoboxes = soup.find('div',id = 'irestaurantlist')
    return all_restaurant_infoboxes

def main():
    postcode = input("Bitte gib deine Postleitzahl ein: ")
    print("Bitte warte einige Sekunden...\n")
    cityname = cityname_without_umlaut(get_cityname_from_postcode(postcode))
    url = make_urls(postcode, cityname)[1]
    all_restaurant_infoboxes = get_restaurantlist(url)
    if all_restaurant_infoboxes == None:
        url = make_urls(postcode, cityname)[0]
        all_restaurant_infoboxes = get_restaurantlist(url)
    restaurants = all_restaurant_infoboxes.find_all(itemtype="http://schema.org/Restaurant")
    result = fill_dictionary(restaurants)
    show_results(result)
    if input("Willst du die Ergebnisse in einer Textdatei speichern? (Y/N): ") == "Y":
        save_results_in_file(result)
    print("Programmende")

if __name__ == "__main__":
    main()

Re: Parsen vom Daten aus Script

Verfasst: Donnerstag 4. Februar 2021, 18:35
von jasonD
Hallo euch allen hat jemand dieses Problem hier gelöst. Oder kann mir damit helfen?

Edit: Kleiner Nachtrag, soll auch nicht für umsonst sein

Re: Parsen vom Daten aus Script

Verfasst: Dienstag 9. Februar 2021, 08:00
von Jankie
Welches Problem genau? Mit dem Parsen der Daten für die Geocoordinaten habe ich mich nicht mehr beschäftigt, da ist dann einfach meine Motivation flöten gegangen. Habe gerade auch nochmal das Skript getestet und wie erwartet funktioniert es nicht. Da hat Liferando wohl wieder was an seinem Quelltext geändert. Wobei brauchst du denn konkret Hilfe?