Dazu noch: Wäre es nicht sinnvoller, wenn ich folgendes Programm in einer Klasse zusammenfasse?
Code: Alles auswählen
#-*- coding: utf-8 -*-
import requests
from bs4 import BeautifulSoup
import time
import re
# Wall-clock timestamp taken when the module starts running; the end of the
# script subtracts it from the current time to print the total run time.
start_time = time.time()
def soup(link, timeout=30):
    """Download an article page and return it as a parsed BeautifulSoup tree.

    The page at ``link`` is fetched and parsed with the ``html5lib`` parser.
    If it contains an ``<a>`` element with class ``article-toc__onesie``
    (presumably the "show on one page" link of a multi-page article --
    confirm against the site markup), ``{link}/komplettansicht`` is fetched
    and parsed instead so the whole article text is in one document.

    Args:
        link: URL of the article, without a trailing slash.
        timeout: Seconds to wait for the server on each request. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The parsed document (the single-page view when one is advertised).

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    page = BeautifulSoup(
        requests.get(link, timeout=timeout).content, "html5lib"
    )
    if page.find("a", {"class": "article-toc__onesie"}) is None:
        return page

    # A single-page view is advertised: fetch and parse that one instead.
    full_view = requests.get(f"{link}/komplettansicht", timeout=timeout).content
    return BeautifulSoup(full_view, "html5lib")
def artikel(datasoup):
    """Return the article body text as a single string.

    Collects the text of every ``<p>`` element whose class attribute is
    ``paragraph article__item`` and concatenates the pieces without any
    separator. Returns an empty string when no such paragraph exists.

    Args:
        datasoup: Parsed document offering BeautifulSoup's ``find_all``.
    """
    # find_all is the Beautiful Soup 4 name; findAll is a deprecated alias.
    paragraphs = datasoup.find_all("p", {"class": "paragraph article__item"})
    return "".join(paragraph.text for paragraph in paragraphs)
def autor(datasoup):
    """Return the author name, or ``"N/A"`` when the page names none.

    The name is the whitespace-stripped text of the first ``<a>`` element
    with ``rel="author"``.

    Args:
        datasoup: Parsed document offering BeautifulSoup's ``find``.
    """
    author_link = datasoup.find("a", {"rel": "author"})
    if author_link is None:
        return "N/A"
    return author_link.text.strip()
def datum(datasoup):
    """Return the text of all ``<time>`` elements, joined by single spaces.

    Returns an empty string when the document has no ``<time>`` element.

    Args:
        datasoup: Parsed document offering BeautifulSoup's ``find_all``.
    """
    # find_all is the Beautiful Soup 4 name; findAll is a deprecated alias.
    return " ".join(stamp.text for stamp in datasoup.find_all("time"))
def agents(datasoup):
    """Return the article's source label, or the string ``"None"`` if absent.

    The label is the unmodified text of the first ``<span>`` element with
    class ``metadata__source``; the script prints it under "Agentur".

    Args:
        datasoup: Parsed document offering BeautifulSoup's ``find``.
    """
    source_span = datasoup.find("span", {"class": "metadata__source"})
    if source_span is None:
        # Callers receive the literal string "None", not the None object.
        return "None"
    return source_span.text
def kommentare(datasoup):
    """Return the comment count as a string of digits, or ``"FEHLER"``.

    Reads the text of the first ``<a>`` element with class
    ``metadata__commentcount js-scroll`` and returns the first run of digits
    in it. ``"FEHLER"`` is returned when the element is missing and also when
    its text contains no digits (this case used to raise ``IndexError``).

    Args:
        datasoup: Parsed document offering BeautifulSoup's ``find``.
    """
    count_link = datasoup.find(
        "a", {"class": "metadata__commentcount js-scroll"}
    )
    if count_link is None:
        return "FEHLER"

    # Raw string: "\d" in a normal string literal is an invalid escape.
    digits = re.search(r"\d+", count_link.text)
    return digits.group() if digits else "FEHLER"
def keywords(datasoup):
    """Return the ``content`` of ``<meta name="keywords">``, or ``"N/A"``.

    ``"N/A"`` is returned when the meta element or its ``content`` attribute
    is missing, instead of failing with ``TypeError``/``KeyError``; this
    matches the placeholder ``autor`` uses for a missing author.

    Args:
        datasoup: Parsed document offering BeautifulSoup's ``find``.
    """
    meta = datasoup.find("meta", {"name": "keywords"})
    if meta is None:
        return "N/A"
    return meta.get("content", "N/A")
def beschreibung(datasoup):
    """Return the ``content`` of ``<meta name="description">``, or ``"N/A"``.

    ``"N/A"`` is returned when the meta element or its ``content`` attribute
    is missing, instead of failing with ``TypeError``/``KeyError``; this
    matches the placeholder ``autor`` uses for a missing author.

    Args:
        datasoup: Parsed document offering BeautifulSoup's ``find``.
    """
    meta = datasoup.find("meta", {"name": "description"})
    if meta is None:
        return "N/A"
    return meta.get("content", "N/A")
def title(datasoup):
    """Return the page title taken from the ``og:title`` element.

    Looks up the first element whose ``property`` attribute is ``og:title``
    and returns its ``content`` attribute (``None`` if the element has no
    such attribute). When the element itself is missing, ``"N/A"`` is
    returned instead of failing with ``AttributeError``.

    Args:
        datasoup: Parsed document offering BeautifulSoup's ``find``.
    """
    og_title = datasoup.find(property="og:title")
    if og_title is None:
        return "N/A"
    return og_title.get("content")
def main():
    """Scrape one hard-coded zeit.de article and print its fields.

    Prints a labelled report (title, author, date, comment count, source,
    description, article text, keywords) followed by the seconds elapsed
    since the module-level ``start_time`` was taken.

    The results are kept in local names that differ from the extractor
    functions; assigning e.g. ``artikel = artikel(page)`` would replace the
    function with its result and make a second call impossible.
    """
    page = soup(
        "https://www.zeit.de/wissen/2018-07/selbsterkenntnis-psychologie-forschung"
    )

    article_text = artikel(page)
    author_name = autor(page)
    published = datum(page)
    source_label = agents(page)
    comment_count = kommentare(page)
    keyword_list = keywords(page)
    description = beschreibung(page)
    headline = title(page)

    # Same text as the former triple-quoted f-string: a leading blank line,
    # one "Label: value" line per field, and a trailing newline.
    report = (
        "\n"
        f"Titel: {headline}\n"
        f"Autor: {author_name}\n"
        f"Datum: {published}\n"
        f"Kommentare: {comment_count}\n"
        f"Agentur: {source_label}\n"
        f"Beschreibung: {description}\n"
        f"Artikel: {article_text}\n"
        f"Keywords: {keyword_list}\n"
    )
    print(report)
    print(time.time() - start_time)


# Only scrape when run as a script, so importing the module has no network
# side effects.
if __name__ == "__main__":
    main()