Ich habe eine Klasse geschrieben, die Songtexte bei http://www.leoslyrics.com/ sucht und diese dann ausgibt.
Code: Alles auswählen
import urllib
import re
class Lyrics:
    """Search http://www.leoslyrics.com/ for songs and fetch their lyrics.

    Typical use: call ``search()`` first, which returns the hits and stores
    the matching lyric-page URLs in ``self.url_list``; then pass the index of
    one hit to ``get_text()``.

    NOTE: the network code uses the Python 2 ``urllib`` API
    (``urllib.quote`` / ``urllib.urlopen``).
    """

    BASE_URL = "http://www.leoslyrics.com"
    # The paging arithmetic assumes the site lists 40 hits per result page.
    RESULTS_PER_PAGE = 40

    def __init__(self):
        """Compile the scraping patterns and start with empty results."""
        # One search hit is two adjacent table cells. The captured groups are
        # (text of the first link, href of the second link, bold text of the
        # second link).
        # NOTE(review): ``[.|\s]`` is a character class, so it matches a
        # literal '.', '|' or whitespace, not "any character".
        self.list_reg = re.compile(
            r"<td>[.|\s]*<font .*>[.|\s]*<a href=\".*\">(.*)</a>[.|\s]*</font>[.|\s]*</td>"
            r"[.|\s]*"
            r"<td>[.|\s]*<font .*>[.|\s]*<a href=\"(.*)\"><b>(.*)</b></a>[.|\s]*</font>[.|\s]*</td>"
        )
        # Total number of hits announced on the result page.
        self.result_reg = re.compile(r"<p>[.|\s]*Found <b>(\d*)</b> results\.[.|\s]*</p>")
        # Session id embedded in result links; removed before a URL is stored.
        self.sid_reg = re.compile(";jsessionid=.*\\?")
        # Defined up front so get_text() before search() fails with a plain
        # IndexError instead of an AttributeError.
        self.url_list = ()
        self.big_list = []

    def search(self, artist = "", song = "", album = ""):
        """Run an advanced search and return the hits.

        Args:
            artist: artist name to search for (URL-quoted before sending).
            song: song title to search for.
            album: album title to search for.

        Returns:
            A tuple of ``(first_link_text, second_link_text)`` pairs, one per
            hit (presumably artist and song title). The tuple is empty when
            the page does not report a result count. As a side effect
            ``self.url_list`` holds the lyric-page URL of each hit at the same
            index, and ``self.big_list`` holds the raw regex matches.
        """
        self.url_list = ()
        self.big_list = []

        url = (
            "http://www.leoslyrics.com/advanced.php?artistmode=0&artist="
            + urllib.quote(artist)
            + "&albummode=0&album=" + urllib.quote(album)
            + "&songmode=0&song=" + urllib.quote(song)
            + "&mode=0"
        )
        page = self._fetch(url)

        # The count paragraph may be absent, and ``\d*`` may match an empty
        # string; treat both as "no results" instead of raising.
        counts = self.result_reg.findall(page)
        if not counts or not counts[0]:
            return ()
        results = int(counts[0])

        self.big_list = self.list_reg.findall(page)
        # The first request already delivered page 1; fetch pages 2..last
        # inclusive (assumes the site numbers result pages from 1).
        for number in range(2, self._page_count(results) + 1):
            more = self._fetch(url + "&page=" + str(number))
            # extend, not append: every follow-up hit must be its own entry.
            self.big_list.extend(self.list_reg.findall(more))

        self.url_list = tuple(
            self.BASE_URL + self.sid_reg.sub("?", hit[1]) for hit in self.big_list
        )
        return tuple((hit[0], hit[2]) for hit in self.big_list)

    def get_text(self, number):
        """Download the lyrics of hit ``number`` from the last search.

        Args:
            number: index into ``self.url_list`` (same order as the tuple
                returned by ``search()``).

        Returns:
            A list of lyric lines with markup removed.

        Raises:
            IndexError: if ``number`` is not a valid index of ``url_list``.
        """
        return self._extract_lyrics(self._fetch(self.url_list[number]))

    def _fetch(self, url):
        """Return the body of ``url`` and always close the connection."""
        handle = urllib.urlopen(url)
        try:
            return handle.read()
        finally:
            handle.close()

    def _page_count(self, results):
        """Return how many result pages are needed for ``results`` hits."""
        per_page = self.RESULTS_PER_PAGE
        return (results + per_page - 1) // per_page

    def _extract_lyrics(self, page):
        """Pull the lyric lines out of the HTML of a lyric page.

        Every non-empty line starting with ``<br />`` is a lyric line. The
        line directly before the first of them is taken as the opening lyric
        line, which carries no ``<br />`` prefix.
        """
        # splitlines() handles \r\n, \r and \n alike.
        lines = [line.strip() for line in page.splitlines()]
        lines = [line for line in lines if line]

        lyric = []
        first = True
        for index, line in enumerate(lines):
            if not line.startswith("<br />"):
                continue
            # Guard index 0 so the lookup cannot wrap around to the last line.
            if first and index > 0:
                lyric.append(self.__finish__(lines[index - 1]))
            first = False
            lyric.append(self.__finish__(line))
        return lyric

    def __finish__(self, string):
        """Strip carriage returns and ``<br />`` and decode quote entities."""
        string = string.replace("\r", "")
        # NOTE(review): the entity names are reconstructed. The original
        # literals appear to have been HTML-decoded when the code was posted,
        # so confirm which apostrophe entity the site actually emits.
        for entity in ("&#39;", "&#039;", "&apos;"):
            string = string.replace(entity, "'")
        string = string.replace("&quot;", "\"")
        return string.replace("<br />", "")
Code: Alles auswählen
# Example usage: create a searcher, look up hits for an artist, then fetch one lyric.
suche = Lyrics()
# search() returns a tuple with one pair per hit and stores the lyric URLs on the instance.
ergebnis = suche.search(artist = "irgendwas")
# index_zahl_von_ergebnis is a placeholder (not defined in this snippet) for the
# index of the chosen hit; get_text() uses it to index the stored URL list.
songtext = suche.get_text(index_zahl_von_ergebnis)
Über Verbesserungsvorschläge würde ich mich freuen.
Gruß
dodo47