Re: String trennen
Verfasst: Freitag 10. Februar 2017, 23:00
ist ja auch noch nicht fertig. Es gibt bestimmt intelligentere Lösungen, aber für mich als Anfänger ist es OK. Der Code funktioniert soweit ganz gut, er tut, was er soll. Unten der jetzige Stand. Es kommt später noch mehr dazu.
main.py
playlist2userbouquet.py
main.py
Code: Alles auswählen
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import print_function
import io
import time
import re
from playlist2userbouquet import PLAYLIST, ANPASSEN
# Module-level instances shared by every function in this script:
# `playlist` holds the name/link dictionaries, `anpassen` provides the renaming helpers.
playlist = PLAYLIST()
anpassen = ANPASSEN()
def DICTS_ERSTELLEN(filename_m3u='M3U.txt'):
    """Fill the shared playlist's name and link dictionaries from an M3U file.

    filename_m3u: path of the playlist file to read. Defaults to 'M3U.txt',
    the file name that used to be hard-coded here, so existing calls without
    arguments behave as before.
    """
    playlist.NAME_DICT_LINK_DICT(filename_m3u)
def NAME_DICT_ANPASSEN():
    """Normalise every channel name stored in the shared playlist.

    Each stored name is split into parts by ``anpassen.SPLIT``, the parts are
    re-cased by ``anpassen.GROSS_KLEIN_SCHREIBUNG`` and the result, read from
    ``anpassen.name_neu``, replaces the stored name.
    """
    for nummer in playlist.m3u_name_dict:
        alter_name = playlist.m3u_name_dict[nummer]
        # SPLIT is a generator; its first yielded item is the list of parts.
        teile = next(anpassen.SPLIT(alter_name))
        anpassen.GROSS_KLEIN_SCHREIBUNG(teile)
        # Only the value of an existing key is replaced, so the dictionary
        # keeps its size while it is being iterated.
        playlist.m3u_name_dict[nummer] = anpassen.name_neu
def TEST():
    """Print one line per playlist entry in the form '<number> - <name> - <link>'."""
    for nummer, name in playlist.m3u_name_dict.items():
        spalten = (str(nummer), name, playlist.m3u_link_dict[nummer])
        print(' - '.join(spalten))
def main():
    """Run the script: build the dictionaries, normalise the names, print the result."""
    for schritt in (DICTS_ERSTELLEN, NAME_DICT_ANPASSEN, TEST):
        schritt()


# Execute main() only when the file is run as a script, not when it is imported.
if __name__ == '__main__':
    main()
playlist2userbouquet.py
Code: Alles auswählen
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import print_function
import io
import time
import re
class PLAYLIST(object):
    """Name and link tables of an M3U playlist, keyed by a shared entry number.

    m3u_name_dict maps a running number (starting at 1) to the entry's name,
    m3u_link_dict maps the same number to the line that followed the entry
    in the file.
    """

    def __init__(self):
        self.m3u_name_dict = dict()
        self.m3u_link_dict = dict()

    def NAME_DICT_LINK_DICT(self, filename_m3u):
        """Read ``filename_m3u`` and fill both dictionaries.

        Every line starting with '#EXTINF' is treated as an entry unless the
        character right after the 11-character '#EXTINF:-1,' prefix is '=' or
        the line ends with '='; such lines are skipped. For an accepted entry
        the '#EXTINF:-1,' text is removed to obtain the name, and the next
        line of the file is stored as its link.

        A missing file or another I/O error is reported on stdout instead of
        being raised; entries read before the error stay in the dictionaries.
        """
        try:
            # 'utf-8-sig' drops a leading byte-order mark if the file has one.
            with io.open(filename_m3u, 'r', encoding='utf-8-sig') as m3u:
                i = 1
                for zeile in m3u:
                    if not zeile.startswith('#EXTINF'):
                        continue
                    # Strip before testing: the raw line still carries its
                    # newline, so endswith('=') could otherwise never match.
                    eintrag = zeile.strip()
                    # Slicing instead of indexing keeps short lines such as a
                    # bare '#EXTINF' from raising IndexError.
                    if eintrag[11:12] == '=' or eintrag.endswith('='):
                        continue
                    name = eintrag.replace('#EXTINF:-1,', '')
                    # The line directly after the entry is taken as its link.
                    link = m3u.readline().rstrip()
                    self.m3u_name_dict[i] = name
                    self.m3u_link_dict[i] = link
                    i += 1
        except FileNotFoundError:
            msg = 'Die Datei {!r} existiert nicht!'
            print(msg.format(filename_m3u))
        except IOError as error:
            print('Fehler beim Öffnen:', error)
class ANPASSEN(object):
    """Split a raw channel name into parts and normalise the casing of each part."""

    def __init__(self):
        # Regex alternatives used by SPLIT. They are joined with '|', so at
        # every position the first alternative in this list that matches wins.
        self.PATTERNS = [
            r'\+\d',                                # plus and a digit: +2, +7
            r'\d[+x]\d',                            # 1+1, 2x2
            r'(?<=\b)[A-ZА-Я]{1,2}\d{1,2}\b',       # capitals then digits: A1, AB12
            r'\b[JT][ui][CJ][ei]',                  # JuCe, TiJi
            r'[A-ZА-Я][a-zа-я]+\.?',                # capitalised word, optional dot: Abcde, Int.
            r'[A-Za-zА-Яа-я][A-ZА-Яa-zа-я]+\.?',    # any other run of 2+ letters: ABCDE, int.
            r'(?<=\b)[A-ZА-Я]{1,2}(?=\b)',          # one or two capitals between word boundaries: A, AB
            r'\d[A-ZА-Я]\b',                        # digit then capital: 1A
            r'\d\.?\d+',                            # 1234, 2.0, 3.123
            r'\d',                                  # single digit
        ]
        # Parts of up to three characters that are title-cased instead of upper-cased.
        self.SHORT_NAMES_TO_TITLE = {
            'geo', 'nat', 'дом', 'моя', 'кто', 'hit', 'box',
            'pro', 'doc', 'and', 'sci', 'fi', 'top', 'еда',
            'дон', 'мир', 'ля', 'на', 'раз', 'рен'
        }
        # Longer parts that are kept exactly as written.
        self.LONG_NAMES_TO_IGNORE = {
            'JuCe', 'TiJi', 'HITV', 'НСТВ', 'СССР', 'СТРК'
        }
        # Result of the most recent GROSS_KLEIN_SCHREIBUNG call; initialised
        # here so that reading it before the first call does not fail.
        self.name_neu = ''

    def SPLIT(self, value):
        """Yield, as a single item, the list of name parts found in ``value``.

        Bracketed text ('[...]' or '(...)') is removed first. Then every '_',
        '.' or '-' that is not directly preceded by 'INT'/'int' and not
        directly followed by a digit is replaced by a space. The stripped
        result is scanned with the alternatives from ``self.PATTERNS``.

        This is a generator that yields exactly once; callers fetch the list
        with ``next(...)``.
        """
        patterns = self.PATTERNS
        if isinstance(patterns, dict):
            patterns = patterns.values()
        pattern = re.compile('|'.join(patterns))
        # Remove brackets together with their content.
        value = re.sub(r'[\[\(][^\(\)\[\]]*[\]\)]', '', value)
        # Turn separator characters into spaces.
        value = re.sub(r'(?<!(INT))(?<!(int))[_.-](?!\d)', ' ', value)
        yield pattern.findall(value.strip())

    def _TEIL_ANPASSEN(self, part):
        """Return ``part`` with the casing rule for its length applied."""
        if len(part) <= 3:
            if part.lower() in self.SHORT_NAMES_TO_TITLE:
                return part.title()
            return part.upper()
        if part in self.LONG_NAMES_TO_IGNORE:
            return part
        return part.title()

    def GROSS_KLEIN_SCHREIBUNG(self, parts):
        """Re-case ``parts``, join them with spaces and store the result in ``self.name_neu``.

        Parts of up to three characters are upper-cased unless their
        lower-case form is listed in SHORT_NAMES_TO_TITLE, in which case they
        are title-cased. Longer parts are title-cased unless they are listed
        in LONG_NAMES_TO_IGNORE, which keeps them unchanged.

        Returns the joined name as well, so callers need not read the attribute.
        """
        self.name_neu = ' '.join([self._TEIL_ANPASSEN(part) for part in parts])
        return self.name_neu