Außerdem gibt es jetzt noch die Möglichkeit, das Dictionary als Pickle zu speichern. Damit spart man ca. 50 % der Laufzeit.
Code: Alles auswählen
#!/usr/bin/env python3
import time
from collections import defaultdict
from itertools import combinations
from pathlib import Path
import pickle
# Folder (relative to the current working directory) that holds the word
# list and the pickle cache.
ROOTFOLDER = "scrabble"
# Cache file in which the lookup table is stored as a pickle.
PICKLE_PATH = Path(ROOTFOLDER, "lookup_table.p")
# Plain-text word list that is read as UTF-8, one entry per line.
# NOTE(review): presumably downloaded from the gist below - confirm.
# https://gist.github.com/MarvinJWendt/2f4f4154b8ae218600eb091a5706b5f4
DICTIONARY_PATH = Path(ROOTFOLDER, "wordlist-german.txt")
def make_canonical_form(word):
    """Return the characters of *word* in sorted order as a single string.

    Words that are anagrams of each other share the same canonical form,
    which makes the result usable as a lookup key.
    """
    characters = list(word)
    characters.sort()
    return "".join(characters)
def write_lookup_table(pickle_object):
    """Pickle *pickle_object* into the cache file at ``PICKLE_PATH``.

    An existing cache file is overwritten.
    """
    with open(PICKLE_PATH, "wb") as cache_file:
        pickler = pickle.Pickler(cache_file)
        pickler.dump(pickle_object)
def read_lookup_table():
    """Load the cached lookup table from ``PICKLE_PATH``.

    Returns:
        The unpickled object, or an empty dict when the cache file does not
        exist or cannot be unpickled (e.g. it was truncated by an
        interrupted write). An empty result signals the caller that the
        table has to be rebuilt.
    """
    # NOTE: unpickling executes whatever the file describes, so the cache
    # file must only ever come from a trusted source (i.e. be written by
    # this program itself).
    try:
        # Open directly instead of checking ``exists()`` first: this avoids
        # the race between the check and the open.
        with PICKLE_PATH.open("rb") as pickle_file:
            return pickle.load(pickle_file)
    except FileNotFoundError:
        # No cache written yet.
        return {}
    except (pickle.UnpicklingError, EOFError):
        # Corrupt or truncated cache: treat it like a missing one so it
        # does not break every subsequent run.
        return {}
def make_lookup_table():
    """Build the lookup table from the word list at ``DICTIONARY_PATH``.

    Every non-blank line of the file is stripped of surrounding whitespace
    and lower-cased, then filed under its canonical form (its characters in
    sorted order).

    Returns:
        A ``defaultdict(set)`` mapping each canonical form to the set of
        words that consist of exactly those characters.
    """
    sorted_characters_to_words = defaultdict(set)
    with DICTIONARY_PATH.open(encoding="utf-8") as lines:
        for line in lines:
            word = line.strip().lower()
            if not word:
                # A blank line would otherwise register "" as a word.
                continue
            sorted_characters_to_words[make_canonical_form(word)].add(word)
    return sorted_characters_to_words
def get_lookup_table():
    """Return the lookup table, using the pickle cache when it is available.

    If reading the cache yields nothing (an empty/falsy result), the table
    is built from the word list, written to the cache and then returned.
    """
    cached_table = read_lookup_table()
    if cached_table:
        return cached_table

    fresh_table = make_lookup_table()
    write_lookup_table(fresh_table)
    return fresh_table
def main(needle="qwertz"):
    """Print all known words that can be formed from the letters of *needle*.

    Every combination of at least two letters of *needle* is reduced to its
    canonical form and looked up in the lookup table. The order of the
    printed words is unspecified because sets are involved.

    Args:
        needle: The available letters. Lower-cased before use because the
            lookup table stores lower-cased words only.
    """
    needle = needle.lower()
    lookup_table = get_lookup_table()
    # A set, because different letter combinations can share one canonical
    # form (e.g. when the needle contains repeated letters).
    word_parts = {
        make_canonical_form("".join(part))
        for n in range(2, len(needle) + 1)
        for part in combinations(needle, n)
    }
    # ``.get`` instead of ``[...]``: indexing a defaultdict would insert an
    # empty set for every combination that is not a word, and would raise
    # KeyError if the table were a plain dict.
    results = [
        word
        for part in word_parts
        for word in lookup_table.get(part, ())
    ]
    print(results)
if __name__ == "__main__":
    # Measure the wall-clock time of the complete run, including loading
    # (or building) the lookup table.
    started_at = time.perf_counter()
    main()
    finished_at = time.perf_counter()
    runtime = finished_at - started_at
    print(f"program took {runtime:5.3f} seconds")
"""
Ausgabe vor dem Pickeln:
['er', 're', 'wert', 'rtw', 'wz', 'qwertz', 'wez', 'we', 'qr', 'terz', 'rwe', 'wer', 'erz']
program took 8.625 seconds
Ausgabe nach dem Pickeln:
['qr', 'erz', 'wez', 'rtw', 'we', 're', 'er', 'wz', 'wert', 'wer', 'rwe', 'terz', 'qwertz']
program took 4.265 seconds
"""