Hier der Code:
import re
def get_phrases(text):
    """Split a search string into quoted phrases and single words.

    Text enclosed in a matching pair of single or double quotes is kept
    together as one phrase (without the quotes). Everything outside of
    quotes is split on single spaces into individual words.

    A quote character only closes a phrase that was opened with the same
    character, so an apostrophe inside a word (``ein's``) is not paired
    with a later double quote, and a double-quoted phrase may contain an
    apostrophe.

    The intermediate results are printed for debugging.

    Args:
        text: The raw input string.

    Returns:
        A list with all quoted phrases first (in order of appearance),
        followed by the remaining unquoted words (in order of appearance).
    """
    print("text: %s" % (text,))

    # The backreference \1 forces the closing quote to be the same
    # character as the opening one.
    quoted = re.compile(r"""(['"])(.*?)\1""")

    # get all coherent words
    phrases = [match.group(2) for match in quoted.finditer(text)]
    print("phrases: %s" % (phrases,))

    # Remove the phrases by position, not by value: only the matched quoted
    # spans are emptied, so a word that merely contains the same characters
    # as a phrase is left alone. The empty quote pair is left behind and is
    # filtered out below.
    text = quoted.sub(r"\1\1", text)
    print("rest: >>>%s<<<" % text)

    # split the words; tokens consisting only of spaces/quotes are dropped
    words = [i for i in text.split(" ") if i.strip(' \'"')]

    # merge
    phrases = phrases + words
    print("phrases: %s" % (phrases,))
    return phrases
# Demo: run get_phrases() on a few sample inputs, printing a separator
# line between consecutive runs.
_demo_inputs = (
    '"zu sammen" und einzeln',
    "'zu sammen' und einzeln",
    "'zu sammen1' einzeln \"zu sammen2\"",
    "ein's zwei drei",
)
for _position, _sample in enumerate(_demo_inputs):
    if _position > 0:
        print("-" * 79)
    get_phrases(_sample)
Ausgabe:
text: "zu sammen" und einzeln
phrases: ['zu sammen']
rest: >>>"" und einzeln<<<
phrases: ['zu sammen', 'und', 'einzeln']
-------------------------------------------------------------------------------
text: 'zu sammen' und einzeln
phrases: ['zu sammen']
rest: >>>'' und einzeln<<<
phrases: ['zu sammen', 'und', 'einzeln']
-------------------------------------------------------------------------------
text: 'zu sammen1' einzeln "zu sammen2"
phrases: ['zu sammen1', 'zu sammen2']
rest: >>>'' einzeln ""<<<
phrases: ['zu sammen1', 'zu sammen2', 'einzeln']
-------------------------------------------------------------------------------
text: ein's zwei drei
phrases: []
rest: >>>ein's zwei drei<<<
phrases: ["ein's", 'zwei', 'drei']