stehe grade ziemlich auf dem Schlauch. Hier erst mal mein Code:
Code: Alles auswählen
nerList=[['London','Location'],['United Kingdom', 'Location'],['London','Institution']]
posList=[[1, 'bla', 1, 3], [1, 'London', 5, 10], [1, 'is', 12, 13],
[1, 'a', 15, 15], [1, 'nice', 17, 20], [1, 'city', 22, 25],
[1, 'it', 28, 29], [1, 'is', 31, 32], [1, 'the', 34, 36],
[1, 'capital', 38, 44], [1, 'of', 46, 47], [1, 'the', 49, 51],
[1, 'United', 53, 58], [1, 'Kingdom', 60, 66], [1, 'and', 68, 70],
[1, 'much', 72, 75], [1, 'appreciated', 77, 87], [1, 'by', 89, 90],
[1, 'Jack', 92, 95], [1, 'London', 97, 102], [2, 'bla', 1, 3]]
def getList(posList, nerList):
output=[]
for word in nerList:
entity=word[0]
name=word[1]
for item in posList:
sentNr=item[0]
wo=item[1]
startpos=item[2]
endpos=item[3]
if wo==entity:
output.append([sentNr, entity, name, startpos, endpos])
elif wo!=entity:
try:
nextWord=posList[posList.index(item)+1][1]
except IndexError:
nextWord=''
if nextWord!='':
exp='%s %s'%(wo, nextWord)
if entity==exp:
endpos=posList[posList.index(item)+1][3]
output.append([sentNr, entity, name, startpos, endpos])
return output
- [[1, 'London', 'Location', 5, 10], [1, 'London', 'Location', 97, 102], [1, 'United Kingdom', 'Location', 53, 66], [1, 'London', 'Institution', 5, 10], [1, 'London', 'Institution', 97, 102]]
- [[1, 'London', 'Location', 5, 10], [1, 'United Kingdom', 'Location', 53, 66], [1, 'London', 'Institution', 97, 102]]
Vielen Dank für jede Hilfe...