14. Example data
A dictionary that maps each word in sentences to its idx
# word dic
# Collect every distinct word across all sentences; sorting makes the
# word -> idx assignment reproducible (plain set ordering is not).
word_list = sorted({word for sentence in sentences for word in sentence})
# Prepend the '<pad>' token so that it always receives idx 0.
word_list = ['<pad>'] + word_list
# Map each word to its position in word_list.
word_dic = {word: idx for idx, word in enumerate(word_list)}
14
22. Example data
A dictionary that maps each word in sentences to its idx
A dictionary that maps each token in pos to its idx
# word dic
# Collect every distinct word across all sentences; sorting makes the
# word -> idx assignment reproducible (plain set ordering is not).
word_list = sorted({word for sentence in sentences for word in sentence})
# Prepend the '<pad>' token so that it always receives idx 0.
word_list = ['<pad>'] + word_list
# Map each word to its position in word_list.
word_dic = {word: idx for idx, word in enumerate(word_list)}
22
23. Example data
A dictionary that maps each word in sentences to its idx
A dictionary that maps each token in pos to its idx
# pos dic
# Collect every distinct token across all sequences in pos; sorting makes
# the token -> idx assignment reproducible (plain set ordering is not).
pos_list = sorted({tag for tags in pos for tag in tags})
# Prepend the '<pad>' token so that it always receives idx 0.
pos_list = ['<pad>'] + pos_list
# Map each token to its position in pos_list. The loop variable is named
# `tag` so it does not shadow the `pos` input read above.
pos_dic = {tag: idx for idx, tag in enumerate(pos_list)}
23
33. Example data
A dictionary that maps each word in sources to its idx
A dictionary that maps each word in targets to its idx
# word dic for source sentences
# Collect every distinct word across all sequences in sources; sorting makes
# the word -> idx assignment reproducible (plain set ordering is not).
source_words = sorted({word for sentence in sources for word in sentence})
# Prepend the '<pad>' token so that it always receives idx 0.
source_words = ['<pad>'] + source_words
# Map each word to its position in source_words.
source_dic = {word: idx for idx, word in enumerate(source_words)}
33
34. Example data
A dictionary that maps each word in sources to its idx
A dictionary that maps each word in targets to its idx
# word dic for translations
# Collect every distinct word across all sequences in targets; sorting makes
# the word -> idx assignment reproducible (plain set ordering is not).
target_words = sorted({word for sentence in targets for word in sentence})
# Prepend the special tokens so they take the first indices:
# '<pad>' -> 0, '<start>' -> 1, '<end>' -> 2.
target_words = ['<pad>', '<start>', '<end>'] + target_words
# Map each word to its position in target_words.
target_dic = {word: idx for idx, word in enumerate(target_words)}
34