# NOTE(review): the statements from here down to ``output.write('0')`` use
# ``self`` and ``output``, neither of which is bound in the visible code, so
# they are the tail of a definition whose header lies above this chunk.
# They are reproduced token-for-token; the loop nesting was reconstructed
# from a collapsed line -- confirm it, and re-indent under the real header.
for (class_name, concepts) in self.class_and_concept.items():
    for concept in concepts:
        output.write('%d\t%d\t%s\t%s\n' % (0, 0, class_name, concept))
# <unk> to every possible concept
for concept in self.concepts:
    output.write('%d\t%d\t%s\t%s\n' % (0, 0, '<unk>', concept))
# word_without_associated_class to the same word
for word in self.word_without_associated_class:
    output.write('%d\t%d\t%s\t%s\n' % (0, 0, word, word))
# write last line
output.write('0')


if __name__ == '__main__':
    # Script entry point: load the concepts, register the words that have no
    # class, and write the transducer file.
    w2concept = W2Concept(CONCEPT_FILENAME)
    w2concept.set_word_without_associated_class(
        './w2class/word_without_class.txt')
    w2concept.write_w2concept_transducer('./w2concept/w2concept.fsm')

    # Merge the concepts, the class names and the class-less words into the
    # lexicon read from LEX_FILE, then write the result back to LEX_FILE.
    lexicon = lex.read_lexicon_file(LEX_FILE)
    class_names = set(w2concept.class_and_concept.keys())
    new_lexicon = w2concept.concepts.union(class_names).union(
        w2concept.word_without_associated_class)
    lexicon = lexicon.union(new_lexicon)
    lex.write_lexicon_to_file(LEX_FILE, list(lexicon))
# NOTE(review): the next two statements use ``output``, which is not bound in
# the visible code, so they are the tail of a definition whose header lies
# above this chunk. They are reproduced unchanged and must be re-indented
# under that header.
output.write('%d\t%d\t%s\t%s\n' % (0, 0, '<unk>', '<unk>'))
output.write('0\n')


def gen_concept_classes(concept_filename,
                        output_filename='./tmp/concept_classes.txt'):
    """Write the distinct concept classes found in a concept file.

    Each non-blank line of ``concept_filename`` is stripped of surrounding
    whitespace and the text after its last ``'.'`` (the whole line when it
    contains no ``'.'``) is taken as a concept class. The distinct classes
    are written to ``output_filename``, one per line, in sorted order.

    Args:
        concept_filename: path of the text file to read, one entry per line.
        output_filename: path of the file to write; defaults to the location
            that used to be hard-coded.
    """
    concept_classes = set()
    with open(concept_filename) as f:
        for line in f:
            entry = line.strip()
            if not entry:
                # A blank line would otherwise register '' as a class and
                # end up as an empty line in the output file.
                continue
            concept_classes.add(entry.split('.')[-1])

    with open(output_filename, 'w') as output:
        # Sort so the file is identical from run to run; set iteration
        # order is not stable for strings.
        for concept in sorted(concept_classes):
            output.write('%s\n' % concept)


if __name__ == '__main__':
    symbols = lex.read_lexicon_file(LEX_FILE)
    w2class = W2Class(symbols)

    # (generator, class name) pairs, handled in this exact order.
    # NOTE(review): 'day_number' is used for both ordinal and plain numbers;
    # confirm write_w2class_transducer does not overwrite the earlier output.
    generators = (
        (w2class.gen_weekdays, 'day_name'),
        (w2class.gen_months, 'month_name'),
        (w2class.gen_ordinal_numbers, 'day_number'),
        (w2class.gen_numbers, 'day_number'),
    )
    for generate, class_name in generators:
        w2class.write_w2class_transducer(generate(), class_name)