def train(train_data_filename, rare_train_data_filename, hmm_model_filename):
    """Train a tagger, rewrite the training data for rare words, and retrain.

    The function runs three steps, printing a progress message before each:

    1. Train a ``SimpleTagger`` on ``train_data_filename``.
    2. Call ``util.process_rare_words`` with the original training data, an
       output file at ``rare_train_data_filename``, the ``rare_words`` of the
       first tagger and ``util.rare_words_rule_p1``.
    3. Train a fresh ``SimpleTagger`` on the rewritten data and write its
       counts to ``hmm_model_filename``.

    Args:
        train_data_filename: Path of the original training data (read).
        rare_train_data_filename: Path of the rewritten training data
            (created/overwritten, then read back).
        hmm_model_filename: Path the final counts are written to
            (created/overwritten).

    Returns:
        None.
    """
    print('1. Entrenamiento hmm model')
    # NOTE(review): 3 is presumably the n-gram order -- confirm against
    # SimpleTagger.
    hmm_model = SimpleTagger(3)
    with open(train_data_filename, 'r') as train_file:
        hmm_model.train(train_file)

    print('2. Reemplazar rare words')
    rare_words = hmm_model.rare_words
    # The output file must be closed (and therefore flushed) before step 3
    # reads it back, so both handles are scoped to this block.
    with open(train_data_filename) as train_file, \
            open(rare_train_data_filename, 'w') as rare_train_file:
        util.process_rare_words(
            train_file,
            rare_train_file,
            rare_words,
            util.rare_words_rule_p1)

    print('3. Entrenamiento modelo hmm de nuevo utilizando la nueva data (con RARE_TAG)')
    hmm_model_rare = SimpleTagger(3)
    with open(rare_train_data_filename) as rare_train_file:
        hmm_model_rare.train(rare_train_file)
    with open(hmm_model_filename, 'w') as model_file:
        hmm_model_rare.write_counts(model_file)
def train(train_data_filename, rare_train_data_filename, hmm_model_filename):
    """Train a tagger, rewrite the training data for rare words, and retrain.

    The function runs three steps, printing a progress message before each:

    1. Train a ``SimpleTagger`` on ``train_data_filename``.
    2. Call ``util.process_rare_words`` with the original training data, an
       output file at ``rare_train_data_filename``, the ``rare_words`` of the
       first tagger and ``util.rare_words_rule_p1``.
    3. Train a fresh ``SimpleTagger`` on the rewritten data and write its
       counts to ``hmm_model_filename``.

    Args:
        train_data_filename: Path of the original training data (read).
        rare_train_data_filename: Path of the rewritten training data
            (created/overwritten, then read back).
        hmm_model_filename: Path the final counts are written to
            (created/overwritten).

    Returns:
        None.
    """
    print('1. train hmm model')
    # NOTE(review): 3 is presumably the n-gram order -- confirm against
    # SimpleTagger.
    hmm_model = SimpleTagger(3)
    with open(train_data_filename, 'r') as train_file:
        hmm_model.train(train_file)

    print('2. replace rare words')
    rare_words = hmm_model.rare_words
    # The output file must be closed (and therefore flushed) before step 3
    # reads it back, so both handles are scoped to this block.
    with open(train_data_filename) as train_file, \
            open(rare_train_data_filename, 'w') as rare_train_file:
        util.process_rare_words(
            train_file,
            rare_train_file,
            rare_words,
            util.rare_words_rule_p1)

    print('3. train hmm model again using the new train data (with RARE_TAG)')
    hmm_model_rare = SimpleTagger(3)
    with open(rare_train_data_filename) as rare_train_file:
        hmm_model_rare.train(rare_train_file)
    with open(hmm_model_filename, 'w') as model_file:
        hmm_model_rare.write_counts(model_file)