def train(train_data_filename, rare_train_data_filename, hmm_model_filename, rare_words_rule):
    """Train an HMM tagger in two passes and write its counts to disk.

    A first ``ViterbiTagger`` is trained on the original training data with
    ``rare_words_rule`` attached to it. ``util.process_rare_words`` then
    rewrites the training data into ``rare_train_data_filename`` using the
    ``rare_words`` and ``rare_words_rule`` held by that first tagger. A
    second ``ViterbiTagger`` is trained on the rewritten file and its counts
    are written to ``hmm_model_filename``.

    Args:
        train_data_filename: Path of the original training data (read twice).
        rare_train_data_filename: Path of the rewritten training data; it is
            opened for writing and afterwards read back.
        hmm_model_filename: Path the second tagger's counts are written to.
        rare_words_rule: Value stored on the first tagger as
            ``rare_words_rule`` and forwarded to ``util.process_rare_words``.
    """
    # A parenthesised single string prints identically with the Python 2
    # print statement and the Python 3 print function.
    print('1. Modo entrenamiento hmm')
    hmm_model = ViterbiTagger(3)
    hmm_model.rare_words_rule = rare_words_rule
    with open(train_data_filename) as train_file:
        hmm_model.train(train_file)

    print('2. Procesando palabras raras')
    # The rewritten file has to be closed (and so flushed) before step 3
    # reads it back; the ``with`` blocks guarantee that instead of relying
    # on garbage collection of an anonymous file object.
    with open(train_data_filename) as train_file:
        with open(rare_train_data_filename, 'w') as rare_train_file:
            util.process_rare_words(
                train_file,
                rare_train_file,
                hmm_model.rare_words,
                hmm_model.rare_words_rule)

    print('3. Entrenando hmm usando la nueva data de entrenamiento')
    hmm_model_rare = ViterbiTagger(3)
    with open(rare_train_data_filename) as rare_train_file:
        hmm_model_rare.train(rare_train_file)
    with open(hmm_model_filename, 'w') as model_file:
        hmm_model_rare.write_counts(model_file)
def train(train_data_filename, rare_train_data_filename, hmm_model_filename):
    """Train an HMM tagger in two passes and write its counts to disk.

    A first ``SimpleTagger`` is trained on the original training data.
    ``util.process_rare_words`` then rewrites the training data into
    ``rare_train_data_filename`` using the first tagger's ``rare_words`` and
    ``util.rare_words_rule_p1``. A second ``SimpleTagger`` is trained on the
    rewritten file and its counts are written to ``hmm_model_filename``.

    Args:
        train_data_filename: Path of the original training data (read twice).
        rare_train_data_filename: Path of the rewritten training data; it is
            opened for writing and afterwards read back.
        hmm_model_filename: Path the second tagger's counts are written to.
    """
    # A parenthesised single string prints identically with the Python 2
    # print statement and the Python 3 print function.
    print('1. Entrenamiento hmm model')
    hmm_model = SimpleTagger(3)
    with open(train_data_filename, 'r') as train_file:
        hmm_model.train(train_file)

    # The original progress message here had lost its quotes and step number,
    # which made the whole function a syntax error.
    print('2. Reemplazar rare words')
    rare_words = hmm_model.rare_words
    # The rewritten file has to be closed (and so flushed) before step 3
    # reads it back; the ``with`` blocks guarantee that instead of relying
    # on garbage collection of an anonymous file object.
    with open(train_data_filename) as train_file:
        with open(rare_train_data_filename, 'w') as rare_train_file:
            util.process_rare_words(
                train_file,
                rare_train_file,
                rare_words,
                util.rare_words_rule_p1)

    print('3. Entrenamiento modelo hmm de nuevo utilizando la nueva data (con RARE_TAG)')
    hmm_model_rare = SimpleTagger(3)
    with open(rare_train_data_filename) as rare_train_file:
        hmm_model_rare.train(rare_train_file)
    with open(hmm_model_filename, 'w') as model_file:
        hmm_model_rare.write_counts(model_file)
def train(train_data_filename, rare_train_data_filename, hmm_model_filename, rare_words_rule):
    """Train an HMM tagger in two passes and write its counts to disk.

    Steps, as performed below:

    1. Train a ``ViterbiTagger`` on the original training data, with
       ``rare_words_rule`` stored on the tagger beforehand.
    2. Call ``util.process_rare_words`` to rewrite the training data into
       ``rare_train_data_filename``, passing the first tagger's
       ``rare_words`` and ``rare_words_rule``.
    3. Train a fresh ``ViterbiTagger`` on the rewritten data and write its
       counts to ``hmm_model_filename``.

    Args:
        train_data_filename: Path of the original training data (read twice).
        rare_train_data_filename: Path of the rewritten training data; it is
            opened for writing and afterwards read back.
        hmm_model_filename: Path the second tagger's counts are written to.
        rare_words_rule: Value stored on the first tagger as
            ``rare_words_rule`` and forwarded to ``util.process_rare_words``.
    """
    # A parenthesised single string prints identically with the Python 2
    # print statement and the Python 3 print function.
    print('1. train hmm model')
    hmm_model = ViterbiTagger(3)
    hmm_model.rare_words_rule = rare_words_rule
    with open(train_data_filename) as source:
        hmm_model.train(source)

    print('2. process rare words')
    # Closing the output here flushes it before step 3 reopens it for
    # reading, rather than depending on garbage collection to do so.
    with open(train_data_filename) as source:
        with open(rare_train_data_filename, 'w') as rewritten:
            util.process_rare_words(
                source,
                rewritten,
                hmm_model.rare_words,
                hmm_model.rare_words_rule)

    print('3. train hmm model again using the new train data')
    hmm_model_rare = ViterbiTagger(3)
    with open(rare_train_data_filename) as rewritten:
        hmm_model_rare.train(rewritten)
    with open(hmm_model_filename, 'w') as counts_out:
        hmm_model_rare.write_counts(counts_out)
def train(train_data_filename, rare_train_data_filename, hmm_model_filename):
    """Train an HMM tagger in two passes and write its counts to disk.

    Steps, as performed below:

    1. Train a ``SimpleTagger`` on the original training data.
    2. Call ``util.process_rare_words`` to rewrite the training data into
       ``rare_train_data_filename``, passing the first tagger's
       ``rare_words`` and ``util.rare_words_rule_p1``.
    3. Train a fresh ``SimpleTagger`` on the rewritten data and write its
       counts to ``hmm_model_filename``.

    Args:
        train_data_filename: Path of the original training data (read twice).
        rare_train_data_filename: Path of the rewritten training data; it is
            opened for writing and afterwards read back.
        hmm_model_filename: Path the second tagger's counts are written to.
    """
    # A parenthesised single string prints identically with the Python 2
    # print statement and the Python 3 print function.
    print('1. train hmm model')
    hmm_model = SimpleTagger(3)
    with open(train_data_filename, 'r') as source:
        hmm_model.train(source)

    print('2. replace rare words')
    rare_words = hmm_model.rare_words
    # Closing the output here flushes it before step 3 reopens it for
    # reading, rather than depending on garbage collection to do so.
    with open(train_data_filename) as source:
        with open(rare_train_data_filename, 'w') as rewritten:
            util.process_rare_words(
                source,
                rewritten,
                rare_words,
                util.rare_words_rule_p1)

    print('3. train hmm model again using the new train data (with RARE_TAG)')
    hmm_model_rare = SimpleTagger(3)
    with open(rare_train_data_filename) as rewritten:
        hmm_model_rare.train(rewritten)
    with open(hmm_model_filename, 'w') as counts_out:
        hmm_model_rare.write_counts(counts_out)