def main():
    """Train and score the baseline and HMM taggers from the command line.

    Expects exactly two command-line arguments: the training file and the
    test file.  Progress messages are written to stderr; training times,
    accuracies and the derived scores are written to stdout.

    Returns:
        1 when the argument count is wrong (after printing a usage line);
        otherwise the function falls off the end and returns None.
    """
    if len(sys.argv) != 3:
        print('Usage: %s training_filename test_filename' % sys.argv[0])
        return 1

    def log(message):
        # Progress chatter goes to stderr so stdout only carries results.
        sys.stderr.write(message + '\n')

    train_path, test_path = sys.argv[1:]
    tagged_corpus = nlp_common.read_part_of_speech_file(train_path)
    # Every word seen in training; compute_score receives it together with
    # the predictions (presumably to separate known from unknown words).
    vocabulary = set(token for _tag, token in tagged_corpus)

    # Started after the training data is read, so the final "Finished in"
    # figure excludes that read.
    overall_started = time.time()

    # --- Baseline model -------------------------------------------------
    log('Training baseline model')
    phase_started = time.time()
    baseline = nlp.BaselineModel(tagged_corpus)
    print('Baseline trained in %s seconds.'
          % (time.time() - phase_started,))

    log('Evaluating baseline model')
    baseline_predictions = nlp_common.get_predictions(
        test_path, baseline.predict_sentence)
    baseline_unknown_accuracy, baseline_accuracy = compute_score(
        baseline_predictions, vocabulary)

    # --- Hidden Markov model --------------------------------------------
    phase_started = time.time()
    log('Training hmm model')
    hmm = nlp.HiddenMarkovModel.train(tagged_corpus)
    print('HMM trained in %s seconds.' % (time.time() - phase_started,))

    log('Evaluating hmm model')
    hmm_predictions = nlp_common.get_predictions(
        test_path, hmm.predict_sentence)
    hmm_unknown_accuracy, hmm_accuracy = compute_score(
        hmm_predictions, vocabulary)

    # --- Report ---------------------------------------------------------
    print('%s Baseline accuracy' % baseline_accuracy)
    print('%s Baseline accuracy on unknown words' % baseline_unknown_accuracy)
    print('%s HMM accuracy' % hmm_accuracy)
    print('%s HMM accuracy on unknown words' % hmm_unknown_accuracy)

    # Part III: the better of the two overall accuracies, scaled to 50 and
    # rounded up.
    best_accuracy = max(baseline_accuracy.value(), hmm_accuracy.value())
    print('Score for Part III: %d/50' % math.ceil(best_accuracy * 50))

    # Part IV: only unknown-word accuracy above 0.6 earns points (50 per
    # unit of accuracy, rounded up); anything below is floored at zero.
    unknown_bonus = math.ceil((hmm_unknown_accuracy.value() - 0.6) * 50)
    print('Score for Part IV-unknown words: %d/20' % max(0, unknown_bonus))

    print('Finished in %s' % (time.time() - overall_started,))
def main():
    """Train a tagger chosen by command-line options and print predictions.

    Command line: ``[options] training_filename test_filename``

    Options:
        -s, --smoothing  NO_SMOOTHING (default) or ADD_ONE_SMOOTHING.
        -o, --order      Integer, default 1.  0 selects BaselineModel; any
                         other value is passed to HiddenMarkovModel.train.
        -u, --unknown    PREDICT_ZERO (default) or
                         PREDICT_MOST_COMMON_PART_OF_SPEECH.

    For every (word, prediction, true_pos) triple produced by
    nlp_common.get_predictions, one space-separated line is printed to
    stdout.

    Exits through ``parser.error`` (usage message on stderr, exit status 2)
    when the number of positional arguments is not exactly two, instead of
    failing with an unpacking ValueError.
    """
    parser = optparse.OptionParser(
        usage='%prog [options] training_filename test_filename')
    parser.add_option('-s', '--smoothing',
                      choices=(NO_SMOOTHING, ADD_ONE_SMOOTHING),
                      default=NO_SMOOTHING)
    parser.add_option('-o', '--order', default=1, type=int)
    parser.add_option('-u', '--unknown',
                      choices=(PREDICT_ZERO,
                               PREDICT_MOST_COMMON_PART_OF_SPEECH,),
                      default=PREDICT_ZERO)
    options, args = parser.parse_args()

    # Validate before unpacking so a wrong invocation yields a usage message
    # rather than a traceback.
    if len(args) != 2:
        parser.error('expected exactly two arguments: '
                     'training_filename test_filename')
    train_filename, test_filename = args

    training_data = nlp_common.read_part_of_speech_file(train_filename)

    # Order 0 selects the baseline model, which takes none of the
    # smoothing/unknown-word options.
    if options.order == 0:
        model = BaselineModel(training_data)
    else:
        model = HiddenMarkovModel.train(
            training_data, options.smoothing, options.unknown, options.order)

    predictions = nlp_common.get_predictions(
        test_filename, model.predict_sentence)
    for word, prediction, true_pos in predictions:
        # Single-argument print keeps the same output under Python 2 and 3.
        print('%s %s %s' % (word, prediction, true_pos))