def evaluate_model(predicted_templates, source_words, source_feature_dicts, target_words, target_feature_dicts,
                   feature_types, print_results=True):
    """Score predicted templates by instantiating them and comparing to the gold target words.

    Every test example is looked up in ``predicted_templates`` under the key
    ``source_word + ':' + source morph string + ':' + target morph string``
    (morph strings come from ``common.get_morph_string``). The template is
    turned into a word with ``instantiate_template`` and counted as correct
    when it equals the gold target word.

    Args:
        predicted_templates: dict mapping joint index -> predicted template
            (an iterable of strings; it is joined with '' for printing).
        source_words: source word per test example.
        source_feature_dicts: source feature dict per test example.
        target_words: gold target word per test example.
        target_feature_dicts: target feature dict per test example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print one line per example and a summary.

    Returns:
        ``(len(predicted_templates), accuracy)``. The accuracy is the number of
        correct test examples divided by the number of entries in
        ``predicted_templates``; it is 0.0 when that dict is empty.

    Raises:
        KeyError: if a test example has no entry in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    # 2 possible approaches: one - predict template, instantiate, check if equal to word
    # TODO: other option - predict template, generate template using the correct word, check if templates are equal
    total = len(predicted_templates)
    correct = 0
    test_data = zip(source_words, source_feature_dicts, target_words, target_feature_dicts)
    for source_word, source_feat_dict, target_word, target_feat_dict in test_data:
        joint_index = (source_word + ':' + common.get_morph_string(source_feat_dict, feature_types)
                       + ':' + common.get_morph_string(target_feat_dict, feature_types))
        template = predicted_templates[joint_index]
        predicted_word = instantiate_template(template, source_word)
        if predicted_word == target_word:
            correct += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_results:
            print('source word: ' + source_word + ' gold: ' + target_word + ' template:' + ''.join(template)
                  + ' prediction: ' + predicted_word + ' ' + sign)

    # Guard the division: an empty prediction dict used to raise ZeroDivisionError.
    accuracy = float(correct) / total if total else 0.0

    if print_results:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index,
                      source_words, source_feats, target_feats, feat_index, feature_types):
    """Predict one template per (source word, source features, target features) triple.

    Returns a dict keyed by ``source_word:source_morph_string:target_morph_string``
    (morph strings come from ``common.get_morph_string``) whose values are the
    outputs of ``predict_inflection_template``. A repeated key keeps the
    prediction of its last occurrence.
    """
    templates_by_key = {}
    for word, src_feats, tgt_feats in zip(source_words, source_feats, target_feats):
        template = predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, word,
                                               src_feats, tgt_feats, alphabet_index, inverse_alphabet_index,
                                               feat_index, feature_types)
        # index each output by its matching inputs
        src_morph = common.get_morph_string(src_feats, feature_types)
        tgt_morph = common.get_morph_string(tgt_feats, feature_types)
        templates_by_key[word + ':' + src_morph + ':' + tgt_morph] = template
    return templates_by_key
def evaluate_model(predicted_templates, lemmas, feature_dicts, words, feature_types, print_results=True):
    """Score predicted templates by instantiating them on the lemma and comparing to the gold word.

    Each example is looked up under ``lemma + ':' + common.get_morph_string(feat_dict, feature_types)``
    and its template is turned into a word with ``instantiate_template``.

    Args:
        predicted_templates: dict mapping joint index -> predicted template
            (an iterable of strings; it is joined with '' for printing).
        lemmas: lemma per test example.
        feature_dicts: feature dict per test example.
        words: gold word per test example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print every example and a summary. Wrong
            predictions are printed even when this is false.

    Returns:
        ``(len(predicted_templates), accuracy)``. The accuracy is the number of
        correct test examples divided by the number of entries in
        ``predicted_templates``; it is 0.0 when that dict is empty.

    Raises:
        KeyError: if a test example has no entry in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    # 2 possible approaches: one - predict template, instantiate, check if equal to word
    # for now, go with one, maybe try two later
    # TODO: two - predict template, generate template using the correct word, check if templates are equal
    total = len(predicted_templates)
    correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        template = predicted_templates[joint_index]
        predicted_word = instantiate_template(template, lemma)
        if predicted_word == word:
            correct += 1
            sign = 'V'
        else:
            sign = 'X'
        # mistakes are always reported, regardless of print_results
        if print_results or sign == 'X':
            print('lemma: ' + lemma + ' gold: ' + word + ' template: ' + ''.join(template)
                  + ' prediction: ' + predicted_word + ' ' + sign)

    # Guard the division: an empty prediction dict used to raise ZeroDivisionError.
    accuracy = float(correct) / total if total else 0.0

    if print_results:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index,
                      source_words, source_feats, target_feats, feat_index, feature_types):
    """Run ``predict_inflection_template`` over parallel source/target inputs.

    The three input sequences are consumed in lockstep. The result maps
    ``source_word:source_morph_string:target_morph_string`` (morph strings as
    returned by ``common.get_morph_string``) to the predicted template; if the
    same key occurs more than once the later prediction overwrites the earlier.
    """
    result = {}
    examples = zip(source_words, source_feats, target_feats)
    for src_word, src_feat_dict, tgt_feat_dict in examples:
        prediction = predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, src_word,
                                                 src_feat_dict, tgt_feat_dict, alphabet_index,
                                                 inverse_alphabet_index, feat_index, feature_types)
        key_parts = (src_word,
                     common.get_morph_string(src_feat_dict, feature_types),
                     common.get_morph_string(tgt_feat_dict, feature_types))
        result[':'.join(key_parts)] = prediction
    return result
def evaluate_model(predicted_templates, lemmas, feature_dicts, words, feature_types, print_results=True):
    """Score predicted templates by instantiating them on the lemma and comparing to the gold word.

    Each example is looked up under ``lemma + ':' + common.get_morph_string(feat_dict, feature_types)``
    and its template is turned into a word with ``instantiate_template``.

    Args:
        predicted_templates: dict mapping joint index -> predicted template
            (an iterable of strings; it is joined with '' for printing).
        lemmas: lemma per test example.
        feature_dicts: feature dict per test example.
        words: gold word per test example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print one line per example and a summary.

    Returns:
        ``(len(predicted_templates), accuracy)``. The accuracy is the number of
        correct test examples divided by the number of entries in
        ``predicted_templates``; it is 0.0 when that dict is empty.

    Raises:
        KeyError: if a test example has no entry in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    # 2 possible approaches: one - predict template, instantiate, check if equal to word
    # for now, go with one, maybe try two later
    # TODO: two - predict template, generate template using the correct word, check if templates are equal
    total = len(predicted_templates)
    correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        template = predicted_templates[joint_index]
        predicted_word = instantiate_template(template, lemma)
        if predicted_word == word:
            correct += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_results:
            print('lemma: ' + lemma + ' gold: ' + word + ' template: ' + ''.join(template)
                  + ' prediction: ' + predicted_word + ' ' + sign)

    # Guard the division: an empty prediction dict used to raise ZeroDivisionError.
    accuracy = float(correct) / total if total else 0.0

    if print_results:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def evaluate_model(predicted_sequences, lemmas, feature_dicts, words, feature_types, print_results=False):
    """Score predicted sequences against the gold words after removing STEP symbols.

    Each example is looked up under ``lemma + ':' + common.get_morph_string(feat_dict, feature_types)``.
    The predicted word is the stored sequence with every occurrence of the
    module-level ``STEP`` marker removed via ``.replace(STEP, '')``.

    Args:
        predicted_sequences: dict mapping joint index -> predicted sequence
            (must support ``.replace``).
        lemmas: lemma per test example.
        feature_dicts: feature dict per test example.
        words: gold word per test example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print the utf8-encoded lemma, gold word,
            raw sequence, prediction and a V/X sign per example, plus a summary.

    Returns:
        ``(len(predicted_sequences), accuracy)``. The accuracy is the number of
        correct test examples divided by the number of entries in
        ``predicted_sequences``; it is 0.0 when that dict is empty.

    Raises:
        KeyError: if a test example has no entry in ``predicted_sequences``.
    """
    if print_results:
        print('evaluating model...')

    total = len(predicted_sequences)
    correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        predicted_template = predicted_sequences[joint_index]
        predicted_word = predicted_template.replace(STEP, '')
        if predicted_word == word:
            correct += 1
            sign = u'V'
        else:
            sign = u'X'
        if print_results:  # and sign == 'X':
            enc_l = lemma.encode('utf8')
            enc_w = word.encode('utf8')
            enc_t = ''.join([t.encode('utf8') for t in predicted_template])
            enc_p = predicted_word.encode('utf8')
            print('lemma: {}'.format(enc_l))
            print('gold: {}'.format(enc_w))
            print('template: {}'.format(enc_t))
            print('prediction: {}'.format(enc_p))
            print(sign)

    # Guard the division: an empty prediction dict used to raise ZeroDivisionError.
    accuracy = float(correct) / total if total else 0.0

    if print_results:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def evaluate_predictions(predictions, lemmas, feature_dicts, words, feature_types, print_res=False):
    """Compare already-predicted words with the gold words.

    Each example is looked up under ``lemma + ':' + common.get_morph_string(feat_dict, feature_types)``
    and the stored prediction is compared to the gold word directly.

    Args:
        predictions: dict mapping joint index -> predicted word.
        lemmas: lemma per test example.
        feature_dicts: feature dict per test example.
        words: gold word per test example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_res: when true, print one line per example and a summary.

    Returns:
        ``(len(predictions), accuracy)``. The accuracy is the number of correct
        test examples divided by the number of entries in ``predictions``; it is
        0.0 when that dict is empty.

    Raises:
        KeyError: if a test example has no entry in ``predictions``.
    """
    if print_res:
        print('evaluating model...')

    total = len(predictions)
    correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        predicted_word = predictions[joint_index]
        if predicted_word == word:
            correct += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_res:
            print('lemma: ' + lemma + ' gold: ' + word + ' prediction: ' + predicted_word + ' ' + sign)

    # Guard the division: an empty prediction dict used to raise ZeroDivisionError.
    accuracy = float(correct) / total if total else 0.0

    if print_res:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' + str(accuracy) +
              '\n\n')

    return total, accuracy
def evaluate_model(predicted_templates, lemmas, feature_dicts, words, feature_types, print_results=True):
    """Score predictions whose word is simply the predicted sequence joined together.

    Each example is looked up under ``lemma + ':' + common.get_morph_string(feat_dict, feature_types)``.
    The predicted word is ``''.join`` of the stored sequence; because of that the
    'template' and 'prediction' fields of the per-example line show the same string.

    Args:
        predicted_templates: dict mapping joint index -> iterable of strings.
        lemmas: lemma per test example.
        feature_dicts: feature dict per test example.
        words: gold word per test example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print one line per example and a summary.

    Returns:
        ``(len(predicted_templates), accuracy)``. The accuracy is the number of
        correct test examples divided by the number of entries in
        ``predicted_templates``; it is 0.0 when that dict is empty.

    Raises:
        KeyError: if a test example has no entry in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    total = len(predicted_templates)
    correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        predicted_word = ''.join(predicted_templates[joint_index])
        if predicted_word == word:
            correct += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_results:
            print(u'lemma: {} gold: {} template: {} prediction: {} correct: {}'.format(
                lemma, word, predicted_word, predicted_word, sign))

    # Guard the division: an empty prediction dict used to raise ZeroDivisionError.
    accuracy = float(correct) / total if total else 0.0

    if print_results:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def evaluate_model(predicted_templates, lemmas, feature_dicts, words, feature_types, print_results=False):
    """Score predictions whose word is the predicted sequence joined together.

    Each example is looked up under ``lemma + ':' + common.get_morph_string(feat_dict, feature_types)``
    and the predicted word is ``''.join`` of the stored sequence.

    Args:
        predicted_templates: dict mapping joint index -> iterable of strings.
        lemmas: lemma per test example.
        feature_dicts: feature dict per test example.
        words: gold word per test example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print the utf8-encoded lemma, gold word and
            prediction plus a V/X sign per example, and a summary.

    Returns:
        ``(len(predicted_templates), accuracy)``. The accuracy is the number of
        correct test examples divided by the number of entries in
        ``predicted_templates``; it is 0.0 when that dict is empty.

    Raises:
        KeyError: if a test example has no entry in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    total = len(predicted_templates)
    correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        predicted_word = ''.join(predicted_templates[joint_index])
        if predicted_word == word:
            correct += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_results:
            enc_l = lemma.encode('utf8')
            enc_w = word.encode('utf8')
            enc_p = predicted_word.encode('utf8')
            print('lemma: {}'.format(enc_l))
            print('gold: {}'.format(enc_w))
            print('prediction: {}'.format(enc_p))
            print(sign)

    # Guard the division: an empty prediction dict used to raise ZeroDivisionError.
    accuracy = float(correct) / total if total else 0.0

    if print_results:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def evaluate_model(predicted_templates, lemmas, feature_dicts, words, feature_types, print_results=True):
    """Score predictions whose word is simply the predicted sequence joined together.

    Each example is looked up under ``lemma + ':' + common.get_morph_string(feat_dict, feature_types)``.
    The predicted word is ``''.join`` of the stored sequence, so the 'template'
    and 'prediction' fields of the per-example line carry the same string.

    Args:
        predicted_templates: dict mapping joint index -> iterable of strings.
        lemmas: lemma per test example.
        feature_dicts: feature dict per test example.
        words: gold word per test example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_results: when true, print one line per example and a summary.

    Returns:
        ``(len(predicted_templates), accuracy)``. The accuracy is the number of
        correct test examples divided by the number of entries in
        ``predicted_templates``; it is 0.0 when that dict is empty.

    Raises:
        KeyError: if a test example has no entry in ``predicted_templates``.
    """
    if print_results:
        print('evaluating model...')

    total = len(predicted_templates)
    correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(
            feat_dict, feature_types)
        predicted_word = ''.join(predicted_templates[joint_index])
        if predicted_word == word:
            correct += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_results:
            print(u'lemma: {} gold: {} template: {} prediction: {} correct: {}'.format(
                lemma, word, predicted_word, predicted_word, sign))

    # Guard the division: an empty prediction dict used to raise ZeroDivisionError.
    accuracy = float(correct) / total if total else 0.0

    if print_results:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' +
              str(accuracy) + '\n\n')

    return total, accuracy
def predict_with_ensemble_majority(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet,
                                   feature_types, hidden_dim, input_dim, inverse_alphabet_index, layers,
                                   test_feat_dicts, test_lemmas, test_words):
    """Predict the test set with every ensemble member and keep the majority output per input.

    ``ensemble`` is a comma-separated string of model paths. Each path is
    loaded with ``task1_attention_implementation.load_best_model`` and the whole
    test set is predicted with ``predict_sequences``. For every test input the
    members' outputs are joined into strings and counted; the string with the
    most votes wins (on a tie, the first maximal key ``max`` encounters).

    ``test_words`` is only zipped with the other test lists, so it bounds how
    many examples are voted on, but its values are not read.

    Returns:
        dict mapping ``lemma:morph_string`` to the winning member's
        un-joined prediction.
    """
    model_paths = ensemble.split(',')
    print('ensemble paths:\n')
    print('\n'.join(model_paths))

    # load ensemble models
    members = []
    for path in model_paths:
        model, encoder_frnn, encoder_rrnn, decoder_rnn = task1_attention_implementation.load_best_model(
            alphabet, path, input_dim, hidden_dim, layers, feature_alphabet, feat_input_dim, feature_types)
        members.append((model, encoder_frnn, encoder_rrnn, decoder_rnn))

    # predict the entire test set with each model in the ensemble
    per_member_predictions = []
    for model, encoder_frnn, encoder_rrnn, decoder_rnn in members:
        per_member_predictions.append(
            predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index,
                              inverse_alphabet_index, test_lemmas, test_feat_dicts, feat_index, feature_types))

    # perform voting for each test input - joint_index is a lemma+feats representation
    winners = {}
    # shared across all inputs: the last prediction seen for each joined string
    string_to_template = {}
    for lemma, feat_dict, _ in zip(test_lemmas, test_feat_dicts, test_words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        votes = defaultdict(int)
        for member_predictions in per_member_predictions:
            template = member_predictions[joint_index]
            as_string = ''.join(template)
            votes[as_string] += 1
            string_to_template[as_string] = template
            print(u'template: {} prediction: {}'.format(template, as_string))

        # return the most predicted output
        chosen = max(votes, key=votes.get)
        print(u'chosen:{} with {} votes\n'.format(chosen, votes[chosen]))
        winners[joint_index] = string_to_template[chosen]

    return winners
def predict(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, feat_index,
            feature_types, lemmas, feature_dicts):
    """Predict an inflected word for every (lemma, feature dict) pair.

    Returns a dict mapping ``lemma:morph_string`` (morph string from
    ``common.get_morph_string``) to the output of ``predict_inflection``.
    A repeated key keeps the prediction of its last occurrence.
    """
    words_by_key = {}
    for lemma, feat_dict in zip(lemmas, feature_dicts):
        word = predict_inflection(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict,
                                  alphabet_index, inverse_alphabet_index, feat_index, feature_types)
        morph = common.get_morph_string(feat_dict, feature_types)
        words_by_key[lemma + ':' + morph] = word
    return words_by_key
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, lemmas,
                      feats, feat_index, feature_types):
    """Predict one template per (lemma, feature dict) pair.

    Returns a dict mapping ``lemma:morph_string`` (morph string from
    ``common.get_morph_string``) to the output of
    ``predict_inflection_template``. A repeated key keeps the prediction of its
    last occurrence.
    """
    templates_by_key = {}
    for lemma, feat_dict in zip(lemmas, feats):
        template = predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict,
                                               alphabet_index, inverse_alphabet_index, feat_index,
                                               feature_types)
        morph = common.get_morph_string(feat_dict, feature_types)
        templates_by_key[lemma + ':' + morph] = template
    return templates_by_key
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, lemmas,
                      feats, feat_index, feature_types):
    """Run ``predict_output_sequence`` for every (lemma, feature dict) pair.

    Returns a dict keyed by ``lemma:morph_string`` (morph string from
    ``common.get_morph_string``); a repeated key keeps the prediction of its
    last occurrence.
    """
    outputs = {}
    for lemma, feat_dict in zip(lemmas, feats):
        sequence = predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict,
                                           alphabet_index, inverse_alphabet_index, feat_index, feature_types)

        # index each output by its matching inputs - lemma + features
        key = ':'.join((lemma, common.get_morph_string(feat_dict, feature_types)))
        outputs[key] = sequence
    return outputs
def predict_sequences(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, alphabet_index,
                      inverse_alphabet_index, lemmas, feats, feat_index, feature_types):
    """Run ``predict_output_sequence`` for every (lemma, feature dict) pair.

    All model components (``char_lookup``, ``feat_lookup``, ``R``, ``bias`` and
    the three RNNs) are forwarded unchanged. Returns a dict keyed by
    ``lemma:morph_string`` (morph string from ``common.get_morph_string``);
    a repeated key keeps the prediction of its last occurrence.
    """
    sequences_by_key = {}
    for lemma, feat_dict in zip(lemmas, feats):
        sequence = predict_output_sequence(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn,
                                           decoder_rnn, lemma, feat_dict, alphabet_index,
                                           inverse_alphabet_index, feat_index, feature_types)

        # index each output by its matching inputs - lemma + features
        morph = common.get_morph_string(feat_dict, feature_types)
        sequences_by_key[lemma + ':' + morph] = sequence
    return sequences_by_key
def predict_with_ensemble_majority(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet,
                                   feature_types, hidden_dim, input_dim, inverse_alphabet_index, layers,
                                   test_feat_dicts, test_lemmas, test_words):
    """Majority-vote the outputs of several saved models over the test set.

    ``ensemble`` holds comma-separated model paths. Every path is loaded through
    ``task1_attention_implementation.load_best_model``; each loaded model then
    predicts the full test set via ``predict_sequences``. Per test input, the
    joined output strings are tallied and the most frequent one is selected
    (on a tie, the first maximal key ``max`` encounters).

    ``test_words`` takes part only in the ``zip`` over the test lists, so it
    limits the number of voted examples, but its values are never read.

    Returns:
        dict mapping ``lemma:morph_string`` to the un-joined prediction stored
        for the winning string.
    """
    ensemble_model_names = ensemble.split(',')
    print('ensemble paths:\n')
    print('\n'.join(ensemble_model_names))

    # load ensemble models
    loaded_models = []
    for model_path in ensemble_model_names:
        model, encoder_frnn, encoder_rrnn, decoder_rnn = task1_attention_implementation.load_best_model(
            alphabet, model_path, input_dim, hidden_dim, layers, feature_alphabet, feat_input_dim, feature_types)
        loaded_models.append((model, encoder_frnn, encoder_rrnn, decoder_rnn))

    # predict the entire test set with each model in the ensemble
    all_predictions = []
    for model, encoder_frnn, encoder_rrnn, decoder_rnn in loaded_models:
        member_output = predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index,
                                          inverse_alphabet_index, test_lemmas, test_feat_dicts, feat_index,
                                          feature_types)
        all_predictions.append(member_output)

    # perform voting for each test input - joint_index is a lemma+feats representation
    majority = {}
    # kept across inputs: maps a joined string to the last raw prediction that produced it
    string_to_template = {}
    for lemma, feat_dict, _ in zip(test_lemmas, test_feat_dicts, test_words):
        joint_index = ':'.join((lemma, common.get_morph_string(feat_dict, feature_types)))
        tally = defaultdict(int)
        for member_output in all_predictions:
            raw = member_output[joint_index]
            joined = ''.join(raw)
            tally[joined] += 1
            string_to_template[joined] = raw
            print(u'template: {} prediction: {}'.format(raw, joined))

        # return the most predicted output
        top = max(tally, key=tally.get)
        print(u'chosen:{} with {} votes\n'.format(top, tally[top]))
        majority[joint_index] = string_to_template[top]

    return majority
def predict_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, lemmas,
                      feats, feat_index, feature_types):
    """Collect ``predict_inflection_template`` outputs for parallel lemmas and feature dicts.

    The result maps ``lemma:morph_string`` (morph string from
    ``common.get_morph_string``) to the predicted template. When a key repeats,
    the later prediction replaces the earlier one.
    """
    predicted = {}
    pairs = zip(lemmas, feats)
    for lemma, feat_dict in pairs:
        template = predict_inflection_template(
            model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict,
            alphabet_index, inverse_alphabet_index, feat_index, feature_types)
        key = ':'.join((lemma, common.get_morph_string(feat_dict, feature_types)))
        predicted[key] = template
    return predicted
def predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, lemmas,
                      feats, feat_index, feature_types):
    """Run ``predict_output_sequence`` for every (lemma, feature dict) pair, reporting progress.

    A progress line is printed after every 1000th example (never for index 0).
    Returns a dict keyed by ``lemma:morph_string`` (morph string from
    ``common.get_morph_string``); a repeated key keeps the prediction of its
    last occurrence.
    """
    print('predicting...')
    total = len(lemmas)
    sequences_by_key = {}
    for position, (lemma, feat_dict) in enumerate(zip(lemmas, feats)):
        sequence = predict_output_sequence(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict,
                                           alphabet_index, inverse_alphabet_index, feat_index, feature_types)
        if position > 0 and position % 1000 == 0:
            print('predicted {} examples out of {}'.format(position, total))

        morph = common.get_morph_string(feat_dict, feature_types)
        sequences_by_key[lemma + ':' + morph] = sequence
    return sequences_by_key
def predict(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, feat_index,
            feature_types, lemmas, feature_dicts):
    """Collect ``predict_inflection`` outputs for parallel lemmas and feature dicts.

    The result maps ``lemma:morph_string`` (morph string from
    ``common.get_morph_string``) to the predicted word. When a key repeats, the
    later prediction replaces the earlier one.
    """
    predicted = {}
    for lemma, feat_dict in zip(lemmas, feature_dicts):
        inflected = predict_inflection(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict,
                                       alphabet_index, inverse_alphabet_index, feat_index, feature_types)
        key = ':'.join((lemma, common.get_morph_string(feat_dict, feature_types)))
        predicted[key] = inflected
    return predicted
def predict_nbest_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index,
                            lemmas, feats, feat_index, feature_types, nbest, words):
    """Predict an n-best list of templates per input and report how often it beats greedy decoding.

    For every (lemma, feature dict) pair both the greedy template
    (``predict_inflection_template``) and the n-best list
    (``predict_nbest_template``) are computed. Whenever the greedy guess is
    wrong but an n-best candidate instantiates to the gold word, the case is
    printed and counted.

    Args:
        model, decoder_rnn, encoder_frnn, encoder_rrnn: forwarded to the
            prediction helpers.
        alphabet_index, inverse_alphabet_index, feat_index, feature_types:
            forwarded to the prediction helpers; ``feature_types`` is also used
            to build the result keys.
        lemmas: input lemmas.
        feats: feature dict per lemma.
        nbest: forwarded to ``predict_nbest_template``.
        words: gold words, indexed in parallel with ``lemmas``.

    Returns:
        dict mapping ``lemma:morph_string`` to the n-best list, whose items are
        unpacked as ``(template, score)`` pairs.
    """
    predictions = {}
    fix_count = 0
    total = len(lemmas)
    for i, (lemma, feat_dict) in enumerate(zip(lemmas, feats)):
        predicted_template = predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma,
                                                         feat_dict, alphabet_index, inverse_alphabet_index,
                                                         feat_index, feature_types)
        predicted_nbest = predict_nbest_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma,
                                                 feat_dict, alphabet_index, inverse_alphabet_index, feat_index,
                                                 feature_types, nbest)

        # DEBUG:
        gold = words[i]
        greedy_guess = instantiate_template(predicted_template, lemma)
        gsign = 'V' if gold == greedy_guess else 'X'

        for j, (s, p) in enumerate(predicted_nbest):
            nbest_guess = instantiate_template(s, lemma)
            nsign = 'V' if gold == nbest_guess else 'X'

            if gsign == 'X' and nsign == 'V':
                # NOTE(review): this counts every correct candidate, so one example with
                # several correct candidates is counted more than once - confirm intended.
                fix_count += 1
                print(str(i) + ' out of ' + str(total))
                print(lemma + '\n')
                print('GREEDY: \n' + str(''.join(predicted_template).encode('utf8')))
                print(greedy_guess + ' ' + gsign + '\n')
                print('{0}-BEST:'.format(j + 1))
                print(str(''.join(s).encode('utf8')) + ' ' + str(p))
                print(nbest_guess + ' ' + nsign + '\n')

        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        predictions[joint_index] = predicted_nbest

    # Guard the division: empty input used to raise ZeroDivisionError in the summary line.
    fixed_percent = float(fix_count) / total * 100 if total else 0.0

    print('================================================================')
    print('beam search fixed {0} out of {1}, {2}%'.format(fix_count, total, fixed_percent))
    print('================================================================')

    return predictions
def predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, lemmas,
                      feats, feat_index, feature_types):
    """Collect ``predict_output_sequence`` outputs for parallel lemmas and feature dicts.

    Prints 'predicting...' up front and a progress line at every positive
    multiple of 1000 examples. The result maps ``lemma:morph_string`` (morph
    string from ``common.get_morph_string``) to the predicted sequence; when a
    key repeats, the later prediction replaces the earlier one.
    """
    print('predicting...')
    results = {}
    n_examples = len(lemmas)
    for idx, (lemma, feat_dict) in enumerate(zip(lemmas, feats)):
        output = predict_output_sequence(
            model, encoder_frnn, encoder_rrnn, decoder_rnn, lemma, feat_dict,
            alphabet_index, inverse_alphabet_index, feat_index, feature_types)
        reached_checkpoint = idx % 1000 == 0 and idx > 0
        if reached_checkpoint:
            print('predicted {} examples out of {}'.format(idx, n_examples))

        key = ':'.join((lemma, common.get_morph_string(feat_dict, feature_types)))
        results[key] = output
    return results
def evaluate_predictions(predictions, lemmas, feature_dicts, words, feature_types, print_res=False):
    """Compare already-predicted words with the gold words.

    Each example is looked up under ``lemma + ':' + common.get_morph_string(feat_dict, feature_types)``
    and the stored prediction is compared to the gold word directly.

    Args:
        predictions: dict mapping joint index -> predicted word.
        lemmas: lemma per test example.
        feature_dicts: feature dict per test example.
        words: gold word per test example.
        feature_types: forwarded to ``common.get_morph_string``.
        print_res: when true, print one line per example and a summary.

    Returns:
        ``(len(predictions), accuracy)``. The accuracy is the number of correct
        test examples divided by the number of entries in ``predictions``; it is
        0.0 when that dict is empty.

    Raises:
        KeyError: if a test example has no entry in ``predictions``.
    """
    if print_res:
        print('evaluating model...')

    total = len(predictions)
    correct = 0
    for lemma, feat_dict, word in zip(lemmas, feature_dicts, words):
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        predicted_word = predictions[joint_index]
        if predicted_word == word:
            correct += 1
            sign = 'V'
        else:
            sign = 'X'
        if print_res:
            print('lemma: ' + lemma + ' gold: ' + word + ' prediction: ' + predicted_word + ' ' + sign)

    # Guard the division: an empty prediction dict used to raise ZeroDivisionError.
    accuracy = float(correct) / total if total else 0.0

    if print_res:
        print('finished evaluating model. accuracy: ' + str(correct) + '/' + str(total) + '=' + str(accuracy) +
              '\n\n')

    return total, accuracy
def main(train_path, dev_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, feat_input_dim,
         epochs, layers, optimization, regularization, learning_rate, plot, override):
    """Visualise what the encoder of a soft-attention model has learned.

    Builds the model and dev data through ``init_model``, encodes dev
    (features, lemma) inputs with ``soft_attention.encode_feats_and_chars``, and
    draws a series of ``plot_svd_reduction`` plots: encoder states labelled by
    position and by character, encoder states of the feature symbols, the
    character embeddings and the feature embeddings. Finally it calls
    ``plot_attn_for_inflection`` for a slice of the dev set.

    ``sigmorphon_root_dir``, ``plot`` and ``override`` are accepted but not read
    in this function. ``epochs``, ``optimization``, ``regularization`` and
    ``learning_rate`` are only stored in ``hyper_params``, which is forwarded to
    ``plot_attn_for_inflection``.
    """
    hyper_params = {
        'INPUT_DIM': input_dim,
        'HIDDEN_DIM': hidden_dim,
        'FEAT_INPUT_DIM': feat_input_dim,
        'EPOCHS': epochs,
        'LAYERS': layers,
        'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
        'OPTIMIZATION': optimization,
        'PATIENCE': MAX_PATIENCE,
        'REGULARIZATION': regularization,
        'LEARNING_RATE': learning_rate
    }

    # several of the unpacked values (R, bias, W_c, W__a, U__a, v__a, dev_words) are not used below
    (initial_model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, W_c, W__a, U__a, v__a,
     alphabet_index, feat_index, feature_types, inverse_alphabet_index, dev_words, dev_lemmas,
     dev_feat_dicts) = init_model(dev_path, feat_input_dim, hidden_dim, input_dim, layers, results_file_path,
                                  test_path, train_path)

    # char_lookup = initial_model["char_lookup"]
    # feat_lookup = initial_model["feat_lookup"]

    # "what is learned by the encoder" experiment:
    # get lots of input words (dev set)
    # run blstm encoder on them (encode feats and chars)
    # experiments:
    # we want to understand what's captured/whats more significant: current symbol, context or all?
    # to do so:
    # take the blstm rep. for the same character, same context, different positions. how will it cluster by position?
    # i.e: abbbbbb, babbbb, bbabbbb, bbbabbbb, bbbbabb, bbbbbba...
    # take the blstm rep. for the same character, same position, diff. contexts. how will it cluster by context?
    # aaaabaaaa, bbbbbbbbb, cccbcccc, dddbdddd, eeeebeeee...
    # take the blstm rep. for diff characters, same position, same contexts. how will it cluster by character?
    # aaaaaaaa, aaabaaa, aaacaaa, aaadaaa, aaaeaaa, aaafaaa...
    # other option: take (all?) "natural" (dev) examples, throw on SVD, paint by location, character, context (last one
    # is more complex but can probably think about something)

    start = 0
    # NOTE(review): with this end the slice [start:end] leaves out the last dev example - confirm intended
    end = len(dev_lemmas) - 1
    encoded_vecs = {}
    index_to_feats_and_lemma = {}

    # get bilstm encoder representation, keyed by morph string + lemma
    for lemma, feats in zip(dev_lemmas[start:end], dev_feat_dicts[start:end]):
        index = common.get_morph_string(feats, feature_types) + lemma
        index_to_feats_and_lemma[index] = (feats, lemma)
        encoded_vecs[index] = soft_attention.encode_feats_and_chars(
            alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, feat_index, feat_lookup, feats, feature_types,
            lemma)

    # get examples (encoder hidden states) by location: 1, 2, 3, 4, 5...
    location_to_vec = {}
    for encoded_rep_index in encoded_vecs:
        encoded_rep = encoded_vecs[encoded_rep_index]
        for location, vec in enumerate(encoded_rep):
            if location in location_to_vec:
                location_to_vec[location].append(vec)
            else:
                location_to_vec[location] = [vec]

    location_labels = []
    vecs = []
    # take up to 100 vectors from each location
    # (location_labels / vecs only fed the plot that is commented out below)
    for key in location_to_vec:
        for value in location_to_vec[key][0:100]:
            location_labels.append(key)
            vecs.append(value.vec_value())

    # plot_svd_reduction(hidden_states, location_labels, title='SVD for encoder hidden states by location')

    # get examples (encoder hidden states) by character: א,ב,ג,ד,ה,ו...
    char_to_vec = {}
    char_vecs = []
    char_labels = []
    char_location_labels = []
    current_char_labels = []
    feat_vecs = []
    feat_labels = []
    for encoded_rep_index in encoded_vecs:

        # get bilstm encoding for the sequence
        encoded_rep = encoded_vecs[encoded_rep_index]

        # should skip the feat vecs (?)
        # get matching lemma and features
        feats, lemma = index_to_feats_and_lemma[encoded_rep_index]
        sorted_feats = []
        for feat in sorted(feature_types):
            if feat in feats:
                sorted_feats.append(u'{}:{}'.format(feat, feats[feat]))

        # presumably mirrors the symbol order the encoder was fed ('<', features, characters, '>') - TODO confirm
        seq_symbols = ['<'] + list(sorted_feats) + list(lemma) + ['>']

        # sort vectors by symbol
        for i, symbol in enumerate(seq_symbols):
            # `in lemma` is a substring test on the lemma string
            if symbol in lemma:
                char_vecs.append(encoded_rep[i])
                # neighbours are used for the label only; '_' marks a missing neighbour
                if i > 0:
                    prev_symbol = seq_symbols[i - 1]
                else:
                    prev_symbol = '_'
                if i < len(seq_symbols) - 1:
                    next_symbol = seq_symbols[i + 1]
                else:
                    next_symbol = '_'
                char_labels.append(u'{} ({},{},{})'.format(symbol, prev_symbol, i, next_symbol))
                char_location_labels.append(u'{}'.format(i))
                current_char_labels.append(u'{}'.format(symbol))
            else:
                if symbol in sorted_feats:
                    feat_vecs.append(encoded_rep[i])
                    feat_labels.append(symbol)

            # NOTE(review): nesting of this block was reconstructed (placed at loop level so every
            # symbol is collected) - confirm against the original file
            if symbol in char_to_vec:
                char_to_vec[symbol].append(encoded_rep[i])
            else:
                char_to_vec[symbol] = [encoded_rep[i]]

    symbol_labels = []
    vecs = []
    # take 20 samples from each symbol
    # (symbol_labels / vecs only fed the plot that is commented out below)
    for key in char_to_vec:
        for value in char_to_vec[key][0:20]:
            symbol_labels.append(key)
            vecs.append(value.vec_value())

    # plot_svd_reduction(all_hidden_states, symbol_labels, title='SVD for encoder hidden states by symbol')

    char_hidden_states = np.array([v.vec_value() for v in char_vecs])
    # plot_svd_reduction(char_hidden_states[0:100], char_labels[0:100], title='SVD for encoder hidden states by symbol (characters only)')

    # first 200 character states, labelled with symbol + context, coloured by position / by character
    plot_svd_reduction(
        char_hidden_states[0:200],
        char_labels[0:200],
        color_labels=char_location_labels[0:200],
        title='SVD for encoder hidden states by location (characters only)')
    plot_svd_reduction(
        char_hidden_states[0:200],
        char_labels[0:200],
        color_labels=current_char_labels[0:200],
        title='SVD for encoder hidden states by character (characters only)')

    # first 500 character states, labelled with the bare character
    plot_svd_reduction(char_hidden_states[0:500], current_char_labels[0:500],
                       color_labels=char_location_labels[0:500],
                       title='Soft Attention - Encoded Inputs by Location')
    plot_svd_reduction(char_hidden_states[0:500], current_char_labels[0:500],
                       color_labels=current_char_labels[0:500],
                       title='Soft Attention - Encoded Inputs by Character')

    # feature-symbol states, coloured by the first four characters of the 'type:value' label
    feat_hidden_states = np.array([v.vec_value() for v in feat_vecs])
    plot_svd_reduction(
        feat_hidden_states[0:50],
        feat_labels[0:50],
        color_labels=[f[0:4] for f in feat_labels[0:50]],
        title='SVD for encoder hidden states by type (features only)')

    # TODO: get examples (encoder hidden states) by context: after/before א,ב,ג,ד,ה...

    # char_embeddings is filled below but not read afterwards in this function
    char_embeddings = {}
    char_embeddings_matrix = []
    clean_alphabet_index = {}

    # print SVD for char embeddings
    # workaround to remove feat embeddings from plot:
    # keep only single-character, non-digit symbols that are not special markers
    for char in alphabet_index:
        if not len(char) > 1 and not char.isdigit() and char not in [UNK, UNK_FEAT, EPSILON, NULL]:
            clean_alphabet_index[char] = alphabet_index[char]

    for char in clean_alphabet_index:
        char_embeddings[char] = char_lookup[clean_alphabet_index[char]].vec_value()
        char_embeddings_matrix.append(char_lookup[clean_alphabet_index[char]].vec_value())
    X = np.array(char_embeddings_matrix)
    plot_svd_reduction(X, clean_alphabet_index, title='SVD for character embeddings')

    # print SVD for feat embeddings
    # feat_embeddings is filled below but not read afterwards in this function
    feat_embeddings = {}
    feat_embeddings_matrix = []
    for feat in feat_index:
        feat_embeddings[feat] = feat_lookup[feat_index[feat]].vec_value()
        feat_embeddings_matrix.append(feat_lookup[feat_index[feat]].vec_value())
    Y = np.array(feat_embeddings_matrix)
    plot_svd_reduction(Y, feat_index, title='SVD for feature embeddings')

    # plot attention for the single dev example at index 1000, if its lemma is shorter than 6 characters
    start = 1000
    end = 1001
    for lemma, feats in zip(dev_lemmas[start:end], dev_feat_dicts[start:end]):
        if len(lemma) < 6:
            plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index,
                                     feature_types, initial_model, inverse_alphabet_index, dev_path, feat_input_dim,
                                     feats, hidden_dim, hyper_params, input_dim, layers, results_file_path, test_path,
                                     train_path, lemma)

    # NOTE(review): this return looks like a deliberate short-circuit that disables the hard-coded
    # examples below; its nesting level was reconstructed - confirm it belongs at function level
    return

    # get user input word and features
    feats = {
        u'pos': u'VB',
        u'num': u'S',
        u'per': u'2',
        u'gen': u'M',
        u'binyan': u'HITPAEL',
        u'tense': u'PAST'
    }
    user_input = u'ספר'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path, user_input)

    feats = {u'pos': u'JJ', u'num': u'P', u'def': u'DEF', u'gen': u'F'}
    user_input = u'צמחוני'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path, user_input)

    feats = {
        u'pos': u'VB',
        u'num': u'S',
        u'gen': u'F',
        u'per': u'3',
        u'tense': u'FUTURE',
        u'binyan': u'PAAL'
    }
    user_input = u'שש'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path, user_input)

    # feats = {u'pos': u'NN', u'num': u'P', u'gen': u'F', u'poss_per': u'2', u'poss_gen': u'M', u'poss_num': u'P'}
    feats = {
        u'pos': u'NN',
        u'num': u'P',
        u'gen': u'F',
        u'poss_per': u'2',
        u'poss_gen': u'M',
        u'poss_num': u'P'
    }
    # u'tense' : u'FUTURE', u'poss_per': u'2', u'poss_gen': u'M', u'poss_num': u'P'}
    user_input = u'כלב'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path, user_input)

    feats = {
        u'pos': u'VB',
        u'num': u'P',
        u'gen': u'M',
        u'per': u'3',
        u'tense': u'FUTURE',
        u'binyan': u'PAAL'
    }
    user_input = u'ישן'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path, user_input)

    feats = {
        u'pos': u'VB',
        u'num': u'P',
        u'gen': u'F',
        u'per': u'3',
        u'tense': u'FUTURE',
        u'binyan': u'PAAL'
    }
    user_input = u'ישן'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path, user_input)

    print 'Bye!'
def main(train_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, epochs, layers,
         optimization, feat_input_dim):
    """Evaluate the stored best model of every POS cluster and write the results file.

    Train and test data are loaded, the character and feature indices are rebuilt from
    the training data, and the data is clustered with ``common.cluster_data_by_pos``.
    For each training cluster the matching model is loaded with ``load_best_model``,
    templates are predicted for the test examples of the same cluster and instantiated
    into inflections. Macro and micro average accuracies are printed and the predictions,
    keyed by their index in the test data, are passed to ``common.write_results_file``.

    Args:
        train_path: training data path; used to rebuild the alphabets and clusters.
        test_path: evaluated data path; when it contains 'test' the output file gets the
            '.best.test' suffix, otherwise '.best'.
        results_file_path: forwarded to ``load_best_model`` and used as the base name of
            the written results file.
        sigmorphon_root_dir: forwarded unchanged to ``common.write_results_file``.
        input_dim, hidden_dim, layers, feat_input_dim: forwarded to ``load_best_model``.
        epochs, optimization: only recorded in the hyper-parameter dictionary.
    """
    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'EPOCHS': epochs, 'LAYERS': layers,
                    'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN, 'OPTIMIZATION': optimization}

    print('train path = ' + str(train_path))
    print('test path =' + str(test_path))
    for param in hyper_params:
        print(param + '=' + str(hyper_params[param]))

    # load data
    (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path)
    (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # feat 2 int
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # cluster the data by POS type (features)
    train_cluster_to_data_indices = common.cluster_data_by_pos(train_feat_dicts)
    test_cluster_to_data_indices = common.cluster_data_by_pos(test_feat_dicts)

    # each entry is the pair returned by evaluate_model; it is used below as
    # (number of evaluated items, accuracy)
    accuracies = []
    final_results = {}

    # factored model: new model per inflection type
    for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices):

        # get the inflection-specific data
        train_cluster_words = [train_words[i] for i in train_cluster_to_data_indices[cluster_type]]
        if len(train_cluster_words) < 1:
            print('only ' + str(len(train_cluster_words)) + ' samples for this inflection type. skipping')
            continue
        else:
            print('now evaluating model for cluster ' + str(cluster_index + 1) + '/' +
                  str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' +
                  str(len(train_cluster_words)) + ' examples')

        # test best model
        # NOTE(review): the KeyError handler is meant for clusters missing from the test
        # data, but it also hides any KeyError raised while loading or predicting.
        try:
            test_indices = test_cluster_to_data_indices[cluster_type]
            test_cluster_lemmas = [test_lemmas[i] for i in test_indices]
            test_cluster_words = [test_words[i] for i in test_indices]
            test_cluster_feat_dicts = [test_feat_dicts[i] for i in test_indices]

            # load best model
            best_model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(
                str(cluster_index), alphabet, results_file_path, input_dim, hidden_dim, layers,
                feature_alphabet, feat_input_dim, feature_types)

            predicted_templates = task1_joint_structured_inflection.predict_templates(
                best_model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index,
                test_cluster_lemmas, test_cluster_feat_dicts, feat_index, feature_types)

            accuracy = task1_joint_structured_inflection.evaluate_model(
                predicted_templates, test_cluster_lemmas, test_cluster_feat_dicts, test_cluster_words,
                feature_types, True)
            accuracies.append(accuracy)

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later
            # in the task format
            for i in test_indices:
                joint_index = test_lemmas[i] + ':' + common.get_morph_string(test_feat_dicts[i], feature_types)
                inflection = task1_joint_structured_inflection.instantiate_template(
                    predicted_templates[joint_index], test_lemmas[i])
                final_results[i] = (test_lemmas[i], test_feat_dicts[i], inflection)

        except KeyError:
            print('could not find relevant examples in test data for cluster: ' + cluster_type)

    # Aggregate the per-cluster scores. When no cluster was evaluated (or nothing was
    # counted) the averages are undefined; report -1 instead of dividing by zero.
    mic_nom = sum(amount * acc for amount, acc in accuracies)
    mic_denom = sum(amount for amount, _ in accuracies)
    if accuracies and mic_denom > 0:
        macro_avg_accuracy = sum(acc for _, acc in accuracies) / len(accuracies)
        micro_average_accuracy = mic_nom / mic_denom
    else:
        print('no cluster could be evaluated; reporting -1 as accuracy')
        macro_avg_accuracy = -1
        micro_average_accuracy = -1

    print('macro avg accuracy: ' + str(macro_avg_accuracy))
    print('micro avg accuracy: ' + str(micro_average_accuracy))

    if 'test' in test_path:
        suffix = '.best.test'
    else:
        suffix = '.best'

    common.write_results_file(hyper_params, micro_average_accuracy, train_path, test_path,
                              results_file_path + suffix, sigmorphon_root_dir, final_results)
def main(train_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, epochs, layers,
         optimization, feat_input_dim, nbest):
    """Evaluate the stored best reinflection model (greedy or n-best) and write the results.

    Data is loaded in the four-column form (target word, source word, target features,
    source features), the character and feature indices are rebuilt from the training
    data and everything is put in one pseudo cluster
    (``common.get_single_pseudo_cluster``). For each cluster the stored model is loaded
    with ``load_best_model`` and applied to the test examples:

    * ``nbest == 1``: greedy templates are predicted, evaluated and instantiated.
    * otherwise: ``nbest`` templates are predicted per example and all of them are
      instantiated; no accuracy is computed and -1 is reported.

    Args:
        train_path: training data path; also used to derive the language name that is
            printed next to the accuracy.
        test_path: evaluated data path; when it contains 'test' the output file gets the
            '.best.test' suffix, otherwise '.best'.
        results_file_path: forwarded to ``load_best_model`` and used as the base name of
            the written results file.
        sigmorphon_root_dir: forwarded unchanged to the results writer.
        input_dim, hidden_dim, layers, feat_input_dim: forwarded to ``load_best_model``.
        epochs, optimization: only recorded in the hyper-parameter dictionary.
        nbest: number of candidates per example; 1 selects greedy prediction.
    """
    hyper_params = {
        'INPUT_DIM': input_dim,
        'HIDDEN_DIM': hidden_dim,
        'EPOCHS': epochs,
        'LAYERS': layers,
        'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
        'OPTIMIZATION': optimization,
        'NBEST': nbest
    }

    print('train path = ' + str(train_path))
    print('test path =' + str(test_path))
    for param in hyper_params:
        print(param + '=' + str(hyper_params[param]))

    # load data
    (train_target_words, train_source_words, train_target_feat_dicts,
     train_source_feat_dicts) = prepare_sigmorphon_data.load_data(train_path, 2)
    (test_target_words, test_source_words, test_target_feat_dicts,
     test_source_feat_dicts) = prepare_sigmorphon_data.load_data(test_path, 2)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(
        train_target_words, train_source_words, train_target_feat_dicts, train_source_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    feature_alphabet = common.get_feature_alphabet(train_source_feat_dicts + train_target_feat_dicts)
    feature_alphabet.append(UNK_FEAT)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # feat 2 int
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # TODO: do we need to cluster on both source and target feats?
    #       probably enough to cluster on source here because pos will be same
    #       (no derivational morphology in this task)

    # no clustering, single model
    train_cluster_to_data_indices = common.get_single_pseudo_cluster(train_source_feat_dicts)
    test_cluster_to_data_indices = common.get_single_pseudo_cluster(test_source_feat_dicts)

    # (number of evaluated items, accuracy) pairs as returned by evaluate_model
    accuracies = []
    final_results = {}

    # The prediction mode does not depend on the cluster, so decide it once. This also
    # keeps the name bound when every cluster is skipped.
    is_nbest = nbest != 1

    # factored model: new model per inflection type
    for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices):

        # get the inflection-specific data
        train_cluster_target_words = [train_target_words[i] for i in train_cluster_to_data_indices[cluster_type]]
        if len(train_cluster_target_words) < 1:
            print('only ' + str(len(train_cluster_target_words)) + ' samples for this inflection type. skipping')
            continue
        else:
            print('now evaluating model for cluster ' + str(cluster_index + 1) + '/' +
                  str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' +
                  str(len(train_cluster_target_words)) + ' examples')

        # test best model
        test_indices = test_cluster_to_data_indices[cluster_type]
        test_cluster_source_words = [test_source_words[i] for i in test_indices]
        test_cluster_target_words = [test_target_words[i] for i in test_indices]
        test_cluster_source_feat_dicts = [test_source_feat_dicts[i] for i in test_indices]
        test_cluster_target_feat_dicts = [test_target_feat_dicts[i] for i in test_indices]

        # load best model
        best_model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(
            str(cluster_index), alphabet, results_file_path, input_dim, hidden_dim, layers,
            feature_alphabet, feat_input_dim, feature_types)

        # NOTE(review): the stripped suffix is built for task '1' although this script loads
        # task-2 style data; it only affects the printed language name - confirm intent.
        lang = train_path.split('/')[-1].replace('-task{0}-train'.format('1'), '')

        if not is_nbest:
            # handle greedy prediction
            predicted_templates = task2_ms2s.predict_templates(
                best_model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index,
                test_cluster_source_words, test_cluster_source_feat_dicts, test_cluster_target_feat_dicts,
                feat_index, feature_types)

            accuracy = task2_ms2s.evaluate_model(
                predicted_templates, test_cluster_source_words, test_cluster_source_feat_dicts,
                test_cluster_target_words, test_cluster_target_feat_dicts, feature_types,
                print_results=False)
            accuracies.append(accuracy)
            print('{0} {1} accuracy: {2}'.format(lang, cluster_type, accuracy[1]))

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later
            # in the task format
            for i in test_indices:
                joint_index = test_source_words[i] + ':' + \
                    common.get_morph_string(test_source_feat_dicts[i], feature_types) + ':' + \
                    common.get_morph_string(test_target_feat_dicts[i], feature_types)
                inflection = task2_ms2s.instantiate_template(predicted_templates[joint_index],
                                                             test_source_words[i])
                final_results[i] = (test_source_words[i], test_source_feat_dicts[i], inflection,
                                    test_target_feat_dicts[i])
        else:
            # handle n-best prediction
            predicted_nbset_templates = task2_ms2s.predict_nbest_templates(
                best_model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index,
                test_cluster_source_words, test_cluster_source_feat_dicts, test_cluster_target_feat_dicts,
                feat_index, feature_types, nbest, test_cluster_target_words)

            # same ordering logic as above, but every candidate template is instantiated
            for i in test_indices:
                joint_index = test_source_words[i] + ':' + \
                    common.get_morph_string(test_source_feat_dicts[i], feature_types) + ':' + \
                    common.get_morph_string(test_target_feat_dicts[i], feature_types)
                nbest_inflections = [task2_ms2s.instantiate_template(template, test_source_words[i])
                                     for (template, _) in predicted_nbset_templates[joint_index]]
                final_results[i] = (test_source_words[i], test_source_feat_dicts[i], nbest_inflections,
                                    test_target_feat_dicts[i])

    # Size-weighted average over every evaluated cluster. -1 means "not computed":
    # n-best runs, or no cluster was evaluated.
    micro_average_accuracy = -1
    evaluated_items = sum(amount for amount, _ in accuracies)
    if evaluated_items > 0:
        micro_average_accuracy = sum(amount * acc for amount, acc in accuracies) / float(evaluated_items)

    if 'test' in test_path:
        suffix = '.best.test'
    else:
        suffix = '.best'

    task2_joint_structured_inflection.write_results_file(
        hyper_params, micro_average_accuracy, train_path, test_path, results_file_path + suffix,
        sigmorphon_root_dir, final_results, is_nbest)
def main(train_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, epochs, layers,
         optimization, feat_input_dim, nbest):
    """Evaluate the stored per-POS-cluster models (greedy or n-best) and write the results.

    Train and test data are loaded, the character and feature indices are rebuilt from
    the training data and the data is clustered with ``common.cluster_data_by_pos``. For
    each training cluster the stored model is loaded with ``load_best_model`` and applied
    to the test examples of the same cluster:

    * ``nbest == 1``: greedy templates are predicted, evaluated and instantiated.
    * otherwise: ``nbest`` templates are predicted per example and all of them are
      instantiated; no accuracy is computed and -1 is reported.

    Args:
        train_path: training data path; also used to derive the language name that is
            printed next to each cluster accuracy.
        test_path: evaluated data path; when it contains 'test' the output file gets the
            '.best.test' suffix, otherwise '.best'.
        results_file_path: forwarded to ``load_best_model`` and used as the base name of
            the written results file.
        sigmorphon_root_dir: forwarded unchanged to ``common.write_results_file``.
        input_dim, hidden_dim, layers, feat_input_dim: forwarded to ``load_best_model``.
        epochs, optimization: only recorded in the hyper-parameter dictionary.
        nbest: number of candidates per example; 1 selects greedy prediction.
    """
    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'EPOCHS': epochs, 'LAYERS': layers,
                    'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN, 'OPTIMIZATION': optimization, 'NBEST': nbest}

    print('train path = ' + str(train_path))
    print('test path =' + str(test_path))
    for param in hyper_params:
        print(param + '=' + str(hyper_params[param]))

    # load data
    (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path)
    (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

    # used for character dropout
    alphabet.append(NULL)
    alphabet.append(UNK)

    # used during decoding
    alphabet.append(EPSILON)
    alphabet.append(BEGIN_WORD)
    alphabet.append(END_WORD)

    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)

    # add indices to alphabet - used to indicate when copying from lemma to word
    for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]:
        alphabet.append(marker)

    # feat 2 int
    feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet))))

    # char 2 int
    alphabet_index = dict(zip(alphabet, range(0, len(alphabet))))
    inverse_alphabet_index = {index: char for char, index in alphabet_index.items()}

    # cluster the data by POS type (features)
    train_cluster_to_data_indices = common.cluster_data_by_pos(train_feat_dicts)
    test_cluster_to_data_indices = common.cluster_data_by_pos(test_feat_dicts)

    # short local alias for the module that provides prediction and evaluation
    inflection_module = task1_joint_structured_inflection_blstm_feedback_fix

    # pairs returned by evaluate_model
    # NOTE(review): assumed to be (number of evaluated items, accuracy) - confirm against
    # the evaluate_model of the imported module.
    accuracies = []
    final_results = {}

    # The prediction mode does not depend on the cluster, so decide it once. This also
    # keeps the name bound when every cluster is skipped.
    is_nbest = nbest != 1

    # factored model: new model per inflection type
    for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices):

        # get the inflection-specific data
        train_cluster_words = [train_words[i] for i in train_cluster_to_data_indices[cluster_type]]
        if len(train_cluster_words) < 1:
            print('only ' + str(len(train_cluster_words)) + ' samples for this inflection type. skipping')
            continue
        else:
            print('now evaluating model for cluster ' + str(cluster_index + 1) + '/' +
                  str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' +
                  str(len(train_cluster_words)) + ' examples')

        # test best model
        test_indices = test_cluster_to_data_indices[cluster_type]
        test_cluster_lemmas = [test_lemmas[i] for i in test_indices]
        test_cluster_words = [test_words[i] for i in test_indices]
        test_cluster_feat_dicts = [test_feat_dicts[i] for i in test_indices]

        # load best model
        best_model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(
            str(cluster_index), alphabet, results_file_path, input_dim, hidden_dim, layers,
            feature_alphabet, feat_input_dim, feature_types)

        lang = train_path.split('/')[-1].replace('-task{0}-train'.format('1'), '')

        if not is_nbest:
            # greedy prediction
            predicted_templates = inflection_module.predict_templates(
                best_model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index,
                test_cluster_lemmas, test_cluster_feat_dicts, feat_index, feature_types)

            accuracy = inflection_module.evaluate_model(
                predicted_templates, test_cluster_lemmas, test_cluster_feat_dicts, test_cluster_words,
                feature_types, print_results=False)
            accuracies.append(accuracy)
            print('{0} {1} accuracy: {2}'.format(lang, cluster_type, accuracy[1]))

            # get predicted_templates in the same order they appeared in the original file
            # iterate through them and foreach concat morph, lemma, features in order to print later
            # in the task format
            for i in test_indices:
                joint_index = test_lemmas[i] + ':' + common.get_morph_string(test_feat_dicts[i], feature_types)
                inflection = inflection_module.instantiate_template(predicted_templates[joint_index],
                                                                    test_lemmas[i])
                final_results[i] = (test_lemmas[i], test_feat_dicts[i], inflection)
        else:
            # n-best prediction
            predicted_nbset_templates = inflection_module.predict_nbest_templates(
                best_model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index,
                test_cluster_lemmas, test_cluster_feat_dicts, feat_index, feature_types, nbest,
                test_cluster_words)

            # same ordering logic as above, but every candidate template is instantiated
            for i in test_indices:
                joint_index = test_lemmas[i] + ':' + common.get_morph_string(test_feat_dicts[i], feature_types)
                nbest_inflections = [inflection_module.instantiate_template(template, test_lemmas[i])
                                     for (template, _) in predicted_nbset_templates[joint_index]]
                final_results[i] = (test_lemmas[i], test_feat_dicts[i], nbest_inflections)

    # Size-weighted average over all evaluated clusters, rather than the accuracy of
    # whichever cluster happened to be processed last. -1 means "not computed":
    # n-best runs, or no cluster was evaluated.
    micro_average_accuracy = -1
    evaluated_items = sum(amount for amount, _ in accuracies)
    if evaluated_items > 0:
        micro_average_accuracy = sum(amount * acc for amount, acc in accuracies) / float(evaluated_items)

    if 'test' in test_path:
        suffix = '.best.test'
    else:
        suffix = '.best'

    common.write_results_file(hyper_params, micro_average_accuracy, train_path, test_path,
                              results_file_path + suffix, sigmorphon_root_dir, final_results, is_nbest)
def predict_nbest_templates(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index,
                            source_words, source_feats, target_feats, feat_index, feature_types, nbest, words):
    """Predict the n-best templates for every source word and report beam-search fixes.

    For each (source word, source features, target features) triple both the greedy
    template and the ``nbest`` candidate list are predicted. As a debugging aid, every
    candidate that instantiates to the gold word while the greedy guess does not is
    printed, and the number of words rescued this way is summarised at the end.

    Args:
        model, decoder_rnn, encoder_frnn, encoder_rrnn: forwarded to the prediction helpers.
        alphabet_index, inverse_alphabet_index, feat_index, feature_types: forwarded to the
            prediction helpers; ``feature_types`` is also used to build the result keys.
        source_words, source_feats, target_feats: parallel sequences describing the inputs.
        nbest: number of candidates requested from ``predict_nbest_template``.
        words: gold target words, indexed like ``source_words``; only used for the report.

    Returns:
        dict mapping 'source_word:source_morph:target_morph' to the candidate list returned
        by ``predict_nbest_template`` (iterated here as (template, score) pairs).
    """
    predictions = {}
    fix_count = 0
    total_words = len(source_words)

    for i, (source_word, source_feat_dict, target_feat_dict) in enumerate(
            zip(source_words, source_feats, target_feats)):
        predicted_template = predict_inflection_template(model, encoder_frnn, encoder_rrnn, decoder_rnn,
                                                         source_word, source_feat_dict, target_feat_dict,
                                                         alphabet_index, inverse_alphabet_index, feat_index,
                                                         feature_types)

        predicted_nbest = predict_nbest_template(model, encoder_frnn, encoder_rrnn, decoder_rnn, source_word,
                                                 source_feat_dict, target_feat_dict, alphabet_index,
                                                 inverse_alphabet_index, feat_index, feature_types, nbest)

        # DEBUG: compare the greedy guess and every n-best candidate with the gold word
        greedy_guess = instantiate_template(predicted_template, source_word)
        gsign = 'V' if words[i] == greedy_guess else 'X'

        # several candidates may instantiate to the same correct word; the summary counts
        # words, so each word may contribute to fix_count only once
        word_counted = False
        for j, (s, p) in enumerate(predicted_nbest):
            nbest_guess = instantiate_template(s, source_word)
            nsign = 'V' if words[i] == nbest_guess else 'X'

            if gsign == 'X' and nsign == 'V':
                if not word_counted:
                    fix_count += 1
                    word_counted = True
                print(str(i) + ' out of ' + str(total_words))
                print(source_word.encode('utf8') + '\n')
                encoded_template = [c.encode('utf8') for c in predicted_template]
                joined = ''.join(encoded_template)
                print('GREEDY: \n' + joined)
                print(greedy_guess.encode('utf8') + ' ' + gsign + '\n')
                print(u'{0}-BEST:'.format(j + 1))
                print(str(''.join(s).encode('utf8')) + ' ' + str(p))
                print(nbest_guess.encode('utf8') + ' ' + nsign + '\n')

        joint_index = source_word + ':' + common.get_morph_string(source_feat_dict, feature_types) \
                      + ':' + common.get_morph_string(target_feat_dict, feature_types)
        predictions[joint_index] = predicted_nbest

    # an empty input has nothing to fix; avoid dividing by zero in the summary
    if total_words:
        fixed_percent = float(fix_count) / total_words * 100
    else:
        fixed_percent = 0.0

    print('================================================================')
    print('beam search fixed {0} out of {1}, {2}%'.format(fix_count, total_words, fixed_percent))
    print('================================================================')
    return predictions
def main(train_path, dev_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim,
         feat_input_dim, epochs, layers, optimization, regularization, learning_rate, plot, override):
    """Plot SVD reductions of encoder states and embeddings, then one attention example.

    The model and data are obtained from ``init_model``. Dev-set lemmas and features are
    encoded with ``soft_attention.encode_feats_and_chars``; the resulting per-symbol
    vectors are plotted with ``plot_svd_reduction`` (labelled by location, by character
    and by feature), followed by the character and feature embedding tables. Finally
    ``plot_attn_for_inflection`` is called for dev example 1000 if its lemma is shorter
    than six characters.

    ``sigmorphon_root_dir``, ``plot`` and ``override`` are not used in this function body;
    ``epochs``, ``optimization``, ``regularization`` and ``learning_rate`` are only
    recorded in ``hyper_params``.

    NOTE(review): the original indentation of this function was lost. The nesting below
    is a reconstruction; in particular the bare ``return`` is placed at function level,
    which makes the hard-coded examples after it unreachable - confirm against the
    original file.
    """
    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'FEAT_INPUT_DIM': feat_input_dim,
                    'EPOCHS': epochs, 'LAYERS': layers, 'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
                    'OPTIMIZATION': optimization, 'PATIENCE': MAX_PATIENCE, 'REGULARIZATION': regularization,
                    'LEARNING_RATE': learning_rate}

    (initial_model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, W_c, W__a, U__a,
     v__a, alphabet_index, feat_index, feature_types, inverse_alphabet_index, dev_words, dev_lemmas,
     dev_feat_dicts) = init_model(dev_path, feat_input_dim, hidden_dim, input_dim, layers, results_file_path,
                                  test_path, train_path)

    # char_lookup = initial_model["char_lookup"]
    # feat_lookup = initial_model["feat_lookup"]

    # "what is learned by the encoder" experiment:
    # get lots of input words (dev set)
    # run blstm encoder on them (encode feats and chars)
    # experiments:
    # we want to understand what's captured / what's more significant: current symbol, context or all?
    # to do so:
    # take the blstm rep. for the same character, same context, different positions. how will it cluster
    # by position?
    # i.e: abbbbbb, babbbb, bbabbbb, bbbabbbb, bbbbabb, bbbbbba...
    # take the blstm rep. for the same character, same position, diff. contexts. how will it cluster
    # by context?
    # aaaabaaaa, bbbbbbbbb, cccbcccc, dddbdddd, eeeebeeee...
    # take the blstm rep. for diff characters, same position, same contexts. how will it cluster
    # by character?
    # aaaaaaaa, aaabaaa, aaacaaa, aaadaaa, aaaeaaa, aaafaaa...
    # other option: take (all?) "natural" (dev) examples, throw on SVD, paint by location, character,
    # context (last one is more complex but can probably think about something)

    # NOTE(review): the slice [0:len - 1] leaves out the last dev example - confirm this is intended.
    start = 0
    end = len(dev_lemmas) - 1
    encoded_vecs = {}
    index_to_feats_and_lemma = {}

    # get bilstm encoder representation
    # the key is the morph string followed by the lemma, so repeated (features, lemma) pairs collapse
    for lemma, feats in zip(dev_lemmas[start:end], dev_feat_dicts[start:end]):
        index = common.get_morph_string(feats, feature_types) + lemma
        index_to_feats_and_lemma[index] = (feats, lemma)
        encoded_vecs[index] = soft_attention.encode_feats_and_chars(alphabet_index, char_lookup, encoder_frnn,
                                                                    encoder_rrnn, feat_index, feat_lookup, feats,
                                                                    feature_types, lemma)

    # get examples (encoder hidden states) by location: 1, 2, 3, 4, 5...
    location_to_vec = {}
    for encoded_rep_index in encoded_vecs:
        encoded_rep = encoded_vecs[encoded_rep_index]
        for location, vec in enumerate(encoded_rep):
            if location in location_to_vec:
                location_to_vec[location].append(vec)
            else:
                location_to_vec[location] = [vec]

    location_labels = []
    vecs = []

    # keep at most 100 vectors per location
    # (location_labels and vecs are only referenced by the commented-out plot call below)
    for key in location_to_vec:
        for value in location_to_vec[key][0:100]:
            location_labels.append(key)
            vecs.append(value.vec_value())

    # plot_svd_reduction(hidden_states, location_labels, title='SVD for encoder hidden states by location')

    # get examples (encoder hidden states) by character: א,ב,ג,ד,ה,ו...
    char_to_vec = {}
    char_vecs = []
    char_labels = []
    char_location_labels = []
    current_char_labels = []
    feat_vecs = []
    feat_labels = []
    for encoded_rep_index in encoded_vecs:

        # get bilstm encoding for the sequence
        encoded_rep = encoded_vecs[encoded_rep_index]

        # should skip the feat vecs (?)
        # get matching lemma and features
        feats, lemma = index_to_feats_and_lemma[encoded_rep_index]
        sorted_feats = []
        for feat in sorted(feature_types):
            if feat in feats:
                sorted_feats.append(u'{}:{}'.format(feat, feats[feat]))

        # symbol sequence used to label encoded_rep position by position
        # presumably mirrors the order encode_feats_and_chars feeds the encoder - TODO confirm
        seq_symbols = ['<'] + list(sorted_feats) + list(lemma) + ['>']

        # sort vectors by symbol
        for i, symbol in enumerate(seq_symbols):
            if symbol in lemma:
                # lemma character: label it with its neighbours ('_' at the sequence edges) and position
                char_vecs.append(encoded_rep[i])
                if i > 0:
                    prev_symbol = seq_symbols[i-1]
                else:
                    prev_symbol = '_'
                if i < len(seq_symbols) - 1:
                    next_symbol = seq_symbols[i+1]
                else:
                    next_symbol = '_'
                char_labels.append(u'{} ({},{},{})'.format(symbol, prev_symbol, i, next_symbol))
                char_location_labels.append(u'{}'.format(i))
                current_char_labels.append(u'{}'.format(symbol))
            else:
                # feature symbol ('name:value'); the '<' and '>' boundary symbols match neither branch
                if symbol in sorted_feats:
                    feat_vecs.append(encoded_rep[i])
                    feat_labels.append(symbol)

            # every symbol, including boundaries, is also grouped by its surface form
            if symbol in char_to_vec:
                char_to_vec[symbol].append(encoded_rep[i])
            else:
                char_to_vec[symbol] = [encoded_rep[i]]

    symbol_labels = []
    vecs = []

    # take 20 samples from each symbol
    # (symbol_labels and vecs are only referenced by the commented-out plot call below)
    for key in char_to_vec:
        for value in char_to_vec[key][0:20]:
            symbol_labels.append(key)
            vecs.append(value.vec_value())

    # plot_svd_reduction(all_hidden_states, symbol_labels, title='SVD for encoder hidden states by symbol')

    char_hidden_states = np.array([v.vec_value() for v in char_vecs])
    # plot_svd_reduction(char_hidden_states[0:100], char_labels[0:100],
    #                    title='SVD for encoder hidden states by symbol (characters only)')

    # same first 200 character states, coloured once by position and once by character
    plot_svd_reduction(char_hidden_states[0:200], char_labels[0:200], color_labels=char_location_labels[0:200],
                       title='SVD for encoder hidden states by location (characters only)')

    plot_svd_reduction(char_hidden_states[0:200], char_labels[0:200], color_labels=current_char_labels[0:200],
                       title='SVD for encoder hidden states by character (characters only)')

    # first 500 character states, labelled with the bare character
    plot_svd_reduction(char_hidden_states[0:500], current_char_labels[0:500],
                       color_labels=char_location_labels[0:500],
                       title='Soft Attention - Encoded Inputs by Location')

    plot_svd_reduction(char_hidden_states[0:500], current_char_labels[0:500],
                       color_labels=current_char_labels[0:500],
                       title='Soft Attention - Encoded Inputs by Character')

    # feature states are coloured by the first four characters of their 'name:value' label
    feat_hidden_states = np.array([v.vec_value() for v in feat_vecs])
    plot_svd_reduction(feat_hidden_states[0:50], feat_labels[0:50],
                       color_labels=[f[0:4] for f in feat_labels[0:50]],
                       title = 'SVD for encoder hidden states by type (features only)')

    # TODO: get examples (encoder hidden states) by context: after/before א,ב,ג,ד,ה...

    char_embeddings = {}
    char_embeddings_matrix = []
    clean_alphabet_index = {}

    # print SVD for char embeddings
    # workaround to remove feat embeddings from plot: keep only single-character, non-digit
    # symbols that are not one of the listed special symbols
    for char in alphabet_index:
        if not len(char) > 1 and not char.isdigit() and char not in [UNK, UNK_FEAT, EPSILON, NULL]:
            clean_alphabet_index[char] = alphabet_index[char]

    # the matrix rows are appended in the iteration order of clean_alphabet_index, which is
    # also what is passed to the plot as labels
    for char in clean_alphabet_index:
        char_embeddings[char] = char_lookup[clean_alphabet_index[char]].vec_value()
        char_embeddings_matrix.append(char_lookup[clean_alphabet_index[char]].vec_value())
    X = np.array(char_embeddings_matrix)
    plot_svd_reduction(X, clean_alphabet_index, title = 'SVD for character embeddings')

    # print SVD for feat embeddings
    feat_embeddings = {}
    feat_embeddings_matrix = []
    for feat in feat_index:
        feat_embeddings[feat] = feat_lookup[feat_index[feat]].vec_value()
        feat_embeddings_matrix.append(feat_lookup[feat_index[feat]].vec_value())
    Y = np.array(feat_embeddings_matrix)
    plot_svd_reduction(Y, feat_index, title = 'SVD for feature embeddings')

    # attention plot for a single dev example (index 1000), only if its lemma is short
    start = 1000
    end = 1001
    for lemma, feats in zip(dev_lemmas[start:end], dev_feat_dicts[start:end]):
        if len(lemma) < 6:
            plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index,
                                     feature_types, initial_model, inverse_alphabet_index, dev_path,
                                     feat_input_dim, feats, hidden_dim, hyper_params, input_dim, layers,
                                     results_file_path, test_path, train_path, lemma)

    # NOTE(review): with the return at this level everything below is dead code (hard-coded
    # Hebrew examples kept from earlier experiments) - confirm the intended placement.
    return

    # get user input word and features
    feats = {u'pos': u'VB', u'num': u'S', u'per': u'2', u'gen': u'M', u'binyan': u'HITPAEL', u'tense': u'PAST'}
    user_input = u'ספר'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path,
                             user_input)

    feats = {u'pos': u'JJ', u'num': u'P', u'def': u'DEF', u'gen': u'F'}
    user_input = u'צמחוני'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path,
                             user_input)

    feats = {u'pos': u'VB', u'num': u'S', u'gen': u'F', u'per': u'3', u'tense': u'FUTURE', u'binyan': u'PAAL'}
    user_input = u'שש'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path,
                             user_input)

    # feats = {u'pos': u'NN', u'num': u'P', u'gen': u'F', u'poss_per': u'2', u'poss_gen': u'M', u'poss_num': u'P'}
    feats = {u'pos': u'NN', u'num': u'P', u'gen': u'F', u'poss_per': u'2', u'poss_gen': u'M', u'poss_num': u'P'}
    # u'tense' : u'FUTURE', u'poss_per': u'2', u'poss_gen': u'M', u'poss_num': u'P'}
    user_input = u'כלב'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path,
                             user_input)

    feats = {u'pos': u'VB', u'num': u'P', u'gen': u'M', u'per': u'3', u'tense': u'FUTURE', u'binyan': u'PAAL'}
    user_input = u'ישן'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path,
                             user_input)

    feats = {u'pos': u'VB', u'num': u'P', u'gen': u'F', u'per': u'3', u'tense': u'FUTURE', u'binyan': u'PAAL'}
    user_input = u'ישן'
    plot_attn_for_inflection(alphabet_index, decoder_rnn, encoder_frnn, encoder_rrnn, feat_index, feature_types,
                             initial_model, inverse_alphabet_index, dev_path, feat_input_dim, feats, hidden_dim,
                             hyper_params, input_dim, layers, results_file_path, test_path, train_path,
                             user_input)
    print 'Bye!'
def main(train_path, dev_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim,
         feat_input_dim, epochs, layers, optimization, regularization, learning_rate, plot, override, eval_only,
         ensemble):
    """Train (or load) a model, predict the test set and write externally-evaluated results.

    An existing '<results_file_path>_bestmodel.txt' is loaded unless ``override`` is set;
    otherwise a fresh model is built. Unless ``eval_only`` is set the model is then
    trained with ``train_model``. Test predictions come from the ensemble majority vote
    when ``ensemble`` is truthy and from the single model otherwise. If any predictions
    were produced they are evaluated and handed to
    ``common.write_results_file_and_evaluate_externally``.

    ``regularization`` and ``learning_rate`` are only recorded in the hyper-parameter
    dictionary; ``plot`` is forwarded to ``train_model``.
    """
    hyper_params = {'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'FEAT_INPUT_DIM': feat_input_dim,
                    'EPOCHS': epochs, 'LAYERS': layers, 'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN,
                    'OPTIMIZATION': optimization, 'PATIENCE': MAX_PATIENCE, 'REGULARIZATION': regularization,
                    'LEARNING_RATE': learning_rate}

    print('train path = ' + str(train_path))
    print('test path =' + str(test_path))
    for name, value in hyper_params.items():
        print(name + '=' + str(value))

    # read the three data splits
    train_words, train_lemmas, train_feat_dicts = prepare_sigmorphon_data.load_data(train_path)
    test_words, test_lemmas, test_feat_dicts = prepare_sigmorphon_data.load_data(test_path)
    dev_words, dev_lemmas, dev_feat_dicts = prepare_sigmorphon_data.load_data(dev_path)
    alphabet, feature_types = prepare_sigmorphon_data.get_alphabet(train_words, train_lemmas, train_feat_dicts)

    # special symbols: character dropout first, then the decoding symbols
    for special_symbol in (NULL, UNK, EPSILON, BEGIN_WORD, END_WORD):
        alphabet.append(special_symbol)

    # numeric markers - used to indicate when copying from lemma to word
    for i in xrange(MAX_PREDICTION_LEN):
        alphabet.append(str(i))

    # character <-> integer lookups
    alphabet_index = {symbol: position for position, symbol in enumerate(alphabet)}
    inverse_alphabet_index = dict((position, symbol) for symbol, position in alphabet_index.items())

    # feature -> integer lookup
    feature_alphabet = common.get_feature_alphabet(train_feat_dicts)
    feature_alphabet.append(UNK_FEAT)
    feat_index = {feature: position for position, feature in enumerate(feature_alphabet)}

    model_file_name = results_file_path + '_bestmodel.txt'
    reuse_saved_model = os.path.isfile(model_file_name) and not override
    if not reuse_saved_model:
        print('could not find existing model or explicit override was requested. starting training from scratch...')
        model, encoder_frnn, encoder_rrnn, decoder_rnn = build_model(alphabet, input_dim, hidden_dim, layers,
                                                                     feature_types, feat_input_dim,
                                                                     feature_alphabet)
    else:
        print('loading existing model from {}'.format(model_file_name))
        model, encoder_frnn, encoder_rrnn, decoder_rnn = task1_attention_implementation.load_best_model(
            alphabet, results_file_path, input_dim, hidden_dim, layers, feature_alphabet, feat_input_dim,
            feature_types)
        print('loaded existing model successfully')

    if eval_only:
        print('skipped training, evaluating on test set...')
    else:
        # training replaces the loaded / freshly built model with the trained one
        model, last_epoch, best_epoch = train_model(model, encoder_frnn, encoder_rrnn, decoder_rnn, train_lemmas,
                                                    train_feat_dicts, train_words, dev_lemmas, dev_feat_dicts,
                                                    dev_words, alphabet_index, inverse_alphabet_index, epochs,
                                                    optimization, results_file_path, feat_index, feature_types,
                                                    plot)
        print('last epoch is {}'.format(last_epoch))
        print('best epoch is {}'.format(best_epoch))
        print('finished training')

    if ensemble:
        predicted_sequences = predict_with_ensemble_majority(alphabet, alphabet_index, ensemble, feat_index,
                                                             feat_input_dim, feature_alphabet, feature_types,
                                                             hidden_dim, input_dim, inverse_alphabet_index,
                                                             layers, test_feat_dicts, test_lemmas, test_words)
    else:
        predicted_sequences = predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index,
                                                inverse_alphabet_index, test_lemmas, test_feat_dicts,
                                                feat_index, feature_types)

    # nothing predicted - nothing to evaluate or write
    if len(predicted_sequences) == 0:
        return

    # evaluate last model on test
    _, accuracy = evaluate_model(predicted_sequences, test_lemmas, test_feat_dicts, test_words, feature_types,
                                 print_results=False)
    print('initial eval: {}% accuracy'.format(accuracy))

    # collect the predictions by test-set position so the output keeps the input order
    final_results = {}
    for position, lemma in enumerate(test_lemmas):
        feat_dict = test_feat_dicts[position]
        joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types)
        final_results[position] = (lemma, feat_dict, ''.join(predicted_sequences[joint_index]))

    # evaluate best models
    common.write_results_file_and_evaluate_externally(hyper_params, accuracy, train_path, test_path,
                                                      results_file_path + '.external_eval.txt',
                                                      sigmorphon_root_dir, final_results)
def evaluate_ndst(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet, feature_types, hidden_dim, hyper_params, input_dim, inverse_alphabet_index, layers, results_file_path, sigmorphon_root_dir, test_feat_dicts, test_lemmas, test_path, test_words, train_path, print_results=False): print "<<<<<<<<<<<<<<<<<< DEBUG ==>evaluate ndst" accuracies = [] final_results = {} if ensemble: # load ensemble models ensemble_model_names = ensemble.split(',') print 'ensemble paths:\n' print '\n'.join(ensemble_model_names) ensemble_models = [] for ens in ensemble_model_names: model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model( alphabet, ens, input_dim, hidden_dim, layers, feature_alphabet, feat_input_dim, feature_types) ensemble_models.append((model, encoder_frnn, encoder_rrnn, decoder_rnn)) # predict the entire test set with each model in the ensemble print 'predicting...' ensemble_predictions = [] count = 0 for em in ensemble_models: count += 1 model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn = em predicted_sequences = predict_sequences(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, alphabet_index, inverse_alphabet_index, test_lemmas, test_feat_dicts, feat_index, feature_types) ensemble_predictions.append(predicted_sequences) print 'finished to predict with ensemble: {}/{}'.format(count, len(ensemble_model_names)) predicted_sequences = {} string_to_sequence = {} # perform voting for each test input - joint_index is a lemma+feats representation test_data = zip(test_lemmas, test_feat_dicts, test_words) for i, (lemma, feat_dict, word) in enumerate(test_data): joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types) prediction_counter = defaultdict(int) # count votes for en in ensemble_predictions: prediction_str = ''.join(en[joint_index]).replace(STEP, '') prediction_counter[prediction_str] += 1 
string_to_sequence[prediction_str] = en[joint_index] if print_results: print 'template: {} prediction: {}'.format(en[joint_index].encode('utf8'), prediction_str.encode('utf8')) # return the most predicted output predicted_sequence_string = max(prediction_counter, key=prediction_counter.get) # hack: if chosen without majority, pick shortest prediction if prediction_counter[predicted_sequence_string] == 1: predicted_sequence_string = min(prediction_counter, key=len) if print_results: print 'chosen:{} with {} votes\n'.format(predicted_sequence_string.encode('utf8'), prediction_counter[predicted_sequence_string]) predicted_sequences[joint_index] = string_to_sequence[predicted_sequence_string] # progress indication sys.stdout.write("\r%d%%" % (float(i) / len(test_lemmas) * 100)) sys.stdout.flush() else: # load best model - no ensemble best_model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model(alphabet, results_file_path, input_dim, hidden_dim, layers, feature_alphabet, feat_input_dim, feature_types) try: print "predicting" predicted_sequences = predict_sequences(best_model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, alphabet_index, inverse_alphabet_index, test_lemmas, test_feat_dicts, feat_index, feature_types) except Exception as e: print "except1!" print e traceback.print_exc() # run internal evaluation try: accuracy = evaluate_model(predicted_sequences, test_lemmas, test_feat_dicts, test_words, feature_types, print_results=False) accuracies.append(accuracy) except Exception as e: print "except2!" 
print e traceback.print_exc() # get predicted_sequences in the same order they appeared in the original file # iterate through them and foreach concat morph, lemma, features in order to print later in the task format for i, lemma in enumerate(test_lemmas): joint_index = test_lemmas[i] + ':' + common.get_morph_string(test_feat_dicts[i], feature_types) inflection = ''.join(predicted_sequences[joint_index]).replace(STEP, '') final_results[i] = (test_lemmas[i], test_feat_dicts[i], inflection) accuracy_vals = [accuracies[i][1] for i in xrange(len(accuracies))] macro_avg_accuracy = sum(accuracy_vals) / len(accuracies) print 'macro avg accuracy: ' + str(macro_avg_accuracy) mic_nom = sum([accuracies[i][0] * accuracies[i][1] for i in xrange(len(accuracies))]) mic_denom = sum([accuracies[i][0] for i in xrange(len(accuracies))]) micro_average_accuracy = mic_nom / mic_denom print 'micro avg accuracy: ' + str(micro_average_accuracy) if 'test' in test_path: suffix = '.best.test' else: suffix = '.best' common.write_results_file_and_evaluate_externally(hyper_params, micro_average_accuracy, train_path, test_path, results_file_path + suffix, sigmorphon_root_dir, final_results)
def main(train_path, dev_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, feat_input_dim, epochs, layers, optimization, regularization, learning_rate, plot, override, eval_only, ensemble): hyper_params = { 'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'FEAT_INPUT_DIM': feat_input_dim, 'EPOCHS': epochs, 'LAYERS': layers, 'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN, 'OPTIMIZATION': optimization, 'PATIENCE': MAX_PATIENCE, 'REGULARIZATION': regularization, 'LEARNING_RATE': learning_rate } print 'train path = ' + str(train_path) print 'test path =' + str(test_path) for param in hyper_params: print param + '=' + str(hyper_params[param]) # load train and test data (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path) (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path) (dev_words, dev_lemmas, dev_feat_dicts) = prepare_sigmorphon_data.load_data(dev_path) alphabet, feature_types = prepare_sigmorphon_data.get_alphabet( train_words, train_lemmas, train_feat_dicts) # used for character dropout alphabet.append(NULL) alphabet.append(UNK) # used during decoding alphabet.append(EPSILON) alphabet.append(BEGIN_WORD) alphabet.append(END_WORD) # add indices to alphabet - used to indicate when copying from lemma to word for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]: alphabet.append(marker) # char 2 int alphabet_index = dict(zip(alphabet, range(0, len(alphabet)))) inverse_alphabet_index = { index: char for char, index in alphabet_index.items() } # feat 2 int feature_alphabet = common.get_feature_alphabet(train_feat_dicts) feature_alphabet.append(UNK_FEAT) feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet)))) model_file_name = results_file_path + '_bestmodel.txt' if os.path.isfile(model_file_name) and not override: print 'loading existing model from {}'.format(model_file_name) model, encoder_frnn, encoder_rrnn, decoder_rnn = 
task1_attention_implementation.load_best_model( alphabet, results_file_path, input_dim, hidden_dim, layers, feature_alphabet, feat_input_dim, feature_types) print 'loaded existing model successfully' else: print 'could not find existing model or explicit override was requested. starting training from scratch...' model, encoder_frnn, encoder_rrnn, decoder_rnn = build_model( alphabet, input_dim, hidden_dim, layers, feature_types, feat_input_dim, feature_alphabet) if not eval_only: # start training trained_model, last_epoch, best_epoch = train_model( model, encoder_frnn, encoder_rrnn, decoder_rnn, train_lemmas, train_feat_dicts, train_words, dev_lemmas, dev_feat_dicts, dev_words, alphabet_index, inverse_alphabet_index, epochs, optimization, results_file_path, feat_index, feature_types, plot) model = trained_model print 'last epoch is {}'.format(last_epoch) print 'best epoch is {}'.format(best_epoch) print 'finished training' else: print 'skipped training, evaluating on test set...' if ensemble: predicted_sequences = predict_with_ensemble_majority( alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet, feature_types, hidden_dim, input_dim, inverse_alphabet_index, layers, test_feat_dicts, test_lemmas, test_words) else: predicted_sequences = predict_sequences(model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, test_lemmas, test_feat_dicts, feat_index, feature_types) if len(predicted_sequences) > 0: # evaluate last model on test amount, accuracy = evaluate_model(predicted_sequences, test_lemmas, test_feat_dicts, test_words, feature_types, print_results=False) print 'initial eval: {}% accuracy'.format(accuracy) final_results = {} for i in xrange(len(test_lemmas)): joint_index = test_lemmas[i] + ':' + common.get_morph_string( test_feat_dicts[i], feature_types) inflection = predicted_sequences[joint_index] final_results[i] = (test_lemmas[i], test_feat_dicts[i], ''.join(inflection)) # evaluate best models 
common.write_results_file_and_evaluate_externally( hyper_params, accuracy, train_path, test_path, results_file_path + '.external_eval.txt', sigmorphon_root_dir, final_results) return
def main(train_path, test_path, results_file_path, sigmorphon_root_dir, input_dim, hidden_dim, epochs, layers, optimization, feat_input_dim): hyper_params = { 'INPUT_DIM': input_dim, 'HIDDEN_DIM': hidden_dim, 'EPOCHS': epochs, 'LAYERS': layers, 'MAX_PREDICTION_LEN': MAX_PREDICTION_LEN, 'OPTIMIZATION': optimization } print 'train path = ' + str(train_path) print 'test path =' + str(test_path) for param in hyper_params: print param + '=' + str(hyper_params[param]) # load data (train_words, train_lemmas, train_feat_dicts) = prepare_sigmorphon_data.load_data(train_path) (test_words, test_lemmas, test_feat_dicts) = prepare_sigmorphon_data.load_data(test_path) alphabet, feature_types = prepare_sigmorphon_data.get_alphabet( train_words, train_lemmas, train_feat_dicts) # used for character dropout alphabet.append(NULL) alphabet.append(UNK) # used during decoding alphabet.append(EPSILON) alphabet.append(BEGIN_WORD) alphabet.append(END_WORD) feature_alphabet = common.get_feature_alphabet(train_feat_dicts) feature_alphabet.append(UNK_FEAT) # add indices to alphabet - used to indicate when copying from lemma to word for marker in [str(i) for i in xrange(MAX_PREDICTION_LEN)]: alphabet.append(marker) # feat 2 int feat_index = dict(zip(feature_alphabet, range(0, len(feature_alphabet)))) # char 2 int alphabet_index = dict(zip(alphabet, range(0, len(alphabet)))) inverse_alphabet_index = { index: char for char, index in alphabet_index.items() } # cluster the data by POS type (features) train_cluster_to_data_indices = common.cluster_data_by_pos( train_feat_dicts) test_cluster_to_data_indices = common.cluster_data_by_pos(test_feat_dicts) # cluster the data by inflection type (features) # train_cluster_to_data_indices = common.cluster_data_by_morph_type(train_feat_dicts, feature_types) # test_cluster_to_data_indices = common.cluster_data_by_morph_type(test_feat_dicts, feature_types) accuracies = [] final_results = {} # factored model: new model per inflection type for cluster_index, 
cluster_type in enumerate( train_cluster_to_data_indices): # get the inflection-specific data train_cluster_words = [ train_words[i] for i in train_cluster_to_data_indices[cluster_type] ] if len(train_cluster_words) < 1: print 'only ' + str( len(train_cluster_words )) + ' samples for this inflection type. skipping' continue else: print 'now evaluating model for cluster ' + str(cluster_index + 1) + '/' + \ str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' + \ str(len(train_cluster_words)) + ' examples' # test best model try: test_cluster_lemmas = [ test_lemmas[i] for i in test_cluster_to_data_indices[cluster_type] ] test_cluster_words = [ test_words[i] for i in test_cluster_to_data_indices[cluster_type] ] test_cluster_feat_dicts = [ test_feat_dicts[i] for i in test_cluster_to_data_indices[cluster_type] ] # load best model best_model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model( str(cluster_index), alphabet, results_file_path, input_dim, hidden_dim, layers, feature_alphabet, feat_input_dim, feature_types) predicted_templates = task1_joint_structured_inflection_feedback_fix.predict_templates( best_model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, test_cluster_lemmas, test_cluster_feat_dicts, feat_index, feature_types) accuracy = task1_joint_structured_inflection_feedback_fix.evaluate_model( predicted_templates, test_cluster_lemmas, test_cluster_feat_dicts, test_cluster_words, feature_types, print_results=False) accuracies.append(accuracy) # get predicted_templates in the same order they appeared in the original file # iterate through them and foreach concat morph, lemma, features in order to print later in the task format for i in test_cluster_to_data_indices[cluster_type]: joint_index = test_lemmas[i] + ':' + common.get_morph_string( test_feat_dicts[i], feature_types) inflection = task1_joint_structured_inflection_feedback_fix.instantiate_template( predicted_templates[joint_index], 
test_lemmas[i]) final_results[i] = (test_lemmas[i], test_feat_dicts[i], inflection) except KeyError: print 'could not find relevant examples in test data for cluster: ' + cluster_type accuracy_vals = [accuracies[i][1] for i in xrange(len(accuracies))] macro_avg_accuracy = sum(accuracy_vals) / len(accuracies) print 'macro avg accuracy: ' + str(macro_avg_accuracy) mic_nom = sum( [accuracies[i][0] * accuracies[i][1] for i in xrange(len(accuracies))]) mic_denom = sum([accuracies[i][0] for i in xrange(len(accuracies))]) micro_average_accuracy = mic_nom / mic_denom print 'micro avg accuracy: ' + str(micro_average_accuracy) if 'test' in test_path: suffix = '.best.test' else: suffix = '.best' common.write_results_file_and_evaluate_externally( hyper_params, micro_average_accuracy, train_path, test_path, results_file_path + suffix, sigmorphon_root_dir, final_results)
def evaluate_ndst(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet, feature_types, hidden_dim, hyper_params, input_dim, inverse_alphabet_index, layers, results_file_path, sigmorphon_root_dir, test_cluster_to_data_indices, test_feat_dicts, test_lemmas, test_path, test_words, train_cluster_to_data_indices, train_path, train_words): accuracies = [] final_results = {} # factored model: new model per inflection type for cluster_index, cluster_type in enumerate(train_cluster_to_data_indices): # get the inflection-specific data train_cluster_words = [train_words[i] for i in train_cluster_to_data_indices[cluster_type]] if len(train_cluster_words) < 1: print 'only {} samples for this inflection type. skipping'.format(str(len(train_cluster_words))) continue else: print 'now evaluating model for cluster ' + str(cluster_index + 1) + '/' + \ str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' + \ str(len(train_cluster_words)) + ' examples' # test best model try: test_cluster_lemmas = [test_lemmas[i] for i in test_cluster_to_data_indices[cluster_type]] test_cluster_words = [test_words[i] for i in test_cluster_to_data_indices[cluster_type]] test_cluster_feat_dicts = [test_feat_dicts[i] for i in test_cluster_to_data_indices[cluster_type]] if ensemble: # load ensemble models ensemble_model_names = ensemble.split(',') print 'ensemble paths:\n' print '\n'.join(ensemble_model_names) ensemble_models = [] for ens in ensemble_model_names: model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model( str(cluster_index), alphabet, ens, input_dim, hidden_dim, layers, feature_alphabet, feat_input_dim, feature_types) ensemble_models.append((model, encoder_frnn, encoder_rrnn, decoder_rnn)) # predict the entire test set with each model in the ensemble ensemble_predictions = [] for em in ensemble_models: model, encoder_frnn, encoder_rrnn, decoder_rnn = em predicted_templates = predict_templates(model, decoder_rnn, encoder_frnn, 
encoder_rrnn, alphabet_index, inverse_alphabet_index, test_cluster_lemmas, test_cluster_feat_dicts, feat_index, feature_types) ensemble_predictions.append(predicted_templates) predicted_templates = {} string_to_template = {} # perform voting for each test input - joint_index is a lemma+feats representation test_data = zip(test_cluster_lemmas, test_cluster_feat_dicts, test_cluster_words) for i, (lemma, feat_dict, word) in enumerate(test_data): joint_index = lemma + ':' + common.get_morph_string(feat_dict, feature_types) prediction_counter = defaultdict(int) for ens in ensemble_predictions: prediction_str = ''.join(instantiate_template(ens[joint_index], lemma)) prediction_counter[prediction_str] += 1 string_to_template[prediction_str] = ens[joint_index] print u'template: {} prediction: {}'.format(ens[joint_index], prediction_str) # return the most predicted output predicted_template_string = max(prediction_counter, key=prediction_counter.get) # hack: if chosen without majority, pick shortest prediction if prediction_counter[predicted_template_string] == 1: predicted_template_string = min(prediction_counter, key=len) print u'chosen:{} with {} votes\n'.format(predicted_template_string, prediction_counter[predicted_template_string]) predicted_templates[joint_index] = string_to_template[predicted_template_string] # progress indication sys.stdout.write("\r%d%%" % (float(i) / len(test_cluster_lemmas) * 100)) sys.stdout.flush() ## else: # load best model - no ensemble best_model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model( str(cluster_index), alphabet, results_file_path, input_dim, hidden_dim, layers, feature_alphabet, feat_input_dim, feature_types) print 'starting to predict for cluster: {}'.format(cluster_type) try: predicted_templates = predict_templates(best_model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, test_cluster_lemmas, test_cluster_feat_dicts, feat_index, feature_types) except Exception as e: print e 
traceback.print_exc() print 'evaluating predictions for cluster: {}'.format(cluster_type) try: accuracy = evaluate_model(predicted_templates, test_cluster_lemmas, test_cluster_feat_dicts, test_cluster_words, feature_types, print_results=True) accuracies.append(accuracy) except Exception as e: print e traceback.print_exc() # get predicted_templates in the same order they appeared in the original file # iterate through them and foreach concat morph, lemma, features in order to print later in the task format for i in test_cluster_to_data_indices[cluster_type]: joint_index = test_lemmas[i] + ':' + common.get_morph_string(test_feat_dicts[i], feature_types) inflection = instantiate_template(predicted_templates[joint_index], test_lemmas[i]) final_results[i] = (test_lemmas[i], test_feat_dicts[i], inflection) except KeyError: print 'could not find relevant examples in test data for cluster: ' + cluster_type print 'clusters in test are: {}'.format(test_cluster_to_data_indices.keys()) print 'clusters in train are: {}'.format(train_cluster_to_data_indices.keys()) accuracy_vals = [accuracies[i][1] for i in xrange(len(accuracies))] macro_avg_accuracy = sum(accuracy_vals) / len(accuracies) print 'macro avg accuracy: ' + str(macro_avg_accuracy) mic_nom = sum([accuracies[i][0] * accuracies[i][1] for i in xrange(len(accuracies))]) mic_denom = sum([accuracies[i][0] for i in xrange(len(accuracies))]) micro_average_accuracy = mic_nom / mic_denom print 'micro avg accuracy: ' + str(micro_average_accuracy) if 'test' in test_path: suffix = '.best.test' else: suffix = '.best' common.write_results_file_and_evaluate_externally(hyper_params, micro_average_accuracy, train_path, test_path, results_file_path + suffix, sigmorphon_root_dir, final_results)
def evaluate_ndst(alphabet, alphabet_index, ensemble, feat_index, feat_input_dim, feature_alphabet, feature_types, hidden_dim, hyper_params, input_dim, inverse_alphabet_index, layers, results_file_path, sigmorphon_root_dir, test_cluster_to_data_indices, test_feat_dicts, test_lemmas, test_path, test_words, train_cluster_to_data_indices, train_path, train_words): accuracies = [] final_results = {} # factored model: new model per inflection type for cluster_index, cluster_type in enumerate( train_cluster_to_data_indices): # get the inflection-specific data train_cluster_words = [ train_words[i] for i in train_cluster_to_data_indices[cluster_type] ] if len(train_cluster_words) < 1: print 'only {} samples for this inflection type. skipping'.format( str(len(train_cluster_words))) continue else: print 'now evaluating model for cluster ' + str(cluster_index + 1) + '/' + \ str(len(train_cluster_to_data_indices)) + ': ' + cluster_type + ' with ' + \ str(len(train_cluster_words)) + ' examples' # test best model try: test_cluster_lemmas = [ test_lemmas[i] for i in test_cluster_to_data_indices[cluster_type] ] test_cluster_words = [ test_words[i] for i in test_cluster_to_data_indices[cluster_type] ] test_cluster_feat_dicts = [ test_feat_dicts[i] for i in test_cluster_to_data_indices[cluster_type] ] if ensemble: # load ensemble models ensemble_model_names = ensemble.split(',') print 'ensemble paths:\n' print '\n'.join(ensemble_model_names) ensemble_models = [] for ens in ensemble_model_names: model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model( str(cluster_index), alphabet, ens, input_dim, hidden_dim, layers, feature_alphabet, feat_input_dim, feature_types) ensemble_models.append( (model, encoder_frnn, encoder_rrnn, decoder_rnn)) # predict the entire test set with each model in the ensemble ensemble_predictions = [] for em in ensemble_models: model, encoder_frnn, encoder_rrnn, decoder_rnn = em predicted_templates = predict_templates( model, decoder_rnn, encoder_frnn, 
encoder_rrnn, alphabet_index, inverse_alphabet_index, test_cluster_lemmas, test_cluster_feat_dicts, feat_index, feature_types) ensemble_predictions.append(predicted_templates) predicted_templates = {} string_to_template = {} # perform voting for each test input - joint_index is a lemma+feats representation test_data = zip(test_cluster_lemmas, test_cluster_feat_dicts, test_cluster_words) for i, (lemma, feat_dict, word) in enumerate(test_data): joint_index = lemma + ':' + common.get_morph_string( feat_dict, feature_types) prediction_counter = defaultdict(int) for ens in ensemble_predictions: prediction_str = ''.join( instantiate_template(ens[joint_index], lemma)) prediction_counter[prediction_str] += 1 string_to_template[prediction_str] = ens[joint_index] print u'template: {} prediction: {}'.format( ens[joint_index], prediction_str) # return the most predicted output predicted_template_string = max(prediction_counter, key=prediction_counter.get) # hack: if chosen without majority, pick shortest prediction if prediction_counter[predicted_template_string] == 1: predicted_template_string = min(prediction_counter, key=len) print u'chosen:{} with {} votes\n'.format( predicted_template_string, prediction_counter[predicted_template_string]) predicted_templates[joint_index] = string_to_template[ predicted_template_string] # progress indication sys.stdout.write( "\r%d%%" % (float(i) / len(test_cluster_lemmas) * 100)) sys.stdout.flush() ## else: # load best model - no ensemble best_model, encoder_frnn, encoder_rrnn, decoder_rnn = load_best_model( str(cluster_index), alphabet, results_file_path, input_dim, hidden_dim, layers, feature_alphabet, feat_input_dim, feature_types) print 'starting to predict for cluster: {}'.format( cluster_type) try: predicted_templates = predict_templates( best_model, decoder_rnn, encoder_frnn, encoder_rrnn, alphabet_index, inverse_alphabet_index, test_cluster_lemmas, test_cluster_feat_dicts, feat_index, feature_types) except Exception as e: print e 
traceback.print_exc() print 'evaluating predictions for cluster: {}'.format(cluster_type) try: accuracy = evaluate_model(predicted_templates, test_cluster_lemmas, test_cluster_feat_dicts, test_cluster_words, feature_types, print_results=True) accuracies.append(accuracy) except Exception as e: print e traceback.print_exc() # get predicted_templates in the same order they appeared in the original file # iterate through them and foreach concat morph, lemma, features in order to print later in the task format for i in test_cluster_to_data_indices[cluster_type]: joint_index = test_lemmas[i] + ':' + common.get_morph_string( test_feat_dicts[i], feature_types) inflection = instantiate_template( predicted_templates[joint_index], test_lemmas[i]) final_results[i] = (test_lemmas[i], test_feat_dicts[i], inflection) except KeyError: print 'could not find relevant examples in test data for cluster: ' + cluster_type print 'clusters in test are: {}'.format( test_cluster_to_data_indices.keys()) print 'clusters in train are: {}'.format( train_cluster_to_data_indices.keys()) accuracy_vals = [accuracies[i][1] for i in xrange(len(accuracies))] macro_avg_accuracy = sum(accuracy_vals) / len(accuracies) print 'macro avg accuracy: ' + str(macro_avg_accuracy) mic_nom = sum( [accuracies[i][0] * accuracies[i][1] for i in xrange(len(accuracies))]) mic_denom = sum([accuracies[i][0] for i in xrange(len(accuracies))]) micro_average_accuracy = mic_nom / mic_denom print 'micro avg accuracy: ' + str(micro_average_accuracy) if 'test' in test_path: suffix = '.best.test' else: suffix = '.best' common.write_results_file_and_evaluate_externally( hyper_params, micro_average_accuracy, train_path, test_path, results_file_path + suffix, sigmorphon_root_dir, final_results)