def prediction_step(sess, dataset, dataset_type, model, transition_params_trained,
                    stats_graph_folder, epoch_number, parameters, dataset_filepaths):
    if dataset_type == 'deploy':
        print('Predict labels for the {0} set'.format(dataset_type))
    else:
        print('Evaluate model on the {0} set'.format(dataset_type))
    all_predictions = []
    all_y_true = []
    output_filepath = os.path.join(stats_graph_folder,
                                   '{1:03d}_{0}.txt'.format(dataset_type, epoch_number))
    output_file = codecs.open(output_filepath, 'w', 'UTF-8')
    original_conll_file = codecs.open(dataset_filepaths[dataset_type], 'r', 'UTF-8')

    for i in range(len(dataset.token_indices[dataset_type])):
        # Feed one sentence at a time (batch size 1), with dropout disabled.
        feed_dict = {
            model.input_token_indices: dataset.token_indices[dataset_type][i],
            model.input_token_character_indices: dataset.character_indices_padded[dataset_type][i],
            model.input_token_lengths: dataset.token_lengths[dataset_type][i],
            model.input_label_indices_vector: dataset.label_vector_indices[dataset_type][i],
            model.dropout_keep_prob: 1.
        }
        unary_scores, predictions = sess.run([model.unary_scores, model.predictions], feed_dict)
        if parameters['use_crf']:
            # Decode the best tag sequence with the trained CRF transitions,
            # then drop the artificial begin/end-of-sentence tokens.
            predictions, _ = tf.contrib.crf.viterbi_decode(unary_scores, transition_params_trained)
            predictions = predictions[1:-1]
        else:
            predictions = predictions.tolist()

        assert len(predictions) == len(dataset.tokens[dataset_type][i])
        output_string = ''
        prediction_labels = [dataset.index_to_label[prediction] for prediction in predictions]
        gold_labels = dataset.labels[dataset_type][i]
        if parameters['tagging_format'] == 'bioes':
            prediction_labels = utils_nlp.bioes_to_bio(prediction_labels)
            gold_labels = utils_nlp.bioes_to_bio(gold_labels)

        # Re-read the original CoNLL file to align each prediction with its
        # token and gold label, skipping document separators and blank lines.
        for prediction, token, gold_label in zip(prediction_labels,
                                                 dataset.tokens[dataset_type][i],
                                                 gold_labels):
            while True:
                line = original_conll_file.readline()
                split_line = line.strip().split(' ')
                if '-DOCSTART-' in split_line[0] or len(split_line) == 0 \
                        or len(split_line[0]) == 0:
                    continue
                else:
                    token_original = split_line[0]
                    if parameters['tagging_format'] == 'bioes':
                        split_line.pop()
                    gold_label_original = split_line[-1]
                    assert token == token_original and gold_label == gold_label_original
                break
            split_line.append(prediction)
            output_string += ' '.join(split_line) + '\n'
        output_file.write(output_string + '\n')

        all_predictions.extend(predictions)
        all_y_true.extend(dataset.label_indices[dataset_type][i])

    output_file.close()
    original_conll_file.close()

    if dataset_type != 'deploy':
        if parameters['main_evaluation_mode'] == 'conll':
            conll_evaluation_script = os.path.join('.', 'conlleval')
            conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepath)
            shell_command = 'perl {0} < {1} > {2}'.format(
                conll_evaluation_script, output_filepath, conll_output_filepath)
            os.system(shell_command)
            with open(conll_output_filepath, 'r') as f:
                classification_report = f.read()
                print(classification_report)
        else:
            new_y_pred, new_y_true, new_label_indices, new_label_names, _, _ = remap_labels(
                all_predictions, all_y_true, dataset, parameters['main_evaluation_mode'])
            print(sklearn.metrics.classification_report(
                new_y_true, new_y_pred, digits=4,
                labels=new_label_indices, target_names=new_label_names))

    return all_predictions, all_y_true, output_filepath
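# --- Illustrative sketch (not part of the original code) ---
# The CRF branch above relies on tf.contrib.crf.viterbi_decode, which in
# TensorFlow 1.x is a plain-numpy helper: it takes a [seq_len, num_tags]
# unary-score matrix and a [num_tags, num_tags] transition matrix and
# returns (best_tag_sequence, sequence_score). The toy numbers below are
# invented for illustration; only the call signature mirrors the code above.
import numpy as np
import tensorflow as tf

toy_unary_scores = np.array([[4.0, 1.0],   # artificial begin-of-sentence token
                             [1.0, 3.0],   # real token 1
                             [2.0, 0.5],   # real token 2
                             [4.0, 1.0]])  # artificial end-of-sentence token
toy_transitions = np.array([[0.5, -0.5],
                            [-0.5, 0.5]])
best_sequence, best_score = tf.contrib.crf.viterbi_decode(toy_unary_scores, toy_transitions)
# As in prediction_step, the first and last entries correspond to the
# boundary tokens wrapped around each sentence, so they are dropped:
print(best_sequence[1:-1])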
def prediction_step(sess, dataset, dataset_type, model, transition_params_trained,
                    stats_graph_folder, epoch_number, parameters, dataset_filepaths,
                    for_adapter=False):
    if dataset_type == 'deploy':
        print('Predict labels for the {0} set'.format(dataset_type))
    else:
        print('Evaluate model on the {0} set'.format(dataset_type))
    if for_adapter:
        all_predictions_per_sentence = []
        all_y_true_per_sentence = []
        all_prediction_labels_per_sentence = []
    all_predictions = []
    all_y_true = []
    output_filepath = os.path.join(stats_graph_folder,
                                   '{1:03d}_{0}.txt'.format(dataset_type, epoch_number))
    output_file = codecs.open(output_filepath, 'w', 'latin-1', errors='replace')
    original_conll_file = codecs.open(dataset_filepaths[dataset_type], 'r', 'latin-1',
                                      errors='replace')

    for i in range(len(dataset.token_indices[dataset_type])):
        if parameters['use_adapter']:
            feed_dict = {
                model.input_token_indices: dataset.token_indices[dataset_type][i],
                model.input_token_character_indices: dataset.character_indices_padded[dataset_type][i],
                model.input_token_lengths: dataset.token_lengths[dataset_type][i],
                model.input_label_indices_vector: dataset.label_vector_indices[dataset_type][i],
                model.input_label_adapter_indices_vector: dataset.label_adapter_vector_indices[dataset_type][i],
                model.dropout_keep_prob: 1.,
                model.adapter_keep_prob: 1.
            }
            if parameters['include_pos']:
                feed_dict[model.input_label_pos_indices_vector] = \
                    dataset.label_pos_vector_indices[dataset_type][i]
        elif for_adapter:
            # Used for the prediction/evaluation step; the gold labels in the
            # dataset are not fed to the model.
            feed_dict = {
                model.input_token_indices: dataset.token_indices[dataset_type][i],
                model.input_token_character_indices: dataset.character_indices_padded[dataset_type][i],
                model.input_token_lengths: dataset.token_lengths[dataset_type][i],
                model.dropout_keep_prob: 1.
            }
        else:
            feed_dict = {
                model.input_token_indices: dataset.token_indices[dataset_type][i],
                model.input_token_character_indices: dataset.character_indices_padded[dataset_type][i],
                model.input_token_lengths: dataset.token_lengths[dataset_type][i],
                model.input_label_indices_vector: dataset.label_vector_indices[dataset_type][i],
                model.dropout_keep_prob: 1.
            }
        unary_scores, predictions = sess.run([model.unary_scores, model.predictions], feed_dict)
        if parameters['use_crf']:
            predictions, _ = tf.contrib.crf.viterbi_decode(unary_scores, transition_params_trained)
            predictions = predictions[1:-1]
        else:
            predictions = predictions.tolist()

        assert len(predictions) == len(dataset.tokens[dataset_type][i])
        output_string = ''
        prediction_labels = [dataset.index_to_label[prediction] for prediction in predictions]
        gold_labels = dataset.labels[dataset_type][i]
        if parameters['tagging_format'] == 'bioes':
            prediction_labels = utils_nlp.bioes_to_bio(prediction_labels)
            gold_labels = utils_nlp.bioes_to_bio(gold_labels)
        try:
            assert len(prediction_labels) == len(gold_labels)
        except AssertionError:
            print(dataset.tokens[dataset_type][i])
            print(gold_labels)
            print(prediction_labels)

        for z, (prediction, token, gold_label) in enumerate(
                zip(prediction_labels, dataset.tokens[dataset_type][i], gold_labels)):
            while True:
                line = original_conll_file.readline()
                split_line = line.strip().split(' ')
                if '-DOCSTART-' in split_line[0] or len(split_line) == 0 \
                        or len(split_line[0]) == 0:
                    continue
                else:
                    token_original = split_line[0]
                    if parameters['tagging_format'] == 'bioes':
                        split_line.pop()
                    gold_label_original = split_line[-1]
                    # Log misalignments instead of aborting the evaluation.
                    try:
                        assert (token == dataset.tokenize(token_original)
                                and gold_label == gold_label_original)
                    except AssertionError:
                        print(' '.join([dataset.tokens[dataset_type][i][x] + '/' + gold_labels[x]
                                        for x in range(len(gold_labels))]))
                        print('token: {:s} - gold_label: {:s} - gold_label_original: {:s}'.format(
                            dataset.tokens[dataset_type][i][z], gold_label, gold_label_original))
                break
            split_line.append(prediction)
            output_string += ' '.join(split_line) + '\n'
        output_file.write(output_string + '\n')

        if for_adapter:
            all_predictions_per_sentence.append(predictions)
            all_y_true_per_sentence.append(dataset.label_indices[dataset_type][i])
            all_prediction_labels_per_sentence.append(prediction_labels)
        all_predictions.extend(predictions)
        all_y_true.extend(dataset.label_indices[dataset_type][i])

    output_file.close()
    original_conll_file.close()

    if dataset_type != 'deploy':
        if parameters['main_evaluation_mode'] == 'conll':
            conll_evaluation_script = os.path.join('.', 'conlleval')
            conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepath)
            shell_command = '/usr/bin/perl {0} < {1} > {2}'.format(
                conll_evaluation_script, output_filepath, conll_output_filepath)
            os.system(shell_command)
            with open(conll_output_filepath, 'r') as f:
                classification_report = f.read()
                print(classification_report)
        else:
            new_y_pred, new_y_true, new_label_indices, new_label_names, _, _ = remap_labels(
                all_predictions, all_y_true, dataset, parameters['main_evaluation_mode'])
            print(sklearn.metrics.classification_report(
                new_y_true, new_y_pred, digits=4,
                labels=new_label_indices, target_names=new_label_names))

    if for_adapter:
        return all_prediction_labels_per_sentence, all_predictions, all_y_true, output_filepath
    return all_predictions, all_y_true, output_filepath
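# --- Illustrative sketch (not part of the original code) ---
# A hedged usage example for the for_adapter variant above: with
# for_adapter=True the function returns an extra per-sentence list of
# predicted label strings ahead of the usual flat lists. The wrapper name
# run_validation_with_adapter is invented; all arguments are assumed to be
# set up elsewhere, as in the surrounding training loop.
def run_validation_with_adapter(sess, dataset, model, transition_params_trained,
                                stats_graph_folder, epoch_number, parameters,
                                dataset_filepaths):
    (prediction_labels_per_sentence, predictions,
     y_true, filepath) = prediction_step(
        sess, dataset, 'valid', model, transition_params_trained,
        stats_graph_folder, epoch_number, parameters, dataset_filepaths,
        for_adapter=True)
    # Without the flag, the return signature matches the other variants:
    # predictions, y_true, filepath = prediction_step(...)
    return prediction_labels_per_sentence, predictions, y_true, filepath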
def prediction_step(sess, dataset, dataset_type, model, transition_params_trained,
                    stats_graph_folder, epoch_number, parameters, dataset_filepaths,
                    demo=False):
    if dataset_type == 'deploy':
        print('Predict labels for the {0} set'.format(dataset_type))
    else:
        print('Evaluate model on the {0} set'.format(dataset_type))
    all_predictions = []
    all_predictions_label = []
    all_y_true = []
    all_y_true_label = []
    output_filepath = os.path.join(stats_graph_folder,
                                   '{1:07.3f}_{0}.txt'.format(dataset_type, epoch_number))
    encoding = 'utf-8'
    output_file = codecs.open(output_filepath, 'w', encoding)
    # 'combined' datasets carry the gold label in the third column.
    if 'combined' in dataset_filepaths[dataset_type]:
        label_idx = 2
    else:
        label_idx = -1
    # original_conll_file = codecs.open(dataset_filepaths[dataset_type], 'r', encoding=encoding)
    sequence_numbers = list(range(len(dataset.token_indices[dataset_type])))

    for i in tqdm(range(0, len(dataset.token_indices[dataset_type]), parameters['batch_size']),
                  'Testing on {} at epoch {}'.format(dataset_type, epoch_number)):
        sequence_number = sequence_numbers[i: i + parameters['batch_size']]
        batch = utils.pad_batch(dataset, sequence_number, dataset_type)
        feed_dict = {
            model.input_token_indices: batch['token_indices_padded'],
            model.input_sequence_lengths: batch['sequence_lengths'],
            model.input_token_character_indices: batch['character_indices_padded'],
            model.input_token_lengths: batch['token_lengths'],
            model.input_label_indices_flat: batch['label_indices'],
            model.input_label_indices_vector: batch['label_vector_indices'],
            model.dropout_keep_prob: 1
        }
        batch_unary_scores, batch_predictions = sess.run(
            [model.unary_scores, model.predictions], feed_dict)

        for unary_score, y, sequence_length, predictions, j in zip(
                batch_unary_scores, batch['label_indices'], batch['sequence_lengths'],
                batch_predictions, sequence_number):
            if parameters['use_crf']:
                # Remove padding from the scores and tag sequence.
                unary_score = unary_score[:sequence_length]
                y = y[:sequence_length]
                # Compute the highest scoring sequence.
                predictions, _ = tf.contrib.crf.viterbi_decode(
                    unary_score, transition_params_trained)
            else:
                predictions = predictions[:sequence_length].tolist()

            assert len(predictions) == len(np.array(dataset.tokens[dataset_type])[j])
            output_string = []
            prediction_labels = [dataset.index_to_label[pred] for pred in predictions]
            gold_labels = np.array(dataset.labels[dataset_type])[j]
            if parameters['tagging_format'] == 'bioes':
                prediction_labels = utils_nlp.bioes_to_bio(prediction_labels)
                gold_labels = utils_nlp.bioes_to_bio(gold_labels)

            # The cross-check against the original CoNLL file (realigning each
            # token and gold label via label_idx, as in the non-batched
            # variants) is disabled here; tokens, gold labels, and predictions
            # are written out directly instead.
            for prediction, token, gold_label in zip(
                    prediction_labels, np.array(dataset.tokens[dataset_type])[j], gold_labels):
                output_string.append(' '.join([token, gold_label, prediction]))
            output_file.write('\n'.join(output_string))
            output_file.write('\n\n')

            all_predictions.extend(predictions)
            all_y_true.extend(np.array(dataset.label_indices[dataset_type])[j])
            all_predictions_label.append(prediction_labels)
            all_y_true_label.append(np.array(dataset.labels[dataset_type])[j])

    output_file.close()
    # original_conll_file.close()

    if demo:
        return all_predictions, all_y_true, output_filepath

    if dataset_type != 'deploy':
        if parameters['main_evaluation_mode'] == 'conll':
            conll_evaluation_script = os.path.join('.', 'conlleval')
            conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepath)
            # Some runs used conlleval's -r flag, e.g.:
            # if "labelled_yelp_tips_th06" in parameters["dataset_train"]:
            #     shell_command = 'perl {0} -r < {1} > {2}'.format(
            #         conll_evaluation_script, output_filepath, conll_output_filepath)
            shell_command = 'perl {0} < {1} > {2}'.format(
                conll_evaluation_script, output_filepath, conll_output_filepath)
            os.system(shell_command)
            with open(conll_output_filepath, 'r') as f:
                classification_report = f.read()
                print(classification_report)
        else:
            raise AssertionError('Not implemented')
            # Unreachable after the raise; kept from the non-batched variants:
            new_y_pred, new_y_true, new_label_indices, new_label_names, _, _ = remap_labels(
                all_predictions, all_y_true, dataset, parameters['main_evaluation_mode'])
            print(sklearn.metrics.classification_report(
                new_y_true, new_y_pred, digits=4,
                labels=new_label_indices, target_names=new_label_names))

        exact_score, inexact_score = report_fscore(all_y_true_label, all_predictions_label)
        exact_inexact_evaluation = '{0}_exact_inexact_evaluation.txt'.format(output_filepath)
        with open(exact_inexact_evaluation, 'w') as file:
            file.write('Exact score\n')
            file.write(json.dumps(exact_score) + '\n')
            file.write(json.dumps(inexact_score) + '\n')

    return all_predictions, all_y_true, output_filepath
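# --- Illustrative sketch (not part of the original code) ---
# utils.pad_batch is not shown in this file. Judging from the keys the
# batched feed_dict above consumes, it pads a slice of variable-length
# sentences to a common length. The helper below is a hypothetical
# reconstruction of that contract, not the project's actual implementation;
# pad_batch_sketch and pad_index are invented names.
def pad_batch_sketch(dataset, sequence_numbers, dataset_type, pad_index=0):
    tokens = [dataset.token_indices[dataset_type][j] for j in sequence_numbers]
    labels = [dataset.label_indices[dataset_type][j] for j in sequence_numbers]
    sequence_lengths = [len(t) for t in tokens]
    max_length = max(sequence_lengths)
    return {
        'token_indices_padded': [list(t) + [pad_index] * (max_length - len(t))
                                 for t in tokens],
        'label_indices': [list(l) + [pad_index] * (max_length - len(l))
                          for l in labels],
        'sequence_lengths': sequence_lengths,
        # The real helper must also provide 'character_indices_padded',
        # 'token_lengths', and 'label_vector_indices'; omitted for brevity.
    }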
def prediction_step(sess, dataset, dataset_type, model, transition_params_trained,
                    stats_graph_folder, epoch_number, parameters, dataset_filepaths):
    """Predict labels, write them to file, and collect the predicted mentions."""
    if dataset_type == 'deploy':
        print('=> Predict labels for the {0} set'.format(dataset_type))
    else:
        print('Evaluate model on the {0} set'.format(dataset_type))
    # Note: the file-writing below can be commented out for efficiency experiments.
    all_predictions = []
    all_y_true = []
    output_filepath = os.path.join(stats_graph_folder,
                                   '{1:03d}_{0}.txt'.format(dataset_type, epoch_number))
    mentions_output_filepath = os.path.join(stats_graph_folder, 'mentions_output.txt')
    print('output_file: ', output_filepath)
    print('mentions_output_file: ', mentions_output_filepath)
    output_file = codecs.open(output_filepath, 'w', 'UTF-8')
    original_conll_file = codecs.open(dataset_filepaths[dataset_type], 'r', 'UTF-8')
    # mentions_file = codecs.open(mentions_output_filepath, 'w', 'UTF-8')
    output_tokens_list = []
    token_list_outer = []
    token_list_inner = []

    for i in range(len(dataset.token_indices[dataset_type])):
        feed_dict = {
            model.input_token_indices: dataset.token_indices[dataset_type][i],
            model.input_token_character_indices: dataset.character_indices_padded[dataset_type][i],
            model.input_token_lengths: dataset.token_lengths[dataset_type][i],
            model.input_label_indices_vector: dataset.label_vector_indices[dataset_type][i],
            model.dropout_keep_prob: 1.
        }
        unary_scores, predictions = sess.run([model.unary_scores, model.predictions], feed_dict)
        if parameters['use_crf']:
            predictions, _ = tf.contrib.crf.viterbi_decode(unary_scores, transition_params_trained)
            predictions = predictions[1:-1]
        else:
            predictions = predictions.tolist()

        assert len(predictions) == len(dataset.tokens[dataset_type][i])
        output_string = ''
        prediction_labels = [dataset.index_to_label[prediction] for prediction in predictions]
        # Per-token unary scores, without the begin/end-of-sentence rows.
        unary_score_list = unary_scores.tolist()[1:-1]
        gold_labels = dataset.labels[dataset_type][i]
        if parameters['tagging_format'] == 'bioes':
            prediction_labels = utils_nlp.bioes_to_bio(prediction_labels)
            gold_labels = utils_nlp.bioes_to_bio(gold_labels)

        for prediction, token, gold_label, scores in zip(prediction_labels,
                                                         dataset.tokens[dataset_type][i],
                                                         gold_labels, unary_score_list):
            while True:
                line = original_conll_file.readline()
                split_line = line.strip().split(' ')
                if '-DOCSTART-' in split_line[0] or len(split_line) == 0 \
                        or len(split_line[0]) == 0:
                    continue
                else:
                    token_original = split_line[0]
                    if parameters['tagging_format'] == 'bioes':
                        split_line.pop()
                    gold_label_original = split_line[-1]
                    assert token == token_original and gold_label == gold_label_original
                    # Collect tokens and non-O predictions; the '--eosc' marker
                    # closes the current sentence.
                    if token != '--eosc':
                        token_list_inner.append(token)
                        if prediction != 'O':
                            output_tokens_list.append(token + '//' + prediction)
                    else:
                        token_list_inner.append(token)
                        token_list_outer.append(token_list_inner)
                        token_list_inner = []
                        output_tokens_list.append(token + '//' + prediction)
                break
            split_line.append(prediction)
            if parameters['output_scores']:
                # Append the space-separated unary scores for this token.
                scores = ' '.join([str(s) for s in scores])
                split_line.append('{}'.format(scores))
            output_string += ' '.join(split_line) + '\n'
        output_file.write(output_string + '\n')

        all_predictions.extend(predictions)
        all_y_true.extend(dataset.label_indices[dataset_type][i])

    output_file.close()
    original_conll_file.close()

    # Reassemble multi-token mentions from the 'token//label' strings: a B-
    # tag opens a new mention, any other tag extends the current one, and
    # '--eosc' closes the sentence.
    mention_count = 0
    mentions_list_inner = []
    mentions_list_outer = []
    candidateMention = ""
    for outputStr in output_tokens_list:
        candidate = " ".join(outputStr.split("//")[:-1])
        tag = outputStr.split("//")[-1]
        if candidate != '--eosc':
            if tag.startswith('B-'):
                if candidateMention != "":
                    mentions_list_inner.append(candidateMention)
                candidateMention = candidate
            else:
                candidateMention += " " + candidate
        else:
            if candidateMention != "":
                mentions_list_inner.append(candidateMention)
            candidateMention = ""
            mentions_list_outer.append(mentions_list_inner)
            mentions_list_inner = []
    # print('sentence list: ', token_list_outer)
    # print('mentions: ', mentions_list_outer)
    for inner_list in mentions_list_outer:
        mention_count += len(inner_list)
        mentions_string = ','.join(inner_list) + '\n'
        # mentions_file.write(mentions_string)
    # mentions_file.close()
    print('tally:', len(token_list_outer), len(mentions_list_outer),
          'total mentions discovered:', mention_count)

    if dataset_type != 'deploy':
        if parameters['main_evaluation_mode'] == 'conll':
            # Locate the Perl evaluation script inside the installed package.
            # conll_evaluation_script = os.path.join('.', 'conlleval')
            package_name = 'neuroner'
            root_dir = os.path.dirname(
                pkg_resources.resource_filename(package_name, '__init__.py'))
            conll_evaluation_script = os.path.join(root_dir, 'conlleval')
            conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepath)
            shell_command = 'perl {0} < {1} > {2}'.format(
                conll_evaluation_script, output_filepath, conll_output_filepath)
            os.system(shell_command)
            with open(conll_output_filepath, 'r') as f:
                classification_report = f.read()
                print(classification_report)
        else:
            new_y_pred, new_y_true, new_label_indices, new_label_names, _, _ = remap_labels(
                all_predictions, all_y_true, dataset, parameters['main_evaluation_mode'])
            print(sklearn.metrics.classification_report(
                new_y_true, new_y_pred, digits=4,
                labels=new_label_indices, target_names=new_label_names))

    return all_predictions, all_y_true, output_filepath
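# --- Illustrative sketch (not part of the original code) ---
# The mention-assembly loop above rebuilds multi-token mentions from
# 'token//label' strings. The same idea on plain (token, BIO-label) pairs,
# as a self-contained sketch; extract_mentions_sketch is an invented name.
def extract_mentions_sketch(tokens, bio_labels):
    """Group consecutive B-/I- tokens into mention strings."""
    mentions, current = [], []
    for token, label in zip(tokens, bio_labels):
        if label.startswith('B-'):
            if current:
                mentions.append(' '.join(current))
            current = [token]
        elif label.startswith('I-') and current:
            current.append(token)
        else:  # 'O' ends any open mention
            if current:
                mentions.append(' '.join(current))
            current = []
    if current:
        mentions.append(' '.join(current))
    return mentions

# extract_mentions_sketch(['John', 'Smith', 'visited', 'New', 'York'],
#                         ['B-PER', 'I-PER', 'O', 'B-LOC', 'I-LOC'])
# -> ['John Smith', 'New York']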
def prediction_step(sess, dataset, dataset_type, model, transition_params_trained,
                    stats_graph_folder, epoch_number, parameters, dataset_filepaths):
    if dataset_type == 'deploy':
        print('Predict labels for the {0} set'.format(dataset_type))
    else:
        print('Evaluate model on the {0} set'.format(dataset_type))
    all_predictions = []
    all_y_true = []
    output_filepath = os.path.join(stats_graph_folder,
                                   '{1:03d}_{0}.txt'.format(dataset_type, epoch_number))
    output_file = codecs.open(output_filepath, 'w', 'UTF-8')
    original_conll_file = codecs.open(dataset_filepaths[dataset_type], 'r', 'UTF-8')

    # Buffers for dumping intermediate tensors to disk for offline inspection.
    A = []  # unary scores per sentence
    # B = []
    C = []  # decoded predictions per sentence
    # ti = []
    # chi = []
    # tl = []
    # lvi = []

    for i in range(len(dataset.token_indices[dataset_type])):
        feed_dict = {
            model.input_token_indices: dataset.token_indices[dataset_type][i],
            model.input_token_character_indices: dataset.character_indices_padded[dataset_type][i],
            model.input_token_lengths: dataset.token_lengths[dataset_type][i],
            model.input_label_indices_vector: dataset.label_vector_indices[dataset_type][i],
            model.dropout_keep_prob: 1.
        }
        unary_scores, predictions = sess.run([model.unary_scores, model.predictions], feed_dict)
        # np.save("SalmanTest/DICT%s%s" % (i, dataset_type), feed_dict)
        # save_path = saver.save(sess, "./SalmanTest/CHECKDIC.ckpt")
        # np.save("SalmanTest/SICT%s%s" % (i, dataset_type), predictions)
        # ti.append(dataset.token_indices[dataset_type][i])
        # chi.append(dataset.character_indices_padded[dataset_type][i])
        # tl.append(dataset.token_lengths[dataset_type][i])
        # lvi.append(dataset.label_vector_indices[dataset_type][i])
        # Unary scores without the boundary rows and the two extra CRF labels.
        A.append(unary_scores[1:, :-2][:-1])
        # B.append(predictions)
        if parameters['use_crf']:
            predictions, _ = tf.contrib.crf.viterbi_decode(unary_scores, transition_params_trained)
            predictions = predictions[1:-1]
        else:
            predictions = predictions.tolist()
        C.append(predictions)

        assert len(predictions) == len(dataset.tokens[dataset_type][i])
        output_string = ''
        prediction_labels = [dataset.index_to_label[prediction] for prediction in predictions]
        gold_labels = dataset.labels[dataset_type][i]
        if parameters['tagging_format'] == 'bioes':
            prediction_labels = utils_nlp.bioes_to_bio(prediction_labels)
            gold_labels = utils_nlp.bioes_to_bio(gold_labels)

        for prediction, token, gold_label in zip(prediction_labels,
                                                 dataset.tokens[dataset_type][i],
                                                 gold_labels):
            while True:
                line = original_conll_file.readline()
                split_line = line.strip().split(' ')
                if '-DOCSTART-' in split_line[0] or len(split_line) == 0 \
                        or len(split_line[0]) == 0:
                    continue
                else:
                    token_original = split_line[0]
                    if parameters['tagging_format'] == 'bioes':
                        split_line.pop()
                    gold_label_original = split_line[-1]
                    assert token == token_original and gold_label == gold_label_original
                break
            split_line.append(prediction)
            output_string += ' '.join(split_line) + '\n'
        output_file.write(output_string + '\n')

        all_predictions.extend(predictions)
        all_y_true.extend(dataset.label_indices[dataset_type][i])
        # np.save('SalmanTest/SalmanPred%s%s' % (i, dataset_type), predictions)

    # A.append(np.zeros(99))
    # C.append(np.zeros(99))
    np.save('SalmanTest/NAMEMr1mainSalmanUnary_scores%s' % dataset_type, A)
    # np.save('SalmanTest/SalmanUnary_scores%s%s' % (i, dataset_type), unary_scores)
    # np.save('SalmanTest/myx3mainSalmanPred%s' % dataset_type, B)
    np.save('SalmanTest/NAMEMr1mainSalmanCCC%s' % dataset_type, C)
    # np.save('SalmanTest/myx3ti', ti)
    # np.save('SalmanTest/myx3chi', chi)
    # np.save('SalmanTest/myx3tl', tl)
    # np.save('SalmanTest/myx3lvi', lvi)

    output_file.close()
    original_conll_file.close()

    if dataset_type != 'deploy':
        if parameters['main_evaluation_mode'] == 'conll':
            conll_evaluation_script = os.path.join('.', 'conlleval')
            conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepath)
            shell_command = 'perl {0} < {1} > {2}'.format(
                conll_evaluation_script, output_filepath, conll_output_filepath)
            os.system(shell_command)
            with open(conll_output_filepath, 'r') as f:
                classification_report = f.read()
                print(classification_report)
        else:
            new_y_pred, new_y_true, new_label_indices, new_label_names, _, _ = remap_labels(
                all_predictions, all_y_true, dataset, parameters['main_evaluation_mode'])
            print(sklearn.metrics.classification_report(
                new_y_true, new_y_pred, digits=4,
                labels=new_label_indices, target_names=new_label_names))

    return all_predictions, all_y_true, output_filepath
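# --- Illustrative sketch (not part of the original code) ---
# Every variant above shells out to the conlleval Perl script via os.system
# with shell redirection, which silently swallows failures. A sketch of the
# same call using the standard-library subprocess module, which avoids the
# shell and surfaces errors; this is an alternative formulation, not what
# the original code does.
import subprocess

def run_conlleval_sketch(script_path, input_filepath, output_filepath):
    with open(input_filepath, 'rb') as fin, open(output_filepath, 'wb') as fout:
        # check=True raises CalledProcessError if perl or the script fails.
        subprocess.run(['perl', script_path], stdin=fin, stdout=fout, check=True)
    with open(output_filepath, 'r') as f:
        return f.read()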