for num_votes in counts.keys():
    print('%d examples have %d votes each.' % (counts[num_votes], num_votes))
print('%s votes in total.' % (total_votes))


if __name__ == '__main__':
    args = parser.parse_args()

    # Read the list of model response files to compare.
    with open(args.target_list, 'r') as fin:
        target_files = [x.strip() for x in fin.readlines()]
    print('Model 1 is: ' + target_files[0])
    print('Model 2 is: ' + target_files[1])

    # Read the examples into a dictionary keyed by example key.
    examples = utils.process_source_and_responses(args.source_file, target_files)
    examples_dict = {}
    for example in examples:
        examples_dict[example.key] = example

    # Load the pickled AMT worker results and attach them to the examples.
    with open(args.responses_path, 'rb') as f_in:
        worker_results_list = pickle.load(f_in)
    utils.process_amt_hit_responses(worker_results_list, examples_dict)

    # Report annotator counts, vote counts, and inter-annotator agreement.
    print_num_annotators(examples_dict)
    print()
    print_vote_counts(examples_dict)
    print()
    print_annotator_agreement(examples_dict)
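
# The loop below reads a runs file listing one "evalset,model,response_file"
# triple per line. An illustrative example of the assumed format (the model
# names and paths here are hypothetical, not actual files from the repo):
#
#   NCM,CakeChat,/path/to/ncm_cakechat_responses.txt
#   DBDC,OSQ,/path/to/dbdc_osq_responses.txt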
human_responses = dict()
human_responses['NCM'] = {}
human_responses['DBDC'] = {}

for line in open(args.runs_file).readlines():
    evalset, model, response_file = line.strip('\n').split(',')
    target_files = [response_file]
    print('Evaluation set is ' + evalset + ' model is: ' + model +
          ' response file: ' + response_file)
    response_files[evalset][model] = target_files[0]
    if evalset == 'NCM':
        examples = utils.process_source_and_responses(
            '/data2/chatbot_eval_issues/results/AMT_NCM_Test_NCM_Cakechat/neural_conv_model_eval_source.txt',
            target_files)
        # Two human reference responses accompany the NCM evaluation set.
        human_responses['NCM']['Human1'] = [
            _.strip('\n') for _ in open(
                '/home/jsedoc/Chatbot_evaluation/eval_data/ncm/neural_conv_model_eval_responses_human_1.txt'
            ).readlines()
        ]
        human_responses['NCM']['Human2'] = [
            _.strip('\n') for _ in open(
                '/home/jsedoc/Chatbot_evaluation/eval_data/ncm/neural_conv_model_eval_responses_human_2.txt'
            ).readlines()
        ]
    elif evalset == 'DBDC':
        examples = utils.process_source_and_responses(
            '/data2/chatbot_eval_issues/results/AMT_DBDC_Test_OSQ_Harvard/dbdc_eval_minus_CIC_200rand.txt',
            target_files)
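
# Note: response_files is assumed to be initialized before the loop above,
# e.g. as a nested dict keyed by evaluation set (a minimal sketch of the
# assumed shape, inferred from the response_files[evalset][model] writes):
#
#   response_files = {'NCM': {}, 'DBDC': {}}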