def get_results_from_file(filename):
    """Load a per-query results CSV and collect each model's predictions.

    Model columns are discovered from the header by the '<model>_value'
    naming convention. Returns a tuple (results, model_names) where each
    entry of results maps 'query', 'value_label', 'class_label' plus one
    Prediction object per discovered model.
    """
    with open(filename, encoding='utf-8', newline='') as queries_csv:
        reader = csv.DictReader(queries_csv)
        # A header field shaped '<model>_value' marks a model's value column.
        model_names = []
        for field in reader.fieldnames:
            parts = field.split('_')
            if len(parts) > 1 and parts[1] == 'value':
                model_names.append(parts[0])
        results = []
        for record in reader:
            label = record['value_label']
            entry = {
                'query': record['query'],
                'value_label': label,
                'class_label': get_class(label),
            }
            for name in model_names:
                value_pred = int(record[name + '_value'])
                entry[name] = Prediction(value_pred, get_class(value_pred))
            results.append(entry)
        return results, model_names
def update_metrics(self, value_label, model_value_prediction):
    """Fold one (actual label, model prediction) pair into running metrics.

    A negative prediction is treated as "no prediction" and ignored
    entirely; otherwise the relevant-count, confusion matrix, and
    prediction-error accumulators are all updated.
    """
    # Skip sentinel "no prediction" values before touching any counters.
    if model_value_prediction < 0:
        return
    class_label = get_class(value_label, self.mode)
    # BUG FIX: the predicted class must be derived from the model's
    # prediction. The original passed value_label here as well, so the
    # confusion matrix compared the actual label against itself.
    model_class_prediction = get_class(model_value_prediction, self.mode)
    self.conf['num_rel'] += 1
    self.update_confusion(actual_class=class_label,
                          prediction_class=model_class_prediction)
    self.update_prediction_err(actual_val=value_label,
                               prediction_val=model_value_prediction,
                               actual_class=class_label,
                               prediction_class=model_class_prediction)
def cmp_md(md_file, query_report_full_file_name, output_file, reports_dir,
           feature_file):
    """Merge MD labels into the query report and write a comparison CSV.

    Only queries that have an MD label are kept. Each kept row carries the
    original report columns renamed with an 'annotators_' prefix plus the MD
    value label and its (pessimistic three-class) class label.
    NOTE(review): feature_file is accepted but never used — confirm intent.
    NOTE(review): the output writer's fieldnames are the report's original
    columns plus two 'annotators_*' labels, while row keys are prefixed —
    verify against the report schema that these line up.
    """
    md_labels = get_md_labels(md_file)
    merged_rows = []
    with open(query_report_full_file_name, 'r', encoding='utf-8',
              newline='') as report_csv:
        reader = csv.DictReader(report_csv)
        report_fields = reader.fieldnames
        for row in reader:
            query = row['query']
            if query not in md_labels:
                continue
            merged = {'query': row['query']}
            for col in report_fields:
                if col != 'query':
                    merged['annotators_' + col] = row[col]
            merged['value_label'] = md_labels[query]
            merged['class_label'] = get_class(
                md_labels[query], ValToClassMode.THREE_CLASSES_PESSIMISTIC)
            merged_rows.append(merged)
    with open(reports_dir + output_file + '.csv', 'w', encoding='utf-8',
              newline='') as out:
        out_fields = reader.fieldnames
        out_fields.extend(['annotators_value_label', 'annotators_class_label'])
        writer = csv.DictWriter(out, fieldnames=out_fields)
        writer.writeheader()
        for merged in merged_rows:
            writer.writerow(merged)
def test_query_set(self, queries):
    """Run the net over each query's dataframe; print and return MAE/accuracy.

    For every query the per-example class predictions are averaged, mapped to
    a class via dataHelper.get_class, and compared against the mean of the
    actual labels. Returns a Stats(mae=..., acc=...) summary.
    """
    abs_errors = []
    num_accurate = 0
    for query, frame in queries.items():
        features, targets = dataHelper.split_x_y(frame)
        inputs = Variable(torch.Tensor(features).float())
        outputs = self.net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        print(query)
        predicted_np = predicted.numpy()
        rms = np.sqrt(mean_squared_error(targets, predicted_np))
        mean_prediction = np.mean(predicted_np)
        actual_value = np.mean(targets)
        prediction = dataHelper.get_class(mean_prediction)
        # Exact match between predicted class and mean actual value counts
        # as an accurate query.
        if prediction - actual_value == 0:
            num_accurate += 1
        abs_errors.append(np.math.fabs(actual_value - prediction))
        print(' predicted value:' + str(mean_prediction))
        print(' actual value: ' + str(actual_value))
        print('root mean squared error:' + str(rms))
    mae = np.mean(abs_errors)
    acc = num_accurate / len(queries)
    print('Total absolute squared error:' + str(mae))
    print(' Accuracy:' + str(acc))
    return Stats(mae=mae, acc=acc)
def gen_google_labels_error_report(label_file, google_file, output_file):
    """Write a per-query CSV comparing Google's predictions to actual labels.

    Each row records the actual value/class, the predicted value/class, the
    absolute error and a 0/1 class-accuracy flag. Rows whose Google value is
    negative (no prediction) are skipped.
    """
    actual_values = gen_actual_labels_dict(label_file)
    stats_rows = []
    # NOTE(review): reader is opened without an explicit encoding, unlike the
    # utf-8 writer below — confirm this is intentional.
    with open(google_file, 'r', newline='') as res_csv:
        for record in csv.DictReader(res_csv):
            query = record['query']
            value_label = int(actual_values[query])
            actual_class = get_class(
                value_label, ValToClassMode.THREE_CLASSES_PESSIMISTIC)
            predicted_value = int(record['Google_value'])
            if predicted_value < 0:
                continue  # negative value means no prediction was made
            predicted_class = get_class(
                predicted_value, ValToClassMode.THREE_CLASSES_PESSIMISTIC)
            stats_rows.append({
                'query': query,
                'value_label': value_label,
                'class_label': actual_class,
                'predicted_value': predicted_value,
                'predicted_class': predicted_class,
                'google_mae': math.fabs(value_label - predicted_value),
                'google_acc': int(actual_class == predicted_class),
            })
    with open(output_file, 'w', encoding='utf-8', newline='') as out:
        columns = [
            'query', 'value_label', 'class_label', 'predicted_value',
            'predicted_class', 'google_mae', 'google_acc'
        ]
        writer = csv.DictWriter(out, fieldnames=columns)
        writer.writeheader()
        writer.writerows(stats_rows)
def compute_class_label_distribution(labels, mode):
    """Count how many labeled queries fall into each class under *mode*.

    Queries containing 'dummy' and falsy labels are skipped.
    NOTE(review): 'actual_initial' is initialized but never incremented —
    confirm whether a class maps to it.
    """
    dist = {
        'actual_rejects': 0,
        'actual_neutral': 0,
        'actual_support': 0,
        'actual_initial': 0,
    }
    # Map each known class constant onto its counter key.
    bucket_for = {
        REJECT: 'actual_rejects',
        NEUTRAL: 'actual_neutral',
        SUPPORT: 'actual_support',
    }
    for query, raw_label in labels.items():
        if 'dummy' in query or not raw_label:
            continue
        bucket = bucket_for.get(get_class(int(raw_label), mode))
        if bucket is not None:
            dist[bucket] += 1
    return dist
def test_query_set2(model, queries, method):
    """Evaluate *model* on each query's dataframe; print and return MAE/accuracy.

    Predictions are averaged per query, mapped to a class via
    dataHelper.get_class, and compared with the mean of the actual labels.
    Returns a Stats(mae, acc) summary.
    """
    abs_errors = []
    num_accurate = 0
    for query, frame in queries.items():
        __, X, Y = dataHelper.split_x_y(frame, method)
        predicted_y = model.predict(X)
        mean_prediction = np.mean(predicted_y)
        actual_value = np.mean(Y)
        prediction = dataHelper.get_class(mean_prediction)
        # Exact match between predicted class and mean actual value counts
        # as an accurate query.
        if prediction - actual_value == 0:
            num_accurate += 1
        abs_errors.append(np.math.fabs(actual_value - prediction))
        print(query)
        print(' predicted value:' + str(np.mean(predicted_y)))
        print(' actual value: ' + str(np.mean(Y)))
    mae = np.mean(abs_errors)
    acc = num_accurate / len(queries)
    print('Total mean absolute mean error:' + str(mae))
    print(' Accuracy:' + str(acc))
    return Stats(mae, acc)
def create_report_files_old(report_fname, confusion_fname, queries, learners,
                            predictions, labels):
    """Write a per-query prediction report plus a confusion/metrics CSV.

    report_fname gets one row per query with, for each model (the learners
    plus a 'majority' baseline), its class/value prediction, a 0/1 accuracy
    flag, absolute value error, and 0/1 pessimism/optimism flags on both the
    value and class scales. confusion_fname gets one row per metric key with
    one column per model. Assumes predictions['majority'] is already
    populated by the caller (see the commented-out line below).
    """
    with open(report_fname, 'w', encoding='utf-8', newline='') as out:
        model_names = [learner.model_name() for learner in learners]
        model_names.append('majority')
        #predictions['majority'] = majority_classifier.get_predictions()
        fieldnames = ['query', 'value_label', 'class_label']
        fieldnames.extend(
            [model_name + '_class' for model_name in model_names])
        fieldnames.extend(
            [model_name + '_value' for model_name in model_names])
        fieldnames.extend([model_name + '_acc' for model_name in model_names])
        fieldnames.extend(
            [model_name + '_error' for model_name in model_names])
        fieldnames.extend(
            [model_name + '_val_pessim' for model_name in model_names])
        fieldnames.extend(
            [model_name + '_val_optim' for model_name in model_names])
        fieldnames.extend(
            [model_name + '_class_pessim' for model_name in model_names])
        fieldnames.extend(
            [model_name + '_class_optim' for model_name in model_names])
        writer = csv.DictWriter(out, fieldnames=fieldnames)
        writer.writeheader()
        # One Metrics accumulator per model, updated while rows are written.
        metrics = {model_name: Metrics() for model_name in model_names}
        for q in queries:
            value_label = labels[q]
            class_label = get_class(value_label)
            row = {
                'query': q,
                'value_label': value_label,
                'class_label': class_label
            }
            for model_name in model_names:
                predicted_class = predictions[model_name][q].class_prediction
                predicted_val = predictions[model_name][q].mean_prediction
                metrics[model_name].update_confusion(
                    actual_class=class_label, prediction_class=predicted_class)
                metrics[model_name].update_prediction_err(
                    actual_val=value_label, prediction_val=predicted_val,
                    actual_class=class_label, prediction_class=predicted_class)
                row[model_name + "_class"] = predicted_class
                row[model_name + "_value"] = predicted_val
                row[model_name + "_acc"] = int(class_label == predicted_class)
                row[model_name + "_error"] = math.fabs(value_label -
                                                       predicted_val)
                # Pessim/optim: did the model under/over-shoot the label?
                row[model_name + "_val_pessim"] = 1 if predicted_val < value_label else 0
                row[model_name + "_val_optim"] = 1 if predicted_val > value_label else 0
                row[model_name + "_class_pessim"] = 1 if predicted_class < class_label else 0
                row[model_name + "_class_optim"] = 1 if predicted_class > class_label else 0
            writer.writerow(row)
        # Finalize accumulated metrics before dumping the confusion report.
        for model_name in model_names:
            metrics[model_name].process_results()
    with open(confusion_fname, 'w', encoding='utf-8', newline='') as conf_out:
        fieldnames = ['metric_name']
        fieldnames.extend([model_name for model_name in model_names])
        writer = csv.DictWriter(conf_out, fieldnames=fieldnames)
        writer.writeheader()
        # 'majority' always exists, so its conf keys define the metric rows.
        for k in metrics['majority'].conf.keys():
            row = {'metric_name': k}
            for model_name in model_names:
                row[model_name] = metrics[model_name].conf[k]
            writer.writerow(row)
def create_query_report_file(report_fname,
                             input_dir,
                             feature_file,
                             queries,
                             learners,
                             predictions,
                             labels,
                             val2class,
                             md=False):
    """Write a per-query prediction report and a misprediction feature file.

    report_fname gets one row per (non-dummy) query with, for each learner,
    its class/value prediction, a 0/1 accuracy flag, absolute value error,
    and 0/1 pessimism/optimism flags on both the value and class scales.
    val2class selects the value-to-class mapping mode. When md is True,
    queries without an entry in labels are skipped. Queries each model got
    wrong are collected and handed to
    create_false_predictions_feature_file at the end.
    """
    with open(report_fname, 'w', encoding='utf-8', newline='') as out:
        model_names = [learner.model_name() for learner in learners]
        # Per-model map of query -> wrong predicted class, for the feature file.
        error_queries = {x: {} for x in model_names}
        fieldnames = ['query', 'value_label', 'class_label']
        fieldnames.extend(
            [model_name + '_class' for model_name in model_names])
        fieldnames.extend(
            [model_name + '_value' for model_name in model_names])
        fieldnames.extend([model_name + '_acc' for model_name in model_names])
        fieldnames.extend(
            [model_name + '_error' for model_name in model_names])
        fieldnames.extend(
            [model_name + '_val_pessim' for model_name in model_names])
        fieldnames.extend(
            [model_name + '_val_optim' for model_name in model_names])
        fieldnames.extend(
            [model_name + '_class_pessim' for model_name in model_names])
        fieldnames.extend(
            [model_name + '_class_optim' for model_name in model_names])
        writer = csv.DictWriter(out, fieldnames=fieldnames)
        writer.writeheader()
        for q in queries:
            if 'dummy' in q:
                continue
            # In MD mode, only queries with an MD label are reported.
            if md and not q in labels:
                continue
            value_label = labels[q]
            class_label = get_class(value_label, val2class)
            row = {
                'query': q,
                'value_label': value_label,
                'class_label': class_label
            }
            for model_name in model_names:
                predicted_class = predictions[model_name][q].class_prediction
                predicted_val = predictions[model_name][q].mean_prediction
                if predicted_class != class_label:
                    error_queries[model_name][q] = predicted_class
                row[model_name + "_class"] = predicted_class
                row[model_name + "_value"] = predicted_val
                row[model_name + "_acc"] = int(class_label == predicted_class)
                row[model_name + "_error"] = math.fabs(value_label -
                                                       predicted_val)
                # Pessim/optim: did the model under/over-shoot the label?
                row[model_name + "_val_pessim"] = 1 if predicted_val < value_label else 0
                row[model_name + "_val_optim"] = 1 if predicted_val > value_label else 0
                row[model_name + "_class_pessim"] = 1 if predicted_class < class_label else 0
                row[model_name + "_class_optim"] = 1 if predicted_class > class_label else 0
            writer.writerow(row)
    create_false_predictions_feature_file(input_dir, feature_file,
                                          error_queries)