Example #1
def get_results_from_file(filename):
    with open(filename, encoding='utf-8', newline='') as queries_csv:
        reader = csv.DictReader(queries_csv)
        fieldnames = reader.fieldnames
        model_names = []
        for field in fieldnames:
            split_field = field.split('_')
            if len(split_field) > 1 and split_field[1] == 'value':
                model_names.append(split_field[0])
        results = []
        for row in reader:
            query_name = row['query']
            value_label = row['value_label']
            class_label = get_class(value_label)
            result_entry = {
                'query': query_name,
                'value_label': value_label,
                'class_label': class_label
            }
            for model in model_names:
                model_value_entry = model + '_value'
                model_value_prediction = int(row[model_value_entry])
                model_class_prediction = get_class(model_value_prediction)
                result_entry[model] = Prediction(model_value_prediction,
                                                 model_class_prediction)
            # Append once per row, after all model predictions are collected.
            results.append(result_entry)
        return results, model_names
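
A minimal usage sketch, assuming a hypothetical results.csv whose header holds a 'query' column, a 'value_label' column, and one '<model>_value' column per model, and assuming the Prediction objects expose the mean_prediction/class_prediction fields used in the report-writing examples further down (file name and model names are illustrative, not from the original project):

# Hypothetical input: results.csv with e.g. columns
#   query, value_label, svm_value, bert_value
results, model_names = get_results_from_file('results.csv')
for entry in results:
    for model in model_names:
        prediction = entry[model]
        print(entry['query'], model, prediction.mean_prediction,
              prediction.class_prediction)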
Example #2
    def update_metrics(self, value_label, model_value_prediction):
        if model_value_prediction < 0:
            # Negative values mark missing predictions; nothing to update.
            return
        class_label = get_class(value_label, self.mode)
        model_class_prediction = get_class(model_value_prediction, self.mode)
        self.conf['num_rel'] += 1
        self.update_confusion(actual_class=class_label,
                              prediction_class=model_class_prediction)
        self.update_prediction_err(actual_val=value_label,
                                   prediction_val=model_value_prediction,
                                   actual_class=class_label,
                                   prediction_class=model_class_prediction)
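
A calling sketch, assuming update_metrics belongs to the Metrics class instantiated in the report examples below and that its constructor initialises self.mode and self.conf['num_rel'] (the label/prediction pairs here are made up):

metrics = Metrics()
for value_label, model_value in [(4, 3), (2, -1), (5, 5)]:
    # Pairs with a negative model prediction are skipped by the method.
    metrics.update_metrics(value_label, model_value)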
Example #3
def cmp_md(md_file, query_report_full_file_name, output_file, reports_dir,
           feature_file):
    md_labels = get_md_labels(md_file)
    md_rows = []
    with open(query_report_full_file_name, 'r', encoding='utf-8',
              newline='') as report_csv:
        reader = csv.DictReader(report_csv)
        fieldnames = reader.fieldnames

        for row in reader:
            query = row['query']
            if query in md_labels:
                new_row = {'query': row['query']}
                for f in fieldnames:
                    if f == 'query':
                        continue
                    new_row['annotators_' + f] = row[f]
                new_row['value_label'] = md_labels[query]
                new_row['class_label'] = get_class(
                    md_labels[query], ValToClassMode.THREE_CLASSES_PESSIMISTIC)
                md_rows.append(new_row)

        with open(reports_dir + output_file + '.csv',
                  'w',
                  encoding='utf-8',
                  newline='') as out:
            # Match the keys set on each row above: report columns get an
            # 'annotators_' prefix, the MD labels keep their plain names.
            out_fieldnames = ['query']
            out_fieldnames.extend(
                ['annotators_' + f for f in fieldnames if f != 'query'])
            out_fieldnames.extend(['value_label', 'class_label'])
            writer = csv.DictWriter(out, fieldnames=out_fieldnames)
            writer.writeheader()
            for row in md_rows:
                writer.writerow(row)
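
An illustrative call; every path below is hypothetical. The query report is expected to come from create_query_report_file (see the last example), and feature_file is accepted but unused in the snippet above:

cmp_md(md_file='labels/md_labels.csv',
       query_report_full_file_name='reports/query_report.csv',
       output_file='md_comparison',
       reports_dir='reports/',
       feature_file=None)
# Writes reports/md_comparison.csv with the annotator columns prefixed
# by 'annotators_' next to the MD value/class labels.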
Example #4
    def test_query_set(self, queries):
        errors = []
        accurate = 0
        for query, df in queries.items():
            x, y = dataHelper.split_x_y(df)
            X = Variable(torch.Tensor(x).float())
            out = self.net(X)
            _, predicted = torch.max(out.data, 1)
            print(query)
            y_predicted = predicted.numpy()
            rms = np.sqrt(mean_squared_error(y, y_predicted))
            mean_prediction = np.mean(y_predicted)
            actual_value = np.mean(y)
            prediction = dataHelper.get_class(mean_prediction)
            if prediction - actual_value == 0:
                accurate += 1
            errors.append(math.fabs(actual_value - prediction))

            print(' predicted value:' + str(mean_prediction))
            print(' actual value: ' + str(actual_value))
            print('root mean squared error:' + str(rms))
        mae = np.mean(errors)
        acc = accurate / len(queries)

        print('Mean absolute error: ' + str(mae))
        print('Accuracy: ' + str(acc))
        return Stats(mae=mae, acc=acc)
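
A usage sketch, assuming tester is an already trained instance of the enclosing class (so self.net is a fitted network), queries maps each query name to a DataFrame that dataHelper.split_x_y can split, and Stats is a simple record type (e.g. a namedtuple); all names are illustrative:

stats = tester.test_query_set(queries)
print(stats.mae, stats.acc)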
Example #5
def gen_google_labels_error_report(label_file, google_file, output_file):
    actual_values = gen_actual_labels_dict(label_file)
    google_stats = []
    with open(google_file, 'r', newline='') as res_csv:
        res_reader = csv.DictReader(res_csv)
        for row in res_reader:
            query = row['query']
            value_label = int(actual_values[query])
            actual_class = get_class(value_label,
                                     ValToClassMode.THREE_CLASSES_PESSIMISTIC)
            predicted_value = int(row['Google_value'])
            if predicted_value < 0:
                continue
            predicted_class = get_class(
                predicted_value, ValToClassMode.THREE_CLASSES_PESSIMISTIC)
            mae = math.fabs(value_label - predicted_value)
            acc = int(actual_class == predicted_class)
            google_stats.append({
                'query': query,
                'value_label': value_label,
                'class_label': actual_class,
                'predicted_value': predicted_value,
                'predicted_class': predicted_class,
                'google_mae': mae,
                'google_acc': acc
            })

    with open(output_file, 'w', encoding='utf-8', newline='') as out:
        fieldnames = [
            'query', 'value_label', 'class_label', 'predicted_value',
            'predicted_class', 'google_mae', 'google_acc'
        ]
        writer = csv.DictWriter(out, fieldnames=fieldnames)
        writer.writeheader()
        for stat_entry in google_stats:
            writer.writerow(stat_entry)
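
A call sketch with hypothetical file names; the Google results CSV must contain the 'query' and 'Google_value' columns read above, and label_file is whatever gen_actual_labels_dict expects:

gen_google_labels_error_report(label_file='labels/actual_labels.csv',
                               google_file='google/google_results.csv',
                               output_file='reports/google_error_report.csv')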
Example #6
def compute_class_label_distribution(labels, mode):
    dist = {
        'actual_rejects': 0,
        'actual_neutral': 0,
        'actual_support': 0,
        'actual_initial': 0
    }
    for q, value_label in labels.items():
        if 'dummy' in q or not value_label:
            continue
        class_label = get_class(int(value_label), mode)
        if class_label == REJECT:
            dist['actual_rejects'] += 1
        elif class_label == NEUTRAL:
            dist['actual_neutral'] += 1
        elif class_label == SUPPORT:
            dist['actual_support'] += 1
    return dist
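
A small usage sketch; the labels dict is illustrative, ValToClassMode.THREE_CLASSES_PESSIMISTIC is the mode used in the other examples, and the resulting counts depend on how get_class maps values to classes:

# 'dummy' queries and empty labels are skipped by the function.
labels = {'q1': '0', 'q2': '3', 'dummy_q7': '5', 'q3': ''}
dist = compute_class_label_distribution(
    labels, ValToClassMode.THREE_CLASSES_PESSIMISTIC)
print(dist)  # {'actual_rejects': ..., 'actual_neutral': ..., ...}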
Example #7
def test_query_set2(model, queries, method):
    errors = []
    accurate = 0
    for query, df in queries.items():
        __, X, Y = dataHelper.split_x_y(df, method)
        predicted_y = model.predict(X)
        mean_prediction = np.mean(predicted_y)
        actual_value = np.mean(Y)
        prediction = dataHelper.get_class(mean_prediction)
        if prediction - actual_value == 0:
            accurate += 1
        errors.append(math.fabs(actual_value - prediction))
        print(query)
        print(' predicted value:' + str(np.mean(predicted_y)))
        print(' actual value: ' + str(np.mean(Y)))
    mae = np.mean(errors)
    acc = accurate / len(queries)
    print('Mean absolute error: ' + str(mae))
    print('Accuracy: ' + str(acc))
    return Stats(mae, acc)
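
Essentially the scikit-learn-style counterpart of test_query_set above; a hedged call sketch where model is any fitted estimator with a predict method, and method is whatever dataHelper.split_x_y expects as its second argument (the value below is a placeholder):

stats = test_query_set2(trained_model, queries, method='group')  # 'group' is a placeholder
print(stats.mae, stats.acc)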
Example #8
def create_report_files_old(report_fname, confusion_fname, queries, learners,
                            predictions, labels):
    with open(report_fname, 'w', encoding='utf-8', newline='') as out:
        model_names = [learner.model_name() for learner in learners]
        model_names.append('majority')
        #predictions['majority'] = majority_classifier.get_predictions()
        fieldnames = ['query', 'value_label', 'class_label']
        # One column group per metric, keeping the original column order.
        for suffix in ('_class', '_value', '_acc', '_error', '_val_pessim',
                       '_val_optim', '_class_pessim', '_class_optim'):
            fieldnames.extend(
                [model_name + suffix for model_name in model_names])
        writer = csv.DictWriter(out, fieldnames=fieldnames)
        writer.writeheader()
        metrics = {model_name: Metrics() for model_name in model_names}
        for q in queries:
            value_label = labels[q]
            class_label = get_class(value_label)
            row = {
                'query': q,
                'value_label': value_label,
                'class_label': class_label
            }
            for model_name in model_names:
                predicted_class = predictions[model_name][q].class_prediction
                predicted_val = predictions[model_name][q].mean_prediction
                metrics[model_name].update_confusion(
                    actual_class=class_label, prediction_class=predicted_class)
                metrics[model_name].update_prediction_err(
                    actual_val=value_label,
                    prediction_val=predicted_val,
                    actual_class=class_label,
                    prediction_class=predicted_class)
                row[model_name + "_class"] = predicted_class
                row[model_name + "_value"] = predicted_val
                row[model_name + "_acc"] = int(class_label == predicted_class)
                row[model_name + "_error"] = math.fabs(value_label -
                                                       predicted_val)
                row[model_name +
                    "_val_pessim"] = 1 if predicted_val < value_label else 0
                row[model_name +
                    "_val_optim"] = 1 if predicted_val > value_label else 0
                row[model_name +
                    "_class_pessim"] = 1 if predicted_class < class_label else 0
                row[model_name +
                    "_class_optim"] = 1 if predicted_class > class_label else 0
            writer.writerow(row)

        for model_name in model_names:
            metrics[model_name].process_results()

        with open(confusion_fname, 'w', encoding='utf-8',
                  newline='') as conf_out:
            fieldnames = ['metric_name']
            fieldnames.extend(model_names)
            writer = csv.DictWriter(conf_out, fieldnames=fieldnames)
            writer.writeheader()
            for k in metrics['majority'].conf.keys():
                row = {'metric_name': k}
                for model_name in model_names:
                    row[model_name] = metrics[model_name].conf[k]
                writer.writerow(row)
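
A sketch of the inputs this writer expects, inferred from the accesses above; all names and values are illustrative. Note that model_names gets a 'majority' entry, so predictions must also contain one (the commented-out line above hints at where it used to come from):

# predictions: {model_name: {query: object with .mean_prediction and
#                            .class_prediction}}
# labels:      {query: value_label}
create_report_files_old('reports/report.csv', 'reports/confusion.csv',
                        queries, learners, predictions, labels)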
Example #9
def create_query_report_file(report_fname,
                             input_dir,
                             feature_file,
                             queries,
                             learners,
                             predictions,
                             labels,
                             val2class,
                             md=False):
    with open(report_fname, 'w', encoding='utf-8', newline='') as out:
        model_names = [learner.model_name() for learner in learners]
        error_queries = {x: {} for x in model_names}
        fieldnames = ['query', 'value_label', 'class_label']
        # One column group per metric, keeping the original column order.
        for suffix in ('_class', '_value', '_acc', '_error', '_val_pessim',
                       '_val_optim', '_class_pessim', '_class_optim'):
            fieldnames.extend(
                [model_name + suffix for model_name in model_names])
        writer = csv.DictWriter(out, fieldnames=fieldnames)
        writer.writeheader()
        for q in queries:
            if 'dummy' in q:
                continue
            if md and q not in labels:
                continue
            value_label = labels[q]
            class_label = get_class(value_label, val2class)
            row = {
                'query': q,
                'value_label': value_label,
                'class_label': class_label
            }
            for model_name in model_names:
                predicted_class = predictions[model_name][q].class_prediction
                predicted_val = predictions[model_name][q].mean_prediction
                if predicted_class != class_label:
                    error_queries[model_name][q] = predicted_class
                row[model_name + "_class"] = predicted_class
                row[model_name + "_value"] = predicted_val
                row[model_name + "_acc"] = int(class_label == predicted_class)
                row[model_name + "_error"] = math.fabs(value_label -
                                                       predicted_val)
                row[model_name +
                    "_val_pessim"] = 1 if predicted_val < value_label else 0
                row[model_name +
                    "_val_optim"] = 1 if predicted_val > value_label else 0
                row[model_name +
                    "_class_pessim"] = 1 if predicted_class < class_label else 0
                row[model_name +
                    "_class_optim"] = 1 if predicted_class > class_label else 0
            writer.writerow(row)
    create_false_predictions_feature_file(input_dir, feature_file,
                                          error_queries)