Example #1
# NOTE: these examples are excerpts from one larger module; the imports below
# cover the names they use. `utils`, `logger`, `delim_1`, `delim_2`, `Config`
# and `EarlyClassifier` are assumed to be defined elsewhere in that module.
import time
from collections import Counter
from datetime import timedelta
from typing import List, Tuple

import click
import pandas as pd
import pytest
from sklearn.model_selection import StratifiedKFold
def train_and_test(config: Config, classifier: EarlyClassifier) -> None:
    predictions = []

    if config.variate == 1 or config.strategy == 'merge' or config.strategy == 'normal':
        if config.variate > 1 and config.strategy == 'merge':
            logger.info("Merging multivariate time-series ...")
            config.train_data = [utils.df_merge(config.train_data)]
            config.test_data = [utils.df_merge(config.test_data)]
        if config.java:
            # Java-based algorithms (e.g. TEASER, ECTS) read the train/test sets from CSV files
            temp = pd.concat([config.train_labels, config.train_data[0]], axis=1, sort=False)
            temp.to_csv('train', index=False, header=False, sep=delim_1)
            temp2 = pd.concat([config.test_labels, config.test_data[0]], axis=1, sort=False)
            temp2.to_csv('test', index=False, header=False, sep=delim_2)
            res = classifier.predict(pd.DataFrame())
            predictions = res[0]
            click.echo('Total training time := {}'.format(timedelta(seconds=float(res[1]))),
                       file=config.output)
            click.echo('Total testing time := {}'.format(timedelta(seconds=float(res[2]))),
                       file=config.output)
        elif config.cplus:
            # Class counts per label, sorted by index, as expected by the C++ backend
            counts = config.train_labels.value_counts().sort_index()
            classifier.train(config.train_data[0], config.train_labels)
            res = classifier.predict2(test_data=config.test_data[0], labels=config.test_labels,
                                      numbers=counts, types=1)
            predictions = res[0]
            click.echo('Total training time := {}'.format(timedelta(seconds=float(res[1]))),
                       file=config.output)
            click.echo('Total testing time := {}'.format(timedelta(seconds=float(res[2]))),
                       file=config.output)
        elif config.strategy == 'normal':
            # Train and predict in a single call; the classifier reports its own timings
            result = classifier.true_predict(config.train_data, config.test_data, config.train_labels,
                                             config.test_labels)
            predictions = result[0]
            click.echo('Total training time := {}'.format(timedelta(seconds=result[1])), file=config.output)
            click.echo('Total testing time := {}'.format(timedelta(seconds=result[2])), file=config.output)
            click.echo('Best earliness := {}'.format(result[3]), file=config.output)
        else:
            # Train the classifier
            start = time.time()
            classifier.train(config.train_data[0], config.train_labels)
            click.echo('Total training time := {}'.format(timedelta(seconds=time.time() - start)), file=config.output)

            # Make predictions
            start = time.time()
            predictions = classifier.predict(config.test_data[0])
            click.echo('Total testing time := {}'.format(timedelta(seconds=time.time() - start)), file=config.output)
    else:
        logger.info("Voting over the multivariate time-series attributes ...")

        votes = []
        for i in range(config.variate):
            if config.java:
                # Java-based algorithms (e.g. TEASER, ECTS) read the train/test sets from CSV files
                temp = pd.concat([config.train_labels, config.train_data[i]], axis=1, sort=False)
                temp.to_csv('train', index=False, header=False, sep=delim_1)
                temp2 = pd.concat([config.test_labels, config.test_data[i]], axis=1, sort=False)
                temp2.to_csv('test', index=False, header=False, sep=delim_2)
                res = classifier.predict(pd.DataFrame())
                votes.append(res[0])
                click.echo('Total training time := {}'.format(timedelta(seconds=float(res[1]))),
                           file=config.output)
                click.echo('Total testing time := {}'.format(timedelta(seconds=float(res[2]))),
                           file=config.output)
            elif config.cplus:
                # Class counts per label, sorted by index, as expected by the C++ backend
                counts = config.train_labels.value_counts().sort_index()
                classifier.train(config.train_data[i], config.train_labels)
                res = classifier.predict2(test_data=config.test_data[i], labels=config.test_labels,
                                          numbers=counts, types=1)
                votes.append(res[0])
                click.echo('Total training time := {}'.format(timedelta(seconds=float(res[1]))),
                           file=config.output)
                click.echo('Total testing time := {}'.format(timedelta(seconds=float(res[2]))),
                           file=config.output)
            elif config.strategy == 'normal':
                # Train and predict in a single call; the classifier reports its own timings
                result = classifier.true_predict(config.train_data, config.test_data, config.train_labels,
                                                 config.test_labels)
                votes.append(result[0])
                click.echo('Total training time := {}'.format(timedelta(seconds=result[1])), file=config.output)
                click.echo('Total testing time := {}'.format(timedelta(seconds=result[2])), file=config.output)
                click.echo('Best earliness := {}'.format(result[3]), file=config.output)
            else:
                # Train the classifier
                start = time.time()
                classifier.train(config.train_data[i], config.train_labels)
                click.echo('Total training time := {}'.format(timedelta(seconds=time.time() - start)), file=config.output)

                # Make predictions
                start = time.time()
                votes.append(classifier.predict(config.test_data[i]))
                click.echo('Total testing time := {}'.format(timedelta(seconds=time.time() - start)), file=config.output)

        # Combine per-attribute votes: keep the latest decision timestamp and the majority label
        for i in range(len(votes[0])):
            max_timestamp = max(map(lambda x: x[i][0], votes))
            most_predicted = Counter(map(lambda x: x[i][1], votes)).most_common(1)[0][0]
            predictions.append((max_timestamp, most_predicted))

    # Calculate accuracy and earliness
    accuracy = utils.accuracy(predictions, config.test_labels.tolist())
    earliness = utils.earliness(predictions, config.ts_length - 1)
    harmonic = utils.harmonic_mean(accuracy, earliness)
    click.echo('Accuracy: ' + str(round(accuracy, 4)) + ' Earliness: ' + str(round(earliness * 100, 4)) + '%',
               file=config.output)
    click.echo('Harmonic mean: ' + str(round(harmonic, 4)),
               file=config.output)

    # Calculate counts, precision, recall and F1-score; a target class of -1 reports every class
    if config.target_class == -1:
        items = config.train_labels.unique()
        for item in items:
            click.echo('For the class: ' + str(item), file=config.output)
            config.target_class = item
            tp, tn, fp, fn = utils.counts(config.target_class, predictions, config.test_labels)
            click.echo('TP: ' + str(tp) + ' TN: ' + str(tn) + ' FP: ' + str(fp) + ' FN: ' + str(fn), file=config.output)
            precision = utils.precision(tp, fp)
            click.echo('Precision: ' + str(round(precision, 4)), file=config.output)
            recall = utils.recall(tp, fn)
            click.echo('Recall: ' + str(round(recall, 4)), file=config.output)
            f1 = utils.f_measure(tp, fp, fn)
            click.echo('F1-score: ' + str(round(f1, 4)) + "\n", file=config.output)
            click.echo('Predictions: ' + str(predictions), file=config.output)
    elif config.target_class:
        tp, tn, fp, fn = utils.counts(config.target_class, predictions, config.test_labels)
        click.echo('TP: ' + str(tp) + ' TN: ' + str(tn) + ' FP: ' + str(fp) + ' FN: ' + str(fn), file=config.output)
        precision = utils.precision(tp, fp)
        click.echo('Precision: ' + str(round(precision, 4)), file=config.output)
        recall = utils.recall(tp, fn)
        click.echo('Recall: ' + str(round(recall, 4)), file=config.output)
        f1 = utils.f_measure(tp, fp, fn)
        click.echo('F1-score: ' + str(round(f1, 4)), file=config.output)
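
The voting branch above combines the per-attribute early predictions into one decision per test example: it keeps the latest decision timestamp among the attributes (the ensemble can only decide once every attribute has decided) and takes a majority vote over the predicted labels. A minimal standalone sketch of that step, using hypothetical vote lists for two attributes and three test examples:

from collections import Counter

# Hypothetical per-attribute predictions: one list per attribute, holding one
# (decision_timestamp, predicted_label) tuple per test example.
votes = [
    [(12, 1), (30, 0), (25, 1)],  # attribute 0
    [(18, 1), (22, 1), (25, 0)],  # attribute 1
]

predictions = []
for i in range(len(votes[0])):
    # Latest timestamp across attributes: when the combined decision is ready
    max_timestamp = max(v[i][0] for v in votes)
    # Majority label; Counter.most_common(1) breaks ties by first occurrence
    most_predicted = Counter(v[i][1] for v in votes).most_common(1)[0][0]
    predictions.append((max_timestamp, most_predicted))

print(predictions)  # [(18, 1), (30, 0), (25, 1)]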
Example #2
def cv(config: Config, classifier: EarlyClassifier) -> None:
    sum_accuracy, sum_earliness, sum_precision, sum_recall, sum_f1 = 0, 0, 0, 0, 0
    predictions = []
    all_predictions: List[Tuple[int, int]] = list()
    all_labels: List[int] = list()
    indices = zip(StratifiedKFold(config.folds).split(config.cv_data[0], config.cv_labels),
                  range(1, config.folds + 1))

    for ((train_indices, test_indices), i) in indices:

        click.echo('== Fold ' + str(i), file=config.output)
        if config.variate == 1 or config.strategy == 'merge' or config.strategy == 'normal':

            # 'merge' turns a multivariate time-series into a univariate one
            if config.variate > 1 and config.strategy == 'merge':
                logger.info("Merging multivariate time-series ...")
                config.cv_data = [utils.df_merge(config.cv_data)]
            # 'normal' is used by algorithms that natively support multivariate time-series
            if config.variate > 1 and config.strategy == 'normal':
                fold_train_data = [config.cv_data[ii].iloc[train_indices].reset_index(drop=True)
                                   for ii in range(config.variate)]
                fold_test_data = [config.cv_data[ii].iloc[test_indices].reset_index(drop=True)
                                  for ii in range(config.variate)]
            else:
                fold_train_data = config.cv_data[0].iloc[train_indices].reset_index(drop=True)
                fold_test_data = config.cv_data[0].iloc[test_indices].reset_index(drop=True)
            # Labels are shared across attributes
            fold_train_labels = config.cv_labels[train_indices].reset_index(drop=True)
            fold_test_labels = config.cv_labels[test_indices].reset_index(drop=True)

            """In case we call algorithms implemented in Java (TEASER, ECTS)"""
            if config.java is True:
                temp = pd.concat([fold_train_labels, fold_train_data], axis=1, sort=False)
                temp.to_csv('train', index=False, header=False, sep=delim_1)
                temp2 = pd.concat([config.cv_labels[test_indices].reset_index(drop=True), fold_test_data], axis=1,
                                  sort=False)
                temp2.to_csv('test', index=False, header=False, sep=delim_2)
                predictions = classifier.predict(pd.DataFrame())
            elif config.cplus:
                classifier.train(fold_train_data, fold_train_labels)
                # Class counts per label (descending index order), as expected by the C++ backend
                counts = fold_train_labels.value_counts().sort_index(ascending=False)
                res = classifier.predict2(test_data=fold_test_data, labels=fold_test_labels,
                                          numbers=counts, types=0)
                predictions = res[0]
                click.echo('Total training time := {}'.format(timedelta(seconds=float(res[1]))),
                           file=config.output)
                click.echo('Total testing time := {}'.format(timedelta(seconds=float(res[2]))),
                           file=config.output)
            else:
                # MLSTM-style classifiers train and predict in a single call and report their own timings
                result = classifier.true_predict(fold_train_data, fold_test_data, fold_train_labels,
                                                 fold_test_labels)
                predictions = result[0]
                click.echo('Total training time := {}'.format(timedelta(seconds=result[1])), file=config.output)
                click.echo('Total testing time := {}'.format(timedelta(seconds=result[2])), file=config.output)
                click.echo('Best earliness := {}'.format(result[3]), file=config.output)
        else:
            votes = []
            predictions = []
            for ii in range(config.variate):
                fold_train_data = config.cv_data[ii].iloc[train_indices].reset_index(drop=True)
                fold_train_labels = config.cv_labels[train_indices].reset_index(drop=True)
                fold_test_data = config.cv_data[ii].iloc[test_indices].reset_index(drop=True)
                fold_test_labels = config.cv_labels[test_indices].reset_index(drop=True)
                if config.java:
                    temp = pd.concat([fold_train_labels, fold_train_data], axis=1, sort=False)
                    temp.to_csv('train', index=False, header=False, sep=delim_1)
                    temp2 = pd.concat([fold_test_labels, fold_test_data], axis=1, sort=False)
                    temp2.to_csv('test', index=False, header=False, sep=delim_2)
                    res = classifier.predict(pd.DataFrame())
                    votes.append(res[0])
                    click.echo('Total training time := {}'.format(timedelta(seconds=float(res[1]))),
                               file=config.output)
                    click.echo('Total testing time := {}'.format(timedelta(seconds=float(res[2]))),
                               file=config.output)
                elif config.cplus:
                    classifier.train(fold_train_data, fold_train_labels)
                    # Class counts per label (descending index order), as expected by the C++ backend
                    counts = fold_train_labels.value_counts().sort_index(ascending=False)
                    res = classifier.predict2(test_data=fold_test_data, labels=fold_test_labels,
                                              numbers=counts, types=0)
                    votes.append(res[0])
                    click.echo('Total training time := {}'.format(timedelta(seconds=float(res[1]))),
                               file=config.output)
                    click.echo('Total testing time := {}'.format(timedelta(seconds=float(res[2]))),
                               file=config.output)
                else:
                    # Train the classifier
                    start = time.time()
                    classifier.train(fold_train_data, fold_train_labels)
                    click.echo('Total training time := {}'.format(timedelta(seconds=time.time() - start)),
                               file=config.output)

                    # Make predictions
                    start = time.time()
                    votes.append(classifier.predict(fold_test_data))
                    click.echo('Total testing time := {}'.format(timedelta(seconds=time.time() - start)),
                               file=config.output)

            # Combine per-attribute votes: keep the latest decision timestamp and the majority label
            for ii in range(len(votes[0])):
                max_timestamp = max(map(lambda x: x[ii][0], votes))
                most_predicted = Counter(map(lambda x: x[ii][1], votes)).most_common(1)[0][0]
                predictions.append((max_timestamp, most_predicted))

        all_predictions.extend(predictions)
        all_labels.extend(config.cv_labels[test_indices])

        # Calculate accuracy and earliness
        accuracy = utils.accuracy(predictions, config.cv_labels[test_indices].tolist())
        sum_accuracy += accuracy
        earliness = utils.earliness(predictions, config.ts_length - 1)
        sum_earliness += earliness
        click.echo('Accuracy: ' + str(round(accuracy, 4)) + ' Earliness: ' + str(round(earliness * 100, 4)) + '%',
                   file=config.output)

        # Calculate counts, precision, recall and f1-score if a target class is provided
        if config.target_class:
            tp, tn, fp, fn = utils.counts(config.target_class, predictions, config.cv_labels[test_indices].tolist())
            click.echo('TP: ' + str(tp) + ' TN: ' + str(tn) + ' FP: ' + str(fp) + ' FN: ' + str(fn), file=config.output)
            precision = utils.precision(tp, fp)
            sum_precision += precision
            click.echo('Precision: ' + str(round(precision, 4)), file=config.output)
            recall = utils.recall(tp, fn)
            sum_recall += recall
            click.echo('Recall: ' + str(round(recall, 4)), file=config.output)
            f1 = utils.f_measure(tp, fp, fn)
            sum_f1 += f1
            click.echo('F1-score: ' + str(round(f1, 4)), file=config.output)
    click.echo('== Macro-average', file=config.output)
    macro_accuracy = sum_accuracy / config.folds
    macro_earliness = sum_earliness / config.folds
    click.echo('Accuracy: ' + str(round(macro_accuracy, 4)) +
               ' Earliness: ' + str(round(macro_earliness * 100, 4)) + '%',
               file=config.output)

    if config.target_class:
        macro_precision = sum_precision / config.folds
        macro_recall = sum_recall / config.folds
        macro_f1 = sum_f1 / config.folds
        click.echo('Precision: ' + str(round(macro_precision, 4)), file=config.output)
        click.echo('Recall: ' + str(round(macro_recall, 4)), file=config.output)
        click.echo('F1-score: ' + str(round(macro_f1, 4)), file=config.output)

    click.echo('== Micro-average', file=config.output)
    micro_accuracy = utils.accuracy(all_predictions, all_labels)
    micro_earliness = utils.earliness(all_predictions, config.ts_length - 1)
    click.echo('Accuracy: ' + str(round(micro_accuracy, 4)) +
               ' Earliness: ' + str(round(micro_earliness * 100, 4)) + '%',
               file=config.output)
    # Calculate counts, precision, recall and F1-score; a target class of -1 reports every class
    if config.target_class == -1:
        items = config.cv_labels.unique()
        for item in items:
            click.echo('For the class: ' + str(item), file=config.output)
            config.target_class = item
            tp, tn, fp, fn = utils.counts(config.target_class, all_predictions, all_labels)
            click.echo('TP: ' + str(tp) + ' TN: ' + str(tn) + ' FP: ' + str(fp) + ' FN: ' + str(fn), file=config.output)
            precision = utils.precision(tp, fp)
            click.echo('Precision: ' + str(round(precision, 4)), file=config.output)
            recall = utils.recall(tp, fn)
            click.echo('Recall: ' + str(round(recall, 4)), file=config.output)
            f1 = utils.f_measure(tp, fp, fn)
            click.echo('F1-score: ' + str(round(f1, 4)), file=config.output)
    elif config.target_class:
        tp, tn, fp, fn = utils.counts(config.target_class, all_predictions, all_labels)
        click.echo('TP: ' + str(tp) + ' TN: ' + str(tn) + ' FP: ' + str(fp) + ' FN: ' + str(fn), file=config.output)
        precision = utils.precision(tp, fp)
        click.echo('Precision: ' + str(round(precision, 4)), file=config.output)
        recall = utils.recall(tp, fn)
        click.echo('Recall: ' + str(round(recall, 4)), file=config.output)
        f1 = utils.f_measure(tp, fp, fn)
        click.echo('F1-score: ' + str(round(f1, 4)), file=config.output)
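
Both examples lean on utils.accuracy and utils.earliness, which are not part of these excerpts. Their contracts can be read off the call sites: each prediction is a (timestamp, label) tuple, accuracy compares predicted labels against the true ones, and earliness normalises the decision timestamps by the last valid index (config.ts_length - 1). A plausible minimal sketch under those assumptions, not the project's actual utils module:

from typing import List, Tuple

def accuracy(predictions: List[Tuple[int, int]], labels: List[int]) -> float:
    # Fraction of test examples whose predicted label matches the true label
    correct = sum(1 for (_, pred), true in zip(predictions, labels) if pred == true)
    return correct / len(predictions)

def earliness(predictions: List[Tuple[int, int]], last_index: int) -> float:
    # Mean decision timestamp, normalised to [0, 1]; lower means earlier decisions
    return sum(ts for ts, _ in predictions) / (len(predictions) * last_index)

# accuracy([(18, 1), (30, 0)], [1, 1])  -> 0.5
# earliness([(18, 1), (30, 0)], 49)     -> 48 / 98 ≈ 0.4898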
Example #3
def test_f_measure_binary_classification():
    # Test f-measure for non-zero counts.
    assert utils.f_measure(10, 40, 40) == pytest.approx(0.2)
Example #4
def test_f_measure_zero():
    # F-measure should be None if precision and recall are zero.
    assert utils.f_measure(0, 10, 20) is None
Example #5
def test_f_measure_no_recall():
    # F-measure should be None if recall is undefined (tp + fn == 0).
    assert utils.f_measure(0, 20, 0) is None
Example #6
def test_f_measure_zero_counts():
    # F-measure should be None if tp, fp and fn are zero.
    assert utils.f_measure(0, 0, 0) is None
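
Taken together, the four tests pin down the contract of utils.f_measure(tp, fp, fn): it returns the harmonic mean of precision and recall, and None whenever either quantity is zero or undefined. A sketch that satisfies all four assertions (the project's real implementation may differ in details):

from typing import Optional

def f_measure(tp: int, fp: int, fn: int) -> Optional[float]:
    # Precision and recall are undefined (None) when their denominators are zero
    precision = tp / (tp + fp) if tp + fp > 0 else None
    recall = tp / (tp + fn) if tp + fn > 0 else None
    # F1 is undefined when either metric is missing or zero
    if not precision or not recall:
        return None
    # 2PR / (P + R), rewritten as 2tp / (2tp + fp + fn) to avoid rounding noise
    return 2 * tp / (2 * tp + fp + fn)

assert f_measure(10, 40, 40) == 0.2   # precision = recall = 0.2
assert f_measure(0, 10, 20) is None   # precision = recall = 0
assert f_measure(0, 20, 0) is None    # recall undefined (tp + fn == 0)
assert f_measure(0, 0, 0) is None     # all counts zero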