def train_and_test(config: Config, classifier: EarlyClassifier) -> None:
    """Train the classifier on the training split, predict the test split and report metrics.

    If the data is univariate, or the strategy is 'merge' or 'normal', a single
    model is fitted (multivariate data is merged into one frame first).
    Otherwise one model is fitted per variable and the per-variable
    predictions are combined by voting: for every test example the label is
    the most common vote and the timestamp is the latest one among the votes.

    Timings, accuracy, earliness, their harmonic mean and, when
    ``config.target_class`` is set, counts / precision / recall / F1-score are
    written to ``config.output``.

    Side effects visible in this function: when ``config.java`` is set the
    files 'train' and 'test' are written to the working directory; merging
    replaces ``config.train_data`` and ``config.test_data``; a
    ``config.target_class`` of -1 is overwritten with each class in turn.

    :param config: run configuration holding the data, labels and options
    :param classifier: the early classifier to train and evaluate
    """

    def echo_time(phase, seconds) -> None:
        # Single place for the timing line so every path prints the same text.
        click.echo('Total {} time := {}'.format(phase, timedelta(seconds=seconds)), file=config.output)

    def fit_and_predict(train_frame, test_frame):
        """Fit on one (train, test) frame pair, echo the timings, return the predictions."""
        if config.java is True:
            # predict() is handed an empty frame, so presumably the Java-backed
            # classifier reads its input from these two files - TODO confirm.
            pd.concat([config.train_labels, train_frame], axis=1, sort=False).to_csv(
                'train', index=False, header=False, sep=delim_1)
            pd.concat([config.test_labels, test_frame], axis=1, sort=False).to_csv(
                'test', index=False, header=False, sep=delim_2)
            res = classifier.predict(pd.DataFrame())
            echo_time('training', float(res[1]))
            echo_time('testing', float(res[2]))
            return res[0]

        if config.cplus is True:
            class_counts = config.train_labels.value_counts().sort_index()
            classifier.train(train_frame, config.train_labels)
            res = classifier.predict2(test_data=test_frame, labels=config.test_labels,
                                      numbers=class_counts, types=1)
            echo_time('training', float(res[1]))
            echo_time('testing', float(res[2]))
            return res[0]

        # Plain Python classifier: time training and prediction ourselves.
        start = time.time()
        classifier.train(train_frame, config.train_labels)
        echo_time('training', time.time() - start)
        start = time.time()
        predicted = classifier.predict(test_frame)
        echo_time('testing', time.time() - start)
        return predicted

    def echo_class_metrics(target, predicted, f1_suffix='') -> None:
        """Echo counts, precision, recall and F1-score of ``target``."""
        tp, tn, fp, fn = utils.counts(target, predicted, config.test_labels)
        click.echo('TP: ' + str(tp) + ' TN: ' + str(tn) + ' FP: ' + str(fp) + ' FN: ' + str(fn),
                   file=config.output)
        precision = utils.precision(tp, fp)
        click.echo('Precision: ' + str(round(precision, 4)), file=config.output)
        recall = utils.recall(tp, fn)
        click.echo('Recall: ' + str(round(recall, 4)), file=config.output)
        f1 = utils.f_measure(tp, fp, fn)
        click.echo('F1-score: ' + str(round(f1, 4)) + f1_suffix, file=config.output)

    predictions = []
    single_model = config.variate == 1 or config.strategy in ('merge', 'normal')

    if single_model:
        if config.variate > 1:
            # NOTE(review): this merges for the 'normal' strategy as well, while
            # cv() merges only for 'merge' - confirm which one is intended.
            logger.info("Merging multivariate time-series ...")
            config.train_data = [utils.df_merge(config.train_data)]
            config.test_data = [utils.df_merge(config.test_data)]

        external = config.java is True or config.cplus is True
        if config.strategy == 'normal' and not external:
            # true_predict trains and predicts in one call and reports its own timings.
            result = classifier.true_predict(config.train_data, config.test_data,
                                             config.train_labels, config.test_labels)
            predictions = result[0]
            echo_time('training', result[1])
            echo_time('testing', result[2])
            click.echo('Best earl:={}'.format(result[3]), file=config.output)
        else:
            predictions = fit_and_predict(config.train_data[0], config.test_data[0])
    else:
        logger.info("Voting over the multivariate time-series attributes ...")
        # The 'normal' strategy never reaches this branch (it is handled above),
        # so only the Java, C++ and plain train/predict paths apply here.
        votes = []
        for variable in range(config.variate):
            # Every variable is tested on its own test frame; the C++ path used
            # to test all of them on the first variable's frame.
            votes.append(fit_and_predict(config.train_data[variable], config.test_data[variable]))

        # Make predictions from the votes of each test example
        for sample_votes in zip(*votes):
            max_timestamp = max(vote[0] for vote in sample_votes)
            most_predicted = Counter(vote[1] for vote in sample_votes).most_common(1)[0][0]
            predictions.append((max_timestamp, most_predicted))

    # Calculate accuracy and earliness
    accuracy = utils.accuracy(predictions, config.test_labels.tolist())
    earliness = utils.earliness(predictions, config.ts_length - 1)
    harmonic = utils.harmonic_mean(accuracy, earliness)
    click.echo('Accuracy: ' + str(round(accuracy, 4)) + ' Earliness: ' + str(round(earliness * 100, 4)) + '%',
               file=config.output)
    click.echo('Harmonic mean: ' + str(round(harmonic, 4)), file=config.output)

    # Calculate counts, precision, recall and f1-score if a target class is provided
    if config.target_class == -1:
        # -1 requests a report for every class found in the training labels.
        for item in config.train_labels.unique():
            click.echo('For the class: ' + str(item), file=config.output)
            # Kept for backward compatibility: the config ends up holding the last class.
            config.target_class = item
            echo_class_metrics(item, predictions, f1_suffix="\n")
        click.echo('Predictions' + str(predictions), file=config.output)
    elif config.target_class:
        echo_class_metrics(config.target_class, predictions)
def cv(config: Config, classifier: EarlyClassifier) -> None:
    """Run stratified k-fold cross-validation and report per-fold, macro and micro metrics.

    For every fold produced by ``StratifiedKFold(config.folds)`` the classifier
    is fitted on the training part and evaluated on the test part. If the data
    is univariate, or the strategy is 'merge' or 'normal', a single model is
    used per fold; otherwise one model is fitted per variable and the
    per-variable predictions are combined by voting (most common label, latest
    timestamp).

    Accuracy and earliness (and precision / recall / F1-score when
    ``config.target_class`` is set) are echoed to ``config.output`` for each
    fold, then as macro-averages over the folds and as micro-averages over the
    pooled predictions.

    Side effects visible in this function: when ``config.java`` is set the
    files 'train' and 'test' are written to the working directory; the 'merge'
    strategy replaces ``config.cv_data``; a ``config.target_class`` of -1 is
    overwritten with each class in turn during the final report.

    :param config: run configuration holding the data, labels and options
    :param classifier: the early classifier to cross-validate
    """

    def echo_time(phase, seconds) -> None:
        click.echo('Total {} time := {}'.format(phase, timedelta(seconds=seconds)), file=config.output)

    def run_java(train_frame, train_labels, test_frame, test_labels):
        """Write the fold to the 'train'/'test' files and return the Java predictions."""
        # predict() is handed an empty frame, so presumably the Java-backed
        # classifier reads its input from these two files - TODO confirm.
        pd.concat([train_labels, train_frame], axis=1, sort=False).to_csv(
            'train', index=False, header=False, sep=delim_1)
        pd.concat([test_labels, test_frame], axis=1, sort=False).to_csv(
            'test', index=False, header=False, sep=delim_2)
        res = classifier.predict(pd.DataFrame())
        echo_time('training', float(res[1]))
        echo_time('testing', float(res[2]))
        # Only the first element holds the predictions; the rest are timings.
        return res[0]

    def run_cplus(train_frame, train_labels, test_frame, test_labels):
        """Train, then predict through ``predict2`` and return its predictions."""
        classifier.train(train_frame, train_labels)
        class_counts = train_labels.value_counts().sort_index(ascending=False)
        # The labels accompany the test data, so they must be the test labels.
        res = classifier.predict2(test_data=test_frame, labels=test_labels,
                                  numbers=class_counts, types=0)
        echo_time('training', float(res[1]))
        echo_time('testing', float(res[2]))
        return res[0]

    def echo_class_metrics(target, predicted, labels):
        """Echo counts, precision, recall and F1-score of ``target`` and return the three scores."""
        tp, tn, fp, fn = utils.counts(target, predicted, labels)
        click.echo('TP: ' + str(tp) + ' TN: ' + str(tn) + ' FP: ' + str(fp) + ' FN: ' + str(fn),
                   file=config.output)
        precision = utils.precision(tp, fp)
        click.echo('Precision: ' + str(round(precision, 4)), file=config.output)
        recall = utils.recall(tp, fn)
        click.echo('Recall: ' + str(round(recall, 4)), file=config.output)
        f1 = utils.f_measure(tp, fp, fn)
        click.echo('F1-score: ' + str(round(f1, 4)), file=config.output)
        return precision, recall, f1

    sum_accuracy = sum_earliness = sum_precision = sum_recall = sum_f1 = 0
    all_predictions: List[Tuple[int, int]] = []
    all_labels: List[int] = []

    single_model = config.variate == 1 or config.strategy in ('merge', 'normal')
    # 'normal' is used for algorithms that support multivariate time-series.
    native_multivariate = config.variate > 1 and config.strategy == 'normal'

    # Create the splitter before any merging so it still receives the first
    # variable's frame, exactly as before.
    splits = StratifiedKFold(config.folds).split(config.cv_data[0], config.cv_labels)

    # 'merge' turns a multivariate time-series into a univariate one. It is done
    # once here rather than being repeated on the already merged data each fold.
    if config.variate > 1 and config.strategy == 'merge':
        logger.info("Merging multivariate time-series ...")
        config.cv_data = [utils.df_merge(config.cv_data)]

    for fold, (train_indices, test_indices) in enumerate(splits, start=1):
        click.echo('== Fold ' + str(fold), file=config.output)

        # Both label sets are needed on every path, so build them up front.
        fold_train_labels = config.cv_labels[train_indices].reset_index(drop=True)
        fold_test_labels = config.cv_labels[test_indices].reset_index(drop=True)

        if single_model:
            if native_multivariate:
                fold_train_data = [config.cv_data[variable].iloc[train_indices].reset_index(drop=True)
                                   for variable in range(config.variate)]
                fold_test_data = [config.cv_data[variable].iloc[test_indices].reset_index(drop=True)
                                  for variable in range(config.variate)]
            else:
                fold_train_data = config.cv_data[0].iloc[train_indices].reset_index(drop=True)
                fold_test_data = config.cv_data[0].iloc[test_indices].reset_index(drop=True)

            if config.java is True:
                # Algorithms implemented in Java (TEASER, ECTS)
                predictions = run_java(fold_train_data, fold_train_labels, fold_test_data, fold_test_labels)
            elif config.cplus is True:
                predictions = run_cplus(fold_train_data, fold_train_labels, fold_test_data, fold_test_labels)
            else:
                # true_predict trains and predicts in one call and reports its own timings.
                # NOTE(review): train_and_test() uses train()/predict() here unless the
                # strategy is 'normal' - confirm true_predict is intended for all cases.
                result = classifier.true_predict(fold_train_data, fold_test_data,
                                                 fold_train_labels, fold_test_labels)
                predictions = result[0]
                echo_time('training', result[1])
                echo_time('testing', result[2])
                click.echo('Best earl:={}'.format(result[3]), file=config.output)
        else:
            votes = []
            for variable in range(config.variate):
                fold_train_data = config.cv_data[variable].iloc[train_indices].reset_index(drop=True)
                fold_test_data = config.cv_data[variable].iloc[test_indices].reset_index(drop=True)
                if config.java is True:
                    votes.append(run_java(fold_train_data, fold_train_labels, fold_test_data, fold_test_labels))
                elif config.cplus is True:
                    votes.append(run_cplus(fold_train_data, fold_train_labels, fold_test_data, fold_test_labels))
                else:
                    # Train the classifier
                    start = time.time()
                    classifier.train(fold_train_data, fold_train_labels)
                    echo_time('training', time.time() - start)
                    # Make predictions
                    start = time.time()
                    votes.append(classifier.predict(fold_test_data))
                    echo_time('testing', time.time() - start)

            # Make predictions from the votes of each test example. A fresh list
            # per fold keeps earlier folds' predictions out of this fold's metrics.
            predictions = []
            for sample_votes in zip(*votes):
                max_timestamp = max(vote[0] for vote in sample_votes)
                most_predicted = Counter(vote[1] for vote in sample_votes).most_common(1)[0][0]
                predictions.append((max_timestamp, most_predicted))

        all_predictions.extend(predictions)
        all_labels.extend(config.cv_labels[test_indices])

        # Calculate accuracy and earliness
        accuracy = utils.accuracy(predictions, config.cv_labels[test_indices].tolist())
        sum_accuracy += accuracy
        earliness = utils.earliness(predictions, config.ts_length - 1)
        sum_earliness += earliness
        click.echo('Accuracy: ' + str(round(accuracy, 4)) + ' Earliness: ' + str(round(earliness * 100, 4)) + '%',
                   file=config.output)

        # Calculate counts, precision, recall and f1-score if a target class is provided
        if config.target_class:
            precision, recall, f1 = echo_class_metrics(
                config.target_class, predictions, config.cv_labels[test_indices].tolist())
            sum_precision += precision
            sum_recall += recall
            sum_f1 += f1

    click.echo('== Macro-average', file=config.output)
    macro_accuracy = sum_accuracy / config.folds
    macro_earliness = sum_earliness / config.folds
    click.echo('Accuracy: ' + str(round(macro_accuracy, 4)) +
               ' Earliness: ' + str(round(macro_earliness * 100, 4)) + '%',
               file=config.output)
    if config.target_class:
        macro_precision = sum_precision / config.folds
        macro_recall = sum_recall / config.folds
        macro_f1 = sum_f1 / config.folds
        click.echo('Precision: ' + str(round(macro_precision, 4)), file=config.output)
        click.echo('Recall: ' + str(round(macro_recall, 4)), file=config.output)
        click.echo('F1-score: ' + str(round(macro_f1, 4)), file=config.output)

    click.echo('== Micro-average:', file=config.output)
    micro_accuracy = utils.accuracy(all_predictions, all_labels)
    micro_earliness = utils.earliness(all_predictions, config.ts_length - 1)
    click.echo('Accuracy: ' + str(round(micro_accuracy, 4)) +
               ' Earliness: ' + str(round(micro_earliness * 100, 4)) + '%',
               file=config.output)

    # Calculate counts, precision, recall and f1-score if a target class is provided
    if config.target_class == -1:
        # -1 requests a report for every class. The micro-average is computed over
        # the pooled predictions and labels of all folds, not the last fold only.
        for item in config.cv_labels.unique():
            click.echo('For the class: ' + str(item), file=config.output)
            # Kept for backward compatibility: the config ends up holding the last class.
            config.target_class = item
            echo_class_metrics(item, all_predictions, all_labels)
    elif config.target_class:
        echo_class_metrics(config.target_class, all_predictions, all_labels)
def test_earliness_length_100():
    """Test earliness on time-series of length 100."""
    length = 100
    cases = [
        ([(1, CLASS_ONE)], 0.01),
        ([(1, CLASS_ONE), (10, CLASS_ONE)], 0.055),
        ([(20, CLASS_ONE), (50, CLASS_ONE)], 0.35),
    ]
    for predictions, expected in cases:
        assert utils.earliness(predictions, length) == expected
def test_earliness_length_5():
    """Test earliness on time-series of length 5."""
    length = 5
    cases = [
        ([(1, CLASS_ONE)], 0.2),
        ([(1, CLASS_ONE), (1, CLASS_ONE)], 0.2),
        ([(1, CLASS_ONE), (4, CLASS_ONE)], 0.5),
    ]
    for predictions, expected in cases:
        assert utils.earliness(predictions, length) == expected
def test_earliness_empty_predictions():
    """Earliness should be None if no predictions are provided."""
    no_predictions = []
    result = utils.earliness(no_predictions, 5)
    assert result is None
def test_earliness_zero_length():
    """Earliness should be None if the time-series length is zero."""
    zero_length = 0
    for predictions in ([], [(1, CLASS_ONE), (4, CLASS_ONE)]):
        assert utils.earliness(predictions, zero_length) is None