# Evaluation fragment: per-position counts of correct binary predictions.
#
# Starts a timer, opens `results_file` for writing and emits a ';'-delimited
# CSV header (dataset, cls, params, nr_events, nr_events_predy, metric, score).
# Three running counters (correct_all_train / _val / _test) are initialised
# to 0. Then, for every position i in range(max_len):
#   * test data is rebuilt with
#     dataset_manager.generate_3d_data_for_prefix_length(dt_test, max_len, i + 1)
#     and scored with model.predict;
#   * predictions at index [:, i, 0] are thresholded (value < 0.5 -> 0,
#     otherwise 1) and compared element-wise with the labels at the same
#     index; np.sum of the matches gives correct_train / correct_val /
#     correct_test;
#   * the three counts are printed and a "tp_train" row is started.
# `y_pred`, `y`, `y_pred_val` and `y_val` are not assigned in this fragment;
# they must already exist when it runs.
# NOTE(review): the statements below are collapsed onto one physical line and
# the final csv_writer.writerow([...]) call is cut off before its closing
# brackets, so the rest of the loop body is not visible here.
# NOTE(review): the csv module documentation recommends opening the file with
# newline='' on Python 3 -- confirm the target interpreter before changing.
start = time.time() with open(results_file, 'w') as fout: csv_writer = csv.writer(fout, delimiter=';', quotechar='"', quoting=csv.QUOTE_MINIMAL) csv_writer.writerow([ "dataset", "cls", "params", "nr_events", "nr_events_predy", "metric", "score" ]) correct_all_train = 0 correct_all_val = 0 correct_all_test = 0 for i in range(max_len): X_test, y_test = dataset_manager.generate_3d_data_for_prefix_length( dt_test, max_len, i + 1) y_pred_test = model.predict(X_test) correct_train = np.sum( [0 if res < 0.5 else 1 for res in np.ravel(y_pred[:, i, 0])] == np.ravel(y[:, i, 0])) correct_val = np.sum([ 0 if res < 0.5 else 1 for res in np.ravel(y_pred_val[:, i, 0]) ] == np.ravel(y_val[:, i, 0])) correct_test = np.sum([ 0 if res < 0.5 else 1 for res in np.ravel(y_pred_test[:, i, 0]) ] == np.ravel(y_test[:, i, 0])) print(i, correct_train, correct_val, correct_test) csv_writer.writerow([ dataset_name, cls_method, params, i, -1, "tp_train", correct_train
# Evaluation fragment: one random forest per prefix length.
#
# Reports the time taken by the preceding step (`start` is assigned before
# this fragment), then restarts the timer for the evaluation phase.
print("Done: %s" % (time.time() - start))

print('Evaluating...')
start = time.time()
# NOTE(review): the csv module documentation recommends newline='' when
# opening the output file on Python 3 -- confirm the target interpreter.
with open(results_file, 'w') as fout:
    csv_writer = csv.writer(fout, delimiter=';', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
    csv_writer.writerow(
        ["dataset", "cls", "params", "nr_events", "metric", "score"])

    # Running totals; not updated within the visible part of the loop.
    correct_all_train = 0
    correct_all_test = 0

    for nr_events in range(1, max_len + 1):
        # Build train and test data for this prefix length; nr_events is
        # passed for both trailing arguments of the generator.
        X, y = dataset_manager.generate_3d_data_for_prefix_length(
            dt_train, nr_events, nr_events)
        X_test, y_test = dataset_manager.generate_3d_data_for_prefix_length(
            dt_test, nr_events, nr_events)

        # Keep only element [0, 0] of each sample's label array, as a 1-D
        # vector with one entry per sample.
        y = y[:, 0, 0].reshape(y.shape[0])
        y_test = y_test[:, 0, 0].reshape(y_test.shape[0])

        # Flatten the last two axes so each sample is one feature row.
        X_reshaped = X.reshape((X.shape[0], X.shape[1] * X.shape[2]))
        # Fix: derive the flattened width from X_test itself. The original
        # used X.shape[1] * X.shape[2], which is only valid when the train
        # and test arrays happen to share their trailing dimensions.
        X_reshaped_test = X_test.reshape(
            (X_test.shape[0], X_test.shape[1] * X_test.shape[2]))

        # A fresh classifier is trained for every prefix length.
        cls = RandomForestClassifier(n_estimators=n_estimators,
                                     max_features=max_features)
        cls.fit(X_reshaped, y)

        y_pred = cls.predict(X_reshaped)
        y_pred_test = cls.predict(X_reshaped_test)

        # Number of training samples whose predicted label equals the truth.
        correct_train = np.sum(y == y_pred)
# Evaluation fragment: accuracy and MAE of a two-output model per prefix
# length.
#
# Reports the time taken by the preceding step (`start` is assigned before
# this fragment), then restarts the timer for the evaluation phase.
elapsed = time.time() - start
print("Done: %s" % (elapsed,))

print('Evaluating...')
start = time.time()
with open(results_file, 'w') as fout:
    csv_writer = csv.writer(
        fout,
        delimiter=';',
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL,
    )
    header = ["dataset", "cls", "params", "nr_events", "metric", "score"]
    csv_writer.writerow(header)

    # Sample-weighted accumulators; total_mae is not updated within the
    # visible part of the loop.
    total = total_acc = total_mae = 0

    for nr_events in range(2, max_len - 1):
        # encode only prefixes of this length
        X, y_a, y_t = dataset_manager.generate_3d_data_for_prefix_length(
            dt_test, max_len, nr_events)
        n_cases = X.shape[0]
        if n_cases == 0:
            # No samples for this prefix length: stop the whole evaluation.
            break

        # Targets and predictions are both multiplied by this divisor
        # (presumably undoing a normalisation -- confirm against the
        # dataset manager).
        time_scale = dataset_manager.divisors["timesincelastevent"]
        y_t = y_t * time_scale

        # The model returns two outputs: index 0 is compared via argmax,
        # index 1 is compared via mean absolute error.
        pred_y = model.predict(X, verbose=0)
        pred_y_a = pred_y[0]
        pred_y_t = pred_y[1].flatten()
        # Negative predictions are clamped to zero before rescaling.
        pred_y_t[pred_y_t < 0] = 0
        pred_y_t = pred_y_t * time_scale

        true_labels = np.argmax(y_a, axis=1)
        pred_labels = np.argmax(pred_y_a, axis=1)
        acc = accuracy_score(true_labels, pred_labels)
        mae = mean_absolute_error(y_t, pred_y_t)

        # Weight this prefix length's accuracy by its sample count.
        total += n_cases
        total_acc += acc * n_cases