Exemplo n.º 1
0
    # Evaluation pass: (re)creates `results_file` as a ';'-delimited CSV and,
    # for every index i in range(max_len), counts how many thresholded
    # predictions equal the labels for the train, validation and test arrays.
    # NOTE(review): `y_pred`, `y`, `y_pred_val`, `y_val`, `model`,
    # `dataset_manager`, `dt_test`, `max_len` and `results_file` are defined
    # outside this excerpt.
    start = time.time()  # start timestamp; presumably used for an elapsed-time report later
    # Mode 'w' truncates any existing results file.
    # NOTE(review): on Python 3 the csv docs recommend open(..., newline='').
    with open(results_file, 'w') as fout:
        csv_writer = csv.writer(fout,
                                delimiter=';',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        # Header row: seven columns.
        csv_writer.writerow([
            "dataset", "cls", "params", "nr_events", "nr_events_predy",
            "metric", "score"
        ])

        # Initialised to zero; not updated in the visible part of the loop
        # (presumably accumulated further down -- confirm).
        correct_all_train = 0
        correct_all_val = 0
        correct_all_test = 0
        for i in range(max_len):
            # Test data is regenerated on every iteration with the arguments
            # (dt_test, max_len, i + 1) and scored with the model.
            X_test, y_test = dataset_manager.generate_3d_data_for_prefix_length(
                dt_test, max_len, i + 1)
            y_pred_test = model.predict(X_test)

            # For each split: take the values at [:, i, 0], map scores below
            # 0.5 to 0 and everything else to 1, compare with the labels at
            # the same index, and sum the matches.
            # The left operand of `==` is a plain list; the elementwise result
            # relies on NumPy handling the comparison (assumes the label
            # slices are ndarrays -- TODO confirm).
            correct_train = np.sum(
                [0 if res < 0.5 else 1
                 for res in np.ravel(y_pred[:, i, 0])] == np.ravel(y[:, i, 0]))
            correct_val = np.sum([
                0 if res < 0.5 else 1 for res in np.ravel(y_pred_val[:, i, 0])
            ] == np.ravel(y_val[:, i, 0]))
            correct_test = np.sum([
                0 if res < 0.5 else 1 for res in np.ravel(y_pred_test[:, i, 0])
            ] == np.ravel(y_test[:, i, 0]))
            # Progress output: index followed by the three match counts.
            print(i, correct_train, correct_val, correct_test)
            csv_writer.writerow([
                dataset_name, cls_method, params, i, -1, "tp_train",
                correct_train
    # Report how long the phase timed with `start` took.
    print("Done: %s" % (time.time() - start))

    # Evaluation: for every prefix length 1..max_len, fit a fresh random
    # forest on the flattened training data and predict on train and test.
    print('Evaluating...')
    start = time.time()
    with open(results_file, 'w') as fout:
        csv_writer = csv.writer(
            fout,
            delimiter=';',
            quotechar='"',
            quoting=csv.QUOTE_MINIMAL,
        )
        csv_header = ["dataset", "cls", "params", "nr_events", "metric", "score"]
        csv_writer.writerow(csv_header)

        # Not updated in the visible part of the loop.
        correct_all_train = correct_all_test = 0
        for nr_events in range(1, max_len + 1):
            # Train data first, then test data, both generated with
            # (nr_events, nr_events) as the last two arguments.
            X, y = dataset_manager.generate_3d_data_for_prefix_length(
                dt_train, nr_events, nr_events)
            X_test, y_test = dataset_manager.generate_3d_data_for_prefix_length(
                dt_test, nr_events, nr_events)

            # Keep only the label at position [., 0, 0] of every sample,
            # as a vector with one entry per sample.
            train_rows = y.shape[0]
            test_rows = y_test.shape[0]
            y = y[:, 0, 0].reshape(train_rows)
            y_test = y_test[:, 0, 0].reshape(test_rows)

            # Collapse the two trailing axes into one feature axis.
            # NOTE(review): the test matrix is flattened with the *training*
            # array's trailing dimensions, exactly as before -- confirm that
            # both arrays always share them.
            flat_width = X.shape[1] * X.shape[2]
            X_reshaped = X.reshape((X.shape[0], flat_width))
            X_reshaped_test = X_test.reshape((X_test.shape[0], flat_width))

            cls = RandomForestClassifier(
                n_estimators=n_estimators,
                max_features=max_features,
            )
            cls.fit(X_reshaped, y)
            y_pred = cls.predict(X_reshaped)
            y_pred_test = cls.predict(X_reshaped_test)

            # Number of training samples whose prediction equals the label.
            correct_train = np.sum(y == y_pred)
# Report how long the phase timed with `start` took.
print("Done: %s" % (time.time() - start))


# Evaluation of `model` on prefixes generated from `dt_test`, one prefix
# length at a time; results go to a ';'-delimited CSV file.
print('Evaluating...')
start = time.time()
with open(results_file, 'w') as fout:
    csv_writer = csv.writer(
        fout,
        delimiter=';',
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL,
    )
    csv_writer.writerow(["dataset", "cls", "params", "nr_events", "metric", "score"])

    # Running totals; `total_mae` is not updated in the visible part of the loop.
    total = total_acc = total_mae = 0
    # NOTE(review): the range ends at max_len - 2, so the two longest prefix
    # lengths are never evaluated -- confirm this is intended.
    for nr_events in range(2, max_len - 1):

        # encode only prefixes of this length
        X, y_a, y_t = dataset_manager.generate_3d_data_for_prefix_length(
            dt_test, max_len, nr_events)
        prefix_count = X.shape[0]
        if prefix_count == 0:
            # No samples for this length: stop the whole loop.
            break

        # Scale factor applied to both the true and the predicted `y_t`
        # values (presumably undoing a normalisation -- confirm).
        time_divisor = dataset_manager.divisors["timesincelastevent"]
        y_t = y_t * time_divisor

        # The model returns an indexable result: element 0 is scored with
        # accuracy (argmax over axis 1), element 1 with mean absolute error.
        pred_y = model.predict(X, verbose=0)
        pred_y_a = pred_y[0]
        pred_y_t = pred_y[1].flatten()
        pred_y_t[pred_y_t < 0] = 0  # clamp negative predictions to zero
        pred_y_t = pred_y_t * time_divisor

        acc = accuracy_score(np.argmax(y_a, axis=1), np.argmax(pred_y_a, axis=1))
        mae = mean_absolute_error(y_t, pred_y_t)

        # Accumulate the sample count and the sample-weighted accuracy.
        total += prefix_count
        total_acc += acc * prefix_count