Пример #1
    def test_classif(self, cls_data):
        # Fits a Falkon model with a Gaussian kernel on (X, Y) and asserts that
        # the error measured on the very same data it was fitted on is below 5%.
        # `cls_data` is unpacked into (X, Y); presumably a pytest fixture
        # yielding torch tensors -- TODO confirm against the fixture definition.
        X, Y = cls_data
        kernel = kernels.GaussianKernel(2.0)

        # Error metric: percentage of entries where the two arguments do not
        # share a strictly positive product (t * p <= 0 counts as an error).
        # Returns a (value, label) tuple; only the value is used below.
        def error_fn(t, p):
            return 100 * torch.sum(t * p <= 0).to(
                torch.float32) / t.shape[0], "c-err"

        # Options object built with use_cpu=True, keops_active="no", debug=True.
        opt = FalkonOptions(use_cpu=True, keops_active="no", debug=True)

        # NOTE(review): the constructor call below is truncated in this copy of
        # the file -- the remaining arguments and the closing parenthesis are
        # missing, so this snippet does not parse as-is. `opt` is never used in
        # the visible code, so it was presumably passed on the lost lines.
        # Restore the call from the upstream test before relying on this.
        flk = Falkon(kernel=kernel,
        flk.fit(X, Y)
        preds = flk.predict(X)
        # error_fn is called with the predictions as `t` and the targets as `p`.
        err = error_fn(preds, Y)[0]
        assert err < 5
Пример #2
    def test_regression(self, reg_data):
        # Fits a Falkon model with a Gaussian kernel on the training split and
        # checks prediction shape plus train/test RMSE bounds.
        # `reg_data` is unpacked into (Xtr, Ytr, Xts, Yts); presumably a pytest
        # fixture yielding torch tensors -- TODO confirm.
        Xtr, Ytr, Xts, Yts = reg_data
        kernel = kernels.GaussianKernel(20.0)

        # Error metric: root mean squared error between the two arguments.
        # Returns a (value, label) tuple; only the value is used below.
        def error_fn(t, p):
            return torch.sqrt(torch.mean((t - p)**2)), "RMSE"

        # Options object built with use_cpu=True, keops_active="no", debug=True.
        opt = FalkonOptions(use_cpu=True, keops_active="no", debug=True)

        # NOTE(review): the constructor call below is truncated in this copy of
        # the file -- the remaining arguments and the closing parenthesis are
        # missing, so this snippet does not parse as-is. `opt` is never used in
        # the visible code, so it was presumably passed on the lost lines.
        # Restore the call from the upstream test before relying on this.
        flk = Falkon(kernel=kernel,
        # The test split is handed to fit() as well, via the Xts/Yts keywords.
        flk.fit(Xtr, Ytr, Xts=Xts, Yts=Yts)

        # Predictions are expected as a column: one row per test target.
        assert flk.predict(Xts).shape == (Yts.shape[0], 1)
        ts_err = error_fn(flk.predict(Xts), Yts)[0]
        tr_err = error_fn(flk.predict(Xtr), Ytr)[0]
        # Training error must be below test error, and test RMSE below 2.5.
        assert tr_err < ts_err
        assert ts_err < 2.5
Пример #3
    def test_multiclass(self, multicls_data):
        # Fits a Falkon model with a Gaussian kernel on (X, Y) and asserts that
        # the misclassification rate on the same data is below 0.23.
        # `multicls_data` is unpacked into (X, Y); presumably a pytest fixture
        # yielding torch tensors -- TODO confirm.
        X, Y = multicls_data
        kernel = kernels.GaussianKernel(10.0)

        # Error metric: both arguments are reduced with argmax over dim 1, and
        # the result is the fraction of positions where the two index vectors
        # differ (computed in float64). Returns a (value, label) tuple.
        def error_fn(t, p):
            t = torch.argmax(t, dim=1)
            p = torch.argmax(p, dim=1)
            return torch.mean((t.reshape(-1, ) != p.reshape(-1, )).to(
                torch.float64)), "multic-err"

        # Options object built with use_cpu=True, keops_active="no", debug=True.
        opt = FalkonOptions(use_cpu=True, keops_active="no", debug=True)

        # NOTE(review): the constructor call below is truncated in this copy of
        # the file -- the remaining arguments and the closing parenthesis are
        # missing, so this snippet does not parse as-is. `opt` is never used in
        # the visible code, so it was presumably passed on the lost lines.
        # Restore the call from the upstream test before relying on this.
        flk = Falkon(kernel=kernel,
        flk.fit(X, Y)
        preds = flk.predict(X)
        err = error_fn(preds, Y)[0]
        assert err < 0.23
Пример #4
def main(path, n_labeled, kernel_function, max_iterations, gpu):
    """Run a semi-supervised Falkon experiment with an annealing loop.

    The array loaded from ``path`` is expected to hold the label in column 0
    and the features in the remaining columns. Labels are remapped with
    ``2 * y - 1`` (so {0, 1} becomes {-1, 1}), 50000 rows are held out as the
    test set, and features are standardized with statistics estimated on the
    training split only. A class-balanced labeled subset is drawn from the
    training split; the rest is treated as unlabeled. A first model is
    trained on the labeled subset and evaluated, then a second model is
    refitted repeatedly on labeled + pseudo-labeled data while the weight of
    the pseudo-labeled points grows. Accuracy and AUC are printed before and
    after annealing.

    Args:
        path: File passed to ``np.load``.
        n_labeled: Total number of labeled points; ``int(n_labeled / 2)`` are
            drawn from each class.
        kernel_function: Forwarded to ``Kernel``.
        max_iterations: Forwarded to ``Falkon`` as ``optimizer_max_iter``.
        gpu: Forwarded to both ``Kernel`` and ``Falkon``.
    """
    # loading dataset as ndarray
    dataset = np.load(path).astype(np.float32)
    print("Dataset loaded ({} points, {} features per point)".format(dataset.shape[0], dataset.shape[1] - 1))

    # adjusting label's range {-1, 1}
    dataset[:, 0] = (2 * dataset[:, 0]) - 1

    # defining train and test set
    x_train, x_test, y_train, y_test = train_test_split(
        dataset[:, 1:], dataset[:, 0], test_size=50000, random_state=None)
    print("Train and test set defined (test: {} + , train: {} +, {} -)".format(np.sum(y_test == 1.), np.sum(y_train == 1.), np.sum(y_train == -1.)))

    # removing the mean and scaling to unit variance; the scaler must be
    # fitted before it can transform, and it is fitted on the training split
    # only so that no test statistics leak into the features
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    print("Standardization done")

    # defining labeled and unlabeled set (same number of points per class)
    per_class = int(n_labeled / 2)
    positives = np.random.choice(np.where(y_train == 1)[0], size=per_class, replace=False)
    negatives = np.random.choice(np.where(y_train == -1)[0], size=per_class, replace=False)
    labeled = np.concatenate((positives, negatives), axis=0)
    # fancy indexing and np.delete both return fresh arrays, no extra copy needed
    x_labeled = x_train[labeled, :]  # train
    y_labeled = y_train[labeled]  # train
    x_unlabeled = np.delete(x_train, obj=labeled, axis=0)  # test
    y_unlabeled = np.delete(y_train, obj=labeled)  # test

    # choosing kernel function
    kernel = Kernel(kernel_function=kernel_function, gpu=gpu)

    # fitting falkon (semi-supervised scenario)
    best_score = -np.inf  # np.infty was removed in NumPy 2.0
    best_gamma, best_ker_param = None, None
    print("First training...")
    for gamma in [1e-6]:
        for ker_param in [4]:
            falkon = Falkon(nystrom_length=x_labeled.shape[0], gamma=gamma, kernel_fun=kernel.get_kernel(),
                            kernel_param=ker_param, optimizer_max_iter=max_iterations, gpu=gpu)
            falkon.fit(x_labeled, y_labeled)
            score = accuracy_score(y_labeled, np.sign(falkon.predict(x_labeled)))
            if score > best_score:
                best_score, best_gamma, best_ker_param = score, gamma, ker_param

    print("  -> [debug info] best score {:.3f} -- best gamma {} -- best kernel_param {}".format(best_score, best_gamma, best_ker_param))
    falkon = Falkon(nystrom_length=x_labeled.shape[0], gamma=best_gamma, kernel_fun=kernel.get_kernel(),
                    kernel_param=best_ker_param, optimizer_max_iter=max_iterations, gpu=gpu)
    falkon.fit(x_labeled, y_labeled)
    functional_margin = falkon.predict(x_test)

    print(np.sum(functional_margin >= 0), np.sum(functional_margin < 0))

    print("Starting falkon testing routine...")
    accuracy = accuracy_score(y_test, np.sign(functional_margin))
    auc = roc_auc_score(y_test, functional_margin)
    print("Accuracy: {:.3f} - AUC: {:.3f}".format(accuracy, auc))

    print("Annealing loop...")
    # the supervised model provides the initial margins on the unlabeled set
    functional_margin = falkon.predict(x_unlabeled)
    falkon = Falkon(nystrom_length=10000, gamma=best_gamma, kernel_fun=kernel.get_kernel(),
                    kernel_param=best_ker_param, optimizer_max_iter=max_iterations, gpu=gpu)
    balance_constraint = (2 * 0.5) - 1  # 2r - 1
    n_unlabeled = x_unlabeled.shape[0]
    # the stacked training matrix does not change across iterations
    x_all = np.vstack((x_labeled, x_unlabeled))
    start_ = time()
    for idx, weight in enumerate([0.1, 0.25, 0.5, 0.75, 1.]):
        print(" -> iteration {}".format(idx+1))

        lam0 = ((2 / n_unlabeled) * np.sum(functional_margin)) - (2 * balance_constraint)
        y_u, lam, n_iter = labelling(functional_margin, balance_constraint, lam0, 1., int(n_unlabeled * 0.005))
        print("  -> [debug info] balance constraint {:.2}".format(np.divide(np.sum(y_u), n_unlabeled)))
        print("  -> [debug info] lambda from {:.3e} to {:.3e} in {} iterations".format(lam0, lam, n_iter+1))
        print("  -> [debug info] wrong labels {}".format(np.sum(y_u != y_unlabeled)))

        # labeled points keep full weight, pseudo-labeled ones get `weight`
        sample_weights = ([1.] * x_labeled.shape[0]) + ([weight] * n_unlabeled)
        falkon.fit(x_all, np.concatenate((y_labeled, y_u)).astype(np.float32), sample_weights=sample_weights)
        functional_margin = falkon.predict(x_unlabeled)
    print("Annealing done in {:.3} seconds".format(time()-start_))

    # testing semi-supervised falkon
    print("Starting falkon testing routine...")
    functional_margin = falkon.predict(x_test)
    accuracy = accuracy_score(y_test, np.sign(functional_margin))
    auc = roc_auc_score(y_test, functional_margin)
    print("Accuracy: {:.3f} - AUC: {:.3f}".format(accuracy, auc))
Пример #5
def main(path, n_labeled, kernel_function, max_iterations, gpu):
    """Run a transductive semi-supervised Falkon experiment with annealing.

    The array loaded from ``path`` is expected to hold the label in column 0
    and the features in the remaining columns. Features are standardized
    using statistics of the whole dataset. A class-balanced labeled subset is
    drawn and removed from the dataset; every remaining row is treated as
    unlabeled and also serves as the evaluation set. A first model is trained
    on the labeled subset and evaluated, then a second model is refitted
    repeatedly on labeled + pseudo-labeled data while the weight of the
    pseudo-labeled points grows. Accuracy and AUC are printed before and
    after annealing.

    NOTE(review): labels are compared against 1 and -1 without any
    remapping, so the stored labels are presumably already in {-1, 1} --
    confirm against the dataset.

    Args:
        path: File passed to ``np.load``.
        n_labeled: Total number of labeled points; ``int(n_labeled / 2)`` are
            drawn from each class.
        kernel_function: Forwarded to ``Kernel``.
        max_iterations: Forwarded to ``Falkon`` as ``optimizer_max_iter``.
        gpu: Forwarded to both ``Kernel`` and ``Falkon``.
    """
    # loading dataset as ndarray
    dataset = np.load(path).astype(np.float32)
    print("Dataset loaded ({} points, {} features per point)".format(dataset.shape[0], dataset.shape[1] - 1))

    # removing the mean and scaling to unit variance (feature columns only)
    scaler = StandardScaler()
    dataset[:, 1:] = scaler.fit_transform(dataset[:, 1:])
    print("Standardization done")

    # defining labeled and unlabeled set (same number of points per class)
    per_class = int(n_labeled / 2)
    positives = np.random.choice(np.where(dataset[:, 0] == 1)[0], size=per_class, replace=False)
    negatives = np.random.choice(np.where(dataset[:, 0] == -1)[0], size=per_class, replace=False)
    labeled = np.concatenate((positives, negatives), axis=0)
    # fancy indexing returns fresh arrays, no extra copy needed
    x_labeled = dataset[labeled, 1:]  # train
    y_labeled = dataset[labeled, 0]  # train
    dataset = np.delete(dataset, obj=labeled, axis=0)

    # NOTE: a held-out validation split used to be carved out of the
    # remaining rows at this point; it is currently disabled.

    x_unlabeled = dataset[:, 1:].copy()  # test
    y_unlabeled = dataset[:, 0].copy()  # test

    # choosing kernel function
    kernel = Kernel(kernel_function=kernel_function, gpu=gpu)

    print(x_labeled.shape, y_labeled.shape, x_unlabeled.shape, y_unlabeled.shape)
    # fitting falkon (semi-supervised scenario)
    best_score = -np.inf  # np.infty was removed in NumPy 2.0
    best_gamma, best_ker_param = None, None
    print("First training...")
    for gamma in [1e-6]:
        for ker_param in [0.5]:
            falkon = Falkon(nystrom_length=x_labeled.shape[0], gamma=gamma, kernel_fun=kernel.get_kernel(),
                            kernel_param=ker_param, optimizer_max_iter=max_iterations, gpu=gpu)
            falkon.fit(x_labeled, y_labeled)
            score = accuracy_score(y_labeled, np.sign(falkon.predict(x_labeled)))
            if score > best_score:
                best_score, best_gamma, best_ker_param = score, gamma, ker_param

    print("  -> [debug info] best score {:.3f} -- best gamma {} -- best kernel_param {}".format(best_score, best_gamma, best_ker_param))
    falkon = Falkon(nystrom_length=x_labeled.shape[0], gamma=best_gamma, kernel_fun=kernel.get_kernel(),
                    kernel_param=best_ker_param, optimizer_max_iter=max_iterations, gpu=gpu)
    falkon.fit(x_labeled, y_labeled)
    # these margins are both the baseline evaluation and the starting point
    # of the annealing loop below
    functional_margin = falkon.predict(x_unlabeled)

    print(np.sum(functional_margin >= 0), np.sum(functional_margin < 0))

    print("Starting falkon testing routine...")
    accuracy = accuracy_score(y_unlabeled, np.sign(functional_margin))
    auc = roc_auc_score(y_unlabeled, functional_margin)
    print("Accuracy: {:.3f} - AUC: {:.3f}".format(accuracy, auc))

    print("Annealing loop...")
    falkon = Falkon(nystrom_length=10000, gamma=best_gamma, kernel_fun=kernel.get_kernel(),
                    kernel_param=best_ker_param, optimizer_max_iter=max_iterations, gpu=gpu)
    balance_constraint = (2 * 0.5) - 1  # 2r - 1
    n_unlabeled = x_unlabeled.shape[0]
    # the stacked training matrix does not change across iterations
    x_all = np.vstack((x_labeled, x_unlabeled))
    start_ = time()
    for idx, weight in enumerate([0.1, 0.25, 0.5, 1.]):
        print(" -> iteration {}".format(idx+1))

        lam0 = ((2 / n_unlabeled) * np.sum(functional_margin)) - (2 * balance_constraint)
        y_u, lam, n_iter = labelling(functional_margin, balance_constraint, lam0, 1., int(n_unlabeled * 0.005))
        print("  -> [debug info] balance constraint {:.2}".format(np.divide(np.sum(y_u), n_unlabeled)))
        print("  -> [debug info] lambda from {:.3e} to {:.3e} in {} iterations".format(lam0, lam, n_iter+1))
        print("  -> [debug info] wrong labels {}".format(np.sum(y_u != y_unlabeled)))

        # labeled points keep full weight, pseudo-labeled ones get `weight`
        sample_weights = ([1.] * x_labeled.shape[0]) + ([weight] * n_unlabeled)
        falkon.fit(x_all, np.concatenate((y_labeled, y_u)).astype(np.float32), sample_weights=sample_weights)
        functional_margin = falkon.predict(x_unlabeled)
    print("Annealing done in {:.3} seconds".format(time()-start_))

    # testing semi-supervised falkon (margins come from the last annealing step)
    print("Starting falkon testing routine...")
    accuracy = accuracy_score(y_unlabeled, np.sign(functional_margin))
    auc = roc_auc_score(y_unlabeled, functional_margin)
    print("Accuracy: {:.3f} - AUC: {:.3f}".format(accuracy, auc))
Пример #6
def main(path, kernel_function, max_iterations, gpu):
    # Semi-supervised Falkon experiment: load a dataset, split it into
    # labeled / unlabeled / test parts, binarize the targets, standardize the
    # features, train a first model on the labeled part, then run an annealing
    # loop that refits on labeled + pseudo-labeled data and report
    # accuracy / AUC on the test part before and after.
    #
    # path:            file passed to np.load
    # kernel_function: forwarded to Kernel
    # max_iterations:  not referenced in the visible code (presumably used on
    #                  one of the truncated Falkon(...) lines -- TODO confirm)
    # gpu:             forwarded to Kernel
    #
    # NOTE(review): several statements in this copy of the function are
    # truncated (continuation lines and closing parentheses are missing), so
    # the snippet does not parse as-is. Each such spot is flagged below;
    # restore them from the original source before running.

    # loading dataset as ndarray
    dataset = np.load(path).astype(np.float32)
    print("Dataset loaded ({} points, {} features per point)".format(
        dataset.shape[0], dataset.shape[1] - 1))

    # defining train and test set
    # Fixed row split: rows [0, 463715) are train, rows [463715, 515345) are
    # test; column 0 is the target, the remaining columns are the features.
    x_train = dataset[0:463715, 1:]
    x_test = dataset[463715:515345, 1:]
    y_train = dataset[0:463715, 0]
    y_test = dataset[463715:515345, 0]
    print("Train and test set defined")

    # creating the unsupervised part of the dataset (using x_train)
    # NOTE(review): truncated call -- the remaining train_test_split arguments
    # (split size, seed, ...) and the closing parenthesis are missing.
    labeled_ids, unlabeled_ids = train_test_split(range(x_train.shape[0]),
    x_labeled, y_labeled = x_train[labeled_ids, :], y_train[labeled_ids]
    x_unlabeled, y_unlabeled = x_train[
        unlabeled_ids, :], y_train[unlabeled_ids]
    print("Labeled examples {}, Unlabeled examples {}".format(
        x_labeled.shape[0], x_unlabeled.shape[0]))

    # labels binarization (-1 from 1922 to 2002, 1 from 2002 to 2011) -- balanced (labeled) dataset
    # In code: targets >= 2002 become 1.0 and all others 0.0, then 2*y - 1
    # maps {0, 1} to {-1, 1}.
    y_labeled, y_test = (y_labeled >= 2002).astype(
        np.float32), (y_test >= 2002).astype(np.float32)
    y_unlabeled = (y_unlabeled >= 2002).astype(np.float32)
    y_labeled, y_unlabeled = (2 * y_labeled) - 1, (2 * y_unlabeled) - 1
    y_test = (2 * y_test) - 1

    # removing the mean and scaling to unit variance
    x_scaler = StandardScaler()
    x_scaler.fit(x_train)  # using labeled + unlabeled part
    # NOTE(review): truncated statement -- the argument of the second
    # transform call (presumably x_unlabeled) and the closing parenthesis are
    # missing.
    x_labeled, x_unlabeled = x_scaler.transform(x_labeled), x_scaler.transform(
    x_test = x_scaler.transform(x_test)
    print("Standardization done")

    # choosing kernel function
    kernel = Kernel(kernel_function=kernel_function, gpu=gpu)

    # training
    print("First training...")
    # Number of Nystrom centres is the rounded square root of the labeled set size.
    # NOTE(review): truncated call -- the remaining Falkon arguments and the
    # closing parenthesis are missing.
    falkon = Falkon(nystrom_length=round(np.sqrt(x_labeled.shape[0])),
    falkon.fit(x_labeled, y_labeled)
    functional_margin = falkon.predict(x_test)

    # initial Accuracy, AUC_ROC
    accuracy = accuracy_score(y_test, np.sign(functional_margin))
    auc_roc = roc_auc_score(y_test, functional_margin)
    print("Accuracy: {:.4f} - AUC: {:.4f}".format(accuracy, auc_roc))

    print("Annealing loop...")
    # The first model's predictions on the unlabeled set seed the loop.
    functional_margin = falkon.predict(x_unlabeled)
    # NOTE(review): truncated call -- the remaining Falkon arguments and the
    # closing parenthesis are missing.
    falkon = Falkon(nystrom_length=10000,
    balance_constraint = (2 * 0.5) - 1  # 2r - 1
    tic = time()
    # Each pass pseudo-labels the unlabeled set from the current margins and
    # refits with a larger weight on the pseudo-labeled points.
    for idx, weight in enumerate([0.1, 0.15, 0.25, 1.]):
        print(" -> iteration {}".format(idx + 1))
        lam0 = ((2 / x_unlabeled.shape[0]) *
                np.sum(functional_margin)) - (2 * balance_constraint)
        # `labelling` is defined elsewhere; it returns the pseudo-labels, a
        # final lambda and an iteration counter (as unpacked here).
        y_u, lam, _iter = labelling(functional_margin,
                                    balance_constraint, lam0, 1.,
                                    int(x_unlabeled.shape[0] * 0.005))
        print("  -> [debug info] balance constraint {:.2}".format(
            np.divide(np.sum(y_u), x_unlabeled.shape[0])))
        # NOTE(review): the opening `print(` of the next statement is missing
        # in this copy.
            "  -> [debug info] lambda from {:.3e} to {:.3e} in {} iterations".
            format(lam0, lam, _iter + 1))
        print("  -> [debug info] wrong labels {}".format(
            np.sum(y_u != y_unlabeled)))
        # NOTE(review): truncated statement -- the length multiplier for the
        # `[weight]` list and the closing parenthesis are missing.
        sample_weights = ([1.] * x_labeled.shape[0]) + ([weight] *
        # NOTE(review): truncated call -- the trailing argument(s) of fit()
        # (presumably the sample weights) and the closing parenthesis are
        # missing.
        falkon.fit(np.vstack((x_labeled, x_unlabeled)),
                   np.concatenate((y_labeled, y_u)).astype(np.float32),
        functional_margin = falkon.predict(x_unlabeled)
    print("Annealing done in {:.3} seconds".format(time() - tic))

    # testing falkon
    print("Starting falkon testing routine...")
    # NOTE(review): y_pred is assigned but never read in the visible code; the
    # same prediction is recomputed on the next line.
    y_pred = falkon.predict(x_test)
    functional_margin = falkon.predict(x_test)
    accuracy = accuracy_score(y_test, np.sign(functional_margin))
    auc_roc = roc_auc_score(y_test, functional_margin)
    print("Accuracy: {:.3f} - AUC_ROC: {:.3f}".format(accuracy, auc_roc))