# Example #1
def test_mismatch_kernel(simple_seq):
	"""The mismatch kernel must separate the two halves of `simple_seq`.

	Within-half Gram entries are expected to be >= 1; cross-half
	entries must be exactly zero.
	"""
	half = len(simple_seq) // 2
	gram, _ = kernels.MismatchKernel(3, 1, 2).fit(simple_seq)
	print(gram)
	# Same-class blocks: strictly positive similarity.
	assert (gram[:half, :half] >= 1).all()
	assert (gram[half:, half:] >= 1).all()
	# Cross-class blocks: no shared k-mers at all.
	expected = np.zeros((half, half))
	np.testing.assert_equal(gram[:half, half:], expected)
	np.testing.assert_equal(gram[half:, :half], expected)
# Example #2
def main_sum():
    """Cross-validate a SumKernel (mismatch + gaussian) on each of the 3 datasets.

    For every dataset i, reads training sequences and labels from
    ``data/Xtr{i}.csv`` / ``data/Ytr{i}.csv`` and sweeps a log-spaced grid
    of ridge-regression regularization values via ``validation``.
    """
    mismatch_kernels = [
        kernels.MismatchKernel(12, 1, 4, False),
        kernels.MismatchKernel(12, 1, 4, False),
        kernels.MismatchKernel(9, 1, 4, False),
    ]
    gaussian_kernels = [
        kernels.Gaussian(.1, False),
        kernels.Gaussian(.1, False),
        kernels.Gaussian(.1, False),
    ]
    for i in range(3):
        print("##################", f"i={i}")
        X = read_write.read(f"data/Xtr{i}.csv")
        length = X.shape[1]
        # Duplicate the features so each sub-kernel of the sum gets its own copy.
        X_cat = np.concatenate((X, X), axis=-1)
        y = read_write.read_labels(f"data/Ytr{i}.csv")
        params_kernel = [
            (mismatch_kernels[i], gaussian_kernels[i], length, length, False)
        ]
        # Log-spaced regularization grid: 10^-2 ... 10^2.75 in quarter-decade steps.
        reg_vals = 10.**np.arange(-2, 3, 1 / 4)
        validation(X_cat, y, kernels.SumKernel, methods.KernelRidgeRegression,
                   params_kernel, reg_vals)
# Example #3
def main_rendu(accuracy_on_train_set=False):
    """Train one kernel-ridge model per dataset and write test predictions.

    Args:
        accuracy_on_train_set: when True, also print the accuracy of each
            model on its own training set.

    Side effects:
        Appends all predictions to ``predictions/Yte.csv`` (the file is
        created on the first split, appended to afterwards).
    """
    mismatch_kernels = [
        kernels.MismatchKernel(12, 2, 4, False),
        kernels.MismatchKernel(12, 2, 4, True),
        kernels.MismatchKernel(9, 2, 4, False),
    ]
    reg_vals = [100 * 0.03162277660168379, .1, 1000 * 0.03162277660168379]
    models = [
        methods.KernelRidgeRegression(kern, reg_val=reg)
        for kern, reg in zip(mismatch_kernels, reg_vals)
    ]
    for i, model in enumerate(models):
        print("##################", f"i={i}")
        X = read_write.read(f"data/Xtr{i}.csv")
        X_test = read_write.read(f"data/Xte{i}.csv")
        y = read_write.read_labels(f"data/Ytr{i}.csv")
        model.learn(X, y)
        if accuracy_on_train_set:
            print(methods.accuracy(y, model.predict(X)))
        y_test = model.predict(X_test)
        # Offset keeps the global row ids unique across the three splits.
        read_write.write(y_test,
                         "predictions/Yte.csv",
                         offset=i * 1000,
                         append=(i != 0))
# Example #4
def get_kernel(conf) -> kernels.Kernel:
    """Build the combined kernel described by the configuration.

    Args:
        conf: configuration object exposing ``kernels`` (indexable, with
            ``values_()``), ``mkl``, ``coefs``, ``memoize`` and ``normalize``.

    Returns:
        A ``SimpleMKL`` kernel over the configured base kernels when
        ``conf.mkl`` is set, otherwise a coefficient-weighted ``SumKernel``.
    """
    specs = conf.kernels.values_()
    base_kernels = []
    weights = []
    for idx, spec in enumerate(specs):
        name = spec["name"]
        if not conf.mkl:
            # Fixed mixing weights are only used in the non-MKL (SumKernel) case.
            weight = conf.coefs[idx]
            assert weight >= 0, f"Coefficient for kernel {idx + 1} must be positive."
            weights.append(weight)
        kernel_conf = conf.kernels[idx]
        assert name in ['onehot', 'spectrum', "mismatch", "substring", "local-alignment"], "Unknown requested kernel."

        if name == "spectrum":
            args = {"length": 3}
            args.update(kernel_conf.args.values_())
            base = kernels.SpectrumKernel(conf.memoize, args['length'])
        elif name == "mismatch":
            args = {"k": 3, "m": 2}
            args.update(kernel_conf.args.values_())
            base = kernels.MismatchKernel(conf.memoize, args['k'], args['m'])
        elif name == "substring":
            # NOTE(review): 'lambda_decay' is accepted here but never passed to
            # SubstringKernel — confirm whether the constructor should receive it.
            args = {"length": 3, "lambda_decay": 0.05}
            args.update(kernel_conf.args.values_())
            base = kernels.SubstringKernel(conf.memoize, args['length'])
        elif name == "local-alignment":
            args = {"beta": 0.05, "d": 1, "e": 11}
            args.update(kernel_conf.args.values_())
            base = kernels.LocalAlignmentKernel(conf.memoize, args['beta'],
                                                args['d'], args['e'])
        else:
            base = kernels.OneHotKernel(conf.memoize)
        base.set_args(kernel_conf.type, kernel_conf.gamma, kernel_conf.degree,
                      kernel_conf.r)
        base_kernels.append(base)
    if conf.mkl:
        combined = kernels.SimpleMKL(conf.memoize, base_kernels)
    else:
        combined = kernels.SumKernel(conf.memoize, base_kernels, weights)
    combined.set_args(normalize=conf.normalize)
    return combined
# Example #5
# Classification methods exercised by the parametrized tests below.
TEST_METHODS = [
	methods.KernelRidgeRegression,
	methods.KernelLogisticRegression,
	methods.SVM
]

# (kernel instance, regularization value) pairs for vector-input kernels.
TEST_KERNELS = [
	(kernels.Linear(), .1),
	(kernels.Gaussian(.1), .1),
	(kernels.Polynomial(degree=3), .1)
]

# Kernels operating on raw sequences rather than feature vectors.
TEST_SEQ_KERNELS = [
	# kernels.SpectrumKernel(1),
	# kernels.SpectrumKernel(2),
	# NOTE(review): `A` is not defined in this chunk — presumably an
	# alphabet-size constant declared earlier in the file; verify it
	# exists at import time (other snippets pass a literal, e.g. 2 or 4).
	kernels.MismatchKernel(3, 1, A)
]


@pytest.mark.parametrize('method', TEST_METHODS)
@pytest.mark.parametrize('kernel_with_reg', TEST_KERNELS)
def test_methods(method, kernel_with_reg, x_train, y_train, x_test, y_test):
	kernel, reg = kernel_with_reg
	kernel.normalize = True
	meth = method(kernel, reg_val=reg)
	meth.learn(x_train, y_train)
	y_est = meth.predict(x_train)
	np.testing.assert_equal(y_est, y_train)
	y_est = meth.predict(x_test)
	np.testing.assert_equal(y_est, y_test)