def test_mismatch_kernel(simple_seq):
    """Check the block structure of the mismatch-kernel matrix on ``simple_seq``.

    The first matrix returned by ``MismatchKernel(3, 1, 2).fit`` is split into
    four blocks at ``len(simple_seq) // 2``. Every entry of the two diagonal
    blocks must be at least 1, and both off-diagonal blocks must be exactly
    zero.
    """
    half = len(simple_seq) // 2
    gram, _ = kernels.MismatchKernel(3, 1, 2).fit(simple_seq)
    print(gram)

    first_half = slice(None, half)
    second_half = slice(half, None)

    # Diagonal blocks: entries within the same half are all >= 1.
    for group in (first_half, second_half):
        assert (gram[group, group] >= 1).all()

    # Off-diagonal blocks: entries across the two halves are all zero.
    expected = np.zeros((half, half))
    for rows, cols in ((first_half, second_half), (second_half, first_half)):
        np.testing.assert_equal(gram[rows, cols], expected)
def main_sum():
    """Run ``validation`` with a sum kernel on each of the three datasets.

    For every dataset index ``i`` in 0..2, the training inputs are read from
    ``data/Xtr{i}.csv`` and concatenated with themselves along the last axis.
    They are handed to ``validation`` together with the labels from
    ``data/Ytr{i}.csv``, the ``kernels.SumKernel`` and
    ``methods.KernelRidgeRegression`` classes, one kernel parameter tuple and
    a grid of regularisation values.
    """
    # One (mismatch, Gaussian) kernel pair per dataset.
    seq_kernels = [
        kernels.MismatchKernel(size, 1, 4, False) for size in (12, 12, 9)
    ]
    gauss_kernels = [kernels.Gaussian(.1, False) for _ in range(3)]

    for i, (seq_kernel, gauss_kernel) in enumerate(
            zip(seq_kernels, gauss_kernels)):
        print("##################", f"i={i}")

        # Disabled alternative input: data/Xtr{i}_mat100.csv via read_X100.
        X = read_write.read(f"data/Xtr{i}.csv")
        width = X.shape[1]
        # Two copies of the inputs side by side; the original width is passed
        # twice in the kernel parameters below.
        X_cat = np.concatenate((X, X), axis=-1)
        y = read_write.read_labels(f"data/Ytr{i}.csv")

        params_kernel = [(seq_kernel, gauss_kernel, width, width, False)]
        # Powers of ten with exponents from -2 up to (excluding) 3, step 1/4.
        reg_vals = 10.**np.arange(-2, 3, 1 / 4)

        validation(X_cat, y, kernels.SumKernel,
                   methods.KernelRidgeRegression, params_kernel, reg_vals)
def main_rendu(accuracy_on_train_set=False):
    """Fit one kernel ridge regression per dataset and write the predictions.

    For every dataset index ``i`` in 0..2, an estimator is trained on
    ``data/Xtr{i}.csv`` / ``data/Ytr{i}.csv`` and used to predict
    ``data/Xte{i}.csv``. The predictions are passed to ``read_write.write``
    for ``predictions/Yte.csv`` with ``offset=i * 1000``; ``append`` is False
    only for the first dataset.

    Args:
        accuracy_on_train_set: when true, also predict on the training inputs
            and print the value of ``methods.accuracy`` for each dataset.
    """
    # Per-dataset mismatch kernels; only the first and last arguments vary.
    mismatch_kernels = [
        kernels.MismatchKernel(size, 2, 4, flag)
        for size, flag in ((12, False), (12, True), (9, False))
    ]
    reg_values = (100 * 0.03162277660168379, .1, 1000 * 0.03162277660168379)
    models = [
        methods.KernelRidgeRegression(kern, reg_val=reg)
        for kern, reg in zip(mismatch_kernels, reg_values)
    ]

    for i, model in enumerate(models):
        print("##################", f"i={i}")

        # Disabled alternative inputs: the *_mat100.csv files via read_X100.
        X = read_write.read(f"data/Xtr{i}.csv")
        X_test = read_write.read(f"data/Xte{i}.csv")
        y = read_write.read_labels(f"data/Ytr{i}.csv")

        model.learn(X, y)

        if accuracy_on_train_set:
            train_pred = model.predict(X)
            print(methods.accuracy(y, train_pred))

        test_pred = model.predict(X_test)
        read_write.write(test_pred, "predictions/Yte.csv",
                         offset=i * 1000, append=(i > 0))
def _with_overrides(kernel_conf, **defaults):
    """Return ``defaults`` updated with the values of ``kernel_conf.args``."""
    defaults.update(kernel_conf.args.values_())
    return defaults


def get_kernel(conf) -> kernels.Kernel:
    """Build the combined kernel described by ``conf``.

    Each entry of ``conf.kernels`` is turned into a kernel object according to
    its ``"name"`` (one of ``onehot``, ``spectrum``, ``mismatch``,
    ``substring``, ``local-alignment``). Built-in default arguments are
    overridden by the entry's ``args``, and ``set_args`` is then called with
    the entry's ``type``, ``gamma``, ``degree`` and ``r``.

    The individual kernels are wrapped in ``kernels.SimpleMKL`` when
    ``conf.mkl`` is true. Otherwise they are wrapped in ``kernels.SumKernel``
    together with the per-kernel coefficients from ``conf.coefs``. The
    wrapper's ``normalize`` argument is taken from ``conf.normalize``.

    Raises:
        AssertionError: if a coefficient is negative (only checked when
            ``conf.mkl`` is false) or a kernel name is not recognised.
    """
    known_names = ('onehot', 'spectrum', "mismatch", "substring",
                   "local-alignment")

    kernel_specs = conf.kernels.values_()
    built_kernels = []
    coefs = []

    # Index-based access is kept on purpose: both ``kernel_specs`` and
    # ``conf.kernels`` / ``conf.coefs`` are addressed by position.
    for idx in range(len(kernel_specs)):
        name = kernel_specs[idx]["name"]

        if not conf.mkl:
            coef = conf.coefs[idx]
            # NOTE(review): the message says "positive" but zero is accepted.
            assert coef >= 0, f"Coefficient for kernel {idx + 1} must be positive."
            coefs.append(coef)

        kernel_conf = conf.kernels[idx]
        assert name in known_names, "Unknown requested kernel."

        if name == "spectrum":
            args = _with_overrides(kernel_conf, length=3)
            built = kernels.SpectrumKernel(conf.memoize, args['length'])
        elif name == "mismatch":
            args = _with_overrides(kernel_conf, k=3, m=2)
            built = kernels.MismatchKernel(conf.memoize, args['k'], args['m'])
        elif name == "substring":
            # NOTE(review): ``lambda_decay`` is merged here but never passed
            # to SubstringKernel - confirm whether that is intended.
            args = _with_overrides(kernel_conf, length=3, lambda_decay=0.05)
            built = kernels.SubstringKernel(conf.memoize, args['length'])
        elif name == "local-alignment":
            args = _with_overrides(kernel_conf, beta=0.05, d=1, e=11)
            built = kernels.LocalAlignmentKernel(
                conf.memoize, args['beta'], args['d'], args['e'])
        else:
            # Only "onehot" remains; it takes no extra arguments.
            built = kernels.OneHotKernel(conf.memoize)

        built.set_args(kernel_conf.type, kernel_conf.gamma,
                       kernel_conf.degree, kernel_conf.r)
        built_kernels.append(built)

    if conf.mkl:
        combined = kernels.SimpleMKL(conf.memoize, built_kernels)
    else:
        combined = kernels.SumKernel(conf.memoize, built_kernels, coefs)

    combined.set_args(normalize=conf.normalize)
    return combined
# Learning method classes exercised by ``test_methods``.
TEST_METHODS = [
    methods.KernelRidgeRegression,
    methods.KernelLogisticRegression,
    methods.SVM,
]

# (kernel instance, regularisation value) pairs fed to ``test_methods``.
TEST_KERNELS = [
    (kernels.Linear(), .1),
    (kernels.Gaussian(.1), .1),
    (kernels.Polynomial(degree=3), .1),
]

# NOTE(review): ``A`` is not defined in the visible part of this file -
# confirm it is bound before this list is evaluated.
TEST_SEQ_KERNELS = [
    # kernels.SpectrumKernel(1),
    # kernels.SpectrumKernel(2),
    kernels.MismatchKernel(3, 1, A),
]


@pytest.mark.parametrize('method', TEST_METHODS)
@pytest.mark.parametrize('kernel_with_reg', TEST_KERNELS)
def test_methods(method, kernel_with_reg, x_train, y_train, x_test, y_test):
    """Check that a fitted method reproduces the train and test labels exactly.

    The kernel from ``kernel_with_reg`` has ``normalize`` switched on (this
    sets the attribute on the shared instance stored in ``TEST_KERNELS``).
    The method is then built with the paired regularisation value and trained
    on the training data. Its predictions on the training inputs and then on
    the test inputs must equal the corresponding labels.
    """
    kernel, reg = kernel_with_reg
    kernel.normalize = True

    model = method(kernel, reg_val=reg)
    model.learn(x_train, y_train)

    # Training set first, then test set, each compared for exact equality.
    for inputs, expected in ((x_train, y_train), (x_test, y_test)):
        predicted = model.predict(inputs)
        np.testing.assert_equal(predicted, expected)