def do_pca(X, method='svd'):
    """Run PCA on ``X`` and check the eigen-decomposition and transform invariants.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Data matrix to decompose.
    method : str
        Decomposition method forwarded to ``PrincipalComponentAnalysis``.
    """
    # FIX: the original unpacked `n_samples, n_features = X.shape` but never
    # used n_samples; take only the feature count.
    n_features = X.shape[1]
    pca = ml.features.PrincipalComponentAnalysis(fraction=1, method=method)
    pca.learn(X)
    evals, evecs = pca.eigenvalues, pca.eigenvectors
    # Eigenvalues must come out sorted in non-increasing order.
    assert (evals[1:] - evals[:-1] <= 0.).all()
    # Each eigenvector must have unit norm.
    assert testing.almost_equal(
        np.array([np.dot(evecs[:, i], evecs[:, i]) for i in range(n_features)]),
        1., rtol=1.e-6)
    # Consecutive eigenvectors must be orthogonal (pairwise spot-check).
    assert testing.almost_equal(
        np.array([np.dot(evecs[:, i], evecs[:, i + 1])
                  for i in range(n_features - 1)]),
        0., atol=1.e-6)
    # With fraction=1 (all components kept) the transform must round-trip.
    Xpc = pca.transform(X)
    assert testing.almost_equal(X, pca.invert(Xpc), rtol=1.e-6, atol=1.e-6)
    # Truncating to 2 components yields a (n_samples, 2) projection.
    pca.n_components = 2
    Xpc = pca.transform(X)
    assert tuple(Xpc.shape) == (len(X), 2)
    # Requesting exactly the variance explained by the first component
    # should keep a single component.
    pca.fraction_explained = pca.explained_variance[0]
    Xpc = pca.transform(X)
    assert tuple(Xpc.shape) == (len(X), 1)
def test_lp_norm():
    """Check the basic norm axioms for ``lp_norm`` with p = 1, 2, 3 and inf."""
    norms = [lp_norm(p=p, axis=0, root=True) for p in (1, 2, 3, 'inf')]
    # Axiom 1: the zero vector has zero norm for every p.
    zero = np.array((0., 0., 0))
    for norm in norms:
        assert almost_equal(norm(zero), 0.)
    # Axiom 2: dividing a vector by its own norm yields a unit vector.
    x = np.array((-120.5, 32.1, -0.8))
    for norm in norms:
        assert almost_equal(norm(x / norm(x)), 1.)
    # Axiom 3: triangle inequality (and strict positivity of the sum).
    y = np.array((12.3, 5.1, 592.6))
    for norm in norms:
        assert norm(x) + norm(y) >= norm(x + y) > 0.
    # Row-wise (axis=1) Euclidean norm on a slightly perturbed identity.
    rownorm = lp_norm(p=2, axis=1)
    m = np.eye(3)
    m[0, 1] = -2
    assert almost_equal(rownorm(m), (np.sqrt(5), 1., 1.))
def test_standard():
    """Standardization must give zero mean / unit variance and invert cleanly."""
    scaler = ml.features.Standard()
    X = make_data()
    scaler.learn(X)
    # Forward transform: data is centered and scaled.
    Z = scaler.transform(X)
    assert testing.almost_equal(np.mean(Z), 0., atol=1.e-6)
    assert testing.almost_equal(np.std(Z), 1., rtol=1.e-6)
    # Inverse transform: original mean and spread are recovered.
    Z_inv = scaler.invert(Z)
    assert testing.almost_equal(np.mean(Z_inv), np.mean(X), atol=1.e-6)
    assert testing.almost_equal(np.std(Z_inv), np.std(X), rtol=1.e-6)
def kmeans_clustering(X, k=None, centrinit=None, centroids=None, init='++',
                      atol=0.2, rtol=0.2, title='test'):
    """Fit k-means on ``X`` and verify convergence and recovered centroids.

    Parameters
    ----------
    X : ndarray
        Sample data to cluster.
    k : int, optional
        Number of clusters (overridden by ``len(centroids)`` when given).
    centrinit : optional
        Initial centroids passed to ``learn``.
    centroids : sequence, optional
        Reference centroids; each must be matched by a fitted centroid
        within ``atol``/``rtol``.
    init : str
        Initialization scheme for ``KMeansCluster``.
    title : str
        Label used in failure messages and plots.
    """
    kmeans = ml.KMeansCluster(init=init)
    kmeans.learn(X, k=k, centroids=centrinit)
    err_msg = ("\n{0}\ncentroids = {1}\n{2}\ntraining stats: {3}\ninfo: {4}"
               .format(title, centroids, kmeans, kmeans.training_stats,
                       kmeans._training_info))
    assert kmeans.converged, ("Failed to converge.\n" + err_msg
                              + plot_clusters(X, kmeans, title=title))
    if centroids is not None:
        k = len(centroids)
        for ref in centroids:
            # Match the reference centroid to the closest fitted one
            # (argmin takes the first minimum, as the original scan did).
            sqdists = [np.sum((kmeans.centroids[j] - ref) ** 2)
                       for j in range(k)]
            nearest = kmeans.centroids[int(np.argmin(sqdists))]
            assert almost_equal(ref, nearest, atol=atol, rtol=rtol), \
                ("Failed to find the centroids.\n" + err_msg
                 + plot_clusters(X, kmeans, title=title))
def test_gradient_descent():
    """Gradient descent must minimize a paraboloid for every method/adapt combo,
    and reject invalid settings without mutating the current ones."""
    # Dummy training set.
    X = np.random.normal(size=(4, 2))
    y = np.random.normal(size=4)
    # Gradient descent object.
    alpha = 0.15
    gd = opt.GradientDescent(alpha=alpha, atol=1.0e-9, rtol=1.0e-4,
                             nstepmax=100, nminibatch=2)
    for method in ("batch", "minibatch", "stochastic"):
        for adapt in (False, True):
            gd.settings(method=method, adapt=adapt)
            theta = np.random.normal(loc=(0, 1, -1), scale=1, size=3)
            obj = ParaboloidObjective(theta)
            theta_guess = np.random.normal(scale=0.1, size=3)
            theta_hat, info = gd.minimize(obj, theta_guess, X, y)
            obj_min = obj.J(theta_hat, X, y)
            # Shared diagnostic suffix (was duplicated three times).
            diag = ("\ninfo: {0}".format(info)
                    + "\n{0}".format(str(gd))
                    + "\n{0}".format(obj))
            assert info["converged"], (
                "Gradient descent has failed to converge." + diag)
            assert testing.almost_equal(obj_min, 0.0,
                                        rtol=1.0e-3, atol=1.0e-4), (
                "Minimum is not 0: {0}".format(obj_min) + diag)
            # FIX: the message arguments were swapped — the returned value
            # is theta_hat and the expected one is theta.
            assert testing.almost_equal(np.dot(theta, theta_hat),
                                        np.dot(theta, theta),
                                        rtol=1.0e-3, atol=1.0e-4), (
                "Gradient descent returned {0} instead of {1}."
                .format(theta_hat, theta) + diag)
    # A negative learning rate must be rejected...
    try:
        gd.settings(alpha=-1)
    except ValueError:
        pass
    else:
        assert False, "'settings(alpha=-1)' should have raised a ValueError."
    # ...and must leave the previously configured alpha untouched.
    assert gd.settings()["alpha"] == alpha, \
        "'alpha' ({0}) should be {1}.".format(gd.settings()["alpha"], alpha)
    assert str(gd).find("alpha") > 0, \
        "bad string conversion:\n{0}".format(str(gd))
def test_confusion():
    """Confusion-matrix fractions and derived scores on a tiny example."""
    y_true = (0, 1, 1, 1, 0)
    y_pred = (0, 1, 1, 0, 0)
    conf = confusion(y_true, y_pred, c=1)
    # Raw fractions over the five samples.
    assert conf['falsepos'] == 0.
    assert conf['truepos'] == 0.4
    assert conf['falseneg'] == 0.2
    assert conf['trueneg'] == 0.4
    # Derived metrics.
    assert almost_equal(conf['precision'], 1.)
    assert almost_equal(conf['recall'], 0.4 / 0.6)
    assert almost_equal(conf['specificity'], 1.)
    assert almost_equal(conf['accuracy'], 0.8)
    # A class label absent from 'y' must be rejected.
    try:
        confusion(y_true, y_pred, c=2)
    except ValueError:
        pass
    else:
        assert False, "An error should be raised when 'c' is not in 'y'"
def test_kfold_cv():
    """10-fold CV error of a linear fit on near-noiseless linear data is ~0."""
    X = np.linspace(-1, 1, 50)
    y = 1 - 2 * X + 1.e-6 * np.random.normal(size=len(X))
    cv = kfold_cross_validation(ml.LinearRegress(), X, y, k=10)
    assert almost_equal(cv, 0., atol=1.e-5), "cv={0}".format(cv)
    # Cross-validation only makes sense for supervised predictors.
    try:
        kfold_cross_validation(ml.KMeansCluster(), X, y, k=10)
    except TypeError:
        pass
    else:
        assert False, ("'kfold_cross_validation' should raise an error when "
                       + "called with an unsupervised predictor.")