示例#1
0
def test_general(file, func):
    """Smoke-test a training routine on ridge-regularised linear regression.

    The dataset at *file* is loaded (last column is the target), the features
    are standardised and a bias column is prepended.  The parameters returned
    by *func* are printed next to the ``normal_eqn`` solution, a price is
    predicted for the sample ``[1650, 3]`` and the cost history is plotted.

    :param file: path handed to ``load_data.load_from_file``.
    :param func: trainer invoked as ``func(X, y, theta, linear_grad, ...)``;
        it must return ``(theta, J_history)``.
    """
    dataset = load_data.load_from_file(file)
    features = dataset[:, :-1]
    y = dataset[:, -1:]

    print(
        '\n\n===================================== test ex1data2 ====================================='
    )
    print(
        '-------------------------------  regression with momentum---------------------------------------'
    )

    # Standardise first, then add the intercept column of ones.
    features, mu, sigma = normalize.standard_deviation(features)
    bias = np.ones((features.shape[0]), dtype=features.dtype)
    X = np.insert(features, 0, bias, axis=1)

    start_theta = np.zeros((X.shape[1], 1))
    theta_test = normal_eqn(X, y)  # closed-form reference solution

    optimizer_settings = {
        'alpha': [0.4],
        'lambda': 1,
        'reg_cost': reg.ridge_cost,
        'reg_grad': reg.ridge_grad,
    }
    theta, J_history = func(
        X,
        y,
        start_theta,
        linear_grad,
        optimizer_data=optimizer_settings,
        num_iter=1000,
        batch=30,
        optimizer=opt.simple,
        cost=linear_cost,
    )

    fitted = [float(t) for t in theta]
    reference = [float(t) for t in theta_test]
    print(f'theta={fitted}\n real={reference}\n')

    fitted_cost = linear_cost(X, y, theta, reg.ridge_cost)
    reference_cost = linear_cost(X, y, theta_test, reg.ridge_cost)
    print(f'cost={fitted_cost}\nreal={reference_cost}')

    # Predict by hand: apply the training normalisation, then add the bias.
    sample = np.array([1650, 3], dtype=np.float64)
    sample = (sample - mu) / sigma
    sample = np.insert(sample, 0, [1])
    print(f'price={float(sample @ theta)}, should be 293081.464335')

    # Same prediction through the project's predict() helper.
    normalisation = {
        'normalize': normalize.standard_deviation,
        'mu': mu,
        'sigma': sigma,
    }
    print('p=', predict(theta, [1650, 3], data=normalisation))

    # Cost per iteration.
    iterations = range(len(J_history))
    plt.plot(iterations, J_history)
    plt.xlabel(xlabel='iter number')
    plt.ylabel(ylabel='cost')
    plt.title('regression')
    plt.show()
示例#2
0
def test_ex2data2():
    """Train a ridge-regularised classifier on ex2data2 with expanded features.

    The shuffled samples are expanded with ``poly_feature`` (powers
    ``np.arange(36)``), standardised and given a bias column.  Training uses
    ``opt.adam`` on the full batch for 100 iterations; the parameters, final
    cost and training accuracy are printed and the last 30 cost values are
    plotted.
    """
    print(
        '\n\n===================================== test ex2data2 ====================================='
    )
    print(
        '-------------------------------  regression with classification------------------------------'
    )

    samples = load_data.load_from_file(
        '/home/bb/Documents/python/ML/data/ex2data2.txt')
    np.random.shuffle(samples)
    raw_features = samples[:, :-1]
    y = samples[:, -1:]

    powers = np.arange(36)
    expanded = poly_feature(raw_features, powers)
    # mu/sigma are returned by the normaliser but not needed afterwards.
    expanded, _mu, _sigma = normalize.standard_deviation(expanded)
    bias = np.ones((expanded.shape[0]), dtype=expanded.dtype)
    X = np.insert(expanded, 0, bias, axis=1)

    start_theta = np.zeros((X.shape[1], 1))
    optimizer_settings = {
        'alpha': [0.000001],
        'lambda': 1,
        'reg_cost': reg.ridge_cost,
        'reg_grad': reg.ridge_grad,
    }
    theta, J = regression(
        X,
        y,
        start_theta,
        class_grad,
        num_iter=100,
        cost=class_cost,
        optimizer=opt.adam,
        batch=X.shape[0],
        optimizer_data=optimizer_settings,
    )

    print(theta)
    print('cost=', J[-1:])
    predictions = np.round(sigmoid(X, theta))
    print('accuracy=', np.mean(predictions == y))

    # Only the tail of the cost history is plotted.
    tail = J[-30:]
    plt.plot(range(len(tail)), tail)
    plt.xlabel(xlabel='iter number')
    plt.ylabel(ylabel='cost')
    plt.title('regression')
    plt.show()
示例#3
0
def test_seeds_one_vs_one():
    """Run ``one_vs_one`` on the shuffled seeds dataset with a bias column."""
    print(
        '\n\n===================================== test seeds ====================================='
    )
    seeds = load_data.load_from_file(
        '/home/bb/Downloads/data/seeds_dataset.txt', delime='\t')
    np.random.shuffle(seeds)

    # Last column holds the label; the rest are features.
    features = seeds[:, :-1]
    y = seeds[:, -1:]
    bias = np.ones((features.shape[0]), dtype=features.dtype)
    X = np.insert(features, 0, bias, axis=1)

    one_vs_one(X, y)
示例#4
0
def test_ex1data1():
    """Check linear regression on ex1data1 against known reference values.

    First the cost at zero parameters and the ``normal_eqn`` solution are
    printed beside their expected values.  Then the same problem is solved
    with ``regression`` (full batch, 1000 iterations) and the cost history is
    plotted.
    """
    print(
        '===================================== test ex1data1 ====================================='
    )
    data = load_data.load_from_file(
        '/home/bb/Documents/python/ML/data/ex1data1.txt')
    features = data[:, :-1]
    y = data[:, -1:]
    bias = np.ones((features.shape[0]), dtype=data.dtype)
    X = np.insert(features, 0, bias, axis=1)

    print(
        '\n-------------------------------  iter on ex1data1.txt  ---------------------------------------'
    )
    theta = np.zeros((X.shape[1], 1))
    zero_cost = linear_cost(X, y, theta, 0)
    print(f'cost={zero_cost} should be 32.072733877455676')

    theta = normal_eqn(X, y)
    closed_form = [float(t) for t in theta]
    print(f'theta={closed_form} should be [-3.89578088, 1.19303364]')
    closed_form_cost = linear_cost(X, y, theta, 0)
    print(f'cost={closed_form_cost} should be 4.476971375975179 ')
    mean_abs_error = np.mean(np.abs(h_theta(X, theta) - y))
    print('mean theta error iter=', mean_abs_error,
          'should be 2.1942453988270043')

    print(
        '\n-------------------------------  regression  ---------------------------------------'
    )
    # Restart from zeros for the iterative solver.
    theta = np.zeros((X.shape[1], 1))
    theta, J_history = regression(
        X,
        y,
        theta,
        linear_grad,
        optimizer_data={'alpha': [1e-2]},
        num_iter=1000,
        batch=X.shape[0],
        cost=linear_cost,
    )
    iterative = [float(t) for t in theta]
    print(f'theta={iterative}')
    print(f'cost={linear_cost(X, y, theta)}')

    iterations = range(len(J_history))
    plt.plot(iterations, J_history)
    plt.xlabel(xlabel='iter number')
    plt.ylabel(ylabel='cost')
    plt.title('regression')
    plt.show()
示例#5
0
def test_seeds_softmax():
    """Run ``softmax`` on the shuffled seeds dataset with a bias column.

    The last column of the tab-separated file is used as the label and the
    remaining columns as features.  The labels are passed to ``softmax``
    unchanged.
    """
    print(
        '\n\n===================================== test seeds ====================================='
    )
    data = load_data.load_from_file(
        '/home/bb/Downloads/data/seeds_dataset.txt', delime='\t')
    np.random.shuffle(data)
    X, y = data[:, :-1], data[:, -1:]
    X = np.insert(X, 0, np.ones((X.shape[0]), dtype=X.dtype), axis=1)

    # The unique-class array computed here previously was never used, so it
    # has been dropped.
    softmax(X, y)
示例#6
0
def test_ex2data1():
    """Fine-tune a logistic classifier on ex2data1 from a near-optimal start.

    The shuffled data gets a bias column (features are NOT normalised) and
    training starts from a hard-coded parameter vector.  ``regression`` is run
    with ``opt.momentum`` on the full batch for 10000 iterations.  The
    parameters, final cost, training accuracy and the predicted probability
    for the sample ``[45, 85]`` are printed.
    """
    # The banner previously said "ex1data2", which is not the dataset used here.
    print(
        '\n\n===================================== test ex2data1 ====================================='
    )
    print(
        '-------------------------------  regression with classification------------------------------'
    )
    data = load_data.load_from_file(
        '/home/bb/Documents/python/ML/data/ex2data1.txt')
    np.random.shuffle(data)
    X, y = data[:, :-1], data[:, -1:]
    X = np.insert(X, 0, np.ones((X.shape[0]), dtype=X.dtype), axis=1)

    # Start from a known good solution; the former zero initialisation was
    # overwritten before use and has been removed.
    theta = np.array([-25.06116393, 0.2054152, 0.2006545]).reshape(
        (X.shape[1], 1))
    theta, J = regression(X,
                          y,
                          theta,
                          class_grad,
                          optimizer_data={'alpha': [
                              0.0000002,
                          ]},
                          num_iter=10000,
                          cost=class_cost,
                          optimizer=opt.momentum,
                          batch=X.shape[0])
    print(theta)
    print(J[-1:])
    print(np.mean(np.round(sigmoid(X, theta)) == y))

    # Predict for one sample; the leading 1 is the bias term and no
    # normalisation is applied because training used raw features.
    x = np.array([1, 45, 85])
    p = (1 / (1 + np.exp(-x @ theta)))
    print(p)
示例#7
0
def test_seeds():
    """Train a k-class classifier on the seeds dataset with ``opt.adam_w``.

    Labels are expanded into one boolean column per unique class, and one
    parameter column is trained per class on the full batch for 100
    iterations.  The first and last costs and the parameters are printed, the
    cost history is plotted, and two accuracy figures are reported.
    """
    print(
        '\n\n===================================== test seeds ====================================='
    )
    seeds = load_data.load_from_file(
        '/home/bb/Downloads/data/seeds_dataset.txt', delime='\t')
    np.random.shuffle(seeds)
    features = seeds[:, :-1]
    y = seeds[:, -1:]
    bias = np.ones((features.shape[0]), dtype=features.dtype)
    X = np.insert(features, 0, bias, axis=1)

    class_labels = np.array(np.unique(y), dtype=np.uint8)
    # Comparing the label column against the class row yields one boolean
    # column per class.
    Y = (y == class_labels)

    print(
        '-------------------------------  classification k-classes  ------------------------------'
    )
    start_theta = np.zeros((X.shape[1], class_labels.shape[0]))

    optimizer_settings = {
        'alpha': [0.002],
        'cost': class_cost,
        'grad': class_grad,
        'reg_cost': reg.ridge_cost,
        'reg_grad': reg.ridge_grad,
        'compute_alpha': opt.compute_alpha_simple,
        'beta': 0.9,
        'beta1': 0.9,
        'beta2': 0.99,
        'beta_t': np.array([0.9, 0.99]),
        'compute_beta_t': opt.square_beta,
        'epsilon': 10e-9,
        'lambda': 1,
        'const': 10e+12,
        'limit_class': 0.5,
    }
    theta, J = regression(
        X,
        Y,
        start_theta,
        class_grad,
        cost=class_cost,
        num_iter=100,
        optimizer_data=optimizer_settings,
        optimizer=opt.adam_w,
        batch=X.shape[0],
    )
    print(J[0], J[-1:])
    print(theta.tolist())

    # Cost per iteration.
    iterations = range(len(J))
    plt.plot(iterations, J)
    plt.xlabel(xlabel='iter number')
    plt.ylabel(ylabel='cost')
    plt.title('regression')
    plt.show()

    # Accuracy from the arg-max class; the +1 maps the column index onto the
    # label values (assumes labels run 1..k - TODO confirm).
    best_column = np.argmax(sigmoid(X, theta), axis=1)
    predicted_label = np.array(np.round(best_column + 1)).reshape(y.shape)
    res = predicted_label == y
    print(np.mean(res))

    # Element-wise accuracy over the per-class boolean matrix.
    rounded = np.round(sigmoid(X, theta))
    print('accuracy=', np.mean(rounded == Y))
示例#8
0
def test_stars():
    """Load the '6 class ready' CSV; nothing is done with the result yet."""
    stars_csv = '/home/bb/Downloads/data/archive2/6 class ready.csv'
    load_data.load_from_file(stars_csv)
示例#9
0
def _plot_cost_history(J_history):
    """Plot the cost recorded at each iteration and show the figure."""
    plt.plot(range(len(J_history)), J_history)
    plt.xlabel(xlabel='iter number')
    plt.ylabel(ylabel='cost')
    plt.title('regression')
    plt.show()


def test_ex1data2():
    """Compare three ways of fitting linear regression on ex1data2.

    1. ``normal_eqn`` on the raw features.
    2. ``regression`` with ``opt.simple`` on standardised features.
    3. ``regression`` on features scaled by ``normalize.simple_normalize``.

    Each section prints the fitted parameters and a price prediction for the
    sample ``[1650, 3]``; sections 2 and 3 also plot the cost history.
    """
    path = '/home/bb/Documents/python/ML/data/ex1data2.txt'

    print(
        '\n\n===================================== test ex1data2 ====================================='
    )
    data = load_data.load_from_file(path)
    X, y = np.insert(data[:, :-1],
                     0,
                     np.ones((data[:, :-1].shape[0]), dtype=data.dtype),
                     axis=1), data[:, -1:]

    print(
        '\n-------------------------------  normal_eqn  ---------------------------------------'
    )
    theta = np.zeros((X.shape[1], 1))
    print(f'cost={linear_cost(X, y, theta)}')
    theta = normal_eqn(X, y)
    print(f'theta={[float(t) for t in theta]} should be ]')
    print(
        f'price={float(np.array([1, 1650, 3]) @ theta)}, should be 293081.464335'
    )
    print(f'cost={linear_cost(X, y, theta)}')

    print(
        '\n-------------------------------  regression with std normalize ---------------------------------------'
    )
    # Reload so this section starts from unmodified data.
    data = load_data.load_from_file(path)
    X, y = data[:, :-1], data[:, -1:]
    X, mu, sigma = normalize.standard_deviation(X)
    X = np.insert(X, 0, np.ones((X.shape[0]), dtype=X.dtype), axis=1)
    # Restart from zeros: the previous theta was fitted on un-normalised
    # features and is not a meaningful starting point here.
    theta = np.zeros((X.shape[1], 1))
    theta, J_history = regression(X,
                                  y,
                                  theta,
                                  linear_grad,
                                  # The key was ' alpha' (leading space) with
                                  # a scalar value; it now matches the
                                  # 'alpha': [..] form used by the other calls.
                                  optimizer_data={'alpha': [
                                      0.01,
                                  ]},
                                  num_iter=10000,
                                  batch=X.shape[0],
                                  optimizer=opt.simple,
                                  cost=linear_cost)
    theta_test = normal_eqn(X, y)
    print(
        f'theta={[float(t) for t in theta]}\n real={[float(t) for t in theta_test]}\n'
    )
    print(
        f'cost={linear_cost(X, y, theta)}\nreal={linear_cost(X, y, theta_test)}'
    )
    # Predict: normalise the features but leave the bias term untouched.
    x = np.array([1, 1650, 3], dtype=np.float64)
    x[1:] = (x[1:] - mu) / sigma
    print(f'price={float(x @ theta)}, should be 293081.464335')

    _plot_cost_history(J_history)

    print(
        '\n-------------------------------  regression with simple normalize ---------------------------------------'
    )
    data = load_data.load_from_file(path)
    X, y = data[:, :-1], data[:, -1:]
    X, max_, min_ = normalize.simple_normalize(X)
    X = np.insert(X, 0, np.ones((X.shape[0]), dtype=X.dtype), axis=1)
    theta = np.zeros((X.shape[1], 1))
    theta, J_history = regression(X,
                                  y,
                                  theta,
                                  linear_grad,
                                  optimizer_data={'alpha': [
                                      10e-1,
                                  ]},
                                  num_iter=10000,
                                  batch=X.shape[0],
                                  cost=linear_cost)
    print(
        f'theta={[float(t) for t in theta]}\nreal= {[float(t) for t in normal_eqn(X, y)]}'
    )
    # Predict: scale the sample the same way as the training features.
    # NOTE(review): assumes simple_normalize computes (X - min_) / max_ -
    # confirm against its implementation.
    x = np.array([1650, 3], dtype=np.float64)
    x = (x - min_) / max_
    x = np.insert(x, 0, [
        1,
    ], axis=0)
    print(f'price={x @ theta}, should be 293081.464335')

    _plot_cost_history(J_history)
示例#10
0
    :return:
        X_groups: list of X
        u: vector of mean(X) for each feature
        sigma: matrix 3D with correlations matrix for each feature

    :efficiency: O(k*(m*n+n^3+m*n^2+m*n^2)) ~ O(k*m*n^2)
    """
    m, n, k = X.shape[0] if len(X.shape) > 1 else 1, X.shape[0] if len(X.shape) == 1 else X.shape[1], u.shape[0]
    # if n == 1:
    #     u, sigma = np.zeros((m, n)) + u.reshape(-1), np.zeros((m, n)) + sigma.reshape(-1)
    X = X.reshape((m, -1))
    M = np.zeros((m, k))
    for i in range(k):
        M[:, i] = np.exp(-0.5 * np.sum(((X - u[i]) @ np.linalg.pinv(sigma[i])) * (X - u[i]), axis=1))
        M[:, i] /= (((2 * np.pi) ** (n / 2)) * np.sqrt(np.linalg.det(sigma[i])))  # (1/(2*n))
    return M


if __name__ == '__main__':
    # Smoke test: fit the Gaussian Bayes model on ex2data1 and print its
    # accuracy on the training data.  The banner previously said "ex1data2",
    # which is not the file loaded here.
    print('\n\n===================================== test ex2data1 =====================================')
    data = load_data.load_from_file('/home/bb/Documents/python/ML/data/ex2data1.txt')
    X, y = data[:, :-1], data[:, -1:]
    X_group, u, sigma = gaussian_bayes(X, y)

    # Score every sample so the predictions line up one-to-one with y.
    # Previously only six rows of the first group were scored, and their
    # (6,) arg-max was compared with the (m, 1) label column, which
    # broadcasts to an (m, 6) matrix instead of an element-wise comparison.
    p = predict_gaussian_bayes(X, u, sigma)
    # NOTE(review): assumes the column order of p matches the label values
    # (0..k-1) - confirm against gaussian_bayes.
    print(np.mean(np.argmax(p, axis=1) == y.ravel()))