def test_general(file, func):
    data = load_data.load_from_file(file)
    X, y = data[:, :-1], data[:, -1:]
    print(f'\n\n===================================== test {file} =====================================')
    print('------------------------------- regression ---------------------------------------')
    X, mu, sigma = normalize.standard_deviation(X)
    X = np.insert(X, 0, np.ones(X.shape[0], dtype=X.dtype), axis=1)
    theta = np.zeros((X.shape[1], 1))
    theta_test = normal_eqn(X, y)
    data_opt = {
        'alpha': [0.4],
        'lambda': 1,
        'reg_cost': reg.ridge_cost,
        'reg_grad': reg.ridge_grad
    }
    theta, J_history = func(X, y, theta, linear_grad, optimizer_data=data_opt,
                            num_iter=1000, batch=30, optimizer=opt.simple,
                            cost=linear_cost)
    print(f'theta={[float(t) for t in theta]}\nreal ={[float(t) for t in theta_test]}\n')
    print(f'cost={linear_cost(X, y, theta, reg.ridge_cost)}\n'
          f'real={linear_cost(X, y, theta_test, reg.ridge_cost)}')

    # predict the price of a 1650 sq-ft, 3-bedroom house (ex1data2)
    x = np.array([1650, 3], dtype=np.float64)
    x = (x - mu) / sigma
    x = np.insert(x, 0, [1])
    print(f'price={float(x @ theta)}, should be 293081.464335')
    print('p=', predict(theta, [1650, 3],
                        data={'normalize': normalize.standard_deviation,
                              'mu': mu,
                              'sigma': sigma}))

    # plot the cost history
    plt.plot(range(len(J_history)), J_history)
    plt.xlabel('iter number')
    plt.ylabel('cost')
    plt.title('regression')
    plt.show()
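# `normal_eqn` is used above as the closed-form reference. A minimal sketch of
# what it presumably computes, the least-squares normal equation
# theta = (X^T X)^+ X^T y; hypothetical name, the repo's implementation may differ.
def normal_eqn_sketch(X, y):
    # pinv keeps this stable when X^T X is singular (e.g. dependent features)
    return np.linalg.pinv(X.T @ X) @ X.T @ y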
def test_ex2data2():
    print('\n\n===================================== test ex2data2 =====================================')
    print('------------------------------- classification (logistic regression) ------------------------------')
    data = load_data.load_from_file('/home/bb/Documents/python/ML/data/ex2data2.txt')
    np.random.shuffle(data)
    X, y = data[:, :-1], data[:, -1:]
    p = np.arange(36)
    X = poly_feature(X, p)
    X, mu, sigma = normalize.standard_deviation(X)
    X = np.insert(X, 0, np.ones(X.shape[0], dtype=X.dtype), axis=1)
    theta = np.zeros((X.shape[1], 1))
    data_opt = {
        'alpha': [0.000001],
        'lambda': 1,
        'reg_cost': reg.ridge_cost,
        'reg_grad': reg.ridge_grad
    }
    theta, J = regression(X, y, theta, class_grad, num_iter=100, cost=class_cost,
                          optimizer=opt.adam, batch=X.shape[0],
                          optimizer_data=data_opt)
    print(theta)
    print('cost=', J[-1:])
    print('accuracy=', np.mean(np.round(sigmoid(X, theta)) == y))

    # predict (kept for reference)
    # x = poly_feature(np.array([45, 85]), p)
    # x = np.insert(x, 0, [1])
    # x[1:] = (x[1:] - mu) / sigma
    # print(1 / (1 + np.exp(-x @ theta)))

    # plot the last 30 cost values
    plt.plot(range(len(J[-30:])), J[-30:])
    plt.xlabel('iter number')
    plt.ylabel('cost')
    plt.title('regression')
    plt.show()
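# `sigmoid` and `class_cost` above are assumed to be the standard logistic
# regression pieces, consistent with the inline 1/(1+exp(-x @ theta)) used
# elsewhere in this file. Minimal sketches (hypothetical names, regularizer
# omitted):
def sigmoid_sketch(X, theta):
    # h(x) = 1 / (1 + e^(-x.theta))
    return 1.0 / (1.0 + np.exp(-X @ theta))


def class_cost_sketch(X, y, theta):
    # binary cross-entropy: J = -1/m * sum(y*log(h) + (1-y)*log(1-h))
    m = X.shape[0]
    h = sigmoid_sketch(X, theta)
    return float(-np.sum(y * np.log(h) + (1 - y) * np.log(1 - h)) / m)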
def test_seeds_one_vs_one():
    print('\n\n===================================== test seeds =====================================')
    data = load_data.load_from_file('/home/bb/Downloads/data/seeds_dataset.txt', delime='\t')
    np.random.shuffle(data)
    X, y = data[:, :-1], data[:, -1:]
    X = np.insert(X, 0, np.ones(X.shape[0], dtype=X.dtype), axis=1)
    one_vs_one(X, y)
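# `one_vs_one` is assumed to be the usual pairwise-voting scheme: one binary
# classifier per pair of classes, final label by majority vote. A minimal
# sketch under that assumption; `train_binary` and `predict_binary` are
# hypothetical callables (the repo wires in its own trainer), and
# `predict_binary` is assumed to return a 0/1 vector of shape (m,).
def one_vs_one_sketch(X, y, train_binary, predict_binary):
    from itertools import combinations
    classes = np.unique(y)
    votes = np.zeros((X.shape[0], classes.shape[0]))
    for i, j in combinations(range(classes.shape[0]), 2):
        mask = ((y == classes[i]) | (y == classes[j])).reshape(-1)
        theta = train_binary(X[mask], (y[mask] == classes[j]).astype(float))
        pred = predict_binary(X, theta)  # 1 votes for class j, 0 for class i
        votes[:, j] += pred
        votes[:, i] += 1 - pred
    return classes[np.argmax(votes, axis=1)]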
def test_ex1data1():
    print('===================================== test ex1data1 =====================================')
    data = load_data.load_from_file('/home/bb/Documents/python/ML/data/ex1data1.txt')
    X = np.insert(data[:, :-1], 0, np.ones(data.shape[0], dtype=data.dtype), axis=1)
    y = data[:, -1:]

    print('\n------------------------------- normal_eqn ---------------------------------------')
    theta = np.zeros((X.shape[1], 1))
    print(f'cost={linear_cost(X, y, theta, 0)} should be 32.072733877455676')
    theta = normal_eqn(X, y)
    print(f'theta={[float(t) for t in theta]} should be [-3.89578088, 1.19303364]')
    print(f'cost={linear_cost(X, y, theta, 0)} should be 4.476971375975179')
    print('mean prediction error=', np.mean(np.abs(h_theta(X, theta) - y)),
          'should be 2.1942453988270043')
    # print('error in octave=', np.mean(np.abs(h_theta(X, np.array([-3.6303, 1.1664])) - y)))
    # print('predict in octave=', h_theta(np.array([1, 7]), np.array([-3.6303, 1.1664])) * 10000)

    print('\n------------------------------- regression ---------------------------------------')
    theta = np.zeros((X.shape[1], 1))
    theta, J_history = regression(X, y, theta, linear_grad,
                                  optimizer_data={'alpha': [1e-2]},
                                  num_iter=1000, batch=X.shape[0],
                                  cost=linear_cost)
    print(f'theta={[float(t) for t in theta]}')
    print(f'cost={linear_cost(X, y, theta)}')

    plt.plot(range(len(J_history)), J_history)
    plt.xlabel('iter number')
    plt.ylabel('cost')
    plt.title('regression')
    plt.show()
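# `linear_cost` / `linear_grad` are assumed to be the usual squared-error cost
# and gradient; the expected cost 32.0727... above matches the
# J = 1/(2m) * ||X.theta - y||^2 convention. Minimal sketches (hypothetical
# names, regularizer omitted):
def linear_cost_sketch(X, y, theta):
    m = X.shape[0]
    err = X @ theta - y
    return (err.T @ err).item() / (2 * m)


def linear_grad_sketch(X, y, theta):
    # dJ/dtheta = 1/m * X^T (X.theta - y)
    return X.T @ (X @ theta - y) / X.shape[0]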
def test_seeds_softmax():
    print('\n\n===================================== test seeds =====================================')
    data = load_data.load_from_file('/home/bb/Downloads/data/seeds_dataset.txt', delime='\t')
    np.random.shuffle(data)
    X, y = data[:, :-1], data[:, -1:]
    X = np.insert(X, 0, np.ones(X.shape[0], dtype=X.dtype), axis=1)
    classes = np.array(np.unique(y), dtype=np.uint8)
    # optional: remap the raw labels to 0..K-1
    # K = np.arange(classes.shape[0])
    # K_dict = {clas: k for clas, k in zip(classes, K)}
    # y = np.array([K_dict[item] for item in y.reshape(-1)], dtype=y.dtype)
    softmax(X, y)
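# `softmax` above trains a multinomial classifier. A minimal sketch of the
# probability computation it presumably relies on,
# p(k|x) = exp(x.theta_k) / sum_j exp(x.theta_j), with the row max subtracted
# for numerical stability (hypothetical name):
def softmax_probs_sketch(X, Theta):
    # X: (m, n), Theta: (n, K) -> (m, K) class probabilities
    Z = X @ Theta
    Z = Z - Z.max(axis=1, keepdims=True)  # stabilize exp
    E = np.exp(Z)
    return E / E.sum(axis=1, keepdims=True)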
def test_ex2data1():
    print('\n\n===================================== test ex2data1 =====================================')
    print('------------------------------- classification (logistic regression) ------------------------------')
    data = load_data.load_from_file('/home/bb/Documents/python/ML/data/ex2data1.txt')
    np.random.shuffle(data)
    X, y = data[:, :-1], data[:, -1:]
    # X, mu, sigma = normalize.standard_deviation(X)
    X = np.insert(X, 0, np.ones(X.shape[0], dtype=X.dtype), axis=1)
    # start near the known optimum so the tiny learning rate still converges
    theta = np.array([-25.06116393, 0.2054152, 0.2006545]).reshape((X.shape[1], 1))
    theta, J = regression(X, y, theta, class_grad,
                          optimizer_data={'alpha': [0.0000002]},
                          num_iter=10000, cost=class_cost,
                          optimizer=opt.momentum, batch=X.shape[0])
    print(theta)
    print('cost=', J[-1:])
    print('accuracy=', np.mean(np.round(sigmoid(X, theta)) == y))

    # predict admission probability for scores (45, 85)
    x = np.array([1, 45, 85])
    # x[1:] = (x[1:] - mu) / sigma
    p = 1 / (1 + np.exp(-x @ theta))
    print(p)
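# `opt.momentum` is assumed to be the classic momentum update,
# v <- beta*v + grad, theta <- theta - alpha*v (some variants scale the
# gradient by 1-beta). A minimal per-step sketch with a hypothetical
# signature; the repo's optimizer API may differ:
def momentum_step_sketch(theta, grad, v, alpha, beta=0.9):
    v = beta * v + grad  # decaying accumulation of past gradients
    return theta - alpha * v, v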
def test_seeds():
    print('\n\n===================================== test seeds =====================================')
    data = load_data.load_from_file('/home/bb/Downloads/data/seeds_dataset.txt', delime='\t')
    np.random.shuffle(data)
    X, y = data[:, :-1], data[:, -1:]
    X = np.insert(X, 0, np.ones(X.shape[0], dtype=X.dtype), axis=1)
    k = np.array(np.unique(y), dtype=np.uint8)
    Y = (y == k)  # one-hot encoding: (m, 1) labels against (K,) classes -> (m, K)

    print('------------------------------- classification k-classes ------------------------------')
    theta = np.zeros((X.shape[1], k.shape[0]))
    data_opt = {
        'alpha': [0.002],
        'cost': class_cost,
        'grad': class_grad,
        'reg_cost': reg.ridge_cost,
        'reg_grad': reg.ridge_grad,
        'compute_alpha': opt.compute_alpha_simple,
        'beta': 0.9,
        'beta1': 0.9,
        'beta2': 0.99,
        'beta_t': np.array([0.9, 0.99]),
        'compute_beta_t': opt.square_beta,
        'epsilon': 10e-9,
        'lambda': 1,
        'const': 10e+12,
        'limit_class': 0.5
    }
    theta, J = regression(X, Y, theta, class_grad, cost=class_cost, num_iter=100,
                          optimizer_data=data_opt, optimizer=opt.adam_w,
                          batch=X.shape[0])
    print(J[0], J[-1:])
    print(theta.tolist())

    # plot
    plt.plot(range(len(J)), J)
    plt.xlabel('iter number')
    plt.ylabel('cost')
    plt.title('regression')
    plt.show()

    # accuracy: predicted class label (argmax + 1, labels are 1-based) vs y
    pred = (np.argmax(sigmoid(X, theta), axis=1) + 1).reshape(y.shape)
    print('accuracy (labels)=', np.mean(pred == y))
    print('accuracy (one-hot)=', np.mean(np.round(sigmoid(X, theta)) == Y))
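# `opt.adam_w` is assumed to follow AdamW (Adam with decoupled weight decay),
# matching the beta1/beta2/epsilon/lambda entries in the dict above. A minimal
# per-step sketch with a hypothetical signature; t is the 1-based step count:
def adam_w_step_sketch(theta, grad, m, v, t, alpha,
                       beta1=0.9, beta2=0.99, epsilon=1e-8, lam=1.0):
    m = beta1 * m + (1 - beta1) * grad       # first moment (mean of grads)
    v = beta2 * v + (1 - beta2) * grad ** 2  # second moment (uncentered var)
    m_hat = m / (1 - beta1 ** t)             # bias correction
    v_hat = v / (1 - beta2 ** t)
    # weight decay is decoupled from the adaptive gradient term
    theta = theta - alpha * (m_hat / (np.sqrt(v_hat) + epsilon) + lam * theta)
    return theta, m, v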
def test_stars():
    # stub: loads the 6-class stars dataset but does not run anything yet
    data = load_data.load_from_file('/home/bb/Downloads/data/archive2/6 class ready.csv')
def test_ex1data2():
    print('\n\n===================================== test ex1data2 =====================================')
    data = load_data.load_from_file('/home/bb/Documents/python/ML/data/ex1data2.txt')
    X = np.insert(data[:, :-1], 0, np.ones(data.shape[0], dtype=data.dtype), axis=1)
    y = data[:, -1:]

    print('\n------------------------------- normal_eqn ---------------------------------------')
    theta = np.zeros((X.shape[1], 1))
    print(f'cost={linear_cost(X, y, theta)}')
    theta = normal_eqn(X, y)
    print(f'theta={[float(t) for t in theta]}')
    print(f'price={float(np.array([1, 1650, 3]) @ theta)}, should be 293081.464335')
    print(f'cost={linear_cost(X, y, theta)}')

    print('\n------------------------------- regression with std normalize ---------------------------------------')
    data = load_data.load_from_file('/home/bb/Documents/python/ML/data/ex1data2.txt')
    X, y = data[:, :-1], data[:, -1:]
    X, mu, sigma = normalize.standard_deviation(X)
    X = np.insert(X, 0, np.ones(X.shape[0], dtype=X.dtype), axis=1)
    theta = np.zeros((X.shape[1], 1))
    theta, J_history = regression(X, y, theta, linear_grad,
                                  optimizer_data={'alpha': [0.01]},
                                  num_iter=10000, batch=X.shape[0],
                                  optimizer=opt.simple, cost=linear_cost)
    theta_test = normal_eqn(X, y)
    print(f'theta={[float(t) for t in theta]}\nreal ={[float(t) for t in theta_test]}\n')
    print(f'cost={linear_cost(X, y, theta)}\nreal={linear_cost(X, y, theta_test)}')

    # predict
    x = np.array([1, 1650, 3], dtype=np.float64)
    x[1:] = (x[1:] - mu) / sigma
    print(f'price={float(x @ theta)}, should be 293081.464335')
    plt.plot(range(len(J_history)), J_history)
    plt.xlabel('iter number')
    plt.ylabel('cost')
    plt.title('regression')
    plt.show()

    print('\n------------------------------- regression with simple normalize ---------------------------------------')
    data = load_data.load_from_file('/home/bb/Documents/python/ML/data/ex1data2.txt')
    X, y = data[:, :-1], data[:, -1:]
    X, max_, min_ = normalize.simple_normalize(X)
    X = np.insert(X, 0, np.ones(X.shape[0], dtype=X.dtype), axis=1)
    theta = np.zeros((X.shape[1], 1))
    theta, J_history = regression(X, y, theta, linear_grad,
                                  optimizer_data={'alpha': [10e-1]},
                                  num_iter=10000, batch=X.shape[0],
                                  cost=linear_cost)
    print(f'theta={[float(t) for t in theta]}\nreal= {[float(t) for t in normal_eqn(X, y)]}')

    # predict
    x = np.array([1650, 3], dtype=np.float64)
    x = (x - min_) / max_
    x = np.insert(x, 0, [1], axis=0)
    print(f'price={x @ theta}, should be 293081.464335')
    plt.plot(range(len(J_history)), J_history)
    plt.xlabel('iter number')
    plt.ylabel('cost')
    plt.title('regression')
    plt.show()
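# The two normalizers used in this file are assumed to be standard feature
# scaling; the sketches below (hypothetical names) are written to match how
# their return values are applied to new samples above, i.e. (x - mu) / sigma
# and (x - min_) / max_. The repo's `normalize` module may differ.
def standard_deviation_sketch(X):
    # z-score scaling, returned with the fitted statistics
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    return (X - mu) / sigma, mu, sigma


def simple_normalize_sketch(X):
    # min-max style scaling matching the (x - min_) / max_ usage above
    min_ = X.min(axis=0)
    max_ = X.max(axis=0)
    return (X - min_) / max_, max_, min_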
def predict_gaussian_bayes(X, u, sigma):
    """
    compute the density of every sample in X under each class Gaussian
    :param X: samples, matrix (m, n)
    :param u: per-class mean vectors, matrix (k, n)
    :param sigma: per-class covariance matrices, 3D matrix (k, n, n)
    :return: M: matrix (m, k), M[i, j] is the density of sample i under the
        Gaussian of class j
    :efficiency: O(k*(m*n + n^3 + m*n^2 + m*n^2)) ~ O(k*m*n^2)
    """
    m = X.shape[0] if len(X.shape) > 1 else 1
    n = X.shape[0] if len(X.shape) == 1 else X.shape[1]
    k = u.shape[0]
    X = X.reshape((m, -1))
    M = np.zeros((m, k))
    for i in range(k):
        # multivariate normal density:
        # N(x; u_i, S_i) = exp(-0.5*(x-u_i)^T S_i^{-1} (x-u_i))
        #                  / ((2*pi)^(n/2) * sqrt(det(S_i)))
        M[:, i] = np.exp(-0.5 * np.sum(((X - u[i]) @ np.linalg.pinv(sigma[i])) * (X - u[i]), axis=1))
        M[:, i] /= ((2 * np.pi) ** (n / 2)) * np.sqrt(np.linalg.det(sigma[i]))
    return M


if __name__ == '__main__':
    print('\n\n===================================== test gaussian bayes =====================================')
    data = load_data.load_from_file('/home/bb/Documents/python/ML/data/ex2data1.txt')
    # np.random.shuffle(data)
    X, y = data[:, :-1], data[:, -1:]
    X_group, u, sigma = gaussian_bayes(X, y)
    p = predict_gaussian_bayes(X, u, sigma)
    # most likely class per sample vs. the true labels (reshape so the
    # comparison is elementwise rather than broadcast)
    print(np.mean(np.argmax(p, axis=1).reshape(y.shape) == y))
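# `gaussian_bayes` is assumed to fit one multivariate Gaussian per class,
# returning the grouped samples plus the per-class mean and covariance
# consumed by predict_gaussian_bayes. A minimal sketch under that assumption
# (hypothetical implementation; the repo's version may differ):
def gaussian_bayes_sketch(X, y):
    classes = np.unique(y)
    X_groups = [X[(y == c).reshape(-1)] for c in classes]
    u = np.array([g.mean(axis=0) for g in X_groups])               # (k, n)
    sigma = np.array([np.cov(g, rowvar=False) for g in X_groups])  # (k, n, n)
    return X_groups, u, sigma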