def computeDecisionBoundary(self, w_full, lam, stepSize=0.01, maxFunctionCalls=10000,
                            printSummary=True, plot=False, plotIter=False,
                            useGradientCriterion=False, tol=1e-4):
    gd = self.constructGradientDescentObject(lam)
    gd.stepSize = stepSize
    storeIterValues = plotIter  # only record per-iteration values if we plan to plot them

    sol = gd.computeMin(w_full, maxFunctionCalls=maxFunctionCalls, printSummary=printSummary,
                        storeIterValues=storeIterValues, tol=tol,
                        useGradientCriterion=useGradientCriterion)
    w_star = sol[0]
    w_star_normalized = w_star / np.linalg.norm(w_star)

    if printSummary:
        print("--- Classification Summary ---")
        print("w_full = " + str(w_star))
        print("w_full normalized = " + str(w_star_normalized))
        print("norm of w_full = " + str(np.linalg.norm(w_star)))
        print("lambda = " + str(lam))
        self.classificationErrorRate(w_star, verbose=True)
        print("------------------")
        print("")

    if plot:
        self.plotData(w_star)

    if plotIter:
        gd.plotIterValues()

    return w_star
def main():
    optimizers = ["GD", "GD+", "GDM", "GDM+", "NAG", "NAG+", "Adam", "Adam+"]
    learning_rates = [0.01, 0.01, 0.015, 0.01, 0.006, 0.006, 0.0005, 0.0005]
    alphas = [0.0, 1e-4, 0.0, 1e-5, 0.0, 1e-6, 0.0, 1e-8]

    x_min, y_min = 3.0, 0.5
    x_ini, y_ini = -2.0, -1.0

    stats = {
        optimizer: {"x": None, "y": None, "eta_x": None, "eta_y": None}
        for optimizer in optimizers
    }

    n_iterations = 1000  # 10000 for Adam, 4400 for NAG, 2000 for GDM, 1200 for GD

    for optimizer, learning_rate, alpha in zip(optimizers, learning_rates, alphas):
        print(optimizer)
        model = GradientDescent(f, x_ini, y_ini, n_iterations, learning_rate, alpha, optimizer)
        x_hist, y_hist, lr_x_hist, lr_y_hist = model.minimize()
        stats[optimizer]["x"] = x_hist
        stats[optimizer]["y"] = y_hist
        stats[optimizer]["eta_x"] = lr_x_hist
        stats[optimizer]["eta_y"] = lr_y_hist

    plot_gradient_descent(stats, f, x_min, y_min)
    plot_loss(stats, x_min, y_min)
    plot_learning_rates(stats)
def run(input_data_file):
    """Fit a simple linear regression to the data in `input_data_file` using gradient descent."""
    # initial parameter values
    intercept = 0.
    slope = 0.
    param_values = {"intercept": intercept, "slope": slope}

    # hypothesis object
    hypo = SimpleLinearRegression()
    hypo.initialize_parameters(param_values)

    # read in data (e.g. 'data/input/ex1data1.txt')
    data = get_input_data(input_data_file)

    # extract features and reshape to dimension nobs x 1
    features = data[:, 0]
    features = features.reshape((len(features), 1))

    # extract y values and reshape to dimension nobs x 1
    yvalues = data[:, 1]
    yvalues = yvalues.reshape((len(yvalues), 1))

    # cost function object
    sel = SquaredErrorLoss(hypo, features, yvalues)

    gd = GradientDescent(.0001, param_values, .000000001, sel)
    gd.algorithm()
    print(gd.get_parameters())
def fit(self, x, y, plot_cost=False): if hasattr(self, "scaler"): x = self.scaler.fit_transform(x) x = add_bias_feature(x) # one-vs-all for class_type in set(y): cur_y = np.ones(len(y)) cur_y[y != class_type] = 0 self.class_data[class_type] = {"y": cur_y} gd = GradientDescent(mode='logistic', track_cost=plot_cost, learning_rate=self.learning_rate, penalty=self.penalty, alpha=self.alpha, max_iterations=self.max_iterations) cur_theta = gd.find_theta(x, cur_y) self.class_data[class_type] = {"theta": cur_theta} if plot_cost: cost_x, cost_y = gd.get_cost_history() plt.plot(cost_x, cost_y, "r-") plt.title(F"Cost for class '{class_type}' (last value={gd.last_cost:0.6f})") plt.show()
def fit(self, x, y, plot_cost=False):
    x, y = check_X_y(x, y)
    if hasattr(self, "scaler"):
        x = self.scaler.fit_transform(x)
    x = add_bias_feature(x)

    gd = GradientDescent(mode='linear', track_cost=plot_cost,
                         learning_rate=self.learning_rate, penalty=self.penalty,
                         alpha=self.alpha, max_iterations=self.max_iterations)
    theta = gd.find_theta(x, y)

    if plot_cost:
        cost_x, cost_y = gd.get_cost_history()
        plt.plot(cost_x, cost_y, "r-")
        plt.title(f"Cost (last value={gd.last_cost:0.6f})")
        plt.show()

    self._theta = theta
    self.intercept_ = theta[0, 0]
    self.coef_ = theta[1:].reshape(len(theta) - 1)
    check_is_fitted(self, attributes=['intercept_', 'coef_'])
    return self
def test_batch(self):
    gd = GradientDescent(Parameters(), linear_regression_hypothesis)
    coefs = np.ones(2)
    x = np.array([[1, i] for i in range(10)])
    y = np.array([2 * i + 10 for i in range(10)])
    gd.batch((x, y), coefs)
    self.assertTrue(np.all(np.greater(coefs, 1)))
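# These GradientDescent tests pass a `linear_regression_hypothesis` callable whose
# definition is not shown here. A minimal sketch consistent with how the tests use it
# (coefficients dotted with rows whose first column is a bias of ones) might look like
# the following -- an illustrative assumption, not the project's actual code:
import numpy as np

def linear_regression_hypothesis(x, coefs):
    """Predict y = x . coefs for a design matrix whose first column is all ones."""
    return np.dot(x, coefs)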
def main(path: str, verbose: bool = False, separator: str = ',',
         number_of_target_feature: int = -1, file_with_params: str = "params.json",
         learning_rate: float = 0.01, epsilon: float = 10e-10) -> None:
    feature_arrays, target_values, columns, target_column = read_dataset(
        path,
        verbose=verbose,
        separator=separator,
        number_of_target_feature=number_of_target_feature
    )
    gd = GradientDescent(verbose=verbose, columns=columns, target_column=target_column)
    gd.fit(
        feature_arrays,
        target_values,
        file_with_params=file_with_params,
        learning_rate=learning_rate,
        epsilon=epsilon
    )
def setUp(self):
    x = np.array([5, 2], float)
    y = np.array([0.25, 0, 1], float)
    self.examples = PreloadSource(([x], [y]))

    nnet = NetFactory.create_neural_net(sizes=[2, 3, 3])
    nnet.randomize_parameters()
    self.nnet = nnet

    cost_func = QuadraticCost(nnet)
    self.grad_descent = GradientDescent(neural_net=nnet, cost_function=cost_func)
def test_regularization(self):
    derivatives = np.zeros(10)
    coefs = np.ones(10)
    parameters = Parameters(l2=2)
    derivatives_expected = np.array([0, 2, 2, 2, 2, 2, 2, 2, 2, 2])
    gd = GradientDescent(parameters, linear_regression_hypothesis)
    gd.regularize(derivatives, coefs)
    np.testing.assert_array_equal(derivatives, derivatives_expected)
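# The expected output [0, 2, 2, ...] implies that `regularize` adds l2 * coef to each
# derivative in place while leaving the intercept term (index 0) unpenalized. A minimal
# standalone sketch of that behaviour (the actual GradientDescent method is not shown;
# the function name here is illustrative only):
import numpy as np

def regularize_l2(derivatives, coefs, l2):
    """Add the L2 penalty gradient in place, skipping the intercept coefficient."""
    derivatives[1:] += l2 * coefs[1:]

# matches the test above:
# d = np.zeros(10); c = np.ones(10); regularize_l2(d, c, 2)  ->  d == [0, 2, 2, ..., 2]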
def test_shuffling(self):
    x = np.array(range(1000))
    x_not_changed = x.copy()
    y = x.copy()
    parameters = Parameters(shuffle=True)
    gd = GradientDescent(parameters, linear_regression_hypothesis)
    x, y = gd.shuffle_data_set((x, y))
    self.assertTrue(np.any(np.not_equal(x, x_not_changed)))
    np.testing.assert_array_equal(x, y)
def test_gives_correct_output_on_training_data(self):
    nnet = NetFactory.create_neural_net(sizes=[1, 1, 1])
    cost_func = QuadraticCost(neural_net=nnet)
    gd = GradientDescent(neural_net=nnet, cost_function=cost_func)

    xes = [np.array([-10], float), np.array([100], float)]
    ys = [np.array([0.5], float), np.array([0.75], float)]

    gd.train(data_src=PreloadSource((xes, ys)), nepochs=100)

    for i in range(len(xes)):
        res = nnet.feed(xes[i])
        self.assertAlmostEqual(res[0], ys[i][0], places=1)
def test_descent_easy(self):
    x = np.array([[1, i] for i in range(1000)])
    y = np.array([10 + 2 * i for i in range(1000)])
    parameters = Parameters(batch_size=100, shuffle=True)
    gd = GradientDescent(parameters, linear_regression_hypothesis)

    xscal = StandardScaler(copy=False)
    x = xscal.fit_transform(x)
    yscal = StandardScaler(copy=False)
    y = yscal.fit_transform(y)

    coef0, coef1 = gd.descent((x, y))
    self.assertAlmostEqual(coef0, 0)
    self.assertAlmostEqual(coef1, 1)
def run_for_file(f, exclude_columns, include_only_columns, output_dir, y_column):
    file_data = pd.read_csv(f)

    # Make all column name references lower case from here on out for simplicity
    file_data.columns = [col.lower() for col in list(file_data)]

    x_columns, y = get_columns_to_run(file_data, exclude_columns, include_only_columns, y_column)

    plots = []
    for x in x_columns:
        print("Running gradient descent on " + x + " vs. " + y)
        data = file_data[[x, y]]
        gd = GradientDescent(data, 0.5)
        theta0, theta1 = gd.run()
        plots.append(get_plot(data, theta0, theta1, x, y))

    save_as_pdf_pages(plots, path=output_dir, dpi=100)
def solve_in_z(A, b, x0, N, block_sizes, method):
    if block_sizes is not None and len(block_sizes) == A.shape[1]:
        logging.error('Trivial example: nblocks == nroutes, exiting solver')
        import sys
        sys.exit()

    z0 = x2z(x0, block_sizes)
    target = A.dot(x0) - b

    AT = A.T.tocsr()
    NT = N.T.tocsr()

    f = lambda z: 0.5 * la.norm(A.dot(N.dot(z)) + target) ** 2
    nabla_f = lambda z: NT.dot(AT.dot(A.dot(N.dot(z)) + target))

    ir = IsotonicRegression(y_min=0, y_max=1)
    cum_blocks = np.concatenate(([0], np.cumsum(block_sizes - 1)))
    blocks_start = cum_blocks
    blocks_end = cum_blocks[1:]

    def proj(x):
        # return block_isotonic_regression(x, ir, block_sizes, blocks_start, blocks_end)
        isotonic_regression_multi_c(x, blocks_start[:-1])
        return np.maximum(np.minimum(x, 1.), 0.)
        # value = simplex_projection(block_sizes - 1, x)
        # value = pysimplex_projection(block_sizes - 1, x)
        # return projected_value

    if method == 'DORE':
        gd = GradientDescent(z0=z0, f=f, nabla_f=nabla_f, proj=proj, method=method,
                             A=A, N=N, target=target)
    else:
        gd = GradientDescent(z0=z0, f=f, nabla_f=nabla_f, proj=proj, method=method)

    iters, times, states = gd.run()
    x = particular_x0(block_sizes) + N.dot(states[-1])
    assert np.all(x >= 0), 'x shouldn\'t have negative entries after projection'
    return iters, times, states
def constructGradDescentObject(self, lam=None):
    if lam is None:
        lam = self.lam
    f = lambda w_list: self.evalCost(lam, w_list=w_list)
    grad = lambda w_list: self.evalDerivs(w_list, lam=lam)
    gd = GradientDescent(f, grad=grad)

    def gradSGD(w_list, idx):
        idx = np.array([idx])
        lamSGD = 1.0 * lam / self.N  # scale the regularizer down for a single-example gradient
        return self.evalDerivs(w_list, idx=idx, lam=lamSGD)

    gd.evalGradTraining = gradSGD
    return gd
def test_normal_mixture_hard(self):
    np.random.seed(0)
    size_batch = 1000
    competition = AdversarialCompetition(
        size_batch=size_batch,
        true_model=GenerativeNormalMixtureModel(
            np.arange(-3, 4), np.random.uniform(1, 2, 7).round(2)),
        discriminative=pipeline.make_pipeline(
            preprocessing.PolynomialFeatures(4),
            linear_model.LogisticRegression()),
        generative=GenerativeNormalMixtureModel(
            np.arange(-3, 4) * 0.1, np.ones(7), updates=["mu", "sigma"]),
        gradient_descent=GradientDescent(
            np.array([0.3, 0.1, 0.3]).reshape((-1, 1)),
            inertia=0.9, annealing=2000, last_learning_rate=0.001),
    )

    for i in range(5000):
        competition.iteration()

    params = competition.generatives[-1]._params
    print(params.shape)
    true_params = competition.true_model._params
    np.testing.assert_allclose(params, true_params, 0, 0.2)
def main():
    input_csv_file_name = sys.argv[1]
    output_csv_file_name = sys.argv[2]

    # input values are in the form of [age, weight, height]
    input_values = Reader.csv(input_csv_file_name)

    cases = [
        (0.001, 100),
        (0.005, 100),
        (0.01, 100),
        (0.05, 100),
        (0.1, 100),
        (0.5, 100),
        (1, 100),
        (5, 100),
        (10, 100),
        # custom
        (0.01, 1000),
    ]

    Reporter.write_output(file_name=output_csv_file_name, content="", should_overwrite_file=True)

    for learning_rate, iterations in cases:
        # reset betas each time
        betas = [
            0,  # intercept
            0,  # age
            0,  # weight
        ]

        training_inputs = [[x[0], x[1]] for x in input_values]
        expected_classifiers = [x[2] for x in input_values]

        betas = GradientDescent.run(training_inputs=training_inputs,
                                    expected_classifiers=expected_classifiers,
                                    learning_rate=learning_rate,
                                    betas=betas,
                                    iterations=iterations)

        # write lines to output file
        Reporter.write_output(
            file_name=output_csv_file_name,
            content=",".join(map(str, [learning_rate, iterations, betas[0], betas[1], betas[2]])) + "\n",
        )

        Visualizer.draw_chart(input_values=training_inputs,
                              weights=betas,
                              file_name="figures/figure_" + "_".join(map(str, [learning_rate, iterations])))
def main(path: str, use_input: bool, separator: str = ',',
         file_with_params: Optional[str] = "params.json", verbose: bool = False) -> None:
    gd = GradientDescent(verbose=verbose)

    if file_with_params is None:
        if verbose:
            print("No parameters file was provided, so all coefficients default to zero")
        print(f'Predicted target values for the cars in the dataset at "{path or ""}": 0.0')
        return

    dct: NormalizationParams = get_normalization_params_from_file(file_with_params)

    if use_input:
        while True:
            values: List[float] = []
            for column in dct.columns:
                try:
                    values.append(float(input(f"Enter the car's '{column}' feature value: ")))
                except ValueError as ex:
                    raise ValueError(f"An invalid value was entered for the car's mileage ({ex})")
            result = gd.predict([values], file_with_params=file_with_params)[0]
            print(f"Predicted {dct.target_column} of the car: {result}")
            not_to_continue = input("If you want to continue, just press `Enter`: ")
            if not_to_continue:
                break
    else:
        feature_arrays: List[List[float]] = read_dataset(path, dct.columns,
                                                         separator=separator, verbose=verbose)
        result = gd.predict(feature_arrays, file_with_params=file_with_params)
        print(f'Predicted {dct.target_column} values for the cars in the dataset at "{path}": {result}')
def test_gives_correct_output_for_unseen_data(self):
    nnet = NetFactory.create_neural_net(sizes=[1, 10, 1])
    cost_func = QuadraticCost(neural_net=nnet)
    gd = GradientDescent(neural_net=nnet, cost_function=cost_func)

    def parabola(x):
        return x ** 2

    examples = helpers.generate_data(f=parabola, start_value=-0.6,
                                     end_value=-0.4, step_value=0.005)

    gd.train(data_src=PreloadSource(examples), nepochs=10)

    xval = -0.5000125
    yval = parabola(xval)
    net_input = np.array([xval], float)
    output = nnet.feed(net_input)
    self.assertAlmostEqual(output[0], yval, places=1)
def test_updating_learning(self):
    parameters = Parameters(learning_rate=10, decay=0.5)
    gd = GradientDescent(parameters, linear_regression_hypothesis)
    self.assertEqual(gd.get_learning_rate(), 10)
    gd.update_learning_rate()
    self.assertEqual(gd.get_learning_rate(), 5)
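# The assertions above (learning rate 10 -> 5 after one update with decay=0.5) suggest
# that `update_learning_rate` applies a simple multiplicative decay. A minimal sketch of
# that behaviour, using illustrative names rather than the library's actual attributes:
class DecayingLearningRate:
    def __init__(self, learning_rate, decay):
        self.learning_rate = learning_rate
        self.decay = decay

    def get_learning_rate(self):
        return self.learning_rate

    def update_learning_rate(self):
        # multiply the current rate by the decay factor on each call
        self.learning_rate *= self.decay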
def fit(self, x, y, plot_cost=False):
    x, y = check_X_y(x, y)
    if self.scale_data:
        self._scaler = StandardScaler()
        x = self._scaler.fit_transform(x)
    x = add_bias_feature(x)

    # one-vs-all
    for class_type in set(y):
        cur_y = np.ones(len(y))
        cur_y[y != class_type] = 0
        self._class_data[class_type] = {"y": cur_y}

        gd = GradientDescent(mode='logistic', track_cost=plot_cost,
                             learning_rate=self.learning_rate, penalty=self.penalty,
                             alpha=self.alpha, max_iterations=self.max_iterations)
        cur_theta = gd.find_theta(x, cur_y)
        self._class_data[class_type]["theta"] = cur_theta  # keep both y and theta for this class

        if plot_cost:
            cost_x, cost_y = gd.get_cost_history()
            plt.plot(cost_x, cost_y, "r-")
            plt.title(f"Cost for class '{class_type}' (last value={gd.last_cost:0.6f})")
            plt.show()

    coef = [self._class_data[key]["theta"][1:, 0] for key in self._class_data]
    intercept = [self._class_data[key]["theta"][0, 0] for key in self._class_data]

    # for binary problems keep a single coefficient row, mirroring scikit-learn's convention
    self.coef_ = np.array(coef[1:]) if len(coef) == 2 else np.array(coef)
    self.intercept_ = np.array(intercept[1:]) if len(intercept) == 2 else np.array(intercept)

    check_is_fitted(self, attributes=['intercept_', 'coef_'])
    return self
def test_normal_1000(self):
    np.random.seed(0)
    size_batch = 1000
    adversarials = AdversarialCompetition(
        size_batch=size_batch,
        true_model=GenerativeNormalModel(1, 2),
        discriminative=pipeline.make_pipeline(
            preprocessing.PolynomialFeatures(4),
            linear_model.LogisticRegression()),
        generative=GenerativeNormalModel(0, 1, updates=["mu", "sigma"]),
        gradient_descent=GradientDescent(0.03, 0.9),
    )

    for i in range(200):
        adversarials.iteration()

    params = adversarials.generatives[-1]._params
    true_params = adversarials.true_model._params
    np.testing.assert_allclose(params, true_params, 0, 0.02)
def test_normal_mixture(self):
    np.random.seed(0)
    size_batch = 1000
    competition = AdversarialCompetition(
        size_batch=size_batch,
        true_model=GenerativeNormalMixtureModel([-3, 3], [1, 1]),
        discriminative=pipeline.make_pipeline(
            preprocessing.PolynomialFeatures(4),
            linear_model.LogisticRegression()),
        generative=GenerativeNormalMixtureModel([-1, 1], [1, 1], updates=["mu", "sigma"]),
        gradient_descent=GradientDescent(0.1, inertia=0.9, annealing=1000,
                                         last_learning_rate=0.01),
    )

    for i in range(2000):
        competition.iteration()

    params = competition.generatives[-1]._params
    true_params = competition.true_model._params
    np.testing.assert_allclose(params, true_params, 0, 0.1)
def plot_2d_graph(feature_arrays: List[List[float]], target_values: List[float],
                  params_file: str, verbose: bool = False) -> None:
    for feature_array in feature_arrays:
        assert len(feature_array) == 1, \
            "A 2D linear regression is expected, but more than one feature value was provided"
    assert len(feature_arrays) == len(target_values), \
        "The number of feature arrays and the number of target values differ"

    if verbose:
        print(f"Figure size: {(12, 6)}")
    fig, ax = plt.subplots(figsize=(12, 6))

    if verbose:
        print('Data point color: "red"')
    ax.scatter(feature_arrays, target_values, label="Data", color="red")

    dct: NormalizationParams = get_normalization_params_from_file(params_file)
    assert len(dct.feature_mins) == 2 and len(dct.feature_maxs) == 2 and \
        len(dct.columns) == 1, f"Invalid data in {params_file}"

    borders = list(map(int, ax.get_xlim()))
    if not NUM_OF_STEPS:
        raise ValueError("NUM_OF_STEPS must not be zero")
    xs: List[List[float]] = [[i] for i in np.arange(*borders, (borders[1] - borders[0]) / NUM_OF_STEPS)]

    gd: GradientDescent = GradientDescent(verbose=verbose)
    ys = gd.predict(xs, params_file)

    ax.plot(xs, ys, label="Linear regression line")
    ax.set_xlabel(dct.columns[0])
    ax.set_ylabel(dct.target_column)
    ax.set_title("Linear Regression (MSE)")
    ax.legend()
    plt.show()
logging.basicConfig( format="[%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s", level="INFO") np.random.seed(0) size_batch = 1000 competition = AdversarialCompetition( size_batch=size_batch, true_model=GenerativeNormalMixtureModel([-3, 3], [1, 1]), discriminative=pipeline.make_pipeline(preprocessing.PolynomialFeatures(4), linear_model.LogisticRegression()), generative=GenerativeNormalMixtureModel([-1, 1], [1, 1], updates=["mu", "sigma"]), gradient_descent=GradientDescent(0.01, 0.5), ) print(competition) for i in range(1000): if i % 200 == 0: competition.plot() plt.show() pass competition.iteration() print("final model %s" % competition.generatives[-1]) competition.plot_params() plt.show()
# -*- coding: utf-8 -*-
from logistic_regression import LogisticRegression
from gradient_descent import GradientDescent, StocasticGradientDescent

HW3_TRAIN = 'hw3_train.dat'
HW3_TEST = 'hw3_test.dat'

if __name__ == '__main__':
    zero_zero_point_one = LogisticRegression(
        0.01, 2000, GradientDescent(), StocasticGradientDescent())
    zero_zero_point_one.caculate_and_plot(HW3_TRAIN, HW3_TEST)

    zero_zero_zero_point_one = LogisticRegression(
        0.001, 2000, GradientDescent(), StocasticGradientDescent())
    zero_zero_zero_point_one.caculate_and_plot(HW3_TRAIN, HW3_TEST)
#!/usr/bin/env python
import numpy as np
from gradient_descent import GradientDescent
# import matplotlib.pyplot as plt

x = np.array([[1, 10, 100], [2, 20, 200], [3, 30, 300], [4, 40, 400], [5, 50, 500]])
y = np.array([[160], [320], [480], [640], [800]])
# x = np.array([[1], [2], [3], [4], [5]])
# y = np.array([[2], [4], [6], [8], [10]])

a = GradientDescent(x, y)

# plt.plot(x, y)
# plt.show()

print(a.hypothesis(1))
print(a.cost_function(1))
print(a.stochastic_gradient_descent(0.1))
# a.batch_gradient_descent(0.1, 25)
# print(a.least_squares())
print(a.cost_function(0))
logging.basicConfig(
    format="[%(filename)s:%(lineno)s - %(funcName)15s() %(asctime)-15s ] %(message)s",
    level=logging.DEBUG)

np.random.seed(0)
size_batch = 1000

competition = AdversarialCompetition(
    size_batch=size_batch,
    true_model=GenerativeNormalModel(1, 2),
    discriminative=pipeline.make_pipeline(preprocessing.PolynomialFeatures(4),
                                          linear_model.LogisticRegression()),
    generative=GenerativeNormalModel(0, 1, updates=["mu", "sigma"]),
    gradient_descent=GradientDescent(0.03, inertia=0.0, annealing=100),
)

print(competition)

for i in range(500):
    if i % 50 == 0:
        competition.plot()
        pyplot.savefig('file.png')
        pyplot.close()
    competition.iteration()

print("final model %s" % competition.generatives[-1])
competition.plot_params()
def go(self):
    log = Log()

    raw_data_lines = open(self.file_name).readlines()
    reader = self.data_reader_class(raw_data_lines, self.test_data_size)

    raw_data_inputs = reader.training_input_values
    raw_data_outputs = reader.training_output_values
    accepted_line_count = reader.accepted_count
    rejected_lines = reader.rejected_lines
    raw_input_var_count = reader.input_var_count

    # Display results of reading data file
    for rejected_line in rejected_lines:
        log.error('Bad input line: ' + rejected_line)
    log.bar()
    log.info('Read %s, loaded %s lines, rejected %s, %s input values per line' %
             (self.file_name, accepted_line_count, len(rejected_lines), raw_input_var_count))
    if self.test_data_size > 0:
        log.info('Training set size: %s' % (len(raw_data_inputs),))
        log.info('Test set size: %s' % (len(reader.testing_input_values),))
    log.info('Press Ctrl-C at any time to stop working and show results')
    log.bar()

    def build_transformer(raw_input_values, with_linear_terms, other_terms):
        # Convert the raw inputs using the transformer functions
        transformer = Transformer(raw_input_values)
        if with_linear_terms:
            transformer.add_linear_terms()
        for name, fn in other_terms.iteritems():
            transformer.add_new_term(name, fn)
        return transformer

    transformer = build_transformer(raw_data_inputs, self.with_linear, self.other_terms)
    raw_variables = transformer.variables

    # Apply Feature Scaling and Mean Normalisation
    normaliser = Normaliser()
    normalised_variables = map(normaliser.normalise, raw_variables)

    hypothesis = self.hypothesis_class(len(normalised_variables))
    cost_function = self.cost_fn_class(raw_data_outputs, self.learning_rate,
                                       self.regularisation_coefficient)

    gradient_descent = GradientDescent(hypothesis, cost_function,
                                       normalised_variables, raw_data_outputs)
    signal.signal(signal.SIGINT, gradient_descent.interrupt)
    gradient_descent.set_iterations(self.max_iterations)
    if self.err_check:
        gradient_descent.set_error_checking()
    gradient_descent.calculate()

    # Denormalise the calculated theta values
    normalised_thetas = gradient_descent.hypothesis.theta_values
    denormaliser = Denormaliser()
    final_thetas = denormaliser.denormalise(normalised_thetas, normalised_variables)

    # Run hypothesis against original values
    if self.test_data_size > 0:
        log.bar()
        hypothesis.theta_values = final_thetas
        transformer = build_transformer(reader.testing_input_values, self.with_linear, self.other_terms)
        raw_variable_data = zip(*map(lambda v: v.data, transformer.variables))
        for i, o in zip(raw_variable_data, reader.testing_output_values):
            log.info('{0:>8} .... {1: .8f}'.format(o, hypothesis.calculate(i)))

    # Display results
    log.bar()
    log.info('Theta values:')
    log.underline()
    for nv, ht in zip(normalised_variables, final_thetas):
        log.info("{:>8} = {:>16.8f}".format(nv.variable.name, ht))
    log.info('')
    log.info('Completed %s iterations' % (gradient_descent.iterations,))
    log.bar()
    # lr1_confidence = lr1.score(test_features1, test_targets1)
    # print("R2 score1:", lr1_confidence)

elif model == "gradient_descent":
    GOOGL_closing_data = features[:, 5].reshape(-1, 1)
    n = 3

    # Data Processing
    data0 = features[:, 5]
    example0 = data0[:-n].reshape(-1, 1)
    target = GOOGL_closing_data[n:]

    # Train and Test
    train_features, train_targets, test_features, test_targets = metrics.train_test_split(
        example0, target, 0.8)

    gd = GradientDescent()
    gd.fit(train_features, train_targets)
    gd_confidence = gd.score(test_features, test_targets)
    print("R2 score:", gd_confidence)

elif model == "kmeans":
    # Need to make continuous for higher Mutual Info Score
    kmeans = KMeans(2)
    kmeans.fit(trainf)
    labels = kmeans.predict(testf)
    # acc = metrics.adjusted_mutual_info(testt.flatten(), labels)
    print(labels)
    cm = metrics.confusion_matrix(testt.flatten(), labels)
    a = metrics.accuracy(testt.flatten(), labels)
    p, r = metrics.precision_and_recall(testt.flatten(), labels)
def go(self): log = Log() raw_data_lines = open(self.file_name).readlines() reader = self.data_reader_class(raw_data_lines, self.test_data_size) raw_data_inputs = reader.training_input_values raw_data_outputs = reader.training_output_values accepted_line_count = reader.accepted_count rejected_lines = reader.rejected_lines raw_input_var_count = reader.input_var_count # Display results of reading data file for rejected_line in rejected_lines: log.error("Bad input line: " + rejected_line) log.bar() log.info( "Read %s, loaded %s lines, rejected %s, %s input values per line" % (self.file_name, accepted_line_count, len(rejected_lines), raw_input_var_count) ) if self.test_data_size > 0: log.info("Training set size: %s" % (len(raw_data_inputs),)) log.info("Test set size: %s" % (len(reader.testing_input_values),)) log.info("Press Ctrl-C at any time to stop working and show results") log.bar() def build_transformer(raw_input_values, with_linear_terms, other_terms): # Convert the raw inputs using the transformer functions transformer = Transformer(raw_input_values) if with_linear_terms: transformer.add_linear_terms() for name, fn in other_terms.iteritems(): transformer.add_new_term(name, fn) return transformer transformer = build_transformer(raw_data_inputs, self.with_linear, self.other_terms) raw_variables = transformer.variables # Apply Feature Scaling and Mean Normalisation normaliser = Normaliser() normalised_variables = map(normaliser.normalise, raw_variables) hypothesis = self.hypothesis_class(len(normalised_variables)) cost_function = self.cost_fn_class(raw_data_outputs, self.learning_rate, self.regularisation_coefficient) gradient_descent = GradientDescent(hypothesis, cost_function, normalised_variables, raw_data_outputs) signal.signal(signal.SIGINT, gradient_descent.interrupt) gradient_descent.set_iterations(self.max_iterations) if self.err_check: gradient_descent.set_error_checking() gradient_descent.calculate() # Denormalise the calculated theta values normalised_thetas = gradient_descent.hypothesis.theta_values denormaliser = Denormaliser() final_thetas = denormaliser.denormalise(normalised_thetas, normalised_variables) # Run hypothesis against original values if self.test_data_size > 0: log.bar() hypothesis.theta_values = final_thetas transformer = build_transformer(reader.testing_input_values, self.with_linear, self.other_terms) raw_variable_data = zip(*map(lambda v: v.data, transformer.variables)) for i, o in zip(raw_variable_data, reader.testing_output_values): log.info("{0:>8} .... {1: .8f}".format(o, hypothesis.calculate(i))) # Display results log.bar() log.info("Theta values:") log.underline() for nv, ht in zip(normalised_variables, final_thetas): log.info("{:>8} = {:>16.8f}".format(nv.variable.name, ht)) log.info("") log.info("Completed %s iterations" % (gradient_descent.iterations,)) log.bar()
def __init__(self):
    gd = GradientDescent()
    self.maximize_batch = gd.maximize_batch
    self.maximize_stochastic = gd.maximize_stochastic