def computeDecisionBoundary(self, w_full, lam, stepSize=0.01, maxFunctionCalls=10000, printSummary=True,
                                plot=False, plotIter=False, useGradientCriterion=False, tol=1e-4):
        gd = self.constructGradientDescentObject(lam)
        gd.stepSize = stepSize

        storeIterValues=False
        if plotIter:
            storeIterValues=True

        sol = gd.computeMin(w_full, maxFunctionCalls=maxFunctionCalls, printSummary=printSummary,
                            storeIterValues=storeIterValues, tol=tol,
                            useGradientCriterion=useGradientCriterion)
        w_star = sol[0]
        w_star_normalized = 1/np.linalg.norm(w_star)*w_star

        if printSummary:
            print "--- Classification Summary ---"
            print "w_full = " + str(w_star)
            print "w_full normalized = " + str(w_star_normalized)
            print "norm of w_full = " + str(np.linalg.norm(w_star))
            print "lambda = " + str(lam)
            self.classificationErrorRate(w_star, verbose=True)
            print "------------------"
            print ""



        if plot:
            self.plotData(w_star)

        if plotIter:
            gd.plotIterValues()

        return w_star
Example #2
def main():
    optimizers = ["GD", "GD+", "GDM", "GDM+", "NAG", "NAG+", "Adam", "Adam+"]
    learning_rates = [0.01, 0.01, 0.015, 0.01, 0.006, 0.006, 0.0005, 0.0005]
    alphas = [0.0, 1e-4, 0.0, 1e-5, 0.0, 1e-6, 0.0, 1e-8]

    x_min, y_min = 3.0, 0.5
    x_ini, y_ini = -2.0, -1.0

    stats = {
        optimizer: {
            "x": None,
            "y": None,
            "eta_x": None,
            "eta_y": None
        }
        for optimizer in optimizers
    }

    n_iterations = 1000  # suggested: 10000 for Adam, 4400 for NAG, 2000 for GDM, 1200 for GD

    for optimizer, learning_rate, alpha in zip(optimizers, learning_rates,
                                               alphas):
        print(optimizer)
        model = GradientDescent(f, x_ini, y_ini, n_iterations, learning_rate,
                                alpha, optimizer)
        x_hist, y_hist, lr_x_hist, lr_y_hist = model.minimize()
        stats[optimizer]["x"] = x_hist
        stats[optimizer]["y"] = y_hist
        stats[optimizer]["eta_x"] = lr_x_hist
        stats[optimizer]["eta_y"] = lr_y_hist

    plot_gradient_descent(stats, f, x_min, y_min)
    plot_loss(stats, x_min, y_min)
    plot_learning_rates(stats)
Example #3
def run(
        input_data_file):
    """
    """
    # parameter values
    intercept = 0.
    slope = 0.
    param_values = {"intercept": intercept,
            "slope": slope}
    # hypothesis object
    hypo = SimpleLinearRegression()
    hypo.initialize_parameters(param_values)

    # name of file where data is stored
    #file_name = 'data/input/ex1data1.txt'
    # read in data
    data = get_input_data(input_data_file)
    # extract features
    features = data[:, 0]
    # reshape to dimension nobs x 1
    features = features.reshape((len(features), 1))
    # extract yvalues
    yvalues = data[:, 1]
    # reshape to dimension nobs x 1
    yvalues = yvalues.reshape((len(yvalues), 1))

    # cost function object
    sel = SquaredErrorLoss(hypo, features, yvalues)
    gd = GradientDescent(.0001, param_values, .000000001, sel)
    gd.algorithm()
    print gd.get_parameters()
Example #4
    def fit(self, x, y, plot_cost=False):
        if hasattr(self, "scaler"):
            x = self.scaler.fit_transform(x)

        x = add_bias_feature(x)

        # one-vs-all
        for class_type in set(y):
            cur_y = np.ones(len(y))
            cur_y[y != class_type] = 0
            self.class_data[class_type] = {"y": cur_y}

            gd = GradientDescent(mode='logistic',
                                 track_cost=plot_cost,
                                 learning_rate=self.learning_rate,
                                 penalty=self.penalty,
                                 alpha=self.alpha,
                                 max_iterations=self.max_iterations)

            cur_theta = gd.find_theta(x, cur_y)
            self.class_data[class_type] = {"theta": cur_theta}

            if plot_cost:
                cost_x, cost_y = gd.get_cost_history()
                plt.plot(cost_x, cost_y, "r-")
                plt.title(F"Cost for class '{class_type}' (last value={gd.last_cost:0.6f})")
                plt.show()
Example #5
    def fit(self, x, y, plot_cost=False):
        x, y = check_X_y(x, y)

        if hasattr(self, "scaler"):
            x = self.scaler.fit_transform(x)

        x = add_bias_feature(x)

        gd = GradientDescent(mode='linear',
                             track_cost=plot_cost,
                             learning_rate=self.learning_rate,
                             penalty=self.penalty,
                             alpha=self.alpha,
                             max_iterations=self.max_iterations)

        theta = gd.find_theta(x, y)

        if plot_cost:
            cost_x, cost_y = gd.get_cost_history()
            plt.plot(cost_x, cost_y, "r-")
            plt.title(F"Cost (last value={gd.last_cost:0.6f})")
            plt.show()

        self._theta = theta
        self.intercept_ = theta[0, 0]
        self.coef_ = theta[1:].reshape(len(theta) - 1)

        check_is_fitted(self, attributes=['intercept_', 'coef_'])
        return self
Example #6
    def test_batch(self):
        gd = GradientDescent(Parameters(), linear_regression_hypothesis)
        coefs = np.ones(2)
        x = np.array([[1, i] for i in range(10)])
        y = np.array([2*i + 10 for i in range(10)])

        gd.batch((x, y), coefs)
        self.assertTrue(np.all(np.greater(coefs, 1)))
Example #7
def main(path: str, verbose: bool = False, separator: str = ',', number_of_target_feature: int = -1,
         file_with_params: str = "params.json", learning_rate: float = 0.01, epsilon: float = 10e-10) -> None:
    feature_arrays, target_values, columns, target_column = read_dataset(
        path, verbose=verbose, separator=separator, number_of_target_feature=number_of_target_feature
    )
    gd = GradientDescent(verbose=verbose, columns=columns, target_column=target_column)
    gd.fit(
        feature_arrays, target_values, file_with_params=file_with_params, learning_rate=learning_rate, epsilon=epsilon
    )
    def setUp(self):
        x = np.array([5, 2], float)
        y = np.array([0.25, 0, 1], float)
        self.examples = PreloadSource(([x], [y]))
        nnet = NetFactory.create_neural_net(sizes=[2, 3, 3])
        nnet.randomize_parameters()
        self.nnet = nnet
        cost_func = QuadraticCost(nnet)
        self.grad_descent = GradientDescent(neural_net=nnet,
                                            cost_function=cost_func)
Example #9
    def test_regularization(self):
        derivatives = np.zeros(10)
        coefs = np.ones(10)
        parameters = Parameters(l2=2)
        derivatives_expected = np.array([0, 2, 2, 2, 2, 2, 2, 2, 2, 2])

        gd = GradientDescent(parameters, linear_regression_hypothesis)
        gd.regularize(derivatives, coefs)

        np.testing.assert_array_equal(derivatives, derivatives_expected)
Example #10
    def test_shuffling(self):
        x = np.array(range(1000))
        x_not_changed = x.copy()
        y = x.copy()
        parameters = Parameters(shuffle=True)

        gd = GradientDescent(parameters, linear_regression_hypothesis)
        x, y = gd.shuffle_data_set((x, y))

        self.assertTrue(np.any(np.not_equal(x, x_not_changed)))
        np.testing.assert_array_equal(x, y)
    def test_gives_correct_output_on_training_data(self):
        nnet = NetFactory.create_neural_net(sizes=[1, 1, 1])
        cost_func = QuadraticCost(neural_net=nnet)
        gd = GradientDescent(neural_net=nnet, cost_function=cost_func)

        xes = [np.array([-10], float), np.array([100], float)]
        ys = [np.array([0.5], float), np.array([0.75], float)]

        gd.train(data_src=PreloadSource((xes, ys)), nepochs=100)

        for i in range(len(xes)):
            res = nnet.feed(xes[i])
            self.assertAlmostEqual(res[0], ys[i][0], places=1)
Example #12
    def test_descent_easy(self):
        x = np.array([[1, i] for i in range(1000)])
        y = np.array([10 + 2*i for i in range(1000)])
        parameters = Parameters(batch_size=100, shuffle=True)
        gd = GradientDescent(parameters, linear_regression_hypothesis)

        xscal = StandardScaler(copy=False)
        x = xscal.fit_transform(x)
        yscal = StandardScaler(copy=False)
        y = yscal.fit_transform(y)

        coef0, coef1 = gd.descent((x, y))
        self.assertAlmostEqual(coef0, 0)
        self.assertAlmostEqual(coef1, 1)
Example #13
def run_for_file(f, exclude_columns, include_only_columns, output_dir, y_column):
    file_data = pd.read_csv(f)
    # Make all column name references lower case from here on out for simplicity
    file_data.columns = [col.lower() for col in list(file_data)]
    x_columns, y = get_columns_to_run(file_data, exclude_columns, include_only_columns, y_column)
    plots = []
    for x in x_columns:
        print("Running gradient descent on " + x + " vs. " + y)
        data = file_data[[x, y]]
        gd = GradientDescent(data, 0.5)
        theta0, theta1 = gd.run()
        plots.append(get_plot(data, theta0, theta1, x, y))

    save_as_pdf_pages(plots, path=output_dir, dpi=100)
Example #14
def solve_in_z(A, b, x0, N, block_sizes, method):
    if block_sizes is not None and len(block_sizes) == A.shape[1]:
        logging.error('Trivial example: nblocks == nroutes, exiting solver')
        import sys
        sys.exit()

    z0 = x2z(x0, block_sizes)
    target = A.dot(x0) - b

    AT = A.T.tocsr()
    NT = N.T.tocsr()

    f = lambda z: 0.5 * la.norm(A.dot(N.dot(z)) + target)**2
    nabla_f = lambda z: NT.dot(AT.dot(A.dot(N.dot(z)) + target))

    ir = IsotonicRegression(y_min=0, y_max=1)
    cum_blocks = np.concatenate(([0], np.cumsum(block_sizes - 1)))
    blocks_start = cum_blocks
    blocks_end = cum_blocks[1:]

    def proj(x):
        #return block_isotonic_regression(x, ir, block_sizes, blocks_start,
        #                                 blocks_end)
        isotonic_regression_multi_c(x, blocks_start[:-1])
        return np.maximum(np.minimum(x, 1.), 0.)
        # value = simplex_projection(block_sizes - 1,x)
        # value = pysimplex_projection(block_sizes - 1,x)
        # return projected_value

    if method == 'DORE':
        gd = GradientDescent(z0=z0,
                             f=f,
                             nabla_f=nabla_f,
                             proj=proj,
                             method=method,
                             A=A,
                             N=N,
                             target=target)
    else:
        gd = GradientDescent(z0=z0,
                             f=f,
                             nabla_f=nabla_f,
                             proj=proj,
                             method=method)
    iters, times, states = gd.run()
    x = particular_x0(block_sizes) + N.dot(states[-1])
    assert np.all(
        x >= 0), 'x shouldn\'t have negative entries after projection'
    return iters, times, states
    def constructGradDescentObject(self, lam=None):
        if lam is None:
            lam = self.lam

        f = lambda w_list: self.evalCost(lam, w_list=w_list)
        grad = lambda w_list: self.evalDerivs(w_list, lam=lam)
        gd = GradientDescent(f, grad=grad)

        def gradSGD(w_list, idx):
            idx = np.array([idx])
            lamSGD = 1.0 * lam / self.N
            return self.evalDerivs(w_list, idx=idx, lam=lamSGD)

        gd.evalGradTraining = gradSGD
        return gd
Example #16
    def constructGradDescentObject(self, lam=None):
        if lam is None:
            lam = self.lam

        f = lambda w_list: self.evalCost(lam, w_list=w_list)
        grad = lambda w_list: self.evalDerivs(w_list, lam=lam)
        gd = GradientDescent(f, grad=grad)

        def gradSGD(w_list, idx):
            idx = np.array([idx])
            lamSGD = 1.0*lam/self.N
            return self.evalDerivs(w_list, idx=idx, lam=lamSGD)

        gd.evalGradTraining = gradSGD
        return gd
Example #17
    def test_normal_mixture_hard(self):
        np.random.seed(0)
        size_batch = 1000
        competition = AdversarialCompetition(
            size_batch=size_batch,
            true_model=GenerativeNormalMixtureModel(
                np.arange(-3, 4),
                np.random.uniform(1, 2, 7).round(2)),
            discriminative=pipeline.make_pipeline(
                preprocessing.PolynomialFeatures(4),
                linear_model.LogisticRegression()),
            generative=GenerativeNormalMixtureModel(np.arange(-3, 4) * 0.1,
                                                    np.ones(7),
                                                    updates=["mu", "sigma"]),
            gradient_descent=GradientDescent(np.array([0.3, 0.1, 0.3]).reshape(
                (-1, 1)),
                                             inertia=0.9,
                                             annealing=2000,
                                             last_learning_rate=0.001),
        )
        for i in range(5000):
            competition.iteration()
        params = competition.generatives[-1]._params
        print params.shape
        true_params = competition.true_model._params
        np.testing.assert_allclose(params, true_params, 0, 0.2)
def main():
    input_csv_file_name = sys.argv[1]
    output_csv_file_name = sys.argv[2]

    # input values are in the form of [age, weight, height]
    input_values = Reader.csv(input_csv_file_name)

    cases = [
        (0.001, 100),
        (0.005, 100),
        (0.01, 100),
        (0.05, 100),
        (0.1, 100),
        (0.5, 100),
        (1, 100),
        (5, 100),
        (10, 100),
        #custom
        (0.01, 1000),
    ]

    Reporter.write_output(file_name=output_csv_file_name,
                          content="",
                          should_overwrite_file=True)

    for learning_rate, iterations in cases:
        # reset betas each time
        betas = [
            # intercept 0
            0,
            # age
            0,
            # weight
            0
        ]

        training_inputs = [[x[0], x[1]] for x in input_values]
        expected_classifiers = [x[2] for x in input_values]

        betas = GradientDescent.run(training_inputs=training_inputs,
                                    expected_classifiers=expected_classifiers,
                                    learning_rate=learning_rate,
                                    betas=betas,
                                    iterations=iterations)

        # write lines to output file
        Reporter.write_output(
            file_name=output_csv_file_name,
            content=",".join(
                map(str,
                    [learning_rate, iterations, betas[0], betas[1], betas[2]]))
            + "\n",
        )

        Visualizer.draw_chart(input_values=training_inputs,
                              weights=betas,
                              file_name="figures/figure_" +
                              "_".join(map(str, [learning_rate, iterations])))
Example #19
def main(path: str,
         use_input: bool,
         separator: str = ',',
         file_with_params: Optional[str] = "params.json",
         verbose: bool = False) -> None:
    gd = GradientDescent(verbose=verbose)
    if file_with_params is None:
        if verbose:
            print(
                "The file with coefficients was not provided, so all coefficients are set to zero"
            )
        print(
            f'Predicted target values of the cars for the dataset at path "{path or ""}": 0.0'
        )
        return
    dct: NormalizationParams = get_normalization_params_from_file(
        file_with_params)
    if use_input:
        while True:
            values: List[float] = []
            for column in dct.columns:
                try:
                    values.append(
                        float(
                            input(
                                f"Введите значение фичи '{column}' машины: ")))
                except ValueError as ex:
                    raise ValueError(
                        f"Было введено некорректное значение пробега машины ({ex})"
                    )
            result = gd.predict([values], file_with_params=file_with_params)[0]
            print(f"Предсказанная {dct.target_column} машины: {result}")
            not_to_continue = input(
                "Если хотите продолжить, нажмите `Enter`: ")
            if not_to_continue:
                break
    else:
        feature_arrays: List[List[float]] = read_dataset(path,
                                                         dct.columns,
                                                         separator=separator,
                                                         verbose=verbose)
        result = gd.predict(feature_arrays, file_with_params=file_with_params)
        print(
            f'Predicted {dct.target_column} of the cars for the dataset at path "{path}": {result}'
        )
Example #20
    def computeDecisionBoundary(self,
                                w_full,
                                lam,
                                stepSize=0.01,
                                maxFunctionCalls=10000,
                                printSummary=True,
                                plot=False,
                                plotIter=False,
                                useGradientCriterion=False,
                                tol=1e-4):
        gd = self.constructGradientDescentObject(lam)
        gd.stepSize = stepSize

        storeIterValues = False
        if plotIter:
            storeIterValues = True

        sol = gd.computeMin(w_full,
                            maxFunctionCalls=maxFunctionCalls,
                            printSummary=printSummary,
                            storeIterValues=storeIterValues,
                            tol=tol,
                            useGradientCriterion=useGradientCriterion)
        w_star = sol[0]
        w_star_normalized = 1 / np.linalg.norm(w_star) * w_star

        if printSummary:
            print "--- Classification Summary ---"
            print "w_full = " + str(w_star)
            print "w_full normalized = " + str(w_star_normalized)
            print "norm of w_full = " + str(np.linalg.norm(w_star))
            print "lambda = " + str(lam)
            self.classificationErrorRate(w_star, verbose=True)
            print "------------------"
            print ""

        if plot:
            self.plotData(w_star)

        if plotIter:
            gd.plotIterValues()

        return w_star
    def test_gives_correct_output_for_unseen_data(self):
        nnet = NetFactory.create_neural_net(sizes=[1, 10, 1])
        cost_func = QuadraticCost(neural_net=nnet)
        gd = GradientDescent(neural_net=nnet, cost_function=cost_func)

        def parabola(x):
            return x**2

        examples = helpers.generate_data(f=parabola,
                                         start_value=-0.6,
                                         end_value=-0.4,
                                         step_value=0.005)

        gd.train(data_src=PreloadSource(examples), nepochs=10)

        xval = -0.5000125
        yval = parabola(xval)

        net_input = np.array([xval], float)
        output = nnet.feed(net_input)
        self.assertAlmostEqual(output[0], yval, places=1)
Example #22
    def test_updating_learning(self):
        parameters = Parameters(learning_rate=10, decay=0.5)
        gd = GradientDescent(parameters, linear_regression_hypothesis)

        self.assertEqual(gd.get_learning_rate(), 10)
        gd.update_learning_rate()
        self.assertEqual(gd.get_learning_rate(), 5)
Example #23
    def fit(self, x, y, plot_cost=False):
        x, y = check_X_y(x, y)

        if self.scale_data:
            self._scaler = StandardScaler()
            x = self._scaler.fit_transform(x)

        x = add_bias_feature(x)

        # one-vs-all
        for class_type in set(y):
            cur_y = np.ones(len(y))
            cur_y[y != class_type] = 0
            self._class_data[class_type] = {"y": cur_y}

            gd = GradientDescent(mode='logistic',
                                 track_cost=plot_cost,
                                 learning_rate=self.learning_rate,
                                 penalty=self.penalty,
                                 alpha=self.alpha,
                                 max_iterations=self.max_iterations)

            cur_theta = gd.find_theta(x, cur_y)
            self._class_data[class_type] = {"theta": cur_theta}

            if plot_cost:
                cost_x, cost_y = gd.get_cost_history()
                plt.plot(cost_x, cost_y, "r-")
                plt.title(F"Cost for class '{class_type}' (last value={gd.last_cost:0.6f})")
                plt.show()

        coef = [self._class_data[key]["theta"][1:, 0] for key in self._class_data]
        intercept = [self._class_data[key]["theta"][0, 0] for key in self._class_data]

        self.coef_ = np.array(coef[1:]) if len(coef) == 2 else np.array(coef)
        self.intercept_ = np.array(intercept[1:]) if len(intercept) == 2 else np.array(intercept)

        check_is_fitted(self, attributes=['intercept_', 'coef_'])
        return self
def solve_in_z(A,b,x0,N,block_sizes,method):
    if block_sizes is not None and len(block_sizes) == A.shape[1]:
        logging.error('Trivial example: nblocks == nroutes, exiting solver')
        import sys
        sys.exit()

    z0 = x2z(x0,block_sizes)
    target = A.dot(x0)-b

    AT = A.T.tocsr()
    NT = N.T.tocsr()

    f = lambda z: 0.5 * la.norm(A.dot(N.dot(z)) + target)**2
    nabla_f = lambda z: NT.dot(AT.dot(A.dot(N.dot(z)) + target))

    ir = IsotonicRegression(y_min=0, y_max=1)
    cum_blocks = np.concatenate(([0], np.cumsum(block_sizes-1)))
    blocks_start = cum_blocks
    blocks_end = cum_blocks[1:]

    def proj(x):
        #return block_isotonic_regression(x, ir, block_sizes, blocks_start,
        #                                 blocks_end)
        isotonic_regression_multi_c(x, blocks_start[:-1])
        return np.maximum(np.minimum(x, 1.), 0.)
        # value = simplex_projection(block_sizes - 1,x)
        # value = pysimplex_projection(block_sizes - 1,x)
        # return projected_value

    if method == 'DORE':
        gd = GradientDescent(z0=z0, f=f, nabla_f=nabla_f, proj=proj,
                             method=method, A=A, N=N, target=target)
    else:
        gd = GradientDescent(z0=z0, f=f, nabla_f=nabla_f, proj=proj,
                             method=method)
    iters, times, states = gd.run()
    x = particular_x0(block_sizes) + N.dot(states[-1])
    assert np.all(x >= 0), 'x shouldn\'t have negative entries after projection'
    return iters, times, states
Example #25
    def test_normal_1000(self):
        np.random.seed(0)
        size_batch = 1000
        adversarials = AdversarialCompetition(
            size_batch=size_batch,
            true_model=GenerativeNormalModel(1, 2),
            discriminative=pipeline.make_pipeline(
                preprocessing.PolynomialFeatures(4),
                linear_model.LogisticRegression()),
            generative=GenerativeNormalModel(0, 1, updates=["mu", "sigma"]),
            gradient_descent=GradientDescent(0.03, 0.9),
        )
        for i in range(200):
            adversarials.iteration()
        params = adversarials.generatives[-1]._params
        true_params = adversarials.true_model._params
        np.testing.assert_allclose(params, true_params, 0, 0.02)
Example #26
    def test_normal_mixture(self):
        np.random.seed(0)
        size_batch = 1000
        competition = AdversarialCompetition(
            size_batch=size_batch,
            true_model=GenerativeNormalMixtureModel([-3, 3], [1, 1]),
            discriminative=pipeline.make_pipeline(
                preprocessing.PolynomialFeatures(4),
                linear_model.LogisticRegression()),
            generative=GenerativeNormalMixtureModel([-1, 1], [1, 1],
                                                    updates=["mu", "sigma"]),
            gradient_descent=GradientDescent(0.1,
                                             inertia=0.9,
                                             annealing=1000,
                                             last_learning_rate=0.01),
        )
        for i in range(2000):
            competition.iteration()
        params = competition.generatives[-1]._params
        true_params = competition.true_model._params
        np.testing.assert_allclose(params, true_params, 0, 0.1)
Example #27
def plot_2d_graph(feature_arrays: List[List[float]],
                  target_values: List[float],
                  params_file: str,
                  verbose: bool = False) -> None:
    for feature_array in feature_arrays:
        assert len(
            feature_array
        ) == 1, "A 2D linear regression is expected, but more feature values were provided"
    assert len(feature_arrays) == len(target_values), "The numbers of feature-value arrays " \
                                                      "and target values differ"
    if verbose:
        print(f'Plot figsize: {(12, 6)}')
    fig, ax = plt.subplots(figsize=(12, 6))
    if verbose:
        print(f'Data color on the plot: "red"')
    ax.scatter(feature_arrays, target_values, label="Data", color="red")

    dct: NormalizationParams = get_normalization_params_from_file(params_file)
    assert len(dct.feature_mins) == 2 and len(dct.feature_maxs) == 2 and \
           len(dct.columns) == 1, f"Invalid data at path {params_file}"

    borders = list(map(int, ax.get_xlim()))
    if not NUM_OF_STEPS:
        raise ValueError("Переменная NUM_OF_STEPS не может быть нулевой")
    xs: List[List[float]] = [[
        i
    ] for i in np.arange(*borders, (borders[1] - borders[0]) / NUM_OF_STEPS)]

    gd: GradientDescent = GradientDescent(verbose=verbose)
    ys = gd.predict(xs, params_file)

    ax.plot(xs, ys, label="Linear regression line")

    ax.set_xlabel(dct.columns[0])
    ax.set_ylabel(dct.target_column)
    ax.set_title("Linear Regression (MSE)")
    ax.legend()
    plt.show()
Example #28
logging.basicConfig(
    format="[%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s",
    level="INFO")

np.random.seed(0)
size_batch = 1000

competition = AdversarialCompetition(
    size_batch=size_batch,
    true_model=GenerativeNormalMixtureModel([-3, 3], [1, 1]),
    discriminative=pipeline.make_pipeline(preprocessing.PolynomialFeatures(4),
                                          linear_model.LogisticRegression()),
    generative=GenerativeNormalMixtureModel([-1, 1], [1, 1],
                                            updates=["mu", "sigma"]),
    gradient_descent=GradientDescent(0.01, 0.5),
)

print(competition)

for i in range(1000):
    if i % 200 == 0:
        competition.plot()
        plt.show()
        pass
    competition.iteration()

print("final model %s" % competition.generatives[-1])

competition.plot_params()
plt.show()
Example #29
# -*- coding: utf-8 -*-
from logistic_regression import LogisticRegression
from gradient_descent import GradientDescent, StocasticGradientDescent

HW3_TRAIN = 'hw3_train.dat'
HW3_TEST = 'hw3_test.dat'

if __name__ == '__main__':
    zero_zero_point_one = LogisticRegression(
        0.01, 2000, GradientDescent(), StocasticGradientDescent())
    zero_zero_point_one.caculate_and_plot(HW3_TRAIN, HW3_TEST)

    zero_zero_zero_point_one = LogisticRegression(
        0.001, 2000, GradientDescent(), StocasticGradientDescent())
    zero_zero_zero_point_one.caculate_and_plot(HW3_TRAIN, HW3_TEST)
Example #30
#!/usr/bin/env python

import numpy as np
from gradient_descent import GradientDescent
#import matplotlib.pyplot as plt



x = np.array([[1,10,100],[2,20,200],[3,30,300],[4,40,400],[5,50,500]])
y = np.array([[160],[320],[480],[640],[800]])

#x = np.array([[1],[2],[3],[4],[5]])
#y = np.array([[2],[4],[6],[8],[10]])

a = GradientDescent(x,y)

#plt.plot(x,y)
#plt.show()

print a.hypothesis(1)

print a.cost_function(1)


print a.stochastic_gradient_descent(0.1)
#a.batch_gradient_descent(0.1,25)
#print(a.least_squares())

print a.cost_function(0)
Example #31
logging.basicConfig(
    format=
    "[%(filename)s:%(lineno)s - %(funcName)15s() %(asctime)-15s ] %(message)s",
    level=logging.DEBUG)

np.random.seed(0)
size_batch = 1000

competition = AdversarialCompetition(
    size_batch=size_batch,
    true_model=GenerativeNormalModel(1, 2),
    discriminative=pipeline.make_pipeline(preprocessing.PolynomialFeatures(4),
                                          linear_model.LogisticRegression()),
    generative=GenerativeNormalModel(0, 1, updates=["mu", "sigma"]),
    gradient_descent=GradientDescent(0.03, inertia=0.0, annealing=100),
)

print(competition)

for i in range(500):
    if i % 50 == 0:
        competition.plot()
        pyplot.savefig('file.png')
        pyplot.close()
        pass
    competition.iteration()

print("final model %s" % competition.generatives[-1])

competition.plot_params()
Example #32
    def go(self):
        log = Log()

        raw_data_lines = open(self.file_name).readlines()

        reader = self.data_reader_class(raw_data_lines, self.test_data_size)

        raw_data_inputs     = reader.training_input_values
        raw_data_outputs    = reader.training_output_values
        accepted_line_count = reader.accepted_count
        rejected_lines      = reader.rejected_lines
        raw_input_var_count = reader.input_var_count

        # Display results of reading data file
        for rejected_line in rejected_lines:
            log.error('Bad input line: ' + rejected_line)

        log.bar()
        log.info('Read %s, loaded %s lines, rejected %s, %s input values per line' % (self.file_name, accepted_line_count, len(rejected_lines), raw_input_var_count))
        if self.test_data_size > 0:
            log.info('Training set size: %s' % (len(raw_data_inputs),))
            log.info('Test set size:     %s' % (len(reader.testing_input_values),))
        log.info('Press Ctrl-C at any time to stop working and show results')
        log.bar()

        def build_transformer(raw_input_values, with_linear_terms, other_terms):
            # Convert the raw inputs using the transformer functions
            transformer = Transformer(raw_input_values)

            if (with_linear_terms):
                transformer.add_linear_terms()

            for name, fn in other_terms.iteritems():
                transformer.add_new_term(name, fn)

            return transformer

        transformer = build_transformer(raw_data_inputs, self.with_linear, self.other_terms)
        raw_variables = transformer.variables

        # Apply Feature Scaling and Mean Normalisation
        normaliser = Normaliser()
        normalised_variables = map(normaliser.normalise, raw_variables)

        hypothesis    = self.hypothesis_class(len(normalised_variables))
        cost_function = self.cost_fn_class(raw_data_outputs, self.learning_rate, self.regularisation_coefficient)

        gradient_descent = GradientDescent(hypothesis, cost_function, normalised_variables, raw_data_outputs)
        signal.signal(signal.SIGINT, gradient_descent.interrupt)

        gradient_descent.set_iterations(self.max_iterations)
        if self.err_check:
            gradient_descent.set_error_checking()

        gradient_descent.calculate()

        # Denormalise the calculated theta values
        normalised_thetas = gradient_descent.hypothesis.theta_values
        denormaliser = Denormaliser()
        final_thetas = denormaliser.denormalise(normalised_thetas, normalised_variables)

        # Run hypothesis against original values
        if self.test_data_size > 0:
            log.bar()
            hypothesis.theta_values = final_thetas

            transformer = build_transformer(reader.testing_input_values, self.with_linear, self.other_terms)

            raw_variable_data = zip(*map(lambda v : v.data, transformer.variables))
            for i, o in zip(raw_variable_data, reader.testing_output_values):
                log.info('{0:>8} .... {1: .8f}'.format(o, hypothesis.calculate(i)))

        # Display results
        log.bar()
        log.info('Theta values:')
        log.underline()
        for nv, ht in zip(normalised_variables, final_thetas):
            log.info("{:>8} = {:>16.8f}".format(nv.variable.name, ht))

        log.info('')
        log.info('Completed %s iterations' % (gradient_descent.iterations,))
        log.bar()
    # lr1_confidence = lr1.score(test_features1, test_targets1)
    # print("R2 score1:", lr1_confidence)

elif model == "gradient_descent":
    GOOGL_closing_data = features[:, 5].reshape(-1, 1)
    n = 3

    #Data Processing
    data0 = features[:, 5]
    example0 = data0[:-n].reshape(-1, 1)
    target = GOOGL_closing_data[n:]

    #Train and Test
    train_features, train_targets, test_features, test_targets = metrics.train_test_split(
        example0, target, 0.8)
    gd = GradientDescent()
    gd.fit(train_features, train_targets)
    gd_confidence = gd.score(test_features, test_targets)
    print("R2 score:", gd_confidence)

elif model == "kmeans":
    # Need to make continuous for higher Mutual Info Score
    kmeans = KMeans(2)
    kmeans.fit(trainf)
    labels = kmeans.predict(testf)
    #acc = metrics.adjusted_mutual_info(testt.flatten(), labels)
    print(labels)

    cm = metrics.confusion_matrix(testt.flatten(), labels)
    a = metrics.accuracy(testt.flatten(), labels)
    p, r = metrics.precision_and_recall(testt.flatten(), labels)
Example #34
    def go(self):
        log = Log()

        raw_data_lines = open(self.file_name).readlines()

        reader = self.data_reader_class(raw_data_lines, self.test_data_size)

        raw_data_inputs = reader.training_input_values
        raw_data_outputs = reader.training_output_values
        accepted_line_count = reader.accepted_count
        rejected_lines = reader.rejected_lines
        raw_input_var_count = reader.input_var_count

        # Display results of reading data file
        for rejected_line in rejected_lines:
            log.error("Bad input line: " + rejected_line)

        log.bar()
        log.info(
            "Read %s, loaded %s lines, rejected %s, %s input values per line"
            % (self.file_name, accepted_line_count, len(rejected_lines), raw_input_var_count)
        )
        if self.test_data_size > 0:
            log.info("Training set size: %s" % (len(raw_data_inputs),))
            log.info("Test set size:     %s" % (len(reader.testing_input_values),))
        log.info("Press Ctrl-C at any time to stop working and show results")
        log.bar()

        def build_transformer(raw_input_values, with_linear_terms, other_terms):
            # Convert the raw inputs using the transformer functions
            transformer = Transformer(raw_input_values)

            if with_linear_terms:
                transformer.add_linear_terms()

            for name, fn in other_terms.iteritems():
                transformer.add_new_term(name, fn)

            return transformer

        transformer = build_transformer(raw_data_inputs, self.with_linear, self.other_terms)
        raw_variables = transformer.variables

        # Apply Feature Scaling and Mean Normalisation
        normaliser = Normaliser()
        normalised_variables = map(normaliser.normalise, raw_variables)

        hypothesis = self.hypothesis_class(len(normalised_variables))
        cost_function = self.cost_fn_class(raw_data_outputs, self.learning_rate, self.regularisation_coefficient)

        gradient_descent = GradientDescent(hypothesis, cost_function, normalised_variables, raw_data_outputs)
        signal.signal(signal.SIGINT, gradient_descent.interrupt)

        gradient_descent.set_iterations(self.max_iterations)
        if self.err_check:
            gradient_descent.set_error_checking()

        gradient_descent.calculate()

        # Denormalise the calculated theta values
        normalised_thetas = gradient_descent.hypothesis.theta_values
        denormaliser = Denormaliser()
        final_thetas = denormaliser.denormalise(normalised_thetas, normalised_variables)

        # Run hypothesis against original values
        if self.test_data_size > 0:
            log.bar()
            hypothesis.theta_values = final_thetas

            transformer = build_transformer(reader.testing_input_values, self.with_linear, self.other_terms)

            raw_variable_data = zip(*map(lambda v: v.data, transformer.variables))
            for i, o in zip(raw_variable_data, reader.testing_output_values):
                log.info("{0:>8} .... {1: .8f}".format(o, hypothesis.calculate(i)))

        # Display results
        log.bar()
        log.info("Theta values:")
        log.underline()
        for nv, ht in zip(normalised_variables, final_thetas):
            log.info("{:>8} = {:>16.8f}".format(nv.variable.name, ht))

        log.info("")
        log.info("Completed %s iterations" % (gradient_descent.iterations,))
        log.bar()
Example #35
    def __init__(self):
        gd = GradientDescent()
        self.maximize_batch = gd.maximize_batch
        self.maximize_stochastic = gd.maximize_stochastic