Example #1
def abs_error(x: X, theta: Vector, y: Vector) -> Tuple[Scalar, Vector]:
    # L1 cost: sum of absolute residuals |X*theta - y|.
    pred = mult_mv(x.by_sample(), theta)
    diff = diff_vv(pred, y)
    error = sum(map(abs, diff))

    # Subgradient of the L1 cost: X^T * sign(X*theta - y).
    sign = vector(map(signum, diff))
    grad = mult_mv(x.by_feature(), sign)

    return error, grad
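
The helpers used above (mult_mv, diff_vv, vector, signum) come from the
project and are not shown on this page. A minimal sketch of what they
plausibly look like, for reading purposes only:

from typing import Iterable, Sequence, Tuple

Vector = Tuple[float, ...]

def vector(items: Iterable[float]) -> Vector:
    return tuple(items)

def signum(v: float) -> float:
    # Sign of v: the subgradient of abs() away from zero.
    return float((v > 0) - (v < 0))

def diff_vv(a: Vector, b: Vector) -> Vector:
    # Element-wise difference a - b.
    return vector(ai - bi for ai, bi in zip(a, b))

def mult_mv(rows: Sequence[Vector], v: Vector) -> Vector:
    # Matrix-vector product: dot product of every row with v.
    return vector(sum(ri * vi for ri, vi in zip(row, v)) for row in rows)
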
Example #2
    def _train(self, x: X, y: Vector, cost: Cost, step: float,
               stdev: float) -> Vector:
        x = x.append_ones()
        m = x.nsamples()

        # Random init: feature weights ~ N(0, stdev); the extra component
        # (paired with the appended ones column) starts near mean(y).
        theta = vector(
            map(lambda _: gauss(0., stdev),
                range(x.nfeatures() - 1))) + (gauss(mean(y), stdev), )

        stop_condition = self.stop_condition
        while True:
            # Average the cost and gradient over the m samples.
            error, gradient = cost(x, theta, y)
            error, gradient = error / m, mult_vs(gradient, 1 / m)

            # Gradient descent step.
            theta = sum_vv(theta, mult_vs(gradient, -step))

            stop_condition, stop = stop_condition.update(gradient, error)
            if stop:
                break
        return theta
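
The StopCondition type is not shown here; the call
`stop_condition, stop = stop_condition.update(gradient, error)` suggests
an immutable object that returns a successor state plus a stop flag. One
guessed implementation, not the project's actual class:

from typing import NamedTuple, Tuple

class ErrorDeltaStop(NamedTuple):
    tolerance: float
    last_error: float = float('inf')

    def update(self, gradient, error: float) -> Tuple['ErrorDeltaStop', bool]:
        # Stop once the error improvement falls below the tolerance.
        stop = abs(self.last_error - error) < self.tolerance
        return self._replace(last_error=error), stop
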
Example #3
    def train(self, x: X, y: Vector) -> Vector:
        if self._parameters is None:
            raise RuntimeError('Parameters not set yet')

        x = x.convert(self._basis_functions)
        reg = parametrize(self._regularization,
                          self._parameters.regularization_parameters)
        cost = make_cost(self._basic_cost, reg)

        self._theta = self._train(x, y, cost, self._parameters.gradient_step,
                                  self._parameters.stdev)
        return copy(self._theta)
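
make_cost and parametrize are also defined elsewhere. Judging by the call
sites (and by _train expecting cost(x, theta, y) to return an
(error, gradient) pair), one plausible shape is sketched below; sum_vv is
the project's element-wise vector addition seen in Example #2:

from functools import partial

def parametrize(regularization, parameters):
    # Bind the hyper-parameters, leaving a penalty function of theta.
    return partial(regularization, parameters)

def make_cost(basic_cost, regularization):
    # Combine the data cost with the penalty; both return (error, gradient).
    def cost(x, theta, y):
        error, grad = basic_cost(x, theta, y)
        penalty, penalty_grad = regularization(theta)
        return error + penalty, sum_vv(grad, penalty_grad)
    return cost
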
Example #4
def optimize_model(model: Model, train_set, test_set) -> Model:
    (x, y), (x_test, y_test) = train_set, test_set
    x_test = X(x_test)

    accuracies = defaultdict(list)
    for _ in range(REPETITIONS):
        (x_train, y_train), (x_val, y_val) = split2(x, y, 1 / 2)
        x_train, x_val = X(x_train), X(x_val)

        evaluated = model.evaluate_parameters(x_train, y_train, x_val, y_val,
                                              parameters)
        for e, _, p in evaluated:
            accuracies[p].append(e)

    mean_errors = [(p, mean(es)) for p, es in accuracies.items()]
    best_params, train_error = min(mean_errors, key=lambda pe: pe[1])

    model.set_parameters(best_params)
    theta = model.train(X(x), y)
    test_error = model.error(x_test, y_test)

    report_best_parameters(model, theta, train_error, test_error)
    return model
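
Both split2 and the `parameters` grid referenced above live elsewhere in
the module. From its call sites (a fraction in, two (x, y) pairs out),
split2 could look roughly like this; a guess, not the project's code:

import random

def split2(x, y, fraction: float):
    # Randomly split parallel sample/target sequences at `fraction`.
    indices = list(range(len(y)))
    random.shuffle(indices)
    cut = int(len(indices) * fraction)

    def take(idx):
        return [x[i] for i in idx], [y[i] for i in idx]

    return take(indices[:cut]), take(indices[cut:])
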
Example #5
    def evaluate_parameters(
        self, x_train: X, y_train: Vector, x_val: X, y_val: Vector,
        parameters: Iterable[Parameters]
    ) -> List[Tuple[float, Vector, Parameters]]:
        parameters = list(parameters)  # consumed twice below (train + zip)
        x_train = x_train.convert(self._basis_functions)
        x_val = x_val.convert(self._basis_functions).append_ones()

        def cost(ps):
            return make_cost(self._basic_cost,
                             parametrize(self._regularization, ps))

        train = partial(self._train, x_train, y_train)
        trained = map(
            lambda p: train(cost(p.regularization_parameters), p.gradient_step,
                            p.stdev), parameters)

        def evaluate(paired):
            theta, params = paired
            cum_error, _ = self._basic_cost(x_val, theta, y_val)
            error = cum_error / x_val.nsamples()
            return error, theta, params

        evaluated = list(map(evaluate, zip(trained, parameters)))
        return evaluated
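
The Parameters record is not shown; the attributes read above
(regularization_parameters, gradient_step, stdev) suggest it could be as
simple as the NamedTuple below, with a hypothetical grid of the kind the
`parameters` name in Example #4 presumably refers to:

from itertools import product
from typing import NamedTuple, Tuple

class Parameters(NamedTuple):
    regularization_parameters: Tuple[float, ...]
    gradient_step: float
    stdev: float

grid = [Parameters((reg,), step, 0.5)
        for reg, step in product((0.0, 0.1, 1.0), (0.01, 0.1))]
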
Example #6
def squared_error(x: X, theta: Vector, y: Vector) -> Tuple[Scalar, Vector]:
    # Squared-error cost 0.5 * sum((X*theta - y)^2); the 0.5 cancels the
    # factor of 2 in the derivative, so grad = X^T * (X*theta - y).
    pred = mult_mv(x.by_sample(), theta)
    diff = diff_vv(pred, y)
    error = sum(map(lambda e: 0.5 * e**2, diff))
    grad = mult_mv(x.by_feature(), diff)
    return error, grad
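
A one-sample sanity check of the factors, with plain floats standing in
for the vector helpers:

pred = 2.0 * 3.0           # x = (2,), theta = (3,)
diff = pred - 5.0          # y = (5,)  ->  diff = 1.0
error = 0.5 * diff ** 2    # 0.5
grad = 2.0 * diff          # X^T * diff = (2.0,): no stray factor of 2
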
Example #7
    def error(self, x: X, y: Vector) -> float:
        if self._theta is None:
            raise RuntimeError('Not trained yet')
        x = x.convert(self._basis_functions).append_ones()
        error, _ = self._basic_cost(x, self._theta, y)
        return error / len(y)
Example #8
    def predict(self, x: X) -> Vector:
        if self._theta is None:
            raise RuntimeError('Not trained yet')
        x = x.convert(self._basis_functions).append_ones()
        return mult_mv(x.by_sample(), self._theta)
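
The X wrapper itself is not shown. From its use across these examples,
by_sample() presumably yields the data row-major (one vector per sample),
by_feature() the transpose, and append_ones() adds a bias column. A
minimal sketch under those assumptions (convert, normalize and the rest
omitted):

from typing import Sequence, Tuple

class X:
    def __init__(self, rows: Sequence[Tuple[float, ...]]):
        self._rows = [tuple(r) for r in rows]

    def by_sample(self):
        # One tuple per sample (row-major).
        return self._rows

    def by_feature(self):
        # One tuple per feature (the transpose).
        return list(zip(*self._rows))

    def append_ones(self) -> 'X':
        # Append the constant bias feature to every sample.
        return X([row + (1.0,) for row in self._rows])

    def nsamples(self) -> int:
        return len(self._rows)

    def nfeatures(self) -> int:
        return len(self._rows[0])
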
Example #9
    report_best_parameters(model, theta, train_error, test_error)
    return model


def plot_learning_curve(model: Model,
                        data: Tuple[Iterable, Iterable]) -> None:
    plt.ylim(0, 30)
    plt.plot(*data)
    # str(model) looks like a dict; turn it into a multi-line title
    # (renamed from `repr` to avoid shadowing the builtin).
    title = str(model).replace(', ', '\n').strip('{}').replace('\'', '')
    plt.title(title)
    plt.show()


if __name__ == '__main__':
    x, y = load(Path('../noise.data'))
    x = X(x).normalize(ScalingType.MIN_MAX_1)
    train, test = split2(x.by_sample(), y, 1 / 3)

    optimized_models = (optimize_model(m, train, test) for m in models)

    train_fractions = [0.01, 0.02, 0.03, 0.125, 0.625, 1.]

    for model in optimized_models:
        accuracies = defaultdict(list)
        for tf, _ in product(train_fractions, range(REPETITIONS)):
            (x_train, y_train), (x_test, y_test) = split2(x.by_sample(), y, tf)
            if tf == 1.:
                x_test, y_test = x_train, y_train
            x_train, x_test = X(x_train), X(x_test)

            model.train(x_train, y_train)