Exemplo n.º 1
0
    def compute_step(self, parameter, previous_step):
        """Rescale `previous_step` by a ratio of two running RMS values.

        Two shared buffers are created from `parameter` with
        `shared_floatx_zeros_matching` (presumably zero-filled, going by
        the helper's name) and tagged with the ALGORITHM_BUFFER role. One
        tracks a decayed average of squared incoming steps, the other a
        decayed average of squared outgoing steps.

        Parameters
        ----------
        parameter
            Variable the two buffers are created for.
        previous_step
            Incoming step expression.

        Returns
        -------
        step
            `previous_step` multiplied by
            sqrt(old delta average + epsilon) / sqrt(new step average + epsilon).
        updates
            List of (buffer, new value) pairs, step average first.
        """
        sq_step_avg_prev = shared_floatx_zeros_matching(
            parameter, "mean_square_step_tm1")
        add_role(sq_step_avg_prev, ALGORITHM_BUFFER)
        sq_delta_avg_prev = shared_floatx_zeros_matching(
            parameter, "mean_square_delta_x_tm1")
        add_role(sq_delta_avg_prev, ALGORITHM_BUFFER)

        decay = self.decay_rate
        eps = self.epsilon

        # Decayed average of the squared incoming steps.
        sq_step_avg = (decay * sq_step_avg_prev +
                       (1 - decay) * tensor.sqr(previous_step))

        # Epsilon is added inside both square roots; the numerator uses the
        # delta average from *before* this call, the denominator the fresh
        # step average.
        rms_delta_prev = tensor.sqrt(sq_delta_avg_prev + eps)
        rms_step = tensor.sqrt(sq_step_avg + eps)
        step = rms_delta_prev / rms_step * previous_step

        # Decayed average of the squared outgoing steps.
        sq_delta_avg = (decay * sq_delta_avg_prev +
                        (1 - decay) * tensor.sqr(step))

        return step, [(sq_step_avg_prev, sq_step_avg),
                      (sq_delta_avg_prev, sq_delta_avg)]
Exemplo n.º 2
0
    def compute_step(self, parameter, previous_step):
        """Return the rescaled step and the updates of its two buffers.

        The incoming step is multiplied by the RMS of past outgoing steps
        and divided by the RMS of incoming steps. Both RMS values come from
        decayed averages kept in shared buffers tagged ALGORITHM_BUFFER.

        Parameters
        ----------
        parameter
            Variable passed to `shared_floatx_zeros_matching` to create
            each buffer.
        previous_step
            Incoming step expression.

        Returns
        -------
        tuple
            ``(step, updates)`` where `updates` lists the new value for the
            step-average buffer followed by the delta-average buffer.
        """
        # Create and tag the buffers in a fixed order: step average first.
        created = []
        for buffer_name in ("mean_square_step_tm1",
                            "mean_square_delta_x_tm1"):
            buf = shared_floatx_zeros_matching(parameter, buffer_name)
            add_role(buf, ALGORITHM_BUFFER)
            created.append(buf)
        avg_sq_step_old, avg_sq_delta_old = created

        avg_sq_step_new = (
            self.decay_rate * avg_sq_step_old +
            (1 - self.decay_rate) * tensor.sqr(previous_step)
        )

        # Numerator: RMS of the old delta average; denominator: RMS of the
        # freshly updated step average. Epsilon goes inside each root.
        numerator = tensor.sqrt(avg_sq_delta_old + self.epsilon)
        denominator = tensor.sqrt(avg_sq_step_new + self.epsilon)
        scaled_step = numerator / denominator * previous_step

        avg_sq_delta_new = (
            self.decay_rate * avg_sq_delta_old +
            (1 - self.decay_rate) * tensor.sqr(scaled_step)
        )

        buffer_updates = [
            (avg_sq_step_old, avg_sq_step_new),
            (avg_sq_delta_old, avg_sq_delta_new),
        ]
        return scaled_step, buffer_updates
Exemplo n.º 3
0
 def compute_step(self, parameter, previous_step):
     """Divide `previous_step` by a clamped running RMS of the steps.

     A shared buffer created from `parameter` holds the decayed average of
     squared steps. Both the buffer and its updated expression are tagged
     with the ALGORITHM_BUFFER role.

     Parameters
     ----------
     parameter
         Variable the buffer is created for.
     previous_step
         Incoming step expression.

     Returns
     -------
     step
         `previous_step` divided by max(sqrt(new average), epsilon).
     updates
         Single-element list pairing the buffer with its new value.
     """
     avg_sq_prev = shared_floatx_zeros_matching(
         parameter, "mean_square_step_tm1")
     add_role(avg_sq_prev, ALGORITHM_BUFFER)

     retained = self.decay_rate * avg_sq_prev
     incoming = (1 - self.decay_rate) * tensor.sqr(previous_step)
     avg_sq = retained + incoming
     add_role(avg_sq, ALGORITHM_BUFFER)

     # Epsilon acts as a lower bound on the divisor, not as an additive
     # term inside the root.
     divisor = tensor.maximum(tensor.sqrt(avg_sq), self.epsilon)
     return previous_step / divisor, [(avg_sq_prev, avg_sq)]
Exemplo n.º 4
0
    def compute_step(self, parameter, previous_step):
        """Build a step from running mean and variance estimates.

        Three shared buffers tagged ALGORITHM_BUFFER are created: "mean"
        and "variance" from `parameter`, and a scalar "time" counter
        starting at 0.0.

        Note that `beta1` and `beta2` weight the *new* observation here,
        while the old buffer contents get ``1 - beta``.

        Parameters
        ----------
        parameter
            Variable the mean and variance buffers are created for.
        previous_step
            Incoming step expression.

        Returns
        -------
        step
            ``rate * new_mean / (sqrt(new_variance) + epsilon)`` with the
            time-corrected rate computed below.
        updates
            (buffer, new value) pairs for mean, variance and time, in that
            order.
        """
        mean_buf = shared_floatx_zeros_matching(parameter, "mean")
        add_role(mean_buf, ALGORITHM_BUFFER)
        var_buf = shared_floatx_zeros_matching(parameter, "variance")
        add_role(var_buf, ALGORITHM_BUFFER)
        time_buf = shared_floatx(0.0, "time")
        add_role(time_buf, ALGORITHM_BUFFER)

        next_time = time_buf + 1

        # Learning rate corrected by the incremented time step.
        rate_numerator = self.learning_rate * tensor.sqrt(
            1.0 - (1.0 - self.beta2) ** next_time)
        rate_denominator = 1.0 - (1.0 - self.beta1) ** next_time
        corrected_rate = rate_numerator / rate_denominator

        # Mixing weight for the mean, adjusted by decay_factor ** time.
        mean_weight = 1 - (1 - self.beta1) * self.decay_factor ** (next_time - 1)

        new_mean = mean_weight * previous_step + (1.0 - mean_weight) * mean_buf
        new_var = (self.beta2 * tensor.sqr(previous_step)
                   + (1.0 - self.beta2) * var_buf)

        scaled_mean = corrected_rate * new_mean
        step = scaled_mean / (tensor.sqrt(new_var) + self.epsilon)

        return step, [(mean_buf, new_mean),
                      (var_buf, new_var),
                      (time_buf, next_time)]
Exemplo n.º 5
0
    def compute_step(self, parameter, previous_step):
        """Return a mean/variance-normalised step and its buffer updates.

        Creates the shared buffers 'mean' and 'variance' from `parameter`
        plus a scalar 'time' counter initialised to 0., all tagged with the
        ALGORITHM_BUFFER role.

        In this formulation `beta1`/`beta2` multiply the incoming step
        (resp. its square) and ``1 - beta`` multiplies the stored value.

        Parameters
        ----------
        parameter
            Variable the mean and variance buffers are created for.
        previous_step
            Incoming step expression.

        Returns
        -------
        tuple
            ``(step, updates)``; `updates` covers mean, variance and time,
            in that order.
        """
        mean_store = shared_floatx_zeros_matching(parameter, 'mean')
        add_role(mean_store, ALGORITHM_BUFFER)
        variance_store = shared_floatx_zeros_matching(parameter, 'variance')
        add_role(variance_store, ALGORITHM_BUFFER)
        counter = shared_floatx(0., 'time')
        add_role(counter, ALGORITHM_BUFFER)

        beta1 = self.beta1
        beta2 = self.beta2
        tick = counter + 1

        # (learning_rate * sqrt(...)) / (...): the product is formed before
        # the division.
        rate = self.learning_rate * tensor.sqrt(1. - (1. - beta2)**tick)
        rate = rate / (1. - (1. - beta1)**tick)

        # Weight applied to the incoming step when refreshing the mean.
        step_weight = 1 - (1 - beta1) * self.decay_factor**(tick - 1)

        mean_next = step_weight * previous_step + (1. - step_weight) * mean_store
        variance_next = (beta2 * tensor.sqr(previous_step) +
                         (1. - beta2) * variance_store)

        denominator = tensor.sqrt(variance_next) + self.epsilon
        step = rate * mean_next / denominator

        updates = [
            (mean_store, mean_next),
            (variance_store, variance_next),
            (counter, tick),
        ]
        return step, updates
Exemplo n.º 6
0
    def compute_step(self, parameter, previous_step):
        """Scale `previous_step` by the root of accumulated squared steps.

        A shared buffer tagged ALGORITHM_BUFFER accumulates the sum of
        squared steps. Its name is "adagrad_sqs", followed by "_" and the
        parameter's name when that name is truthy.

        Parameters
        ----------
        parameter
            Variable the accumulator is created for; its `name` attribute
            is used to label the accumulator.
        previous_step
            Incoming step expression.

        Returns
        -------
        step
            ``learning_rate * previous_step / (sqrt(new_sum) + epsilon)``.
        updates
            Single-element list pairing the accumulator with its new sum.
        """
        buffer_name = ("adagrad_sqs_" + parameter.name
                       if parameter.name else "adagrad_sqs")
        sum_sq = shared_floatx_zeros_matching(parameter, name=buffer_name)
        add_role(sum_sq, ALGORITHM_BUFFER)

        # The accumulator only ever grows: no decay is applied.
        sum_sq_new = tensor.sqr(previous_step) + sum_sq

        scaled = self.learning_rate * previous_step
        step = scaled / (tensor.sqrt(sum_sq_new) + self.epsilon)

        return step, [(sum_sq, sum_sq_new)]
Exemplo n.º 7
0
 def compute_step(self, parameter, previous_step):
     """Normalise `previous_step` by a running RMS bounded below by epsilon.

     The decayed average of squared steps lives in a shared buffer created
     from `parameter`. The buffer and the expression for its next value
     both receive the ALGORITHM_BUFFER role.

     Parameters
     ----------
     parameter
         Variable the buffer is created for.
     previous_step
         Incoming step expression.

     Returns
     -------
     tuple
         ``(step, updates)`` with `step` equal to `previous_step` divided
         by ``maximum(sqrt(new_average), epsilon)`` and `updates` holding
         the single (buffer, new_average) pair.
     """
     running_sq = shared_floatx_zeros_matching(
         parameter, "mean_square_step_tm1")
     add_role(running_sq, ALGORITHM_BUFFER)

     old_part = self.decay_rate * running_sq
     new_part = (1 - self.decay_rate) * tensor.sqr(previous_step)
     running_sq_next = old_part + new_part
     add_role(running_sq_next, ALGORITHM_BUFFER)

     # The RMS is clamped from below by epsilon before dividing.
     rms = tensor.sqrt(running_sq_next)
     bounded_rms = tensor.maximum(rms, self.epsilon)
     normalised = previous_step / bounded_rms

     return normalised, [(running_sq, running_sq_next)]
Exemplo n.º 8
0
    def compute_step(self, parameter, previous_step):
        """Return the step divided by the root of summed squared steps.

        The running sum is kept in a shared buffer with the
        ALGORITHM_BUFFER role, named 'adagrad_sqs' with '_<parameter name>'
        appended whenever `parameter.name` is truthy.

        Parameters
        ----------
        parameter
            Variable the accumulator is created for.
        previous_step
            Incoming step expression.

        Returns
        -------
        tuple
            ``(step, updates)``; `updates` holds the single
            (accumulator, new sum) pair.
        """
        suffix = '_' + parameter.name if parameter.name else ''
        accumulator = shared_floatx_zeros_matching(
            parameter, name='adagrad_sqs' + suffix)
        add_role(accumulator, ALGORITHM_BUFFER)

        accumulated = tensor.sqr(previous_step) + accumulator

        # Epsilon is added after the square root, outside of it.
        divisor = tensor.sqrt(accumulated) + self.epsilon
        step = self.learning_rate * previous_step / divisor

        updates = [(accumulator, accumulated)]
        return step, updates
Exemplo n.º 9
0
def _create_algorithm_buffer_for(param, *args, **kwargs):
    """Create a shared buffer for `param` and mark it as an algorithm buffer.

    All extra positional and keyword arguments are forwarded unchanged to
    `shared_floatx_zeros_matching`. The new variable records `param` in
    its ``tag.for_parameter`` attribute and is given the ALGORITHM_BUFFER
    role before being returned.
    """
    algorithm_buffer = shared_floatx_zeros_matching(param, *args, **kwargs)
    # Remember which parameter this buffer belongs to.
    algorithm_buffer.tag.for_parameter = param
    add_role(algorithm_buffer, ALGORITHM_BUFFER)
    return algorithm_buffer
Exemplo n.º 10
0
 def compute_step(self, parameter, previous_step):
     """Add a momentum-weighted velocity term to `previous_step`.

     A shared "velocity" buffer is created from `parameter` and tagged
     with the ALGORITHM_BUFFER role.

     Parameters
     ----------
     parameter
         Variable the velocity buffer is created for.
     previous_step
         Incoming step expression.

     Returns
     -------
     step
         ``momentum * velocity + previous_step``.
     updates
         Single-element list storing that same expression back into the
         velocity buffer.
     """
     velocity_buf = shared_floatx_zeros_matching(parameter, "velocity")
     add_role(velocity_buf, ALGORITHM_BUFFER)

     carried = self.momentum * velocity_buf
     new_velocity = carried + previous_step

     # The returned step and the stored velocity are the same expression.
     return new_velocity, [(velocity_buf, new_velocity)]
Exemplo n.º 11
0
def _create_algorithm_buffer_for(param, *args, **kwargs):
    """Return a new shared variable tagged as a buffer belonging to `param`.

    The variable is built by `shared_floatx_zeros_matching`, which receives
    `param` followed by any additional arguments given here. Before it is
    returned, `param` is stored on ``tag.for_parameter`` and the
    ALGORITHM_BUFFER role is attached.
    """
    new_buffer = shared_floatx_zeros_matching(param, *args, **kwargs)
    new_buffer.tag.for_parameter = param  # back-reference to the owner
    add_role(new_buffer, ALGORITHM_BUFFER)
    return new_buffer
Exemplo n.º 12
0
 def compute_step(self, parameter, previous_step):
     """Return the velocity-augmented step and the velocity update.

     The shared buffer named "velocity" is created from `parameter` and
     given the ALGORITHM_BUFFER role. The outgoing step is the buffer
     scaled by `self.momentum` plus the incoming step, and that same
     expression becomes the buffer's next value.

     Parameters
     ----------
     parameter
         Variable the velocity buffer is created for.
     previous_step
         Incoming step expression.

     Returns
     -------
     tuple
         ``(step, updates)`` with one (velocity, step) pair in `updates`.
     """
     stored_velocity = shared_floatx_zeros_matching(parameter, "velocity")
     add_role(stored_velocity, ALGORITHM_BUFFER)

     outgoing = self.momentum * stored_velocity + previous_step
     velocity_updates = [(stored_velocity, outgoing)]

     return outgoing, velocity_updates