Example #1
 def __init__(self,
              decay_rate=0.95,
              learning_rate=2e-3,
              epsilon=1e-6,
              gradient_clip_norm=None,
              gradient_clip_value=None,
              name=None,
              schedule=Schedule()):
     """
     Class constructor.
     Arguments:
         decay_rate (float): decay rate of states
         learning_rate (float): the multiplicative coefficient applied to updates
         epsilon (float): smoothing epsilon to avoid division by zero
         gradient_clip_norm (float, optional): Target gradient norm.
                                               Defaults to None.
         gradient_clip_value (float, optional): Value to element-wise clip
                                                gradients.
                                                Defaults to None.
         name (str, optional): Name of this optimizer. Defaults to None.
         schedule (neon.optimizers.optimizer.Schedule, optional): Learning rate schedule.
                                                                  Defaults to a constant.
     Notes:
         Only constant learning rate is supported currently.
     """
     super(RMSProp, self).__init__(name=name)
     self.state_list = None
     self.epsilon = epsilon
     self.decay_rate = decay_rate
     self.schedule = schedule
     self.gradient_clip_norm = gradient_clip_norm
     self.gradient_clip_value = gradient_clip_value
     self.learning_rate = ng.persistent_tensor(
         axes=(), initial_value=learning_rate).named('lrate')
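
Keeping the learning rate in a zero-dimensional persistent tensor rather than a plain float is what allows it to be reassigned in-graph; the schedule updates in Examples #3 and #25 below rely on exactly this. A minimal sketch, assuming only the RMSProp class above (the halving factor is illustrative):

import ngraph as ng

opt = RMSProp(learning_rate=2e-3, decay_rate=0.95)
# the learning rate is a graph tensor, so it can be rewritten in-graph
lr_update = ng.assign(opt.learning_rate,
                      opt.learning_rate * 0.5)  # e.g. halve it each epoch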
Example #2
def test_persistent_tensor():
    input_axes = ng.make_axes([
        ng.make_axis(10),
        ng.make_axis(3)
    ])
    bgr = ng.persistent_tensor(
        axes=input_axes,
        initial_value=np.array([113.9, 123.0, 125.3]))
    bgr_comp = ng.computation(bgr, "all")

    results = dict()
    weight_saver = Saver()
    with closing(ngt.make_transformer()) as transformer:
        bgr_func = transformer.add_computation(bgr_comp)
        weight_saver.setup_save(transformer=transformer, computation=bgr_comp)
        results['saved'] = bgr_func().copy()
        weight_saver.save(filename="test_persistent_tensor")
    with closing(ngt.make_transformer()) as restore_transformer:
        bgr_refunc = restore_transformer.add_computation(bgr_comp)
        weight_saver.setup_restore(transformer=restore_transformer, computation=bgr_comp,
                                   filename="test_persistent_tensor")
        weight_saver.restore()
        results['restored'] = bgr_refunc().copy()
    os.remove("test_persistent_tensor.npz")
    assert np.allclose(results['saved'], results['restored'], atol=0)
Example #3
    def __call__(self, cost_func):
        with ng.Op.saved_user_deps():
            velocity_updates, param_updates = [], []
            batch_cost = ng.sum(cost_func, out_axes=())
            batch_size = cost_func.axes.batch_axes()[0].length
            scale_factor = 1

            for variable in batch_cost.variables():
                grad = clip_gradient_value(
                    ng.deriv(batch_cost, variable) / batch_size,
                    self.gradient_clip_value)

                velocity = ng.persistent_tensor(
                    axes=variable.axes,
                    initial_value=0.).named(variable.name + '_vel')
                velocity_updates.append(
                    ng.assign(
                        velocity,
                        velocity * self.momentum_coef - self.learning_rate *
                        (scale_factor * grad + self.wdecay * variable)))

                param_updates.append(ng.assign(variable, variable + velocity))

            lr_update = [
                ng.assign(
                    self.learning_rate,
                    self.schedule.get_learning_rate(self.learning_rate,
                                                    self.iteration_index))
            ]

            updates = ng.doall(velocity_updates + param_updates + lr_update)
            self.iteration_index += 1

        return updates
Example #4
    def __call__(self, cost_func):
        all_updates = []
        batch_cost = ng.sum(cost_func, out_axes=())
        batch_size = cost_func.axes.batch_axes()[0].length

        grads = [
            ng.deriv(batch_cost, v) / batch_size
            for v in batch_cost.variables()
        ]
        scale_factor = clip_gradient_norm(grads, batch_size,
                                          self.gradient_clip_norm)

        epsilon, decay = (self.epsilon, self.decay_rate)
        for i, (variable, grad) in enumerate(zip(batch_cost.variables(),
                                                 grads)):
            grad = clip_gradient_value(grad, self.gradient_clip_value)
            state = ng.persistent_tensor(axes=variable.axes, initial_value=0.)
            all_updates.append(
                ng.sequential([
                    ng.assign(state,
                              decay * state + (1.0 - decay) * ng.square(grad)),
                    ng.assign(
                        variable,
                        variable - ((scale_factor * grad * self.lrate) /
                                    (ng.sqrt(state + epsilon) + epsilon)))
                ]))

        return ng.doall(all_updates)
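
For reference, the update rule the loop above builds, with s the persistent state, g the clipped gradient, ρ = decay_rate, η = lrate, and ε = epsilon (a restatement of the code, not part of the original listing):

$$s \leftarrow \rho s + (1-\rho)\,g^2,\qquad w \leftarrow w - \frac{\mathrm{scale}\cdot g\cdot\eta}{\sqrt{s+\epsilon}+\epsilon}$$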
Example #5
def cifar_mean_subtract(x):
    # per-channel BGR mean held in a persistent tensor
    bgr_mean = ng.persistent_tensor(
        axes=[x.axes.channel_axis()],
        initial_value=np.array([104., 119., 127.]))

    return (x - bgr_mean) / 255.
Example #6
 def __call__(self, cost_func):
     all_updates = []
     batch_cost = ng.sum(cost_func, out_axes=())
     batch_size = cost_func.axes.batch_axes()[0].length
     grads = [
         ng.deriv(batch_cost, v) / batch_size
         for v in batch_cost.variables()
     ]
     scale_factor = clip_gradient_norm(grads, batch_size,
                                       self.gradient_clip_norm)
     for variable, grad in zip(batch_cost.variables(), grads):
         updates = []
         velocity = ng.persistent_tensor(
             axes=variable.axes,
             initial_value=0.).named(variable.name + '_vel')
         clip_grad = clip_gradient_value(grad, self.gradient_clip_value)
         lr = -self.lrate * (scale_factor * clip_grad +
                             self.wdecay * variable)
         updates.append(
             ng.assign(velocity, velocity * self.momentum_coef + lr))
         if self.nesterov:
             delta = (self.momentum_coef * velocity + lr)
         else:
             delta = velocity
         updates.append(ng.assign(variable, variable + delta))
         all_updates.append(ng.sequential(updates))
     return ng.doall(all_updates)
Example #7
    def UniformFill(self, c2_op, inputs):
        """
        Creates a constant tensor with uniform fill.

        Arguments:
            c2_op: OperatorDef object, the caffe2 node to convert.
            inputs: List of ngraph Ops as inputs to this node.

        Returns:
            An ngraph Op corresponding to the caffe2 node.

        Inputs to c2_op:
            min, max, shape, name
        """

        # parse protobuf arguments
        args = {arg.name: arg for arg in c2_op.arg}

        # convert to numpy value
        np_val = np.random.uniform(args["min"].f, args["max"].f,
                                   tuple(args["shape"].ints))

        ng_const = make_const_op(np_val, np_val.shape,
                                 c2_op.name)  # TODO simplify
        ng_placeholder = ng.persistent_tensor(axes=ng_const.axes,
                                              initial_value=ng_const)
        return ng_placeholder
Example #8
    def GaussianFill(self, c2_op, inputs):
        """
        Creates a constant tensor with Gaussian fill.

        Arguments:
            c2_op: OperatorDef object, the caffe2 node to convert.
            inputs: List of ngraph Ops as inputs to this node.

        Returns:
            An ngraph Op corresponding to the caffe2 node.

        Inputs to c2_op:
            mean, std, shape, name
        """
        # parse protobuf arguments
        args = {arg.name: arg for arg in c2_op.arg}

        mean = args["mean"].f if "mean" in args.keys() else 0
        std = args["std"].f if "std" in args.keys() else 1

        # convert to numpy value
        np_val = np.random.normal(mean, std, tuple(args["shape"].ints))

        ng_const = make_const_op(np_val, np_val.shape,
                                 c2_op.name)  # TODO simplify
        ng_placeholder = ng.persistent_tensor(axes=ng_const.axes,
                                              initial_value=ng_const)
        return ng_placeholder
Example #9
    def ConstantFill(self, c2_op, inputs):
        """
        Creates a constant tensor with constant fill.

        Arguments:
            c2_op: OperatorDef object, the caffe2 node to convert.
            inputs: List of ngraph Ops as inputs to this node.

        Returns:
            An ngraph Op corresponding to the caffe2 node.

        Inputs to c2_op:
            value, dtype, shape, name
        """

        # parse protobuf arguments
        args = {arg.name: arg for arg in c2_op.arg}

        value = args["value"].i if ("dtype" in args.keys()
                                    and args["dtype"].i == c2core.DataType.INT32) \
            else args["value"].f
        # convert to numpy value
        np_val = np.full(tuple(args["shape"].ints), value)

        ng_const = make_const_op(np_val, np_val.shape,
                                 c2_op.name)  # TODO simplify
        ng_placeholder = ng.persistent_tensor(axes=ng_const.axes,
                                              initial_value=ng_const)
        return ng_placeholder
Example #10
    def GivenTensorFill(self, c2_op, inputs):
        """
        Creates a constant tensor with values provided.

        Arguments:
            c2_op: OperatorDef object, the caffe2 node to convert.
            inputs: List of ngraph Ops as inputs to this node.

        Returns:
            An ngraph Op corresponding to the caffe2 node.

        Inputs to c2_op:
            values, shape, name
        """
        # parse arguments
        args = {arg.name: arg for arg in c2_op.arg}
        # convert to numpy value
        values = [v for v in args["values"].floats]
        shape = [s for s in args["shape"].ints]
        np_init = np.array(values)
        np_val = np.ndarray(shape)
        np_val[:] = np_init.reshape(shape)[:]

        ng_const = make_const_op(np_val, np_val.shape,
                                 c2_op.name)  # TODO simplify
        ng_placeholder = ng.persistent_tensor(axes=ng_const.axes,
                                              initial_value=ng_const)
        return ng_placeholder
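
Examples #7 through #10 differ only in how np_val is produced (uniform, Gaussian, constant, or explicitly given values); each then wraps the array in a persistent tensor so the value lives in backend storage rather than being baked into the graph as a constant. A minimal sketch of that shared tail, using only calls that appear elsewhere in these examples (the array here is illustrative):

import numpy as np
import ngraph as ng

np_val = np.full((3, 4), 1.5)  # stand-in for the fill-specific array
axes = ng.make_axes([ng.make_axis(length=s) for s in np_val.shape])
tensor = ng.persistent_tensor(axes=axes, initial_value=np_val)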
Example #11
    def __call__(self, in_obj, keep=None, **kwargs):

        if self.mask is None:
            in_axes = in_obj.axes.sample_axes()
            self.mask = ng.persistent_tensor(axes=in_axes).named('mask')
        self.mask = ng.less_equal(ng.uniform(self.mask, low=0.0, high=1.0),
                                  keep)
        return ng.multiply(self.mask, in_obj) * (1. / keep)
Example #12
 def __call__(self, in_obj):
     if Layer.inference_mode:
         return self.keep * in_obj
     else:
         if self.mask is None:
             in_axes = in_obj.axes.sample_axes()
             self.mask = ng.persistent_tensor(axes=in_axes).named('mask')
         self.mask = ng.uniform(self.mask, low=0.0, high=1.0) <= self.keep
         return self.mask * in_obj
Example #13
File: layer.py Project: kkasravi/ngraph
 def __init__(self, rho=0.9, eps=1e-3, **kwargs):
     # rho needs to be allocated storage because it will be changed dynamically during tuning
     self.rho = ng.persistent_tensor(axes=(),
                                     initial_value=rho).named('rho')
     self.eps = eps
     self.gamma = None
     self.beta = None
     self.gmean = None
     self.gvar = None
Example #14
 def variable_update(self, variable, grad, scale_factor):
     grad = clip_gradient_value(grad, self.gradient_clip_value)
     state = ng.persistent_tensor(axes=grad.axes, initial_value=0.)
     updates = ng.sequential([
         ng.assign(state, state + ng.square(grad)),
         ng.assign(
             variable, variable - (scale_factor * self.lrate * grad) /
             (ng.sqrt(state + self.epsilon)))
     ])
     return updates
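
This is the Adagrad rule: unlike the decayed state in Example #15, the persistent state here accumulates squared gradients without decay (a restatement of the code above):

$$s \leftarrow s + g^2,\qquad w \leftarrow w - \frac{\mathrm{scale}\cdot\eta\cdot g}{\sqrt{s+\epsilon}}$$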
Example #15
 def variable_update(self, variable, grad, scale_factor):
     epsilon, decay = (self.epsilon, self.decay_rate)
     grad = clip_gradient_value(grad, self.gradient_clip_value)
     state = ng.persistent_tensor(axes=variable.axes, initial_value=0.)
     updates = ng.sequential([
         ng.assign(state, decay * state + (1.0 - decay) * ng.square(grad)),
         ng.assign(variable, variable - ((scale_factor * grad * self.lrate)
                                         / (ng.sqrt(state + epsilon) + epsilon)))
     ])
     return updates
Example #16
    def train_outputs(self, in_obj):
        in_axes = in_obj.axes.sample_axes()
        red_axes = ng.make_axes()
        if len(in_axes.role_axes(ar.Channel)) != 0:
            red_axes += in_axes.sample_axes() - in_axes.role_axes(ar.Channel)
        red_axes += in_obj.axes.batch_axes()
        out_axes = in_axes - red_axes

        self.gamma = self.gamma or ng.variable(axes=out_axes, initial_value=1.0).named('gamma')
        self.beta = self.beta or ng.variable(axes=out_axes, initial_value=0.0).named('beta')
        self.gvar = self.gvar or ng.persistent_tensor(axes=out_axes, initial_value=1.0)
        self.gmean = self.gmean or ng.persistent_tensor(axes=out_axes, initial_value=1.0)

        xmean = ng.mean(in_obj, reduction_axes=red_axes)
        xvar = ng.variance(in_obj, reduction_axes=red_axes)
        ng.assign(self.gmean, self.gmean * self.rho + xmean * (1.0 - self.rho))
        ng.assign(self.gvar, self.gvar * self.rho + xvar * (1.0 - self.rho))

        return self.gamma * (in_obj - xmean) / ng.sqrt(xvar + self.eps) + self.beta
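
Here the persistent tensors gmean and gvar carry exponential moving averages of the batch statistics (for use at inference), while the returned expression normalizes with the current batch's mean and variance (a restatement of the code above):

$$\mu_g \leftarrow \rho\,\mu_g + (1-\rho)\,\mu_B,\qquad \sigma_g^2 \leftarrow \rho\,\sigma_g^2 + (1-\rho)\,\sigma_B^2,\qquad y = \gamma\,\frac{x-\mu_B}{\sqrt{\sigma_B^2+\epsilon}} + \beta$$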
Example #17
def test_write_state():
    """
    Sets a persistent tensor from an argument and reads it back. No code is generated.
    """
    with ExecutorFactory() as ex:
        N = ng.make_axis(3, name='N')
        x_np = np.ones((N.length)) * 4
        x = ng.persistent_tensor([N]).named('x')
        f = ex.executor(x, x)
        x_val = f(x_np)
        assert np.allclose(x_np, x_val)
Example #18
 def __call__(self, in_obj, **kwargs):
     if Layer.inference_mode:
         return self.keep * in_obj
     else:
         if self.mask is None:
             in_axes = in_obj.axes.sample_axes()
             channel_axes = ng.make_axes([in_axes.channel_axis()])
             self.mask = ng.persistent_tensor(
                 axes=channel_axes).named('channel_mask')
         self.mask = ng.uniform(self.mask, low=0.0, high=1.0) <= self.keep
         return self.mask * in_obj
Example #19
    def __call__(self, in_obj):
        if not self.initialized:
            w_axis = ng.make_axis()
            self.weight = ng.variable(axes=[w_axis],
                                      initial_value=2,
                                      metadata={"label": LABELS["weight"]})
            self.side_effect = ng.persistent_tensor(axes=[w_axis],
                                                    initial_value=0)

        return ng.sequential([ng.assign(self.side_effect, self.weight),
                              self.weight * in_obj])
Example #20
    def __call__(self, *args, **kwargs):
        if len(self.ops) == 0:
            self.beta_1 = ng.constant(self.beta_1, dtype=np.float32)
            self.beta_2 = ng.constant(self.beta_2, dtype=np.float32)
            self.t = ng.persistent_tensor(axes=(), initial_value=0)

        self.t = ng.sequential([ng.assign(self.t, self.t + 1), self.t])
        self.ell = self.lrate * ng.sqrt(1 - self.beta_2**self.t) / (
            1 - self.beta_1**self.t)

        return super(Adam, self).__call__(*args, **kwargs)
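
The persistent scalar t counts optimizer steps, and self.ell folds Adam's bias correction into a per-step effective learning rate (a restatement of the code above):

$$\ell_t = \eta\,\frac{\sqrt{1-\beta_2^{\,t}}}{1-\beta_1^{\,t}}$$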
Example #21
 def variable_update(self, variable, grad, scale_factor):
     updates = []
     velocity = ng.persistent_tensor(
         axes=variable.axes, initial_value=0.).named(variable.name + '_vel')
     clip_grad = clip_gradient_value(grad, self.gradient_clip_value)
     lr = -self.lrate * (scale_factor * clip_grad + self.wdecay * variable)
     updates.append(ng.assign(velocity, velocity * self.momentum_coef + lr))
     if self.nesterov:
         delta = (self.momentum_coef * velocity + lr)
     else:
         delta = velocity
     updates.append(ng.assign(variable, variable + delta))
     return ng.sequential(updates)
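
With lr = -η(scale·g + λw) as computed above, the two branches give classical versus Nesterov momentum (a restatement of the code):

$$v \leftarrow \mu v + \mathrm{lr},\qquad \Delta w = \begin{cases}\mu v + \mathrm{lr} & \text{Nesterov}\\ v & \text{otherwise}\end{cases},\qquad w \leftarrow w + \Delta w$$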
Example #22
def test_uniform_range_posneg(transformer_factory):
    """TODO."""
    M = ng.make_axis(5, name='M')
    N = ng.make_axis(8, name='N')

    ng_a = ng.persistent_tensor([M, N], initial_value=10.0)
    ng_a = ng.uniform(ng_a, low=-0.5, high=0.5)

    result = executor(ng_a)()
    print(result)

    assert np.all(result < 0.5)
    assert np.all(result >= -0.5)
    assert not np.all(result >= 0.0)
Example #23
def test_scope_ops(input_placeholder):
    """
    Test scope_ops creates a subgraph with correct attributes
    """

    with scope_ops(name="foo") as subgraph:
        w = ng.variable(ng.make_axis(), initial_value=1, name="W")
        y = w * input_placeholder
        z = y + 4
        v1 = ng.persistent_tensor(w.axes, initial_value=0, name="effect1")
        v2 = ng.persistent_tensor(w.axes, initial_value=0, name="effect2")
        ng.sequential([ng.assign(v1, w), ng.assign(v2, w), z.named("output")])

    assert len(subgraph.inputs) == 1
    assert input_placeholder.unscoped_name in subgraph.inputs

    assert len(subgraph.variables) == 1
    assert "W" in subgraph.variables

    assert len(subgraph.outputs) == 1
    assert "output" in subgraph.outputs

    assert len(subgraph.side_effects) == 2
Example #24
 def variable_update(self, variable, grad, scale_factor):
     updates = []
     velocity = ng.persistent_tensor(
         axes=variable.axes, initial_value=0.).named(variable.name + '_vel')
     # add metadata to the gradient node indicating that
     # it should be reduced across data-parallel workers before used for optimization
     grad.metadata['reduce_func'] = 'sum'
     clip_grad = clip_gradient_value(grad, self.gradient_clip_value)
     lr = -self.lrate * (scale_factor * clip_grad + self.wdecay * variable)
     updates.append(ng.assign(velocity, velocity * self.momentum_coef + lr))
     if self.nesterov:
         delta = (self.momentum_coef * velocity + lr)
     else:
         delta = velocity
     updates.append(ng.assign(variable, variable + delta))
     return ng.sequential(updates)
Example #25
    def __call__(self, cost_func):
        with ng.Op.saved_user_deps():
            state_updates, param_updates = [], []
            batch_cost = ng.sum(cost_func, out_axes=())
            batch_size = cost_func.axes.batch_axes()[0].length

            grads = [
                ng.deriv(batch_cost, v) / batch_size
                for v in batch_cost.variables()
            ]
            scale_factor = clip_gradient_norm(
                grads) if self.gradient_clip_norm else 1

            epsilon, decay = (self.epsilon, self.decay_rate)
            for i, (variable,
                    grad) in enumerate(zip(batch_cost.variables(), grads)):
                grad = clip_gradient_value(grad, self.gradient_clip_value)

                state = ng.persistent_tensor(axes=variable.axes,
                                             initial_value=0.)
                state_updates.append(
                    ng.assign(lvalue=state,
                              rvalue=decay * state +
                              (1.0 - decay) * ng.square(grad)).named(
                                  'state_u_%s' % i))

                param_updates.append(
                    ng.assign(
                        lvalue=variable,
                        rvalue=variable -
                        ((scale_factor * grad * self.learning_rate) /
                         (ng.sqrt(state + epsilon) + epsilon)),
                    ).named('var_u_%s' % i))

            lr_update = [
                ng.assign(
                    self.learning_rate,
                    self.schedule.get_learning_rate(self.learning_rate,
                                                    self.iteration_index))
            ]

            updates = ng.doall(state_updates + param_updates + lr_update)
            self.iteration_index += 1

        return updates
Example #26
 def __init__(self,
              learning_rate,
              momentum_coef=0.0,
              stochastic_round=False,
              wdecay=0.0,
              gradient_clip_norm=None,
              gradient_clip_value=None,
              name=None,
              schedule=Schedule(),
              **kwargs):
     super(GradientDescentMomentum, self).__init__(**kwargs)
     self.momentum_coef = momentum_coef
     self.gradient_clip_norm = gradient_clip_norm
     self.gradient_clip_value = gradient_clip_value
     self.wdecay = wdecay
     self.schedule = schedule
     self.stochastic_round = stochastic_round
     self.learning_rate = ng.persistent_tensor(
         axes=(), initial_value=learning_rate).named('lrate')
Example #27
def test_normal_negative_mean():
    """TODO."""
    M = ng.make_axis(100).named('M')
    N = ng.make_axis(100).named('N')

    mean = -0.5
    std = 1.0

    ng_a = ng.persistent_tensor([M, N], initial_value=10.0)
    ng_a = ng.normal(ng_a, loc=mean, scale=std)

    with executor(ng_a) as ex:
        result = ex()
    print(np.mean(result))
    print(np.std(result))

    assert np.allclose(np.mean(result), mean, rtol=0.1, atol=0.02)
    assert np.allclose(np.std(result), std, rtol=0.1, atol=0.02)
    assert not np.all(result >= 0.0)
    assert not np.all(result < 0.0)
Example #28
def test_specific_slice_deriv():
    # check d(slice)/dx for every element (i, j) of a persistent tensor
    with ExecutorFactory() as ex:
        A = ng.make_axis(name='A', length=3)
        B = ng.make_axis(name='B', length=4)
        np_shape = (A.length, B.length)
        x_np = np.empty(np_shape, dtype=np.float32)
        for i in range(A.length):
            for j in range(B.length):
                x_np[i, j] = 10 * i + j
        x_ng = ng.persistent_tensor([A, B], initial_value=x_np)
        for i in range(A.length):
            for j in range(B.length):
                slice = ng.tensor_slice(x_ng, (i, j))
                dslice_dx = ng.deriv(slice, x_ng)
                dslice_dx_fun = ex.executor(dslice_dx)
                dslice_dx_val = dslice_dx_fun()
                dslice_dx_np = np.zeros_like(x_np)
                dslice_dx_np[i, j] = 1
                ng.testing.assert_allclose(dslice_dx_val, dslice_dx_np)
Example #29
def input_tensor():
    axes = ng.make_axes([ng.make_axis(length=5), ng.make_axis(length=8)])
    return ng.persistent_tensor(axes, initial_value=10.0)
Example #30
def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(axes=x.axes[0],
                                    initial_value=np.array([[104., 119.,
                                                             127.]]))
    y = ng.expand_dims((x - bgr_mean) / 255., ax.D, 1)
    return y