def __init__(self, decay_rate=0.95, learning_rate=2e-3, epsilon=1e-6,
             gradient_clip_norm=None, gradient_clip_value=None, name=None,
             schedule=Schedule()):
    """
    Class constructor.

    Arguments:
        decay_rate (float): decay rate of states
        learning_rate (float): the multiplicative coefficient of updates
        epsilon (float): smoothing epsilon to avoid divide by zeros
        gradient_clip_norm (float, optional): Target gradient norm.
            Defaults to None.
        gradient_clip_value (float, optional): Value to element-wise clip
            gradients. Defaults to None.
        name (str, optional): Name to assign the optimizer.
            Defaults to None.
        schedule (neon.optimizers.optimizer.Schedule, optional): Learning
            rate schedule. Defaults to a constant.

    Notes:
        Only constant learning rate is supported currently.
    """
    super(RMSProp, self).__init__(name=name)
    self.state_list = None
    self.epsilon = epsilon
    self.decay_rate = decay_rate
    self.schedule = schedule
    self.gradient_clip_norm = gradient_clip_norm
    self.gradient_clip_value = gradient_clip_value
    self.learning_rate = ng.persistent_tensor(
        axes=(), initial_value=learning_rate).named('lrate')
def test_persistent_tensor():
    input_axes = ng.make_axes([
        ng.make_axis(10),
        ng.make_axis(3)
    ])
    bgr = ng.persistent_tensor(
        axes=input_axes,
        initial_value=np.array([113.9, 123.0, 125.3]))
    bgr_comp = ng.computation(bgr, "all")

    results = dict()
    weight_saver = Saver()
    with closing(ngt.make_transformer()) as transformer:
        bgr_func = transformer.add_computation(bgr_comp)
        weight_saver.setup_save(transformer=transformer,
                                computation=bgr_comp)
        results['saved'] = bgr_func().copy()
        weight_saver.save(filename="test_persistent_tensor")
    with closing(ngt.make_transformer()) as restore_transformer:
        bgr_refunc = restore_transformer.add_computation(bgr_comp)
        weight_saver.setup_restore(transformer=restore_transformer,
                                   computation=bgr_comp,
                                   filename="test_persistent_tensor")
        weight_saver.restore()
        results['restored'] = bgr_refunc().copy()
    os.remove("test_persistent_tensor.npz")
    assert np.allclose(results['saved'], results['restored'], atol=0)
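# Hedged aside: the Saver round-trip above boils down to an .npz round-trip.
# A self-contained NumPy analogue for reference; the file name and key are
# illustrative, not part of the Saver API.
import os
import numpy as np

vals = np.array([113.9, 123.0, 125.3])
np.savez("bgr_roundtrip", bgr=vals)             # writes bgr_roundtrip.npz
restored = np.load("bgr_roundtrip.npz")["bgr"]
os.remove("bgr_roundtrip.npz")
assert np.allclose(vals, restored, atol=0)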
def __call__(self, cost_func):
    with ng.Op.saved_user_deps():
        velocity_updates, param_updates = [], []
        batch_cost = ng.sum(cost_func, out_axes=())
        batch_size = cost_func.axes.batch_axes()[0].length
        scale_factor = 1
        for variable in batch_cost.variables():
            grad = clip_gradient_value(
                ng.deriv(batch_cost, variable) / batch_size,
                self.gradient_clip_value)
            velocity = ng.persistent_tensor(
                axes=variable.axes,
                initial_value=0.).named(variable.name + '_vel')
            velocity_updates.append(
                ng.assign(
                    velocity,
                    velocity * self.momentum_coef -
                    self.learning_rate * (scale_factor * grad +
                                          self.wdecay * variable)))
            param_updates.append(ng.assign(variable, variable + velocity))
        lr_update = [
            ng.assign(
                self.learning_rate,
                self.schedule.get_learning_rate(self.learning_rate,
                                                self.iteration_index))
        ]
        updates = ng.doall(velocity_updates + param_updates + lr_update)
    self.iteration_index += 1
    return updates
def __call__(self, cost_func):
    all_updates = []
    batch_cost = ng.sum(cost_func, out_axes=())
    batch_size = cost_func.axes.batch_axes()[0].length
    grads = [
        ng.deriv(batch_cost, v) / batch_size
        for v in batch_cost.variables()
    ]
    scale_factor = clip_gradient_norm(grads, batch_size,
                                      self.gradient_clip_norm)
    epsilon, decay = (self.epsilon, self.decay_rate)
    for variable, grad in zip(batch_cost.variables(), grads):
        grad = clip_gradient_value(grad, self.gradient_clip_value)
        state = ng.persistent_tensor(axes=variable.axes, initial_value=0.)
        all_updates.append(
            ng.sequential([
                ng.assign(state,
                          decay * state + (1.0 - decay) * ng.square(grad)),
                ng.assign(
                    variable,
                    variable - ((scale_factor * grad * self.lrate) /
                                (ng.sqrt(state + epsilon) + epsilon)))
            ]))
    return ng.doall(all_updates)
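# Hedged reference: a minimal NumPy sketch of the same RMSProp rule, handy
# for sanity-checking the graph version above. Names (w, g, s) and defaults
# mirror the snippet but are illustrative, not ngraph API.
import numpy as np

def rmsprop_step(w, g, s, lrate=2e-3, decay=0.95, epsilon=1e-6):
    # s is a decayed average of squared gradients; note epsilon appears
    # both under the sqrt and added to it, matching the update above
    s = decay * s + (1.0 - decay) * np.square(g)
    w = w - lrate * g / (np.sqrt(s + epsilon) + epsilon)
    return w, s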
def cifar_mean_subtract(x):
    # subtract the per-channel BGR mean, then scale pixels to [0, 1)
    bgr_mean = ng.persistent_tensor(
        axes=[x.axes.channel_axis()],
        initial_value=np.array([104., 119., 127.]))
    return (x - bgr_mean) / 255.
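# Hedged check: the same normalization on a plain NumPy array, assuming a
# channel-first (C, H, W) layout; the layout is an assumption made for
# illustration only.
import numpy as np

def mean_subtract_np(x):
    bgr_mean = np.array([104., 119., 127.])
    # broadcast the 3-vector mean over the spatial dimensions
    return (x - bgr_mean[:, None, None]) / 255.

out = mean_subtract_np(np.full((3, 32, 32), 127.))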
def __call__(self, cost_func):
    all_updates = []
    batch_cost = ng.sum(cost_func, out_axes=())
    batch_size = cost_func.axes.batch_axes()[0].length
    grads = [
        ng.deriv(batch_cost, v) / batch_size
        for v in batch_cost.variables()
    ]
    scale_factor = clip_gradient_norm(grads, batch_size,
                                      self.gradient_clip_norm)
    for variable, grad in zip(batch_cost.variables(), grads):
        updates = []
        velocity = ng.persistent_tensor(
            axes=variable.axes,
            initial_value=0.).named(variable.name + '_vel')
        clip_grad = clip_gradient_value(grad, self.gradient_clip_value)
        lr = -self.lrate * (scale_factor * clip_grad +
                            self.wdecay * variable)
        updates.append(
            ng.assign(velocity, velocity * self.momentum_coef + lr))
        if self.nesterov:
            delta = (self.momentum_coef * velocity + lr)
        else:
            delta = velocity
        updates.append(ng.assign(variable, variable + delta))
        all_updates.append(ng.sequential(updates))
    return ng.doall(all_updates)
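# Hedged reference: plain-NumPy sketch of the velocity update above,
# including the Nesterov branch; names and defaults are illustrative.
import numpy as np

def momentum_step(w, g, v, lrate=0.1, momentum=0.9, wdecay=0.0,
                  nesterov=False):
    step = -lrate * (g + wdecay * w)              # `lr` in the snippet above
    v = momentum * v + step                       # velocity update
    delta = momentum * v + step if nesterov else v
    return w + delta, v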
def UniformFill(self, c2_op, inputs):
    """
    Creates a constant tensor with uniform fill.

    Arguments:
        c2_op: OperatorDef object, the caffe2 node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        An ngraph Op corresponding to the caffe2 node.

    Inputs to c2_op: min, max, shape, name
    """
    # parse protobuf arguments
    args = {arg.name: arg for arg in c2_op.arg}

    # convert to numpy value
    np_val = np.random.uniform(args["min"].f, args["max"].f,
                               tuple(args["shape"].ints))

    ng_const = make_const_op(np_val, np_val.shape, c2_op.name)
    # TODO simplify
    ng_placeholder = ng.persistent_tensor(axes=ng_const.axes,
                                          initial_value=ng_const)
    return ng_placeholder
def GaussianFill(self, c2_op, inputs):
    """
    Creates a constant tensor with Gaussian fill.

    Arguments:
        c2_op: OperatorDef object, the caffe2 node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        An ngraph Op corresponding to the caffe2 node.

    Inputs to c2_op: mean, std, shape, name
    """
    # parse protobuf arguments
    args = {arg.name: arg for arg in c2_op.arg}
    mean = args["mean"].f if "mean" in args else 0
    std = args["std"].f if "std" in args else 1

    # convert to numpy value
    np_val = np.random.normal(mean, std, tuple(args["shape"].ints))

    ng_const = make_const_op(np_val, np_val.shape, c2_op.name)
    # TODO simplify
    ng_placeholder = ng.persistent_tensor(axes=ng_const.axes,
                                          initial_value=ng_const)
    return ng_placeholder
def ConstantFill(self, c2_op, inputs):
    """
    Creates a constant tensor with constant fill.

    Arguments:
        c2_op: OperatorDef object, the caffe2 node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        An ngraph Op corresponding to the caffe2 node.

    Inputs to c2_op: value, dtype, shape, name
    """
    # parse protobuf arguments
    args = {arg.name: arg for arg in c2_op.arg}
    value = args["value"].i \
        if ("dtype" in args and args["dtype"].i == c2core.DataType.INT32) \
        else args["value"].f

    # convert to numpy value
    np_val = np.full(tuple(args["shape"].ints), value)

    ng_const = make_const_op(np_val, np_val.shape, c2_op.name)
    # TODO simplify
    ng_placeholder = ng.persistent_tensor(axes=ng_const.axes,
                                          initial_value=ng_const)
    return ng_placeholder
def GivenTensorFill(self, c2_op, inputs):
    """
    Creates a constant tensor with values provided.

    Arguments:
        c2_op: OperatorDef object, the caffe2 node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        An ngraph Op corresponding to the caffe2 node.

    Inputs to c2_op: values, shape, name
    """
    # parse arguments
    args = {arg.name: arg for arg in c2_op.arg}

    # convert the flat list of values to a numpy array of the given shape
    values = list(args["values"].floats)
    shape = list(args["shape"].ints)
    np_val = np.array(values).reshape(shape)

    ng_const = make_const_op(np_val, np_val.shape, c2_op.name)
    # TODO simplify
    ng_placeholder = ng.persistent_tensor(axes=ng_const.axes,
                                          initial_value=ng_const)
    return ng_placeholder
def __call__(self, in_obj, keep=None, **kwargs):
    if self.mask is None:
        in_axes = in_obj.axes.sample_axes()
        self.mask = ng.persistent_tensor(axes=in_axes).named('mask')
    self.mask = ng.less_equal(
        ng.uniform(self.mask, low=0.0, high=1.0), keep)
    return ng.multiply(self.mask, in_obj) * (1. / keep)
def __call__(self, in_obj):
    if Layer.inference_mode:
        return self.keep * in_obj
    else:
        if self.mask is None:
            in_axes = in_obj.axes.sample_axes()
            self.mask = ng.persistent_tensor(axes=in_axes).named('mask')
        self.mask = ng.uniform(self.mask, low=0.0, high=1.0) <= self.keep
        return self.mask * in_obj
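# Hedged note: the earlier dropout snippet rescales by 1/keep at train time
# ("inverted" dropout), while this variant instead scales by keep at
# inference; both conventions preserve the expected activation. A NumPy
# sketch of this variant (names illustrative):
import numpy as np

def dropout_np(x, keep, inference=False, rng=np.random):
    if inference:
        return keep * x                            # match E[mask] at test time
    mask = rng.uniform(size=x.shape) <= keep       # Bernoulli(keep) mask
    return mask * x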
def __init__(self, rho=0.9, eps=1e-3, **kwargs):
    # rho needs to be allocated storage because it will be changed
    # dynamically during tuning
    self.rho = ng.persistent_tensor(axes=(), initial_value=rho).named('rho')
    self.eps = eps
    self.gamma = None
    self.beta = None
    self.gmean = None
    self.gvar = None
def variable_update(self, variable, grad, scale_factor):
    grad = clip_gradient_value(grad, self.gradient_clip_value)
    state = ng.persistent_tensor(axes=grad.axes, initial_value=0.)
    updates = ng.sequential([
        ng.assign(state, state + ng.square(grad)),
        ng.assign(
            variable,
            variable - (scale_factor * self.lrate * grad) /
            (ng.sqrt(state + self.epsilon)))
    ])
    return updates
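# Hedged reference: NumPy sketch of the Adagrad accumulation above; the
# state grows monotonically, so the effective step size only shrinks.
# Names and defaults are illustrative.
import numpy as np

def adagrad_step(w, g, s, lrate=0.01, epsilon=1e-6):
    s = s + np.square(g)
    w = w - lrate * g / np.sqrt(s + epsilon)
    return w, s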
def variable_update(self, variable, grad, scale_factor):
    epsilon, decay = (self.epsilon, self.decay_rate)
    grad = clip_gradient_value(grad, self.gradient_clip_value)
    state = ng.persistent_tensor(axes=variable.axes, initial_value=0.)
    updates = ng.sequential([
        ng.assign(state,
                  decay * state + (1.0 - decay) * ng.square(grad)),
        ng.assign(variable,
                  variable - ((scale_factor * grad * self.lrate) /
                              (ng.sqrt(state + epsilon) + epsilon)))
    ])
    return updates
def train_outputs(self, in_obj):
    in_axes = in_obj.axes.sample_axes()
    red_axes = ng.make_axes()
    if len(in_axes.role_axes(ar.Channel)) != 0:
        red_axes += in_axes.sample_axes() - in_axes.role_axes(ar.Channel)
    red_axes += in_obj.axes.batch_axes()
    out_axes = in_axes - red_axes

    self.gamma = self.gamma or ng.variable(
        axes=out_axes, initial_value=1.0).named('gamma')
    self.beta = self.beta or ng.variable(
        axes=out_axes, initial_value=0.0).named('beta')
    self.gvar = self.gvar or ng.persistent_tensor(
        axes=out_axes, initial_value=1.0)
    self.gmean = self.gmean or ng.persistent_tensor(
        axes=out_axes, initial_value=1.0)

    xmean = ng.mean(in_obj, reduction_axes=red_axes)
    xvar = ng.variance(in_obj, reduction_axes=red_axes)
    ng.assign(self.gmean, self.gmean * self.rho + xmean * (1.0 - self.rho))
    ng.assign(self.gvar, self.gvar * self.rho + xvar * (1.0 - self.rho))
    return self.gamma * (in_obj - xmean) / ng.sqrt(xvar + self.eps) + self.beta
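# Hedged reference: the running-statistics update above as plain NumPy;
# gmean and gvar decay toward the batch statistics at rate (1 - rho).
# The reduction axis is an assumption made for illustration.
import numpy as np

def update_running_stats(x, gmean, gvar, rho=0.9, axis=0):
    gmean = rho * gmean + (1.0 - rho) * x.mean(axis=axis)
    gvar = rho * gvar + (1.0 - rho) * x.var(axis=axis)
    return gmean, gvar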
def test_write_state():
    """
    This reads back a tensor set from an argument. No code is generated.
    """
    with ExecutorFactory() as ex:
        N = ng.make_axis(3, name='N')
        x_np = np.ones(N.length) * 4
        x = ng.persistent_tensor([N]).named('x')
        f = ex.executor(x, x)
        x_val = f(x_np)
        assert np.allclose(x_np, x_val)
def __call__(self, in_obj, **kwargs):
    if Layer.inference_mode:
        return self.keep * in_obj
    else:
        if self.mask is None:
            in_axes = in_obj.axes.sample_axes()
            channel_axes = ng.make_axes([in_axes.channel_axis()])
            self.mask = ng.persistent_tensor(
                axes=channel_axes).named('channel_mask')
        self.mask = ng.uniform(self.mask, low=0.0, high=1.0) <= self.keep
        return self.mask * in_obj
def __call__(self, in_obj):
    if not self.initialized:
        w_axis = ng.make_axis()
        self.weight = ng.variable(axes=[w_axis],
                                  initial_value=2,
                                  metadata={"label": LABELS["weight"]})
        self.side_effect = ng.persistent_tensor(axes=[w_axis],
                                                initial_value=0)

    return ng.sequential([ng.assign(self.side_effect, self.weight),
                          self.weight * in_obj])
def __call__(self, *args, **kwargs):
    if len(self.ops) == 0:
        self.beta_1 = ng.constant(self.beta_1, dtype=np.float32)
        self.beta_2 = ng.constant(self.beta_2, dtype=np.float32)
        self.t = ng.persistent_tensor(axes=(), initial_value=0)

    self.t = ng.sequential([ng.assign(self.t, self.t + 1), self.t])
    self.ell = self.lrate * ng.sqrt(1 - self.beta_2 ** self.t) \
        / (1 - self.beta_1 ** self.t)

    return super(Adam, self).__call__(*args, **kwargs)
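# Hedged reference: the bias-corrected Adam step size computed above as
# self.ell, written as a plain function; it approaches lrate as t grows.
# Names and defaults are illustrative.
import numpy as np

def adam_step_size(lrate, t, beta_1=0.9, beta_2=0.999):
    return lrate * np.sqrt(1 - beta_2 ** t) / (1 - beta_1 ** t)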
def variable_update(self, variable, grad, scale_factor):
    updates = []
    velocity = ng.persistent_tensor(
        axes=variable.axes,
        initial_value=0.).named(variable.name + '_vel')
    clip_grad = clip_gradient_value(grad, self.gradient_clip_value)
    lr = -self.lrate * (scale_factor * clip_grad + self.wdecay * variable)
    updates.append(ng.assign(velocity, velocity * self.momentum_coef + lr))
    if self.nesterov:
        delta = (self.momentum_coef * velocity + lr)
    else:
        delta = velocity
    updates.append(ng.assign(variable, variable + delta))
    return ng.sequential(updates)
def test_uniform_range_posneg(transformer_factory):
    """Uniform fill over a signed range should produce values in
    [-0.5, 0.5) with both signs present."""
    M = ng.make_axis(5, name='M')
    N = ng.make_axis(8, name='N')

    ng_a = ng.persistent_tensor([M, N], initial_value=10.0)
    ng_a = ng.uniform(ng_a, low=-0.5, high=0.5)

    result = executor(ng_a)()
    print(result)

    assert np.all(result < 0.5)
    assert np.all(result >= -0.5)
    assert not np.all(result >= 0.0)
def test_scope_ops(input_placeholder):
    """
    Test scope_ops creates a subgraph with correct attributes
    """
    with scope_ops(name="foo") as subgraph:
        w = ng.variable(ng.make_axis(), initial_value=1, name="W")
        y = w * input_placeholder
        z = y + 4
        v1 = ng.persistent_tensor(w.axes, initial_value=0, name="effect1")
        v2 = ng.persistent_tensor(w.axes, initial_value=0, name="effect2")
        ng.sequential([ng.assign(v1, w),
                       ng.assign(v2, w),
                       z.named("output")])

    assert len(subgraph.inputs) == 1
    assert input_placeholder.unscoped_name in subgraph.inputs
    assert len(subgraph.variables) == 1
    assert "W" in subgraph.variables
    assert len(subgraph.outputs) == 1
    assert "output" in subgraph.outputs
    assert len(subgraph.side_effects) == 2
def variable_update(self, variable, grad, scale_factor):
    updates = []
    velocity = ng.persistent_tensor(
        axes=variable.axes,
        initial_value=0.).named(variable.name + '_vel')
    # add metadata to the gradient node indicating that it should be
    # reduced across data-parallel workers before being used for optimization
    grad.metadata['reduce_func'] = 'sum'
    clip_grad = clip_gradient_value(grad, self.gradient_clip_value)
    lr = -self.lrate * (scale_factor * clip_grad + self.wdecay * variable)
    updates.append(ng.assign(velocity, velocity * self.momentum_coef + lr))
    if self.nesterov:
        delta = (self.momentum_coef * velocity + lr)
    else:
        delta = velocity
    updates.append(ng.assign(variable, variable + delta))
    return ng.sequential(updates)
def __call__(self, cost_func):
    with ng.Op.saved_user_deps():
        state_updates, param_updates = [], []
        batch_cost = ng.sum(cost_func, out_axes=())
        batch_size = cost_func.axes.batch_axes()[0].length
        grads = [
            ng.deriv(batch_cost, v) / batch_size
            for v in batch_cost.variables()
        ]
        scale_factor = clip_gradient_norm(grads) \
            if self.gradient_clip_norm else 1
        epsilon, decay = (self.epsilon, self.decay_rate)
        for i, (variable, grad) in enumerate(zip(batch_cost.variables(),
                                                 grads)):
            grad = clip_gradient_value(grad, self.gradient_clip_value)
            state = ng.persistent_tensor(axes=variable.axes,
                                         initial_value=0.)
            state_updates.append(
                ng.assign(
                    lvalue=state,
                    rvalue=decay * state + (1.0 - decay) * ng.square(grad)
                ).named('state_u_%s' % i))
            param_updates.append(
                ng.assign(
                    lvalue=variable,
                    rvalue=variable -
                    ((scale_factor * grad * self.learning_rate) /
                     (ng.sqrt(state + epsilon) + epsilon)),
                ).named('var_u_%s' % i))
        lr_update = [
            ng.assign(
                self.learning_rate,
                self.schedule.get_learning_rate(self.learning_rate,
                                                self.iteration_index))
        ]
        updates = ng.doall(state_updates + param_updates + lr_update)
    self.iteration_index += 1
    return updates
def __init__(self,
             learning_rate,
             momentum_coef=0.0,
             stochastic_round=False,
             wdecay=0.0,
             gradient_clip_norm=None,
             gradient_clip_value=None,
             name=None,
             schedule=Schedule(),
             **kwargs):
    super(GradientDescentMomentum, self).__init__(**kwargs)
    self.momentum_coef = momentum_coef
    self.gradient_clip_norm = gradient_clip_norm
    self.gradient_clip_value = gradient_clip_value
    self.wdecay = wdecay
    self.schedule = schedule
    self.stochastic_round = stochastic_round
    self.learning_rate = ng.persistent_tensor(
        axes=(), initial_value=learning_rate).named('lrate')
def test_normal_negative_mean():
    """Normal fill with a negative mean should match the requested mean
    and std, and produce values of both signs."""
    M = ng.make_axis(100).named('M')
    N = ng.make_axis(100).named('N')

    mean = -0.5
    std = 1.0

    ng_a = ng.persistent_tensor([M, N], initial_value=10.0)
    ng_a = ng.normal(ng_a, loc=mean, scale=std)

    with executor(ng_a) as ex:
        result = ex()
    print(np.mean(result))
    print(np.std(result))

    assert np.allclose(np.mean(result), mean, rtol=0.1, atol=0.02)
    assert np.allclose(np.std(result), std, rtol=0.1, atol=0.02)
    assert not np.all(result >= 0.0)
    assert not np.all(result < 0.0)
def test_specific_slice_deriv():
    A = ng.make_axis(name='A', length=3)
    B = ng.make_axis(name='B', length=4)
    np_shape = (A.length, B.length)
    x_np = np.empty(np_shape, dtype=np.float32)
    for i in range(A.length):
        for j in range(B.length):
            x_np[i, j] = 10 * i + j
    x_ng = ng.persistent_tensor([A, B], initial_value=x_np)
    with ExecutorFactory() as ex:
        for i in range(A.length):
            for j in range(B.length):
                slice = ng.tensor_slice(x_ng, (i, j))
                dslice_dx = ng.deriv(slice, x_ng)
                dslice_dx_fun = ex.executor(dslice_dx)
                dslice_dx_val = dslice_dx_fun()
                dslice_dx_np = np.zeros_like(x_np)
                dslice_dx_np[i, j] = 1
                ng.testing.assert_allclose(dslice_dx_val, dslice_dx_np)
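# Hedged reference: in NumPy terms, d(x[i, j])/dx is exactly the indicator
# array the test builds; shown standalone for clarity (names illustrative).
import numpy as np

def one_hot_like(x, i, j):
    d = np.zeros_like(x)
    d[i, j] = 1
    return d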
def input_tensor():
    axes = ng.make_axes([ng.make_axis(length=5), ng.make_axis(length=8)])
    return ng.persistent_tensor(axes, initial_value=10.0)
def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(
        axes=x.axes[0],
        initial_value=np.array([[104., 119., 127.]]))
    y = ng.expand_dims((x - bgr_mean) / 255., ax.D, 1)
    return y