def build_regressor_computations():
    train_preds = predictions(encoder, affine_layer, inputs['X'])
    train_loss = ng.squared_L2(train_preds - inputs['y'])

    # Cost calculation
    batch_cost = ng.sequential([optimizer(train_loss),
                                ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    with Layer.inference_mode_on():
        eval_preds = predictions(encoder, affine_layer, inputs['X'])
        eval_loss = ng.mean(ng.squared_L2(eval_preds - inputs['y']), out_axes=())
    loss_computation = ng.computation([eval_loss], "all")

    return train_computation, loss_computation
def build_seq2seq_computations():
    # Training loss, optimizer
    train_decoded = recurrent_model.encode_and_decode(encoder, decoder,
                                                      inputs['X'], previous)
    train_loss = ng.squared_L2(target - train_decoded)
    batch_cost = ng.sequential([optimizer(train_loss),
                                ng.mean(train_loss, out_axes=())])
    train_computation = ng.computation(batch_cost, "all")

    # Evaluation loss
    with Layer.inference_mode_on():
        eval_decoded = recurrent_model.encode_and_generate(encoder, decoder,
                                                           inputs['X'], in_axes)
        eval_loss = ng.mean(ng.squared_L2(target - eval_decoded), out_axes=())
    loss_computation = ng.computation([eval_loss], "all")

    return train_computation, loss_computation
def clip_gradient_norm(grad_list, clip_norm=None):
    """
    Returns a scaling factor to apply to the gradients.

    The scaling factor is computed such that the global L2 norm of the
    scaled gradients across all layers will be less than or equal to the
    provided clip_norm value. This factor is always <= 1, so it never
    scales up the gradients.

    Arguments:
        grad_list (list): List of gradients for each layer parameter
        clip_norm (float, optional): Target norm for the gradients. If not
                                     provided, the returned scale_factor
                                     will equal 1.

    Returns:
        Computed scale factor (float)
    """
    if clip_norm is None:
        return 1
    else:
        # Accumulate the squared L2 norm over all gradient tensors
        s = None
        for param in grad_list:
            term = ng.squared_L2(param, out_axes=None)
            if s is None:
                s = term
            else:
                s = s + term
        s = ng.sqrt(s)
        # Scale by clip_norm / s when s > clip_norm, otherwise leave unchanged
        return clip_norm / ng.maximum(s, clip_norm)
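# A minimal NumPy sketch (not part of the code above; numpy_clip_scale is a
# hypothetical helper) of the same arithmetic, to make the clipping behaviour
# concrete: the returned factor rescales the global gradient norm down to
# clip_norm when it is too large, and equals 1 when the gradients are
# already within the target.
import numpy as np

def numpy_clip_scale(grads, clip_norm):
    total_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    return clip_norm / max(total_norm, clip_norm)

grads = [np.array([3.0, 4.0])]                          # global norm = 5
assert np.isclose(numpy_clip_scale(grads, 1.0), 0.2)    # scales 5 down to 1
assert numpy_clip_scale(grads, 10.0) == 1.0             # already small enough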
def test_placeholder(transformer_factory):
    W = ng.make_axis(length=10)
    H = ng.make_axis(length=20)

    # Pass array through a placeholder
    aaxes = ng.make_axes([W, H])
    ashape = aaxes.lengths
    asize = aaxes.size
    aval = np.arange(asize, dtype=np.float32).reshape(ashape)

    x = ng.placeholder(aaxes)
    d = 2 * x
    d2 = ng.squared_L2(x, out_axes=None)

    with ExecutorFactory() as ex:
        # Return placeholder, param is placeholder
        placeholder_fun = ex.executor(x, x)
        prod_fun = ex.executor([d, d2], x)

        cval = placeholder_fun(aval)
        ng.testing.assert_allclose(cval, aval)

        # Pass a different array through
        u = rng.uniform(-1.0, 1.0, aaxes)
        cval = placeholder_fun(u)
        ng.testing.assert_allclose(cval, u)

        cval, s = prod_fun(aval)
        ng.testing.assert_allclose(cval, aval * 2)
        ng.testing.assert_allclose(s[()], np.dot(aval.flatten(), aval.flatten()))

        cval, s = prod_fun(u)
        u2 = u * 2
        ng.testing.assert_allclose(cval, u2)
        ng.testing.assert_allclose(s[()], np.dot(u.flatten(), u.flatten()))
def test_one_dot_bprop_allreduce(config):
    c = config
    pytest.xfail(
        "GPU child transformers generate errors during AssignLayouts graph pass #1651"
    )

    H_axis = ng.make_axis(length=4, name='height')
    W_axis = ng.make_axis(length=6, name='width')
    with ng.metadata(step='input'):
        X = ng.placeholder(axes=[H_axis, W_axis])
        target = ng.constant(1, axes=[W_axis])

    with ng.metadata(device_id=c['device_id'], parallel=W_axis):
        W = ng.variable(axes=[H_axis], initial_value=UniformInit(1, 1))
        dot = ng.dot(W, X)
        L = ng.squared_L2(target - dot, out_axes=())
        grad = ng.deriv(L, W)
        grad.metadata['reduce_func'] = c['func']
        update = (W - grad)

    with closing(ngt.make_transformer_factory('hetr')()) as hetr:
        out_comp = hetr.computation([update], X)
        result = out_comp(c['input'])
        np.testing.assert_array_equal(result, c['expected_result'])
def test_squared_L2():
    H = ng.make_axis(2)
    W = ng.make_axis(3)
    N = ng.make_axis(5, name='N')
    axes = ng.make_axes([H, W, N])
    a = ng.constant(np.ones(axes.lengths), axes=axes)

    with ExecutorFactory() as factory:
        # Default: reduce over the sample axes, keeping one value per batch item
        l2_samples_fun = factory.executor(ng.squared_L2(a))
        l2_samples_val = np.ones([N.length]) * H.length * W.length

        # out_axes=[]: reduce over every axis, producing a scalar
        l2_all_fun = factory.executor(ng.squared_L2(a, out_axes=[]))
        l2_all_val = np.ones([]) * W.length * H.length * N.length

        # reduction_axes: reduce over the listed axes only, keeping W
        l2_W_fun = factory.executor(ng.squared_L2(a, reduction_axes=[H, N]))
        l2_W_val = np.ones([W.length]) * H.length * N.length

        l2_samples_result = l2_samples_fun()
        l2_all_result = l2_all_fun()
        l2_W_result = l2_W_fun()

        ng.testing.assert_allclose(l2_samples_val, l2_samples_result)
        ng.testing.assert_allclose(l2_all_val, l2_all_result)
        ng.testing.assert_allclose(l2_W_val, l2_W_result)
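# For reference, a NumPy analogue of the three reductions exercised above
# (a sketch, not part of the test suite): squared_L2 is a sum of squares
# over the reduced axes. With a = np.ones((2, 3, 5)) laid out as (H, W, N):
import numpy as np

a = np.ones((2, 3, 5))
per_sample = (a ** 2).sum(axis=(0, 1))   # shape (5,), like ng.squared_L2(a)
scalar = (a ** 2).sum()                  # shape (),  like out_axes=[]
per_W = (a ** 2).sum(axis=(0, 2))        # shape (3,), like reduction_axes=[H, N]
assert per_sample.shape == (5,) and scalar.shape == () and per_W.shape == (3,)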
def SquaredL2Distance(self, c2_op, inputs):
    """
    Computes squared L2 distance between two inputs.

    Arguments:
        c2_op: OperatorDef object, the caffe2 node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the caffe2 node.
    """
    x, y = inputs
    y = ng.cast_axes(y, x.axes)
    out_axes = y.axes.batch_axes() if y.axes.batch_axes() else y.axes[0]
    return 0.5 * ng.squared_L2(x - y, out_axes=out_axes)
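# A minimal NumPy sketch of what the converted op computes, assuming a 2-D
# input with the batch on the first axis (matching Caffe2's SquaredL2Distance
# semantics): half the per-example sum of squared differences.
import numpy as np

def squared_l2_distance(x, y):
    return 0.5 * ((x - y) ** 2).reshape(x.shape[0], -1).sum(axis=1)

x = np.array([[1.0, 2.0], [3.0, 4.0]])
y = np.zeros_like(x)
print(squared_l2_distance(x, y))   # [ 2.5  12.5]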
def ngraph_l2_norm(np_array):
    """
    Computes the L2 norm of a numpy array by building and executing an
    equivalent ngraph computation.

    Arguments:
        np_array (np.ndarray): Input array.

    Returns:
        The L2 norm of np_array, i.e. the square root of the sum of its
        squared elements.
    """
    axes = ()
    for i, l in enumerate(np_array.shape):
        axes += (ng.make_axis(name='axis%s' % i, length=l),)
    np_tensor = ng.constant(np_array, axes)
    var = ng.variable(axes, initial_value=np_tensor)
    return executor(ng.sqrt(ng.squared_L2(var)))()
def clip_gradient_norm(grad_list, clip_norm, bsz):
    """
    Returns a scaling factor clip_norm / max(s, clip_norm), where s is the
    batch-averaged sum of squared L2 norms of the gradients. The factor is
    always <= 1, so it never scales up the gradients. Note that, unlike the
    variant above, no square root is taken before comparing against
    clip_norm.

    Arguments:
        grad_list (list): List of gradients for each layer parameter
        clip_norm (float): Target norm for the gradients
        bsz (int): Batch size used to average the accumulated squared norm

    Returns:
        Computed scale factor
    """
    s = None
    for param in grad_list:
        term = ng.squared_L2(param)
        if s is None:
            s = term
        else:
            s = s + term
    s = s / bsz
    # ng.maximum is the elementwise maximum; ng.max is an axis reduction
    # and would be incorrect here
    return clip_norm / ng.maximum(s, clip_norm)
def __init__(self):
    self.ng_computation = lambda Y, T: ng.squared_L2(Y - T, out_axes=()) / 2
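# Why the factor of 1/2: with L(Y) = ||Y - T||^2 / 2, the gradient with
# respect to Y is simply (Y - T), with no stray constant. A quick NumPy
# finite-difference sketch (illustrative only) of that identity:
import numpy as np

Y = np.array([1.0, 2.0, 3.0])
T = np.array([0.5, 2.5, 2.0])
eps = 1e-6
loss = lambda y: 0.5 * np.sum((y - T) ** 2)
num_grad = np.array([(loss(Y + eps * e) - loss(Y - eps * e)) / (2 * eps)
                     for e in np.eye(3)])
np.testing.assert_allclose(num_grad, Y - T, rtol=1e-5)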
def __init__(self, state_axes, action_size, batch_size, model,
             learning_rate=0.0001):
    """
    For now, model must be a function which takes action_axes, and
    returns a neon container.
    """
    super(ModelWrapper, self).__init__()

    self.axes = Namespace()
    self.axes.state = make_axes(state_axes, name='state')
    self.axes.action = ng.make_axis(name='action', length=action_size)
    self.axes.n = ng.make_axis(name='N', length=batch_size)
    self.axes.n1 = ng.make_axis(name='N', length=1)

    # placeholders
    self.state = ng.placeholder(self.axes.state + [self.axes.n])
    self.state_single = ng.placeholder(self.axes.state + [self.axes.n1])
    self.target = ng.placeholder([self.axes.action, self.axes.n])

    # these q functions have the same structure but different variables
    self.q_function = model(self.axes.action)
    self.q_function_target = model(self.axes.action)

    # construct inference computation
    with neon.Layer.inference_mode_on():
        inference = self.q_function(self.state)
        inference_computation = ng.computation(inference, self.state)

    # construct inference target computation
    with neon.Layer.inference_mode_on():
        inference_target = self.q_function_target(self.state)
        inference_target_computation = ng.computation(inference_target,
                                                      self.state)

    # construct inference computation for evaluating a single observation
    with neon.Layer.inference_mode_on():
        inference_single = self.q_function(self.state_single)
        inference_computation_single = ng.computation(inference_single,
                                                      self.state_single)

    # update q function target weights with values from q function
    # assumes that the variables in each are in the same order
    update_computation = ng.computation(ng.doall([
        ng.assign(target_variable,
                  ng.cast_axes(variable, target_variable.axes))
        for target_variable, variable in zip(
            self.q_function_target.variables.values(),
            self.q_function.variables.values())
    ]))

    # construct training computation
    loss = ng.squared_L2(self.q_function(self.state) - self.target)
    optimizer = neon.RMSProp(
        learning_rate=learning_rate,
        gradient_clip_value=1,
    )
    train_output = ng.sequential([
        optimizer(loss),
        loss,
    ])
    train_computation = ng.computation(train_output, self.state, self.target)

    # now bind computations we are interested in
    self.transformer = ng.transformers.make_transformer()
    self.inference_function = self.transformer.add_computation(
        inference_computation)
    self.inference_target_function = self.transformer.add_computation(
        inference_target_computation)
    self.inference_function_single = self.transformer.add_computation(
        inference_computation_single)
    self.train_function = self.transformer.add_computation(
        train_computation)
    self.update_function = self.transformer.add_computation(
        update_computation)

    # run a single update to ensure that both q functions have the same
    # initial weights
    self.update()
# At iteration (num_iterations // 5), the learning rate is multiplied by
# gamma (new lr = .005)
# At iteration (num_iterations // 2), it is multiplied by gamma again
# (new lr = .0005)
schedule = [num_iterations // 5, num_iterations // 2]
learning_rate_policy = {'name': 'schedule',
                        'schedule': schedule,
                        'gamma': 0.1,
                        'base_lr': 0.05}
optimizer = Adam(learning_rate=learning_rate_policy,
                 iteration=inputs['iteration'],
                 gradient_clip_value=1)

# Define the loss function (squared L2 loss)
fwd_prop = seq1(inputs['X'])
train_loss = ng.squared_L2(fwd_prop - inputs['y'])

# Cost calculation
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

# Forward prop of test set
# Required for correct functioning of batch norm and dropout layers
# during inference mode
with Layer.inference_mode_on():
    inference_prob = seq1(inputs['X'])
    eval_loss = ng.squared_L2(inference_prob - inputs['y'])
eval_outputs = dict(l2_loss=eval_loss)

# Define computations
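# A small plain-Python sketch of how the 'schedule' policy above evolves the
# learning rate (lr_at is a hypothetical helper, using 200 and 500 as
# stand-ins for num_iterations // 5 and num_iterations // 2): lr is
# multiplied by gamma at each iteration listed in the schedule.
def lr_at(iteration, base_lr=0.05, gamma=0.1, schedule=(200, 500)):
    return base_lr * gamma ** sum(iteration >= s for s in schedule)

assert lr_at(0) == 0.05                        # before the first drop
assert lr_at(200) == 0.05 * 0.1                # first drop  -> .005
assert abs(lr_at(500) - 0.0005) < 1e-12        # second drop -> .0005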
model = recurrent_model.define_model(out_axis,
                                     celltype=args.modeltype,
                                     recurrent_units=hidden_sizes,
                                     return_sequence=False)

# Optimizer
if args.modeltype == "TCN":
    optimizer = Adam(learning_rate=args.lr,
                     gradient_clip_value=args.grad_clip_value)
else:
    optimizer = GradientDescentMomentum(
        learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)

# Define the loss function (squared L2 loss)
fwd_prop = model(inputs['X'])
train_loss = ng.squared_L2(fwd_prop - inputs['y'])

with Layer.inference_mode_on():
    preds = model(inputs['X'])
    preds = ng.axes_with_order(preds, out_axes)

eval_loss = ng.mean(ng.squared_L2(preds - inputs['y']), out_axes=())
eval_computation = ng.computation([eval_loss], "all")
predict_computation = ng.computation([preds], "all")

# Cost calculation
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

trainer = TimeseriesTrainer(optimizer, train_computation,
def cost(y, t):
    return ng.squared_L2(y - t) / 2
def mean_squared_error(targets, outputs, weights=1.0, scope=""):
    with name_scope(scope):
        # TODO: reduce mean over the action axis
        loss = ng.squared_L2(targets - outputs)
        weighted_loss = loss * weights
        return weighted_loss