def __call__(self, cost_func, variables=None, subgraph=None, warning=False):
    """
    Arguments:
        cost_func (Op): The cost function to optimize
        variables (list of variables): List of variables to optimize
        subgraph (SubGraph): A subgraph instance containing all variables to optimize
        warning (bool): If True, display a warning message if any of the
            specified variables do not participate in the batch cost computation

    .. Note::
        If `subgraph` is provided, the variables to optimize are taken from it.
        Otherwise, they can be provided explicitly by passing a list as
        `variables`. If neither `subgraph` nor `variables` is provided, the
        variables to optimize are all trainable variables on which `cost` depends.
    """
    all_updates = []
    batch_cost = ng.sum(cost_func, out_axes=())
    if cost_func.axes.batch_axis() is None:
        batch_size = 1
    else:
        batch_size = cost_func.axes.batch_axis().length

    # determine variables to optimize
    if subgraph is not None:
        if variables is not None:
            raise ValueError("variables and subgraph cannot both be specified.")
        variables = list(subgraph.variables.values())

    if variables is None:
        variables = batch_cost.variables()
    elif warning:
        all_variables = batch_cost.variables()
        selected_variables = all_variables & set(variables)
        if len(selected_variables) < len(variables):
            logger.warning("not all selected variables participate in cost computation")

    # gradients
    grads = [ng.deriv(batch_cost, v) / batch_size for v in variables]
    scale_factor = clip_gradient_norm(grads, self.gradient_clip_norm)

    # updates
    for variable, grad in zip(variables, grads):
        updates = self.variable_update(variable, grad, scale_factor)
        all_updates.append(updates)
    updates = ng.doall(all_updates)
    grads = ng.doall(grads)
    clips = ng.doall([ng.assign(variable,
                                clip_weight_value(variable, self.weight_clip_value))
                      for variable in variables])
    return ng.sequential([grads, updates, clips, 0])
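# Usage sketch, not part of the source: `loss`, `inputs`, and `targets` are
# assumed to be previously built ngraph Ops, and `GradientDescentMomentum`
# stands in for any optimizer class exposing this __call__. The wiring follows
# the computation/transformer pattern used elsewhere in this file.
optimizer = GradientDescentMomentum(learning_rate=0.1)
updates = optimizer(loss)  # optimize every trainable variable `loss` depends on
train_computation = ng.computation(ng.sequential([updates, loss]), inputs, targets)
train_function = ng.transformers.make_transformer().add_computation(train_computation)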
def get_restore_op(self):
    """
    Get a variable-restoring ngraph op from a TF model checkpoint.

    Returns:
        A `ng.doall` op that restores the weights stored in the TF model checkpoint
    """
    if self._graph is None:
        raise ValueError("self._graph is None, import meta_graph first.")
    if self._checkpoint_path is None:
        raise ValueError("self._checkpoint_path is None, please specify "
                         "checkpoint_path while importing meta_graph.")

    with self._graph.as_default():
        tf_variables = tf.all_variables()
        ng_variables = self.get_op_handle(tf_variables)
        ng_restore_ops = []
        with tf.Session() as sess:
            self.saver.restore(sess, self._checkpoint_path)
            # copy each checkpointed TF value into the corresponding ngraph variable
            for tf_variable, ng_variable in zip(tf_variables, ng_variables):
                val = sess.run(tf_variable)
                with ng.Op.saved_user_deps():
                    restore_op = ng.assign(ng_variable, val)
                ng_restore_ops.append(restore_op)
        with ng.Op.saved_user_deps():
            ng_restore_ops = ng.doall(ng_restore_ops)
        return ng_restore_ops
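# Usage sketch (assumption: `importer` is the TF importer instance that owns
# get_restore_op(), built from a meta graph with a checkpoint_path). A
# zero-argument computation suffices because the checkpoint values are baked
# into the assign ops at graph-construction time.
restore_op = importer.get_restore_op()
transformer = ng.transformers.make_transformer()
restore_function = transformer.add_computation(ng.computation(restore_op))
restore_function()  # copies the checkpointed weights into the ngraph variables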
def __call__(self, cost_func):
    with ng.Op.saved_user_deps():
        velocity_updates, param_updates = [], []
        batch_cost = ng.sum(cost_func, out_axes=())
        batch_size = cost_func.axes.batch_axes()[0].length

        grads = [ng.deriv(batch_cost, v) / batch_size
                 for v in batch_cost.variables()]
        scale_factor = clip_gradient_norm(grads, batch_size,
                                          self.gradient_clip_norm)

        # classical momentum with weight decay: update each velocity, then
        # apply it to the corresponding parameter
        for variable, grad in zip(batch_cost.variables(), grads):
            grad = clip_gradient_value(grad, self.gradient_clip_value)
            velocity = ng.persistent_tensor(
                axes=variable.axes, initial_value=0.).named(variable.name + '_vel')
            velocity_updates.append(
                ng.assign(velocity,
                          velocity * self.momentum_coef -
                          self.learning_rate * (scale_factor * grad +
                                                self.wdecay * variable)))
            param_updates.append(ng.assign(variable, variable + velocity))

        # advance the learning rate according to the schedule
        lr_update = [ng.assign(self.learning_rate,
                               self.schedule.get_learning_rate(self.learning_rate,
                                                               self.iteration_index))]
        updates = ng.doall(velocity_updates + param_updates + lr_update)
        self.iteration_index += 1
    return updates
def __call__(self, cost_func):
    all_updates = []
    batch_cost = ng.sum(cost_func, out_axes=())
    batch_size = cost_func.axes.batch_axes()[0].length
    grads = [ng.deriv(batch_cost, v) / batch_size
             for v in batch_cost.variables()]
    scale_factor = clip_gradient_norm(grads, batch_size,
                                      self.gradient_clip_norm)
    epsilon, decay = (self.epsilon, self.decay_rate)
    for variable, grad in zip(batch_cost.variables(), grads):
        grad = clip_gradient_value(grad, self.gradient_clip_value)
        # running average of squared gradients
        state = ng.persistent_tensor(axes=variable.axes, initial_value=0.)
        all_updates.append(
            ng.sequential([
                ng.assign(state,
                          decay * state + (1.0 - decay) * ng.square(grad)),
                ng.assign(variable,
                          variable - ((scale_factor * grad * self.lrate) /
                                      (ng.sqrt(state + epsilon) + epsilon)))
            ]))
    return ng.doall(all_updates)
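# Plain-Python illustration of the RMSProp rule applied above, with scalar
# stand-ins for the tensors; note that epsilon appears both inside the square
# root and as an outer guard, mirroring the code. Values are arbitrary.
decay, epsilon, lrate = 0.95, 1e-6, 0.01
state, grad, scale_factor = 0.0, 0.3, 1.0
state = decay * state + (1.0 - decay) * grad ** 2
delta = (scale_factor * grad * lrate) / ((state + epsilon) ** 0.5 + epsilon)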
def __call__(self, cost_func):
    all_updates = []
    batch_cost = ng.sum(cost_func, out_axes=())
    batch_size = cost_func.axes.batch_axes()[0].length
    grads = [ng.deriv(batch_cost, v) / batch_size
             for v in batch_cost.variables()]
    scale_factor = clip_gradient_norm(grads, batch_size,
                                      self.gradient_clip_norm)
    for variable, grad in zip(batch_cost.variables(), grads):
        updates = []
        velocity = ng.persistent_tensor(
            axes=variable.axes, initial_value=0.).named(variable.name + '_vel')
        clip_grad = clip_gradient_value(grad, self.gradient_clip_value)
        # negative step: learning rate times (scaled gradient + weight decay)
        lr = -self.lrate * (scale_factor * clip_grad + self.wdecay * variable)
        updates.append(ng.assign(velocity, velocity * self.momentum_coef + lr))
        if self.nesterov:
            # look-ahead step: apply the momentum coefficient once more to the
            # freshly updated velocity
            delta = (self.momentum_coef * velocity + lr)
        else:
            delta = velocity
        updates.append(ng.assign(variable, variable + delta))
        all_updates.append(ng.sequential(updates))
    return ng.doall(all_updates)
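# Plain-Python illustration of the two momentum variants above, with scalar
# stand-ins (`lr` is the negative step, as in the code). The Nesterov delta
# reuses the momentum coefficient on the already-updated velocity.
momentum_coef, velocity, lr = 0.9, 0.05, -0.01
velocity = momentum_coef * velocity + lr
nesterov_delta = momentum_coef * velocity + lr
classical_delta = velocity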
def __call__(self, cost_func, variable_scope=None):
    all_updates = []
    batch_cost = ng.sum(cost_func, out_axes=())
    batch_size = cost_func.axes.batch_axis().length

    # optionally restrict the update to variables created under a given scope
    selected_variables = batch_cost.variables()
    if variable_scope is not None:
        selected_variables = [op for op in selected_variables
                              if op.scope == variable_scope]
    grads = [ng.deriv(batch_cost, v) / batch_size for v in selected_variables]
    scale_factor = clip_gradient_norm(grads, batch_size, self.gradient_clip_norm)
    for variable, grad in zip(selected_variables, grads):
        updates = self.variable_update(variable, grad, scale_factor)
        all_updates.append(updates)
    updates = ng.doall(all_updates)
    grads = ng.doall(grads)
    return ng.sequential([grads, updates, 0])
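# Hedged usage sketch: restricting the update to variables created under a
# given scope. The scope name 'encoder' and the `loss` Op are hypothetical;
# variables must have been created with a matching scope=..., as in the LSTM
# layer further below.
updates = optimizer(loss, variable_scope='encoder')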
def NoOp(self, tf_node, inputs):
    """
    Does nothing. Only useful for implementing doall by applying control
    dependencies.

    Arguments:
        tf_node: NodeDef object, the tensorflow node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the tensorflow node.
    """
    if tf_node.name == "init":
        # TODO remove hardcoded name by passing in names for op
        return ng.doall(all=inputs)
    else:
        raise NotImplementedError
def __call__(self, cost_func):
    with ng.Op.saved_user_deps():
        state_updates, param_updates = [], []
        batch_cost = ng.sum(cost_func, out_axes=())
        batch_size = cost_func.axes.batch_axes()[0].length
        grads = [ng.deriv(batch_cost, v) / batch_size
                 for v in batch_cost.variables()]
        scale_factor = clip_gradient_norm(grads) if self.gradient_clip_norm else 1

        epsilon, decay = (self.epsilon, self.decay_rate)
        for i, (variable, grad) in enumerate(zip(batch_cost.variables(), grads)):
            grad = clip_gradient_value(grad, self.gradient_clip_value)
            state = ng.persistent_tensor(axes=variable.axes, initial_value=0.)
            state_updates.append(
                ng.assign(lvalue=state,
                          rvalue=decay * state + (1.0 - decay) * ng.square(grad)
                          ).named('state_u_%s' % i))
            param_updates.append(
                ng.assign(lvalue=variable,
                          rvalue=variable - ((scale_factor * grad * self.learning_rate) /
                                             (ng.sqrt(state + epsilon) + epsilon)),
                          ).named('var_u_%s' % i))

        lr_update = [ng.assign(self.learning_rate,
                               self.schedule.get_learning_rate(self.learning_rate,
                                                               self.iteration_index))]
        updates = ng.doall(state_updates + param_updates + lr_update)
        self.iteration_index += 1
    return updates
def minimize(self, cost, variables):
    """
    Minimize cost by returning update Ops.

    Arguments:
        cost: The cost Op to be minimized
        variables: TODO

    Returns:
        A doall op containing setitems to variable ops.
    """
    assert cost is not None
    assert variables is not None

    return ng.doall([ng.assign(variable,
                               variable - self.compute_lr_op * ng.deriv(cost, variable))
                     for variable in variables])
def minimize(self, cost):
    """
    Minimize cost by returning update Ops.

    Arguments:
        cost: The cost Op to be minimized

    Returns:
        A doall op containing setitems to variable ops.
    """
    variables = list(cost.variables())
    grads = [ng.deriv(cost, variable) for variable in variables]
    with ng.Op.saved_user_deps():
        param_updates = [
            ng.assign(variable, variable - self.lrate * grad)
            for variable, grad in zip(variables, grads)
        ]
        updates = ng.doall(param_updates)
    return updates
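# Sketch of how either minimize() variant would be wired up (the `cost` Op and
# its placeholder `x` are hypothetical; ng.sequential returns its last value,
# so the bound computation yields the post-update cost).
train_computation = ng.computation(ng.sequential([optimizer.minimize(cost), cost]), x)
train_function = ng.transformers.make_transformer().add_computation(train_computation)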
def __call__(self, in_obj, init_state=None):
    """
    Sets shape-based parameters of this layer given an input tuple, int,
    or input layer.

    Arguments:
        in_obj (int, tuple, Layer or Tensor): object that provides shape
                                              information for the layer
        init_state (tuple of Tensor): object that provides the initial state;
                                      for LSTM this includes the hidden state
                                      and the cell state

    Returns:
        rnn_out (Tensor): output
    """
    # try to understand the axes from the input
    if init_state is not None:
        assert len(init_state) == 2 and init_state[0].axes == init_state[1].axes
        self.interpret_axes(in_obj, init_state[0])
    else:
        self.interpret_axes(in_obj, init_state)

    # initialize the hidden states
    if init_state is not None:
        self.h_init = init_state[0]
        self.c_init = init_state[1]
    else:
        if self.reset_cells:
            self.h_init = ng.temporary(initial_value=0,
                                       axes=self.out_axes).named('h_init')
            self.c_init = ng.temporary(initial_value=0,
                                       axes=self.out_axes).named('c_init')
        else:
            self.h_init = ng.variable(initial_value=0,
                                      axes=self.out_axes).named('h_init')
            self.c_init = ng.variable(initial_value=0,
                                      axes=self.out_axes).named('c_init')

    # params are dictionaries keyed by gate: i, f, o, g
    self.W_input = {k: ng.variable(axes=self.w_in_axes,
                                   initial_value=self.init,
                                   scope=self.scope).named("W_in_{}".format(k))
                    for k in self.metadata['gates']}
    self.W_recur = {k: ng.variable(axes=self.w_re_axes,
                                   initial_value=self.init_inner,
                                   scope=self.scope).named("W_re_{}".format(k))
                    for k in self.metadata['gates']}
    self.b = {k: ng.variable(axes=self.out_feature_axes,
                             initial_value=0,
                             scope=self.scope).named("bias_{}".format(k))
              for k in self.metadata['gates']}

    h = self.h_init
    c = self.c_init
    h_list = []
    c_list = []

    # Compute feed-forward weighted inputs.
    # Batch norm is computed only on the weighted inputs,
    # as in https://arxiv.org/abs/1510.01378
    h_ff = dict()
    for k in self.metadata["gates"]:
        h_ff[k] = ng.dot(self.W_input[k], in_obj)
        if self.batch_norm is not None:
            h_ff[k] = self.batch_norm[k](h_ff[k])

    # slice the weighted inputs into time slices
    h_ff = get_steps(h_ff, self.recurrent_axis, self.backward)

    # recurrent computation
    for i in range(self.recurrent_axis.length):
        with ng.metadata(recurrent_step=str(i)):
            [h, c] = self._step(h_ff[i], [h, c])
            h_list.append(h)
            c_list.append(c)

    if self.return_sequence is True:
        if self.backward:
            h_list = h_list[::-1]
            c_list = c_list[::-1]
        lstm_out = ng.stack(h_list, self.recurrent_axis, pos=self.recurrent_axis_idx)
    else:
        lstm_out = h_list[-1]

    if self.reset_cells is True:
        return lstm_out
    else:
        # carry the final hidden/cell state over to the next unroll
        return ng.sequential([
            ng.doall([
                ng.assign(self.h_init, h_list[-1]),
                ng.assign(self.c_init, c_list[-1])
            ]),
            lstm_out
        ])
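# Isolated sketch of the state-carry pattern used above for reset_cells=False:
# the assigns are sequenced before the output, so the next execution of the
# computation resumes from the final state. Axis and values are toy stand-ins.
H = ng.make_axis(name='hidden', length=4)
state = ng.variable(initial_value=0, axes=ng.make_axes([H])).named('state')
new_state = state + 1.
carried = ng.sequential([ng.doall([ng.assign(state, new_state)]), new_state])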
def unroll_with_attention(cell, num_steps, H_pr, H_hy, init_states=None,
                          reset_cells=True, return_sequence=True,
                          reverse_mode=False, input_data=None):
    """
    Unroll the cell with attention for num_steps steps.

    Arguments:
        cell: the cell to unroll (e.g. MatchLSTMCell_withAttention)
        num_steps: the number of steps to unroll
        H_pr: the encoding for the question
        H_hy: the encoding for the passage
        init_states: either None or a dictionary containing states
        reset_cells: determines whether the cell state is reset after the unroll
        return_sequence: if True, return the stacked per-step outputs;
                         otherwise return only the final output
        reverse_mode: set to True to unroll in the opposite direction
        input_data: the ArrayIterator object for training data (contains
                    the length of each sentence)
    """
    recurrent_axis = H_hy.axes.recurrent_axis()

    if init_states is not None:
        states = {k: ng.cast_role(v, out_axes)
                  for (k, v) in init_states.items()}
    else:
        states = init_states

    stepped_inputs = get_steps(H_hy, recurrent_axis, backward=reverse_mode)

    stepped_outputs = []
    for t in range(num_steps):
        with ng.metadata(step=str(t)):
            if t == 0:
                output, states = cell(H_pr, stepped_inputs[t], states,
                                      output=None, input_data=input_data)
            else:
                output, states = cell(H_pr, stepped_inputs[t], states,
                                      output=output, input_data=input_data)
            stepped_outputs.append(output)

    if reverse_mode and return_sequence:
        stepped_outputs.reverse()

    if return_sequence:
        outputs = ng.stack(stepped_outputs, recurrent_axis, pos=1)
    else:
        outputs = stepped_outputs[-1]

    if not reset_cells:
        update_inits = ng.doall([ng.assign(initial, states[name])
                                 for (name, initial) in states.items()])
        outputs = ng.sequential([update_inits, outputs])

    return outputs
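# Hedged usage sketch; `cell`, `H_pr`, `H_hy`, and `train_set` are assumed to
# be built elsewhere, and the call mirrors the loop above (unrolling over the
# full recurrent axis of the passage encoding).
outputs = unroll_with_attention(cell,
                                num_steps=H_hy.axes.recurrent_axis().length,
                                H_pr=H_pr, H_hy=H_hy,
                                reset_cells=True, return_sequence=True,
                                input_data=train_set)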
def __init__(self, state_axes, action_size, batch_size, model,
             learning_rate=0.0001):
    """
    For now, `model` must be a function which takes action_axes and returns
    a neon container.
    """
    super(ModelWrapper, self).__init__()

    self.axes = Namespace()
    self.axes.state = make_axes(state_axes, name='state')
    self.axes.action = ng.make_axis(name='action', length=action_size)
    self.axes.n = ng.make_axis(name='N', length=batch_size)
    self.axes.n1 = ng.make_axis(name='N', length=1)

    # placeholders
    self.state = ng.placeholder(self.axes.state + [self.axes.n])
    self.state_single = ng.placeholder(self.axes.state + [self.axes.n1])
    self.target = ng.placeholder([self.axes.action, self.axes.n])

    # these q functions have the same structure but different variables
    self.q_function = model(self.axes.action)
    self.q_function_target = model(self.axes.action)

    # construct inference computation
    with neon.Layer.inference_mode_on():
        inference = self.q_function(self.state)
    inference_computation = ng.computation(inference, self.state)

    # construct inference target computation
    with neon.Layer.inference_mode_on():
        inference_target = self.q_function_target(self.state)
    inference_target_computation = ng.computation(inference_target, self.state)

    # construct inference computation for evaluating a single observation
    with neon.Layer.inference_mode_on():
        inference_single = self.q_function(self.state_single)
    inference_computation_single = ng.computation(inference_single,
                                                  self.state_single)

    # update q function target weights with values from q function;
    # assumes that the variables in each are in the same order
    update_computation = ng.computation(
        ng.doall([
            ng.assign(target_variable,
                      ng.cast_axes(variable, target_variable.axes))
            for target_variable, variable in zip(
                self.q_function_target.variables.values(),
                self.q_function.variables.values())
        ]))

    # construct training computation
    loss = ng.squared_L2(self.q_function(self.state) - self.target)
    optimizer = neon.RMSProp(
        learning_rate=learning_rate,
        gradient_clip_value=1,
    )
    train_output = ng.sequential([
        optimizer(loss),
        loss,
    ])
    train_computation = ng.computation(train_output, self.state, self.target)

    # now bind the computations we are interested in
    self.transformer = ng.transformers.make_transformer()
    self.inference_function = self.transformer.add_computation(
        inference_computation)
    self.inference_target_function = self.transformer.add_computation(
        inference_target_computation)
    self.inference_function_single = self.transformer.add_computation(
        inference_computation_single)
    self.train_function = self.transformer.add_computation(train_computation)
    self.update_function = self.transformer.add_computation(update_computation)

    # run a single update to ensure that both q functions start with the same
    # initial weights
    self.update()
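# Illustrative construction (hypothetical names: `make_model` is a function
# taking the action axes and returning a neon container, per the docstring;
# `state_axes_spec` is whatever axes specification make_axes accepts; and
# `observations` is a state batch of matching shape).
wrapper = ModelWrapper(state_axes=state_axes_spec, action_size=2,
                       batch_size=32, model=make_model)
q_values = wrapper.inference_function(observations)  # q_function forward pass
wrapper.update()  # copy q_function weights into q_function_target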
def train_outputs(self, in_obj, init_state=None):
    """
    Sets shape-based parameters of this layer given an input tuple, int,
    or input layer.

    Arguments:
        in_obj (int, tuple, Layer or Tensor): object that provides shape
                                              information for the layer
        init_state (tuple of Tensor): object that provides the initial state;
                                      for LSTM this includes the hidden state
                                      and the cell state

    Returns:
        rnn_out (Tensor): output
    """
    # try to understand the axes from the input
    if init_state is not None:
        assert len(init_state) == 2 and init_state[0].axes == init_state[1].axes
        self.interpret_axes(in_obj, init_state[0])
    else:
        self.interpret_axes(in_obj, init_state)

    # initialize the hidden states
    if init_state is not None:
        self.h_init = init_state[0]
        self.c_init = init_state[1]
    else:
        if self.reset_cells:
            self.h_init = ng.temporary(
                initial_value=0, axes=self.hidden_state_axes).named('h_init')
            self.c_init = ng.temporary(
                initial_value=0, axes=self.hidden_state_axes).named('c_init')
        else:
            self.h_init = ng.variable(
                initial_value=0, axes=self.hidden_state_axes).named('h_init')
            self.c_init = ng.variable(
                initial_value=0, axes=self.hidden_state_axes).named('c_init')

    # params are dictionaries keyed by gate: i, f, o, g
    self.W_input = {
        k: ng.variable(axes=self.w_in_axes,
                       initial_value=self.init).named("W_in_{}".format(k))
        for k in self.metadata['gates']
    }
    self.W_recur = {
        k: ng.variable(axes=self.w_re_axes,
                       initial_value=self.init_inner).named("W_re_{}".format(k))
        for k in self.metadata['gates']
    }
    self.b = {
        k: ng.variable(axes=self.hidden_axes,
                       initial_value=0).named("bias_{}".format(k))
        for k in self.metadata['gates']
    }

    h = self.h_init
    c = self.c_init
    h_list = []
    c_list = []

    # feedforward computation
    in_s = get_steps(in_obj, self.recurrent_axis, self.backward)

    # recurrent computation
    for i in range(self.recurrent_axis.length):
        with ng.metadata(recurrent_step=str(i)):
            [h, c] = self._step(in_s[i], [h, c])
            h_list.append(h)
            c_list.append(c)

    if self.return_sequence is True:
        if self.backward:
            h_list = h_list[::-1]
            c_list = c_list[::-1]
        lstm_out = ng.stack(h_list, self.recurrent_axis, pos=self.recurrent_axis_idx)
    else:
        lstm_out = h_list[-1]

    if self.reset_cells is True:
        return lstm_out
    else:
        # carry the final hidden/cell state over to the next unroll
        return ng.sequential([
            ng.doall([
                ng.assign(self.h_init, h_list[-1]),
                ng.assign(self.c_init, c_list[-1])
            ]),
            lstm_out
        ])