def test_stack(transformer_factory):
    if transformer_factory.name == flex_gpu_transformer_name:
        pytest.skip("Allowed to fail until PR2")
    W = ng.make_axis(length=4)
    H = ng.make_axis(length=5)
    I = ng.make_axis(length=3)
    axes = ng.make_axes([W, H])
    rng = RandomTensorGenerator(0, np.float32)
    a_v = [rng.uniform(0, 1, axes) for _ in range(I.length)]

    for pos in range(len(axes) + 1):
        a = [ng.placeholder(axes, initial_value=x) for x in a_v]
        s = ng.stack(a, I, pos)
        with ExecutorFactory() as ex:
            num_funs = [ex.numeric_derivative(s, x, delta) for x in a]
            sym_funs = [ex.derivative(s, x) for x in a]
            for n_fun, s_fun, a_i in zip(num_funs, sym_funs, a_v):
                d_n = n_fun(a_i)
                d_s = s_fun(a_i)
                # assert_allclose raises on mismatch; a bare allclose call
                # would silently discard the comparison result
                ng.testing.assert_allclose(d_n, d_s, rtol=rtol, atol=atol)
def test_stack():
    W = ng.make_axis(length=4)
    H = ng.make_axis(length=5)
    I = ng.make_axis(length=3)
    axes = ng.make_axes([W, H])
    rng = RandomTensorGenerator(0, np.float32)
    a_v = [rng.uniform(0, 1, axes) for _ in range(I.length)]

    for pos in range(len(axes) + 1):
        a = [ng.placeholder(axes, initial_value=p) for p in a_v]
        s = ng.stack(a, I, pos)
        with ExecutorFactory() as ex:
            # derivative of the stack w.r.t. each input, holding the others
            # fixed; `other` avoids shadowing the numpy alias `np`
            num_funs = [ex.numeric_derivative(s, p, delta,
                                              *(other for other in a if other is not p))
                        for p in a]
            sym_funs = [ex.derivative(s, p,
                                      *(other for other in a if other is not p))
                        for p in a]
            for n_fun, s_fun, a_i in zip(num_funs, sym_funs, a_v):
                na_is = [na_i for na_i in a_v if na_i is not a_i]
                d_n = n_fun(a_i, *na_is)
                d_s = s_fun(a_i, *na_is)
                ng.testing.assert_allclose(d_n, d_s, rtol=rtol, atol=atol)
def test_stack(transformer_factory):
    ax = ng.make_name_scope(name="ax")
    ax.W = ng.make_axis(length=4)
    ax.H = ng.make_axis(length=5)
    ax.I = ng.make_axis(length=3)
    axes = ng.make_axes([ax.W, ax.H])
    rng = RandomTensorGenerator(0, np.float32)
    a_v = [rng.uniform(0, 1, axes) for _ in range(ax.I.length)]

    for pos in range(len(axes) + 1):
        a = [ng.placeholder(axes, initial_value=x) for x in a_v]
        s = ng.stack(a, ax.I, pos)
        ex = ExecutorFactory()
        num_funs = [ex.numeric_derivative(s, x, delta) for x in a]
        sym_funs = [ex.derivative(s, x) for x in a]
        ex.transformer.initialize()
        for n_fun, s_fun, a_i in zip(num_funs, sym_funs, a_v):
            d_n = n_fun(a_i)
            d_s = s_fun(a_i)
            # without the assert, a derivative mismatch would go unnoticed
            assert np.allclose(d_n, d_s, rtol=rtol, atol=atol)
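# A minimal sketch of what the tests above exercise, assuming the public
# ngraph-python API used throughout these examples (axis lengths arbitrary):
# ng.stack joins tensors that share the same axes along a new axis inserted
# at position `pos`.
import numpy as np
import ngraph as ng
import ngraph.transformers as ngt

W = ng.make_axis(length=4)
H = ng.make_axis(length=5)
I = ng.make_axis(length=3)
axes = ng.make_axes([W, H])

xs = [ng.placeholder(axes) for _ in range(I.length)]
s = ng.stack(xs, I, pos=0)  # result axes: (I, W, H)

transformer = ngt.make_transformer()
stacked = transformer.computation(s, *xs)
vals = [np.random.rand(4, 5).astype(np.float32) for _ in xs]
assert stacked(*vals).shape == (3, 4, 5)
transformer.close()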
def train_outputs(self, in_obj, init_state=None):
    """
    Sets shape-based parameters of this layer given an input tuple, int,
    or input layer.

    Arguments:
        in_obj (int, tuple, Layer or Tensor): object that provides shape
                                              information for layer
        init_state (Tensor): object that provides initial state

    Returns:
        rnn_out (Tensor): output
    """
    # try to understand the axes from the input
    self.interpret_axes(in_obj, init_state)

    # initialize the hidden states
    if init_state is not None:
        self.h_init = init_state
    else:
        if self.reset_cells:
            self.h_init = ng.constant(
                const=0, axes=self.hidden_state_axes).named('h_init')
        else:
            self.h_init = ng.variable(
                initial_value=0, axes=self.hidden_state_axes).named('h_init')

    self.W_input = ng.variable(axes=self.w_in_axes,
                               initial_value=self.init).named("W_in")
    self.W_recur = ng.variable(axes=self.w_re_axes,
                               initial_value=self.init_inner).named("W_re")
    self.b = ng.variable(axes=self.hidden_axes, initial_value=0).named("bias")

    h = self.h_init
    h_list = []

    # slice the inputs into time slices
    in_s = get_steps(in_obj, self.recurrent_axis, self.backward)

    # unrolling computations
    for i in range(self.recurrent_axis.length):
        with ng.metadata(recurrent_step=str(i)):
            h = self._step(in_s[i], h)
            h_list.append(h)

    if self.return_sequence is True:
        # only when returning a sequence do we need to reverse the output
        h_list = h_list[::-1] if self.backward else h_list
        rnn_out = ng.stack(h_list, self.recurrent_axis,
                           pos=self.recurrent_axis_idx)
    else:
        rnn_out = h_list[-1]

    return rnn_out
def train_outputs(self, in_obj):
    """
    Sets shape-based parameters of this layer given an input tuple, int,
    or input layer.

    Arguments:
        in_obj (int, tuple, Layer or Tensor): object that provides shape
                                              information for layer

    Returns:
        (Tensor): output
    """
    in_axes = in_obj.axes
    self.time_axis = in_axes.recurrent_axes()[0]

    def get_steps(x, time_axis):
        return [ng.slice_along_axis(x, time_axis, i)
                for i in range(time_axis.length)]

    if self.axes is not None:
        hidden_axes = self.axes - self.axes.recurrent_axes()
    else:
        hidden_axes = ng.make_axes(
            [ng.make_axis(self.nout).named('Hidden_in')])

    w_in_axes = hidden_axes + [axis - 1 for axis in
                               in_axes.sample_axes() - in_axes.recurrent_axes()]
    w_re_axes = hidden_axes + [axis - 1 for axis in hidden_axes]

    self.W_input = ng.variable(
        axes=w_in_axes,
        initial_value=self.init(w_in_axes.lengths)).named("W_in")
    self.W_recur = ng.variable(
        axes=w_re_axes,
        initial_value=self.init_inner(w_re_axes.lengths)).named("W_re")
    self.b = ng.variable(axes=hidden_axes, initial_value=0).named("bias")

    # feed-forward part of all time steps at once, then slice per step
    h_ff_buf = ng.dot(self.W_input, in_obj).named("W_in_dot_in")
    h_ff_s = get_steps(h_ff_buf, self.time_axis)
    self.h_init = ng.constant(np.zeros(h_ff_s[0].axes.lengths),
                              axes=h_ff_s[0].axes).named('h_init')

    hprev = [self.h_init]
    for i in range(self.time_axis.length):
        with ng.metadata(recurrent_step=str(i)):
            d = ng.dot(self.W_recur, hprev[i]).named("W_rec_dot_h{}".format(i))
            h = self.activation(d + h_ff_s[i] + self.b)
            h.name = "activ{}".format(i)
            hprev.append(h)

    rnn_out = ng.stack(hprev[1:], self.time_axis, pos=1)
    return rnn_out
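# Sketch of the relationship the two layers above rely on (assuming the same
# ngraph-python API): get_steps/slice_along_axis is the rough inverse of
# ng.stack -- slicing along the recurrent axis and re-stacking at the same
# position yields a tensor with the original axes.
time_axis = ng.make_axis(length=6, name="REC")   # "REC" marks a recurrent axis
feature_axis = ng.make_axis(length=3, name="F")
x = ng.placeholder(ng.make_axes([time_axis, feature_axis]))
steps = [ng.slice_along_axis(x, time_axis, i) for i in range(time_axis.length)]
rebuilt = ng.stack(steps, time_axis, pos=0)      # axes match x again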
def Combine(self, cntk_op, inputs):
    """
    Returns combined outputs of inputs list.

    Arguments:
        cntk_op: CNTK operation to be imported.
        inputs: List of inputs to this node.

    Returns:
        A ngraph Op.
    """
    return ng.stack(inputs, ng.make_axis(len(inputs)))
def Combine(self, cntk_op, inputs):
    """
    Returns combined outputs of inputs list.

    Arguments:
        cntk_op: CNTK operation to be imported.
        inputs: List of inputs to this node.

    Returns:
        A ngraph Op.
    """
    return ng.stack(inputs, ng.make_axis(len(inputs))).named(cntk_op.uid)
def run_inference(self, out_axes, init_states, **kwargs):
    if self.celltype == 'LSTM':
        init_states = [(state, ng.constant(0., state.axes))
                       for state in init_states]

    one_time_axis = ng.make_axis(1, name="REC")
    time_axis = out_axes.recurrent_axis()
    batch_axis = out_axes.batch_axis()
    feature_axis = (out_axes - [time_axis, batch_axis])[0]

    outputs = [ng.constant(0., [batch_axis, one_time_axis, feature_axis])]
    hidden_states = init_states

    for timestep in range(time_axis.length):
        in_obj = outputs[-1]

        # Compute the next hidden/cell states for the recurrent layers
        next_hidden_states = []
        for i, l in enumerate(self.layers[:-1]):
            if i < len(hidden_states):
                init_state = hidden_states[i]
            else:
                init_state = None

            if self.celltype == 'LSTM':
                h, c = l(in_obj, init_state=init_state, return_cell_state=True)
                in_obj = h
                h = ng.slice_along_axis(h, one_time_axis, 0)
                c = ng.slice_along_axis(c, one_time_axis, 0)
                next_hidden_states.append((h, c))
            else:
                h = l(in_obj, init_state=init_state)
                in_obj = h
                h = ng.slice_along_axis(h, one_time_axis, 0)
                # only a hidden state in the non-LSTM case; appending (h, c)
                # here would reuse a stale cell state from the LSTM branch
                next_hidden_states.append(h)
        hidden_states = next_hidden_states

        # Compute the output of the affine layer
        in_obj = self.layers[-1](in_obj)
        outputs.append(in_obj)

    # Get rid of the initial 0 input
    outputs = outputs[1:]
    outputs = [ng.slice_along_axis(output, one_time_axis, 0)
               for output in outputs]
    outputs = ng.stack(outputs, time_axis)
    outputs = ng.axes_with_order(outputs, out_axes)

    return outputs
    ax.N.length = args.batch_size
    for i in range(ax.N.length):
        # for each iteration, permute (by true label) the encoder input
        # embedding for the teacher-forcing input to the decoder
        emb_enc_input = ng.slice_along_axis(emb_enc_inputs, axis=ax.N, idx=i)
        tmp_axis_1 = ng.make_axis(length=time_steps, name='tmp_axis_1')
        emb_enc_input_tmp = ng.cast_axes(
            emb_enc_input, ng.make_axes([hidden_feature_axis, tmp_axis_1]))
        perm = ng.slice_along_axis(inputs['tgt_txt'], axis=ax.N, idx=i)
        one_hot_target_tmp = ng.one_hot(perm, axis=tmp_axis_1)
        emb_dec_input.append(ng.dot(emb_enc_input_tmp, one_hot_target_tmp))
    emb_dec_inputs = ng.stack(emb_dec_input, axis=ax.N, pos=1)
    enc_input = emb_enc_inputs
    dec_input = emb_dec_inputs
else:
    enc_input = inputs['inp_txt']
    dec_input = inputs['teacher_txt']

(enc_h_out, enc_c_out) = enc(enc_input, return_cell_state=True)

# compute the last hidden/cell states as decoder's initial states
rec_axis = enc_h_out.axes.recurrent_axis()
enc_last_h_out = ng.slice_along_axis(enc_h_out, axis=rec_axis, idx=-1)
enc_last_c_out = ng.slice_along_axis(enc_c_out, axis=rec_axis, idx=-1)

dec_h_out = dec(dec_input,
                init_state=(enc_last_h_out, enc_last_c_out),
                return_cell_state=False)
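# NumPy analogue of the permutation trick above (hypothetical shapes, not the
# ngraph API): multiplying an (F, T) embedding by the (T, T) one-hot matrix of
# the target indices reorders the embedding's time steps by the true labels.
import numpy as np
F, T = 4, 5
emb = np.random.rand(F, T)
tgt = np.array([2, 0, 1, 4, 3])      # target token index for each output step
one_hot = np.eye(T)[tgt].T           # column j is the one-hot vector of tgt[j]
permuted = emb @ one_hot             # permuted[:, j] == emb[:, tgt[j]]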
def __call__(self, in_obj, init_state=None):
    """
    Sets shape-based parameters of this layer given an input tuple, int,
    or input layer.

    Arguments:
        in_obj (int, tuple, Layer or Tensor): object that provides shape
                                              information for layer
        init_state (tuple of Tensor): object that provides initial state;
                                      for LSTM it includes the hidden state
                                      and the cell state

    Returns:
        lstm_out (Tensor): output
    """
    # try to understand the axes from the input
    if init_state is not None:
        assert len(init_state) == 2 and init_state[0].axes == init_state[1].axes
        self.interpret_axes(in_obj, init_state[0])
    else:
        self.interpret_axes(in_obj, init_state)

    # initialize the hidden states
    if init_state is not None:
        self.h_init = init_state[0]
        self.c_init = init_state[1]
    else:
        if self.reset_cells:
            self.h_init = ng.temporary(initial_value=0,
                                       axes=self.out_axes).named('h_init')
            self.c_init = ng.temporary(initial_value=0,
                                       axes=self.out_axes).named('c_init')
        else:
            self.h_init = ng.variable(initial_value=0,
                                      axes=self.out_axes).named('h_init')
            self.c_init = ng.variable(initial_value=0,
                                      axes=self.out_axes).named('c_init')

    # params are dictionaries keyed by gate: i, f, o, g
    self.W_input = {k: ng.variable(axes=self.w_in_axes,
                                   initial_value=self.init,
                                   scope=self.scope).named("W_in_{}".format(k))
                    for k in self.metadata['gates']}
    self.W_recur = {k: ng.variable(axes=self.w_re_axes,
                                   initial_value=self.init_inner,
                                   scope=self.scope).named("W_re_{}".format(k))
                    for k in self.metadata['gates']}
    self.b = {k: ng.variable(axes=self.out_feature_axes,
                             initial_value=0,
                             scope=self.scope).named("bias_{}".format(k))
              for k in self.metadata['gates']}

    h = self.h_init
    c = self.c_init
    h_list = []
    c_list = []

    # Compute feed-forward weighted inputs; batch norm is computed only on
    # the weighted inputs, as in https://arxiv.org/abs/1510.01378
    h_ff = dict()
    for k in self.metadata["gates"]:
        h_ff[k] = ng.dot(self.W_input[k], in_obj)
        if self.batch_norm is not None:
            h_ff[k] = self.batch_norm[k](h_ff[k])

    # slice the weighted inputs into time slices
    h_ff = get_steps(h_ff, self.recurrent_axis, self.backward)

    # recurrent computation
    for i in range(self.recurrent_axis.length):
        with ng.metadata(recurrent_step=str(i)):
            [h, c] = self._step(h_ff[i], [h, c])
            h_list.append(h)
            c_list.append(c)

    if self.return_sequence is True:
        if self.backward:
            h_list = h_list[::-1]
            c_list = c_list[::-1]
        lstm_out = ng.stack(h_list, self.recurrent_axis,
                            pos=self.recurrent_axis_idx)
    else:
        lstm_out = h_list[-1]

    if self.reset_cells is True:
        return lstm_out
    else:
        return ng.sequential([
            ng.doall([
                ng.assign(self.h_init, h_list[-1]),
                ng.assign(self.c_init, c_list[-1])
            ]),
            lstm_out
        ])
def unroll_with_attention(cell,
                          num_steps,
                          H_pr,
                          H_hy,
                          init_states=None,
                          reset_cells=True,
                          return_sequence=True,
                          reverse_mode=False,
                          input_data=None):
    """
    Unroll the cell with attention for num_steps steps.

    Arguments:
        cell: the cell to be unrolled (e.g. MatchLSTMCell_withAttention)
        num_steps: the number of steps needed to unroll
        H_pr: the encoding for the question
        H_hy: the encoding for the passage
        init_states: either None or a dictionary containing states
        reset_cells: determines whether the cell state has to be reset
        return_sequence: if True, return the full output sequence
        reverse_mode: set to True to unroll in the opposite direction
        input_data: the ArrayIterator object for training data (contains
                    the length of each sentence)
    """
    recurrent_axis = H_hy.axes.recurrent_axis()

    if init_states is not None:
        states = {k: ng.cast_role(v, out_axes)
                  for (k, v) in init_states.items()}
    else:
        states = init_states

    stepped_inputs = get_steps(H_hy, recurrent_axis, backward=reverse_mode)

    stepped_outputs = []
    for t in range(num_steps):
        with ng.metadata(step=str(t)):
            if t == 0:
                output, states = cell(H_pr, stepped_inputs[t], states,
                                      output=None, input_data=input_data)
            else:
                output, states = cell(H_pr, stepped_inputs[t], states,
                                      output=output, input_data=input_data)
            stepped_outputs.append(output)

    if reverse_mode:
        if return_sequence:
            stepped_outputs.reverse()

    if return_sequence:
        outputs = ng.stack(stepped_outputs, recurrent_axis, pos=1)
    else:
        outputs = stepped_outputs[-1]

    if not reset_cells:
        update_inits = ng.doall([ng.assign(initial, states[name])
                                 for (name, initial) in states.items()])
        outputs = ng.sequential([update_inits, outputs])

    return outputs
# Total Loss
train_loss = loss1 + loss2

# Set optimizer (no learning rate scheduler used)
optimizer = Adam(learning_rate=2e-3)

print('compiling the graph')
# Cost set up
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])

# Predicted class is the one with the max probability
# Required outputs: batch cost, train probability, misclass train
train_outputs = dict(batch_cost=batch_cost,
                     inps=inputs['answer'],
                     logits=ng.stack(logits_concat, span, 1),
                     labels=inputs['answer'],
                     drop=dropout_val)

# Inference mode for the validation dataset:
with Layer.inference_mode_on():
    eval_outputs = dict(logits=ng.stack(logits_concat, span, 1),
                        labels=inputs['answer'],
                        drop=drop_pointer)

# Now bind the computations we are interested in
print('generating transformer')
eval_frequency = 20
val_frequency = np.ceil(len(train['para']['data']) /
                        params_dict['batch_size'])
train_error_frequency = 1000

# Create Transformer
def train_outputs(self, in_obj, init_state=None):
    """
    Sets shape-based parameters of this layer given an input tuple, int,
    or input layer.

    Arguments:
        in_obj (int, tuple, Layer or Tensor): object that provides shape
                                              information for layer
        init_state (tuple of Tensor): object that provides initial state;
                                      for LSTM it includes the hidden state
                                      and the cell state

    Returns:
        lstm_out (Tensor): output
    """
    # try to understand the axes from the input
    if init_state is not None:
        assert len(init_state) == 2 and init_state[0].axes == init_state[1].axes
        self.interpret_axes(in_obj, init_state[0])
    else:
        self.interpret_axes(in_obj, init_state)

    # initialize the hidden states
    if init_state is not None:
        self.h_init = init_state[0]
        self.c_init = init_state[1]
    else:
        if self.reset_cells:
            self.h_init = ng.temporary(
                initial_value=0, axes=self.hidden_state_axes).named('h_init')
            self.c_init = ng.temporary(
                initial_value=0, axes=self.hidden_state_axes).named('c_init')
        else:
            self.h_init = ng.variable(
                initial_value=0, axes=self.hidden_state_axes).named('h_init')
            self.c_init = ng.variable(
                initial_value=0, axes=self.hidden_state_axes).named('c_init')

    # params are dictionaries keyed by gate: i, f, o, g
    self.W_input = {k: ng.variable(axes=self.w_in_axes,
                                   initial_value=self.init
                                   ).named("W_in_{}".format(k))
                    for k in self.metadata['gates']}
    self.W_recur = {k: ng.variable(axes=self.w_re_axes,
                                   initial_value=self.init_inner
                                   ).named("W_re_{}".format(k))
                    for k in self.metadata['gates']}
    self.b = {k: ng.variable(axes=self.hidden_axes,
                             initial_value=0).named("bias_{}".format(k))
              for k in self.metadata['gates']}

    h = self.h_init
    c = self.c_init
    h_list = []
    c_list = []

    # feedforward computation
    in_s = get_steps(in_obj, self.recurrent_axis, self.backward)

    # recurrent computation
    for i in range(self.recurrent_axis.length):
        with ng.metadata(recurrent_step=str(i)):
            [h, c] = self._step(in_s[i], [h, c])
            h_list.append(h)
            c_list.append(c)

    if self.return_sequence is True:
        if self.backward:
            h_list = h_list[::-1]
            c_list = c_list[::-1]
        lstm_out = ng.stack(h_list, self.recurrent_axis,
                            pos=self.recurrent_axis_idx)
    else:
        lstm_out = h_list[-1]

    if self.reset_cells is True:
        return lstm_out
    else:
        return ng.sequential([
            ng.doall([
                ng.assign(self.h_init, h_list[-1]),
                ng.assign(self.c_init, c_list[-1])
            ]),
            lstm_out
        ])
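# NumPy analogue of the unroll-and-stack pattern shared by the RNN/LSTM layers
# above (hypothetical shapes; np.tanh stands in for the real _step): slice the
# input into time steps, loop over them carrying the state, then stack the
# per-step outputs back along the recurrent axis.
import numpy as np
T, N, Hd = 5, 2, 3
in_s = [np.random.rand(N, Hd) for _ in range(T)]   # like get_steps(...)
h = np.zeros((N, Hd))                              # like h_init
h_list = []
for i in range(T):
    h = np.tanh(in_s[i] + h)                       # stand-in for self._step
    h_list.append(h)
out = np.stack(h_list, axis=0)                     # like ng.stack(h_list, REC, pos)
assert out.shape == (T, N, Hd)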
batch_axis = ng.make_axis(length=batch_size, name="N")
time_axis = ng.make_axis(length=seq_len, name="REC")
feature_axis = ng.make_axis(length=feature_dim, name="F")
out_axis = ng.make_axis(length=1, name="Fo")

in_axes = ng.make_axes([batch_axis, time_axis, feature_axis])
rul_axes = ng.make_axes([batch_axis, out_axis])

# Build placeholders for the created axes
inputs = dict(X=ng.placeholder(in_axes), y=ng.placeholder(rul_axes))

Xs = get_steps(inputs['X'], time_axis)
if args.backward:
    target_steps = Xs[::-1]
    target = ng.stack(target_steps, time_axis)
else:
    target_steps = Xs
    target = inputs['X']
# the "previous" sequence: a zero step followed by all but the last target step
previous_steps = [ng.constant(0., [batch_axis, feature_axis])] + target_steps[:-1]
previous = ng.stack(previous_steps, time_axis)

# define model
encoder_recurrent_units = list(map(int, args.n_hidden.split(",")))
if args.bottleneck:
    decoder_recurrent_units = encoder_recurrent_units[::-1]
else:
    decoder_recurrent_units = encoder_recurrent_units
encoder = recurrent_model.RecurrentEncoder(celltype=args.modeltype,
                                           recurrent_units=encoder_recurrent_units,
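# NumPy analogue of the shifted "previous" sequence built above (assumed
# layout (N, REC, F)): prepend a zero step and drop the last step, so the
# decoder sees step t-1 as input when reconstructing step t.
import numpy as np
x = np.random.rand(2, 6, 3)   # (N, REC, F)
prev = np.concatenate([np.zeros_like(x[:, :1]), x[:, :-1]], axis=1)
assert prev.shape == x.shape and (prev[:, 0] == 0).all()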