def __init__(self, num_units, memory, memory_sequence_length=None, scale=False,
             probablity_fn=None, score_mask_value=None, name="LuongAttention"):
    """Initialise the LuongAttention mechanism.

    Args:
        num_units: First argument of the ``Dense`` layer named
            "memory_layer" that is built here (bias-free, non-trainable);
            also stored on the instance as ``_num_units``.
        memory: Forwarded unchanged to the base-class initialiser.
        memory_sequence_length: Forwarded unchanged to the base class.
        scale: Stored as ``_scale``; not otherwise used in this method.
        probablity_fn: Callable applied to the score. ``softmax`` is used
            when this is ``None``. (The spelling is part of the public
            signature and is kept for caller compatibility.)
        score_mask_value: Forwarded unchanged to the base class.
        name: Forwarded to the base class and stored as ``_name``.
    """
    # Resolve the default lazily so `softmax` is only looked up when needed.
    normalizer = softmax if probablity_fn is None else probablity_fn

    def wrapped_probability_fn(score):
        return normalizer(score)

    memory_projection = Dense(
        num_units,
        name="memory_layer",
        use_bias=False,
        trainable=False,
    )
    super(LuongAttention, self).__init__(
        query_layer=None,
        memory_layer=memory_projection,
        memory=memory,
        probability_fn=wrapped_probability_fn,
        memory_sequence_length=memory_sequence_length,
        score_mask_value=score_mask_value,
        name=name,
    )

    # NOTE(review): `debug` is not assigned in this method; presumably the
    # base-class initialiser sets it - confirm.
    if self.debug:
        print(self)

    self._num_units = num_units
    self._scale = scale
    self._name = name
def RNN(x, weights, biases):
    """Run two stacked LSTM cells and project the final state through Dense.

    The parameters ``x``, ``weights`` and ``biases`` are accepted but not
    used: the input sequence is read from the module-level
    ``symbols_in_keys`` and the output layer is initialised from the
    module-level ``out_weights`` / ``out_biases``.

    Returns:
        The result of applying ``Dense(10, ...)`` to ``state[-1].h``
        (presumably the last layer's hidden state - confirm).
    """
    with WeightsInitializer(initializer=init_ops.Constant(0.1)):
        # Cells are built in the same order as before (first, then second).
        layers = [
            LSTMCell(n_hidden, debug=True),
            LSTMCell(n_hidden, debug=True),
        ]
        stacked_cell = MultiRNNCell(layers)
        _, final_state = dynamic_rnn(stacked_cell, symbols_in_keys)

    # Dense in this case should be out of WeightsInitializer scope because
    # we are passing constants
    projection = Dense(
        10,
        kernel_initializer=init_ops.Constant(out_weights),
        bias_initializer=init_ops.Constant(out_biases),
    )
    return projection(final_state[-1].h)
def RNN(x, weights, biases):
    """Run a bidirectional LSTM and project one output row through Dense.

    The parameters ``x``, ``weights`` and ``biases`` are accepted but not
    used: the input sequence is read from the module-level
    ``symbols_in_keys`` and the output layer is initialised from the
    module-level ``out_weights`` / ``out_biases``.

    Returns:
        The result of applying ``Dense(10, ...)`` to the selected output
        row, reshaped to ``(1, vocab_size)``.
    """
    with WeightsInitializer(initializer=init_ops.Constant(0.1)):
        # The backward cell is constructed before the forward one, exactly
        # as in the original ordering.
        bw_cell = LSTMCell(n_hidden)
        fw_cell = LSTMCell(n_hidden)
        outputs, _ = bidirectional_dynamic_rnn(fw_cell, bw_cell, symbols_in_keys)

    # Dense in this case should be out of WeightsInitializer scope because
    # we are passing constants
    projection = Dense(
        10,
        kernel_initializer=init_ops.Constant(out_weights),
        bias_initializer=init_ops.Constant(out_biases),
    )

    forward_out, backward_out = outputs
    merged = np.concatenate((forward_out, backward_out), axis=-1)
    # Index 0 on the first axis, last index on the second axis
    # (presumably first batch item, last time step - confirm).
    selected = merged[0][-1]
    return projection(selected.reshape(1, vocab_size))
def decoding_layer(decoding_embed_inp, embeddings, encoding_op, encoding_st,
                   v_size, fr_len, en_len, max_en_len, rnn_cell_size,
                   word2int, dropout_prob, batch_size, n_layers):
    """Build the output projection and decoder cell, then run training decoding.

    Fix over the previous version: the output layer size and the dropout
    value are now taken from the ``word2int`` and ``dropout_prob``
    arguments. Previously these arguments were ignored and the module
    globals ``en_word2int`` and ``dr_prob`` were read instead, so the
    function only worked for whatever those globals happened to hold.

    Args:
        decoding_embed_inp: Forwarded to ``training_decoding_layer``.
        embeddings: Accepted for interface compatibility; unused here.
        encoding_op: Forwarded to ``training_decoding_layer``.
        encoding_st: Forwarded to ``training_decoding_layer``.
        v_size: Forwarded to ``training_decoding_layer``.
        fr_len: Forwarded to ``training_decoding_layer``.
        en_len: Forwarded to ``training_decoding_layer``.
        max_en_len: Forwarded to ``training_decoding_layer``.
        rnn_cell_size: First argument of ``get_rnn_cell``.
        word2int: Sized container; the output ``Dense`` layer is created
            with ``len(word2int) + 1`` as its first argument.
        dropout_prob: Second argument of ``get_rnn_cell``.
        batch_size: Accepted for interface compatibility; unused here.
        n_layers: Third argument of ``get_rnn_cell``.

    Returns:
        Whatever ``training_decoding_layer`` returns.
    """
    # `init` and `debug` have no corresponding parameter and are still read
    # from module scope.
    out_l = Dense(len(word2int) + 1, kernel_initializer=init_ops.Constant(init))
    decoder_cell = get_rnn_cell(rnn_cell_size, dropout_prob, n_layers, debug)
    logits_tr = training_decoding_layer(
        decoding_embed_inp,
        en_len,
        decoder_cell,
        encoding_op,
        encoding_st,
        out_l,
        v_size,
        fr_len,
        max_en_len,
    )
    return logits_tr
def __init__(self, cell, attention_mechanism, attention_layer_size=None,
             alignment_history=False, cell_input_fn=None,
             output_attention=True, initial_cell_state=None, name=None,
             attention_layer=None, debug=False):
    """Wrap ``cell`` with an attention mechanism and register the layer.

    Changes over the previous version:
      * The ``attention_layer_size`` / ``attention_layer`` exclusivity
        check now runs first. Before, the ``ValueError`` was raised only
        after an ``AttentionLayer`` had already been registered with the
        ``ExecutionContext`` instance.
      * ``_attention_layer`` is always assigned. Before, it was left
        undefined when ``attention_layer_size`` was ``None`` and a supplied
        ``attention_layer`` was dropped.
      * The cell type check uses ``isinstance``.

    Args:
        cell: The cell to wrap. Anything that is not a ``MultiRNNCell`` is
            wrapped in ``MultiRNNCell([cell])``.
        attention_mechanism: Stored as ``_attention_mechanism``.
        attention_layer_size: When not ``None``, a bias-free, non-trainable
            ``Dense`` layer named "attention_layer" is created with this
            value as its first argument.
        alignment_history: Accepted for interface compatibility; unused here.
        cell_input_fn: Accepted for interface compatibility; unused here.
        output_attention: Stored as ``_output_attention``.
        initial_cell_state: Accepted for interface compatibility; unused here.
        name: Accepted for interface compatibility; unused here.
        attention_layer: Optional ready-made layer, used only when
            ``attention_layer_size`` is ``None``.
        debug: Stored as ``debug``.

    Raises:
        ValueError: If both ``attention_layer_size`` and ``attention_layer``
            are given.
    """
    # Validate before touching any shared state so that a rejected call
    # leaves nothing registered in the ExecutionContext.
    if attention_layer_size is not None and attention_layer is not None:
        raise ValueError(
            "Only one of attention_layer_size and attention_layer "
            "should be set")

    self.seqsize = 0
    self.ec = ExecutionContext.getInstance()
    self.debug = debug

    if not isinstance(cell, MultiRNNCell):
        cell = MultiRNNCell([cell])

    al = AttentionLayer(name="AttentionLayer", bi=False, fw_cell=self,
                        bw_cell=None, prev=None)
    self.ec.current_layer(al)
    self.ec.register(self.ec.get_current_layer())

    self._cell = cell
    self._attention_mechanism = attention_mechanism
    self._output_attention = output_attention

    if attention_layer_size is not None:
        self._attention_layer = Dense(attention_layer_size,
                                      name="attention_layer",
                                      use_bias=False,
                                      trainable=False)
    else:
        # Either the caller's layer or None; the attribute is always defined.
        # NOTE(review): `_attention_layer_size` stays None in this branch
        # because the supplied layer's output width is not known here.
        self._attention_layer = attention_layer
    self._attention_layer_size = attention_layer_size

    # state for Ds (presumably per-step caches used when computing
    # derivatives - confirm)
    self.aht, self.attenzt, self.attentiont, self.alignmentst = {}, {}, {}, {}
def RNN(x, weights, biases):
    """Run a multi-layer bidirectional LSTM and project one output row.

    The parameters ``x``, ``weights`` and ``biases`` are accepted but not
    used: the input sequence is read from the module-level
    ``symbols_in_keys``, the depth from ``n_layers`` and the output-layer
    constants from ``out_weights`` / ``out_biases``.

    Returns:
        The result of applying ``Dense(10, ...)`` to the selected output
        row, reshaped to ``(1, vocab_size)``. The value is also printed.
    """
    forward_layers, backward_layers = [], []
    for _ in range(n_layers):
        # A fresh initializer scope per layer; forward and backward cells
        # are created alternately, as in the original ordering.
        with WeightsInitializer(initializer=init_ops.Constant(0.1)):
            forward_layers.append(LSTMCell(n_hidden, debug=True))
            backward_layers.append(LSTMCell(n_hidden, debug=True))

    outputs, _ = bidirectional_dynamic_rnn(
        MultiRNNCell(forward_layers),
        MultiRNNCell(backward_layers),
        symbols_in_keys,
    )

    # Dense in this case should be out of WeightsInitializer scope because
    # we are passing constants
    projection = Dense(
        10,
        kernel_initializer=init_ops.Constant(out_weights),
        bias_initializer=init_ops.Constant(out_biases),
    )

    forward_out, backward_out = outputs
    merged = np.concatenate((forward_out, backward_out), axis=-1)
    # Index 0 on the first axis, last index on the second axis
    # (presumably first batch item, last time step - confirm).
    pred = projection(merged[0][-1].reshape(1, vocab_size))
    print("pred:", pred)
    return pred
# Training-script fragment: create one LSTM cell, an optimiser and an output
# layer, then repeatedly build a one-hot input window from `train_data` and
# run it through `dynamic_rnn`.
# NOTE(review): the original line breaks and indentation were lost. The
# extent of the `with` block and of the `if` body below is reconstructed -
# confirm against the original script. The `while` body is cut off after the
# `dynamic_rnn` call in this view (`step` is not updated in the visible part).
acc_total = 0
loss_total = 0
print("offset:", offset)
# only for testing
# (`weights` is not referenced again in the visible part of this fragment.)
weights = np.ones([4 * n_hidden, vocab_size + n_hidden + 1]) * .1
# Initial cell state and hidden state: column vectors of ones.
c = np.ones((n_hidden, 1))
h = np.ones((n_hidden, 1))
#initstate=(c,h)
initstate = LSTMStateTuple(c, h)
with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
    cell = LSTMCell(n_hidden, debug=True)
gdo = BatchGradientDescent(learning_rate)
# The output layer receives explicit constant initialisers.
out_l = Dense(10, kernel_initializer=init_ops.Constant(out_weights), bias_initializer=init_ops.Constant(out_biases))
while step < training_iters:
    # Pick a new random start offset once the current one exceeds
    # len(train_data) - end_offset.
    if offset > (len(train_data) - end_offset):
        offset = rnd.randint(0, n_input + 1)
        print("offset:", offset)
    # One-hot encode the `n_input` consecutive entries starting at `offset`.
    symbols_in_keys = [ input_one_hot(dictionary[str(train_data[i])], vocab_size) for i in range(offset, offset + n_input) ]
    symbols_in_keys = np.reshape(np.array(symbols_in_keys), [-1, n_input, vocab_size])
    # Dictionary id of the entry that immediately follows the input window.
    target = dictionary[str(train_data[offset + n_input])]
    result, state = dynamic_rnn(cell, symbols_in_keys, initstate)
# NMT training-script fragment: read run settings from `args`, load the
# data, build the model, then loop over epochs and batches computing the
# sequence loss and gradients.
# NOTE(review): the original line breaks and indentation were lost and are
# reconstructed here. The inner loop body is cut off after the
# `compute_gradients` call in this view.
lr = args.learning_rate
debug=args.debug
per_epoch=args.per_epoch
logs_path=args.out_dir
display_steps=args.display_steps
# NOTE: `args` is rebound here to the last value returned by get_nmt_data(),
# replacing the object whose attributes were read above.
fr_embeddings_matrix,en_embeddings_matrix,fr_word2int,en_word2int,fr_filtered,en_filtered,args=get_nmt_data()
set_modelparams(args)
make_model()
# Train on the first 30000 entries of each filtered data set.
en_train = en_filtered[0:30000]
fr_train = fr_filtered[0:30000]
update_check = (len(fr_train) // batch_size // per_epoch) - 1
# (`out_l` is not referenced again in the visible part of this fragment.)
out_l = Dense(len(en_word2int) + 1,kernel_initializer=init_ops.Constant(init))
for epoch_i in range(1, epochs + 1):
    update_loss = 0
    batch_loss = 0
    for batch_i, (en_batch, fr_batch, en_text_len, fr_text_len) in enumerate( get_batches(en_train, fr_train, batch_size)):
        before = time.time()
        # `fr_batch[:, ::-1]` passes the batch with its second axis reversed.
        encoding_optf, encoding_sttf ,logits_tr= seq2seq_model(fr_batch[:, ::-1], en_batch, dr_prob, fr_text_len, en_text_len, np.amax(en_text_len), len(en_word2int) + 1 , hidden_size, n_layers, en_word2int, batch_size);
        #print("batch:", batch_i, "decoding:logits:", logits_tr)
        yhat,loss=sequence_loss(logits_tr.rnn_output,en_batch,make_mask(en_batch))
        print("loss:",loss)
        # `gdo` is not defined in the visible part of this fragment.
        gradients=gdo.compute_gradients(yhat,en_batch)