def __init__(self, num_units, gate_mod=None, ngram=False, no_feedback=False,
             use_peepholes=False, cell_clip=None, initializer=None,
             num_proj=None, proj_clip=None, num_unit_shards=None,
             num_proj_shards=None, forget_bias=1.0, state_is_tuple=True,
             layer_norm=False, activation=None, reuse=None, name=None,
             dtype=None, **kwargs):
    """Record the configuration of the modified LSTM cell.

    Every argument is stored on a private attribute of the same name.
    `initializer` and `activation` are resolved through `initializers.get`
    and `activations.get`; a falsy `activation` selects `math_ops.tanh`.
    `reuse`, `name`, `dtype` and `**kwargs` are forwarded to the parent
    constructor.

    The state size is `LSTMStateTuple(num_units, out)` when
    `state_is_tuple` is true and `num_units + out` otherwise, where `out`
    is `num_proj` if it is truthy and `num_units` if not. `out` is also the
    output size.
    """
    super(LSTMCell_mod, self).__init__(
        _reuse=reuse, name=name, dtype=dtype, **kwargs)
    print("LSTM cell mode: {0}".format(gate_mod))

    # The cell only accepts rank-2 inputs.
    self.input_spec = base_layer.InputSpec(ndim=2)

    # Plain configuration values.
    self._num_units = num_units
    self._gate_mod = gate_mod
    self._ngram = ngram
    self._no_feedback = no_feedback
    self._use_peepholes = use_peepholes
    self._cell_clip = cell_clip
    self._initializer = initializers.get(initializer)
    self._num_proj = num_proj
    self._proj_clip = proj_clip
    self._num_unit_shards = num_unit_shards
    self._num_proj_shards = num_proj_shards
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._layer_norm = layer_norm
    self._activation = (
        activations.get(activation) if activation else math_ops.tanh)

    # Width of the emitted/hidden vector: projected if a projection is set.
    out_width = num_proj if num_proj else num_units
    if state_is_tuple:
        self._state_size = LSTMStateTuple(num_units, out_width)
    else:
        self._state_size = num_units + out_width
    self._output_size = out_width
def __init__(self, num_units, use_peepholes=False, cell_clip=None,
             initializer=None, num_proj=None, proj_clip=None,
             num_unit_shards=None, num_proj_shards=None, forget_bias=1.0,
             state_is_tuple=True, activation=None, reuse=None):
    """Record the configuration of the highway LSTM cell.

    Logs a warning when a concatenated state is requested
    (`state_is_tuple` false) and another when either sharding argument is
    supplied. All arguments are stored on private attributes; a falsy
    `activation` falls back to `math_ops.tanh`.

    State and output sizes follow `num_proj` when it is truthy and
    `num_units` otherwise. The lazily created members (`_linear1`,
    `_linear2`, and the peephole diagonals when `use_peepholes` is set)
    start out as `None`.
    """
    super(ExtHighWayLSTMCell, self).__init__(_reuse=reuse)

    if not state_is_tuple:
        tf.logging.warn(
            "%s: Using a concatenated state is slower and will soon be "
            "deprecated.  Use state_is_tuple=True.", self)
    if not (num_unit_shards is None and num_proj_shards is None):
        tf.logging.warn(
            "%s: The num_unit_shards and proj_unit_shards parameters are "
            "deprecated and will be removed in Jan 2017.  "
            "Use a variable scope with a partitioner instead.", self)

    self._num_units = num_units
    self._use_peepholes = use_peepholes
    self._cell_clip = cell_clip
    self._initializer = initializer
    self._num_proj = num_proj
    self._proj_clip = proj_clip
    self._num_unit_shards = num_unit_shards
    self._num_proj_shards = num_proj_shards
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation or math_ops.tanh

    # Width of the emitted/hidden vector: projected if a projection is set.
    out_width = num_proj if num_proj else num_units
    if state_is_tuple:
        self._state_size = LSTMStateTuple(num_units, out_width)
    else:
        self._state_size = num_units + out_width
    self._output_size = out_width

    # Placeholders for members that are built later.
    self._linear1 = None
    self._linear2 = None
    if self._use_peepholes:
        self._w_f_diag = None
        self._w_i_diag = None
        self._w_o_diag = None
def __call__(self, inputs, state, scope=None):
    """Run one step of the basic LSTM cell.

    Args:
        inputs: Input tensor for this step.
        state: `(c, h)` pair when `self._state_is_tuple` is true, otherwise
            a single tensor holding `c` and `h` concatenated along axis 1.
        scope: Optional variable scope name; defaults to
            "basic_lstm_cell".

    Returns:
        `(new_h, new_state)`. The output is the new hidden state `h`, and
        the state carries both `c` and `h` in the same layout as `state`.
    """
    with _checked_scope(self, scope or "basic_lstm_cell", reuse=self._reuse):
        if self._state_is_tuple:
            # Usual path: the state already arrives as (c, h).
            c, h = state
        else:
            # Fix: split the incoming *state* into its two halves. The
            # previous code split `concat`, which is not defined yet at
            # this point, into four pieces and unpacked them into two
            # names.
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

        # All four gates are computed with a single linear map over
        # [inputs, h] (as in "Recurrent Neural Network Regularization").
        concat = _linear([inputs, h], 4 * self._num_units, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(
            value=concat, num_or_size_splits=4, axis=1)

        new_c = (c * sigmoid(f + self._forget_bias) +
                 sigmoid(i) * self._activation(j))
        new_h = self._activation(new_c) * sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        # The returned output is h_t; the state is (c, h).
        return new_h, new_state
def call(self, ginputs, state):
    """Run one LSTM step on the trailing columns of `ginputs`.

    The first `self._split_lo` columns of `ginputs` bypass the recurrence
    and are concatenated in front of the new hidden state in the output.
    The remaining columns are the LSTM input.

    Args:
        ginputs: 2-D tensor, sliced along axis 1 at `self._split_lo`.
        state: `(c_prev, m_prev)` pair.

    Returns:
        `(output, new_state)` where `output = concat([passthrough, m], 1)`
        and `new_state` is an `LSTMStateTuple` or a concatenated tensor,
        depending on `self._state_is_tuple`.
    """
    sigmoid = tf.sigmoid
    passthrough = tf.slice(ginputs, [0, 0], [-1, self._split_lo])
    features = tf.slice(ginputs, [0, self._split_lo], [-1, -1])
    c_prev, m_prev = state

    current_scope = tf.get_variable_scope()
    with tf.variable_scope(current_scope, initializer=self._initializer):
        # The normalised tensors are not fed to the gates below; the calls
        # are kept because they may register graph state.
        batch_normalization(features, name_scope="lstm_inputs")
        batch_normalization(m_prev, name_scope="lstm_hidden")

        gates = math_ops.matmul(tf.concat([features, m_prev], 1),
                                self._kernel)
        gates = nn_ops.bias_add(gates, self._bias)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(value=gates, num_or_size_splits=4, axis=1)

        keep = sigmoid(f + self._forget_bias) * c_prev
        write = sigmoid(i) * self._activation(j)
        c_next = keep + write
        m_next = sigmoid(o) * self._activation(c_next)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(c_next, m_next)
        else:
            new_state = tf.concat([c_next, m_next], 1)
        output = tf.concat([passthrough, m_next], 1)
    return output, new_state
def __init__(self, num_units, num_var=1, split=1, varepsilon=1e-24,
             use_peepholes=False, cell_clip=None, initializer=None,
             forget_bias=1.0, state_is_tuple=True, activation=None):
    """Configure the cell and widen its state.

    The LSTM options are forwarded unchanged to the parent constructor.
    `num_var` is kept as `self.rank`, `split` as `self._split_lo` and
    `varepsilon` as `self.eps`. Both state entries are sized
    `num_units * (1 + rank)`; the original author's note describes the
    extra slots as "momentum + variance".
    """
    parent_options = dict(
        use_peepholes=use_peepholes,
        cell_clip=cell_clip,
        initializer=initializer,
        forget_bias=forget_bias,
        state_is_tuple=state_is_tuple,
        activation=activation,
    )
    super(AdamLSTMCell, self).__init__(num_units, **parent_options)

    self.rank = num_var
    self._split_lo = split

    # One slot plus `rank` extra slots per unit, for both c and h.
    widened = num_units * (1 + self.rank)
    self._state_size = LSTMStateTuple(widened, widened)
    self.eps = varepsilon
def _default_dropout_state_filter_visitor(substate):
    """Tell the caller which parts of a state entry may receive dropout.

    Returns `LSTMStateTuple(c=False, h=True)` for an `LSTMStateTuple`, so
    the memory state `c` is excluded and `h` is not. Returns `False` for a
    `TensorArray` and `True` for anything else.
    """
    if isinstance(substate, LSTMStateTuple):
        # Never drop out the memory state.
        return LSTMStateTuple(c=False, h=True)
    return not isinstance(substate, tensor_array_ops.TensorArray)
def call(self, inputs, state):
    """Run one LSTM step whose gates also see an attention context.

    The context is obtained from `self._simple_attention` over the stored
    encoder tensors, queried with the previous hidden state. The gate
    pre-activations come from a `_Linear` over `[inputs, context, h]`,
    which is created on first use and cached on `self._linear`.

    Returns:
        `(new_h, new_state)`, with the state as an `LSTMStateTuple` or a
        concatenated tensor depending on `self._state_is_tuple`.
    """
    sigmoid = math_ops.sigmoid

    if self._state_is_tuple:
        c, h = state
    else:
        c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

    # Attention context over the encoder outputs.
    context = self._simple_attention(
        self._encoder_vector, self._encoder_proj, h)

    # Build the shared linear map lazily, then apply it.
    if self._linear is None:
        self._linear = _Linear(
            [inputs, context, h], 4 * self._num_units, True)
    pre_gates = self._linear([inputs, context, h])

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(
        value=pre_gates, num_or_size_splits=4, axis=1)

    retained = c * sigmoid(f + self._forget_bias)
    written = sigmoid(i) * self._activation(j)
    new_c = retained + written
    new_h = self._activation(new_c) * sigmoid(o)

    new_state = (LSTMStateTuple(new_c, new_h) if self._state_is_tuple
                 else array_ops.concat([new_c, new_h], 1))
    return new_h, new_state
def __call__(self, inputs, states):
    """Run one step using the hidden states of several previous steps.

    Args:
        inputs: Input tensor for this step.
        states: Iterable of per-lag states. Each is a `(c, h)` pair when
            `self._state_is_tuple` is true, otherwise a tensor that is
            split in two along axis 1.

    Returns:
        `(new_h, new_state)`. The cell value carried forward is the `c` of
        the last entry in `states`.
    """
    sigmoid = tf.sigmoid

    # Collect every lag's hidden state; `c` ends up as the last one seen.
    hs = ()
    for lag_state in states:
        if self._state_is_tuple:
            c, h = lag_state
        else:
            c, h = array_ops.split(
                value=lag_state, num_or_size_splits=2, axis=1)
        hs += (h,)

    meta_variable_size = 4 * self.output_size
    pre_gates = BinaryMera_wavefn(
        inputs, hs, meta_variable_size, self._num_orders,
        self._virtual_dim, True)
    i, j, f, o = array_ops.split(
        value=pre_gates, num_or_size_splits=4, axis=1)

    retained = c * sigmoid(f + self._forget_bias)
    written = sigmoid(i) * self._activation(j)
    new_c = retained + written
    new_h = self._activation(new_c) * sigmoid(o)

    if self._state_is_tuple:
        return new_h, LSTMStateTuple(new_c, new_h)
    return new_h, array_ops.concat([new_c, new_h], 1)
def call(self, inputs, state):
    """Run one step of the character/lexicon LSTM.

    Args:
        inputs: Pair `(char_inputs, state_inputs)`. Rows whose
            `state_inputs` sum to zero are treated as having no lexicon
            information.
        state: `(c, h)` pair, or a concatenated tensor when
            `self._state_is_tuple` is false.

    Returns:
        `(new_h, new_state)`.
    """
    char_inputs = inputs[0]
    state_inputs = inputs[1]

    # Zero sums mark entries (and whole rows) without lexicon content.
    check_state_0 = tf.reduce_sum(state_inputs, axis=-1)
    check_state_1 = tf.reduce_sum(check_state_0, axis=-1)

    lexicon_state_idx = tf.where(tf.not_equal(check_state_0, 0))

    # A single match squeezes down to a scalar; restore the leading axis.
    squeezed_idx = tf.squeeze(tf.where(tf.equal(check_state_1, 0)))
    plain_state_idx = tf.cond(
        pred=tf.equal(tf.rank(squeezed_idx), 0),
        true_fn=lambda: tf.expand_dims(squeezed_idx, axis=0),
        false_fn=lambda: squeezed_idx)

    lexicon_char_idx = tf.where(
        tf.not_equal(tf.reduce_sum(check_state_0, axis=-1), 0))
    plain_char_idx = tf.where(
        tf.equal(tf.reduce_sum(check_state_0, axis=-1), 0))

    if self._state_is_tuple:
        c, h = state
    else:
        c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

    gate_inputs = tf.matmul(tf.concat([char_inputs, h], 1), self._kernel)
    gate_inputs = tf.nn.bias_add(gate_inputs, self._bias)
    i, j, f, o = tf.split(value=gate_inputs, num_or_size_splits=4, axis=1)

    # Cell update for rows without lexicon input, scattered into the
    # cell's state tensor.
    plain_c = self._new_c_without_lexicon(
        i=i, f=f, j=j, c=c, indices_tensor=plain_state_idx)
    scattered_c = tf.scatter_nd_update(
        self._char_state_tensor,
        indices=plain_char_idx,
        updates=plain_c)

    # Only run the lexicon path when at least one row has lexicon input.
    has_lexicon_rows = tf.not_equal(
        tf.shape(plain_state_idx)[-1], tf.shape(state_inputs)[0])
    new_c = tf.cond(
        has_lexicon_rows,
        true_fn=lambda: self._if_not_empty_lexicon_state(
            i, j, char_inputs, state_inputs,
            lexicon_char_idx, lexicon_state_idx, scattered_c),
        false_fn=lambda: scattered_c)

    new_h = tf.multiply(self._activation(new_c), tf.nn.sigmoid(o))

    if self._state_is_tuple:
        new_state = LSTMStateTuple(new_c, new_h)
    else:
        new_state = tf.concat([new_c, new_h], 1)
    return new_h, new_state
def call(self, inputs, state):
    """Run one step of the time-decayed LSTM cell.

    Column 0 of `inputs` is read as the elapsed time since the previous
    step, and the next 128 columns as the text vector. The cell state is
    updated twice: first by a time-decay stage weighted with
    `exp(-delta_t)`, then by a text stage.

    Fixes relative to the previous version:
      * `math_ops.sigmoid` and `math_ops.tanh` are now referenced, not
        called with no arguments (which raised `TypeError`).
      * The time delta stays a tensor; `float()` cannot convert a symbolic
        tensor.
      * `slice` receives per-axis `begin`/`size` lists, as a 2-D input
        requires.
      * The method returns `(new_h, new_state)`, the pair an RNN cell must
        produce, instead of the state alone.

    Args:
        inputs: 2-D tensor; assumed to be `[batch, 1 + 128]`. TODO confirm
            the 128-wide text vector against the caller.
        state: `(c, h)` pair, or a concatenated tensor when
            `self._state_is_tuple` is false.

    Returns:
        `(new_h, new_state)`.
    """
    sigmoid = math_ops.sigmoid
    tanh = math_ops.tanh

    if self._state_is_tuple:
        c, h = state
    else:
        c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

    # Time delta, kept as a [batch, 1] column so it broadcasts over units.
    delt_t = array_ops.slice(inputs, [0, 0], [-1, 1])
    # Text vector.
    text = array_ops.slice(inputs, [0, 1], [-1, 128])

    # NOTE(review): the three `_linear` calls run in the same variable
    # scope; if `_linear` creates variables by fixed name they will clash
    # or be shared. Confirm against `_linear`'s implementation.
    concat_time = _linear([text, h], 3 * self.num_units, bias=True)  # time-decay stage
    concat_text = _linear([text, h], 3 * self.num_units, bias=True)  # text stage
    output = _linear([text, h], self.num_units, bias=True)

    i0, j0, f0 = array_ops.split(
        value=concat_time, num_or_size_splits=3, axis=1)
    i1, j1, f1 = array_ops.split(
        value=concat_text, num_or_size_splits=3, axis=1)

    # Stage 1: older memory fades with elapsed time and new content fills
    # the remainder.
    decay = math_ops.exp(-1 * delt_t)
    new_c = (c * decay * sigmoid(f0 + self._forget_bias) +
             (1 - decay) * sigmoid(i0) * tanh(j0))
    # Stage 2: ordinary LSTM update driven by the text gates.
    new_c = (new_c * sigmoid(f1 + self._forget_bias) +
             sigmoid(i1) * tanh(j1))
    new_h = tanh(new_c) * sigmoid(output)

    if self._state_is_tuple:
        new_state = LSTMStateTuple(new_c, new_h)
    else:
        new_state = array_ops.concat([new_c, new_h], 1)
    return new_h, new_state
def __call__(self, inputs, state, scope=None):
    """Run one LSTM step with a stochastic linear map for the gates.

    Fix: the method now returns `(new_h, new_state)`. The previous
    `return new_h,` produced a 1-tuple and discarded the state that had
    just been computed.

    Args:
        inputs: Input tensor for this step.
        state: `(c, h)` pair, or a concatenated tensor when
            `self._state_is_tuple` is false.
        scope: Optional variable scope; defaults to the class name.

    Returns:
        `(new_h, new_state)`.
    """
    with tf.variable_scope(scope or type(self).__name__):
        if self._state_is_tuple:
            c, h = state
        else:
            one = constant_op.constant(1, dtype=dtypes.int32)
            c, h = array_ops.split(
                value=state, num_or_size_splits=2, axis=one)

        # All four gates come from one combined linear map.
        concat = self.stochastic_linear(
            [inputs, h], 4 * self._num_units, True)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(concat, 4, axis=1)

        if self._layer_norm:
            i = self._norm(i, "input", dtype=inputs.dtype)
            j = self._norm(j, "transform", dtype=inputs.dtype)
            f = self._norm(f, "forget", dtype=inputs.dtype)
            o = self._norm(o, "output", dtype=inputs.dtype)

        new_c = (c * tf.sigmoid(f + self._forget_bias) +
                 tf.sigmoid(i) * tf.nn.tanh(j))
        new_h = tf.nn.tanh(new_c) * tf.sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = tf.concat([new_c, new_h], axis=1)
        return new_h, new_state
def call(self, inputs, state):
    """Run one LSTM step with layer normalisation on the cell value.

    The gate pre-activations come from `self._line_sep` without a bias.
    The normalised cell value is used only to compute the new hidden
    state; the state carries the un-normalised `new_c`.

    Returns:
        `(new_h, new_state)`.
    """
    sigmoid = math_ops.sigmoid

    if self._state_is_tuple:
        c, h = state
    else:
        c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

    pre_gates = self._line_sep([inputs, h], 4 * self._num_units, bias=False)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(
        value=pre_gates, num_or_size_splits=4, axis=1)

    retained = c * sigmoid(f + self._forget_bias)
    written = sigmoid(i) * self._activation(j)
    new_c = retained + written

    normed_c = self.layer_norm(new_c, scope='c')
    new_h = self._activation(normed_c) * sigmoid(o)

    if self._state_is_tuple:
        return new_h, LSTMStateTuple(new_c, new_h)
    return new_h, array_ops.concat([new_c, new_h], 1)
def __call__(self, inputs, states):
    """Run one step of the tensor-network LSTM over several past states.

    Args:
        inputs: Input tensor for this step.
        states: Iterable of states. Each is a `(c, h)` pair when
            `self._state_is_tuple` is true, otherwise a tensor split in
            two along axis 1.

    Returns:
        `(new_h, new_state)`. The cell value carried forward is the `c` of
        the last entry in `states`.
    """
    sigmoid = tf.sigmoid

    # Gather the hidden state of every entry; `c` keeps the last value.
    hs = ()
    for past_state in states:
        if self._state_is_tuple:
            c, h = past_state
        else:
            c, h = array_ops.split(
                value=past_state, num_or_size_splits=2, axis=1)
        hs += (h,)

    output_size = 4 * self._num_units
    pre_gates = tensor_network_linear(inputs, hs, output_size, True)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(
        value=pre_gates, num_or_size_splits=4, axis=1)

    retained = c * sigmoid(f + self._forget_bias)
    written = sigmoid(i) * self._activation(j)
    new_c = retained + written
    new_h = self._activation(new_c) * sigmoid(o)

    if self._state_is_tuple:
        return new_h, LSTMStateTuple(new_c, new_h)
    return new_h, array_ops.concat([new_c, new_h], 1)
def get_cost_l(encoder_embed_input, decoder_embed_input, l_y, decoder_y,
               target_sequence_length, max_target_sequence_length,
               reuse=False):
    """Build the labelled-data cost: sequence loss plus a scaled KL term.

    A latent `z` is sampled as `z_mean + exp(0.5 * z_stddev) * noise`,
    mapped through a softplus layer and used as the first entry of the
    decoder's initial `LSTMStateTuple`. The second entry is
    `encode_states[1]`.

    Relies on the module-level names `keep_prob`, `weights_de`,
    `biases_de` and `latentscale_iter`.

    Returns:
        `(cost, encropy_loss, latent_cost, training_logits)`.
    """
    _, encode_states, z_mean, z_stddev, _ = encoder(
        encoder_embed_input, l_y, keep_prob, reuse)

    # Reparameterised sample of the latent variable.
    noise = tf.random_normal(tf.shape(z_stddev))
    z = z_mean + tf.exp(z_stddev * 0.5) * noise

    h_state = tf.nn.softplus(
        tf.matmul(z, weights_de['w_']) + biases_de['b_'])
    decoder_initial_state = LSTMStateTuple(h_state, encode_states[1])

    _, _, training_logits, masks, target = decoder(
        decoder_embed_input, decoder_y, target_sequence_length,
        max_target_sequence_length, decoder_initial_state, keep_prob, reuse)

    # KL term, one value per example.
    kl_terms = tf.exp(z_stddev) - 1. - z_stddev + tf.square(z_mean)
    latent_loss = 0.5 * tf.reduce_sum(kl_terms, 1)
    latent_cost = tf.reduce_mean(latent_loss)

    encropy_loss = tf.contrib.seq2seq.sequence_loss(
        training_logits, target, masks)

    cost = tf.reduce_mean(encropy_loss + latentscale_iter * (latent_loss))
    return cost, encropy_loss, latent_cost, training_logits
def build_encoder_bi(self, encoder_rnn_layer_size, encoder_num_units,
                     encoder_cell_type="LSTM"):
    """Build a bidirectional encoder and merge the two directions.

    Args:
        encoder_rnn_layer_size: Number of stacked cells per direction.
        encoder_num_units: Units per cell, passed to `self.get_rnn_layer`.
        encoder_cell_type: Cell type name, compared case-insensitively.
            "lstm" and "basiclstm" are merged as `(c, h)` states. Anything
            else is merged as a single tensor per layer (GRU-style).

    Returns:
        `(encoder_outputs, encoder_states)`. The outputs are the forward
        and backward outputs concatenated on the last axis. The states are
        a tuple with one merged state per layer.
    """
    encoder_cell_type = encoder_cell_type.lower()

    with tf.name_scope("encoder"):
        # One stacked RNN per direction.
        fw_rnn_layer = self.get_rnn_layer(
            encoder_rnn_layer_size, encoder_num_units, encoder_cell_type)
        bw_rnn_layer = self.get_rnn_layer(
            encoder_rnn_layer_size, encoder_num_units, encoder_cell_type)

        # Unroll the bidirectional RNN.
        # bi_state is (fw_state, bw_state), each a per-layer sequence:
        #   fw_state = ((c, h), (c, h), ...)
        #   bw_state = ((c, h), (c, h), ...)
        bi_outputs, bi_state = tf.nn.bidirectional_dynamic_rnn(
            fw_rnn_layer,
            bw_rnn_layer,
            self.embedded_src_batch,
            sequence_length=self.src_batch_seq_len,
            time_major=False,
            dtype=tf.float32)

        # Join forward and backward outputs on the last axis, e.g.
        # fw [128, 10, 100] and bw [128, 10, 100] become [128, 10, 200].
        # The encoder output is therefore twice as wide as one direction,
        # so either halve the encoder's num_units or double the decoder's.
        encoder_outpus = tf.concat(bi_outputs, -1)

        # The states are merged the same way, layer by layer. An LSTM
        # layer has (c, h), which must be rebuilt as an LSTMStateTuple.
        # A GRU layer has a single tensor.
        fw_encoder_state, bw_encoder_state = bi_state[0], bi_state[1]
        layer_ids = range(encoder_rnn_layer_size)

        if encoder_cell_type in ("lstm", "basiclstm"):
            encoder_states = [
                LSTMStateTuple(
                    tf.concat([fw_encoder_state[k][0],
                               bw_encoder_state[k][0]], -1),
                    tf.concat([fw_encoder_state[k][1],
                               bw_encoder_state[k][1]], -1))
                for k in layer_ids
            ]
        else:
            # GRU: each layer's state is a single tensor.
            encoder_states = [
                tf.concat([fw_encoder_state[k], bw_encoder_state[k]], -1)
                for k in layer_ids
            ]
        encoder_states = tuple(encoder_states)

    print("bidirectional encoder-encoder_outputs:", encoder_outpus)
    print("bidirectional encoder-encoder_states:", encoder_states)
    return encoder_outpus, encoder_states
def call(self, inputs, state):
    """Run one step of the ordered-neuron style LSTM cell.

    The first `2 * levels` columns of the gate pre-activations feed the
    master forget and master input gates (via `self.cummax`). The rest
    holds the four ordinary gates. Where the two master gates overlap, the
    ordinary LSTM update applies. Elsewhere the previous cell value is
    copied (master forget only) or the candidate is written (master input
    only).

    Fixes relative to the previous version:
      * The ordinary gates are split along axis 1; `axis=None` is not a
        valid split axis.
      * Each ordinary gate is reshaped to `[-1, levels, chunk_size]` so it
        broadcasts against the `[batch, levels, 1]` master gates and the
        reshaped cell.
      * Inside the overlap, the input gate now multiplies the candidate
        `activation(j)` rather than the flat previous cell `c`. This
        matches the other two terms of the same expression.

    Args:
        inputs: 2-D input tensor for this step.
        state: `(c, h)` pair with flat `[batch, num_units]` tensors.

    Returns:
        `(new_h, LSTMStateTuple(new_c, new_h))`, both flattened back to
        `[batch, num_units]`.
    """
    sigmoid = math_ops.sigmoid
    c, h = state

    gate_inputs = math_ops.matmul(
        array_ops.concat([inputs, h], 1), self._kernel)
    gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

    # Master gates: one value per level, broadcast over each chunk.
    master_f_gate = self.cummax(gate_inputs[:, :self._levels])
    master_f_gate = array_ops.expand_dims(master_f_gate, -1)
    master_i_gate = self.cummax(
        gate_inputs[:, self._levels:self._levels * 2], reversed=True)
    master_i_gate = array_ops.expand_dims(master_i_gate, -1)

    # Ordinary gates, arranged as [batch, levels, chunk_size].
    chunked_shape = [-1, self.levels, self.chunk_size]
    f, i, o, j = array_ops.split(
        value=gate_inputs[:, self._levels * 2:],
        num_or_size_splits=4, axis=1)
    f = array_ops.reshape(f, chunked_shape)
    i = array_ops.reshape(i, chunked_shape)
    o = array_ops.reshape(o, chunked_shape)
    j = array_ops.reshape(j, chunked_shape)

    c_last = array_ops.reshape(c, chunked_shape)
    candidate = self._activation(j)

    overlap = master_f_gate * master_i_gate
    c_out = (overlap * (sigmoid(f) * c_last + sigmoid(i) * candidate) +
             (master_f_gate - overlap) * c_last +
             (master_i_gate - overlap) * candidate)
    h_out = sigmoid(o) * self._activation(c_out)

    new_c = array_ops.reshape(c_out, [-1, self._num_units])
    new_h = array_ops.reshape(h_out, [-1, self._num_units])
    new_state = LSTMStateTuple(new_c, new_h)
    return new_h, new_state
def call(self, inputs, state):
    """Run one LSTM step with a block-structured gate projection.

    Args:
        inputs: 2-D tensor, `[batch_size, input_size]`.
        state: `LSTMStateTuple` of `(c, h)` when `self._state_is_tuple` is
            true, otherwise one tensor holding both halves concatenated
            along axis 1.

    Returns:
        `(new_h, new_state)`. `new_h` has been passed through
        `bit_utils.round_bit` with `self._f_bit`; the state uses the same
        layout as the incoming `state`.
    """
    B = self._block_size
    sigmoid = math_ops.sigmoid
    add = math_ops.add
    multiply = math_ops.multiply
    one = constant_op.constant(1, dtype=dtypes.int32)

    if self._state_is_tuple:
        c, h = state
    else:
        c, h = array_ops.split(value=state, num_or_size_splits=2, axis=one)

    # NOTE(review): only `inputs` is handed to BH_dense; `h` is unpacked
    # but never used, so the previous hidden state does not reach the
    # gates. Confirm this is intended.
    gate_inputs = BH_dense(
        inputs, 4 * self._num_units, B, self.transform,
        kernel_weights=self._kernel)
    gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(
        value=gate_inputs, num_or_size_splits=4, axis=one)

    forget_bias_tensor = constant_op.constant(
        self._forget_bias, dtype=f.dtype)

    # `add`/`multiply` are used instead of operators, as in the original.
    forget_gate = sigmoid(add(f, forget_bias_tensor))
    retained = multiply(c, forget_gate)
    input_gate = sigmoid(i)
    candidate = self._activation(j)
    written = multiply(input_gate, candidate)
    new_c = add(retained, written)

    new_h = multiply(self._activation(new_c), sigmoid(o))
    new_h = bit_utils.round_bit(new_h, self._f_bit)

    if self._state_is_tuple:
        return new_h, LSTMStateTuple(new_c, new_h)
    return new_h, array_ops.concat([new_c, new_h], 1)
def _default_dropout_state_filter_visitor(substate):
    """Tell the caller which parts of a state entry may receive dropout.

    Returns `LSTMStateTuple(c=False, h=True)` for an `LSTMStateTuple`, so
    the memory state `c` is excluded and `h` is not. Returns `False` for a
    `TensorArray` and `True` for anything else. `LSTMStateTuple` is
    imported inside the function.
    """
    from tensorflow.python.ops.rnn_cell_impl import LSTMStateTuple  # pylint: disable=g-import-not-at-top

    if isinstance(substate, LSTMStateTuple):
        # Never drop out the memory state.
        return LSTMStateTuple(c=False, h=True)
    return not isinstance(substate, tensor_array_ops.TensorArray)
def get_cost_l(encoder_embed_input, decoder_embed_input, l_y, decoder_y,
               target_sequence_length, max_target_sequence_length,
               reuse=False):
    """Build the labelled-data cost: masked squared error plus a KL term.

    A latent `z` is sampled as `z_mean + exp(0.5 * z_stddev) * noise`,
    mapped through a softplus layer and used as the first entry of the
    decoder's initial `LSTMStateTuple`. The second entry is
    `encode_states[1]`. The reconstruction term is the squared difference
    between the decoder's training output and the embedded decoder input,
    summed over the last axis, masked, and averaged over axis 1.

    Relies on the module-level names `keep_prob`, `weights_de`,
    `biases_de`, `dic_embeddings` and `latentscale_iter`.

    Returns:
        `(cost, encropy_loss, latent_cost, training_logits)`. `cost` is
        not reduced to a scalar here.
    """
    _, encode_states, z_mean, z_stddev, _ = encoder(
        encoder_embed_input, l_y, keep_prob, reuse)

    # Reparameterised sample of the latent variable.
    noise = tf.random_normal(tf.shape(z_stddev))
    z = z_mean + tf.exp(z_stddev * 0.5) * noise

    h_state = tf.nn.softplus(
        tf.matmul(z, weights_de['w_']) + biases_de['b_'])
    decoder_initial_state = LSTMStateTuple(h_state, encode_states[1])

    _, _, training_logits, masks, _ = decoder(
        decoder_embed_input, decoder_y, target_sequence_length,
        max_target_sequence_length, decoder_initial_state, keep_prob, reuse)

    # KL term, one value per example.
    kl_terms = tf.exp(z_stddev) - 1. - z_stddev + tf.square(z_mean)
    latent_loss = 0.5 * tf.reduce_sum(kl_terms, 1)
    latent_cost = tf.reduce_mean(latent_loss)

    # Squared error against the embedded targets, masked per position.
    decoder_input = tf.nn.embedding_lookup(
        dic_embeddings, decoder_embed_input)
    s_loss = tf.square(training_logits - decoder_input)
    mask_loss = tf.reduce_sum(tf.transpose(s_loss, [2, 0, 1]), 0)
    encropy_loss = tf.reduce_mean(tf.multiply(mask_loss, masks), 1)

    cost = tf.add(encropy_loss, (latentscale_iter * (latent_loss)))
    return cost, encropy_loss, latent_cost, training_logits
def build_graph(self):
    """Build the graph that steps through the input sequence.

    Unpacks the input into a TensorArray, runs `self._loop_body` inside a
    `tf.while_loop` for `self.sequence_length` steps, and packs the
    collected per-step values into `self.packed_output` and
    `self.packed_memory_view`. When the controller is recurrent, its
    state update is added as a control dependency of the packed tensors.
    """
    self.unpacked_input_data = utility.unpack_into_tensorarray(
        self.input_data, 1, self.sequence_length)

    # One accumulator per value collected at every step.
    (outputs, free_gates, allocation_gates, write_gates,
     read_weightings, write_weightings, usage_vectors) = [
        tf.TensorArray(tf.float32, self.sequence_length) for _ in range(7)]

    if self.controller.has_recurrent_nn:
        controller_state = self.controller.get_state()
    else:
        # Dummy state so the loop signature is the same either way.
        controller_state = (tf.zeros(1), tf.zeros(1))
    memory_state = self.memory.init_memory()

    if not isinstance(controller_state, LSTMStateTuple):
        controller_state = LSTMStateTuple(
            controller_state[0], controller_state[1])

    with tf.variable_scope("sequence_loop"):
        time = tf.constant(0, dtype=tf.int32)
        loop_vars = (time, memory_state, outputs, free_gates,
                     allocation_gates, write_gates, read_weightings,
                     write_weightings, usage_vectors, controller_state)
        final_results = tf.while_loop(
            cond=lambda time, *_: time < self.sequence_length,
            body=self._loop_body,
            loop_vars=loop_vars,
            parallel_iterations=32,
            swap_memory=True)

    dependencies = []
    if self.controller.has_recurrent_nn:
        # Slot 9 of the loop variables is the controller state.
        dependencies.append(self.controller.update_state(final_results[9]))

    with tf.control_dependencies(dependencies):
        self.packed_output = utility.pack_into_tensor(
            final_results[2], axis=1)

        # Slots 3..8 hold the memory-view accumulators, in this order.
        view_names = ('free_gates', 'allocation_gates', 'write_gates',
                      'read_weightings', 'write_weightings',
                      'usage_vectors')
        self.packed_memory_view = {
            view_name: utility.pack_into_tensor(final_results[slot], axis=1)
            for slot, view_name in enumerate(view_names, start=3)
        }
def call(self, inputs, state):
    """Run the wrapped cell and mix a window of label embeddings into `c`.

    The wrapper state is `(cell_state, history)`, where `history` is a
    flattened window of `use_K_histroy` label embeddings. Each step drops
    the oldest embedding and appends a new one computed from the cell
    output. The new `c` is computed from the window together with the
    cell's new `c`.

    Returns:
        `(output, (LSTMStateTuple(c, h_new), flattened_history))`.
    """
    # NOTE(review): the history is only unpacked when `_state_is_tuple`
    # is true; a non-tuple state fails below with an unbound name.
    if self._state_is_tuple:
        state, history = state

    cell_output, new_state = self._cell(inputs, state)
    output = cell_output
    c_new, h_new = new_state

    label_emb = tf.nn.relu(tf.matmul(output, self.emb_M3))

    window = self.config.use_K_histroy
    emb_size = self.config.label_emb_size

    # Slide the window: drop the oldest entry, append the new embedding.
    past = tf.reshape(history, shape=[-1, window, emb_size])
    kept = tf.slice(past, [0, 1, 0], [-1, window - 1, emb_size])
    concat_his = tf.concat(
        [kept, tf.expand_dims(label_emb, axis=1)], axis=1)

    # The cell's new c is stacked as one extra row; this presumes it is
    # `label_emb_size` wide.
    concat_all = tf.concat(
        [concat_his, tf.expand_dims(c_new, axis=1)], axis=1)

    all_flat = tf.reshape(concat_all, shape=[-1, (window + 1) * emb_size])
    his_flat = tf.reshape(concat_his, shape=[-1, window * emb_size])

    c = tf.nn.relu(tf.matmul(all_flat, self.emb_M4k))
    new_wrapper_state = (LSTMStateTuple(c, h_new), his_flat)
    return output, new_wrapper_state
def autoencoder_seq(x: tf.Tensor, noise, initial_state, seq_len, n_joints,
                    lstm_size):
    """Chain the encoder and decoder under their own variable scopes.

    :param x: Tensor whose second dimension gives the motion-selection size
    :param noise: Passed through to `encoder`
    :param initial_state: Pair of `(c, h)`-like sequences; entry 0 seeds
        the encoder and entry 1 the decoder
    :param seq_len: Passed through to `encoder`
    :param n_joints: Passed to both `encoder` and `decoder`
    :param lstm_size: Not used by this function
    :return: `(predicted_motion_selection, state_predictions,
        (final_predictor_state, final_classifier_state))`
    """
    motion_selection = x.shape[1].value

    with tf.variable_scope('encoder'):
        encoder_init = LSTMStateTuple(*initial_state[0])
        state_predictions, final_predictor_state = encoder(
            x, noise, encoder_init, seq_len, n_joints, motion_selection)

    with tf.variable_scope('decoder'):
        decoder_init = LSTMStateTuple(*initial_state[1])
        predicted_motion_selection, final_classifier_state = decoder(
            state_predictions, decoder_init, n_joints, motion_selection)

    final_states = (final_predictor_state, final_classifier_state)
    return predicted_motion_selection, state_predictions, final_states
def __init__(self, state_space_size, action_space_size, scope, trainer):
    """Build the recurrent actor-critic network under `scope`.

    The network is a 256-unit LSTM over the input rows followed by a
    softmax policy head and a linear value head. For every scope other
    than 'global', the loss, gradient and apply-gradient ops are built as
    well; gradients of the local variables are applied to the variables
    in the 'global' scope.

    Fixes relative to the previous version:
      * The initial LSTM state is built with `np.zeros`.
        `np.zeros_like((1, n))` returns a length-2 zero vector shaped
        like the tuple itself, not a `[1, n]` array.
      * The rows of `inputs` are treated as the time steps of a single
        sequence: `dynamic_rnn` receives a `[1, time, features]` tensor
        and a length-1 `sequence_length` vector. It previously received a
        rank-2 tensor and a scalar length, which does not match the
        `[1, n]` initial state.

    Args:
        state_space_size: Width of one observation row.
        action_space_size: Number of discrete actions.
        scope: Variable scope name; 'global' skips the training ops.
        trainer: Optimizer providing `apply_gradients`.
    """
    with tf.variable_scope(scope):
        # Input
        self.inputs = tf.placeholder(shape=[None, state_space_size],
                                     dtype=tf.float32)

        # Recurrent network for temporal dependencies
        lstm_cell = BasicLSTMCell(256, state_is_tuple=True)
        c_init = np.zeros((1, lstm_cell.state_size.c), dtype=np.float32)
        h_init = np.zeros((1, lstm_cell.state_size.h), dtype=np.float32)
        self.state_init = [c_init, h_init]
        c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
        h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
        self.state_in = (c_in, h_in)
        state_in = LSTMStateTuple(c_in, h_in)

        # One sequence (batch of 1) whose time steps are the input rows.
        rnn_in = tf.expand_dims(self.inputs, [0])
        step_size = tf.shape(self.inputs)[:1]
        lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
            lstm_cell, rnn_in, initial_state=state_in,
            sequence_length=step_size, time_major=False)
        lstm_c, lstm_h = lstm_state
        self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
        rnn_out = tf.reshape(lstm_outputs, [-1, 256])

        # Output layers for policy and value estimations
        self.policy = slim.fully_connected(
            rnn_out, action_space_size,
            activation_fn=tf.nn.softmax,
            weights_initializer=normalized_columns_initializer(0.01),
            biases_initializer=None)
        self.value = slim.fully_connected(
            rnn_out, 1,
            activation_fn=None,
            weights_initializer=normalized_columns_initializer(1.0),
            biases_initializer=None)

        # Only the worker network need ops for loss functions and
        # gradient updating.
        if scope != 'global':
            self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
            self.actions_onehot = tf.one_hot(
                self.actions, action_space_size, dtype=tf.float32)
            self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
            self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)

            # Probability the policy assigned to each action taken.
            self.responsible_outputs = tf.reduce_sum(
                self.policy * self.actions_onehot, [1])

            # Loss functions
            self.value_loss = 0.5 * tf.reduce_sum(
                tf.square(self.target_v - tf.reshape(self.value, [-1])))
            self.entropy = - tf.reduce_sum(
                self.policy * tf.log(self.policy))
            self.policy_loss = -tf.reduce_sum(
                tf.log(self.responsible_outputs) * self.advantages)
            self.loss = (0.5 * self.value_loss + self.policy_loss
                         - self.entropy * 0.01)

            # Get gradients from local network using local losses
            local_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            self.gradients = tf.gradients(self.loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(
                self.gradients, 40.0)

            # Apply local gradients to global network
            global_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = trainer.apply_gradients(
                zip(grads, global_vars))
def decoder(decoder_embed_input, decoder_y, target_length,
            max_target_length, l_z, l_y, states, keep_prob, reuse=False):
    """Build the training decoder with an auxiliary latent variable `u`.

    `u` is sampled from a Gaussian parameterised by two linear layers
    over `[l_z, l_y]`. The decoder's initial state is
    `LSTMStateTuple(states[0], h)`, where `h` is a softplus layer over
    `[l_z, l_y, u, l_y]`. The decoder input is the target sequence with a
    `<GO>` id prepended, embedded and concatenated with `decoder_y` on
    the last axis.

    Relies on the module-level names `a_size`, `weights_de`, `biases_de`,
    `n_hidden`, `n_input`, `batch_size`, `vocab_to_int` and
    `dic_embeddings`.

    Returns:
        `(training_logits, masks, u_mean, u_stddev)`.
    """
    with tf.variable_scope("decoder", reuse=reuse):
        l_yz = tf.concat([l_z, l_y], 1)

        # Gaussian parameters of the auxiliary latent variable.
        u_mean = tf.contrib.layers.fully_connected(
            inputs=l_yz, num_outputs=a_size, activation_fn=None,
            scope="u_mean")
        u_stddev = tf.contrib.layers.fully_connected(
            inputs=l_yz, num_outputs=a_size, activation_fn=None,
            scope="u_std")
        noise = tf.random_normal(tf.shape(u_stddev))
        l_u = u_mean + tf.exp(u_stddev * 0.5) * noise

        # Initial state is (C, H): C from the encoder, H from the latents.
        l_yzu = tf.concat([l_yz, l_u, l_y], 1)
        h_states = tf.nn.softplus(
            tf.matmul(l_yzu, weights_de['w_']) + biases_de['b_'])
        decoder_initial_state = LSTMStateTuple(states[0], h_states)

        decode_lstm = tf.contrib.rnn.LSTMCell(
            n_hidden, forget_bias=1.0, state_is_tuple=True)
        decode_cell = tf.contrib.rnn.DropoutWrapper(
            decode_lstm, output_keep_prob=keep_prob)
        output_layer = Dense(n_input)

        # Prepend one <GO> id to every target sequence.
        go_column = tf.fill([batch_size, 1], vocab_to_int['<GO>'])
        decoder_input_ = tf.concat([go_column, decoder_embed_input], 1)

        # Embedded tokens followed by the one-hot label at every step.
        decoder_input = tf.nn.embedding_lookup(
            dic_embeddings, decoder_input_)
        decoder_input = tf.concat([decoder_input, decoder_y], 2)

        training_helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=decoder_input, sequence_length=target_length)
        training_decoder = tf.contrib.seq2seq.BasicDecoder(
            decode_cell, training_helper, decoder_initial_state,
            output_layer)
        output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            training_decoder,
            impute_finished=True,
            maximum_iterations=max_target_length)

        # Created for its graph name only; not part of the return value.
        tf.identity(output.sample_id, name='predictions')
        training_logits = tf.identity(output.rnn_output, 'logits')

        # (batch_size, max_target_length)
        masks = tf.sequence_mask(
            target_length, max_target_length, dtype=tf.float32,
            name='masks')
        return training_logits, masks, u_mean, u_stddev
def call(self, inputs, state):
    """Run one step of the slot-memory LSTM cell.

    The flat `c`, `h` and `inputs` vectors are viewed as
    `[batch, mem_slots, mem_size]` matrices. The gates come from a
    tensordot of `[inputs, h]` (joined on the last axis) with the kernel.
    The candidate is the memory after `self._attend_over_memory`.

    Args:
        inputs: 2-D tensor, reshaped to `[batch, mem_slots, mem_size]`.
        state: `(c, h)` pair of flat tensors.

    Returns:
        `(new_h, LSTMStateTuple(new_c, new_h))`, both flattened.
    """
    sigmoid = math_ops.sigmoid
    add = math_ops.add
    multiply = math_ops.multiply

    c, h = state
    slot_shape = [-1, self._mem_slots, self._mem_size]

    h_mat = tf.reshape(h, slot_shape)
    inputs_mat = tf.reshape(inputs, slot_shape)

    # (b, slots, 2*mem_size) contracted with the kernel's first axis.
    input_plus_h = array_ops.concat([inputs_mat, h_mat], 2)
    gate_inputs = tf.tensordot(input_plus_h, self._kernel, axes=[[2], [0]])
    gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

    # The state arrives flat, so view it as a matrix before attending.
    mem_mat = tf.reshape(c, slot_shape)
    att_mem_mat = self._attend_over_memory(mem_mat, inputs)

    # i = input_gate, f = forget_gate, o = output_gate
    i, f, o = array_ops.split(
        value=gate_inputs, num_or_size_splits=self.gate_num, axis=2)

    forget_bias_tensor = constant_op.constant(
        self._forget_bias, dtype=f.dtype)

    c_mat = tf.reshape(c, slot_shape)
    forget_gate = sigmoid(add(f, forget_bias_tensor))
    retained = multiply(c_mat, forget_gate)
    input_gate = sigmoid(i)
    candidate = self._activation(att_mem_mat)
    written = multiply(input_gate, candidate)
    new_c = add(retained, written)
    new_h = multiply(self._activation(new_c), sigmoid(o))

    # Back from matrix to vector for the next step.
    new_c = tf.layers.flatten(new_c)
    new_h = tf.layers.flatten(new_h)
    return new_h, LSTMStateTuple(new_c, new_h)
def __init__(self,
             num_units,
             use_peepholes=False,
             cell_clip=None,
             initializer=None,
             num_proj=None,
             proj_clip=None,
             num_unit_shards=None,
             num_proj_shards=None,
             forget_bias=1.0,
             state_is_tuple=True,
             activation=None,
             reuse=None):
    """Store the cell configuration and derive the state/output sizes.

    Args:
        num_units: Number of units in the cell.
        use_peepholes: When truthy, the three peephole weight slots
            (``_w_f_diag``, ``_w_i_diag``, ``_w_o_diag``) are created and
            initialised to ``None``.
        cell_clip: Stored as ``_cell_clip``.
        initializer: Stored as ``_initializer``.
        num_proj: Projection size. When truthy it replaces ``num_units`` as
            the output size and as the size of the second state component.
        proj_clip: Stored as ``_proj_clip``.
        num_unit_shards: Stored as ``_num_unit_shards``.
        num_proj_shards: Stored as ``_num_proj_shards``.
        forget_bias: Stored as ``_forget_bias``.
        state_is_tuple: If true the state size is an `LSTMStateTuple`,
            otherwise the sum of both component sizes.
        activation: Activation function; ``math_ops.tanh`` is used when this
            is falsy.
        reuse: Forwarded to the parent constructor as ``_reuse``.
    """
    super(CustomLSTMCell, self).__init__(_reuse=reuse)

    self._num_units = num_units
    self._use_peepholes = use_peepholes
    self._cell_clip = cell_clip
    self._initializer = initializer
    self._num_proj = num_proj
    self._proj_clip = proj_clip
    self._num_unit_shards = num_unit_shards
    self._num_proj_shards = num_proj_shards
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation or math_ops.tanh

    # The hidden/output width is the projection size when one is configured,
    # and the number of units otherwise.
    hidden_size = num_proj if num_proj else num_units
    if state_is_tuple:
        self._state_size = LSTMStateTuple(num_units, hidden_size)
    else:
        self._state_size = num_units + hidden_size
    self._output_size = hidden_size

    # Placeholders that start out as None and are filled in elsewhere.
    self._linear1 = None
    self._linear2 = None
    if self._use_peepholes:
        self._w_f_diag = None
        self._w_i_diag = None
        self._w_o_diag = None
def call(self, inputs, state):
    """Long short-term memory cell with attention (LSTMA) and label history.

    Runs the wrapped cell for one step, embeds its output as a label vector,
    pushes that vector onto a fixed-length history track (dropping the
    oldest entry), and derives a new hidden vector from the history plus the
    wrapped cell's hidden state.

    Args:
        inputs: Input tensor forwarded unchanged to the wrapped cell.
        state: A pair ``(cell_state, hisTrack)`` where ``cell_state`` is the
            3-tuple ``(lstm_state, attns, attn_states)`` expected by the
            wrapped cell and ``hisTrack`` can be reshaped to
            ``[-1, use_K_histroy, label_emb_size]``.

    Returns:
        A pair ``(cell_output, new_state)`` where ``new_state`` is
        ``((LSTMStateTuple(cell_output, m), attns, attn_states),
        flattened_history)``.

    Raises:
        ValueError: If the cell was built with ``state_is_tuple=False``.
            Only the tuple layout is implemented; previously that setting
            ended in a ``NameError`` after the wrapped cell had already
            been invoked.
    """
    if not self._state_is_tuple:
        raise ValueError(
            "%s only supports state_is_tuple=True: the history track and "
            "the attention state are unpacked from a tuple state." % self)

    state, hisTrack = state
    # The incoming attention tensors are passed through to the new state
    # unchanged; the LSTM part of the incoming state is only consumed by
    # the wrapped cell.
    _, attns, attn_states = state

    cell_output, new_state = self._cell(inputs, state)

    # Keep only the hidden part of the wrapped cell's new LSTM state.
    inner_lstm_state, _, _ = new_state
    _, m_prev = inner_lstm_state
    m_prev = tf.expand_dims(m_prev, axis=1)

    history_len = self.config.use_K_histroy
    emb_size = self.config.label_emb_size

    # Label embedding of the current output, shaped (batch, 1, emb).
    label_emb = tf.nn.relu(tf.matmul(cell_output, self.emb_M3))
    label_emb = tf.expand_dims(label_emb, axis=1)

    # Slide the history window: drop the oldest entry, append the newest.
    hisTrack = tf.reshape(hisTrack, shape=[-1, history_len, emb_size])
    new_hisTrack = tf.slice(
        hisTrack, [0, 1, 0], [-1, history_len - 1, emb_size])
    concat_hisTrack = tf.concat([new_hisTrack, label_emb], axis=1)

    # History plus the cell's hidden vector. The concat on axis 1 and the
    # reshape below require the hidden size to equal label_emb_size.
    concat_all = tf.concat([concat_hisTrack, m_prev], axis=1)
    concat_all_flatten = tf.reshape(
        concat_all, shape=[-1, (history_len + 1) * emb_size])
    concat_hisTrack_flatten = tf.reshape(
        concat_hisTrack, shape=[-1, history_len * emb_size])

    m = tf.nn.relu(tf.matmul(concat_all_flatten, self.emb_M4k))

    # NOTE(review): cell_output is stored in the `c` slot of the new
    # LSTMStateTuple and the wrapped cell's own c is discarded - confirm
    # this is intended.
    new_state_tuple = (LSTMStateTuple(cell_output, m), attns, attn_states)
    new_send_state = (new_state_tuple, concat_hisTrack_flatten)
    return cell_output, new_send_state
def __init__(self,
             num_units,
             highway=False,
             cell_clip=None,
             initializer=None,
             forget_bias=1.0,
             activation=None,
             reuse=None,
             name=None,
             use_layer_norm=False):
    """Set up an LSTM cell with simplified highway connections.

    The design follows 'Deep Semantic Role Labeling: What works and what's
    next' (He et al. 2017).

    Args:
        num_units: int, number of units in the LSTM cell. It is also the
            output size and the size of both state components.
        highway: (optional) Python boolean, whether highway connections are
            included.
        cell_clip: (optional) float; if given, the cell state is clipped by
            this value before the cell output activation.
        initializer: (optional) initializer for the weight matrices. An
            orthonormal initializer is used if none is provided.
        forget_bias: Bias added to the forget gate; defaults to 1 so that
            little is forgotten at the start of training.
        activation: Activation function of the inner states; defaults to
            `tanh`.
        reuse: (optional) Python boolean, whether to reuse variables in an
            existing scope. If not `True` and the scope already holds the
            variables, an error is raised.
        name: String, the layer name. Layers sharing a name share weights;
            `reuse=True` is required in that case to avoid mistakes.
        use_layer_norm: (optional) Python boolean, whether layer
            normalization is used.
    """
    super(HighwayLSTMCell, self).__init__(_reuse=reuse, name=name)

    # Inputs must be 2-dimensional.
    self.input_spec = base_layer.InputSpec(ndim=2)

    # Configuration.
    self._num_units = num_units
    self._highway = highway
    self._cell_clip = cell_clip
    self._initializer = initializer
    self._forget_bias = forget_bias
    self._activation = activation or math_ops.tanh
    self.use_layer_norm = use_layer_norm

    # Sizes reported to the RNN machinery.
    self._state_size = LSTMStateTuple(num_units, num_units)
    self._output_size = num_units

    # Variables are created later, in self.build.
    self._input_kernel = None
    self._hidden_kernel = None
    self._bias = None
def get_init_state(args, name, q_type, shape):
    """Look up a learned initial RNN state for the given ``q_type``.

    Two variables, ``'hinit_ebd_' + name`` and ``'cinit_ebd_' + name``, are
    created through ``make_var`` with the given ``shape``. The rows selected
    by ``q_type`` get a new leading axis of size 1 and are returned in the
    form required by the configured cell type.

    Args:
        args: Object whose ``cell`` attribute names the cell type; any
            ``'bi-'`` substring is removed before the type is matched.
        name: Suffix used in the two variable names.
        q_type: Indices passed to ``tf.nn.embedding_lookup``.
        shape: Shape passed to ``make_var`` for both variables.

    Returns:
        ``LSTMStateTuple(c_init, h_init)`` for ``'lstm'``; ``h_init`` alone
        for ``'sru'``, ``'gru'`` and ``'rnn'``.

    Raises:
        KeyError: If the cell type is none of the above.
    """
    hinit_embed = make_var('hinit_ebd_' + name, shape)
    cinit_embed = make_var('cinit_ebd_' + name, shape)

    h_init = tf.expand_dims(
        tf.nn.embedding_lookup(hinit_embed, q_type), axis=0)
    c_init = tf.expand_dims(
        tf.nn.embedding_lookup(cinit_embed, q_type), axis=0)

    cell_kind = args.cell.replace('bi-', '')
    if cell_kind == 'lstm':
        return LSTMStateTuple(c_init, h_init)
    if cell_kind in ('sru', 'gru', 'rnn'):
        # These cells carry a single state tensor.
        return h_init
    raise KeyError(cell_kind)
def call(self, inputs, state):
    """Long short-term memory cell (LSTM) with a periodic skip connection.

    Each invocation increments ``self._step``. When the new value is a
    multiple of ``self._skip_size``, the hidden output is a weighted mix of
    the regular LSTM output and a sigmoid projection of the previous hidden
    state; otherwise it is the regular LSTM output.

    Args:
        inputs: `2-D` tensor with shape `[batch_size x input_size]`.
        state: An `LSTMStateTuple` of state tensors, each shaped
            `[batch_size x self.state_size]`, if `state_is_tuple` has been
            set to `True`. Otherwise, a `Tensor` shaped
            `[batch_size x 2 * self.state_size]`.

    Returns:
        A pair containing the new hidden state, and the new state (either a
        `LSTMStateTuple` or a concatenated state, depending on
        `state_is_tuple`).
    """
    # Counts calls to this method; it decides below whether this step gets
    # the skip connection.
    self._step += 1

    if self._state_is_tuple:
        c, h = state
    else:
        c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

    # Parameters of gates are concatenated into one multiply for efficiency.
    # The linear map is built lazily on first use.
    if self._linear is None:
        self._linear = _Linear([inputs, h], 4 * self._num_units, True)
    gates = self._linear([inputs, h])

    # i = input gate, j = candidate, f = forget gate, o = output gate
    i, j, f, o = tf.split(value=gates, num_or_size_splits=4, axis=1)

    kept = c * tf.sigmoid(f + self._forget_bias)
    added = tf.sigmoid(i) * self._activation(j)
    new_c = kept + added
    new_h_cnt = self._activation(new_c) * tf.sigmoid(o)

    if self._step % self._skip_size != 0:
        new_h = new_h_cnt
    else:
        # Skip step: blend the regular output with a projection of the
        # previous hidden state. weight_bias is called on every such step.
        w_h_skip, b_h_skip = self.weight_bias(
            [self._num_units, self._num_units], [self._num_units])
        new_h_skip = tf.sigmoid(tf.matmul(h, w_h_skip) + b_h_skip)
        masked_w1, masked_w2 = self.masked_weight(_load=False)
        new_h = new_h_cnt * masked_w1 + new_h_skip * masked_w2

    if self._state_is_tuple:
        new_state = LSTMStateTuple(new_c, new_h)
    else:
        new_state = tf.concat([new_c, new_h], 1)
    return new_h, new_state