def GetParams(self):
    """Tests for scale & elementwise layers in TF-TRT.

    Builds a graph that alternates broadcasted additions (constant on either
    side of the add) with sigmoids, covering both "scale" shapes (broadcast
    over batch) and full "elementwise" shapes, then returns the test params.
    """
    input_name = "input"
    input_dims = [10, 24, 24, 20]
    output_name = "output"
    g = ops.Graph()
    with g.as_default():
        x = array_ops.placeholder(
            dtype=dtypes.float32, shape=input_dims, name=input_name)
        for weights_shape in [
            (1,),  # scale
            (24, 1, 1),  # scale
            (24, 24, 20),  # scale
            (20,),  # elementwise
            (1, 24, 1, 1),  # elementwise
            (1, 24, 24, 1),  # elementwise
            (1, 24, 24, 20),  # elementwise
            (24, 20),  # elementwise
        ]:
            # Exercise the constant on both sides of the add (x + a and a + x).
            a = self._ConstOp(weights_shape)
            f = x + a
            x = math_ops.sigmoid(f)
            a = self._ConstOp(weights_shape)
            f = a + x
            x = math_ops.sigmoid(f)
        # 10*24*24*20 elements reshaped to (5, 23040).
        gen_array_ops.reshape(x, [5, -1], name=output_name)
    return trt_test.TfTrtIntegrationTestParams(
        gdef=g.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        output_names=[output_name],
        expected_output_dims=[(5, 23040)])
def call(self, inputs, state):
    """Runs one step of a factorized LSTM cell.

    The fused gate projection is factorized into two matmuls through a
    bottleneck of size `self._fact_size`, optionally followed by an output
    projection of size `self._num_proj`.  `state` is an `LSTMStateTuple`
    `(c_prev, m_prev)`; returns `(m, new_state)`.
    """
    (c_prev, m_prev) = state
    # Static batch size when known, dynamic shape op otherwise.
    self._batch_size = inputs.shape[0].value or array_ops.shape(inputs)[0]
    scope = vs.get_variable_scope()
    with vs.variable_scope(scope, initializer=self._initializer):
        x = array_ops.concat([inputs, m_prev], axis=1)
        # Bottleneck: project [inputs, m_prev] down to the factorization size.
        with vs.variable_scope("first_gemm"):
            if self._linear1 is None:
                # no bias for bottleneck
                self._linear1 = _Linear(x, self._fact_size, False)
            R_fact = self._linear1(x)
        # Expand back up to the four fused gate pre-activations.
        with vs.variable_scope("second_gemm"):
            if self._linear2 is None:
                self._linear2 = _Linear(R_fact, 4 * self._num_units, True)
            R = self._linear2(R_fact)
        i, j, f, o = array_ops.split(R, 4, 1)
        c = (math_ops.sigmoid(f + self._forget_bias) * c_prev +
             math_ops.sigmoid(i) * math_ops.tanh(j))
        m = math_ops.sigmoid(o) * self._activation(c)
        # Optional output projection, lazily constructed on first call.
        if self._num_proj is not None:
            with vs.variable_scope("projection"):
                if self._linear3 is None:
                    self._linear3 = _Linear(m, self._num_proj, False)
                m = self._linear3(m)
    new_state = rnn_cell_impl.LSTMStateTuple(c, m)
    return m, new_state
def __call__(self, inputs, state, scope=None):
    """LSTM cell with layer normalization and recurrent dropout."""
    with vs.variable_scope(scope or type(self).__name__) as scope:  # LayerNormBasicLSTMCell
        # pylint: disable=unused-variables
        c, h = state
        # NOTE: pre-1.0 TF concat/split signatures (axis comes first).
        args = array_ops.concat(1, [inputs, h])
        concat = self._linear(args)
        i, j, f, o = array_ops.split(1, 4, concat)
        if self._layer_norm:
            i = self._norm(i, "input")
            j = self._norm(j, "transform")
            f = self._norm(f, "forget")
            o = self._norm(o, "output")
        g = self._activation(j)
        # Recurrent dropout is applied only to the candidate input `g`, so
        # the cell never forgets via dropout (per the recurrent-dropout paper).
        if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1:
            g = nn_ops.dropout(g, self._keep_prob, seed=self._seed)
        new_c = (c * math_ops.sigmoid(f + self._forget_bias) +
                 math_ops.sigmoid(i) * g)
        if self._layer_norm:
            new_c = self._norm(new_c, "state")
        new_h = self._activation(new_c) * math_ops.sigmoid(o)
        new_state = rnn_cell.LSTMStateTuple(new_c, new_h)
        return new_h, new_state
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM) with tensor-train factorized gates."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        if self._state_is_tuple:
            c, h = state
        else:
            # Flat state is [c, h] concatenated (pre-1.0 split: axis first).
            c, h = array_ops.split(1, 2, state)
        # Each gate gets its own tensor-train (TT) factorized linear map
        # instead of the usual single fused 4*num_units projection.
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate.
        i = linear_tt([inputs, h], self._num_units, self._mat_ranks, bias=True, scope="i")
        j = linear_tt([inputs, h], self._num_units, self._mat_ranks, bias=True, scope="j")
        f = linear_tt([inputs, h], self._num_units, self._mat_ranks, bias=True, scope="f")
        o = linear_tt([inputs, h], self._num_units, self._mat_ranks, bias=True, scope="o")
        new_c = (c * sigmoid(f + self._forget_bias) + sigmoid(i) *
                 self._activation(j))
        new_h = self._activation(new_c) * sigmoid(o)
        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat(1, [new_c, new_h])
        return new_h, new_state
def _logits_to_prediction(self, logits=None):
    """Builds the prediction dict (logits/logistic/probabilities/classes).

    For the binary case (`logits_dimension == 1`) the 1-D logits are expanded
    to two-class logits `[0, logit]` before probabilities/classes are derived.
    """
    predictions = {PredictionKey.LOGITS: logits}
    if self.logits_dimension == 1:
        predictions[PredictionKey.LOGISTIC] = math_ops.sigmoid(logits)
        # Expand to two-class logits; pre-1.0 concat takes the axis first.
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[PredictionKey.PROBABILITIES] = math_ops.sigmoid(logits)
    # Per-column thresholding at logit 0 (i.e. probability 0.5).
    predictions[PredictionKey.CLASSES] = math_ops.to_int64(
        math_ops.greater(logits, 0))
    return predictions
def LSTMCell(cls, x, mprev, cprev, weights):
    """Runs one LSTM step given a single fused weight matrix.

    The four gate pre-activations are produced by one matmul of
    `[x, mprev]` against `weights` and then split.  The new cell state is
    clipped to [-50, 50] for numerical stability.  Returns `(new_m, new_c)`.
    """
    cell_inputs = array_ops.concat([x, mprev], 1)
    gate_preacts = math_ops.matmul(cell_inputs, weights)
    in_val, in_gate, forget_gate, out_gate = array_ops.split(
        value=gate_preacts, num_or_size_splits=4, axis=1)
    cell_state = math_ops.sigmoid(forget_gate) * cprev + math_ops.sigmoid(
        in_gate) * math_ops.tanh(in_val)
    cell_state = clip_ops.clip_by_value(cell_state, -50.0, 50.0)
    cell_output = math_ops.sigmoid(out_gate) * math_ops.tanh(cell_state)
    return cell_output, cell_state
def _logits_to_prediction(self, logits=None):
    """Builds the prediction dict (logits/logistic/probabilities/classes).

    NOTE(review): `PedictionKey` looks like a typo of `PredictionKey`, but it
    is an externally defined name referenced consistently here — renaming it
    must be coordinated with its definition site; left unchanged.
    """
    predictions = {PedictionKey.LOGITS: logits}
    if self.logits_dimension == 1:
        predictions[PedictionKey.LOGISTIC] = math_ops.sigmoid(logits)
        # Expand 1-D logits to two-class logits [0, logit] (pre-1.0 concat:
        # axis first).
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[PedictionKey.PROBABILITIES] = math_ops.sigmoid(logits)
    # Workaround for argmax dropping the second dimension.
    predictions[PedictionKey.CLASSES] = math_ops.to_int64(
        math_ops.greater(logits, 0))
    return predictions
def __call__(self, inputs, state, scope=None):
    """Gated recurrent unit (GRU) with nunits cells."""
    with vs.variable_scope(scope or type(self).__name__):  # "GRUCell"
        with vs.variable_scope("Gates"):  # Reset gate and update gate.
            # We start with bias of 1.0 to not reset and not update.
            # Pre-1.0 split signature: axis comes first.
            r, u = array_ops.split(1, 2, linear([inputs, state],
                                                2 * self._num_units, True, 1.0))
            r, u = sigmoid(r), sigmoid(u)
        with vs.variable_scope("Candidate"):
            # Reset gate scales the recurrent contribution to the candidate.
            c = tanh(linear([inputs, r * state], self._num_units, True))
        new_h = u * state + (1 - u) * c
    return new_h, new_h
def __call__(self, inputs, state, scope=None):
    """Gated recurrent unit (GRU) with nunits cells."""
    with vs.variable_scope(scope or "gru_cell"):
        with vs.variable_scope("gates"):  # Reset gate and update gate.
            # We start with bias of 1.0 to not reset and not update.
            # Pre-1.0 split signature: axis comes first.
            r, u = array_ops.split(1, 2, _linear([inputs, state],
                                                 2 * self._num_units, True, 1.0,
                                                 scope=scope))
            r, u = sigmoid(r), sigmoid(u)
        with vs.variable_scope("candidate"):
            # Reset gate scales the recurrent contribution to the candidate.
            c = self._activation(_linear([inputs, r * state],
                                         self._num_units, True, scope=scope))
        new_h = u * state + (1 - u) * c
    return new_h, new_h
def _logits_to_predictions(self, logits):
    """See `_MultiClassHead`."""
    predictions = {prediction_key.PredictionKey.LOGITS: logits}
    if self.logits_dimension == 1:
        predictions[prediction_key.PredictionKey.LOGISTIC] = math_ops.sigmoid(
            logits)
        # Expand 1-D logits to two-class logits [0, logit] (pre-1.0 concat:
        # axis first).
        logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits])
    predictions[prediction_key.PredictionKey.PROBABILITIES] = math_ops.sigmoid(
        logits)
    # Per-column thresholding at logit 0 (i.e. probability 0.5).
    predictions[prediction_key.PredictionKey.CLASSES] = math_ops.to_int64(
        math_ops.greater(logits, 0))
    return predictions
def __call__(self, inputs, state, scope=None):
    """Recurrent Highway Network cell (RHN).

    Applies `self._recurrence_depth` highway micro-steps per time step.  The
    first micro-step consumes `inputs`; subsequent ones transform only the
    running state `s`.

    NOTE(review): this returns only `new_y`, not the usual `(output, state)`
    pair — `new_state` is computed but discarded.  Confirm against callers;
    left unchanged to preserve the external interface.
    """
    with vs.variable_scope(scope or type(self).__name__):  # "BasicRHNCell"
        if self._state_is_tuple:
            y = state
        else:
            # NOTE(review): split into 1 part returns the state wrapped in a
            # list — presumably intended to mirror the tuple case; confirm.
            y = array_ops.split(1, 1, state)
        assert self._recurrence_depth > 0 and type(self._recurrence_depth) is int
        for i in range(self._recurrence_depth):
            if i == 0:
                concat = _linear([inputs, y], 2 * self._num_units, True)
                # h = nonlinear transform, t = transfer gate
                h, t = array_ops.split(1, 2, concat)
                t = sigmoid(t + self._transfer_bias)
                # Highway mix: carry (1 - t) routes a projection of the input.
                s = self._activation(h) * t + \
                    (1.0 - t) * _linear([inputs], 1 * self._num_units, False)
            if i > 0:
                concat = _linear([s], 2 * self._num_units, True)
                # h = nonlinear transform, t = transfer gate
                h, t = array_ops.split(1, 2, concat)
                t = sigmoid(t + self._transfer_bias)
                # Highway mix: carry (1 - t) routes the previous micro-state.
                s = self._activation(h) * t + \
                    (1.0 - t) * s
        new_y = s
        if self._state_is_tuple:
            new_state = RHNStateTuple(new_y)
        else:
            new_state = array_ops.concat(1, new_y)
        return new_y
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM) with hypernetworks and layer normalization.

    The flat state packs `[main_h | hyper_h]` and `[main_c | hyper_c]`; the
    hyper-cell's output rescales the main cell's input/state projections.
    """
    with vs.variable_scope(scope or type(self).__name__):
        # Unpack main and hyper states (pre-1.0 split/concat: axis first).
        total_h, total_c = tf.split(1, 2, state)
        h = total_h[:, 0:self._num_units]
        c = total_c[:, 0:self._num_units]
        self.hyper_state = tf.concat(1, [total_h[:, self._num_units:],
                                         total_c[:, self._num_units:]])
        # Run the hyper-cell on [inputs, h] to produce the scaling signals.
        hyper_input = tf.concat(1, [inputs, h])
        hyper_output, hyper_new_state = self.hyper_cell(hyper_input,
                                                        self.hyper_state)
        self.hyper_output = hyper_output
        self.hyper_state = hyper_new_state
        # Input and state are projected separately so each can be rescaled by
        # the hypernetwork (and layer-normalized) before being summed.
        input_below_ = rnn_cell._linear([inputs],
                                        4 * self._num_units, False, scope="out_1")
        input_below_ = self.hyper_norm(input_below_, 4 * self._num_units,
                                       scope="hyper_x")
        state_below_ = rnn_cell._linear([h],
                                        4 * self._num_units, False, scope="out_2")
        state_below_ = self.hyper_norm(state_below_, 4 * self._num_units,
                                       scope="hyper_h")
        if self.is_layer_norm:
            # Layer-norm gains (s*) and shifts (b*); s3/b3 are reserved for
            # normalizing the cell state (currently disabled below).
            s1 = vs.get_variable("s1", initializer=tf.ones([4 * self._num_units]), dtype=tf.float32)
            s2 = vs.get_variable("s2", initializer=tf.ones([4 * self._num_units]), dtype=tf.float32)
            s3 = vs.get_variable("s3", initializer=tf.ones([self._num_units]), dtype=tf.float32)
            b1 = vs.get_variable("b1", initializer=tf.zeros([4 * self._num_units]), dtype=tf.float32)
            b2 = vs.get_variable("b2", initializer=tf.zeros([4 * self._num_units]), dtype=tf.float32)
            b3 = vs.get_variable("b3", initializer=tf.zeros([self._num_units]), dtype=tf.float32)
            input_below_ = ln(input_below_, s1, b1)
            state_below_ = ln(state_below_, s2, b2)
        lstm_matrix = tf.add(input_below_, state_below_)
        i, j, f, o = array_ops.split(1, 4, lstm_matrix)
        new_c = (c * sigmoid(f) + sigmoid(i) * self._activation(j))
        # Currently normalizing c causes lot of nan's in the model, thus
        # commenting it out for now.
        # new_c_ = ln(new_c, s3, b3)
        new_c_ = new_c
        new_h = self._activation(new_c_) * sigmoid(o)
        # Re-pack main and hyper states into the flat state layout.
        hyper_h, hyper_c = tf.split(1, 2, hyper_new_state)
        new_total_h = tf.concat(1, [new_h, hyper_h])
        new_total_c = tf.concat(1, [new_c, hyper_c])
        new_total_state = tf.concat(1, [new_total_h, new_total_c])
        return new_h, new_total_state
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM)."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        # Parameters of gates are concatenated into one multiply for efficiency.
        # Flat state is [c, h] concatenated (pre-1.0 split/concat: axis first).
        c, h = array_ops.split(1, 2, state)
        concat = linear([inputs, h], 4 * self._num_units, True)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(1, 4, concat)
        new_c = c * sigmoid(f + self._forget_bias) + sigmoid(i) * tanh(j)
        new_h = tanh(new_c) * sigmoid(o)
        return new_h, array_ops.concat(1, [new_c, new_h])
def __call__(self, inputs, state, scope=None):
    """Gated recurrent unit (GRU) with an optional multiplicative time gate.

    When `self._use_tgate` is set, the last column of `inputs` is treated as a
    time delta driving an extra sigmoid gate `t1` that modulates a second,
    decaying state; the state is then a `TGRUStateTuple(state, state_decay)`
    and the returned output is `[new_h, new_h_decay]` concatenated.
    """
    dtype = inputs.dtype
    batch_size, feature_size = inputs.get_shape().as_list()
    if self._use_tgate:
        # Time gate: the trailing input column is the time delta; the rest
        # are ordinary features.
        feature_size = feature_size - 1
        tvscope = vs.get_variable_scope()
        with vs.variable_scope(tvscope, initializer=None) as unit_scope:
            with vs.variable_scope(unit_scope) as time_gate_scope:
                w_t1 = vs.get_variable(
                    "w_t1", shape=[1, self._num_units], dtype=dtype)
                bias_t1 = vs.get_variable(
                    "bias_t1", [self._num_units], dtype=dtype,
                    initializer=init_ops.constant_initializer(0.0, dtype=dtype))
                w_tx1 = vs.get_variable(
                    "w_tx1", shape=[feature_size, self._num_units], dtype=dtype)
        seq = tf.slice(inputs, begin=[0, 0], size=[batch_size, feature_size])
        # BUGFIX: the time-delta column was sliced at the hard-coded offset
        # 56, which is only correct for inputs with exactly 57 columns.  Use
        # the actual feature width so any input size works (identical
        # behavior for the 57-column case).
        delta_t = tf.slice(inputs, begin=[0, feature_size],
                           size=[batch_size, 1])
        t1_act = (self._activation(math_ops.matmul(delta_t, w_t1)) +
                  math_ops.matmul(seq, w_tx1) + bias_t1)
        t1 = sigmoid(t1_act)
        inputs = seq
        # for initial state
        (state, state_decay) = state
    with vs.variable_scope("gates"):  # Reset gate and update gate.
        # We start with bias of 1.0 to not reset and not update.
        value = sigmoid(_linear(
            [inputs, state], 2 * self._num_units, True, 1.0))
        r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
    with vs.variable_scope("candidate"):
        c = self._activation(_linear([inputs, r * state],
                                     self._num_units, True))
    new_h = u * state + (1 - u) * c
    if self._use_tgate:
        # Decay state: the update gate is additionally scaled by the time gate.
        new_h_decay = u * t1 * state_decay + (1 - u * t1) * c
        new_state = TGRUStateTuple(new_h, new_h_decay)
        new_h = tf.concat([new_h, new_h_decay], axis=1)
    else:
        new_state = TGRUStateTuple(new_h, new_h)
    return new_h, new_state
def __call__(self, inputs, state, scope=None):
    """Gated recurrent unit (GRU) with layer-normalized projections."""
    dim = self._num_units
    with vs.variable_scope(scope or type(self).__name__):  # "GRUCell"
        with vs.variable_scope("Gates"):  # Reset gate and update gate.
            # We start with bias of 1.0 to not reset and not update.
            with vs.variable_scope("Layer_Parameters"):
                # Layer-norm gains (s*) and shifts (b*): s1/b1 for the input
                # gate projection, s2/b2 for the state gate projection,
                # s3/b3 and s4/b4 for the candidate projections.
                s1 = vs.get_variable("s1", initializer=tf.ones([2 * dim]), dtype=tf.float32)
                s2 = vs.get_variable("s2", initializer=tf.ones([2 * dim]), dtype=tf.float32)
                s3 = vs.get_variable("s3", initializer=tf.ones([dim]), dtype=tf.float32)
                s4 = vs.get_variable("s4", initializer=tf.ones([dim]), dtype=tf.float32)
                b1 = vs.get_variable("b1", initializer=tf.zeros([2 * dim]), dtype=tf.float32)
                b2 = vs.get_variable("b2", initializer=tf.zeros([2 * dim]), dtype=tf.float32)
                b3 = vs.get_variable("b3", initializer=tf.zeros([dim]), dtype=tf.float32)
                b4 = vs.get_variable("b4", initializer=tf.zeros([dim]), dtype=tf.float32)
            # Input and state are projected separately so each can be
            # layer-normalized before the two are summed.
            input_below_ = rnn_cell._linear([inputs],
                                            2 * self._num_units, False, scope="out_1")
            input_below_ = ln(input_below_, s1, b1)
            state_below_ = rnn_cell._linear([state],
                                            2 * self._num_units, False, scope="out_2")
            state_below_ = ln(state_below_, s2, b2)
            out = tf.add(input_below_, state_below_)
            # Pre-1.0 split signature: axis comes first.
            r, u = array_ops.split(1, 2, out)
            r, u = sigmoid(r), sigmoid(u)
        with vs.variable_scope("Candidate"):
            input_below_x = rnn_cell._linear([inputs],
                                             self._num_units, False, scope="out_3")
            input_below_x = ln(input_below_x, s3, b3)
            state_below_x = rnn_cell._linear([state],
                                             self._num_units, False, scope="out_4")
            state_below_x = ln(state_below_x, s4, b4)
            # Reset gate only scales the recurrent contribution.
            c_pre = tf.add(input_below_x, r * state_below_x)
            c = self._activation(c_pre)
        new_h = u * state + (1 - u) * c
    return new_h, new_h
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM) with layer-normalized projections."""
    with vs.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        if self._state_is_tuple:
            c, h = state
        else:
            # Flat state is [c, h] concatenated (pre-1.0 split: axis first).
            c, h = array_ops.split(1, 2, state)
        # Layer-norm gains (s*) and shifts (b*); s3/b3 are reserved for
        # normalizing the cell state (currently disabled below).
        s1 = vs.get_variable("s1", initializer=tf.ones([4 * self._num_units]), dtype=tf.float32)
        s2 = vs.get_variable("s2", initializer=tf.ones([4 * self._num_units]), dtype=tf.float32)
        s3 = vs.get_variable("s3", initializer=tf.ones([self._num_units]), dtype=tf.float32)
        b1 = vs.get_variable("b1", initializer=tf.zeros([4 * self._num_units]), dtype=tf.float32)
        b2 = vs.get_variable("b2", initializer=tf.zeros([4 * self._num_units]), dtype=tf.float32)
        b3 = vs.get_variable("b3", initializer=tf.zeros([self._num_units]), dtype=tf.float32)
        # Input and hidden-state projections are normalized separately, then
        # summed to form the fused gate pre-activations.
        input_below_ = rnn_cell._linear([inputs],
                                        4 * self._num_units, False, scope="out_1")
        input_below_ = ln(input_below_, s1, b1)
        state_below_ = rnn_cell._linear([h],
                                        4 * self._num_units, False, scope="out_2")
        state_below_ = ln(state_below_, s2, b2)
        lstm_matrix = tf.add(input_below_, state_below_)
        i, j, f, o = array_ops.split(1, 4, lstm_matrix)
        new_c = (c * sigmoid(f) + sigmoid(i) * self._activation(j))
        # Currently normalizing c causes lot of nan's in the model, thus
        # commenting it out for now.
        # new_c_ = ln(new_c, s3, b3)
        new_c_ = new_c
        new_h = self._activation(new_c_) * sigmoid(o)
        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat(1, [new_c, new_h])
        return new_h, new_state
def GetParams(self):
    """Test for multi connection neighboring nodes wiring tests in TF-TRT."""
    dtype = dtypes.float32
    input_name = "input"
    input_dims = [2, 3, 7, 5]
    g = ops.Graph()
    with g.as_default():
        x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name)
        e = constant_op.constant(
            np.random.normal(.05, .005, [3, 2, 3, 4]), name="weights",
            dtype=dtype)
        conv = nn.conv2d(
            input=x, filter=e, data_format="NCHW", strides=[1, 1, 1, 1],
            padding="VALID", name="conv")
        # NOTE: the name "bias" is deliberately reused below; TF uniquifies
        # the op names (bias, bias_1, bias_2).
        b = constant_op.constant(
            np.random.normal(2.0, 1.0, [1, 4, 1, 1]), name="bias", dtype=dtype)
        t = conv + b
        b = constant_op.constant(
            np.random.normal(5.0, 1.0, [1, 4, 1, 1]), name="bias", dtype=dtype)
        q = conv - b
        edge = math_ops.sigmoid(q)
        b = constant_op.constant(
            np.random.normal(5.0, 1.0, [1, 4, 1, 1]), name="bias", dtype=dtype)
        d = b + conv
        edge3 = math_ops.sigmoid(d)
        edge1 = gen_math_ops.tan(conv)
        # Wire the conv output through multiple neighboring consumers so the
        # converter must handle fan-out across engine boundaries.
        t = t - edge1
        q = q + edge
        t = t + q
        t = t + d
        t = t - edge3
        array_ops.squeeze(t, name=self.output_name)
    return trt_test.TfTrtIntegrationTestParams(
        gdef=g.as_graph_def(),
        input_names=[input_name],
        input_dims=[input_dims],
        expected_engines=["my_trt_op_0", "my_trt_op_1"],
        expected_output_dims=(2, 4, 5, 4),
        allclose_atol=1.e-03,
        allclose_rtol=1.e-03)
def _Model(x):
    """Returns `sigmoid(x @ w + b)` for a fixed 64x64 dense layer.

    `w` is seeded-uniform initialized for reproducibility; `b` is zeros.
    """
    w = variable_scope.get_variable(
        "w", (64, 64),
        initializer=init_ops.random_uniform_initializer(seed=312))
    # BUGFIX: the original had a stray trailing comma after this call, which
    # bound `b` to a 1-tuple `(variable,)` instead of the bias variable
    # itself, corrupting the `+ b` broadcast below.
    b = variable_scope.get_variable(
        "b", (64), initializer=init_ops.zeros_initializer())
    return math_ops.sigmoid(math_ops.matmul(x, w) + b)
def predictions(self, examples):
    """Add operations to compute predictions by the model.

    If logistic_loss is being used, predicted probabilities are returned.
    Otherwise, (raw) linear predictions (w*x) are returned.

    Args:
      examples: Examples to compute predictions on.

    Returns:
      An Operation that computes the predictions for examples.

    Raises:
      ValueError: if examples are not well defined.
    """
    self._assertSpecified(
        ['example_weights', 'sparse_features', 'dense_features'], examples)
    self._assertList(['sparse_features', 'dense_features'], examples)

    result = self._linear_predictions(examples)
    if self._options['loss_type'] != 'logistic_loss':
        return result
    # Convert logits to probability for logistic loss predictions.
    with name_scope('sdca/logistic_prediction'):
        return math_ops.sigmoid(result)
def call(self, inputs, state):
    """Gated recurrent unit (GRU) with nunits cells."""
    with vs.variable_scope("gates"):  # Reset gate and update gate.
        # We start with bias of 1.0 to not reset and not update.
        bias_ones = self._bias_initializer
        if self._bias_initializer is None:
            dtype = inputs.dtype
            bias_ones = init_ops.constant_initializer(1.0, dtype=dtype)
        # pylint: disable=protected-access
        value = math_ops.sigmoid(
            rnn_cell_impl._linear([inputs, state], 2 * self._num_units, True,
                                  bias_ones, self._kernel_initializer))
        r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
        # pylint: enable=protected-access
    with vs.variable_scope("candidate"):
        # Input and hidden projections are kept separate so the reset gate
        # can scale only the recurrent contribution.
        # pylint: disable=protected-access
        with vs.variable_scope("input_projection"):
            hi = rnn_cell_impl._linear(inputs, self._num_units, True,
                                       self._bias_initializer,
                                       self._kernel_initializer)
        with vs.variable_scope("hidden_projection"):
            hh = r * (rnn_cell_impl._linear(state, self._num_units, True,
                                            self._bias_initializer,
                                            self._kernel_initializer))
        # pylint: enable=protected-access
        c = self._activation(hi + hh)
    new_h = u * state + (1 - u) * c
    return new_h, new_h
def embed(self, func, embedding_classes, embedding_size, inputs, dtype=None, scope=None, keep_prob=1.0, initializer=None): embedder_cell = func(self._cell, embedding_classes, embedding_size, initializer=initializer) # Like rnn(..) in rnn.py, but we call only the Embedder, not the RNN cell outputs = [] with vs.variable_scope(scope or "Embedder") as varscope: if varscope.caching_device is None: varscope.set_caching_device(lambda op: op.device) for time, input_ in enumerate(inputs): if time > 0: vs.get_variable_scope().reuse_variables() embedding = embedder_cell.__call__(input_, scope) if keep_prob < 1: embedding = tf.nn.dropout(embedding, keep_prob) # annotation = C~_t = tanh ( E(x_t) + b_c) b_c = tf.get_variable("annotation_b", [embedding_size]) annotation = tanh(tf.nn.bias_add(embedding, b_c)) # weighted annotation = i_t * C~_t # i = sigmoid ( E(x_t) + b_i) b_i = tf.get_variable("input_b", [embedding_size]) i = sigmoid(tf.nn.bias_add(embedding, b_i)) w_annotation = i * annotation outputs.append(w_annotation) # return empty state, will be initialized by decoder batch_size = array_ops.shape(inputs[0])[0] state = self._cell.zero_state(batch_size, dtype) return (outputs, state)
def call(self, inputs, state, att_score=None):
    """Gated recurrent unit (GRU) with an attention-scaled update gate.

    NOTE(review): despite the `att_score=None` default, `att_score` is used
    unconditionally below, so calling with the default raises a TypeError —
    confirm all callers pass a score.
    """
    if self._gate_linear is None:
        bias_ones = self._bias_initializer
        if self._bias_initializer is None:
            # We start with bias of 1.0 to not reset and not update.
            bias_ones = init_ops.constant_initializer(
                1.0, dtype=inputs.dtype)
        with vs.variable_scope("gates"):  # Reset gate and update gate.
            self._gate_linear = _Linear(
                [inputs, state],
                2 * self._num_units,
                True,
                bias_initializer=bias_ones,
                kernel_initializer=self._kernel_initializer)
    value = math_ops.sigmoid(self._gate_linear([inputs, state]))
    r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
    r_state = r * state
    if self._candidate_linear is None:
        with vs.variable_scope("candidate"):
            self._candidate_linear = _Linear(
                [inputs, r_state],
                self._num_units,
                True,
                bias_initializer=self._bias_initializer,
                kernel_initializer=self._kernel_initializer)
    c = self._activation(self._candidate_linear([inputs, r_state]))
    # Scale the update gate by the attention score.
    u = (1.0 - att_score) * u
    new_h = u * state + (1 - u) * c
    return new_h, new_h
def _get_eval_ops(self, features, targets, metrics=None):
    """See base class."""
    logits = self._logits(features)
    result = {"loss": metrics_lib.streaming_mean(self._loss(
        logits, targets, features))}
    # Adds default metrics.
    if metrics is None:
        # TODO(b/29366811): This currently results in both an "accuracy" and an
        # "accuracy/threshold_0.500000_mean" metric for binary classification.
        metrics = {("accuracy", "classes"): metrics_lib.streaming_accuracy}
    # Adds additional useful metrics for the special case of binary
    # classification.
    # TODO(zakaria): Move LogisticRegressor.get_default_metrics to metrics
    # and handle eval metric from targetcolumn.
    if self._target_column.num_label_columns == 1:
        predictions = math_ops.sigmoid(logits)
        targets_float = math_ops.to_float(targets)
        default_metrics = (
            logistic_regressor.LogisticRegressor.get_default_metrics())
        for metric_name, metric_op in default_metrics.items():
            result[metric_name] = metric_op(predictions, targets_float)
    if metrics:
        # Metrics keyed by ("name", "classes"/"probabilities") tuples are
        # routed to class- or probability-based predictions; plain string
        # keys default to class predictions.
        class_metrics = {}
        proba_metrics = {}
        for name, metric_op in six.iteritems(metrics):
            if isinstance(name, tuple):
                if len(name) != 2:
                    # NOTE(review): messages say "Ignoring metric" but the
                    # code raises instead of skipping — confirm intent.
                    raise ValueError("Ignoring metric {}. It returned a tuple with "
                                     "len {}, expected 2.".format(name, len(name)))
                else:
                    if name[1] not in ["classes", "probabilities"]:
                        raise ValueError("Ignoring metric {}. The 2nd element of its "
                                         "name should be either 'classes' or "
                                         "'probabilities'.".format(name))
                    elif name[1] == "classes":
                        class_metrics[name[0]] = metric_op
                    else:
                        proba_metrics[name[0]] = metric_op
            elif isinstance(name, str):
                class_metrics[name] = metric_op
            else:
                raise ValueError("Ignoring metric {}. Its name is not in the correct "
                                 "form.".format(name))
        if class_metrics:
            predictions = self._target_column.logits_to_predictions(logits,
                                                                    proba=False)
            result.update(self._run_metrics(predictions, targets, class_metrics,
                                            self._target_column.get_weight_tensor(
                                                features)))
        if proba_metrics:
            predictions = self._target_column.logits_to_predictions(logits,
                                                                    proba=True)
            result.update(self._run_metrics(predictions, targets, proba_metrics,
                                            self._target_column.get_weight_tensor(
                                                features)))
    return result
def create_estimator_spec(
        self, features, mode, logits, labels=None, train_op_fn=None):
    """See `Head`.

    Builds the `EstimatorSpec` for a multi-label sigmoid head: predictions
    for PREDICT, metrics for EVAL, and a summed weighted loss plus
    `train_op_fn(loss)` for TRAIN.
    """
    with ops.name_scope('head'):
        logits = head_lib._check_logits(logits, self.logits_dimension)  # pylint:disable=protected-access

        # Predict.
        pred_keys = prediction_keys.PredictionKeys
        with ops.name_scope(None, 'predictions', (logits,)):
            # Independent per-class probabilities (sigmoid, not softmax).
            probabilities = math_ops.sigmoid(logits, name=pred_keys.PROBABILITIES)
            predictions = {
                pred_keys.LOGITS: logits,
                pred_keys.PROBABILITIES: probabilities,
            }
        if mode == model_fn.ModeKeys.PREDICT:
            return model_fn.EstimatorSpec(
                mode=model_fn.ModeKeys.PREDICT,
                predictions=predictions,
                export_outputs={
                    '': export_output.ClassificationOutput(scores=probabilities)
                })

        # Eval.
        unweighted_loss, processed_labels = self.create_loss(
            features=features, mode=mode, logits=logits, labels=labels)
        # Averages loss over classes.
        per_example_loss = math_ops.reduce_mean(
            unweighted_loss, axis=-1, keep_dims=True)
        weights = head_lib._weights(features, self._weight_column)  # pylint:disable=protected-access
        training_loss = losses.compute_weighted_loss(
            per_example_loss, weights=weights, reduction=losses.Reduction.SUM)
        if mode == model_fn.ModeKeys.EVAL:
            return model_fn.EstimatorSpec(
                mode=model_fn.ModeKeys.EVAL,
                predictions=predictions,
                loss=training_loss,
                eval_metric_ops=self._eval_metric_ops(
                    labels=processed_labels,
                    probabilities=probabilities,
                    weights=weights,
                    per_example_loss=per_example_loss))

        # Train.
        if train_op_fn is None:
            raise ValueError('train_op_fn can not be None.')
        with ops.name_scope(''):
            summary.scalar(
                head_lib._summary_key(self._name, metric_keys.MetricKeys.LOSS),  # pylint:disable=protected-access
                training_loss)
            summary.scalar(
                head_lib._summary_key(  # pylint:disable=protected-access
                    self._name, metric_keys.MetricKeys.LOSS_MEAN),
                losses.compute_weighted_loss(
                    unweighted_loss, weights=weights,
                    reduction=losses.Reduction.MEAN))
        return model_fn.EstimatorSpec(
            mode=model_fn.ModeKeys.TRAIN,
            predictions=predictions,
            loss=training_loss,
            train_op=train_op_fn(training_loss))
def __init__(self, logits=None, p=None, dtype=dtypes.int32, validate_args=True,
             allow_nan_stats=False, name="Bernoulli"):
    """Construct Bernoulli distributions.

    Args:
      logits: An N-D `Tensor` representing the log-odds of a positive event.
        Each entry in the `Tensor` parametrizes an independent Bernoulli
        distribution where the probability of an event is sigmoid(logits).
      p: An N-D `Tensor` representing the probability of a positive event.
        Each entry in the `Tensor` parameterizes an independent Bernoulli
        distribution.
      dtype: dtype for samples.
      validate_args: Whether to assert that `0 <= p <= 1`. If not
        validate_args, `log_pmf` may return nans.
      allow_nan_stats: Boolean, default False. If False, raise an exception
        if a statistic (e.g. mean/mode/etc...) is undefined for any batch
        member. If True, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: A name for this distribution.

    Raises:
      ValueError: If p and logits are passed, or if neither are passed.
    """
    self._allow_nan_stats = allow_nan_stats
    self._name = name
    self._dtype = dtype
    self._validate_args = validate_args
    check_op = check_ops.assert_less_equal
    if p is None and logits is None:
        raise ValueError("Must pass p or logits.")
    elif p is not None and logits is not None:
        raise ValueError("Must pass either p or logits, not both.")
    elif p is None:
        # Parameterized by logits; derive p = sigmoid(logits).
        with ops.op_scope([logits], name):
            self._logits = array_ops.identity(logits, name="logits")
        with ops.name_scope(name):
            with ops.name_scope("p"):
                self._p = math_ops.sigmoid(self._logits)
    elif logits is None:
        # Parameterized by p; optionally validate 0 <= p <= 1, then derive
        # logits = log(p) - log(1 - p).
        with ops.name_scope(name):
            with ops.name_scope("p"):
                p = array_ops.identity(p)
                one = constant_op.constant(1., p.dtype)
                zero = constant_op.constant(0., p.dtype)
                self._p = control_flow_ops.with_dependencies(
                    [check_op(p, one), check_op(zero, p)] if validate_args else [],
                    p)
            with ops.name_scope("logits"):
                self._logits = math_ops.log(self._p) - math_ops.log(1. - self._p)
    with ops.name_scope(name):
        with ops.name_scope("q"):
            # q = 1 - p, the probability of the negative event.
            self._q = 1. - self._p
        self._batch_shape = array_ops.shape(self._logits)
        # NOTE(review): `array_ops.constant` is unusual — presumably resolves
        # to the same constant op as `constant_op.constant`; confirm import.
        self._event_shape = array_ops.constant([], dtype=dtypes.int32)
def _kl_bernoulli_bernoulli(a, b, name=None):
    """Calculate the batched KL divergence KL(a || b) with a and b Bernoulli.

    Args:
      a: instance of a Bernoulli distribution object.
      b: instance of a Bernoulli distribution object.
      name: (optional) Name to use for created operations.
        default is "kl_bernoulli_bernoulli".

    Returns:
      Batchwise KL(a || b)
    """
    with ops.name_scope(name, "kl_bernoulli_bernoulli", [a.logits, b.logits]):
        # KL = p_a * [log p_a - log p_b] + (1 - p_a) * [log q_a - log q_b],
        # written with softplus for numerical stability:
        #   log p = -softplus(-logits), log q = -softplus(logits).
        prob_a = math_ops.sigmoid(a.logits)
        prob_not_a = math_ops.sigmoid(-a.logits)
        positive_term = -nn.softplus(-a.logits) + nn.softplus(-b.logits)
        negative_term = -nn.softplus(a.logits) + nn.softplus(b.logits)
        return prob_a * positive_term + prob_not_a * negative_term
def __call__(self, inputs, state, scope=None):
    """One step of an oscillatory RNN cell with optional sensitivity and
    update gates.  All weights are created lazily on first call."""
    with vs.variable_scope(scope or type(self).__name__):
        batch_size = inputs.get_shape().with_rank(2)[0]
        input_size = inputs.get_shape().with_rank(2)[1]
        # Core input/recurrent weights and bias.
        if self.W is None:
            self.W = vs.get_variable("W", [input_size, self._num_units],
                                     initializer=self.input_weights_init)
        if self.U is None:
            self.U = vs.get_variable("U", [self._num_units, self._num_units],
                                     initializer=self.recc_weights_init)
        if self.bias is None:
            self.bias = vs.get_variable("Bias", [self._num_units],
                                        initializer=init_ops.constant_initializer(0.0))
        if self._sensitivity:
            # Sensitivity gate s = sigmoid(x W_s + h U_s + b_s).
            if self.W_s is None:
                self.W_s = vs.get_variable("W_s", [input_size, self._num_units],
                                           initializer=self.input_weights_init)
            if self.U_s is None:
                self.U_s = vs.get_variable("U_s", [self._num_units, self._num_units],
                                           initializer=self.recc_weights_init)
            if self.bias_s is None:
                self.bias_s = vs.get_variable("Bias_s", [self._num_units],
                                              initializer=init_ops.constant_initializer(0.0))
            s = sigmoid(math_ops.matmul(inputs, self.W_s) +
                        math_ops.matmul(state, self.U_s) + self.bias_s)
            # s *= 3.0
        else:
            s = 1.0
        # NOTE(review): this unconditional overwrite discards the sensitivity
        # gate computed above — looks like leftover debugging; confirm intent.
        s = 1.0
        state_cos = s * tf.cos(state)
        weighted_input = math_ops.matmul(inputs, self.W) + math_ops.matmul(state, self.U) + self.bias
        # Oscillator-style state derivative.
        new_state = s - state_cos + (s + state_cos) * weighted_input
        if not self._update_gate:
            # Plain Euler integration step.
            state = state + self._dt * new_state
        else:
            # Update gate u = sigmoid(x W_u + h U_u + b_u) for a gated step.
            if self.W_u is None:
                self.W_u = vs.get_variable("W_u", [input_size, self._num_units],
                                           initializer=self.input_weights_init)
            if self.U_u is None:
                self.U_u = vs.get_variable("U_u", [self._num_units, self._num_units],
                                           initializer=self.recc_weights_init)
            if self.bias_u is None:
                self.bias_u = vs.get_variable("Bias_u", [self._num_units],
                                              initializer=init_ops.constant_initializer(0.0))
            u = sigmoid(math_ops.matmul(inputs, self.W_u) +
                        math_ops.matmul(state, self.U_u) + self.bias_u)
            state = u * state + (1.0 - u) * self._dt * new_state
            self.update_info.append(u)
        # self.sigma = vs.get_variable("sigma", [self._num_units],
        #                              initializer=init_ops.constant_initializer(1.0))
        output = self._activation(state, self._sigma)
        self.states_info.append(state)
        return output, state
def _cdf(self, positive_counts):
    """Cumulative distribution function via the regularized incomplete beta.

    Optionally validates that `positive_counts` is non-negative (flooring to
    integers) before evaluating `betainc(total_count, counts + 1, 1 - p)`.
    """
    counts = positive_counts
    if self.validate_args:
        checked = distribution_util.embed_check_nonnegative_discrete(
            counts, check_integer=False)
        counts = math_ops.floor(checked)
    # sigmoid(-logits) == 1 - p, the failure probability.
    failure_prob = math_ops.sigmoid(-self.logits)
    return math_ops.betainc(self.total_count, counts + 1., failure_prob)
def __call__(self, inputs, state, scope=None):
    """Convolutional Long short-term memory cell (ConvLSTM)."""
    with vs.variable_scope(scope or self.name):  # "ConvLSTMCell"
        # State carries (cell, hidden) concatenated along the last axis;
        # tensors are laid out batch * height * width * channel.
        cell, hidden = array_ops.split(3, 2, state)
        gates = _conv([inputs, hidden], 4 * self.hidden_num, self.filter_size)
        # input gate, new input, forget gate, output gate
        in_gate, new_input, forget_gate, out_gate = array_ops.split(3, 4, gates)
        retained = cell * sigmoid(forget_gate + self.forget_bias)
        written = sigmoid(in_gate) * self.activation(new_input)
        next_cell = retained + written
        next_hidden = self.activation(next_cell) * sigmoid(out_gate)
        return next_hidden, array_ops.concat(3, [next_cell, next_hidden])
def __call__(self, inputs, state, scope=None):
    """Run one GRU step with element-wise masks applied to the gate weights.

    Args:
      inputs: input Tensor, 2-D, batch x input_size.
      state: previous hidden state, 2-D, batch x num_units.
      scope: VariableScope for the created subgraph; defaults to class name.

    Returns:
      A `(output, new_state)` tuple; for a GRU both are the same tensor.
    """
    inputs = convert_to_tensor(inputs)
    with vs.variable_scope(scope or type(self).__name__):
        W_z = tf.Variable(self.W_z, name="W_z")
        W_r = tf.Variable(self.W_r, name="W_r")
        W_h = tf.Variable(self.W_h, name="W_h")
        U_z = tf.Variable(self.U_z, name="U_z")
        U_r = tf.Variable(self.U_r, name="U_r")
        U_h = tf.Variable(self.U_h, name="U_h")
        b_z = tf.Variable(self.b_z, name="b_z")
        b_r = tf.Variable(self.b_r, name="b_r")
        b_h = tf.Variable(self.b_h, name="b_h")
        # BUG FIX: the original added the raw initial biases (self.b_z, ...)
        # instead of the Variables created above, leaving b_z/b_r/b_h unused
        # and the biases untrainable.
        z = math_ops.sigmoid(
            math_ops.matmul(inputs, W_z * self.W_z_mask) +
            math_ops.matmul(state, self.U_z_mask * U_z) + b_z)
        r = math_ops.sigmoid(
            math_ops.matmul(inputs, W_r * self.W_r_mask) +
            math_ops.matmul(state, self.U_r_mask * U_r) + b_r)
        # NOTE(review): W_h/U_h are used unmasked here, unlike W_z/W_r —
        # confirm that is intentional.
        hh = math_ops.tanh(
            math_ops.matmul(inputs, W_h) +
            math_ops.matmul(state * r, U_h) + b_h)
        h = (1 - z) * hh + z * state
        return h, h
def get_logits_and_prob(logits=None, p=None, multidimensional=False,
                        validate_args=True, name=None):
    """Converts logits to probabilities and vice-versa, and returns both.

    Args:
      logits: Numeric `Tensor` representing log-odds.
      p: Numeric `Tensor` representing probabilities.
      multidimensional: Given `p` a [N1, N2, ... k] dimensional tensor,
        whether the last dimension represents the probability between k
        classes. This will additionally assert that the values in the last
        dimension sum to one. If `False`, will instead assert that each value
        is in `[0, 1]`.
      validate_args: Whether to assert `0 <= p <= 1` if multidimensional is
        `False`, otherwise that the last dimension of `p` sums to one.
      name: A name for this operation (optional).

    Returns:
      Tuple with `logits` and `p`. If `p` has an entry that is `0` or `1`,
      then the corresponding entry in the returned logits will be `-Inf` and
      `Inf` respectively.

    Raises:
      ValueError: if neither `p` nor `logits` were passed in, or both were.
    """
    # Exactly one of `p` / `logits` must be supplied.
    if p is None and logits is None:
        raise ValueError("Must pass p or logits.")
    elif p is not None and logits is not None:
        raise ValueError("Must pass either p or logits, not both.")
    elif p is None:
        # logits given: derive p = sigmoid(logits); logits are trusted as-is.
        with ops.name_scope(name, values=[logits]):
            logits = array_ops.identity(logits, name="logits")
        with ops.name_scope(name):
            with ops.name_scope("p"):
                p = math_ops.sigmoid(logits)
    elif logits is None:
        # p given: optionally validate it, then derive logits = logit(p).
        with ops.name_scope(name):
            with ops.name_scope("p"):
                p = array_ops.identity(p)
                if validate_args:
                    one = constant_op.constant(1., p.dtype)
                    dependencies = [check_ops.assert_non_negative(p)]
                    if multidimensional:
                        # Last dimension must be a distribution over k classes.
                        dependencies += [assert_close(
                            math_ops.reduce_sum(p, reduction_indices=[-1]),
                            one, message="p does not sum to 1.")]
                    else:
                        dependencies += [check_ops.assert_less_equal(
                            p, one,
                            message="p has components greater than 1.")]
                    p = control_flow_ops.with_dependencies(dependencies, p)
            with ops.name_scope("logits"):
                # logit(p); yields -Inf/Inf at p == 0 / p == 1 respectively.
                logits = math_ops.log(p) - math_ops.log(1. - p)
    return (logits, p)
def __call__(self, inputs, state, scope=None):
    """Run one step of LSTM.

    Args:
      inputs: input Tensor, 2D, batch x num_units.
      state: state Tensor, 2D, batch x state_size.
      scope: VariableScope for the created subgraph; defaults to "LSTMCell".

    Returns:
      A tuple containing:
      - A 2D, batch x output_dim, Tensor representing the output of the LSTM
        after reading "inputs" when previous state was "state". Here
        output_dim is: num_proj if num_proj was set, num_units otherwise.
      - A 2D, batch x state_size, Tensor representing the new state of LSTM
        after reading "inputs" when previous state was "state".

    Raises:
      ValueError: if an input_size was specified and the provided inputs have
        a different dimension.
    """
    num_proj = self._num_units if self._num_proj is None else self._num_proj
    # State is [c, m] concatenated along the feature axis.
    c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
    m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])
    dtype = inputs.dtype
    actual_input_size = inputs.get_shape().as_list()[1]
    if self._input_size and self._input_size != actual_input_size:
        # BUG FIX: the format arguments were not parenthesized, so
        # `"... %d vs %d." % actual_input_size` raised TypeError (not enough
        # format args) and `self._input_size` was passed to ValueError as a
        # stray second argument.
        raise ValueError(
            "Actual input size not same as specified: %d vs %d."
            % (actual_input_size, self._input_size))
    with vs.variable_scope(scope or type(self).__name__,
                           initializer=self._initializer):  # "LSTMCell"
        concat_w = _get_concat_variable(
            "W", [actual_input_size + num_proj, 4 * self._num_units],
            dtype, self._num_unit_shards)
        b = vs.get_variable("B", shape=[4 * self._num_units],
                            initializer=array_ops.zeros_initializer,
                            dtype=dtype)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        cell_inputs = array_ops.concat(1, [inputs, m_prev])
        lstm_matrix = nn_ops.bias_add(
            math_ops.matmul(cell_inputs, concat_w), b)
        i, j, f, o = array_ops.split(1, 4, lstm_matrix)
        # Diagonal connections
        if self._use_peepholes:
            w_f_diag = vs.get_variable("W_F_diag", shape=[self._num_units],
                                       dtype=dtype)
            w_i_diag = vs.get_variable("W_I_diag", shape=[self._num_units],
                                       dtype=dtype)
            w_o_diag = vs.get_variable("W_O_diag", shape=[self._num_units],
                                       dtype=dtype)
        if self._use_peepholes:
            c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
                 sigmoid(i + w_i_diag * c_prev) * tanh(j))
        else:
            c = (sigmoid(f + self._forget_bias) * c_prev +
                 sigmoid(i) * tanh(j))
        if self._cell_clip is not None:
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
        if self._use_peepholes:
            m = sigmoid(o + w_o_diag * c) * tanh(c)
        else:
            m = sigmoid(o) * tanh(c)
        if self._num_proj is not None:
            # Optional output projection from num_units down to num_proj.
            concat_w_proj = _get_concat_variable(
                "W_P", [self._num_units, self._num_proj],
                dtype, self._num_proj_shards)
            m = math_ops.matmul(m, concat_w_proj)
    return m, array_ops.concat(1, [c, m])
def _test_tanh(data):
    """One iteration of TANH.

    Args:
      data: numpy array fed to the placeholder; its shape/dtype define the op.
    """
    with tf.Graph().as_default():
        in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype)
        # BUG FIX: this test built math_ops.sigmoid, so the TANH conversion
        # path was never actually exercised.
        out = math_ops.tanh(in_data)
        compare_tflite_with_tvm(data, 'Placeholder:0', [in_data], [out])
def testBernoulliWithSigmoidProbs(self):
    """BernoulliWithSigmoidProbs exposes probs == sigmoid(logits)."""
    logits = np.array([8.3, 4.2])
    dist = bernoulli.BernoulliWithSigmoidProbs(logits=logits)
    with self.test_session():
        expected = math_ops.sigmoid(logits).eval()
        self.assertAllClose(expected, dist.probs.eval())
def call(self, inputs, state):
    """Run one LSTM time step.

    Splits `state` into its previous cell and hidden parts, computes the
    forget/input/output gates and the candidate cell value (each gate also
    receives a dense connection from a cell state), then projects the gated
    hidden value through `W_h`.

    :param inputs: The input at the current time step. The last dimension of
        it should be 1.
    :param state: The state value of the cell from the last time step; the
        sizes of its two parts come from `self.params`.
    :return: A tuple containing (output, new_state).
    """
    cell_size, hidden_size = self.params[0], self.params[1]
    c_prev = array_ops.slice(state, [0, 0], [-1, cell_size])
    h_prev = array_ops.slice(state, [0, cell_size], [-1, hidden_size])
    W, b = self.W, self.b
    # Forget gate, with a connection from the previous cell state.
    f = math_ops.sigmoid(
        tf.matmul(h_prev, W['W_fh']) + tf.multiply(inputs, W['W_fi'])
        + b['b_f'] + tf.matmul(c_prev, W['W_fc']))
    # Input gate, with a connection from the previous cell state.
    i = math_ops.sigmoid(
        tf.matmul(h_prev, W['W_ih']) + tf.multiply(inputs, W['W_ii'])
        + b['b_i'] + tf.matmul(c_prev, W['W_ic']))
    # Candidate cell value.
    candidate = math_ops.tanh(
        tf.matmul(h_prev, W['W_ch']) + tf.multiply(inputs, W['W_ci'])
        + b['b_c'])
    c = f * c_prev + i * candidate
    # Output gate reads the *new* cell state.
    o = math_ops.sigmoid(
        tf.matmul(h_prev, W['W_oh']) + tf.multiply(inputs, W['W_oi'])
        + b['b_o'] + tf.matmul(c, W['W_oc']))
    h = tf.matmul(o * math_ops.tanh(c), W['W_h'])
    return h, array_ops.concat([c, h], 1)
def call(self, inputs, state):
    """Run one step of LSTM.

    Args:
      inputs: input Tensor, 2D, batch x num_units.
      state: if `state_is_tuple` is False, this must be a state Tensor,
        `2-D, batch x state_size`. If `state_is_tuple` is True, this must be
        a tuple of state Tensors, both `2-D`, with column sizes `c_state` and
        `m_state`.

    Returns:
      A tuple containing:
      - A `2-D, [batch x output_dim]`, Tensor representing the output of the
        LSTM after reading `inputs` when previous state was `state`. Here
        output_dim is: num_proj if num_proj was set, num_units otherwise.
      - Tensor(s) representing the new state of LSTM after reading `inputs`
        when the previous state was `state`. Same type and shape(s) as
        `state`.

    Raises:
      ValueError: If input size cannot be inferred from inputs via static
        shape inference.
    """
    num_proj = self._num_units if self._num_proj is None else self._num_proj
    if self._state_is_tuple:
        (c_prev, m_prev) = state
    else:
        # Concatenated state: [c | m] along the feature axis.
        c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
        m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])
    dtype = inputs.dtype
    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
        raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
    scope = vs.get_variable_scope()
    with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
        # Optionally shard the big gate weight matrix across devices.
        if self._num_unit_shards is not None:
            unit_scope.set_partitioner(
                partitioned_variables.fixed_size_partitioner(
                    self._num_unit_shards))
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        lstm_matrix = _linear([inputs, m_prev], 4 * self._num_units, bias=True)
        i, j, f, o = array_ops.split(
            value=lstm_matrix, num_or_size_splits=4, axis=1)
        # Diagonal connections
        if self._use_peepholes:
            # Peephole weights must not be partitioned, hence the nested
            # scope that clears the partitioner.
            with vs.variable_scope(unit_scope) as projection_scope:
                if self._num_unit_shards is not None:
                    projection_scope.set_partitioner(None)
                w_f_diag = vs.get_variable(
                    "w_f_diag", shape=[self._num_units], dtype=dtype)
                w_i_diag = vs.get_variable(
                    "w_i_diag", shape=[self._num_units], dtype=dtype)
                w_o_diag = vs.get_variable(
                    "w_o_diag", shape=[self._num_units], dtype=dtype)
        if self._use_peepholes:
            c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
                 sigmoid(i + w_i_diag * c_prev) * self._activation(j))
        else:
            c = (sigmoid(f + self._forget_bias) * c_prev +
                 sigmoid(i) * self._activation(j))
        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type
        if self._use_peepholes:
            # Output gate peeks at the *new* cell state.
            m = sigmoid(o + w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)
        if self._num_proj is not None:
            # Optional output projection, itself optionally sharded.
            with vs.variable_scope("projection") as proj_scope:
                if self._num_proj_shards is not None:
                    proj_scope.set_partitioner(
                        partitioned_variables.fixed_size_partitioner(
                            self._num_proj_shards))
                m = _linear(m, self._num_proj, bias=False)
            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip)
                # pylint: enable=invalid-unary-operand-type
    new_state = (LSTMStateTuple(c, m) if self._state_is_tuple else
                 array_ops.concat([c, m], 1))
    return m, new_state
def __call__(self, inputs, state, scope=None):
    """ Phased long short-term memory cell (P-LSTM)."""
    with vs.variable_scope(scope or type(self).__name__):
        # Parameters of gates are concatenated into one multiply for
        # efficiency.
        c_prev, h_prev = state
        # (batch_size, seq_len, 2)
        # NB: here we explicitly give t as input.
        # inputs packs the feature in column 0 and the timestamp in column 1.
        x = tf.reshape(inputs[:, 0], (-1, 1))
        t = inputs[:, 1][-1]  # Now we only accept one id. We have a batch so
        # it's a bit more complex. maybe the information should come from the
        # outside. To be defined later.
        concat = _linear([x, h_prev], 4 * self._num_units, True)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4,
                                     axis=1)
        dtype = inputs.dtype
        # Time-gate parameters: period tau (initialized by
        # random_exp_initializer), open-ratio r_on, and phase shift s in
        # [0, tau).
        tau = vs.get_variable('tau', shape=[self._num_units],
                              initializer=random_exp_initializer(
                                  0, self.tau_init), dtype=dtype)
        r_on = vs.get_variable('r_on', shape=[self._num_units],
                               initializer=init_ops.constant_initializer(
                                   self.r_on_init), dtype=dtype)
        s = vs.get_variable(
            's', shape=[self._num_units],
            initializer=init_ops.random_uniform_initializer(
                0., tau.initialized_value()), dtype=dtype)
        # Broadcast the scalar timestamp across all units, then compute the
        # cyclic phase and the openness kappa of the time gate.
        times = tf.tile(tf.reshape(t, [-1, 1]), [1, self._num_units])
        phase = phi(times, s, tau)
        kappa = time_gate_fast(phase, r_on, self._leak_rate,
                               self._training_phase)
        w_o_peephole = None
        if self._use_peepholes:
            w_i_peephole = vs.get_variable('W_I_peephole',
                                           shape=[self._num_units],
                                           dtype=dtype)
            w_f_peephole = vs.get_variable('W_F_peephole',
                                           shape=[self._num_units],
                                           dtype=dtype)
            w_o_peephole = vs.get_variable('W_O_peephole',
                                           shape=[self._num_units],
                                           dtype=dtype)
            f += w_f_peephole * c_prev
            i += w_i_peephole * c_prev
        # Candidate (pre-time-gate) cell and hidden values.
        new_c_tilde = sigmoid(f) * c_prev + sigmoid(i) * self._activation(j)
        if self._use_peepholes:
            # Output peephole reads c_tilde, not the khronos-gated c; see the
            # author's note below.
            o += w_o_peephole * new_c_tilde
        new_h_tilde = sigmoid(o) * self._activation(new_c_tilde)
        """ Hi all, I can add a point to the paper to mention that, and will
        update Figure 1 so the line is correctly drawn to c_tilde instead.
        The intuition here is that the gates should be blind to the effect of
        the khronos gate; input, forget and output gate should all operate as
        if the cell were a normal LSTM cell, while the khronos gate allows it
        to either operate or not operate (and then linearly interpolates
        between these two states). If the output gate is influenced by the
        khronos gate (if the peepholes reference c instead of c_tilde), then
        the PLSTM would no longer be a gated LSTM cell, but somehow be
        self-dependent on the time gate's actual operation. I think
        everyone's right in that it wouldn't influence much -- but it should
        be updated in the paper. Thanks very much for pointing out the issue,
        Philippe! -Danny"""
        # Apply Khronos gate
        new_h = kappa * new_h_tilde + (1 - kappa) * h_prev
        new_c = kappa * new_c_tilde + (1 - kappa) * c_prev
        new_state = (new_c, new_h)
        return new_h, new_state
def get_logits_and_probs(logits=None, probs=None, multidimensional=False,
                         validate_args=False, name="get_logits_and_probs"):
    """Converts logit to probabilities (or vice-versa), and returns both.

    Args:
      logits: Floating-point `Tensor` representing log-odds.
      probs: Floating-point `Tensor` representing probabilities.
      multidimensional: Python `bool`, default `False`. If `True`, represents
        whether the last dimension of `logits` or `probs`, a `[N1, N2, ... k]`
        dimensional tensor, representing the logit or probability of
        `shape[-1]` classes.
      validate_args: Python `bool`, default `False`. When `True`, either
        assert `0 <= probs <= 1` (if not `multidimensional`) or that the last
        dimension of `probs` sums to one.
      name: A name for this operation (optional).

    Returns:
      logits, probs: Tuple of `Tensor`s. If `probs` has an entry that is `0`
        or `1`, then the corresponding entry in the returned logit will be
        `-Inf` and `Inf` respectively.

    Raises:
      ValueError: if neither `probs` nor `logits` were passed in, or both
        were.
    """
    with ops.name_scope(name, values=[probs, logits]):
        # Exactly one of probs / logits must be supplied.
        if (probs is None) == (logits is None):
            raise ValueError("Must pass probs or logits, but not both.")
        if probs is None:
            logits = ops.convert_to_tensor(logits, name="logits")
            if not logits.dtype.is_floating:
                raise TypeError("logits must having floating type.")
            # We can early return since we constructed probs and therefore
            # know they're valid.
            if multidimensional:
                if validate_args:
                    logits = embed_check_categorical_event_shape(logits)
                return logits, nn.softmax(logits, name="probs")
            return logits, math_ops.sigmoid(logits, name="probs")
        probs = ops.convert_to_tensor(probs, name="probs")
        if not probs.dtype.is_floating:
            raise TypeError("probs must having floating type.")
        if validate_args:
            with ops.name_scope("validate_probs"):
                one = constant_op.constant(1., probs.dtype)
                dependencies = [check_ops.assert_non_negative(probs)]
                if multidimensional:
                    probs = embed_check_categorical_event_shape(probs)
                    dependencies += [
                        assert_close(math_ops.reduce_sum(probs, -1), one,
                                     message="probs does not sum to 1.")
                    ]
                else:
                    dependencies += [
                        check_ops.assert_less_equal(
                            probs, one,
                            message="probs has components greater than 1.")
                    ]
                probs = control_flow_ops.with_dependencies(dependencies,
                                                           probs)
        with ops.name_scope("logits"):
            if multidimensional:
                # Here we don't compute the multidimensional case, in a
                # manner consistent with respect to the unidimensional case.
                # We do so following the TF convention. Typically, you might
                # expect to see logits = log(probs) - log(probs[pivot]). A
                # side-effect of being consistent with the TF approach is
                # that the unidimensional case implicitly handles the second
                # dimension but the multidimensional case explicitly keeps
                # the pivot dimension.
                return math_ops.log(probs), probs
            # logit(p) written via log1p for accuracy near probs == 0.
            return math_ops.log(probs) - math_ops.log1p(-1. * probs), probs
def predictions(self, logits, keys=None):
    """Return predictions based on keys.

    See `base_head.Head` for details.

    Args:
      logits: logits `Tensor` with shape `[D0, D1, ... DN, logits_dimension]`.
        For many applications, the shape is `[batch_size, logits_dimension]`.
      keys: a list or tuple of prediction keys. Each key can be either the
        class variable of prediction_keys.PredictionKeys or its string value,
        such as: prediction_keys.PredictionKeys.CLASSES or 'classes'. If not
        specified, it will return the predictions for all valid keys.

    Returns:
      A dict of predictions.
    """
    pred_keys = prediction_keys.PredictionKeys
    valid_keys = [
        pred_keys.LOGITS, pred_keys.LOGISTIC, pred_keys.PROBABILITIES,
        pred_keys.CLASS_IDS, pred_keys.CLASSES, pred_keys.ALL_CLASS_IDS,
        pred_keys.ALL_CLASSES
    ]
    if keys:
        base_head.check_prediction_keys(keys, valid_keys)
    else:
        # No explicit keys: produce every valid prediction.
        keys = valid_keys
    logits = base_head.check_logits_final_dim(logits, self.logits_dimension)
    predictions = {}
    with ops.name_scope('predictions', values=(logits,)):
        if pred_keys.LOGITS in keys:
            predictions[pred_keys.LOGITS] = logits
        if pred_keys.LOGISTIC in keys:
            logistic = math_ops.sigmoid(logits, name=pred_keys.LOGISTIC)
            predictions[pred_keys.LOGISTIC] = logistic
        # Pad a zero column so binary logits become two-class logits:
        # softmax/argmax over (0, logit).
        two_class_logits = array_ops.concat(
            (array_ops.zeros_like(logits), logits), axis=-1,
            name='two_class_logits')
        if pred_keys.PROBABILITIES in keys:
            probabilities = nn.softmax(two_class_logits,
                                       name=pred_keys.PROBABILITIES)
            predictions[pred_keys.PROBABILITIES] = probabilities
        if pred_keys.CLASS_IDS in keys or pred_keys.CLASSES in keys:
            class_ids = math_ops.argmax(two_class_logits, axis=-1,
                                        name=pred_keys.CLASS_IDS)
            class_ids = array_ops.expand_dims(class_ids, axis=-1)
            if pred_keys.CLASS_IDS in keys:
                predictions[pred_keys.CLASS_IDS] = class_ids
            if pred_keys.CLASSES in keys:
                if self._label_vocabulary is not None:
                    # Map integer ids back to vocabulary strings.
                    classes = self._class_string_table.lookup(class_ids)
                else:
                    classes = string_ops.as_string(class_ids,
                                                   name='str_classes')
                predictions[pred_keys.CLASSES] = classes
        if pred_keys.ALL_CLASS_IDS in keys:
            predictions[pred_keys.ALL_CLASS_IDS] = base_head.all_class_ids(
                logits, n_classes=2)
        if pred_keys.ALL_CLASSES in keys:
            predictions[pred_keys.ALL_CLASSES] = base_head.all_classes(
                logits, n_classes=2,
                label_vocabulary=self._label_vocabulary)
        return predictions
def get_logits_and_prob(logits=None, p=None, multidimensional=False,
                        validate_args=False, name="GetLogitsAndProb"):
    """Converts logits to probabilities and vice-versa, and returns both.

    Args:
      logits: Numeric `Tensor` representing log-odds.
      p: Numeric `Tensor` representing probabilities.
      multidimensional: `Boolean`, default `False`. If `True`, represents
        whether the last dimension of `logits` or `p`, a [N1, N2, ... k]
        dimensional tensor, represent the logits / probability between k
        classes. For `p`, this will additionally assert that the values in
        the last dimension sum to one. If `False`, this will instead assert
        that each value of `p` is in `[0, 1]`, and will do nothing to
        `logits`.
      validate_args: `Boolean`, default `False`. Whether to assert
        `0 <= p <= 1` if multidimensional is `False`, otherwise that the last
        dimension of `p` sums to one.
      name: A name for this operation (optional).

    Returns:
      Tuple with `logits` and `p`. If `p` has an entry that is `0` or `1`,
      then the corresponding entry in the returned logits will be `-Inf` and
      `Inf` respectively.

    Raises:
      ValueError: if neither `p` nor `logits` were passed in, or both were.
    """
    with ops.name_scope(name, values=[p, logits]):
        # Exactly one of `p` / `logits` must be supplied.
        if p is None and logits is None:
            raise ValueError("Must pass p or logits.")
        elif p is not None and logits is not None:
            raise ValueError("Must pass either p or logits, not both.")
        elif p is None:
            # logits given: derive p via softmax (multiclass) or sigmoid.
            logits = array_ops.identity(logits, name="logits")
            with ops.name_scope("p"):
                if multidimensional:
                    p = nn.softmax(logits)
                else:
                    p = math_ops.sigmoid(logits)
        elif logits is None:
            # p given: optionally validate, then derive logits.
            with ops.name_scope("p"):
                p = array_ops.identity(p)
                if validate_args:
                    one = constant_op.constant(1., p.dtype)
                    dependencies = [check_ops.assert_non_negative(p)]
                    if multidimensional:
                        dependencies += [
                            assert_close(
                                math_ops.reduce_sum(
                                    p, reduction_indices=[-1]),
                                one, message="p does not sum to 1.")
                        ]
                    else:
                        dependencies += [
                            check_ops.assert_less_equal(
                                p, one,
                                message="p has components greater than 1.")
                        ]
                    p = control_flow_ops.with_dependencies(dependencies, p)
            with ops.name_scope("logits"):
                if multidimensional:
                    # Here we don't compute the multidimensional case, in a
                    # manner consistent with respect to the unidimensional
                    # case. We do so following the TF convention. Typically,
                    # you might expect to see
                    # logits = log(p) - log(gather(p, pivot)). A side-effect
                    # of being consistent with the TF approach is that the
                    # unidimensional case implicitly handles the second
                    # dimension but the multidimensional case explicitly
                    # keeps the pivot dimension.
                    logits = math_ops.log(p)
                else:
                    logits = math_ops.log(p) - math_ops.log(1. - p)
        return (logits, p)
def _swish(input_tensor, scale):
    """Scaled swish activation wrapped in an OpHint named "cool_activation"."""
    hint = op_hint.OpHint("cool_activation")
    input_tensor, scale = hint.add_inputs(input_tensor, scale)
    # sigmoid(x) * x * scale, kept in this exact multiplication order.
    result = math_ops.sigmoid(input_tensor) * input_tensor * scale
    result, = hint.add_outputs(result)
    return result
def _entropy(self):
    """Entropy written in terms of logits:
    -logits * (sigmoid(logits) - 1) + softplus(-logits).
    """
    logits = self.logits
    gate = math_ops.sigmoid(logits) - 1
    return -logits * gate + nn.softplus(-logits)
def _forward(self, x):
    """Return the logistic sigmoid of `x`."""
    return math_ops.sigmoid(x)
# One GRU step over (state, word): reset/update gates, candidate, interpolate.
inputs = tf.concat(1, [state, word])
with tf.variable_scope("Gates"):
    # Reset gate and update gate.
    # We start with bias of 1.0 to not reset and not update.
    # NOTE(review): the comment above says bias 1.0 but both bias
    # initializers below are 0.0 — confirm which is intended.
    W_reset = tf.get_variable(
        name="reset_weight", shape=[state_size + input_size, state_size],
        initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
    W_update = tf.get_variable(
        name="update_weight", shape=[state_size + input_size, state_size],
        initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
    b_reset = tf.get_variable(name="reset_bias", shape=[state_size],
                              initializer=tf.constant_initializer(0.0))
    b_update = tf.get_variable(name="update_bias", shape=[state_size],
                               initializer=tf.constant_initializer(0.0))
    reset = sigmoid(tf.matmul(inputs, W_reset) + b_reset)
    update = sigmoid(tf.matmul(inputs, W_update) + b_update)
with tf.variable_scope("Candidate"):
    W_candidate = tf.get_variable(
        name="candidate_weight", shape=[state_size + input_size, state_size],
        initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
    b_candidate = tf.get_variable(
        name="candidate_bias", shape=[state_size],
        initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
    reset_input = tf.concat(1, [reset * state, word])
    # BUG FIX: the candidate used W_reset instead of W_candidate, which has
    # the same shape and therefore failed silently, leaving W_candidate
    # unused.
    candidate = tanh(tf.matmul(reset_input, W_candidate) + b_candidate)
new_h = update * state + (1 - update) * candidate
def __call__(self, inputs, state, scope=None):
    """Run one step of LSTM.

    Args:
      inputs: input Tensor, 2D, batch x num_units.
      state: if `state_is_tuple` is False, this must be a state Tensor,
        `2-D, batch x state_size`. If `state_is_tuple` is True, this must be
        a tuple of state Tensors, both `2-D`, with column sizes `c_state` and
        `m_state`.
      scope: VariableScope for the created subgraph; defaults to "LSTMCell".

    Returns:
      A tuple containing:
      - A `2-D, [batch x output_dim]`, Tensor representing the output of the
        LSTM after reading `inputs` when previous state was `state`. Here
        output_dim is: num_proj if num_proj was set, num_units otherwise.
      - Tensor(s) representing the new state of LSTM after reading `inputs`
        when the previous state was `state`. Same type and shape(s) as
        `state`.

    Raises:
      ValueError: If input size cannot be inferred from inputs via static
        shape inference.
    """
    num_proj = self._num_units if self._num_proj is None else self._num_proj
    if self._state_is_tuple:
        (c_prev, m_prev) = state
    else:
        # Concatenated state: [c | m] along the feature axis.
        c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
        m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])
    dtype = inputs.dtype
    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
        raise ValueError(
            "Could not infer input size from inputs.get_shape()[-1]")
    with vs.variable_scope(scope or type(self).__name__,
                           initializer=self._initializer):  # "LSTMCell"
        # Single fused weight matrix for all four gates.
        concat_w = _get_concat_variable(
            "W", [input_size.value + num_proj, 4 * self._num_units],
            dtype, self._num_unit_shards)
        b = vs.get_variable("B", shape=[4 * self._num_units],
                            initializer=array_ops.zeros_initializer,
                            dtype=dtype)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        cell_inputs = array_ops.concat(1, [inputs, m_prev])
        lstm_matrix = nn_ops.bias_add(
            math_ops.matmul(cell_inputs, concat_w), b)
        i, j, f, o = array_ops.split(1, 4, lstm_matrix)
        # Diagonal connections
        if self._use_peepholes:
            w_f_diag = vs.get_variable("W_F_diag", shape=[self._num_units],
                                       dtype=dtype)
            w_i_diag = vs.get_variable("W_I_diag", shape=[self._num_units],
                                       dtype=dtype)
            w_o_diag = vs.get_variable("W_O_diag", shape=[self._num_units],
                                       dtype=dtype)
        if self._use_peepholes:
            c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
                 sigmoid(i + w_i_diag * c_prev) * self._activation(j))
        else:
            c = (sigmoid(f + self._forget_bias) * c_prev +
                 sigmoid(i) * self._activation(j))
        if self._cell_clip is not None:
            # pylint: disable=invalid-unary-operand-type
            c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip)
            # pylint: enable=invalid-unary-operand-type
        if self._use_peepholes:
            # Output gate peeks at the *new* cell state.
            m = sigmoid(o + w_o_diag * c) * self._activation(c)
        else:
            m = sigmoid(o) * self._activation(c)
        if self._num_proj is not None:
            # Optional output projection from num_units down to num_proj.
            concat_w_proj = _get_concat_variable(
                "W_P", [self._num_units, self._num_proj],
                dtype, self._num_proj_shards)
            m = math_ops.matmul(m, concat_w_proj)
            if self._proj_clip is not None:
                # pylint: disable=invalid-unary-operand-type
                m = clip_ops.clip_by_value(m, -self._proj_clip,
                                           self._proj_clip)
                # pylint: enable=invalid-unary-operand-type
    new_state = (LSTMStateTuple(c, m) if self._state_is_tuple else
                 array_ops.concat(1, [c, m]))
    return m, new_state