def build_init_cell(self):
    """Build the graph that produces the cell's initial state.

    Every initial value is computed from one ``[1, 1]`` float placeholder
    (``dummy``) pushed through ``Linear`` layers, so the initial memory,
    head weightings, read vectors and controller state are all produced by
    trainable layers rather than being constants.

    Returns:
        A pair ``(inputs, outputs)``. ``inputs`` maps ``'input'`` to the
        placeholder. ``outputs`` maps ``'new_output'``, ``'M'``,
        ``'read_w'``, ``'write_w'``, ``'read'``, ``'output'`` and
        ``'hidden'`` to the corresponding initial tensors.
    """
    with tf.variable_scope("init_cell"):
        # Per the original note this input is "always zero"; the caller is
        # responsible for feeding it.
        dummy = tf.placeholder(tf.float32, [1, 1], name='dummy')

        # Initial memory matrix, [mem_size, mem_dim].
        mem_flat = tf.tanh(
            Linear(dummy, self.mem_size * self.mem_dim, name='M_init_linear'))
        mem_init = tf.reshape(mem_flat, [self.mem_size, self.mem_dim])

        # Read heads: one weighting over memory rows and one read vector each.
        read_w_init = tf.Variable(
            tf.zeros([self.read_head_size, self.mem_size]))
        read_init = tf.Variable(
            tf.zeros([self.read_head_size, 1, self.mem_dim]))
        for head in xrange(self.read_head_size):
            # Original note: is_range=True initializes the bias distribution
            # with `tf.range(mem_size-2, 0, -1)`.
            w_logits = Linear(dummy, self.mem_size, is_range=True,
                              name='read_w_linear_%s' % head)
            read_w_init = tf.scatter_update(
                read_w_init, [head], tf.nn.softmax(w_logits))

            read_vec = tf.tanh(
                Linear(dummy, self.mem_dim, name='read_init_%s' % head))
            read_init = tf.scatter_update(
                read_init, [head],
                tf.reshape(read_vec, [1, 1, self.mem_dim]))

        # Write heads: weighting only.
        write_w_init = tf.Variable(
            tf.zeros([self.write_head_size, self.mem_size]))
        for head in xrange(self.write_head_size):
            w_logits = Linear(dummy, self.mem_size, is_range=True,
                              name='write_w_linear_%s' % head)
            write_w_init = tf.scatter_update(
                write_w_init, [head], tf.nn.softmax(w_logits))

        # Controller state: one output row and one hidden row per layer.
        layer_shape = [self.controller_layer_size, self.controller_dim]
        row_shape = [1, self.controller_dim]
        output_init = tf.Variable(tf.zeros(layer_shape))
        hidden_init = tf.Variable(tf.zeros(layer_shape))
        for layer in xrange(self.controller_layer_size):
            out_row = tf.tanh(Linear(dummy, self.controller_dim,
                                     name='output_init_%s' % layer))
            output_init = tf.scatter_update(
                output_init, [layer], tf.reshape(out_row, row_shape))

            hid_row = tf.tanh(Linear(dummy, self.controller_dim,
                                     name='hidden_init_%s' % layer))
            hidden_init = tf.scatter_update(
                hidden_init, [layer], tf.reshape(hid_row, row_shape))

        new_output = tf.tanh(
            Linear(dummy, self.output_dim, name='new_output'))

        # The read vectors are stored as [heads, 1, mem_dim]; expose them flat.
        read_flat = tf.reshape(
            read_init, [self.read_head_size, self.mem_dim])

        inputs = {'input': dummy}
        outputs = {
            'new_output': new_output,
            'M': mem_init,
            'read_w': read_w_init,
            'write_w': write_w_init,
            'read': read_flat,
            'output': output_init,
            'hidden': hidden_init,
        }
    return inputs, outputs
def lstm_cell(i, o, state):
    """Run one LSTM step with all four gate projections batched together.

    See e.g. http://arxiv.org/pdf/1402.1128v1.pdf. As in the original
    formulation used here, the connections between the previous cell state
    and the gates are omitted.

    The module-level ``fico_x``, ``fico_m`` and ``fico_b`` hold the stacked
    input weights, recurrent weights and biases. The leading index selects
    the gate, in the order forget (0), input (1), update (2), output (3).

    Args:
        i: input for this step.
        o: output of the previous step.
        state: previous cell state.

    Returns:
        ``(h, state)``: the new output and the new cell state.
    """
    # Replicate each operand four times so a single batched matmul yields
    # every gate's pre-activation.
    stacked_inputs = tf.pack([i, i, i, i])
    stacked_outputs = tf.pack([o, o, o, o])
    from_inputs = tf.batch_matmul(stacked_inputs, fico_x)
    from_outputs = tf.batch_matmul(stacked_outputs, fico_m)
    pre_act = from_inputs + from_outputs + fico_b

    forget_gate = tf.sigmoid(pre_act[0, :, :])
    input_gate = tf.sigmoid(pre_act[1, :, :])
    candidate = tf.tanh(pre_act[2, :, :])
    state = forget_gate * state + input_gate * candidate

    output_gate = tf.sigmoid(pre_act[3, :, :])
    h = output_gate * tf.tanh(state)
    return h, state
def build_node(self, x_in, c_in, h_in, scope="lstm_cell"):
    """Build one LSTM node and return its new cell and hidden tensors.

    The bias is folded into the weight matrices: a column of ones is
    prepended to the concatenated ``[x_in, h_in]`` features, so each of
    ``self.w_f``, ``self.w_i``, ``self.w_c`` and ``self.w_o`` carries its own
    bias row.

    Args:
        x_in: input tensor for this node (concatenated along axis 2, so it
            is expected to be rank 3).
        c_in: previous cell state.
        h_in: previous hidden state.
        scope: variable scope name for the ops created here.

    Returns:
        ``(c, h)``: the new cell state and the new hidden state.
    """
    with tf.variable_scope(scope):
        features = tf.concat(2, [x_in, h_in])
        # `batch_size` is resolved from the enclosing module scope.
        bias_column = tf.constant(np.ones([batch_size, 1, 1]),
                                  name="b", dtype=tf.float32)
        augmented = tf.concat(2, [bias_column, features])

        # Forget gate.
        forget_gate = tf.sigmoid(tf.batch_matmul(augmented, self.w_f))

        # Input gate and candidate cell values.
        input_gate = tf.sigmoid(tf.batch_matmul(augmented, self.w_i))
        candidate = tf.tanh(tf.batch_matmul(augmented, self.w_c))

        # New cell state: retained part of the old state plus the gated
        # candidate.
        retained = tf.mul(forget_gate, c_in)
        admitted = tf.mul(input_gate, candidate)
        c = tf.add(retained, admitted)

        # New hidden state, scaled by the output gate.
        output_gate = tf.sigmoid(tf.batch_matmul(augmented, self.w_o))
        h = tf.mul(output_gate, tf.tanh(c))
    return (c, h)
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM).

    Args:
        inputs: input tensor for this step.
        state: previous state. A ``(c, h)`` pair when
            ``self._state_is_tuple`` is true, otherwise a single tensor with
            ``c`` and ``h`` concatenated along axis 1.
        scope: optional variable scope name; defaults to
            ``"basic_lstm_cell"``.

    Returns:
        ``(h, new_state)`` where ``new_state`` has the same layout as
        ``state``.
    """
    # The first argument of tf.variable_scope must be a scope name or a
    # VariableScope. The cell object used to be passed there, which pushed
    # the intended name into `default_name`.
    with tf.variable_scope(scope or "basic_lstm_cell", reuse=self._reuse):
        if self._state_is_tuple:
            c_prev, h_prev = state
        else:
            c_prev, h_prev = tf.split(
                value=state, num_or_size_splits=2, axis=1)

        # Parameters of gates are concatenated into one multiply for
        # efficiency.
        concat = tf.contrib.rnn._linear(
            [inputs, h_prev], 4 * self._num_units, True)

        # i = input_gate, g = new_input, f = forget_gate, o = output_gate
        i, g, f, o = tf.split(value=concat, num_or_size_splits=4, axis=1)

        c = (c_prev * tf.sigmoid(f + self._forget_bias) +
             tf.sigmoid(i) * tf.tanh(g))
        h = tf.tanh(c) * tf.sigmoid(o)

        if self._state_is_tuple:
            new_state = LSTMStateTuple(c, h)
        else:
            new_state = tf.concat([c, h], 1)
        return h, new_state
def build_generator(self):
    """Build the answer-generation graph.

    The question is fed to ``self.stacked_lstm`` one token at a time (a zero
    embedding at the first step, then the embedding of token ``i - 1``). The
    final LSTM state and the image features are each projected through a
    tanh layer, fused by element-wise multiplication, and projected to
    answer scores.

    Returns:
        ``(generated_ANS, image, question)``: the score tensor and the two
        placeholders that must be fed.
    """
    image = tf.placeholder(tf.float32, [self.batch_size, self.dim_image])
    question = tf.placeholder(tf.int32, [self.batch_size, self.max_words_q])

    state = tf.zeros([self.batch_size, self.stacked_lstm.state_size])
    # Use the same length as the `question` placeholder so the token index
    # below can never run past its second dimension.
    for i in range(self.max_words_q):
        if i == 0:
            ques_emb_linear = tf.zeros(
                [self.batch_size, self.input_embedding_size])
        else:
            # From the second step on, the LSTM variables already exist.
            tf.get_variable_scope().reuse_variables()
            ques_emb_linear = tf.nn.embedding_lookup(
                self.embed_ques_W, question[:, i - 1])

        ques_emb_drop = tf.nn.dropout(ques_emb_linear, 1 - self.drop_out_rate)
        ques_emb = tf.tanh(ques_emb_drop)

        output, state = self.stacked_lstm(ques_emb, state)

    # Multimodal fusion of question state and image features.
    state_drop = tf.nn.dropout(state, 1 - self.drop_out_rate)
    state_linear = tf.nn.xw_plus_b(
        state_drop, self.embed_state_W, self.embed_state_b)
    state_emb = tf.tanh(state_linear)

    image_drop = tf.nn.dropout(image, 1 - self.drop_out_rate)
    image_linear = tf.nn.xw_plus_b(
        image_drop, self.embed_image_W, self.embed_image_b)
    image_emb = tf.tanh(image_linear)

    scores = tf.mul(state_emb, image_emb)
    scores_drop = tf.nn.dropout(scores, 1 - self.drop_out_rate)

    # Final answer scores. This projection used to be built twice, with the
    # first copy never used.
    generated_ANS = tf.nn.xw_plus_b(
        scores_drop, self.embed_scor_W, self.embed_scor_b)

    return generated_ANS, image, question
def __call__(self, x_placeholder, h_prev, C_prev):
    """Run one LSTM step on embedded token ids.

    The variables ``'embedding'`` and ``'weight'`` are fetched from
    ``self.scope`` with ``reuse=True``, so they must already exist.

    Args:
        x_placeholder: token ids to look up in the embedding table.
        h_prev: previous hidden state.
        C_prev: previous cell state.

    Returns:
        ``(h, C)``: the new hidden state and the new cell state.
    """
    with tf.variable_scope(self.scope, reuse=True):
        embedding_table = tf.get_variable('embedding')
        gate_weights = tf.get_variable('weight')

        embedded = tf.nn.embedding_lookup(embedding_table, x_placeholder)
        if self.is_training:
            embedded = tf.nn.dropout(embedded, self.keep_prob)

        # One matmul produces all four gate pre-activations, laid out along
        # axis 1 as forget | input | cell update | output.
        joined = tf.concat(1, [h_prev, embedded])
        pre_act = tf.matmul(joined, gate_weights)
        pre_f, pre_i, pre_update, pre_o = tf.split(1, 4, pre_act)

        forget_gate = tf.sigmoid(pre_f)
        input_gate = tf.sigmoid(pre_i)
        output_gate = tf.sigmoid(pre_o)
        cell_update = tf.tanh(pre_update)

        # New cell state, then the gated output.
        C = tf.mul(forget_gate, C_prev) + tf.mul(input_gate, cell_update)
        h = tf.mul(output_gate, tf.tanh(C))
        return h, C
def __call__(self, x, state, scope=None):
    """Run one LSTM step with optional recurrent dropout.

    Args:
        x: input tensor; its second dimension must be statically known.
        state: previous state, ``c`` and ``h`` concatenated along axis 1.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        ``(new_h, new_state)`` where ``new_state`` is ``[new_c, new_h]``
        concatenated along axis 1 (a single tensor, not a tuple).
    """
    with tf.variable_scope(scope or type(self).__name__):
        c, h = tf.split(state, 2, 1)

        input_dim = x.get_shape().as_list()[1]
        gates_dim = 4 * self.num_units

        # W_xh and W_hh stay separate variables so each can use its own
        # initializer: the default for W_xh, orthogonal for W_hh.
        input_init = None
        recurrent_init = lstm_ortho_initializer(1.0)

        w_xh = tf.get_variable(
            'W_xh', [input_dim, gates_dim], initializer=input_init)
        w_hh = tf.get_variable(
            'W_hh', [self.num_units, gates_dim], initializer=recurrent_init)
        bias = tf.get_variable(
            'bias', [gates_dim], initializer=tf.constant_initializer(0.0))

        # Join operands and weights so a single matmul covers both terms.
        xh = tf.concat([x, h], 1)
        w_full = tf.concat([w_xh, w_hh], 0)
        pre_act = tf.matmul(xh, w_full) + bias

        # i = input gate, j = new input, f = forget gate, o = output gate
        i, j, f, o = tf.split(pre_act, 4, 1)

        if self.use_recurrent_dropout:
            # Dropout is applied to the candidate only.
            g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob)
        else:
            g = tf.tanh(j)

        new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
        new_h = tf.tanh(new_c) * tf.sigmoid(o)

        return new_h, tf.concat([new_c, new_h], 1)
def lstm_step(tensors, state):
    """Run one LSTM step.

    Args:
        tensors: a sequence holding exactly one tensor, the step input.
        state: dict with the previous hidden state under ``"h"`` and the
            previous cell state under ``"c"``; both must have the same last
            dimension.

    Returns:
        A dict with the new ``"h"`` and ``"c"`` tensors.
    """
    # TODO group linear operations for more efficiency
    with tf.variable_scope("lstm"):
        x, = tensors
        h = state["h"]
        c = state["c"]
        assert is_tensor(x)
        assert is_tensor(h)
        assert is_tensor(c)
        num_units = get_shape_values(h)[-1]
        assert get_shape_values(c)[-1] == num_units

        # Each gate gets its own input projection, recurrent projection and
        # bias, named "<gate>_x", "<gate>_h" and "<gate>_bias".
        logits = {}
        for gate in ("forget", "input", "output", "update"):
            from_x = linear(gate + "_x", x, num_units)
            from_h = linear(gate + "_h", h, num_units)
            logits[gate] = add_bias(gate + "_bias", from_x + from_h)

        forget_gate = tf.nn.sigmoid(logits["forget"])
        input_gate = tf.nn.sigmoid(logits["input"])
        output_gate = tf.nn.sigmoid(logits["output"])
        update = tf.tanh(logits["update"])

        new_c = forget_gate * c + input_gate * update
        new_h = tf.tanh(new_c) * output_gate
        return {"h": new_h, "c": new_c}
def __call__(self, inputs, state, timestep = 0, scope=None):
    """Run one step of a stacked highway recurrent cell.

    For each of ``self.num_highway_layers`` layers a candidate (tanh) and a
    gate (sigmoid, bias initialised to -3.0) are computed, and the state is
    updated as ``candidate * gate + state * (1 - gate)``. The first layer
    always sees ``inputs``; later layers see them only when
    ``self.use_inputs_on_each_layer`` is set.

    Args:
        inputs: input tensor for this step.
        state: previous state.
        timestep: accepted for interface compatibility; not used here.
        scope: accepted for interface compatibility; not used here.

    Returns:
        ``(output, new_state)``, which are the same tensor.
    """
    current_state = state
    for layer in xrange(self.num_highway_layers):
        sees_inputs = self.use_inputs_on_each_layer or layer == 0

        with tf.variable_scope('highway_factor_'+str(layer)):
            if sees_inputs:
                candidate_pre = multiplicative_integration(
                    [inputs, current_state], self._num_units)
            else:
                candidate_pre = linear(
                    [current_state], self._num_units, True)
            highway_factor = tf.tanh(candidate_pre)

        with tf.variable_scope('gate_for_highway_factor_'+str(layer)):
            if sees_inputs:
                gate_pre = multiplicative_integration(
                    [inputs, current_state], self._num_units,
                    initial_bias_value = -3.0)
            else:
                gate_pre = linear(
                    [current_state], self._num_units, True, -3.0)
            gate_for_highway_factor = tf.sigmoid(gate_pre)
            gate_for_hidden_factor = 1 - gate_for_highway_factor

            # Recurrent dropout touches the candidate only, never the
            # carried state.
            if self.use_recurrent_dropout and self.is_training:
                highway_factor = tf.nn.dropout(
                    highway_factor, self.recurrent_dropout_factor)

        current_state = (highway_factor * gate_for_highway_factor +
                         current_state * gate_for_hidden_factor)

    return current_state, current_state
def lnlstm(xs, ms, s, scope, nh, init_scale=1.0):
    """Unroll a layer-normalised LSTM over a list of step inputs.

    Args:
        xs: list of rank-2 step inputs. NOTE: the list is overwritten in
            place with the per-step outputs and is also returned.
        ms: list of per-step masks. Both ``c`` and ``h`` are multiplied by
            ``1 - m`` before each step, so a mask of 1 resets the state.
        s: initial state, ``c`` and ``h`` concatenated along axis 1.
        scope: variable scope name for the parameters.
        nh: number of hidden units.
        init_scale: scale passed to ``ortho_init`` for both weight matrices.

    Returns:
        ``(xs, s)``: the list of outputs and the final concatenated state.
    """
    # Unpacking two values also enforces that the inputs are rank 2.
    _, nin = [dim.value for dim in xs[0].get_shape()]
    gates_dim = nh*4

    with tf.variable_scope(scope):
        # Input projection with its layer-norm gain and bias.
        wx = tf.get_variable("wx", [nin, gates_dim],
                             initializer=ortho_init(init_scale))
        gx = tf.get_variable("gx", [gates_dim],
                             initializer=tf.constant_initializer(1.0))
        bx = tf.get_variable("bx", [gates_dim],
                             initializer=tf.constant_initializer(0.0))

        # Recurrent projection with its layer-norm gain and bias.
        wh = tf.get_variable("wh", [nh, gates_dim],
                             initializer=ortho_init(init_scale))
        gh = tf.get_variable("gh", [gates_dim],
                             initializer=tf.constant_initializer(1.0))
        bh = tf.get_variable("bh", [gates_dim],
                             initializer=tf.constant_initializer(0.0))

        b = tf.get_variable("b", [gates_dim],
                            initializer=tf.constant_initializer(0.0))

        # Layer-norm gain and bias for the cell state.
        gc = tf.get_variable("gc", [nh],
                             initializer=tf.constant_initializer(1.0))
        bc = tf.get_variable("bc", [nh],
                             initializer=tf.constant_initializer(0.0))

    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for step, (x, mask) in enumerate(zip(xs, ms)):
        c = c*(1-mask)
        h = h*(1-mask)

        normed_x = _ln(tf.matmul(x, wx), gx, bx)
        normed_h = _ln(tf.matmul(h, wh), gh, bh)
        z = normed_x + normed_h + b

        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        u = tf.tanh(u)

        c = f*c + i*u
        h = o*tf.tanh(_ln(c, gc, bc))
        xs[step] = h

    s = tf.concat(axis=1, values=[c, h])
    return xs, s
def __call__(self, inputs, state, scope=None):
    """JZS1, mutant 1 with n units cells.

    Computes, on the GPU selected by ``self._gpu_for_layer``:

        z   = sigm(W_xz x + b_z)
        r   = sigm(W_xr x + W_hr h + b_r)
        h'  = tanh(W_hh (r * h) + tanh(x) + b_h) * z + h * (1 - z)

    ``tanh(inputs)`` is added directly to the candidate projection, so the
    inputs must be addable to a ``self._num_units``-wide tensor.

    Returns:
        ``(h', h')``: there is only one hidden state to keep track of.
    """
    with tf.device("/gpu:"+str(self._gpu_for_layer)):
        with tf.variable_scope(scope or type(self).__name__):
            # Both gates start with a bias of 1.0 (to not reset and not
            # update, per the original note).
            with tf.variable_scope("Zinput"):
                # Equation 1: the update gate depends on the inputs only.
                z_pre = linear(
                    [inputs], self._num_units, True, 1.0,
                    weight_initializer = self._weight_initializer,
                    orthogonal_scale_factor = self._orthogonal_scale_factor)
                z = tf.sigmoid(z_pre)

            with tf.variable_scope("Rinput"):
                # Equation 2: the reset gate depends on inputs and state.
                r_pre = linear(
                    [inputs, state], self._num_units, True, 1.0,
                    weight_initializer = self._weight_initializer,
                    orthogonal_scale_factor = self._orthogonal_scale_factor)
                r = tf.sigmoid(r_pre)

            # Equation 3: candidate and interpolation.
            with tf.variable_scope("Candidate"):
                projected_state = linear([r*state], self._num_units, True)
                candidate = tf.tanh(tf.tanh(inputs) + projected_state)
                updated_part = candidate*z
                carried_part = state*(1 - z)

            h_t = updated_part + carried_part

    return h_t, h_t
def __call__(self, inputs, state, scope=None):
    """JZS3 cell with n units.

    Computes, on the GPU selected by ``self._gpu_for_layer``:

        z   = sigm(W_xz x + W_hz tanh(h) + b_z)
        r   = sigm(W_xr x + W_hr h + b_r)
        h'  = tanh(W_hh (r * h) + W_xh x + b_h) * z + h * (1 - z)

    NOTE(review): the previous docstring called this "mutant 2"; the
    equations above are what the code implements - confirm the intended
    naming.

    Returns:
        ``(h', h')``: there is only one hidden state to keep track of.
    """
    with tf.device("/gpu:"+str(self._gpu_for_layer)):
        with tf.variable_scope(scope or type(self).__name__):
            # Both gates start with a bias of 1.0 (to not reset and not
            # update, per the original note).
            with tf.variable_scope("Zinput"):
                # Equation 1: the update gate sees tanh of the state.
                z_pre = linear(
                    [inputs, tf.tanh(state)], self._num_units, True, 1.0,
                    weight_initializer = self._weight_initializer,
                    orthogonal_scale_factor = self._orthogonal_scale_factor)
                z = tf.sigmoid(z_pre)

            # Equation 2: the reset gate.
            with tf.variable_scope("Rinput"):
                r_pre = linear(
                    [inputs, state], self._num_units, True, 1.0,
                    weight_initializer = self._weight_initializer,
                    orthogonal_scale_factor = self._orthogonal_scale_factor)
                r = tf.sigmoid(r_pre)

            # Equation 3: candidate and interpolation.
            with tf.variable_scope("Candidate"):
                candidate_pre = linear(
                    [state*r, inputs], self._num_units, True)
                updated_part = (tf.tanh(candidate_pre))*z
                carried_part = state*(1 - z)

            h_t = updated_part + carried_part

    return h_t, h_t
def LSTMCell(cls, x, mprev, cprev, weights):
    """Run one LSTM step using a single fused weight matrix.

    Args:
        x: input for this step.
        mprev: previous output.
        cprev: previous cell state.
        weights: matrix applied to ``[x, mprev]`` (joined along axis 1); its
            output is split into four equal parts along axis 1, in the order
            candidate, input gate, forget gate, output gate.

    Returns:
        ``(new_m, new_c)``: the new output and the new cell state, the
        latter clipped to ``[-50, 50]``.
    """
    joined = tf.concat(1, [x, mprev])
    pre_act = tf.matmul(joined, weights)
    candidate, input_gate, forget_gate, output_gate = tf.split(1, 4, pre_act)

    retained = tf.sigmoid(forget_gate) * cprev
    admitted = tf.sigmoid(input_gate) * tf.tanh(candidate)
    # Clipping keeps the cell state bounded.
    new_c = tf.clip_by_value(retained + admitted, -50.0, 50.0)

    new_m = tf.sigmoid(output_gate) * tf.tanh(new_c)
    return new_m, new_c
def buildNeuralNet(inputNodes, hiddenLayers, outputNodes):
    """Build a bias-free fully connected network on a 3-feature input.

    The network has one tanh input layer (3 -> inputNodes), ``hiddenLayers``
    tanh layers (inputNodes -> inputNodes) and a sigmoid output layer
    (inputNodes -> outputNodes).

    Args:
        inputNodes: width of the input layer and of every hidden layer.
        hiddenLayers: number of hidden layers.
        outputNodes: width of the output layer.

    Returns:
        ``(x, layers, weights)``: the input placeholder, the list of layer
        activations (output layer last) and the list of weight variables in
        the same order.
    """
    x = tf.placeholder(tf.float32, shape=[None, 3])

    # Note: each `name` below is given to the random_normal initial-value
    # op, not to the tf.Variable that wraps it.

    # Input layer.
    input_weights = tf.Variable(
        tf.random_normal([3, inputNodes], name="InputWeights"))
    weights = [input_weights]
    layers = [tf.tanh(tf.matmul(x, input_weights))]

    # Hidden layers, numbered from 1.
    for index in range(hiddenLayers):
        init_name = "HiddenWeights" + str(index + 1)
        hidden_weights = tf.Variable(
            tf.random_normal([inputNodes, inputNodes], name=init_name))
        activation = tf.tanh(tf.matmul(layers[-1], hidden_weights))
        weights.append(hidden_weights)
        layers.append(activation)

    # Output layer.
    output_weights = tf.Variable(
        tf.random_normal([inputNodes, outputNodes], name="OutputWeights"))
    y = tf.sigmoid(tf.matmul(layers[-1], output_weights))
    weights.append(output_weights)
    layers.append(y)

    return x, layers, weights
def __call__(self, x, state, timestep=0, scope=None):
    """Run one layer-normalised LSTM step with optional recurrent dropout.

    No bias variable is created here; the gate pre-activations go through
    ``layer_norm_all`` and the new cell state through ``layer_norm`` before
    the output non-linearity.

    Args:
        x: input tensor; both of its dimensions must be statically known.
        state: previous state, ``h`` and ``c`` concatenated along axis 1
            (hidden state first).
        timestep: accepted for interface compatibility; not used here.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        ``(new_h, new_state)`` where ``new_state`` is ``[new_h, new_c]``
        concatenated along axis 1.
    """
    with tf.variable_scope(scope or type(self).__name__):
        h, c = tf.split(state, 2, 1)

        h_size = self.num_units
        static_shape = x.get_shape().as_list()
        x_size = static_shape[1]
        batch_size = x.get_shape().as_list()[0]
        gates_dim = 4 * self.num_units

        # Default initializer for W_xh, orthogonal for W_hh.
        input_init = None
        recurrent_init = lstm_ortho_initializer(1.0)

        w_xh = tf.get_variable(
            'W_xh', [x_size, gates_dim], initializer=input_init)
        w_hh = tf.get_variable(
            'W_hh', [self.num_units, gates_dim], initializer=recurrent_init)

        # Join operands and weights so one matmul covers both terms.
        xh = tf.concat([x, h], 1)
        w_full = tf.concat([w_xh, w_hh], 0)
        pre_act = tf.matmul(xh, w_full)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        normed = layer_norm_all(pre_act, batch_size, 4, h_size, 'ln_all')
        i, j, f, o = tf.split(normed, 4, 1)

        if self.use_recurrent_dropout:
            # Dropout is applied to the candidate only.
            g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob)
        else:
            g = tf.tanh(j)

        new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
        new_h = tf.tanh(layer_norm(new_c, h_size, 'ln_c')) * tf.sigmoid(o)

    return new_h, tf.concat([new_h, new_c], 1)
def build_graph(input_size, minibatch_size):
    """Build a one-convolution regression network and its loss.

    The graph is: conv2d ('SAME' padding, stride ``conv1_stride``) -> tanh ->
    flatten -> dense -> tanh, trained with a mean squared error against
    ``labels``. Layer sizes come from module-level settings
    (``conv1_size``, ``conv1_features``, ``conv1_stride``,
    ``input_channels``, ``output_features``, ``random_init_stddev``). The
    final reshape to ``[minibatch_size]`` only works when
    ``output_features`` is 1.

    Args:
        input_size: height and width of the (square) input images.
        minibatch_size: fixed batch size baked into the placeholders.

    Returns:
        ``(inputs, labels, output, loss)``: the two placeholders, the
        per-example prediction and the scalar loss.
    """
    # With 'SAME' padding a strided convolution yields
    # ceil(input_size / stride) outputs per spatial dimension. The previous
    # expression `conv1_features * input_size//2 * input_size//2` evaluated
    # left to right and assumed stride 2, so it matched the real size only
    # for even inputs with stride 2.
    conv1_out_size = -(-input_size // conv1_stride)
    flat_size = conv1_features * conv1_out_size * conv1_out_size

    inputs = tf.placeholder(
        tf.float32,
        shape=[minibatch_size, input_size, input_size, input_channels],
        name='inputs')
    labels = tf.placeholder(tf.float32, shape=[minibatch_size], name='labels')

    with tf.name_scope('conv1'):
        conv1_init = tf.truncated_normal(
            [conv1_size, conv1_size, input_channels, conv1_features],
            stddev=random_init_stddev, dtype=tf.float32)
        conv1_weights = tf.Variable(conv1_init, name='weights')
        conv1_bias = tf.Variable(
            tf.zeros([conv1_features], dtype=tf.float32), name='bias')
        conv1_y = tf.nn.conv2d(
            inputs, conv1_weights, [1, conv1_stride, conv1_stride, 1],
            padding='SAME', name='y')
        conv1_biased = tf.nn.bias_add(conv1_y, conv1_bias)
        conv1_activity = tf.tanh(conv1_biased, name='activity')

    with tf.name_scope('output'):
        output_init = tf.truncated_normal(
            [flat_size, output_features],
            stddev=random_init_stddev, dtype=tf.float32)
        output_weights = tf.Variable(output_init, name='weights')
        output_bias = tf.Variable(
            tf.zeros([output_features], dtype=tf.float32), name='bias')
        conv1_flat = tf.reshape(conv1_activity, [minibatch_size, flat_size])
        output_y = tf.matmul(conv1_flat, output_weights, name='y')
        output_raw = tf.nn.bias_add(output_y, output_bias)
        output_tanh = tf.tanh(output_raw)
        output = tf.reshape(output_tanh, [minibatch_size])

    with tf.name_scope('loss'):
        minibatch_loss = tf.squared_difference(labels, output)
        loss = tf.reduce_mean(minibatch_loss)
        tf.scalar_summary('loss', loss)

    return inputs, labels, output, loss
def lstm_cell(x, h, c, name=None, reuse=False):
    """LSTM returning hidden state and content cell at a specific timestep.

    Args:
        x: input tensor; its last dimension sets the input width.
        h: previous hidden state; its last dimension sets the unit count.
        c: previous cell state.
        name: variable scope name; ``"lstm"`` is used when omitted.
        reuse: whether to reuse the variables of an existing scope.

    Returns:
        ``(h, c)``: the new hidden state and the new cell state.
    """
    input_dim = x.shape[-1].value
    num_units = h.shape[-1].value
    gates_dim = num_units * 4

    with tf.variable_scope(name, default_name="lstm", values=[x, h, c],
                           reuse=reuse):
        w_input = tf.get_variable(
            "kernel/input", [input_dim, gates_dim], dtype=tf.float32,
            initializer=tf.orthogonal_initializer(1.0))
        w_hidden = tf.get_variable(
            "kernel/hidden", [num_units, gates_dim], dtype=tf.float32,
            initializer=tf.orthogonal_initializer(1.0))
        bias = tf.get_variable(
            "bias", [gates_dim], dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))

        pre_act = tf.matmul(x, w_input) + tf.matmul(h, w_hidden) + bias
        pre_i, pre_f, pre_o, pre_u = tf.split(pre_act, 4, axis=1)

        input_gate = tf.sigmoid(pre_i)
        # A constant +1.0 is added to the forget-gate pre-activation.
        forget_gate = tf.sigmoid(pre_f + 1.0)
        output_gate = tf.sigmoid(pre_o)
        update = tf.tanh(pre_u)

        c = forget_gate * c + input_gate * update
        h = output_gate * tf.tanh(c)
        return h, c
def __call__(self, inputs, state, timestep = 0, scope=None):
    """Long short-term memory cell (LSTM) with layer normalisation.

    The input and recurrent projections are computed without bias, each is
    layer-normalised, and the two are combined by
    ``multiplicative_integration``. The new cell state is layer-normalised
    again before the output non-linearity.

    Args:
        inputs: input tensor for this step.
        state: previous state, ``h`` and ``c`` concatenated along axis 1
            (hidden state first).
        timestep: accepted for interface compatibility; not used here. (The
            original note suggested using it to keep separate normalisation
            statistics for later timesteps.)
        scope: optional variable scope name; defaults to the class name.

    Returns:
        ``(new_h, new_state)`` where ``new_state`` is ``[new_h, new_c]``
        concatenated along axis 1.
    """
    with tf.variable_scope(scope or type(self).__name__):
        h, c = tf.split(1, 2, state)
        gates_dim = 4 * self._num_units

        # The projections carry no bias; per the original note the bias is
        # added later by the normalisation.
        with tf.variable_scope('inputs_weight_matrix'):
            inputs_proj = linear([inputs], gates_dim, False)
            inputs_proj = layer_norm(
                inputs_proj, num_variables_in_tensor = 4,
                scope = "inputs_concat_layer_norm")

        with tf.variable_scope('state_weight_matrix'):
            state_proj = linear([h], gates_dim, False)
            state_proj = layer_norm(
                state_proj, num_variables_in_tensor = 4,
                scope = "h_concat_layer_norm")

        combined = multiplicative_integration(
            [inputs_proj, state_proj], 4*self._num_units, 0.0,
            weights_already_calculated = True)
        # i = input gate, j = new input, f = forget gate, o = output gate
        i, j, f, o = tf.split(1, 4, combined)

        new_c = (c * tf.sigmoid(f + self._forget_bias) +
                 tf.sigmoid(i) * tf.tanh(j))

        # Layer norm on the hidden state transition.
        with tf.variable_scope('layer_norm_hidden_state'):
            new_h = tf.tanh(layer_norm(new_c)) * tf.sigmoid(o)

    # Hidden state first, matching the split at the top.
    return new_h, tf.concat(1, [new_h, new_c])
def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM) with one forget gate per child.

    The cell's own input, new-input and output gates are computed from
    ``[inputs, h]``. One forget gate is computed per entry of
    ``child_states``; when there are no children the cell's own forget gate
    is used, which reduces to a regular LSTM step.

    Two defects of the previous version are fixed here:
      * the child loop overwrote ``concat``, so the cell's own gates were
        taken from the last child's projection;
      * ``fs + self._forget_bias`` added a scalar to a Python list, which
        raises ``TypeError``; the bias is now added to each forget gate.

    Args:
        inputs: input tensor for this step.
        state: previous state, ``c`` and ``h`` concatenated along axis 1.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        ``(new_h, new_state)`` where ``new_state`` is ``[new_c, new_h]``
        concatenated along axis 1.
    """
    with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        c, h = tf.split(1, 2, state)
        # Parameters of gates are concatenated into one multiply for
        # efficiency.
        concat = linear.linear([inputs, h], 4 * self._num_units, True)

        # NOTE(review): `child_states` is not a parameter of this method; it
        # is resolved from an enclosing/module scope - confirm where it is
        # defined.
        # NOTE(review): every child triggers another `linear.linear` call in
        # this same variable scope; whether that needs variable reuse
        # depends on `linear`, which is not visible here.
        fs = []
        for child_state in child_states:
            _, h_k = tf.split(1, 2, child_state)
            # Only the forget gate of the child projection is needed. The
            # full projection is still computed, as before.
            child_concat = linear.linear(
                [inputs, h_k], 4 * self._num_units, True)
            fs.append(tf.split(1, 4, child_concat)[2])

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(1, 4, concat)

        # If no children just treat it like a regular lstm
        if not fs:
            fs.append(f)

        # NOTE(review): each forget gate scales this cell's own previous
        # `c`, as the original expression did. A child-sum Tree-LSTM would
        # scale each child's cell state instead - confirm the intent.
        retained = sum(c * tf.sigmoid(f_k + self._forget_bias) for f_k in fs)
        new_c = retained + tf.sigmoid(i) * tf.tanh(j)
        new_h = tf.tanh(new_c) * tf.sigmoid(o)

    return new_h, tf.concat(1, [new_c, new_h])
def build(self, inp):
    """Build LSTM graph.

    Each gate is computed as ``Conv2D(w_x)(x) + Conv2D(w_h)(h) + b``, so
    this is a convolutional LSTM step. The state is a rank-4 tensor whose
    last axis holds ``c`` in the first ``self.hid_depth`` channels and ``h``
    in the next ``self.hid_depth`` channels.

    Args:
        inp: dict with the step input under ``'input'`` and the previous
            state under ``'state'``.

    Returns:
        The new state, ``c`` and ``h`` concatenated along axis 3.
    """
    self.lazy_init_var()
    x = inp['input']
    state = inp['state']

    with tf.variable_scope(self.scope):
        depth = self.hid_depth
        c = tf.slice(state, [0, 0, 0, 0], [-1, -1, -1, depth])
        h = tf.slice(state, [0, 0, 0, depth], [-1, -1, -1, depth])

        def pre_activation(w_x, w_h, bias):
            # Input convolution plus recurrent convolution plus bias.
            return Conv2D(w_x)(x) + Conv2D(w_h)(h) + bias

        input_gate = tf.sigmoid(
            pre_activation(self.w_xi, self.w_hi, self.b_i))
        forget_gate = tf.sigmoid(
            pre_activation(self.w_xf, self.w_hf, self.b_f))
        output_gate = tf.sigmoid(
            pre_activation(self.w_xo, self.w_ho, self.b_o))
        update = tf.tanh(pre_activation(self.w_xu, self.w_hu, self.b_u))

        c = forget_gate * c + input_gate * update
        h = output_gate * tf.tanh(c)
        state = tf.concat(3, [c, h])

    return state
def decoder_body(time, old_state, output_ta_t, attention_tracker):
    """Run one attention + GRU decoder step (a loop body).

    Besides its arguments this function reads names from the enclosing
    scope: ``feedback``, ``input_ta``, ``embeddings``, the attention terms
    (``part1``, ``W_a``, ``b_a``, ``v_a``, ``hidden``, ``attention_lengths``,
    ``mask``) and the output/GRU parameters (``W_out``, ``b_out``, ``W_z``,
    ``b_z``, ``W_r``, ``b_r``, ``W_c``, ``b_c``).

    Args:
        time: current step index.
        old_state: decoder state from the previous step.
        output_ta_t: TensorArray collecting the decoder states.
        attention_tracker: TensorArray collecting the attention weights.

    Returns:
        ``(time + 1, new_state, output_ta_t, attention_tracker)``.
    """
    # Step input: with feedback the embedding of the previous prediction is
    # fed back after step 0, otherwise the ground-truth input is read.
    if feedback:
        def from_previous():
            prev_logits = tf.matmul(old_state, W_out) + b_out
            return tf.gather(embeddings, tf.argmax(prev_logits, 1))
        x_t = tf.cond(tf.greater(time, 0),
                      from_previous,
                      lambda: input_ta.read(0))
    else:
        x_t = input_ta.read(time)

    # Attention: score each position against the current state.
    state_term = tf.matmul(old_state, W_a) + b_a
    state_term = tf.expand_dims(state_term, 1)
    combined = part1 + state_term
    scores = tf.reduce_sum(v_a * tf.tanh(combined), [2])

    # Softmax, then zero the masked positions and renormalise.
    alpha = tf.nn.softmax(scores)
    alpha = tf.to_float(mask(attention_lengths)) * alpha
    alpha = alpha / tf.reduce_sum(alpha, [1], keep_dims=True)
    attention_tracker = attention_tracker.write(time, alpha)

    weighted = tf.expand_dims(alpha, 2) * tf.squeeze(hidden)
    context = tf.reduce_sum(weighted, [1])

    # GRU update conditioned on the input, the state and the context.
    gate_input = tf.concat(1, [x_t, old_state, context])
    z = tf.sigmoid(tf.matmul(gate_input, W_z) + b_z)
    r = tf.sigmoid(tf.matmul(gate_input, W_r) + b_r)
    candidate_input = tf.concat(1, [x_t, r*old_state, context])
    candidate = tf.tanh(tf.matmul(candidate_input, W_c) + b_c)
    new_state = (1-z)*candidate + z*old_state

    output_ta_t = output_ta_t.write(time, new_state)
    return (time + 1, new_state, output_ta_t, attention_tracker)
def lstm_cell(i, o, state):
    """Run one LSTM step with separate weights for every gate.

    The weights and biases (``ix``/``im``/``ib`` for the input gate,
    ``fx``/``fm``/``fb`` for the forget gate, ``cx``/``cm``/``cb`` for the
    update and ``ox``/``om``/``ob`` for the output gate) are read from the
    enclosing scope.

    Args:
        i: input for this step.
        o: output of the previous step.
        state: previous cell state.

    Returns:
        ``(output, state)``: the new output and the new cell state.
    """
    input_pre = tf.matmul(i, ix) + tf.matmul(o, im) + ib
    input_gate = tf.sigmoid(input_pre)

    forget_pre = tf.matmul(i, fx) + tf.matmul(o, fm) + fb
    forget_gate = tf.sigmoid(forget_pre)

    update = tf.matmul(i, cx) + tf.matmul(o, cm) + cb
    state = forget_gate * state + input_gate * tf.tanh(update)

    output_pre = tf.matmul(i, ox) + tf.matmul(o, om) + ob
    output_gate = tf.sigmoid(output_pre)

    return output_gate * tf.tanh(state), state
def model_encoder_decoder(encoder_inputs, world_state_vectors, batch_size):
    """Build the encoder and the unrolled attention decoder.

    Relies on names from the enclosing scope: ``self``, ``encoder``,
    ``precalc_Ux_Vh``, ``context_vector``, ``decoder_cell``, ``keep_prob``
    and the weight/bias variables used below.

    Args:
        encoder_inputs: inputs passed to ``encoder``.
        world_state_vectors: indexable collection with one world-state
            tensor per decoder step.
        batch_size: batch size forwarded to ``context_vector``.

    Returns:
        ``(logits, predictions)``: two lists with one tensor per decoder
        unrolling.
    """
    h_encoder, c1, h1 = encoder(encoder_inputs)
    U_V_precalc = precalc_Ux_Vh(encoder_inputs, h_encoder)

    # Decoder loop.
    with tf.name_scope('Decoder'):
        # Initial decoder state and cell, derived from the encoder's
        # final h and c.
        s_t = tf.tanh(tf.matmul(h1, w_trans_s) + b_trans_s, name='s_0')
        c_t = tf.tanh(tf.matmul(c1, w_trans_c) + b_trans_c, name='c_0')

        logits = []
        predictions = []
        for step in xrange(self._decoder_unrollings):
            # World state vector at this step.
            y_t = world_state_vectors[step]

            # Embed the world vector through a ReLU layer.
            ey = tf.nn.relu(tf.matmul(y_t, w_emby) + b_emby, name='Ey')

            # Context vector from the attention over the encoder states.
            z_t = context_vector(
                s_t, h_encoder, U_V_precalc, encoder_inputs, batch_size)

            # Dropout on the decoder input and on the new state.
            ey = tf.nn.dropout(ey, keep_prob)
            s_t, c_t = decoder_cell(ey, s_t, z_t, c_t)
            s_t = tf.nn.dropout(s_t, keep_prob)

            # Hidden linear layer combining the embedded world state, the
            # decoder state and the context vector.
            hq = ey + tf.matmul(s_t, ws) + tf.matmul(z_t, wz) + b_q

            # Output layer.
            logit = tf.matmul(hq, wo) + b_o
            prediction = tf.nn.softmax(logit, name='prediction')

            logits.append(logit)
            predictions.append(prediction)

    return logits, predictions
def __call__(self, x, state, scope=None):
    """Run one LSTM step; the state is a ``(c, h)`` pair.

    Args:
        x: input tensor; its second dimension must be statically known.
        state: previous ``(c, h)`` pair.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        ``(new_h, (new_c, new_h))``.
    """
    with tf.variable_scope(scope or type(self).__name__):
        c, h = state

        # Keep W_xh and W_hh separate here as well to reuse initialization
        # methods.
        x_size = x.get_shape().as_list()[1]
        # A leftover debug print of the input shape was removed here: it
        # wrote to stdout on every call and used Python 2-only syntax.
        W_xh = tf.get_variable(
            'W_xh', [x_size, 4 * self.num_units],
            initializer=orthogonal_initializer())
        W_hh = tf.get_variable(
            'W_hh', [self.num_units, 4 * self.num_units],
            initializer=bn_lstm_identity_initializer(0.95))
        bias = tf.get_variable('bias', [4 * self.num_units])

        # Equivalent to tf.matmul(x, W_xh) + tf.matmul(h, W_hh) + bias, but
        # done as one matmul for speed.
        concat = tf.concat(1, [x, h])
        W_both = tf.concat(0, [W_xh, W_hh])
        hidden = tf.matmul(concat, W_both) + bias

        # i = input gate, j = new input, f = forget gate, o = output gate
        i, j, f, o = tf.split(1, 4, hidden)

        new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j)
        new_h = tf.tanh(new_c) * tf.sigmoid(o)

        return new_h, (new_c, new_h)
def __call__(self, x, state, scope=None):
    """Run one batch-normalised LSTM step; the state is a ``(c, h)`` pair.

    The input and recurrent projections are batch-normalised separately
    before being summed, and the new cell state is batch-normalised before
    the output non-linearity. The un-normalised cell state is what gets
    carried forward.

    Args:
        x: input tensor; its second dimension must be statically known.
        state: previous ``(c, h)`` pair.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        ``(new_h, (new_c, new_h))``.
    """
    with tf.variable_scope(scope or type(self).__name__):
        c, h = state

        input_dim = x.get_shape().as_list()[1]
        gates_dim = 4 * self.num_units
        W_xh = tf.get_variable(
            'W_xh', [input_dim, gates_dim],
            initializer=orthogonal_initializer())
        W_hh = tf.get_variable(
            'W_hh', [self.num_units, gates_dim],
            initializer=bn_lstm_identity_initializer(0.95))
        bias = tf.get_variable('bias', [gates_dim])

        # The projections stay separate so each gets its own batch norm.
        from_input = tf.matmul(x, W_xh)
        from_hidden = tf.matmul(h, W_hh)
        normed_input = batch_norm(from_input, 'xh', self.training)
        normed_hidden = batch_norm(from_hidden, 'hh', self.training)
        pre_act = normed_input + normed_hidden + bias

        # i = input gate, j = new input, f = forget gate, o = output gate
        i, j, f, o = tf.split(1, 4, pre_act)

        new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j)
        normed_c = batch_norm(new_c, 'c', self.training)
        new_h = tf.tanh(normed_c) * tf.sigmoid(o)

        return new_h, (new_c, new_h)
def sample_inference(state,model_input,model_sample_params):
    """Run the sampling network for one step.

    The input goes through a tanh-activated linear layer, then the LSTM
    rollout, then a linear output layer applied to tanh of the LSTM output
    and scaled by 0.05.

    Args:
        state: recurrent state passed to ``lstm_rollout``.
        model_input: input for this step.
        model_sample_params: dict with the parameters under
            ``'linear_input'``, ``'lstm_layer'`` and
            ``'linear_output_mean'``.

    Returns:
        ``(state, mean)``: the updated recurrent state and the scaled
        output.
    """
    params = model_sample_params

    projected_input = parallel_batch_mvx(params['linear_input'], model_input)
    lstm_input = tf.tanh(projected_input)

    state, lstm_output = lstm_rollout(state, lstm_input, params['lstm_layer'])

    unscaled_mean = parallel_batch_mvx(
        params['linear_output_mean'], tf.tanh(lstm_output))
    mean = unscaled_mean * 0.05
    return state, mean
def construct_composition(self, phrase_max_size, composition_function, dim, batch_size, embedding_train):
    """Build phrase-composition graphs for phrase lengths 1..phrase_max_size.

    Args:
        phrase_max_size: longest phrase length to build a graph for.
        composition_function: one of ``'RNN'``, ``'GRU'``, ``'LSTM'``,
            ``'GAC'``, ``'CNN'`` or ``'Add'``.
        dim: embedding / state dimensionality.
        batch_size: fixed batch size baked into the placeholders.
        embedding_train: when true (and the function is not ``'Add'``), the
            single-word embedding goes through tanh.

    Returns:
        ``(holder, composed)``: two dicts keyed by phrase length. ``holder``
        maps to the int32 placeholder of word ids (``[batch_size, length]``)
        and ``composed`` to the composed phrase vector.
    """
    holder = {}
    composed = {}

    def small_uniform(shape):
        # Uniform initial values in [-0.5 / dim, 0.5 / dim).
        return tf.random_uniform(shape, -0.5 / dim, 0.5 / dim)

    # Length-1 phrases are a plain embedding lookup.
    single_ids = tf.placeholder(tf.int32, [batch_size, 1], '1_word_holder')
    holder[1] = single_ids
    single_ids = tf.reshape(single_ids, [batch_size])
    single_embed = tf.nn.embedding_lookup(self._embed, single_ids)
    if embedding_train and composition_function != 'Add':
        single_embed = tf.tanh(single_embed)
    composed[1] = single_embed

    # Parameters shared by all phrase lengths.
    if composition_function == 'RNN':
        # In BasicRNNCell the output equals the state.
        rnn_cell = tf.nn.rnn_cell.BasicRNNCell(dim)
        initial_state = rnn_cell.zero_state(batch_size, tf.float32)
    elif composition_function == 'GRU':
        rnn_cell = tf.nn.rnn_cell.GRUCell(dim)
        initial_state = rnn_cell.zero_state(batch_size, tf.float32)
    elif composition_function == 'LSTM':
        rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(dim, forget_bias=0.0)
        initial_state = rnn_cell.zero_state(batch_size, tf.float32)
    elif composition_function == 'GAC':
        word_iw = tf.Variable(initial_value=small_uniform([dim, dim]),
                              name='word_iw')
        state_iw = tf.Variable(initial_value=small_uniform([dim, dim]),
                               name='state_iw')
        bias_iw = tf.Variable(initial_value=tf.zeros([dim], tf.float32),
                              name='bias_iw')
        word_if = tf.Variable(initial_value=small_uniform([dim, dim]),
                              name='word_if')
        state_if = tf.Variable(initial_value=small_uniform([dim, dim]),
                               name='state_if')
        bias_if = tf.Variable(initial_value=tf.zeros([dim], tf.float32),
                              name='bias_if')
        initial_state = tf.zeros([batch_size, dim], tf.float32)
    elif composition_function == 'CNN':
        weight_conv = tf.Variable(
            initial_value=small_uniform([3, dim, 1, dim]),
            name='weight_conv')
        bias_conv = tf.Variable(initial_value=tf.zeros([dim], tf.float32),
                                name='bias_conv')

    for length in xrange(2, phrase_max_size+1):
        phrase_ids = tf.placeholder(
            tf.int32, [batch_size, length], '%s_word_holder'%length)
        holder[length] = phrase_ids
        embed = tf.nn.embedding_lookup(self._embed, phrase_ids)

        if composition_function in ['RNN', 'GRU', 'LSTM']:
            state = initial_state
            with tf.variable_scope('RNN'):
                # The cell's weight matrices are created with this
                # initializer.
                tf.get_variable_scope().set_initializer(
                    tf.random_uniform_initializer(
                        minval=-0.5 / dim, maxval=0.5 / dim))
                for step in xrange(length):
                    # The cell's variables are created exactly once, at the
                    # first step of the length-2 graph, and reused after.
                    if step > 0 or length > 2:
                        tf.get_variable_scope().reuse_variables()
                    output, state = rnn_cell(embed[:, step, :], state)
        elif composition_function == 'GAC':
            state = initial_state
            for step in xrange(length):
                word = embed[:, step, :]
                input_gate = tf.sigmoid(
                    tf.matmul(word, word_iw) +
                    tf.matmul(state, state_iw) + bias_iw)
                forget_gate = tf.sigmoid(
                    tf.matmul(word, word_if) +
                    tf.matmul(state, state_if) + bias_if)
                state = tf.tanh(
                    tf.mul(input_gate, word) + tf.mul(forget_gate, state))
            output = state
        elif composition_function == 'CNN':
            # Pad one zero vector on each side of the phrase.
            as_image = tf.reshape(embed, [batch_size, length, dim, 1])
            embed = tf.pad(as_image, [[0, 0], [1, 1], [0, 0], [0, 0]],
                           mode='CONSTANT')
            conv = tf.tanh(
                tf.nn.conv2d(embed, weight_conv, [1, 1, dim, 1], 'SAME') +
                bias_conv)
            # Max over all (padded) positions.
            window = [1, length+2, 1, 1]
            max_pooled = tf.nn.max_pool(conv, window, window, 'SAME')
            output = tf.reshape(max_pooled, [batch_size, dim])
        elif composition_function == 'Add':
            output = tf.reduce_mean(embed, 1)

        composed[length] = output

    return holder, composed
def bottleneck(self, x):
    """Project ``x`` to the bottleneck width and squash it with tanh.

    In training mode, uniform noise in ``[-1, 1)`` scaled by
    ``hparams.bottleneck_noise`` is added to the squashed activations.

    Args:
        x: input tensor.

    Returns:
        ``(bottleneck, 0.0)``: the bottleneck activations and a constant
        second element.
    """
    with tf.variable_scope("bottleneck"):
        hp = self.hparams
        x = tf.layers.dense(x, hp.bottleneck_bits, name="bottleneck")

        training = hp.mode == tf.estimator.ModeKeys.TRAIN
        if not training:
            return tf.tanh(x), 0.0

        # random_uniform draws from [0, 1); rescale to [-1, 1).
        noise = 2.0 * tf.random_uniform(common_layers.shape_list(x)) - 1.0
        return tf.tanh(x) + noise * hp.bottleneck_noise, 0.0
def lstm_cell(i, o, state):
    """Run one LSTM step using fused gate weights.

    ``input_weights``, ``output_weights``, ``bias`` and ``gate_count`` are
    read from the enclosing scope. The fused pre-activation is split along
    axis 1 into ``gate_count`` parts, of which the first four are used as
    input gate, forget gate, update and output gate.

    Args:
        i: input for this step.
        o: output of the previous step.
        state: previous cell state.

    Returns:
        ``(output, state)``: the new output and the new cell state.
    """
    pre_act = tf.matmul(i, input_weights) + tf.matmul(o, output_weights) + bias
    parts = tf.split(1, gate_count, pre_act)

    input_gate = tf.sigmoid(parts[0])
    forget_gate = tf.sigmoid(parts[1])
    candidate = parts[2]
    state = forget_gate * state + input_gate * tf.tanh(candidate)

    output_gate = tf.sigmoid(parts[3])
    return output_gate * tf.tanh(state), state
def lstm_cell(i, o, state):
    """Run one LSTM step using fused gate weights.

    ``nx``, ``nm`` and ``nb`` (input weights, recurrent weights and bias)
    are read from the enclosing scope. The fused pre-activation is split
    along axis 1 into input gate, forget gate, update and output gate.

    Args:
        i: input for this step.
        o: output of the previous step.
        state: previous cell state.

    Returns:
        ``(output, state)``: the new output and the new cell state.
    """
    fused = tf.matmul(i, nx) + tf.matmul(o, nm) + nb
    pre_input, pre_forget, candidate, pre_output = tf.split(fused, 4, 1)

    input_gate = tf.sigmoid(pre_input)
    forget_gate = tf.sigmoid(pre_forget)
    output_gate = tf.sigmoid(pre_output)

    state = forget_gate * state + input_gate * tf.tanh(candidate)
    return output_gate * tf.tanh(state), state
def pix2pix_generator(net,
                      num_outputs,
                      blocks=None,
                      upsample_method='nn_upsample_conv',
                      is_training=False):  # pylint: disable=unused-argument
    """Defines the network architecture.

    The generator is an encoder/decoder with skip connections: every encoder
    activation is kept and concatenated (along the channel axis) onto the
    decoder input of the mirrored block.

    Args:
      net: A `Tensor` of size [batch, height, width, channels]. Note that the
        generator currently requires square inputs (e.g. height=width).
      num_outputs: The number of (per-pixel) outputs.
      blocks: A list of generator blocks or `None` to use the default generator
        definition.
      upsample_method: The method of upsampling images, one of
        'nn_upsample_conv' or 'conv2d_transpose'
      is_training: Whether or not we're in training or testing mode. Forwarded
        to the decoder's dropout layers.

    Returns:
      A pair `(logits, end_points)`: the output-layer logits and a dictionary
      of model end points ('encoder<i>', 'decoder<i>', 'logits' and
      'predictions', the latter being tanh(logits)).

    Raises:
      ValueError: if the input heights do not match their widths.
    """
    end_points = {}

    blocks = blocks or _default_generator_blocks()

    input_size = net.get_shape().as_list()
    height, width = input_size[1], input_size[2]
    if height != width:
        raise ValueError('The input height must match the input width.')

    # Only consumed by the commented-out reshape near the end of the function.
    input_size[3] = num_outputs

    upsample_fn = functools.partial(upsample, method=upsample_method)

    # Encoder outputs, in order, kept for the decoder's skip connections.
    encoder_activations = []

    ###########
    # Encoder #
    ###########
    with tf.variable_scope('encoder'):
        with tf.contrib.framework.arg_scope([layers.conv2d], kernel_size=[4, 4], stride=2, activation_fn=tf.nn.leaky_relu):

            for block_id, block in enumerate(blocks):
                # No normalizer for the first encoder layers as per 'Image-to-Image',
                # Section 5.1.1
                if block_id == 0:
                    # First layer doesn't use normalizer_fn
                    net = layers.conv2d(net, block.num_filters, normalizer_fn=None)
                elif block_id < len(blocks) - 1:
                    net = layers.conv2d(net, block.num_filters)
                else:
                    # Last layer doesn't use activation_fn nor normalizer_fn
                    net = layers.conv2d(net, block.num_filters, activation_fn=None, normalizer_fn=None)

                encoder_activations.append(net)
                end_points['encoder%d' % block_id] = net

    ###########
    # Decoder #
    ###########
    reversed_blocks = list(blocks)
    reversed_blocks.reverse()

    with tf.variable_scope('decoder'):
        # 'Image-to-Image', Section 2.1 (last paragraph) uses dropout at both
        # train and test time.
        # NOTE(review): this code forwards `is_training` to the dropout layers
        # instead of forcing it on, which does not match that description;
        # confirm which behaviour is intended.
        with tf.contrib.framework.arg_scope([layers.dropout], is_training=is_training):

            for block_id, block in enumerate(reversed_blocks):
                if block_id > 0:
                    # Skip connection: append the mirrored encoder activation
                    # along the channel axis.
                    net = tf.concat([net, encoder_activations[-block_id - 1]], axis=3)

                # The Relu comes BEFORE the upsample op:
                net = tf.nn.relu(net)
                net = upsample_fn(net, block.num_filters, [2, 2])
                if block.decoder_keep_prob > 0:
                    net = layers.dropout(net, keep_prob=block.decoder_keep_prob)
                end_points['decoder%d' % block_id] = net

    with tf.variable_scope('output'):
        logits = layers.conv2d(net, num_outputs, [4, 4], activation_fn=None)
        # print(logits)
        # logits = tf.reshape(logits, input_size)

        end_points['logits'] = logits
        end_points['predictions'] = tf.tanh(logits)

    return logits, end_points
def user_attention(self):
    """Build the user-conditioned hierarchical attention branch.

    Words are encoded by three stacked LSTM layers (dropout after the first,
    a residual connection around the second), pooled into sentence vectors
    with user-conditioned attention, encoded by a sentence-level LSTM and
    pooled again into a document vector.

    Sets on `self`: `u_sens`, `u_sens_scores`, `u_sen_losses`, `u_doc`,
    `u_scores`, `u_predictions`, `u_loss`, `u_correct_num`, `u_accuracy`.
    """
    inputs = tf.reshape(self.x, [-1, self.max_sen_len, self.embedding_dim])
    sen_len = tf.reshape(self.sen_len, [-1])

    with tf.name_scope('u_word_encode'):
        outputs1, _ = tf.nn.dynamic_rnn(
            cell=tf.nn.rnn_cell.LSTMCell(self.hidden_size, forget_bias=1.0),
            inputs=inputs,
            sequence_length=sen_len,
            dtype=tf.float32,
            scope='u_word'
        )

    inputs2 = tf.reshape(outputs1, [-1, self.max_sen_len, self.hidden_size])
    # NOTE(review): keep_prob is hard-coded, so this dropout is also active
    # outside of training.
    rnn_inputs = tf.nn.dropout(inputs2, keep_prob=.5)

    with tf.name_scope('u_word_encode2'):
        outputs2, _ = tf.nn.dynamic_rnn(
            cell=tf.nn.rnn_cell.LSTMCell(self.hidden_size, forget_bias=1.0),
            inputs=rnn_inputs,
            sequence_length=sen_len,
            dtype=tf.float32,
            scope='u_word2'
        )

    # Residual connection around the second layer.
    inputs3 = tf.add(tf.reshape(outputs2, [-1, self.max_sen_len, self.hidden_size]), inputs2)

    with tf.name_scope('u_word_encode3'):
        # Feed the residual sum into the third layer.  Previously this layer
        # read `rnn_inputs`, which left the second LSTM and the residual
        # connection disconnected from the rest of the graph.
        outputs, _ = tf.nn.dynamic_rnn(
            cell=tf.nn.rnn_cell.LSTMCell(self.hidden_size, forget_bias=1.0),
            inputs=inputs3,
            sequence_length=sen_len,
            dtype=tf.float32,
            scope='u_word3'
        )

    batch_size = tf.shape(outputs)[0]
    with tf.name_scope('u_word_attention'):
        output = tf.reshape(outputs, [-1, self.hidden_size])
        u = tf.matmul(output, self.weights['u_wh_1']) + self.biases['u_wh_1']
        u = tf.reshape(u, [-1, self.max_doc_len * self.max_sen_len, self.hidden_size])
        # Add the projected user vector to every word position.
        u += tf.matmul(self.user, self.weights['wu_1'])[:, None, :]
        u = tf.tanh(u)
        u = tf.reshape(u, [-1, self.hidden_size])
        alpha = tf.reshape(tf.matmul(u, self.weights['u_v_1']), [batch_size, 1, self.max_sen_len])
        alpha = self.softmax(alpha, self.sen_len, self.max_sen_len)
        # Attention-weighted sum over the words of each sentence.
        outputs = tf.matmul(alpha, outputs)

    outputs = tf.reshape(outputs, [-1, self.max_doc_len, self.hidden_size])

    # Deep supervision on the sentence representations.
    with tf.name_scope('u_sen_softmax'):
        self.u_sens = tf.reshape(outputs, [-1, self.max_doc_len * self.hidden_size])
        self.u_sens_scores = tf.matmul(self.u_sens, self.weights['u_sen_softmax']) + self.biases['u_sen_softmax']

    with tf.name_scope('u_sen_loss'):
        sen_losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.u_sens_scores, labels=self.input_y)
        self.u_sen_losses = tf.reduce_mean(sen_losses)

    with tf.name_scope('u_sentence_encode'):
        outputs, _ = tf.nn.dynamic_rnn(
            cell=tf.nn.rnn_cell.LSTMCell(self.hidden_size, forget_bias=1.0),
            inputs=outputs,
            sequence_length=self.doc_len,
            dtype=tf.float32,
            scope='u_sentence'
        )

    batch_size = tf.shape(outputs)[0]
    with tf.name_scope('u_sentence_attention'):
        output = tf.reshape(outputs, [-1, self.hidden_size])
        u = tf.matmul(output, self.weights['u_wh_2']) + self.biases['u_wh_2']
        u = tf.reshape(u, [-1, self.max_doc_len, self.hidden_size])
        u += tf.matmul(self.user, self.weights['wu_2'])[:, None, :]
        u = tf.tanh(u)
        u = tf.reshape(u, [-1, self.hidden_size])
        alpha = tf.reshape(tf.matmul(u, self.weights['u_v_2']), [batch_size, 1, self.max_doc_len])
        alpha = self.softmax(alpha, self.doc_len, self.max_doc_len)
        # Attention-weighted sum over the sentences of each document.
        outputs = tf.matmul(alpha, outputs)

    with tf.name_scope('u_softmax'):
        self.u_doc = tf.reshape(outputs, [batch_size, self.hidden_size])
        self.u_scores = tf.matmul(self.u_doc, self.weights['u_softmax']) + self.biases['u_softmax']
        self.u_predictions = tf.argmax(self.u_scores, 1, name="u_predictions")

    with tf.name_scope("u_loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.u_scores, labels=self.input_y)
        self.u_loss = tf.reduce_mean(losses)

    with tf.name_scope("u_accuracy"):
        correct_predictions = tf.equal(self.u_predictions, tf.argmax(self.input_y, 1))
        self.u_correct_num = tf.reduce_sum(tf.cast(correct_predictions, dtype=tf.int32))
        self.u_accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="u_accuracy")
def pixcnn_gated_nonlinearity(a, b):
    """Return the gated activation `sigmoid(a) * tanh(b)`."""
    gate = tf.sigmoid(a)
    signal = tf.tanh(b)
    return gate * signal
def tf_tanh(x):
    """Apply `tf.tanh` to `x` and return the result."""
    activated = tf.tanh(x)
    return activated
def resnet_cyclegan(inputs, output_channles, activation, prefix, args):
    """Build a CycleGAN-style ResNet generator and return its output tensor.

    The network is: padded 7x7 convolution, `args.resnet_conv_count` stride-2
    convolutions, `args.resnet_res_count` residual blocks, the same number of
    stride-2 transposed convolutions, and a final 7x7 convolution followed by
    the requested output activation.

    Args:
        inputs: Input image tensor.
            NOTE(review): the padding specs assume a rank-4 tensor with the
            spatial axes at positions 1 and 2 -- confirm against callers.
        output_channles: Number of channels produced by the last convolution.
        activation: Output activation: "tanh", "sigmoid" or "none".
        prefix: Suffix for the enclosing variable scope name.
        args: Object providing `ngf`, `resnet_conv_count`, `resnet_res_count`,
            `resnet_padding` and `batch_size`.

    Returns:
        The generator output tensor.

    Raises:
        ValueError: If `activation` is not one of the supported names.
    """
    # Fail before building any graph.  Previously an unknown activation left
    # `out_gen` unbound and surfaced as an UnboundLocalError at the return.
    if activation not in ("tanh", "sigmoid", "none"):
        raise ValueError(
            "Unknown activation %r; expected 'tanh', 'sigmoid' or 'none'."
            % (activation,))

    def lrelu(x, leak=0.2, name="lrelu", alt_relu_impl=False):
        """Leaky ReLU; `alt_relu_impl` selects an abs()-based formulation."""
        with tf.variable_scope(name):
            if alt_relu_impl:
                f1 = 0.5 * (1 + leak)
                f2 = 0.5 * (1 - leak)
                return f1 * x + f2 * abs(x)
            return tf.maximum(x, leak * x)

    def instance_norm(x):
        """Normalize over axes 1 and 2 with a learned scale and offset."""
        with tf.variable_scope("instance_norm"):
            epsilon = 1e-5
            mean, var = tf.nn.moments(x, [1, 2], keep_dims=True)
            scale = tf.get_variable(
                'scale', [x.get_shape()[-1]],
                initializer=tf.truncated_normal_initializer(mean=1.0, stddev=0.02))
            offset = tf.get_variable(
                'offset', [x.get_shape()[-1]],
                initializer=tf.constant_initializer(0.0))
            return scale * tf.div(x - mean, tf.sqrt(var + epsilon)) + offset

    def _norm_and_activate(conv, do_norm, do_relu, relufactor):
        """Shared tail of the conv helpers: optional norm, then (leaky) ReLU."""
        if do_norm:
            conv = instance_norm(conv)
        if do_relu:
            if relufactor == 0:
                conv = tf.nn.relu(conv, "relu")
            else:
                conv = lrelu(conv, relufactor, "lrelu")
        return conv

    def general_conv2d(inputconv, o_d=64, f_h=7, f_w=7, s_h=1, s_w=1,
                       stddev=0.02, padding="VALID", name="conv2d",
                       do_norm=True, do_relu=True, relufactor=0):
        """Convolution with optional instance norm and (leaky) ReLU."""
        with tf.variable_scope(name):
            # Honour both kernel/stride dimensions.  Only f_w/s_w used to be
            # passed, silently ignoring f_h/s_h; every call in this function
            # uses square kernels and strides, so results are unchanged.
            conv = tf.contrib.layers.conv2d(
                inputconv, o_d, [f_h, f_w], [s_h, s_w], padding,
                activation_fn=None,
                weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
                biases_initializer=tf.constant_initializer(0.0))
            return _norm_and_activate(conv, do_norm, do_relu, relufactor)

    def general_deconv2d(inputconv, outshape, o_d=64, f_h=7, f_w=7, s_h=1, s_w=1,
                         stddev=0.02, padding="VALID", name="deconv2d",
                         do_norm=True, do_relu=True, relufactor=0):
        """Transposed convolution with optional instance norm and ReLU.

        `outshape` is accepted for call compatibility but is not used.
        """
        with tf.variable_scope(name):
            conv = tf.contrib.layers.conv2d_transpose(
                inputconv, o_d, [f_h, f_w], [s_h, s_w], padding,
                activation_fn=None,
                weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
                biases_initializer=tf.constant_initializer(0.0))
            return _norm_and_activate(conv, do_norm, do_relu, relufactor)

    def build_resnet_block(inputres, dim, name="resnet", padding="REFLECT"):
        """Two padded 3x3 convolutions plus a skip connection, then ReLU."""
        with tf.variable_scope(name):
            out_res = tf.pad(inputres, [[0, 0], [1, 1], [1, 1], [0, 0]], padding)
            out_res = general_conv2d(out_res, dim, 3, 3, 1, 1, 0.02, "VALID", "c1")
            out_res = tf.pad(out_res, [[0, 0], [1, 1], [1, 1], [0, 0]], padding)
            out_res = general_conv2d(out_res, dim, 3, 3, 1, 1, 0.02, "VALID", "c2",
                                     do_relu=False)
            return tf.nn.relu(out_res + inputres)

    ngf = args.ngf
    n_downsampling = args.resnet_conv_count
    n_resblock = args.resnet_res_count

    with tf.variable_scope("unet_cyclegan_%s" % prefix):
        f = 7
        ks = 3
        padding = args.resnet_padding

        # Pad by 3 so the first 7x7 VALID convolution keeps the spatial size.
        pad_input = tf.pad(inputs, [[0, 0], [ks, ks], [ks, ks], [0, 0]], padding)
        # first 7x7 conv
        o_c1 = general_conv2d(pad_input, ngf, f, f, 1, 1, 0.02, name="c1")

        layers = [o_c1]

        # down sampling
        for i in range(n_downsampling):
            mult = int(2**i)
            o_c_tmp = general_conv2d(layers[-1], ngf * mult * 2, ks, ks, 2, 2,
                                     0.02, "SAME", "c%d" % (i + 2))
            layers.append(o_c_tmp)

        # res-block
        res_mul = int(2**(n_downsampling - 1))
        for i in range(n_resblock):
            o_r_tmp = build_resnet_block(layers[-1], ngf * 2 * res_mul,
                                         "r%d" % (i + 1), padding)
            layers.append(o_r_tmp)

        # up sampling
        for i in range(n_downsampling):
            mult = int(2**(n_downsampling - i))
            o_c_tmp = general_deconv2d(
                layers[-1],
                [args.batch_size, int(512 / mult), int(512 / mult), int(ngf * mult / 2)],
                int(ngf * mult / 2), ks, ks, 2, 2, 0.02, "SAME",
                "c%d" % (i + 2 + n_downsampling))
            layers.append(o_c_tmp)

        # last 7x7 conv, no normalization or ReLU
        o_c6 = general_conv2d(layers[-1], output_channles, f, f, 1, 1, 0.02,
                              "SAME", "c%d" % (2 + n_downsampling * 2),
                              do_norm=False, do_relu=False)

        if activation == "tanh":
            out_gen = tf.tanh(o_c6, "t1")  # [-1,1]
        elif activation == 'sigmoid':
            out_gen = tf.sigmoid(o_c6)
        else:  # 'none' (validated at the top of the function)
            out_gen = o_c6

        return out_gen
def build(self, inputs, is_training):
    """Build the graph for this configuration.

    The graph has two parts: a non-causal dilated-convolution encoder that
    produces a pooled `encoding`, and a WaveNet decoder that is conditioned
    on that encoding and predicts the quantized input.

    Args:
      inputs: A dict of inputs. For training, should contain 'wav'.
      is_training: Whether we are training or not. Not used in this config.

    Returns:
      A dict of outputs that includes the 'predictions', 'loss', the 'encoding',
      the 'quantized_input', and whatever metrics we want to track for eval.
    """
    del is_training
    # Decoder hyper-parameters.
    num_stages = 10
    num_layers = 30
    filter_length = 3
    width = 512
    skip_width = 256
    # Encoder ("ae_") hyper-parameters.
    ae_num_stages = 10
    ae_num_layers = 30
    ae_filter_length = 3
    ae_width = 128

    # Encode the source with 8-bit Mu-Law.
    x = inputs['wav']
    x_quantized = utils.mu_law(x)
    x_scaled = tf.cast(x_quantized, tf.float32) / 128.0
    x_scaled = tf.expand_dims(x_scaled, 2)

    ###
    # The Non-Causal Temporal Encoder.
    ###
    en = masked.conv1d(x_scaled, causal=False, num_filters=ae_width, filter_length=ae_filter_length, name='ae_startconv')

    for num_layer in range(ae_num_layers):
        # The dilation doubles each layer and restarts every `ae_num_stages`.
        dilation = 2**(num_layer % ae_num_stages)
        d = tf.nn.relu(en)
        d = masked.conv1d(d, causal=False, num_filters=ae_width, filter_length=ae_filter_length, dilation=dilation, name='ae_dilatedconv_%d' % (num_layer + 1))
        d = tf.nn.relu(d)
        # Residual update of the encoder stream.
        en += masked.conv1d(d, num_filters=ae_width, filter_length=1, name='ae_res_%d' % (num_layer + 1))

    en = masked.conv1d(en, num_filters=self.ae_bottleneck_width, filter_length=1, name='ae_bottleneck')
    en = masked.pool1d(en, self.ae_hop_length, name='ae_pool', mode='avg')
    encoding = en

    ###
    # The WaveNet Decoder.
    ###
    l = masked.shift_right(x_scaled)
    l = masked.conv1d(l, num_filters=width, filter_length=filter_length, name='startconv')

    # Set up skip connections.
    s = masked.conv1d(l, num_filters=skip_width, filter_length=1, name='skip_start')

    # Residual blocks with skip connections.
    for i in range(num_layers):
        dilation = 2**(i % num_stages)
        d = masked.conv1d(l, num_filters=2 * width, filter_length=filter_length, dilation=dilation, name='dilatedconv_%d' % (i + 1))
        # Condition this layer on a 1x1 projection of the encoding.
        d = self._condition(d, masked.conv1d(en, num_filters=2 * width, filter_length=1, name='cond_map_%d' % (i + 1)))

        # Gated activation: the first half of the channels drives the sigmoid
        # gate, the second half the tanh.
        assert d.get_shape().as_list()[2] % 2 == 0
        m = d.get_shape().as_list()[2] // 2
        d_sigmoid = tf.sigmoid(d[:, :, :m])
        d_tanh = tf.tanh(d[:, :, m:])
        d = d_sigmoid * d_tanh

        l += masked.conv1d(d, num_filters=width, filter_length=1, name='res_%d' % (i + 1))
        s += masked.conv1d(d, num_filters=skip_width, filter_length=1, name='skip_%d' % (i + 1))

    s = tf.nn.relu(s)
    s = masked.conv1d(s, num_filters=skip_width, filter_length=1, name='out1')
    s = self._condition(s, masked.conv1d(en, num_filters=skip_width, filter_length=1, name='cond_map_out1'))
    s = tf.nn.relu(s)

    ###
    # Compute the logits and get the loss.
    ###
    logits = masked.conv1d(s, num_filters=256, filter_length=1, name='logits')
    logits = tf.reshape(logits, [-1, 256])
    probs = tf.nn.softmax(logits, name='softmax')
    # Offset the quantized values by 128 to obtain the integer labels.
    x_indices = tf.cast(tf.reshape(x_quantized, [-1]), tf.int32) + 128
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=x_indices, name='nll'), 0, name='loss')

    return {
        'predictions': probs,
        'loss': loss,
        'eval': {
            'nll': loss
        },
        'quantized_input': x_quantized,
        'encoding': encoding,
    }
def __call__(self, inputs, state, time_step):
    """Run one step of the batch-normalized GRU.

    All trainable variables are fetched with `reuse=True`, so they must have
    been created beforehand.  The batch-norm statistics live in a scope that
    is specific to `time_step`.

    Args:
      inputs: Input tensor for this step.
      state: Previous hidden state.
      time_step: Step index; selects the statistics scope and, outside of
        training, whether summaries are emitted.

    Returns:
      `(new_h, new_h)`: the new hidden state as both output and state.
    """
    with tf.variable_scope("GRUBatchNorm", reuse=True):
        xgamma = tf.get_variable("xgamma")
        hgamma = tf.get_variable("hgamma")
        mgamma = tf.get_variable("mgamma")
        mbeta = tf.get_variable("mbeta")
        if self._full_bn:
            hx_gamma = tf.get_variable("hx_gamma")
            hx_beta = tf.get_variable("hx_beta")
            hh_gamma = tf.get_variable("hh_gamma")
            hh_beta = tf.get_variable("hh_beta")

    with tf.variable_scope("GRUCell", reuse=True):
        W = tf.get_variable("W")
        H = tf.get_variable("H")
        B = tf.get_variable("B")
        Wh = tf.get_variable("Wh")
        Hh = tf.get_variable("Hh")

    # Means and variances for each time_step (non-trainable).
    with tf.variable_scope("BNGRU-Stats-T%s" % time_step):
        xmean = tf.get_variable("xmean", initializer=tf.zeros([2 * self._num_units]), trainable=False)
        xvar = tf.get_variable("xvar", initializer=tf.ones([2 * self._num_units]), trainable=False)
        hmean = tf.get_variable("hmean", initializer=tf.zeros([2 * self._num_units]), trainable=False)
        hvar = tf.get_variable("hvar", initializer=tf.ones([2 * self._num_units]), trainable=False)
        mmean = tf.get_variable("mmean", initializer=tf.zeros([self._num_units]), trainable=False)
        mvar = tf.get_variable("mvar", initializer=tf.ones([self._num_units]), trainable=False)
        if self._full_bn:
            hx_mean = tf.get_variable("hx_mean", initializer=tf.zeros([self._num_units]), trainable=False)
            hx_var = tf.get_variable("hx_var", initializer=tf.ones([self._num_units]), trainable=False)
            hh_mean = tf.get_variable("hh_mean", initializer=tf.zeros([self._num_units]), trainable=False)
            hh_var = tf.get_variable("hh_var", initializer=tf.ones([self._num_units]), trainable=False)

    # "Full" BN:
    # [r, u] = sigmoid(BN(x*W) + BN(h*H) + B)
    # h_tilde = BN(x*Wh) + r o BN(h*Hh)
    # h_new = u * h_old + (1-u) * tanh(BN(h_tilde))
    #
    # "Simple" BN:
    # [r, u] = sigmoid(BN(x*W) + BN(h*H) + B)
    # h_tilde = x*Wh + r o h*Hh
    # h_new = u * h_old + (1-u) * tanh(BN(h_tilde))
    bn_x = _batch_norm(self._is_training, tf.matmul(inputs, W), xmean, xvar, xgamma)
    bn_h = _batch_norm(self._is_training, tf.matmul(state, H), hmean, hvar, hgamma)

    # Note that `self._bias` is added on top of the learned bias B.
    concat = bn_x + bn_h + B + self._bias
    # Legacy tf.split argument order: (axis, number of splits, value).
    r, u = tf.split(1, 2, concat)
    sig_r, sig_u = tf.sigmoid(r), tf.sigmoid(u)

    if self._full_bn:
        bn_Wh = _batch_norm(self._is_training, tf.matmul(inputs, Wh), hx_mean, hx_var, hx_gamma, hx_beta)
        bn_Hh = _batch_norm(self._is_training, tf.matmul(state, Hh), hh_mean, hh_var, hh_gamma, hh_beta)
        h_tilde = bn_Wh + sig_r * bn_Hh
    else:
        h_tilde = tf.matmul(inputs, Wh) + sig_r * tf.matmul(state, Hh)

    bn_h_tilde = _batch_norm(self._is_training, h_tilde, mmean, mvar, mgamma, mbeta)
    new_h = sig_u * state + (1 - sig_u) * tf.tanh(bn_h_tilde)

    # Summaries are emitted only outside of training and only for a fixed
    # selection of time steps.
    if not self._is_training and time_step in [0, 1, 2, 3, 4, 5, 49, 99]:
        variable_summaries(new_h, "new_h/%s" % time_step)
        variable_summaries(r, "r/%s" % time_step)
        variable_summaries(u, "u/%s" % time_step)
        variable_summaries(h_tilde, "h_tilde/%s" % time_step)
        variable_summaries(xmean, "xmean/%s" % time_step)
        variable_summaries(xvar, "xvar/%s" % time_step)
        variable_summaries(hmean, "hmean/%s" % time_step)
        variable_summaries(hvar, "hvar/%s" % time_step)
        variable_summaries(mmean, "mmean/%s" % time_step)
        variable_summaries(mvar, "mvar/%s" % time_step)
        variable_summaries(xgamma, "xgamma/%s" % time_step)
        variable_summaries(hgamma, "hgamma/%s" % time_step)
        variable_summaries(mgamma, "mgamma/%s" % time_step)
        variable_summaries(mbeta, "mbeta/%s" % time_step)
        if self._full_bn:
            variable_summaries(hx_mean, "hx_mean/%s" % time_step)
            variable_summaries(hx_var, "hx_var/%s" % time_step)
            variable_summaries(hh_mean, "hh_mean/%s" % time_step)
            variable_summaries(hh_var, "hh_var/%s" % time_step)
            variable_summaries(hx_gamma, "hx_gamma/%s" % time_step)
            variable_summaries(hx_beta, "hx_beta/%s" % time_step)
            variable_summaries(hh_gamma, "hh_gamma/%s" % time_step)
            variable_summaries(hh_beta, "hh_beta/%s" % time_step)

    return new_h, new_h
test_labels = test_labels.reshape([-1, 1]) # input feature X = tf.placeholder(tf.float32, [None, 2]) Y_label = tf.placeholder(tf.float32, [None, 1]) # hidden layer 1 L1_size = 3 W1 = tf.Variable(tf.random_uniform([2, L1_size], -1, 1, seed=0)) B1 = tf.Variable(tf.zeros([L1_size])) Y1 = tf.nn.relu(X @ W1 + B1) # last layer (one perceptron) W_last = tf.Variable(tf.zeros([L1_size, 1])) B_last = tf.Variable(tf.zeros([1])) Y_predict = tf.tanh(Y1 @ W_last + B_last) error = tf.subtract(Y_label, Y_predict) mse = tf.reduce_mean(tf.square(error)) train = tf.train.GradientDescentOptimizer(0.03).minimize(mse) # init init = tf.global_variables_initializer() sess = tf.Session() sess.run(init) # train err, target = 1., 0.000001 epoch, max_epochs = 0, 5000 while err > target and epoch < max_epochs:
def build(self, inputs):
    """Build the graph for this configuration.

    Args:
      inputs: A dict of inputs. For training, should contain 'wav'.

    Returns:
      A dict of outputs that includes the 'predictions', 'init_ops', the
      'push_ops', the 'encoding' placeholder and the 'quantized_input'.
    """
    num_stages = 10
    num_layers = 30
    filter_length = 3
    width = 512
    skip_width = 256
    num_z = 16

    # Encode the source with 8-bit Mu-Law.
    wav = inputs['wav']
    batch_size = self.batch_size
    x_quantized = utils.mu_law(wav)
    x_scaled = tf.expand_dims(tf.cast(x_quantized, tf.float32) / 128.0, 2)

    # The conditioning is fed from outside through a placeholder.
    encoding = tf.placeholder(
        name='encoding', shape=[batch_size, num_z], dtype=tf.float32)
    conditioning = tf.expand_dims(encoding, 1)

    init_ops = []
    push_ops = []

    ###
    # The WaveNet Decoder.
    ###
    residual, inits, pushs = utils.causal_linear(
        x=x_scaled,
        n_inputs=1,
        n_outputs=width,
        name='startconv',
        rate=1,
        batch_size=batch_size,
        filter_length=filter_length)
    init_ops.extend(inits)
    push_ops.extend(pushs)

    # Set up skip connections.
    skip = utils.linear(residual, width, skip_width, name='skip_start')

    # Residual blocks with skip connections.
    for layer_idx in range(num_layers):
        layer_no = layer_idx + 1
        dilation = 2**(layer_idx % num_stages)

        # Dilated causal layer.
        gated, inits, pushs = utils.causal_linear(
            x=residual,
            n_inputs=width,
            n_outputs=width * 2,
            name='dilatedconv_%d' % layer_no,
            rate=dilation,
            batch_size=batch_size,
            filter_length=filter_length)
        init_ops.extend(inits)
        push_ops.extend(pushs)

        # Local conditioning.
        gated += utils.linear(
            conditioning, num_z, width * 2, name='cond_map_%d' % layer_no)

        # Gated activation over the two channel halves.
        channels = gated.get_shape().as_list()[2]
        assert channels % 2 == 0
        half = channels // 2
        gated = tf.sigmoid(gated[:, :, :half]) * tf.tanh(gated[:, :, half:])

        # Residual and skip updates.
        residual += utils.linear(gated, width, width, name='res_%d' % layer_no)
        skip += utils.linear(gated, width, skip_width, name='skip_%d' % layer_no)

    skip = tf.nn.relu(skip)
    skip = (utils.linear(skip, skip_width, skip_width, name='out1')
            + utils.linear(conditioning, num_z, skip_width, name='cond_map_out1'))
    skip = tf.nn.relu(skip)

    ###
    # Compute the logits and the predictions.
    ###
    logits = utils.linear(skip, skip_width, 256, name='logits')
    logits = tf.reshape(logits, [-1, 256])
    probs = tf.nn.softmax(logits, name='softmax')

    outputs = {
        'init_ops': init_ops,
        'push_ops': push_ops,
        'predictions': probs,
        'encoding': encoding,
        'quantized_input': x_quantized,
    }
    return outputs
def _rhn_fixed(self, x, prev_s, w_prev, w_skip, is_training, x_mask=None, s_mask=None):
    """Build the recurrent highway cell described by the fixed `sample_arc`.

    `self.sample_arc[0]` selects the activation of layer 0.  For every later
    layer, `sample_arc` holds a pair (index of the layer to read from,
    activation index).  The result is the average of all layers that no
    other layer reads from.

    Args:
        x: Input for this step.
        prev_s: Previous state.
        w_prev: Weights applied to the concatenation of `x` and `prev_s`.
        w_skip: Per-layer weights, indexed by layer id.
        is_training: Python bool; when true the masks are applied.
        x_mask: Mask multiplied into `x` (required when training).
        s_mask: Mask multiplied into the states (required when training).

    Returns:
        Tensor with static shape [batch_size, lstm_hidden_size].

    Raises:
        ValueError: If an activation index is not 0, 1, 2 or 3.
    """

    def _activate(func_idx, pre_activation):
        """Apply the activation selected by `func_idx`."""
        if func_idx == 0:
            return tf.tanh(pre_activation)
        if func_idx == 1:
            return tf.nn.relu(pre_activation)
        if func_idx == 2:
            return tf.identity(pre_activation)
        if func_idx == 3:
            return tf.sigmoid(pre_activation)
        raise ValueError("Unknown func_idx {}".format(func_idx))

    batch_size = prev_s.get_shape()[0].value

    if is_training:
        assert x_mask is not None, "x_mask is None"
        assert s_mask is not None, "s_mask is None"
        ht = tf.matmul(tf.concat([x * x_mask, prev_s * s_mask], axis=1), w_prev)
    else:
        ht = tf.matmul(tf.concat([x, prev_s], axis=1), w_prev)

    # Layer 0: highway update of the previous state.
    h, t = tf.split(ht, 2, axis=1)
    h = _activate(self.sample_arc[0], h)
    t = tf.sigmoid(t)
    s = prev_s + t * (h - prev_s)
    layers = [s]

    arc_pos = 1  # position of the current (prev_idx, func_idx) pair
    used = np.zeros([self.rhn_depth], dtype=np.int32)
    for rhn_layer_id in range(1, self.rhn_depth):
        with tf.variable_scope("rhn_layer_{}".format(rhn_layer_id)):
            prev_idx = self.sample_arc[arc_pos]
            func_idx = self.sample_arc[arc_pos + 1]
            used[prev_idx] = 1
            prev_s = layers[prev_idx]
            if is_training:
                ht = tf.matmul(prev_s * s_mask, w_skip[rhn_layer_id])
            else:
                ht = tf.matmul(prev_s, w_skip[rhn_layer_id])
            h, t = tf.split(ht, 2, axis=1)
            h = _activate(func_idx, h)
            t = tf.sigmoid(t)
            s = prev_s + t * (h - prev_s)
            layers.append(s)
            arc_pos += 2

    # Average the layers that no other layer consumed.
    leaf_layers = [layer for u, layer in zip(used, layers) if u == 0]
    output = tf.add_n(leaf_layers) / np.sum(1.0 - used)
    output.set_shape([batch_size, self.lstm_hidden_size])
    return output
def _rhn_enas(self, x, prev_s, w_prev, w_skip, is_training, x_mask=None, s_mask=None):
    """Build the recurrent highway cell selected by `self.sample_arc`.

    Unlike a fixed architecture, the activation of every layer is chosen at
    run time with `tf.case`, and the weights for the chosen (previous layer,
    activation) pair are sliced out of shared weight matrices.

    NOTE(review): `sample_arc` entries are presumably tensors (they are
    compared with `tf.equal` and used as slice bounds) -- confirm.

    Args:
        x: Input for this step.
        prev_s: Previous state.
        w_prev: Shared layer-0 weights; a slice of rows is selected by
            `sample_arc[0]`.
        w_skip: Per-layer shared weights, indexed by layer id.
        is_training: Python bool; when true the masks are applied.
        x_mask: Mask multiplied into `x` (required when training).
        s_mask: Mask multiplied into the states (required when training).

    Returns:
        Batch-normalized mean of the layers that no other layer reads from,
        with static shape [batch_size, lstm_hidden_size].
    """
    batch_size = prev_s.get_shape()[0].value
    # Row range of `w_prev` that belongs to the activation chosen for layer 0.
    start_idx = self.sample_arc[0] * 2 * self.lstm_hidden_size
    end_idx = start_idx + 2 * self.lstm_hidden_size
    if is_training:
        assert x_mask is not None, "x_mask is None"
        assert s_mask is not None, "s_mask is None"
        ht = tf.matmul(tf.concat([x * x_mask, prev_s * s_mask], axis=1), w_prev[start_idx:end_idx, :])
    else:
        ht = tf.matmul(tf.concat([x, prev_s], axis=1), w_prev[start_idx:end_idx, :])
    with tf.variable_scope("rhn_layer_0"):
        ht = batch_norm(ht, is_training)
    h, t = tf.split(ht, 2, axis=1)
    func_idx = self.sample_arc[0]
    # Pick the activation by index; an unknown index yields a scalar 0.0.
    h = tf.case(
        {
            tf.equal(func_idx, 0): lambda: tf.tanh(h),
            tf.equal(func_idx, 1): lambda: tf.nn.relu(h),
            tf.equal(func_idx, 2): lambda: tf.identity(h),
            tf.equal(func_idx, 3): lambda: tf.sigmoid(h),
        },
        default=lambda: tf.constant(0.0, dtype=tf.float32),
        exclusive=True)
    t = tf.sigmoid(t)
    # Highway update of the previous state.
    s = prev_s + t * (h - prev_s)
    layers = [s]

    # From here on `start_idx` is the position of the current
    # (prev_idx, func_idx) pair inside `sample_arc`.
    start_idx = 1
    used = []
    for rhn_layer_id in range(1, self.rhn_depth):
        with tf.variable_scope("rhn_layer_{}".format(rhn_layer_id)):
            prev_idx = self.sample_arc[start_idx]
            func_idx = self.sample_arc[start_idx + 1]
            # One-hot marker of the layer this one reads from.
            curr_used = tf.one_hot(prev_idx, depth=self.rhn_depth, dtype=tf.int32)
            used.append(curr_used)
            # Rows of the shared matrix for this (prev_idx, func_idx) pair.
            w_start = (prev_idx * self.num_funcs + func_idx) * self.lstm_hidden_size
            w_end = w_start + self.lstm_hidden_size
            w = w_skip[rhn_layer_id][w_start:w_end, :]
            # Select layer `prev_idx` by slicing the batch-wise concatenation
            # of all layers built so far.
            prev_s = tf.concat(layers, axis=0)
            prev_s = prev_s[prev_idx * batch_size:(prev_idx + 1) * batch_size, :]
            if is_training:
                ht = tf.matmul(prev_s * s_mask, w)
            else:
                ht = tf.matmul(prev_s, w)
            ht = batch_norm(ht, is_training)
            h, t = tf.split(ht, 2, axis=1)
            h = tf.case(
                {
                    tf.equal(func_idx, 0): lambda: tf.tanh(h),
                    tf.equal(func_idx, 1): lambda: tf.nn.relu(h),
                    tf.equal(func_idx, 2): lambda: tf.identity(h),
                    tf.equal(func_idx, 3): lambda: tf.sigmoid(h),
                },
                default=lambda: tf.constant(0.0, dtype=tf.float32),
                exclusive=True)
            t = tf.sigmoid(t)
            s = prev_s + t * (h - prev_s)
            layers.append(s)
            start_idx += 2

    # After this, `used` is a boolean mask that is True for layers nobody
    # read from.
    used = tf.add_n(used)
    used = tf.equal(used, 0)
    # Guard: at least one layer must be unused, otherwise the mean is empty.
    with tf.control_dependencies([tf.Assert(tf.reduce_any(used), [used])]):
        layers = tf.stack(layers)
    layers = tf.boolean_mask(layers, used)
    layers = tf.reduce_mean(layers, axis=0)
    layers.set_shape([batch_size, self.lstm_hidden_size])
    layers = batch_norm(layers, is_training)
    return layers
def txt_encoder(self, txt_vec, reuse=False):
    """Encode `txt_vec` with three tanh-activated dense layers.

    The layer widths are 1024, 1024 and `self.code_dim`.  Variables live in
    the 'txt_encoder' scope; pass `reuse=True` to share them.
    """
    layer_widths = (1024, 1024, self.code_dim)
    with tf.variable_scope('txt_encoder', reuse=reuse):
        hidden = txt_vec
        for units in layer_widths:
            hidden = tf.tanh(tf.layers.dense(hidden, units))
    return hidden
def build(self, name):
    """Build the network under variable scope `name`.

    A fully connected stack is used when `self.state_dim` has one dimension,
    otherwise a convolutional stack.  Both end in a tanh-squashed "pi" layer.

    Args:
        name: Variable scope name, also used as the input placeholder prefix.

    Returns:
        `(net, x)`: the output tensor and the input placeholder.
    """
    # `state_dim` is a tuple; a leading batch dimension of unknown size is
    # prepended.
    x = tf.placeholder(dtype=tf.float32, shape=(None, ) + self.state_dim, name="%s_input" % name)
    with tf.variable_scope(name):
        if len(self.state_dim) == 1:
            net = tf.nn.relu(dense_layer(x, 400, use_bias=True, scope="fc1", initializer=self.initializer))
            net = tf.nn.relu(dense_layer(net, 300, use_bias=True, scope="fc2", initializer=self.initializer))
            # use tanh to normalize output between [-1, 1]
            net = tf.nn.tanh(dense_layer(net, self.subgoal_dim, initializer=tf.random_uniform_initializer(-3e-3, 3e-3), scope="pi", use_bias=True))
        else:
            # first convolutional layer with stride 4
            net = conv2d(x, 3, stride=4, output_size=32, initializer=self.initializer, scope="conv1", use_bias=True)
            net = tf.nn.relu(net)
            # second convolutional layer with stride 2
            net = conv2d(net, 3, stride=2, output_size=32, initializer=self.initializer, scope="conv2", use_bias=True)
            net = tf.nn.relu(net)
            # third convolutional layer with stride 1
            net = conv2d(net, 3, stride=1, output_size=32, initializer=self.initializer, scope="conv3", use_bias=True)
            net = tf.nn.relu(net)
            # first dense layer
            net = tf.nn.relu(dense_layer(net, output_dim=200, initializer=self.initializer, scope="fc1", use_bias=True))
            # second dense layer
            net = tf.nn.relu(dense_layer(net, output_dim=200, initializer=self.initializer, scope="fc2", use_bias=True))
            # output ("pi") layer, squashed to [-1, 1]
            # NOTE(review): output_dim is `self.state_dim` (a tuple) here,
            # while the fully connected branch uses `self.subgoal_dim` --
            # confirm this is intended.
            net = tf.tanh(dense_layer(net, output_dim=self.state_dim, initializer=tf.random_uniform_initializer(-4e-4, 4e-4), scope="pi", use_bias=True))
    return net, x
def __call__(self, inputs, state, time_step):
    """Run one step of the batch-normalized LSTM.

    All trainable variables are fetched with `reuse=True`, so they must have
    been created beforehand.  The batch-norm statistics live in a scope that
    is specific to `time_step`.

    Args:
      inputs: Input tensor for this step.
      state: Previous state: cell state and hidden state concatenated along
        axis 1.
      time_step: Step index; selects the statistics scope and, outside of
        training, whether summaries are emitted.

    Returns:
      `(new_h, new_state)` where `new_state` is `new_c` and `new_h`
      concatenated along axis 1.
    """
    with tf.variable_scope("BasicLSTMCell", reuse=True):
        H = tf.get_variable("H")
        W = tf.get_variable("W")
        b = tf.get_variable("b")

    with tf.variable_scope("LSTMBatchNorm", reuse=True):
        xgamma = tf.get_variable("xgamma")
        hgamma = tf.get_variable("hgamma")
        cgamma = tf.get_variable("cgamma")
        cbeta = tf.get_variable("cbeta")

    # Per-time-step means and variances (non-trainable).
    with tf.variable_scope("BNLSTM-Stats-T%s" % time_step):
        xmean = tf.get_variable("xmean", initializer=tf.zeros([4 * self._num_units]), trainable=False)
        xvar = tf.get_variable("xvar", initializer=tf.ones([4 * self._num_units]), trainable=False)
        hmean = tf.get_variable("hmean", initializer=tf.zeros([4 * self._num_units]), trainable=False)
        hvar = tf.get_variable("hvar", initializer=tf.ones([4 * self._num_units]), trainable=False)
        cmean = tf.get_variable("cmean", initializer=tf.zeros([self._num_units]), trainable=False)
        cvar = tf.get_variable("cvar", initializer=tf.ones([self._num_units]), trainable=False)

    # Legacy tf.split argument order: (axis, number of splits, value).
    c, h = tf.split(1, 2, state)

    # i, j, f, o = BN(hH) + BN(xW) + b
    # these have no betas, we let the single bias b take care of this
    xconcat = _batch_norm(self._is_training, tf.matmul(inputs, W), xmean, xvar, xgamma)
    hconcat = _batch_norm(self._is_training, tf.matmul(h, H), hmean, hvar, hgamma)
    concat = xconcat + hconcat + b

    # i = input gate, j = candidate, f = forget gate, o = output gate.
    i, j, f, o = tf.split(1, 4, concat)

    new_c = c * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(i) * tf.tanh(j)
    # The cell state is batch-normalized only for the output path; the
    # returned state carries the un-normalized `new_c`.
    new_c_bn = _batch_norm(self._is_training, new_c, cmean, cvar, cgamma, cbeta)
    new_h = tf.tanh(new_c_bn) * tf.sigmoid(o)

    # Summaries are emitted only outside of training and only for a fixed
    # selection of time steps.
    if not self._is_training and time_step in [0, 1, 2, 3, 4, 5, 49, 99]:
        variable_summaries(new_c, "new_c/%s" % time_step)
        variable_summaries(new_h, "new_h/%s" % time_step)
        variable_summaries(i, "i/%s" % time_step)
        variable_summaries(j, "j/%s" % time_step)
        variable_summaries(f, "f/%s" % time_step)
        variable_summaries(o, "o/%s" % time_step)
        variable_summaries(xmean, "xmean/%s" % time_step)
        variable_summaries(hmean, "hmean/%s" % time_step)
        variable_summaries(cmean, "cmean/%s" % time_step)
        variable_summaries(xvar, "xvar/%s" % time_step)
        variable_summaries(hvar, "hvar/%s" % time_step)
        variable_summaries(cvar, "cvar/%s" % time_step)
        variable_summaries(xgamma, "xgamma/%s" % time_step)
        variable_summaries(hgamma, "hgamma/%s" % time_step)
        variable_summaries(cgamma, "cgamma/%s" % time_step)
        variable_summaries(cbeta, "cbeta/%s" % time_step)

    return new_h, tf.concat(1, [new_c, new_h])
input_layer=tf.placeholder(tf.float32,[1,input_nodes]) labels=tf.placeholder(tf.int64) learning_rate=tf.placeholder(tf.float32,shape=[]) ''' weights1=tf.Variable(tf.truncated_normal([input_nodes,hidden_nodes],stddev=0.1/np.sqrt(100.0))) baises1=tf.Variable(tf.zeros(shape=(1,hidden_nodes))) weights2=tf.Variable(tf.truncated_normal([hidden_nodes,output_nodes],stddev=0.1/np.sqrt(100.0))) baises2 =tf.Variable(tf.zeros(shape=(1,output_nodes))) ''' weights1=tf.Variable(w1) baises1=tf.Variable(b1) weights2=tf.Variable(w2) baises2 =tf.Variable(b2) a1=tf.tanh(tf.matmul(input_layer,weights1)+baises1) z2=tf.matmul(a1,weights2)+baises2 probabilities=tf.nn.softmax(z2) cost=(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,logits=z2)) optimizer=tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost) game_progress=[] init=tf.global_variables_initializer() with tf.Session() as session: session.run(init) game_lengths = [] game_lengths_avg = [] x1 = [] arr=[]
def gelu_fast(_x):
    """Tanh-based GELU activation.

    Computes 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x**3))).
    """
    half_x = 0.5 * _x
    scale = tf.sqrt(2 / np.pi)
    cubic_term = _x + 0.044715 * tf.pow(_x, 3)
    squashed = tf.tanh(scale * cubic_term)
    return half_x * (1 + squashed)
def __call__(self, input_all, state, timestep=0, scope=None):
    """Run one step of the hyper-network-modulated LSTM.

    `input_all` is split along axis 1 into the main LSTM input (first
    100 * 9 columns) and the hyper-cell input (last 8 columns).  The output
    of the hyper cell rescales every gate's input, recurrent and bias
    contribution through `hyper_norm` / `hyper_bias`.

    Args:
        input_all: Main and hyper inputs concatenated along axis 1.
        state: Pair `(total_c, total_h)`; the first `num_units` columns of
            each belong to the main LSTM, the rest to the hyper cell.
        timestep: Unused.
        scope: Optional variable scope name; defaults to the class name.

    Returns:
        `(new_h, new_state)` where `new_state` is an `LSTMStateTuple` holding
        the main and hyper states concatenated along axis 1.
    """
    with tf.variable_scope(scope or type(self).__name__):
        # The split sizes are hard-coded: 900 main features, 8 hyper features.
        input_main, input_hyper = tf.split(input_all, num_or_size_splits=[100 * 9, 8], axis=1)
        total_c, total_h = state
        c = total_c[:, 0:self.num_units]
        h = total_h[:, 0:self.num_units]
        hyper_state = tf.contrib.rnn.LSTMStateTuple(total_c[:, self.num_units:], total_h[:, self.num_units:])

        w_init = None  # uniform
        h_init = lstm_ortho_initializer(1.0)

        x_size = input_main.get_shape().as_list()[1]
        embedding_size = self.hyper_embedding_size
        num_units = self.num_units
        batch_size = input_hyper.get_shape().as_list()[0]

        W_xh = tf.get_variable('W_xh', [x_size, 4 * num_units], initializer=w_init)
        W_hh = tf.get_variable('W_hh', [num_units, 4 * num_units], initializer=h_init)
        bias = tf.get_variable('bias', [4 * num_units], initializer=tf.constant_initializer(0.0))

        # The hyper cell only sees its own input; concatenating `h` onto it
        # is temporarily disabled:
        # hyper_input = tf.concat([x, h], 1)
        # hyper_input = tf.concat([input_hyper,h],1)
        hyper_input = input_hyper
        hyper_output, hyper_new_state = self.hyper_cell(hyper_input, hyper_state)

        xh = tf.matmul(input_main, W_xh)
        hh = tf.matmul(h, W_hh)

        # split Wxh contributions
        ix, jx, fx, ox = tf.split(xh, 4, 1)
        ix = hyper_norm(ix, hyper_output, embedding_size, num_units, 'hyper_ix')
        jx = hyper_norm(jx, hyper_output, embedding_size, num_units, 'hyper_jx')
        fx = hyper_norm(fx, hyper_output, embedding_size, num_units, 'hyper_fx')
        ox = hyper_norm(ox, hyper_output, embedding_size, num_units, 'hyper_ox')

        # split Whh contributions
        ih, jh, fh, oh = tf.split(hh, 4, 1)
        ih = hyper_norm(ih, hyper_output, embedding_size, num_units, 'hyper_ih')
        jh = hyper_norm(jh, hyper_output, embedding_size, num_units, 'hyper_jh')
        fh = hyper_norm(fh, hyper_output, embedding_size, num_units, 'hyper_fh')
        oh = hyper_norm(oh, hyper_output, embedding_size, num_units, 'hyper_oh')

        # split bias
        ib, jb, fb, ob = tf.split(bias, 4, 0)  # bias is to be broadcasted.
        ib = hyper_bias(ib, hyper_output, embedding_size, num_units, 'hyper_ib')
        jb = hyper_bias(jb, hyper_output, embedding_size, num_units, 'hyper_jb')
        fb = hyper_bias(fb, hyper_output, embedding_size, num_units, 'hyper_fb')
        ob = hyper_bias(ob, hyper_output, embedding_size, num_units, 'hyper_ob')

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i = ix + ih + ib
        j = jx + jh + jb
        f = fx + fh + fb
        o = ox + oh + ob

        if self.use_layer_norm:
            concat = tf.concat([i, j, f, o], 1)
            concat = layer_norm_all(concat, batch_size, 4, num_units, 'ln_all')
            i, j, f, o = tf.split(concat, 4, 1)

        # Recurrent dropout is applied to the candidate only.
        if self.use_recurrent_dropout:
            g = tf.nn.dropout(tf.tanh(j), self.dropout_keep_prob)
        else:
            g = tf.tanh(j)

        new_c = c * tf.sigmoid(f + self.forget_bias) + tf.sigmoid(i) * g
        if self.use_layer_norm:
            new_h = tf.tanh(layer_norm(new_c, num_units, 'ln_c')) * tf.sigmoid(o)
        else:
            new_h = tf.tanh(new_c) * tf.sigmoid(o)

        # Re-pack the main and hyper states in the same layout as the input.
        hyper_c, hyper_h = hyper_new_state
        new_total_c = tf.concat([new_c, hyper_c], 1)
        new_total_h = tf.concat([new_h, hyper_h], 1)

        return new_h, tf.contrib.rnn.LSTMStateTuple(new_total_c, new_total_h)
def tanh(x):
    """Return the element-wise hyperbolic tangent of `x` via TensorFlow."""
    activated = tf.tanh(x)
    return activated
def __init__(self, sess, model, batch_size, confidence, targeted,
             learning_rate, binary_search_steps, max_iterations,
             abort_early, initial_const, clip_min, clip_max, num_labels,
             shape):
    """Build the TensorFlow graph used to optimize adversarial examples.

    The graph optimizes an additive perturbation in tanh space so that the
    resulting image always lies in ``[clip_min, clip_max]``, minimizing the
    squared L2 distortion plus a per-example weighted margin loss on the
    model's logits.

    :param sess: a TF session (stored on the instance).
    :param model: object exposing ``get_logits(tensor)``.
    :param batch_size: number of attacks to run simultaneously.
    :param confidence: margin added to the logit difference; higher values
                       yield examples that are more strongly misclassified
                       at the cost of larger L2 distortion.
    :param targeted: if True, push the logits towards the class given in
                     ``tlab``; if False, push them away from it.
    :param learning_rate: learning rate of the Adam optimizer.
    :param binary_search_steps: number of binary-search steps over the
                                trade-off constant between the norm of the
                                perturbation and the classification loss.
    :param max_iterations: maximum number of optimization iterations.
    :param abort_early: whether early aborts are allowed when gradient
                        descent stops making progress.
    :param initial_const: initial value of the trade-off constant.
    :param clip_min: minimum input component value.
    :param clip_max: maximum input component value.
    :param num_labels: number of classes in the model's output.
    :param shape: shape of one model input, without the batch dimension.
    """
    self.sess = sess
    self.TARGETED = targeted
    self.LEARNING_RATE = learning_rate
    self.MAX_ITERATIONS = max_iterations
    self.BINARY_SEARCH_STEPS = binary_search_steps
    self.ABORT_EARLY = abort_early
    self.CONFIDENCE = confidence
    self.initial_const = initial_const
    self.batch_size = batch_size
    self.clip_min = clip_min
    self.clip_max = clip_max
    self.model = model

    # Flag set once the search uses at least 10 binary-search steps.
    self.repeat = binary_search_steps >= 10

    # Full batched input shape.
    batched_shape = tuple([batch_size] + list(shape))
    self.shape = batched_shape
    value_range = clip_max - clip_min

    # The variable we optimize over (perturbation in tanh space).
    perturbation = tf.Variable(np.zeros(batched_shape, dtype=np.float32))

    # Variables holding the current batch, so data is sent to TF once.
    self.timg = tf.Variable(
        np.zeros(batched_shape), dtype=tf.float32, name='timg')
    self.tlab = tf.Variable(
        np.zeros((batch_size, num_labels)), dtype=tf.float32, name='tlab')
    self.const = tf.Variable(
        np.zeros(batch_size), dtype=tf.float32, name='const')

    # Placeholders used to assign the variables above.
    self.assign_timg = tf.placeholder(
        tf.float32, batched_shape, name='assign_timg')
    self.assign_tlab = tf.placeholder(
        tf.float32, (batch_size, num_labels), name='assign_tlab')
    self.assign_const = tf.placeholder(
        tf.float32, [batch_size], name='assign_const')

    # Candidate adversarial image: tanh keeps it inside
    # [clip_min, clip_max].
    self.newimg = (tf.tanh(perturbation + self.timg) + 1) / 2
    self.newimg = self.newimg * value_range + clip_min

    # Pre-softmax prediction of the model.
    self.output = model.get_logits(self.newimg)

    # Unperturbed image mapped through the same tanh transform, and the
    # per-example squared L2 distance to it.
    self.other = (tf.tanh(self.timg) + 1) / 2 * value_range + clip_min
    non_batch_axes = list(range(1, len(batched_shape)))
    self.l2dist = tf.reduce_sum(
        tf.square(self.newimg - self.other), non_batch_axes)

    # Logit of the labelled class versus the largest other logit; the
    # labelled class is masked out of the max with a large negative offset.
    label_logit = tf.reduce_sum((self.tlab) * self.output, 1)
    best_other_logit = tf.reduce_max(
        (1 - self.tlab) * self.output - self.tlab * 10000, 1)

    if self.TARGETED:
        # Targeted: make the labelled (target) class the most likely.
        margin = best_other_logit - label_logit + self.CONFIDENCE
    else:
        # Untargeted: make the labelled class less likely than another.
        margin = label_logit - best_other_logit + self.CONFIDENCE
    classification_loss = tf.maximum(0.0, margin)

    # Total objective.
    self.loss2 = tf.reduce_sum(self.l2dist)
    self.loss1 = tf.reduce_sum(self.const * classification_loss)
    self.loss = self.loss1 + self.loss2

    # Create the Adam optimizer and record which variables it adds.
    names_before = {var.name for var in tf.global_variables()}
    optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
    self.train = optimizer.minimize(self.loss, var_list=[perturbation])
    optimizer_vars = [
        var for var in tf.global_variables()
        if var.name not in names_before
    ]

    # Ops that load a new batch into the graph variables.
    self.setup = [
        self.timg.assign(self.assign_timg),
        self.tlab.assign(self.assign_tlab),
        self.const.assign(self.assign_const),
    ]

    # Re-initializes the perturbation together with the optimizer state.
    self.init = tf.variables_initializer(
        var_list=[perturbation] + optimizer_vars)
def create_generator(generator_inputs, generator_outputs_channels):
    """Build a nine-level encoder/decoder generator with skip connections.

    Channel widths come from the module-level ``a.ngf``. Each encoder level
    is expected to halve the spatial size and each decoder level to double
    it (per the original annotations, 512x512 inputs reach 1x1 at the
    bottleneck; this depends on ``gen_conv`` / ``gen_deconv``).

    Args:
        generator_inputs: input image tensor, channels on axis 3.
        generator_outputs_channels: number of channels of the output image.

    Returns:
        The tanh-activated output of the final decoder layer.
    """
    layers = []

    # encoder_1 has neither a preceding activation nor batch norm.
    with tf.variable_scope("encoder_1"):
        layers.append(gen_conv(generator_inputs, a.ngf))

    # encoder_2 .. encoder_9
    encoder_widths = [a.ngf * 2, a.ngf * 4] + [a.ngf * 8] * 6
    for level, out_channels in enumerate(encoder_widths, start=2):
        with tf.variable_scope("encoder_%d" % level):
            activated = lrelu(layers[-1], 0.2)
            layers.append(batchnorm(gen_conv(activated, out_channels)))

    # decoder_9 .. decoder_2 as (output channels, dropout rate).
    decoder_specs = [
        (a.ngf * 8, 0.5),
        (a.ngf * 8, 0.5),
        (a.ngf * 8, 0.5),
        (a.ngf * 8, 0.0),
        (a.ngf * 8, 0.0),
        (a.ngf * 4, 0.0),
        (a.ngf * 2, 0.0),
        (a.ngf, 0.0),
    ]

    num_encoder_layers = len(layers)
    for decoder_layer, (out_channels, dropout) in enumerate(decoder_specs):
        skip_layer = num_encoder_layers - decoder_layer - 1
        with tf.variable_scope("decoder_%d" % (skip_layer + 1)):
            decoder_input = layers[-1]
            if decoder_layer != 0:
                # Every decoder level but the first also receives the
                # encoder output from the mirrored level.
                decoder_input = tf.concat(
                    [decoder_input, layers[skip_layer]], axis=3)

            upsampled = gen_deconv(tf.nn.relu(decoder_input), out_channels)
            upsampled = batchnorm(upsampled)
            if dropout > 0.0:
                upsampled = tf.nn.dropout(upsampled, keep_prob=1 - dropout)
            layers.append(upsampled)

    # decoder_1 maps to the requested number of output channels.
    with tf.variable_scope("decoder_1"):
        decoder_input = tf.concat([layers[-1], layers[0]], axis=3)
        image = gen_deconv(
            tf.nn.relu(decoder_input), generator_outputs_channels)
        layers.append(tf.tanh(image))

    return layers[-1]
def ResnetGenerator(n_samples, noise=None, dim=DIM):
    """Build a deep bottleneck-ResNet generator.

    A 128-d noise vector is projected to a ``[8 * dim, 4, 4]`` feature map
    and passed through stacks of bottleneck residual blocks separated by
    four upsampling blocks, ending with a scaled tanh.

    Args:
        n_samples: number of samples to draw when `noise` is not given.
        noise: optional ``[n_samples, 128]`` tensor; sampled from a standard
            normal when None.
        dim: base channel multiplier (integer).

    Returns:
        Tensor reshaped to ``[-1, OUTPUT_DIM]``.
    """
    if noise is None:
        noise = tf.random_normal([n_samples, 128])

    # Channel counts must be integers. `dim / 2` turns into a float under
    # true division (Python 3 or `from __future__ import division`), so use
    # floor division, which is identical for ints on Python 2.
    half_dim = dim // 2

    output = lib.ops.linear.Linear(
        'Generator.Input', 128, 4 * 4 * 8 * dim, noise)
    output = tf.reshape(output, [-1, 8 * dim, 4, 4])

    for i in xrange(6):
        output = BottleneckResidualBlock(
            'Generator.4x4_{}'.format(i), 8 * dim, 8 * dim, 3, output,
            resample=None)
    output = BottleneckResidualBlock(
        'Generator.Up1', 8 * dim, 4 * dim, 3, output, resample='up')

    for i in xrange(6):
        output = BottleneckResidualBlock(
            'Generator.8x8_{}'.format(i), 4 * dim, 4 * dim, 3, output,
            resample=None)
    output = BottleneckResidualBlock(
        'Generator.Up2', 4 * dim, 2 * dim, 3, output, resample='up')

    for i in xrange(6):
        output = BottleneckResidualBlock(
            'Generator.16x16_{}'.format(i), 2 * dim, 2 * dim, 3, output,
            resample=None)
    output = BottleneckResidualBlock(
        'Generator.Up3', 2 * dim, 1 * dim, 3, output, resample='up')

    for i in xrange(6):
        output = BottleneckResidualBlock(
            'Generator.32x32_{}'.format(i), 1 * dim, 1 * dim, 3, output,
            resample=None)
    output = BottleneckResidualBlock(
        'Generator.Up4', 1 * dim, half_dim, 3, output, resample='up')

    # The last resolution uses five blocks, not six.
    for i in xrange(5):
        output = BottleneckResidualBlock(
            'Generator.64x64_{}'.format(i), half_dim, half_dim, 3, output,
            resample=None)

    output = lib.ops.conv2d.Conv2D(
        'Generator.Out', half_dim, 3, 1, output, he_init=False)
    # Dividing by 5 flattens the tanh so it saturates later.
    output = tf.tanh(output / 5.)

    return tf.reshape(output, [-1, OUTPUT_DIM])
def build_graph(self):
    """Build the siamese BiRNN sentence-pair classifier graph.

    Source and target token-id sequences are embedded, encoded by one
    shared bidirectional RNN, combined through element-wise product and
    absolute difference, and scored by a small feed-forward network with a
    sigmoid output. Placeholders, the training op, metrics, merged
    summaries and a saver are stored on ``self``.

    Side effects visible in this method: resets the default graph via
    ``reset_graph()``, may overwrite ``self.config.embedding_size``, and
    doubles ``self.config.state_size`` in place.
    """
    # Reset previous graph.
    reset_graph()

    # Placeholders.
    x_source = tf.placeholder(
        tf.int32, shape=[None, None], name="x_source")
    source_seq_length = tf.placeholder(
        tf.int32, shape=[None], name="source_seq_length")
    x_target = tf.placeholder(
        tf.int32, shape=[None, None], name="x_target")
    target_seq_length = tf.placeholder(
        tf.int32, shape=[None], name="target_seq_length")
    labels = tf.placeholder(tf.float32, shape=[None], name="labels")
    input_dropout = tf.placeholder_with_default(
        1.0, shape=[], name="input_dropout")
    output_dropout = tf.placeholder_with_default(
        1.0, shape=[], name="output_dropout")
    decision_threshold = tf.placeholder_with_default(
        0.5, shape=[], name="decision_threshold")

    # Embedding layer.
    with tf.variable_scope("embeddings"):
        source_initializer = None
        target_initializer = None
        embeddings_trainable = True

        if (self.config.source_embeddings_path is not None
                and self.config.target_embeddings_path is not None):
            # NOTE(review): these four names are not bound in this method;
            # unless they are module-level globals this raises NameError.
            # `self.config.source_embeddings_path` etc. may have been
            # intended -- confirm before changing.
            source_pretrained_embeddings,\
                target_pretrained_embeddings = get_pretrained_embeddings(
                    source_embeddings_path,
                    target_embeddings_path,
                    source_vocab,
                    target_vocab)
            assert source_pretrained_embeddings.shape[
                1] == target_pretrained_embeddings.shape[1]
            self.config.embedding_size = source_pretrained_embeddings.shape[
                1]
            source_initializer = tf.constant_initializer(
                source_pretrained_embeddings)
            target_initializer = tf.constant_initializer(
                target_pretrained_embeddings)
            # Pre-trained vectors are frozen only when requested.
            embeddings_trainable = not self.config.fix_pretrained

        source_embeddings = tf.get_variable(
            name="source_embeddings_matrix",
            shape=[self.config.source_vocab_size,
                   self.config.embedding_size],
            initializer=source_initializer,
            trainable=embeddings_trainable)
        target_embeddings = tf.get_variable(
            name="target_embeddings_matrix",
            shape=[self.config.target_vocab_size,
                   self.config.embedding_size],
            initializer=target_initializer,
            trainable=embeddings_trainable)

        source_rnn_inputs = tf.nn.embedding_lookup(
            source_embeddings, x_source)
        target_rnn_inputs = tf.nn.embedding_lookup(
            target_embeddings, x_target)
        source_rnn_inputs = tf.nn.dropout(source_rnn_inputs,
                                          keep_prob=input_dropout,
                                          name="source_seq_embeddings")
        target_rnn_inputs = tf.nn.dropout(target_rnn_inputs,
                                          keep_prob=input_dropout,
                                          name="target_seq_embeddings")

    # BiRNN encoder.
    with tf.variable_scope("birnn") as scope:

        def make_cell():
            # One recurrent cell of the configured type.
            if self.config.use_lstm:
                return tf.nn.rnn_cell.LSTMCell(self.config.state_size,
                                               use_peepholes=True)
            return tf.nn.rnn_cell.GRUCell(self.config.state_size)

        cell_fw = make_cell()
        cell_bw = make_cell()
        cell_fw = tf.nn.rnn_cell.DropoutWrapper(
            cell_fw, output_keep_prob=output_dropout)
        cell_bw = tf.nn.rnn_cell.DropoutWrapper(
            cell_bw, output_keep_prob=output_dropout)

        if self.config.num_layers > 1:
            # NOTE(review): the stacked cells replace the dropout-wrapped
            # cells above, so `output_dropout` has no effect when
            # num_layers > 1. Kept as in the original; confirm intent.
            cell_fw = tf.nn.rnn_cell.MultiRNNCell(
                [make_cell() for _ in range(self.config.num_layers)])
            cell_bw = tf.nn.rnn_cell.MultiRNNCell(
                [make_cell() for _ in range(self.config.num_layers)])

        # The same cells (and therefore weights) encode both sentences.
        with tf.variable_scope(scope):
            source_rnn_outputs, source_final_state = \
                tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=cell_fw,
                    cell_bw=cell_bw,
                    inputs=source_rnn_inputs,
                    sequence_length=source_seq_length,
                    dtype=tf.float32)
        with tf.variable_scope(scope, reuse=True):
            target_rnn_outputs, target_final_state = \
                tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=cell_fw,
                    cell_bw=cell_bw,
                    inputs=target_rnn_inputs,
                    sequence_length=target_seq_length,
                    dtype=tf.float32)

        # Forward and backward states are concatenated below.
        # NOTE(review): this mutates the shared config, so building the
        # graph twice with the same config doubles the size again.
        self.config.state_size *= 2

        # Mean and max pooling only work for 1 layer BiRNN.
        if self.config.use_mean_pooling:
            source_final_state = self.average_pooling(
                source_rnn_outputs, source_seq_length)
            target_final_state = self.average_pooling(
                target_rnn_outputs, target_seq_length)
        elif self.config.use_max_pooling:
            source_final_state = self.max_pooling(source_rnn_outputs)
            target_final_state = self.max_pooling(target_rnn_outputs)
        else:
            source_final_state_fw, source_final_state_bw = source_final_state
            target_final_state_fw, target_final_state_bw = target_final_state
            if self.config.num_layers > 1:
                # Keep only the top layer's state.
                source_final_state_fw = source_final_state_fw[-1]
                source_final_state_bw = source_final_state_bw[-1]
                target_final_state_fw = target_final_state_fw[-1]
                target_final_state_bw = target_final_state_bw[-1]
            if self.config.use_lstm:
                # LSTM states are (c, h) tuples; use the hidden part.
                source_final_state_fw = source_final_state_fw.h
                source_final_state_bw = source_final_state_bw.h
                target_final_state_fw = target_final_state_fw.h
                target_final_state_bw = target_final_state_bw.h
            source_final_state = tf.concat(
                [source_final_state_fw, source_final_state_bw], axis=1)
            target_final_state = tf.concat(
                [target_final_state_fw, target_final_state_bw], axis=1)

    # Feed-forward neural network.
    with tf.variable_scope("feed_forward"):
        h_multiply = tf.multiply(source_final_state, target_final_state)
        h_abs_diff = tf.abs(
            tf.subtract(source_final_state, target_final_state))

        W_1 = tf.get_variable(
            name="W_1",
            shape=[self.config.state_size, self.config.hidden_size])
        W_2 = tf.get_variable(
            name="W_2",
            shape=[self.config.state_size, self.config.hidden_size])
        b_1 = tf.get_variable(name="b_1",
                              shape=[self.config.hidden_size],
                              initializer=tf.constant_initializer(0.0))

        h_semantic = tf.tanh(
            tf.matmul(h_multiply, W_1) + tf.matmul(h_abs_diff, W_2) + b_1)

        W_3 = tf.get_variable(name="W_3",
                              shape=[self.config.hidden_size, 1])
        b_2 = tf.get_variable(name="b_2",
                              shape=[1],
                              initializer=tf.constant_initializer(0.0))

        logits = tf.matmul(h_semantic, W_3) + b_2
        # Drop only the trailing unit dimension. An unrestricted squeeze
        # would also remove the batch dimension for a batch of one and
        # leaves the static rank unknown.
        logits = tf.squeeze(logits, axis=1, name="logits")

    # Sigmoid output layer.
    with tf.name_scope("output"):
        probs = tf.sigmoid(logits, name="probs")
        predicted_class = tf.cast(tf.greater(probs, decision_threshold),
                                  tf.float32,
                                  name="predicted_class")

    # Loss.
    with tf.name_scope("cross_entropy"):
        losses = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits,
            labels=labels,
            name="cross_entropy_per_sequence")
        mean_loss = tf.reduce_mean(losses, name="cross_entropy_loss")

    # Optimization.
    with tf.name_scope("optimization"):
        global_step = tf.Variable(initial_value=0,
                                  trainable=False,
                                  name="global_step")
        optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
        trainable_variables = tf.trainable_variables()
        gradients = tf.gradients(mean_loss,
                                 trainable_variables,
                                 name="gradients")
        clipped_gradients, global_norm = tf.clip_by_global_norm(
            gradients,
            clip_norm=self.config.max_gradient_norm,
            name="clipped_gradients")
        train_op = optimizer.apply_gradients(
            zip(clipped_gradients, trainable_variables),
            global_step=global_step)

    # Evaluation metrics; each is a (value, update_op) pair.
    accuracy = tf.metrics.accuracy(labels, predicted_class, name="accuracy")
    precision = tf.metrics.precision(labels, predicted_class,
                                     name="precision")
    recall = tf.metrics.recall(labels, predicted_class, name="recall")

    # Add summaries.
    tf.summary.scalar("loss", mean_loss)
    tf.summary.scalar("global_norm", global_norm)
    tf.summary.scalar("accuracy", accuracy[0])
    tf.summary.scalar("precision", precision[0])
    tf.summary.scalar("recall", recall[0])
    tf.summary.scalar("logits" + "/sparsity", tf.nn.zero_fraction(logits))
    tf.summary.histogram("logits" + "/activations", logits)
    tf.summary.histogram("probs", probs)

    # Add histogram for trainable variables.
    for var in trainable_variables:
        tf.summary.histogram(var.op.name, var)

    # Add histogram for gradients.
    for grad, var in zip(clipped_gradients, trainable_variables):
        if grad is not None:
            tf.summary.histogram(var.op.name + "/gradients", grad)

    # Assign placeholders and operations.
    self.x_source = x_source
    self.x_target = x_target
    self.source_seq_length = source_seq_length
    self.target_seq_length = target_seq_length
    self.labels = labels
    self.input_dropout = input_dropout
    self.output_dropout = output_dropout
    self.decision_threshold = decision_threshold
    self.train_op = train_op
    self.probs = probs
    self.predicted_class = predicted_class
    self.mean_loss = mean_loss
    self.accuracy = accuracy
    self.precision = precision
    self.recall = recall
    self.summaries = tf.summary.merge_all()
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
def call(self, x, curr_state):
    """Advance the basic tanh RNN cell by one step.

    The previous state and the input are concatenated along axis 1 (state
    first), projected with ``self.W`` / ``self.b`` and passed through tanh.

    Returns:
        ``(new_state, new_state)``: the same tensor serves as both the
        cell output and the next state.
    """
    state_and_input = tf.concat([curr_state, x], axis=1)
    pre_activation = tf.matmul(state_and_input, self.W) + self.b
    next_state = tf.tanh(pre_activation)
    return (next_state, next_state)
def __init__(self, num_symbols, num_embed_units, num_units, num_layers,
             num_labels, embed, learning_rate=0.005, max_gradient_norm=5.0,
             param_da=150, param_r=10):
    """Build a bidirectional-GRU text classifier with self-attention.

    Tokens are mapped to ids through a mutable hash table, embedded,
    encoded by a bidirectional GRU, and summarized by ``param_r`` attention
    heads. The loss is softmax cross-entropy plus a small penalty on
    ``||A A^T - I||_F^2`` over the attention matrix ``A``.

    Args:
        num_symbols: vocabulary size (used when `embed` is None).
        num_embed_units: embedding dimension (used when `embed` is None).
        num_units: hidden size of each GRU direction.
        num_layers: accepted but not used in this constructor.
        num_labels: number of output classes.
        embed: pre-trained embedding matrix, or None for random init.
        learning_rate: initial SGD learning rate.
        max_gradient_norm: global-norm clipping threshold.
        param_da: width of the attention hidden projection.
        param_r: number of attention heads (rows of the attention matrix).
    """
    # Inputs.
    self.texts = tf.placeholder(
        tf.string, (None, None), 'texts')  # shape: [batch, length]
    self.texts_length = tf.placeholder(
        tf.int64, None, 'texts_length')  # shape: [batch]
    self.labels = tf.placeholder(tf.int64, None, 'labels')  # shape: [batch]

    # Vocabulary table (string -> index); unknown tokens map to UNK_ID.
    self.symbol2index = MutableHashTable(
        key_dtype=tf.string,
        value_dtype=tf.int64,
        default_value=UNK_ID,
        shared_name="in_table",
        name="in_table",
        checkpoint=True)

    batch_size = tf.shape(self.texts)[0]

    # Training state.
    self.learning_rate = tf.Variable(
        float(learning_rate), trainable=False, dtype=tf.float32)
    self.global_step = tf.Variable(0, trainable=False)

    self.index_input = self.symbol2index.lookup(
        self.texts)  # shape: [batch, length]

    # Embedding table (index -> vector): random or pre-trained.
    if embed is None:
        self.embed = tf.get_variable(
            'embed', [num_symbols, num_embed_units], tf.float32)
    else:
        self.embed = tf.get_variable(
            'embed', dtype=tf.float32, initializer=embed)

    # shape: [batch, length, num_embed_units]
    self.embed_input = tf.nn.embedding_lookup(self.embed, self.index_input)

    # Single-layer GRU cells, one per direction.
    cell_fw = GRUCell(num_units)
    cell_bw = GRUCell(num_units)

    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, self.embed_input, self.texts_length,
        dtype=tf.float32, scope="rnn")

    # Concatenated forward/backward outputs.
    hidden = tf.concat(outputs, 2)  # shape: (batch, length, 2*num_units)
    seq_len = tf.shape(hidden)[1]

    with tf.variable_scope('logits'):
        attn_w1 = tf.get_variable(
            "Ws1", [2 * num_units, param_da])  # (2*num_units, param_da)
        attn_w2 = tf.get_variable(
            "Ws2", [param_da, param_r])  # (param_da, param_r)

        # Flatten time into the batch axis so plain matmuls can be used.
        flat_hidden = tf.reshape(
            hidden, (batch_size * seq_len, 2 * num_units))
        projected = tf.tanh(
            tf.matmul(flat_hidden, attn_w1))  # (batch*length, param_da)
        scores = tf.reshape(
            tf.matmul(projected, attn_w2), (batch_size, seq_len, param_r))
        scores = tf.transpose(scores, (0, 2, 1))  # (batch, param_r, length)

        # Softmax over the last axis, i.e. over sequence positions.
        attention = tf.nn.softmax(scores)  # (batch, param_r, length)
        sentence_matrix = tf.matmul(
            attention, hidden)  # [batch, param_r, 2*num_units]
        flatten_M = tf.reshape(
            sentence_matrix, shape=[batch_size, param_r * 2 * num_units])

        # shape: [batch, num_labels]
        logits = tf.layers.dense(
            flatten_M, num_labels, activation=None, name='projection')

    # Penalty on ||A A^T - I||_F^2, averaged over the batch.
    identity = tf.reshape(
        tf.tile(tf.diag(tf.ones([param_r])), [batch_size, 1]),
        [batch_size, param_r, param_r])  # (batch, param_r, param_r)
    redundancy = tf.matmul(attention, attention, transpose_b=True) - identity
    self.penalized_term = tf.reduce_mean(
        tf.square(tf.norm(redundancy, ord='fro', axis=[-2, -1])))

    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.labels, logits=logits),
        name='loss')
    self.loss = cross_entropy + 0.0001 * self.penalized_term

    predict_labels = tf.argmax(logits, 1, 'predict_labels')
    # Number of correct predictions in the batch (a count, not a ratio).
    self.accuracy = tf.reduce_sum(
        tf.cast(tf.equal(self.labels, predict_labels), tf.int32),
        name='accuracy')

    self.params = tf.trainable_variables()

    # Plain SGD with global-norm gradient clipping.
    opt = tf.train.GradientDescentOptimizer(self.learning_rate)
    gradients = tf.gradients(self.loss, self.params)
    clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
        gradients, max_gradient_norm)
    self.update = opt.apply_gradients(
        zip(clipped_gradients, self.params), global_step=self.global_step)

    self.saver = tf.train.Saver(
        write_version=tf.train.SaverDef.V2,
        max_to_keep=5,
        pad_step_number=True)
def cyclegan_generator_resnet(images,
                              arg_scope_fn=cyclegan_arg_scope,
                              num_resnet_blocks=6,
                              num_filters=64,
                              upsample_fn=cyclegan_upsample,
                              kernel_size=3,
                              tanh_linear_slope=0.0,
                              is_training=False):
    """Builds the CycleGAN ResNet generator.

    Architecture: a 7x7 input convolution, two stride-2 encoder
    convolutions, `num_resnet_blocks` residual blocks, two upsampling
    stages and a 7x7 output convolution. Reflection padding is applied
    manually before the 'VALID' convolutions.

    Input height and width must be divisible by 4 for the output shape to
    equal the input shape. This is validated only when the dimensions are
    statically known.

    Args:
      images: Input image tensor of shape [batch_size, h, w, channels].
      arg_scope_fn: Callable returning the global arg_scope for the network.
      num_resnet_blocks: Number of residual blocks in the middle.
      num_filters: Number of filters of the first hidden layer.
      upsample_fn: Upsampling function used by the decoder.
      kernel_size: Size w, or list/tuple [h, w], of the inner-layer kernels.
      tanh_linear_slope: Slope of the linear term added to tanh(logits).
      is_training: Ignored; present for signature compatibility only.

    Returns:
      A `(predictions, end_points)` pair: the output tensor and a dict of
      intermediate tensors keyed by stage name.

    Raises:
      ValueError: If a statically known input height or width is not a
        multiple of 4.
    """
    # Neither dropout nor batch norm is used, so the flag is irrelevant.
    del is_training

    end_points = {}

    static_shape = images.shape.as_list()
    height, width = static_shape[1], static_shape[2]
    if height and height % 4 != 0:
        raise ValueError('The input height must be a multiple of 4.')
    if width and width % 4 != 0:
        raise ValueError('The input width must be a multiple of 4.')
    num_outputs = static_shape[3]

    if not isinstance(kernel_size, (list, tuple)):
        kernel_size = [kernel_size, kernel_size]
    kernel_height, kernel_width = kernel_size[0], kernel_size[1]

    # Padding that keeps the spatial size under a stride-1 VALID conv.
    pad_top, pad_bottom = (kernel_height - 1) // 2, kernel_height // 2
    pad_left, pad_right = (kernel_width - 1) // 2, kernel_width // 2
    paddings = np.array(
        [[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]],
        dtype=np.int32)
    # Fixed padding for the 7x7 input and output convolutions.
    spatial_pad_3 = np.array([[0, 0], [3, 3], [3, 3], [0, 0]])

    with tf.contrib.framework.arg_scope(arg_scope_fn()):

        # ---- Encoder ----
        with tf.variable_scope('input'):
            # 7x7 input stage
            padded = tf.pad(images, spatial_pad_3, 'REFLECT')
            net = layers.conv2d(
                padded, num_filters, kernel_size=[7, 7], padding='VALID')
            end_points['encoder_0'] = net

        with tf.variable_scope('encoder'):
            with tf.contrib.framework.arg_scope(
                    [layers.conv2d],
                    kernel_size=kernel_size,
                    stride=2,
                    activation_fn=tf.nn.relu,
                    padding='VALID'):
                net = layers.conv2d(
                    tf.pad(net, paddings, 'REFLECT'), num_filters * 2)
                end_points['encoder_1'] = net
                net = layers.conv2d(
                    tf.pad(net, paddings, 'REFLECT'), num_filters * 4)
                end_points['encoder_2'] = net

        # ---- Residual blocks ----
        with tf.variable_scope('residual_blocks'):
            with tf.contrib.framework.arg_scope(
                    [layers.conv2d],
                    kernel_size=kernel_size,
                    stride=1,
                    activation_fn=tf.nn.relu,
                    padding='VALID'):
                for block_id in xrange(num_resnet_blocks):
                    with tf.variable_scope('block_{}'.format(block_id)):
                        branch = tf.pad(net, paddings, 'REFLECT')
                        branch = layers.conv2d(branch, num_filters * 4)
                        branch = tf.pad(branch, paddings, 'REFLECT')
                        # No activation on the second conv of the block.
                        branch = layers.conv2d(
                            branch, num_filters * 4, activation_fn=None)
                        net = net + branch

                        end_points['resnet_block_%d' % block_id] = net

        # ---- Decoder ----
        with tf.variable_scope('decoder'):
            with tf.contrib.framework.arg_scope(
                    [layers.conv2d],
                    kernel_size=kernel_size,
                    stride=1,
                    activation_fn=tf.nn.relu):
                with tf.variable_scope('decoder1'):
                    net = upsample_fn(
                        net, num_outputs=num_filters * 2, stride=[2, 2])
                end_points['decoder1'] = net

                with tf.variable_scope('decoder2'):
                    net = upsample_fn(
                        net, num_outputs=num_filters, stride=[2, 2])
                end_points['decoder2'] = net

        with tf.variable_scope('output'):
            padded = tf.pad(net, spatial_pad_3, 'REFLECT')
            logits = layers.conv2d(
                padded,
                num_outputs, [7, 7],
                activation_fn=None,
                normalizer_fn=None,
                padding='valid')
            # Pin the output to the (dynamic or static) input shape.
            logits = tf.reshape(logits, _dynamic_or_static_shape(images))

            end_points['logits'] = logits
            end_points['predictions'] = tf.tanh(
                logits) + logits * tanh_linear_slope

    return end_points['predictions'], end_points
def create_generator(generator_inputs, generator_outputs_channels, NGF,
                     is_training=True):
    """Build an eight-level encoder/decoder generator with skip connections.

    The decoder levels 8..2 use ``gen_deconv_npu_mod`` while the final
    level uses ``gen_deconv``. Dropout is applied on the first three
    decoder levels only when `is_training` is true.

    Args:
        generator_inputs: input image tensor, channels on axis 3.
        generator_outputs_channels: number of channels of the output image.
        NGF: base number of generator filters.
        is_training: enables the 0.5 dropout on the deepest decoder levels.

    Returns:
        The tanh-activated output of the final decoder layer.
    """
    layers = []

    # encoder_1 has neither a preceding activation nor batch norm.
    with tf.variable_scope("encoder_1"):
        layers.append(gen_conv(generator_inputs, NGF))

    # encoder_2 .. encoder_8
    encoder_widths = [NGF * 2, NGF * 4] + [NGF * 8] * 5
    for level, out_channels in enumerate(encoder_widths, start=2):
        with tf.variable_scope("encoder_%d" % level):
            activated = lrelu(layers[-1], 0.2)
            layers.append(batchnorm(gen_conv(activated, out_channels)))

    # Dropout is disabled outside training (the original notes the target
    # NPU API has no dropout op).
    dropout_prob = 0.5 if is_training else 0.

    # decoder_8 .. decoder_2 as (output channels, dropout rate).
    decoder_specs = [
        (NGF * 8, dropout_prob),
        (NGF * 8, dropout_prob),
        (NGF * 8, dropout_prob),
        (NGF * 8, 0.0),
        (NGF * 4, 0.0),
        (NGF * 2, 0.0),
        (NGF, 0.0),
    ]

    num_encoder_layers = len(layers)
    for decoder_layer, (out_channels, dropout) in enumerate(decoder_specs):
        skip_layer = num_encoder_layers - decoder_layer - 1
        with tf.variable_scope("decoder_%d" % (skip_layer + 1)):
            decoder_input = layers[-1]
            if decoder_layer != 0:
                # Every decoder level but the first also receives the
                # encoder output from the mirrored level.
                decoder_input = tf.concat(
                    [decoder_input, layers[skip_layer]], axis=3)

            rectified = tf.nn.relu(decoder_input)
            print(out_channels)
            # NPU-specific transposed convolution variant.
            upsampled = gen_deconv_npu_mod(rectified, out_channels)
            upsampled = batchnorm(upsampled)
            if dropout > 0.0:
                upsampled = tf.nn.dropout(upsampled, keep_prob=1 - dropout)
            layers.append(upsampled)

    # decoder_1 maps to the requested number of output channels.
    with tf.variable_scope("decoder_1"):
        decoder_input = tf.concat([layers[-1], layers[0]], axis=3)
        image = gen_deconv(
            tf.nn.relu(decoder_input), generator_outputs_channels)
        layers.append(tf.tanh(image))

    return layers[-1]
def _build_recurrent_model(self, input, state, num_units, **kwargs):
    """Build one step of a basic tanh recurrent unit.

    `input` and `state` are passed together to ``rnn_cell.linear`` (with
    its third positional argument set to True, under scope 'BasicRNN') and
    the result goes through tanh.

    Extra keyword arguments are accepted and ignored.

    Returns:
        ``(hidden_state, hidden_state)``: the same tensor twice.
    """
    from rnn_cell import linear

    pre_activation = linear([input, state], num_units, True,
                            scope='BasicRNN')
    hidden_state = tf.tanh(pre_activation, 'BasicRNN/hidden_state')
    return hidden_state, hidden_state
def ResnetGenerator(input_shape=(512, 512, 1),
                    output_channels=1,
                    dim=64,
                    n_downsamplings=2,
                    n_blocks=9,
                    norm='instance_norm'):
    """Build a ResNet-style image-to-image generator as a Keras model.

    Layout: reflect-padded 7x7 stem, `n_downsamplings` stride-2
    convolutions (doubling the channels each time), `n_blocks` residual
    blocks, the same number of stride-2 transposed convolutions (halving
    the channels), and a reflect-padded 7x7 output convolution with tanh.

    Args:
        input_shape: shape of one input image, without the batch dimension.
        output_channels: number of channels of the generated image.
        dim: channel count of the stem convolution.
        n_downsamplings: number of down- and up-sampling stages.
        n_blocks: number of residual blocks.
        norm: normalization name resolved by ``_get_norm_layer``.

    Returns:
        A ``keras.Model`` mapping the input image to the generated image.
    """
    Norm = _get_norm_layer(norm)

    def _reflect_pad(tensor, amount):
        # Reflection-pad the two middle axes by `amount` on each side.
        return tf.pad(
            tensor,
            [[0, 0], [amount, amount], [amount, amount], [0, 0]],
            mode='REFLECT')

    def _residual_block(x):
        channels = x.shape[-1]

        branch = _reflect_pad(x, 1)
        branch = keras.layers.Conv2D(
            channels, 3, padding='valid', use_bias=False)(branch)
        branch = Norm()(branch)
        branch = tf.nn.relu(branch)

        branch = _reflect_pad(branch, 1)
        branch = keras.layers.Conv2D(
            channels, 3, padding='valid', use_bias=False)(branch)
        branch = Norm()(branch)

        return keras.layers.add([x, branch])

    # 0: input
    inputs = keras.Input(shape=input_shape)
    h = inputs

    # 1: 7x7 stem
    h = _reflect_pad(h, 3)
    h = keras.layers.Conv2D(dim, 7, padding='valid', use_bias=False)(h)
    h = Norm()(h)
    h = tf.nn.relu(h)

    # 2: downsampling
    for _ in range(n_downsamplings):
        dim = dim * 2
        h = keras.layers.Conv2D(
            dim, 3, strides=2, padding='same', use_bias=False)(h)
        h = Norm()(h)
        h = tf.nn.relu(h)

    # 3: residual blocks
    for _ in range(n_blocks):
        h = _residual_block(h)

    # 4: upsampling (mirrors the downsampling stages)
    for _ in range(n_downsamplings):
        dim = dim // 2
        h = keras.layers.Conv2DTranspose(
            dim, 3, strides=2, padding='same', use_bias=False)(h)
        h = Norm()(h)
        h = tf.nn.relu(h)

    # 5: 7x7 output convolution
    h = _reflect_pad(h, 3)
    h = keras.layers.Conv2D(output_channels, 7, padding='valid')(h)
    h = tf.tanh(h)

    return keras.Model(inputs=inputs, outputs=h)
def rnn_cell(rnn_input, state):
    """Apply one step of a vanilla tanh RNN using shared variables.

    The variables 'W' and 'b' are fetched from the 'rnn_cell' scope with
    ``reuse=True``, so they must already exist. Sizes come from the
    module-level ``num_classes`` and ``state_size``.

    Returns:
        The new state ``tanh([rnn_input, state] @ W + b)``.
    """
    with tf.variable_scope('rnn_cell', reuse=True):
        weights = tf.get_variable(
            'W', [num_classes + state_size, state_size])
        offset = tf.get_variable(
            'b', [state_size], initializer=tf.constant_initializer(0.0))
    joined = tf.concat([rnn_input, state], 1)
    return tf.tanh(tf.matmul(joined, weights) + offset)
def binary_layer(self, embedding, dim, reuse=False):
    """Map `embedding` through three tanh-activated dense layers.

    The layer widths are 1024, 1024 and `dim`; all variables live in the
    'binary' variable scope.

    Args:
        embedding: input tensor fed to the first dense layer.
        dim: width of the final layer.
        reuse: passed to ``tf.variable_scope`` to share existing weights.

    Returns:
        The tanh output of the last layer (values in (-1, 1)).
    """
    with tf.variable_scope('binary', reuse=reuse):
        hidden = embedding
        for width in (1024, 1024, dim):
            hidden = tf.tanh(tf.layers.dense(hidden, width))
    return hidden