def layer_norm(x, num_units, scope='layer_norm', reuse=False, gamma_start=1.0,
               epsilon=1e-3, use_bias=True):
    """Layer-normalize `x` along axis 1, then apply a learned scale and shift.

    Args:
        x: Input tensor; mean and variance are reduced over axis 1 with the
            reduced dimension kept.
        num_units: Length of the `ln_gamma` / `ln_beta` variables.
        scope: Variable scope under which the parameters are created.
        reuse: When exactly `True`, variables in `scope` are reused.
        gamma_start: Constant initial value of `ln_gamma`.
        epsilon: Added to the variance before taking the reciprocal sqrt.
        use_bias: Whether `ln_beta` is created and added to the result.

    Returns:
        `gamma * (x - mean) * rsqrt(var + epsilon)`, plus `beta` when
        `use_bias` is set.
    """
    reduce_axes = [1]
    centered = x - tf.reduce_mean(x, reduce_axes, keep_dims=True)
    variance = tf.reduce_mean(tf.square(centered), reduce_axes, keep_dims=True)
    rstd = tf.rsqrt(variance + epsilon)

    with tf.variable_scope(scope):
        if reuse is True:
            tf.get_variable_scope().reuse_variables()
        gamma = tf.get_variable(
            'ln_gamma', [num_units],
            initializer=tf.constant_initializer(gamma_start))
        if use_bias:
            beta = tf.get_variable(
                'ln_beta', [num_units],
                initializer=tf.constant_initializer(0.0))

    normalized = gamma * centered * rstd
    if not use_bias:
        return normalized
    normalized += beta
    return normalized
def inference(inputs, name):
    """Define the architecture and build the graph.

    Two fully connected layers: a 9 -> 12 ReLU layer followed by a linear
    12 -> NUM_CLASSES layer. Variables live in scopes `fc1_<name>` and
    `fc2_<name>`.

    Args:
        inputs: Tensor multiplied by the [9, 12] first-layer weights.
        name: Suffix appended to both layer scope names.

    Returns:
        The linear output of the second layer (unnormalized class scores).
    """
    # Hidden layer: relu(inputs . W + b), with weight decay on W.
    with tf.variable_scope('fc1_' + name) as scope:
        hidden_w = _variable_with_weight_decay(
            'weights', shape=[9, 12], stddev=0.04, wd=0.004)
        hidden_b = _variable_on_cpu(
            'biases', [12], tf.constant_initializer(0.1))
        pre_activation = tf.add(tf.matmul(inputs, hidden_w), hidden_b)
        local1 = tf.nn.relu(pre_activation)

    # Output layer: linear only; the softmax itself is not applied here.
    with tf.variable_scope('fc2_' + name) as scope:
        out_w = _variable_with_weight_decay(
            'weights', [12, NUM_CLASSES], stddev=0.04, wd=0.0)
        out_b = _variable_on_cpu(
            'biases', [NUM_CLASSES], tf.constant_initializer(0.0))
        linear = tf.nn.xw_plus_b(local1, out_w, out_b, name=scope.name)

    return linear
def inference(input_tensor, train, regularizer):
    """Build a conv-pool-conv-pool-fc-fc network and return its logits.

    Args:
        input_tensor: Image batch fed to the first convolution; its channel
            count must match NUM_CHANNELS.
        train: Truthy to apply dropout (keep probability 0.5) after the
            first fully connected layer.
        regularizer: Optional callable applied to the fully connected
            weights; its results are added to the 'losses' collection.
            Pass None to disable regularization.

    Returns:
        Logits tensor produced by the last fully connected layer.
    """
    # Layer 1: convolution.
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable(
            "weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable(
            "biases", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights,
                             strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    # Layer 2: max pooling.
    with tf.name_scope('layer2-pool1'):
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='SAME')

    # Layer 3: convolution.
    with tf.variable_scope('layer3-conv2'):
        conv2_weights = tf.get_variable(
            "weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable(
            "biases", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights,
                             strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

    # Layer 4: max pooling.
    with tf.name_scope('layer4-pool2'):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='SAME')

    # Flatten the feature maps into one vector per example.
    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    # A statically unknown batch size shows up as None, which tf.reshape
    # rejects; let reshape infer it in that case.
    batch_dim = pool_shape[0] if pool_shape[0] is not None else -1
    reshaped = tf.reshape(pool2, [batch_dim, nodes])

    # Layer 5: fully connected.
    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable(
            "weight", [nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Only the fully connected weights are regularized.
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable(
            "bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if train:
            fc1 = tf.nn.dropout(fc1, 0.5)

    # Layer 6: fully connected (logits).
    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable(
            "weight", [FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Only the fully connected weights are regularized.
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable(
            "bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit
def _build_body(self):
    """Build input projection, one LSTM step and the action output head.

    Reads `self._features`, `self._state_c`, `self._state_h` and
    `self._action_mask`; stores the new LSTM state in `self._next_state`
    and the masked softmax probabilities in `self._probs`.

    Returns:
        The unnormalized action logits.
    """
    # Input projection (linear; add relu/tanh here if necessary).
    in_weights = tf.get_variable('Wi', [self.obs_size, self.n_hidden],
                                 initializer=xavier_initializer())
    in_bias = tf.get_variable('bi', [self.n_hidden],
                              initializer=tf.constant_initializer(0.))
    projected = tf.matmul(self._features, in_weights) + in_bias

    # Single LSTM step from the externally supplied (c, h) state.
    cell = tf.contrib.rnn.LSTMCell(self.n_hidden, state_is_tuple=True)
    _, self._next_state = cell(inputs=projected,
                               state=(self._state_c, self._state_h))

    # Concatenate c and h along axis 1 so both feed the output projection.
    state_flat = tf.concat(
        axis=1, values=(self._next_state.c, self._next_state.h))

    # Output projection.
    out_weights = tf.get_variable('Wo', [self.n_hidden*2, self.n_actions],
                                  initializer=xavier_initializer())
    out_bias = tf.get_variable('bo', [self.n_actions],
                               initializer=tf.constant_initializer(0.))
    logits = tf.matmul(state_flat, out_weights) + out_bias

    # Element-wise multiply the softmax with the action mask.
    # NOTE(review): the masked probabilities are not re-normalized here.
    squeezed_probs = tf.squeeze(tf.nn.softmax(logits))
    self._probs = tf.multiply(squeezed_probs, self._action_mask, name='probs')

    return logits
def _initialize_weights(self):
    """Create encoder and decoder weight/bias variables.

    The encoder maps `self.n_input` through each size in
    `self.hidden_units`; the decoder mirrors it back to `self.n_input`.
    Variables are named `encoder<i>_W` / `encoder<i>_b` and
    `decoder<i>_W` / `decoder<i>_b`.

    Each bias is sized to its layer's output width, i.e. the second
    dimension of the matching weight matrix. Decoder biases were
    previously sized to the layer's input width, which cannot be added to
    `matmul(x, W)` unless both widths happen to be equal.

    Returns:
        dict mapping variable name to the created variable.
    """
    all_weights = dict()

    def _add_layer(prefix, index, n_in, n_out):
        # Register one dense layer's weight matrix and bias vector.
        weight_name = '%s%d_W' % (prefix, index)
        bias_name = '%s%d_b' % (prefix, index)
        all_weights[weight_name] = tf.get_variable(
            weight_name, [n_in, n_out],
            initializer=tf.contrib.layers.xavier_initializer())
        all_weights[bias_name] = tf.get_variable(
            bias_name, [n_out],
            initializer=tf.constant_initializer(0.0))

    # Encoding layers: each layer's input is the previous layer's output.
    n_in = self.n_input
    for i, n_hidden in enumerate(self.hidden_units):
        _add_layer('encoder', i, n_in, n_hidden)
        n_in = n_hidden

    # Decoding layers: walk the hidden sizes in reverse, ending at n_input.
    hidden_units_rev = self.hidden_units[::-1]
    last_index = len(hidden_units_rev) - 1
    for i, n_hidden in enumerate(hidden_units_rev):
        n_out = self.n_input if i == last_index else hidden_units_rev[i + 1]
        _add_layer('decoder', i, n_hidden, n_out)

    return all_weights
def _batch_norm(x, name, is_train):
    """Apply a batch normalization layer.

    Statistics are taken over every axis except the last. In training the
    batch moments are used and the moving averages (decay 0.995) are
    updated; otherwise the stored moving averages are used.

    Args:
        x: Input tensor with a statically known last dimension.
        name: Variable scope for the layer's variables.
        is_train: Predicate passed to `tf.cond` selecting the training
            branch.

    Returns:
        The normalized tensor (epsilon 1e-4, learned offset and scale).
    """
    with tf.variable_scope(name):
        inputs_shape = x.get_shape()
        axis = list(range(len(inputs_shape) - 1))
        param_shape = int(inputs_shape[-1])

        moving_mean = tf.get_variable(
            'mean', [param_shape],
            initializer=tf.constant_initializer(0.0), trainable=False)
        moving_var = tf.get_variable(
            'variance', [param_shape],
            initializer=tf.constant_initializer(1.0), trainable=False)
        beta = tf.get_variable(
            'offset', [param_shape],
            initializer=tf.constant_initializer(0.0))
        gamma = tf.get_variable(
            'scale', [param_shape],
            initializer=tf.constant_initializer(1.0))

        def mean_var_with_update():
            mean, var = tf.nn.moments(x, axis)
            update_moving_mean = moving_averages.assign_moving_average(
                moving_mean, mean, 0.995)
            update_moving_var = moving_averages.assign_moving_average(
                moving_var, var, 0.995)
            # The dependency must be attached inside this branch: assigning
            # to an outer list from here only rebinds a local name, which
            # left the moving averages permanently at their initial values.
            with tf.control_dependencies(
                    [update_moving_mean, update_moving_var]):
                return tf.identity(mean), tf.identity(var)

        def mean_var():
            return tf.identity(moving_mean), tf.identity(moving_var)

        mean, var = tf.cond(is_train, mean_var_with_update, mean_var)
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-4)
    return normed
def __init__(self, epsilon=1e-2, shape=()):
    """Create running-sum variables and the derived mean/std tensors.

    Args:
        epsilon: Initial value of the running sum of squares and of the
            count, so the first division is well defined.
        shape: Shape of the tracked quantity.

    Sets `self.mean` (sum / count), `self.std`
    (sqrt(max(sumsq / count - mean**2, 1e-2))) and `self.incfiltparams`,
    a callable that adds new sum, sum-of-squares and count increments.
    """
    def _accumulator(var_name, var_shape, init_value):
        # Non-trainable float64 accumulator with a constant initial value.
        return tf.get_variable(
            dtype=tf.float64,
            shape=var_shape,
            initializer=tf.constant_initializer(init_value),
            name=var_name, trainable=False)

    self._sum = _accumulator("runningsum", shape, 0.0)
    self._sumsq = _accumulator("runningsumsq", shape, epsilon)
    self._count = _accumulator("count", (), epsilon)
    self.shape = shape

    self.mean = tf.to_float(self._sum / self._count)
    mean_of_squares = tf.to_float(self._sumsq / self._count)
    # Variance is floored at 1e-2 before the square root.
    self.std = tf.sqrt(
        tf.maximum(mean_of_squares - tf.square(self.mean), 1e-2))

    newsum = tf.placeholder(shape=self.shape, dtype=tf.float64, name='sum')
    newsumsq = tf.placeholder(shape=self.shape, dtype=tf.float64, name='var')
    newcount = tf.placeholder(shape=[], dtype=tf.float64, name='count')
    increments = [
        tf.assign_add(self._sum, newsum),
        tf.assign_add(self._sumsq, newsumsq),
        tf.assign_add(self._count, newcount),
    ]
    self.incfiltparams = U.function(
        [newsum, newsumsq, newcount], [], updates=increments)
def testBasicLSTMCell(self):
    """Smoke-test a stacked BasicLSTMCell and one with input_size != num_units."""
    half_init = 0.5
    with self.test_session() as sess:
        # Two stacked cells of 2 units each -> state width 2 * 2 * 2 = 8.
        with tf.variable_scope(
                "root", initializer=tf.constant_initializer(half_init)):
            x = tf.zeros([1, 2])
            m = tf.zeros([1, 8])
            stacked = tf.nn.rnn_cell.MultiRNNCell(
                [tf.nn.rnn_cell.BasicLSTMCell(2)] * 2)
            g, out_m = stacked(x, m)
            sess.run([tf.initialize_all_variables()])
            feed = {x.name: np.array([[1., 1.]]),
                    m.name: 0.1 * np.ones([1, 8])}
            res = sess.run([g, out_m], feed)
            self.assertEqual(len(res), 2)
            # The numbers in results were not calculated, this is just a
            # smoke test.
            self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
            expected_mem = np.array([[0.68967271, 0.68967271,
                                      0.44848421, 0.44848421,
                                      0.39897051, 0.39897051,
                                      0.24024698, 0.24024698]])
            self.assertAllClose(res[1], expected_mem)

        # Test BasicLSTMCell with input_size != num_units.
        with tf.variable_scope(
                "other", initializer=tf.constant_initializer(half_init)):
            x = tf.zeros([1, 3])
            m = tf.zeros([1, 4])
            single = tf.nn.rnn_cell.BasicLSTMCell(2, input_size=3)
            g, out_m = single(x, m)
            sess.run([tf.initialize_all_variables()])
            feed = {x.name: np.array([[1., 1., 1.]]),
                    m.name: 0.1 * np.ones([1, 4])}
            res = sess.run([g, out_m], feed)
            self.assertEqual(len(res), 2)
def __init__(self, sess, n_features, n_actions, lr=0.001):
    """Build the actor network, its objective and its training op.

    Args:
        sess: Session object stored on the instance.
        n_features: Width of the single-row state placeholder.
        n_actions: Number of units in the softmax output layer.
        lr: Learning rate passed to the Adam optimizer.
    """
    self.sess = sess

    self.s = tf.placeholder(tf.float32, [1, n_features], name='state')
    self.a = tf.placeholder(tf.int32, None, name='act')
    self.td_error = tf.placeholder(tf.float32, None, "td_error")

    with tf.variable_scope('Actor'):
        hidden = tf.layers.dense(
            inputs=self.s,
            units=20,
            activation=tf.nn.relu,
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
            bias_initializer=tf.constant_initializer(0.1),
            name='l1',
        )
        self.acts_prob = tf.layers.dense(
            inputs=hidden,
            units=n_actions,
            activation=tf.nn.softmax,
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
            bias_initializer=tf.constant_initializer(0.1),
            name='acts_prob',
        )

    with tf.variable_scope('exp_v'):
        # Log-probability of the chosen action, weighted by the TD error.
        chosen_prob = self.acts_prob[0, self.a]
        self.exp_v = tf.reduce_mean(tf.log(chosen_prob) * self.td_error)

    with tf.variable_scope('train'):
        # Minimizing -exp_v maximizes exp_v.
        optimizer = tf.train.AdamOptimizer(lr)
        self.train_op = optimizer.minimize(-self.exp_v)
def discriminator(X, reuse=False):
    """Build the discriminator: two strided convs, a dense layer and a logit.

    Args:
        X: Input reshaped internally to [-1, 28, 28, 1].
        reuse: When truthy, variables of the 'discriminator' scope are reused.

    Returns:
        Tuple `(prob, logits)` where `prob = sigmoid(logits)`.
    """
    with tf.variable_scope('discriminator'):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        # Channel counts of the two conv layers and width of the dense layer.
        K, M, N = 64, 128, 256

        def _normal():
            return tf.random_normal_initializer(stddev=0.1)

        W1 = tf.get_variable('D_W1', [4, 4, 1, K], initializer=_normal())
        B1 = tf.get_variable('D_B1', [K], initializer=tf.constant_initializer())
        W2 = tf.get_variable('D_W2', [4, 4, K, M], initializer=_normal())
        B2 = tf.get_variable('D_B2', [M], initializer=tf.constant_initializer())
        W3 = tf.get_variable('D_W3', [7*7*M, N], initializer=_normal())
        B3 = tf.get_variable('D_B3', [N], initializer=tf.constant_initializer())
        W4 = tf.get_variable('D_W4', [N, 1], initializer=_normal())
        B4 = tf.get_variable('D_B4', [1], initializer=tf.constant_initializer())

        X = tf.reshape(X, [-1, 28, 28, 1], 'reshape')

        # conv -> batch norm -> leaky relu -> dropout, twice.
        conv1 = conv(X, W1, B1, stride=2, name='conv1')
        bn1 = tf.contrib.layers.batch_norm(conv1)
        dropped1 = tf.nn.dropout(lrelu(bn1), 0.4)
        conv2 = conv(dropped1, W2, B2, stride=2, name='conv2')
        bn2 = tf.contrib.layers.batch_norm(conv2)
        dropped2 = tf.nn.dropout(lrelu(bn2), 0.4)

        flat = tf.reshape(dropped2, [-1, 7*7*M], name='flat')
        dense = lrelu(tf.matmul(flat, W3) + B3)
        logits = tf.matmul(dense, W4) + B4
        prob = tf.nn.sigmoid(logits)
        return prob, logits
def generator(X, batch_size=64):
    """Build the generator: dense projection followed by two deconvolutions.

    Args:
        X: Input multiplied by a [100, 7*7*256] weight matrix.
        batch_size: Static batch size used for the reshape and for the
            deconvolution output shapes.

    Returns:
        Sigmoid of the final deconvolution output, reshaped to [-1, 28*28].
    """
    with tf.variable_scope('generator'):
        # Channel counts at the 7x7 and 14x14 stages (L is not used below).
        K, L, M = 256, 128, 64

        def _normal():
            return tf.random_normal_initializer(stddev=0.1)

        W1 = tf.get_variable('G_W1', [100, 7*7*K], initializer=_normal())
        B1 = tf.get_variable('G_B1', [7*7*K], initializer=tf.constant_initializer())
        W2 = tf.get_variable('G_W2', [4, 4, M, K], initializer=_normal())
        B2 = tf.get_variable('G_B2', [M], initializer=tf.constant_initializer())
        W3 = tf.get_variable('G_W3', [4, 4, 1, M], initializer=_normal())
        B3 = tf.get_variable('G_B3', [1], initializer=tf.constant_initializer())

        projected = lrelu(tf.matmul(X, W1) + B1)
        feature_map = tf.reshape(projected, [batch_size, 7, 7, K])

        deconv1 = deconv(feature_map, W2, B2,
                         shape=[batch_size, 14, 14, M], stride=2,
                         name='deconv1')
        bn1 = tf.contrib.layers.batch_norm(deconv1)
        dropped = tf.nn.dropout(lrelu(bn1), 0.4)
        deconv2 = deconv(dropped, W3, B3,
                         shape=[batch_size, 28, 28, 1], stride=2,
                         name='deconv2')

        flat = tf.reshape(deconv2, [-1, 28*28], 'reshape')
        return tf.nn.sigmoid(flat)
def actor_network(states):
    """Build the policy network: two ReLU layers and a tanh output.

    Reads the module-level `state_dim` and `action_dim`.

    Args:
        states: Tensor multiplied by a [state_dim, 400] weight matrix.

    Returns:
        Action tensor bounded to [-1, 1] by tanh.
    """
    h1_dim, h2_dim = 400, 300

    def _xavier():
        return tf.contrib.layers.xavier_initializer()

    def _zeros():
        return tf.constant_initializer(0)

    # First hidden layer.
    W1 = tf.get_variable("W1", [state_dim, h1_dim], initializer=_xavier())
    b1 = tf.get_variable("b1", [h1_dim], initializer=_zeros())
    h1 = tf.nn.relu(tf.matmul(states, W1) + b1)

    # Second hidden layer.
    W2 = tf.get_variable("W2", [h1_dim, h2_dim], initializer=_xavier())
    b2 = tf.get_variable("b2", [h2_dim], initializer=_zeros())
    h2 = tf.nn.relu(tf.matmul(h1, W2) + b2)

    # Output layer: tanh bounds the action. Actions are assumed to range
    # over [-1, 1]; scale the output by a constant here if needed.
    W3 = tf.get_variable("W3", [h2_dim, action_dim], initializer=_xavier())
    b3 = tf.get_variable("b3", [action_dim], initializer=_zeros())
    return tf.nn.tanh(tf.matmul(h2, W3) + b3)
def critic_network(states, action):
    """Build the value network; the action joins at the second layer.

    Reads the module-level `state_dim` and `action_dim`.

    Args:
        states: Tensor multiplied by a [state_dim, 400] weight matrix.
        action: Tensor concatenated with the first hidden layer on axis 1.

    Returns:
        Linear output with one unit per example.
    """
    h1_dim, h2_dim = 400, 300

    def _xavier():
        return tf.contrib.layers.xavier_initializer()

    def _zeros():
        return tf.constant_initializer(0)

    # First hidden layer sees the state only.
    W1 = tf.get_variable("W1", [state_dim, h1_dim], initializer=_xavier())
    b1 = tf.get_variable("b1", [h1_dim], initializer=_zeros())
    h1 = tf.nn.relu(tf.matmul(states, W1) + b1)

    # The action skips the first layer and is concatenated here.
    h1_concat = tf.concat(axis=1, values=[h1, action])

    W2 = tf.get_variable("W2", [h1_dim + action_dim, h2_dim],
                         initializer=_xavier())
    b2 = tf.get_variable("b2", [h2_dim], initializer=_zeros())
    h2 = tf.nn.relu(tf.matmul(h1_concat, W2) + b2)

    # Linear scalar head.
    W3 = tf.get_variable("W3", [h2_dim, 1], initializer=_xavier())
    b3 = tf.get_variable("b3", [1], initializer=_zeros())
    return tf.matmul(h2, W3) + b3
def fc(self, input, num_out, name, relu=True, trainable=True):
    """Add a fully connected layer, flattening 4-D inputs first.

    Args:
        input: Input tensor, or a tuple whose first element is used.
        num_out: Number of output units.
        name: Variable scope name; 'bbox_pred' selects a smaller weight
            initializer stddev (0.001 instead of 0.01).
        relu: Use `tf.nn.relu_layer` when true, else `tf.nn.xw_plus_b`.
        trainable: Forwarded to `self.make_var` for both variables.

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope(name) as scope:
        # only use the first input
        if isinstance(input, tuple):
            input = input[0]

        input_shape = input.get_shape()
        if input_shape.ndims != 4:
            feed_in = input
            dim = int(input_shape[-1])
        else:
            # Product of all non-batch dimensions.
            dim = 1
            for extent in input_shape[1:].as_list():
                dim *= extent
            # Move the last axis ahead of the two middle axes before
            # flattening.
            channels_first = tf.transpose(input, [0, 3, 1, 2])
            feed_in = tf.reshape(channels_first, [-1, dim])

        weight_stddev = 0.001 if name == 'bbox_pred' else 0.01
        init_weights = tf.truncated_normal_initializer(
            0.0, stddev=weight_stddev)
        init_biases = tf.constant_initializer(0.0)

        weights = self.make_var(
            'weights', [dim, num_out], init_weights, trainable,
            regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY))
        biases = self.make_var('biases', [num_out], init_biases, trainable)

        if relu:
            layer_op = tf.nn.relu_layer
        else:
            layer_op = tf.nn.xw_plus_b
        return layer_op(feed_in, weights, biases, name=scope.name)
def add_model(self, input_data):
    """Adds a linear layer followed by a softmax transformation.

    Computes y = softmax(xW + b), with W and b both initialized to zeros
    and created under the 'softmax' variable scope.

    Args:
        input_data: A tensor of shape (batch_size, n_features).
    Returns:
        out: A tensor of shape (batch_size, n_classes)
    """
    n_features = self.config.n_features
    n_classes = self.config.n_classes
    with tf.variable_scope('softmax'):
        W = tf.get_variable(
            "weights", (n_features, n_classes),
            initializer=tf.constant_initializer(0.0))
        b = tf.get_variable(
            "bias", (n_classes,),
            initializer=tf.constant_initializer(0.0))
        scores = tf.matmul(input_data, W) + b
        out = softmax(scores)
    return out
def aconv1d_layer(input_tensor, size, rate, activation, scale, bias):
    """Apply a 1-D atrous (dilated) convolution with bias or batch norm.

    The input gets a dummy axis at position 1 so `tf.nn.atrous_conv2d`
    can be used, and that axis is squeezed away afterwards. Variables are
    created under scope 'aconv1d_<aconv1d_index>'; the module-level
    counter `aconv1d_index` is incremented on every call.

    Args:
        input_tensor: Input whose last dimension is the channel count; the
            output has the same number of channels.
        size: Filter width.
        rate: Dilation rate.
        activation: 'tanh' or 'sigmoid' to apply that activation; any
            other value leaves the output linear.
        scale: Weights are initialized uniformly in [-scale, scale].
        bias: If truthy, add a learned bias; otherwise apply batch
            normalization.

    Returns:
        The output tensor after the optional bias / batch norm and
        activation.
    """
    global aconv1d_index
    with tf.variable_scope('aconv1d_' + str(aconv1d_index)):
        shape = input_tensor.get_shape().as_list()
        channels = shape[-1]

        W = tf.get_variable(
            'W', (1, size, channels, channels), dtype=tf.float32,
            initializer=tf.random_uniform_initializer(minval=-scale,
                                                      maxval=scale))
        if bias:
            b = tf.get_variable('b', [channels], dtype=tf.float32,
                                initializer=tf.constant_initializer(0))

        out = tf.nn.atrous_conv2d(tf.expand_dims(input_tensor, dim=1), W,
                                  rate=rate, padding='SAME')
        out = tf.squeeze(out, [1])

        if bias:
            # The bias variable was previously created but never applied.
            out = out + b
        else:
            beta = tf.get_variable('beta', [channels], dtype=tf.float32,
                                   initializer=tf.constant_initializer(0))
            gamma = tf.get_variable('gamma', [channels], dtype=tf.float32,
                                    initializer=tf.constant_initializer(1))
            mean_running = tf.get_variable(
                'mean', [channels], dtype=tf.float32,
                initializer=tf.constant_initializer(0))
            variance_running = tf.get_variable(
                'variance', [channels], dtype=tf.float32,
                initializer=tf.constant_initializer(1))
            # Pass a concrete list; a lazy `range` object is not a list on
            # Python 3.
            moment_axes = list(range(len(out.get_shape()) - 1))
            mean, variance = tf.nn.moments(out, axes=moment_axes)

            def update_running_stat():
                decay = 0.99
                update_op = [
                    mean_running.assign(
                        mean_running * decay + mean * (1 - decay)),
                    variance_running.assign(
                        variance_running * decay + variance * (1 - decay)),
                ]
                with tf.control_dependencies(update_op):
                    return tf.identity(mean), tf.identity(variance)

            # NOTE(review): the predicate is a fresh local variable
            # initialized to False that nothing in this function ever
            # sets, so the running statistics appear to be used always --
            # confirm that is intended.
            use_batch_stats = tf.Variable(
                False, trainable=False,
                collections=[tf.GraphKeys.LOCAL_VARIABLES])
            m, v = tf.cond(use_batch_stats, update_running_stat,
                           lambda: (mean_running, variance_running))
            out = tf.nn.batch_normalization(out, m, v, beta, gamma, 1e-8)

        if activation == 'tanh':
            out = tf.nn.tanh(out)
        if activation == 'sigmoid':
            out = tf.nn.sigmoid(out)

        aconv1d_index += 1
        return out
def inference(images):
    """Build a three-layer convolutional network on `images`.

    Layers: 5x5 conv (3 -> 64) + ReLU + LRN, 1x1 conv (64 -> 32) + ReLU +
    LRN, then a linear 5x5 conv (32 -> 3) with bias.

    Args:
        images: Input tensor with 3 channels.

    Returns:
        The 3-channel output of the final convolution plus bias.
    """
    def _lrn(tensor, op_name):
        # Local response normalization, same settings for both layers.
        return tf.nn.lrn(tensor, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                         name=op_name)

    with tf.variable_scope("conv1") as scope:
        kernel = _variable_with_weight_decay(
            "weights", [5, 5, 3, 64], stddev=1e-4, wd=0.0)
        conv = conv2d_basic(images, kernel)
        bias = _variable_on_cpu("bias", [64], tf.constant_initializer(0.0))
        h_conv1 = tf.nn.relu(conv + bias, name=scope.name)
        activation_summary(h_conv1)

    norm1 = _lrn(h_conv1, 'norm1')

    with tf.variable_scope("conv2") as scope:
        kernel = _variable_with_weight_decay(
            "weights", [1, 1, 64, 32], stddev=1e-4, wd=0.0)
        conv = conv2d_basic(norm1, kernel)
        bias = _variable_on_cpu("bias", [32], tf.constant_initializer(0.0))
        h_conv2 = tf.nn.relu(conv + bias, name=scope.name)
        activation_summary(h_conv2)

    norm2 = _lrn(h_conv2, 'norm2')

    with tf.variable_scope("output") as scope:
        kernel = _variable_with_weight_decay(
            "weights", [5, 5, 32, 3], stddev=1e-4, wd=0.0)
        conv = conv2d_basic(norm2, kernel)
        bias = _variable_on_cpu("bias", [3], tf.constant_initializer(0.0))
        result = tf.nn.bias_add(conv, bias, name=scope.name)

    return result
def testDiscreteBottleneckVQCond(self):
    """Check discrete_bottleneck output shapes and that `means` stays zero."""
    hidden_size = 60
    z_size = 4
    num_codes = 2**z_size
    x = tf.zeros(shape=[100, 1, hidden_size], dtype=tf.float32)

    with tf.variable_scope("test2", reuse=tf.AUTO_REUSE):
        means = tf.get_variable(
            "means",
            shape=[1, 1, num_codes, hidden_size],
            initializer=tf.constant_initializer(0.),
            dtype=tf.float32)

        # One EMA count / EMA means variable per block (a single block here).
        ema_count = [
            tf.get_variable(
                "ema_count", [1, num_codes],
                initializer=tf.constant_initializer(0),
                trainable=False)
        ]
        with tf.colocate_with(means):
            ema_means = [
                tf.get_variable(
                    "ema_means",
                    initializer=means.initialized_value()[0],
                    trainable=False)
            ]

        cond = tf.cast(0.0, tf.bool)
        bottleneck_outputs = discretization.discrete_bottleneck(
            x, hidden_size, z_size, 32, means=means, num_blocks=1,
            cond=cond, ema_means=ema_means, ema_count=ema_count,
            name="test2")
        x_means_dense, x_means_hot, _, _, _ = bottleneck_outputs

        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            x_means_dense_eval, x_means_hot_eval = sess.run(
                [x_means_dense, x_means_hot])
            means_eval = sess.run(means)

        self.assertEqual(x_means_dense_eval.shape, (100, 1, hidden_size))
        self.assertEqual(x_means_hot_eval.shape, (100, 1))
        self.assertAllClose(means_eval,
                            np.zeros((1, 1, num_codes, hidden_size)))
def instancenorm(inputdata, epsilon=1e-5, data_format='NHWC', use_affine=True,
                 name=None):
    """Instance-normalize a 4-D tensor over its two spatial axes.

    :param inputdata: 4-D input tensor with a statically known channel count
    :param epsilon: value added to the variance for numerical stability
    :param data_format: 'NHWC' normalizes over axes [1, 2]; any other value
        is treated as channels-first and normalizes over axes [2, 3]
    :param use_affine: when false, return the plain normalized tensor (the
        op is then named 'output'); otherwise apply learned 'beta'/'gamma'
    :param name: op name for the affine result
    :return: the normalized tensor
    :raises ValueError: if the input is not 4-D or the channel dimension is
        unknown
    """
    shape = inputdata.get_shape().as_list()
    if len(shape) != 4:
        raise ValueError("Input data of instancebn layer has to be 4D tensor")

    channels_last = data_format == 'NHWC'
    axis = [1, 2] if channels_last else [2, 3]
    ch = shape[3] if channels_last else shape[1]
    # Shape that broadcasts the per-channel parameters against the input.
    new_shape = [1, 1, 1, ch] if channels_last else [1, ch, 1, 1]
    if ch is None:
        raise ValueError("Input of instancebn require known channel!")

    mean, var = tf.nn.moments(inputdata, axis, keep_dims=True)

    if use_affine:
        beta = tf.reshape(
            tf.get_variable('beta', [ch],
                            initializer=tf.constant_initializer()),
            new_shape)
        gamma = tf.reshape(
            tf.get_variable('gamma', [ch],
                            initializer=tf.constant_initializer(1.0)),
            new_shape)
        return tf.nn.batch_normalization(
            inputdata, mean, var, beta, gamma, epsilon, name=name)

    return tf.divide(inputdata - mean, tf.sqrt(var + epsilon), name='output')
def __init__(self, max_id, shortlist_size=100, name_prefix=''):
    """Creates a new TopN.

    Args:
        max_id: Length of the `id_to_score` variable.
        shortlist_size: Shortlist capacity; the shortlist variables hold
            `shortlist_size + 1` entries.
        name_prefix: Prefix prepended to every variable name.
    """
    self.ops = topn_ops.Load()
    self.shortlist_size = shortlist_size
    lowest_score = tf.float32.min
    shortlist_shape = [shortlist_size + 1]

    # id_to_score contains all the scores we are tracking.
    self.id_to_score = tf.get_variable(
        name=name_prefix + 'id_to_score',
        dtype=tf.float32,
        shape=[max_id],
        initializer=tf.constant_initializer(lowest_score))

    # sl_ids and sl_scores together satisfy four invariants:
    # 1) If sl_ids[i] != -1, then
    #    id_to_score[sl_ids[i]] = sl_scores[i] >= sl_scores[0]
    # 2) sl_ids[0] is the number of i > 0 for which sl_ids[i] != -1.
    # 3) If id_to_score[i] > sl_scores[0], then
    #    sl_ids[j] = i for some j.
    # 4) If sl_ids[i] == -1, then sl_scores[i] = tf.float32.min.
    #
    # Ideally sl_ids[0] would start at 0, but that control dependency is
    # hard to pass to the other Ops. Instead, insert, remove and get_best
    # all treat sl_ids[0] == -1 as "the shortlist size is 0".
    self.sl_ids = tf.get_variable(
        name=name_prefix + 'shortlist_ids',
        dtype=tf.int64,
        shape=shortlist_shape,
        initializer=tf.constant_initializer(-1))
    self.sl_scores = tf.get_variable(
        name=name_prefix + 'shortlist_scores',
        dtype=tf.float32,
        shape=shortlist_shape,
        initializer=tf.constant_initializer(lowest_score))

    # TopN keeps track of its internal data dependencies, so the user
    # doesn't have to.
    self.last_ops = []
def linear(inputs, output_size, no_bias=False, bias_start_zero=False,
           matrix_start_zero=False, scope=None):
    """Define a linear connection: `inputs . Matrix (+ Bias)`.

    Args:
        inputs: 2-D tensor; its second dimension sets the matrix height.
        output_size: Number of output units.
        no_bias: When true the bias is not added to the output.
        bias_start_zero: Initialize 'Bias' to zero instead of the scope
            default initializer.
        matrix_start_zero: Initialize 'Matrix' to zero instead of the scope
            default initializer.
        scope: Variable scope name; defaults to 'Linear'.

    Returns:
        The projected tensor.
    """
    with tf.variable_scope(scope or 'Linear'):
        matrix_initializer = (
            tf.constant_initializer(0) if matrix_start_zero else None)
        bias_initializer = (
            tf.constant_initializer(0) if bias_start_zero else None)

        input_size = inputs.get_shape()[1].value
        matrix = tf.get_variable('Matrix', [input_size, output_size],
                                 initializer=matrix_initializer)
        # NOTE(review): 'Bias' is created even when no_bias is set, so the
        # variable exists in the graph without being used.
        bias_term = tf.get_variable('Bias', [output_size],
                                    initializer=bias_initializer)

        projected = tf.matmul(inputs, matrix)
        if no_bias:
            return projected
        return projected + bias_term
def __init__(
        self,
        layer=None,
        act=tf.identity,
        epsilon=1e-5,
        scale_init=tf.constant_initializer(1.0),
        offset_init=tf.constant_initializer(0.0),
        G=32,
        name='group_norm',
):
    """Group normalization over the output of `layer`.

    Args:
        layer: Previous layer; its `outputs` are normalized. The comments in
            the body treat them as [N, H, W, C].
        act: Activation applied to the normalized output.
        epsilon: Added to the variance before the square root.
        scale_init: Initializer of the per-channel 'scale' variable.
        offset_init: Initializer of the per-channel 'offset' variable.
        G: Requested number of groups, capped at the channel count.
        name: Layer name and variable scope.
    """
    Layer.__init__(self, name=name)
    self.inputs = layer.outputs
    print(" [TL] GroupNormLayer %s: epsilon:%f act:%s" % (self.name, epsilon, act.__name__))

    inputs_shape = get_shape(layer.outputs)
    batch, height, width = inputs_shape[0], inputs_shape[1], inputs_shape[2]
    channels = inputs_shape[-1]
    G = tf.minimum(G, channels)

    # [N, H, W, C] to [N, C, H, W], then split the channels into G groups.
    grouped = tf.transpose(self.inputs, [0, 3, 1, 2])
    grouped = tf.reshape(
        grouped, [batch, G, channels // G, height, width],
        name='group_reshape1')

    with tf.variable_scope(name) as vs:
        # Moments per (sample, group): reduce channels-in-group, H and W.
        mean, var = tf.nn.moments(grouped, [2, 3, 4], keep_dims=True)
        param_shape = [1, channels, 1, 1]
        scale = tf.get_variable('scale', shape=param_shape,
                                initializer=scale_init, dtype=D_TYPE)
        offset = tf.get_variable('offset', shape=param_shape,
                                 initializer=offset_init, dtype=D_TYPE)

        normalized = (grouped - mean) / tf.sqrt(var + epsilon)
        normalized = tf.reshape(
            normalized, shape=[batch, channels, height, width],
            name='group_reshape2')

        # Affine transform, back to [N, H, W, C], then the activation.
        self.outputs = scale * normalized + offset
        self.outputs = tf.transpose(self.outputs, [0, 2, 3, 1])
        self.outputs = act(self.outputs)

        variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)

    # Copy the bookkeeping of the previous layer and append this layer's.
    self.all_layers = list(layer.all_layers)
    self.all_params = list(layer.all_params)
    self.all_drop = dict(layer.all_drop)
    self.all_layers.extend([self.outputs])
    self.all_params.extend(variables)
def __call__(self, input_layer, epsilon=1e-5, decay=0.9, name="batch_norm",
             in_dim=None, phase=Phase.train):
    """Batch-normalize `input_layer.tensor` over axes [0, 1, 2].

    Args:
        input_layer: Wrapper exposing `shape`, `tensor` and `with_tensor`.
        epsilon: Variance epsilon.
        decay: Decay of the moving mean / variance.
        name: Variable scope of the layer.
        in_dim: Parameter size; defaults to the last entry of
            `input_layer.shape`.
        phase: `Phase.train` uses batch moments and updates the moving
            averages; any other phase uses the stored moving averages.

    Returns:
        `input_layer.with_tensor(...)` wrapping the normalized tensor.
    """
    shp = in_dim or input_layer.shape[-1]

    def _normalize(mean_t, variance_t):
        # Shared normalization call for both phases.
        return tf.nn.batch_norm_with_global_normalization(
            input_layer.tensor, mean_t, variance_t, self.beta, self.gamma,
            epsilon, scale_after_normalization=True)

    with tf.variable_scope(name) as scope:
        self.mean = self.variable(
            'mean', [shp], init=tf.constant_initializer(0.), train=False)
        self.variance = self.variable(
            'variance', [shp], init=tf.constant_initializer(1.0),
            train=False)
        self.gamma = self.variable(
            "gamma", [shp], init=tf.random_normal_initializer(1., 0.02))
        self.beta = self.variable(
            "beta", [shp], init=tf.constant_initializer(0.))

        if phase != Phase.train:
            normalized_x = _normalize(self.mean, self.variance)
        else:
            batch_mean, batch_variance = tf.nn.moments(
                input_layer.tensor, [0, 1, 2])
            batch_mean.set_shape((shp,))
            batch_variance.set_shape((shp,))

            update_moving_mean = moving_averages.assign_moving_average(
                self.mean, batch_mean, decay)
            update_moving_variance = moving_averages.assign_moving_average(
                self.variance, batch_variance, decay)

            # Tie the moving-average updates to the normalized output.
            with tf.control_dependencies(
                    [update_moving_mean, update_moving_variance]):
                normalized_x = _normalize(batch_mean, batch_variance)

        return input_layer.with_tensor(normalized_x, parameters=self.vars)
def get_transform(point_cloud, is_training, bn_decay=None, K = 3):
    """ Transform Net, input is BxNx3 gray image
        Return:
            Transformation matrix of size 3xK

        The final layer has zero-initialized weights and a bias offset by
        the flattened 3x3 identity, so the initial transform is the
        identity. Only K == 3 is supported (asserted).
    """
    input_dims = point_cloud.get_shape()
    batch_size = input_dims[0].value
    num_point = input_dims[1].value

    def _conv(tensor, channels, kernel, scope_name):
        # Conv block with batch norm; only width, kernel and scope vary.
        return tf_util.conv2d(tensor, channels, kernel, padding='VALID',
                              stride=[1,1], bn=True, is_training=is_training,
                              scope=scope_name, bn_decay=bn_decay)

    def _dense(tensor, units, scope_name):
        return tf_util.fully_connected(tensor, units, bn=True,
                                       is_training=is_training,
                                       scope=scope_name, bn_decay=bn_decay)

    input_image = tf.expand_dims(point_cloud, -1)
    net = _conv(input_image, 64, [1,3], 'tconv1')
    net = _conv(net, 128, [1,1], 'tconv3')
    net = _conv(net, 1024, [1,1], 'tconv4')
    # Pool over all points.
    net = tf_util.max_pool2d(net, [num_point,1], padding='VALID',
                             scope='tmaxpool')

    net = tf.reshape(net, [batch_size, -1])
    net = _dense(net, 128, 'tfc1')
    net = _dense(net, 128, 'tfc2')

    with tf.variable_scope('transform_XYZ') as sc:
        assert(K==3)
        weights = tf.get_variable('weights', [128, 3*K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        identity_offset = tf.constant([1,0,0,0,1,0,0,0,1], dtype=tf.float32)
        biases = tf.get_variable('biases', [3*K],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32) + identity_offset
        transform = tf.nn.bias_add(tf.matmul(net, weights), biases)

    transform = tf.reshape(transform, [batch_size, 3, K])
    return transform
def _build_net(self):
    """Build the policy-gradient network, its loss and its training op.

    Creates the placeholders `self.tf_obs`, `self.tf_acts` and
    `self.tf_vt`, a two-layer dense network whose softmax output is
    stored in `self.all_act_prob`, and `self.train_op`, which minimizes
    the mean of (negative log-probability of the taken action) times
    `self.tf_vt`.
    """
    with tf.name_scope('inputs'):
        self.tf_obs = tf.placeholder(
            tf.float32, [None, self.n_features], name="observations")
        self.tf_acts = tf.placeholder(
            tf.int32, [None, ], name="actions_num")
        self.tf_vt = tf.placeholder(
            tf.float32, [None, ], name="actions_value")

    # Hidden layer.
    layer = tf.layers.dense(
        inputs=self.tf_obs,
        units=10,
        activation=tf.nn.tanh,
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
        bias_initializer=tf.constant_initializer(0.1),
        name='fc1'
    )
    # Output layer: raw logits; the softmax is applied separately below.
    all_act = tf.layers.dense(
        inputs=layer,
        units=self.n_actions,
        activation=None,
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
        bias_initializer=tf.constant_initializer(0.1),
        name='fc2'
    )
    self.all_act_prob = tf.nn.softmax(all_act, name='act_prob')

    with tf.name_scope('loss'):
        neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=all_act, labels=self.tf_acts)
        # The objective is expressed as maximizing log_p * R, i.e.
        # minimizing its negative.
        loss = tf.reduce_mean(neg_log_prob * self.tf_vt)

    with tf.name_scope('train'):
        self.train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)
def __init__(self, sess, n_features, lr=0.01):
    """Build the critic network, its TD-error loss and its training op.

    Args:
        sess: Session object stored on the instance.
        n_features: Width of the single-row state placeholder.
        lr: Learning rate passed to the Adam optimizer.
    """
    self.sess = sess

    self.s = tf.placeholder(tf.float32, [1, n_features], "state")
    self.v_ = tf.placeholder(tf.float32, [1, 1], "v_next")
    self.r = tf.placeholder(tf.float32, None, 'r')

    with tf.variable_scope('Critic'):
        # The original author noted that a linear activation (None) would
        # be needed to guarantee the actor's convergence, but that a linear
        # approximator hardly learns the correct Q.
        hidden = tf.layers.dense(
            inputs=self.s,
            units=20,
            activation=tf.nn.relu,
            kernel_initializer=tf.random_normal_initializer(0., .1),
            bias_initializer=tf.constant_initializer(0.1),
            name='l1',
        )
        self.v = tf.layers.dense(
            inputs=hidden,
            units=1,
            activation=None,
            kernel_initializer=tf.random_normal_initializer(0., .1),
            bias_initializer=tf.constant_initializer(0.1),
            name='V',
        )

    with tf.variable_scope('squared_TD_error'):
        # TD_error = (r + gamma * V_next) - V_eval
        td_target = self.r + GAMMA * self.v_
        self.td_error = td_target - self.v
        self.loss = tf.square(self.td_error)

    with tf.variable_scope('train'):
        optimizer = tf.train.AdamOptimizer(lr)
        self.train_op = optimizer.minimize(self.loss)
def get_transform_K(inputs, is_training, bn_decay=None, K = 3):
    """ Transform Net, input is BxNx1xK gray image
        Return:
            Transformation matrix of size KxK

        The final layer has zero-initialized weights and a bias offset by
        the flattened KxK identity, so the initial transform is the
        identity.
    """
    input_dims = inputs.get_shape()
    batch_size = input_dims[0].value
    num_point = input_dims[1].value

    def _conv(tensor, channels, scope_name):
        # 1x1 conv block with batch norm.
        return tf_util.conv2d(tensor, channels, [1,1], padding='VALID',
                              stride=[1,1], bn=True, is_training=is_training,
                              scope=scope_name, bn_decay=bn_decay)

    def _dense(tensor, units, scope_name):
        return tf_util.fully_connected(tensor, units, bn=True,
                                       is_training=is_training,
                                       scope=scope_name, bn_decay=bn_decay)

    net = _conv(inputs, 256, 'tconv1')
    net = _conv(net, 1024, 'tconv2')
    # Pool over all points.
    net = tf_util.max_pool2d(net, [num_point,1], padding='VALID',
                             scope='tmaxpool')

    net = tf.reshape(net, [batch_size, -1])
    net = _dense(net, 512, 'tfc1')
    net = _dense(net, 256, 'tfc2')

    with tf.variable_scope('transform_feat') as sc:
        weights = tf.get_variable('weights', [256, K*K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        identity_offset = tf.constant(np.eye(K).flatten(), dtype=tf.float32)
        biases = tf.get_variable('biases', [K*K],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32) + identity_offset
        transform = tf.nn.bias_add(tf.matmul(net, weights), biases)

    transform = tf.reshape(transform, [batch_size, K, K])
    return transform
def add_model(self, input_data):
    """Build a two-conv network with a raw-input skip path into the FC layer.

    Two 11x11 VALID convolutions, each followed by ReLU and 2x2 max
    pooling, feed a 32-unit layer. That layer also receives a linear
    projection of the flattened raw input and has no bias. Dropout and a
    linear readout follow.

    Args:
        input_data: Single-channel image batch; flattened it must have
            `config.DIM_ETA * config.DIM_PHI` values per example.

    Returns:
        Unnormalized class scores from the readout layer.
    """
    def _trunc_normal():
        return tf.truncated_normal_initializer(stddev=0.1)

    def _small_const():
        return tf.constant_initializer(0.1)

    pool_window = [1, 2, 2, 1]
    unit_stride = [1, 1, 1, 1]

    with tf.variable_scope("FirstConv"):
        w_conv1 = tf.get_variable("w_conv1", (11, 11, 1, 32),
                                  initializer=_trunc_normal())
        b_conv1 = tf.get_variable("b_conv1", [32],
                                  initializer=_small_const())
        conv1 = tf.nn.conv2d(input_data, w_conv1, strides=unit_stride,
                             padding='VALID')
        hconv1 = tf.nn.relu(conv1 + b_conv1)
        h_pool1 = tf.nn.max_pool(hconv1, ksize=pool_window,
                                 strides=pool_window, padding='VALID')

    with tf.variable_scope("SecondConv"):
        w_conv2 = tf.get_variable("w_conv2", (11, 11, 32, 64),
                                  initializer=_trunc_normal())
        b_conv2 = tf.get_variable("b_conv2", [64],
                                  initializer=_small_const())
        conv2 = tf.nn.conv2d(h_pool1, w_conv2, strides=unit_stride,
                             padding='VALID')
        hconv2 = tf.nn.relu(conv2 + b_conv2)
        h_pool2 = tf.nn.max_pool(hconv2, ksize=pool_window,
                                 strides=pool_window, padding='VALID')

    with tf.variable_scope("FullyConnected"):
        conv_features = self.config.final_size*64
        flattend_input = tf.reshape(input_data, [self.config.batch_size, -1])
        w_input = tf.get_variable(
            "w_input", (self.config.DIM_ETA*self.config.DIM_PHI, 32),
            initializer=_trunc_normal())
        wfc1 = tf.get_variable("wfc1", (conv_features, 32),
                               initializer=_trunc_normal())
        h_pool2_flat = tf.reshape(h_pool2, [-1, conv_features])
        # Sum of the conv-feature path and the raw-input path; no bias.
        conv_path = tf.matmul(h_pool2_flat, wfc1)
        skip_path = tf.matmul(flattend_input, w_input)
        h_fc1 = tf.nn.relu(conv_path + skip_path)
        h_fc1_drop = tf.nn.dropout(h_fc1, self.dropout_placeholder)

    with tf.variable_scope("ReadoutLayer"):
        wfc2 = tf.get_variable("wfc2", (32, self.config.num_classes),
                               initializer=_trunc_normal())
        bfc2 = tf.get_variable("bfc2", [self.config.num_classes],
                               initializer=_small_const())
        y_conv = tf.matmul(h_fc1_drop, wfc2) + bfc2

    return y_conv
def __init__(self, sess, n_features, n_actions, lr=0.001):
    """Build the actor graph: policy network, objective and training op.

    Args:
        sess: session object, stored on the instance.
        n_features: width of the single-row state placeholder.
        n_actions: number of units in the softmax output layer.
        lr: learning rate handed to the Adam optimizer.
    """
    self.sess = sess

    self.s = tf.placeholder(tf.float32, shape=[1, n_features], name="state")
    self.a = tf.placeholder(tf.int32, shape=None, name="act")
    self.td_error = tf.placeholder(tf.float32, shape=None, name="td_error")

    def _dense(layer_in, width, nonlinearity, layer_name):
        # Dense layer with N(0, 0.1) weights and biases fixed at 0.1.
        return tf.layers.dense(
            inputs=layer_in,
            units=width,
            activation=nonlinearity,
            kernel_initializer=tf.random_normal_initializer(0., .1),
            bias_initializer=tf.constant_initializer(0.1),
            name=layer_name,
        )

    with tf.variable_scope('Actor'):
        hidden = _dense(self.s, 20, tf.nn.relu, 'l1')
        # Softmax output: one probability per action.
        self.acts_prob = _dense(hidden, n_actions, tf.nn.softmax, 'acts_prob')

    with tf.variable_scope('exp_v'):
        chosen_prob = self.acts_prob[0, self.a]
        log_prob = tf.log(chosen_prob)
        # Log-probability of the chosen action weighted by the TD error.
        self.exp_v = tf.reduce_mean(log_prob * self.td_error)

    with tf.variable_scope('train'):
        optimizer = tf.train.AdamOptimizer(lr)
        # Minimising -exp_v is the same as maximising exp_v.
        self.train_op = optimizer.minimize(-self.exp_v)
def batch_norm(x, phase_train, name='bn', decay=0.9, reuse=None, affine=True):
    """
    Batch normalization on convolutional maps or flat activations.

    from: https://stackoverflow.com/questions/33949786/how-could-i-
    use-batch-normalization-in-tensorflow

    Only modified to infer shape from input tensor x.

    [DEPRECATED] Use tflearn or slim batch normalization instead.

    Parameters
    ----------
    x
        Tensor. For rank-4 input the batch statistics are taken over axes
        [0, 1, 2]; for any other rank they are taken over axis [0].
    phase_train
        boolean tf.Variable, true indicates training phase
    name
        string, variable scope name
    decay : float, optional
        Decay of the exponential moving average that tracks the batch mean
        and variance.
    reuse : None, optional
        Forwarded to ``tf.variable_scope``.
    affine
        Whether to multiply the normalized values by ``gamma`` (and whether
        ``gamma`` is trainable). ``beta`` is always added.

    Return
    ------
    normed
        batch-normalized maps
    """
    with tf.variable_scope(name, reuse=reuse):
        shape = x.get_shape().as_list()
        beta = tf.get_variable(name='beta', shape=[shape[-1]],
                               initializer=tf.constant_initializer(0.0),
                               trainable=True)
        gamma = tf.get_variable(name='gamma', shape=[shape[-1]],
                                initializer=tf.constant_initializer(1.0),
                                trainable=affine)
        if len(shape) == 4:
            batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        else:
            batch_mean, batch_var = tf.nn.moments(x, [0], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=decay)
        ema_apply_op = ema.apply([batch_mean, batch_var])
        ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)

        def mean_var_with_update():
            # Update the moving averages before handing out the batch stats.
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        # Training: batch statistics (and EMA update); otherwise: the EMAs.
        mean, var = control_flow_ops.cond(phase_train,
                                          mean_var_with_update,
                                          lambda: (ema_mean, ema_var))

        # tf.nn.batch_norm_with_global_normalization only accepts rank-4
        # input, so the non-4D branch above could never be normalized with
        # it. tf.nn.batch_normalization computes the same
        # (x - mean) / sqrt(var + eps) * gamma + beta for any rank.
        normed = tf.nn.batch_normalization(
            x, mean, var, beta, gamma if affine else None, 1e-6)
    return normed
def __init__(self, env, task, visualise, unsupType, summary_writer, envWrap=False, designHead='universe', noReward=False):
    """
    An implementation of the A3C algorithm that is reasonably well-tuned for the VNC environments.
    Below, we will have a modest amount of complexity due to the way TensorFlow handles data parallelism.
    But overall, we'll define the model, specify its inputs, and describe how the policy gradients step
    should be computed.

    Args:
        env: environment; its ``action_space.n`` and ``observation_space.shape``
            size the networks.
        task: worker task index, used to build the worker device string.
        visualise: forwarded to ``RunnerThread``.
        unsupType: ``None`` disables the predictor networks; otherwise a
            string that is checked for the substrings 'state' and 'action'.
        summary_writer: not used in this constructor
            (``self.summary_writer`` is set to ``None`` at the end).
        envWrap: stored on the instance and forwarded to ``RunnerThread``.
        designHead: forwarded to the policy and predictor constructors.
        noReward: forwarded to ``RunnerThread``.
    """
    self.task = task
    self.unsup = unsupType is not None  # unsupType default is "action"
    self.envWrap = envWrap
    self.env = env
    self.distance = 0

    predictor = None
    numaction = env.action_space.n
    worker_device = "/job:worker/task:{}/cpu:0".format(task)

    # Global (shared) copies of the policy and, when enabled, the predictor.
    with tf.device(tf.train.replica_device_setter(1, worker_device=worker_device)):
        with tf.variable_scope("global"):
            self.network = LSTMPolicy(env.observation_space.shape, numaction, designHead)
            self.global_step = tf.get_variable("global_step", [], tf.int32, initializer=tf.constant_initializer(0, dtype=tf.int32),
                                               trainable=False)
            if self.unsup:
                with tf.variable_scope("predictor"):
                    if 'state' in unsupType:
                        self.ap_network = StatePredictor(env.observation_space.shape, numaction, designHead, unsupType)
                    else:
                        self.ap_network = StateActionPredictor(env.observation_space.shape, numaction, designHead)

    # Worker-local copies; losses and gradients are defined on these.
    with tf.device(worker_device):
        with tf.variable_scope("local"):
            self.local_network = pi = LSTMPolicy(env.observation_space.shape, numaction, designHead)
            pi.global_step = self.global_step
            if self.unsup:
                with tf.variable_scope("predictor"):
                    if 'state' in unsupType:
                        self.local_ap_network = predictor = StatePredictor(env.observation_space.shape, numaction, designHead, unsupType)
                    else:
                        self.local_ap_network = predictor = StateActionPredictor(env.observation_space.shape, numaction, designHead)

        # Computing a3c loss: https://arxiv.org/abs/1506.02438
        # print('a3c.loss()')
        # Scalar placeholders that are only fed into the "distance/*" summaries below.
        self.tfNowDistance = tf.placeholder(tf.float32, [], name="tfNowDistance")
        self.tfLastDistance = tf.placeholder(tf.float32, [], name="tfLastDistance")
        self.tfGradientDistance = tf.placeholder(tf.float32, [], name="tfGradientDistance")
        self.ac = tf.placeholder(tf.float32, [None, numaction], name="ac")
        self.adv = tf.placeholder(tf.float32, [None], name="adv")
        self.r = tf.placeholder(tf.float32, [None], name="r")
        log_prob_tf = tf.nn.log_softmax(pi.logits)
        prob_tf = tf.nn.softmax(pi.logits)
        # 1) the "policy gradients" loss: its derivative is precisely the policy gradient
        # notice that self.ac is a placeholder that is provided externally.
        # adv will contain the advantages, as calculated in process_rollout
        pi_loss = - tf.reduce_mean(tf.reduce_sum(log_prob_tf * self.ac, 1) * self.adv)  # Eq (19)
        # 2) loss of value function: l2_loss = (x-y)^2/2
        vf_loss = 0.5 * tf.reduce_mean(tf.square(pi.vf - self.r))  # Eq (28)
        # 3) entropy to ensure randomness
        entropy = - tf.reduce_mean(tf.reduce_sum(prob_tf * log_prob_tf, 1))
        # final a3c loss: lr of critic is half of actor
        self.loss = pi_loss + 0.5 * vf_loss - entropy * constants['ENTROPY_BETA']
        # These print the tensor objects at graph-construction time, not their values.
        print(pi_loss)
        print(self.loss)

        # compute gradients
        grads = tf.gradients(self.loss * 20.0, pi.var_list)  # batchsize=20. Factored out to make hyperparams not depend on it.

        # computing predictor loss
        if self.unsup:
            if 'state' in unsupType:
                self.predloss = constants['PREDICTION_LR_SCALE'] * predictor.forwardloss
            else:
                self.predloss = constants['PREDICTION_LR_SCALE'] * (predictor.invloss * (1-constants['FORWARD_LOSS_WT']) +
                                                                    predictor.forwardloss * constants['FORWARD_LOSS_WT'])
            predgrads = tf.gradients(self.predloss * 20.0, predictor.var_list)  # batchsize=20. Factored out to make hyperparams not depend on it.

            # do not backprop to policy
            # Policy gradients are multiplied by 0 until global_step exceeds
            # POLICY_NO_BACKPROP_STEPS, and by 1 afterwards.
            if constants['POLICY_NO_BACKPROP_STEPS'] > 0:
                grads = [tf.scalar_mul(tf.to_float(tf.greater(self.global_step, constants['POLICY_NO_BACKPROP_STEPS'])), grads_i)
                         for grads_i in grads]

        self.runner = RunnerThread(env, pi, constants['ROLLOUT_MAXLEN'], visualise,
                                   predictor, envWrap, noReward, task)

        # storing summaries
        bs = tf.to_float(tf.shape(pi.x)[0])  # not referenced again in this method
        if use_tf12_api:
            tf.summary.scalar("model/policy_loss", pi_loss)
            tf.summary.scalar("model/value_loss", vf_loss)
            tf.summary.scalar("model/entropy", entropy)
            tf.summary.image("model/state", pi.x)  # max_outputs=10
            tf.summary.scalar("model/grad_global_norm", tf.global_norm(grads))
            tf.summary.scalar("model/var_global_norm", tf.global_norm(pi.var_list))
            # The distance summaries exist only in this branch, not in the
            # legacy summary branch below.
            tf.summary.scalar("distance/last_distance", self.tfLastDistance)
            tf.summary.scalar("distance/now_distance", self.tfNowDistance)
            tf.summary.scalar("distance/gradient_distance", self.tfGradientDistance)
            if self.unsup:
                tf.summary.scalar("model/predloss", self.predloss)
                if 'action' in unsupType:
                    tf.summary.scalar("model/inv_loss", predictor.invloss)
                    tf.summary.scalar("model/forward_loss", predictor.forwardloss)
                tf.summary.scalar("model/predgrad_global_norm", tf.global_norm(predgrads))
                tf.summary.scalar("model/predvar_global_norm", tf.global_norm(predictor.var_list))
            self.summary_op = tf.summary.merge_all()
        else:
            tf.scalar_summary("model/policy_loss", pi_loss)
            tf.scalar_summary("model/value_loss", vf_loss)
            tf.scalar_summary("model/entropy", entropy)
            tf.image_summary("model/state", pi.x)
            tf.scalar_summary("model/grad_global_norm", tf.global_norm(grads))
            tf.scalar_summary("model/var_global_norm", tf.global_norm(pi.var_list))
            if self.unsup:
                tf.scalar_summary("model/predloss", self.predloss)
                if 'action' in unsupType:
                    tf.scalar_summary("model/inv_loss", predictor.invloss)
                    tf.scalar_summary("model/forward_loss", predictor.forwardloss)
                tf.scalar_summary("model/predgrad_global_norm", tf.global_norm(predgrads))
                tf.scalar_summary("model/predvar_global_norm", tf.global_norm(predictor.var_list))
            self.summary_op = tf.merge_all_summaries()

        #self.summary_writer = summary_writer
        #self.summary_writer.add_summary(tf.Summary.FromString(self.summary_op), self.global_step)

        # clip gradients
        # Gradients come from the local networks but are paired with the
        # global networks' variables, so the optimizer updates the global copy.
        grads, _ = tf.clip_by_global_norm(grads, constants['GRAD_NORM_CLIP'])
        grads_and_vars = list(zip(grads, self.network.var_list))
        if self.unsup:
            predgrads, _ = tf.clip_by_global_norm(predgrads, constants['GRAD_NORM_CLIP'])
            pred_grads_and_vars = list(zip(predgrads, self.ap_network.var_list))
            '''
            # testing loss
            distance_loss = tf.divide(tf.subtract(self.tfNowDistance, self.tfLastDistance), 100)
            dis_grads = tf.gradients(distance_loss, pi.var_list)  # batchsize=20. Factored out to make hyperparams not depend on it.
            dis_grads, _ = tf.clip_by_global_norm(dis_grads, constants['GRAD_NORM_CLIP'])
            dis_grads_and_vars = list(zip(dis_grads, self.ap_network.var_list))
            '''
            # NOTE(review): pred_grads_and_vars only exists when self.unsup is
            # true, so this concatenation is kept inside the guard.
            grads_and_vars = grads_and_vars + pred_grads_and_vars

        # update global step by batch size
        inc_step = self.global_step.assign_add(tf.shape(pi.x)[0])

        # each worker has a different set of adam optimizer parameters
        # TODO: make optimizer global shared, if needed
        print("Optimizer: ADAM with lr: %f" % (constants['LEARNING_RATE']))
        print("Input observation shape: ",env.observation_space.shape)
        opt = tf.train.AdamOptimizer(constants['LEARNING_RATE'])
        self.train_op = tf.group(opt.apply_gradients(grads_and_vars), inc_step)

        # copy weights from the parameter server to the local model
        sync_var_list = [v1.assign(v2) for v1, v2 in zip(pi.var_list, self.network.var_list)]
        if self.unsup:
            sync_var_list += [v1.assign(v2) for v1, v2 in zip(predictor.var_list, self.ap_network.var_list)]
        self.sync = tf.group(*sync_var_list)

        # initialize extras
        self.summary_writer = None
        self.local_steps = 0
def conv2d(inputs,
           num_output_channels,
           kernel_size,
           scope,
           stride=[1, 1],
           padding='SAME',
           data_format='NHWC',
           use_xavier=True,
           stddev=1e-3,
           weight_decay=None,
           activation_fn=tf.nn.relu,
           is_biases=True,
           bn=False,
           bn_decay=None,
           is_training=None):
    """ 2D convolution with non-linear operation.

    Args:
      inputs: 4-D tensor variable BxHxWxC (or BxCxHxW for 'NCHW')
      num_output_channels: int
      kernel_size: a list of 2 ints
      scope: string
      stride: a list of 2 ints (height stride, width stride); not modified
      padding: 'SAME' or 'VALID'
      data_format: 'NHWC' or 'NCHW'
      use_xavier: bool, use xavier_initializer if true
      stddev: float, stddev for truncated_normal init
      weight_decay: float
      activation_fn: function, or None to skip the activation
      is_biases: bool, whether to add a zero-initialized bias term
      bn: bool, whether to use batch norm
      bn_decay: float or float tensor variable in [0,1]
      is_training: bool Tensor variable

    Returns:
      Variable tensor
    """
    with tf.variable_scope(scope) as sc:
        kernel_h, kernel_w = kernel_size
        assert (data_format == 'NHWC' or data_format == 'NCHW')
        if data_format == 'NHWC':
            num_in_channels = inputs.get_shape()[-1].value
        elif data_format == 'NCHW':
            num_in_channels = inputs.get_shape()[1].value
        kernel_shape = [kernel_h, kernel_w,
                        num_in_channels, num_output_channels]
        kernel = _variable_with_weight_decay('weights',
                                             shape=kernel_shape,
                                             use_xavier=use_xavier,
                                             stddev=stddev,
                                             wd=weight_decay)
        stride_h, stride_w = stride
        # The order of the strides vector follows data_format: the spatial
        # strides sit in positions 1-2 for NHWC but in positions 2-3 for NCHW.
        if data_format == 'NHWC':
            strides = [1, stride_h, stride_w, 1]
        else:
            strides = [1, 1, stride_h, stride_w]
        outputs = tf.nn.conv2d(inputs, kernel,
                               strides,
                               padding=padding,
                               data_format=data_format)
        if is_biases:
            biases = _variable_on_cpu('biases', [num_output_channels],
                                      tf.constant_initializer(0.0))
            outputs = tf.nn.bias_add(outputs, biases, data_format=data_format)

        if bn:
            outputs = batch_norm_for_conv2d(outputs, is_training,
                                            bn_decay=bn_decay, scope='bn',
                                            data_format=data_format)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return outputs
def prelu(_x, scope=None):
    """Parametric ReLU: ``max(0, x) + alpha * min(0, x)``.

    ``alpha`` is a variable named "prelu" with one entry per element of the
    last dimension of ``_x``, initialised to 0.1.
    """
    with tf.variable_scope(name_or_scope=scope, default_name="prelu"):
        slope = tf.get_variable(
            "prelu",
            shape=_x.get_shape()[-1],
            dtype=_x.dtype,
            initializer=tf.constant_initializer(0.1),
        )
        positive_part = tf.maximum(0.0, _x)
        negative_part = tf.minimum(0.0, _x)
        scaled_negative = slope * negative_part
        return positive_part + scaled_negative
def autoencoder(x_hat, x, dim_img, dim_z, n_hidden, keep_prob, last_term,Component_Count):
    """Build a four-component encoder/decoder graph and its training loss.

    Four encoders each produce (mu, sigma, mix) from ``x_hat``. Their latent
    samples are decoded by four sub-decoders, weighted by mixing
    coefficients (Dirichlet samples multiplied by a sampled per-component
    "dropout" factor), summed, and passed through a final decoder.

    ``last_term`` and ``Component_Count`` are not read in this function.
    The name ``batch_size`` is not a parameter and must exist in the
    enclosing module scope.

    Returns:
        Tuple ``(y, z, loss, neg_marginal_likelihood, KL_divergence,
        dropout_a, dropout_samples)`` where ``y`` is the final decoder
        output, ``z`` is the first component's latent sample and
        ``dropout_a`` is the float64 cast of the "dropout" variable.
    """
    # encoding
    mu1, sigma1, mix1 = Create_Encoder_MNIST(x_hat,n_hidden, dim_z, keep_prob,"encoder1")
    mu2, sigma2, mix2 = Create_Encoder_MNIST(x_hat,n_hidden, dim_z, keep_prob,"encoder2")
    mu3, sigma3, mix3 = Create_Encoder_MNIST(x_hat,n_hidden, dim_z, keep_prob, "encoder3")
    mu4, sigma4, mix4 = Create_Encoder_MNIST(x_hat,n_hidden, dim_z, keep_prob, "encoder4")

    # Per-component approximate posteriors.
    z1 = distributions.Normal(loc=mu1, scale=sigma1)
    z2 = distributions.Normal(loc=mu2, scale=sigma2)
    z3 = distributions.Normal(loc=mu3, scale=sigma3)
    z4 = distributions.Normal(loc=mu4, scale=sigma4)

    p = 0.5  # not used below
    #a = p / (1.0-p)
    ard_init = -10.
    dropout_a = tf.get_variable("dropout",shape=[1],initializer=tf.constant_initializer(ard_init))

    # Dropout of components
    m1 = np.ones(batch_size)
    s1 = np.zeros(batch_size)  # not used below
    dropout_a = tf.cast(dropout_a,tf.float64)
    # NOTE(review): dropout_a starts at -10 and is used directly as the scale
    # of a Normal; confirm a negative scale is intended here.
    dropout_dis = distributions.Normal(loc=m1, scale=dropout_a)
    # Four draws, transposed so that each column belongs to one component.
    dropout_samples = dropout_dis.sample(sample_shape=(4))
    dropout_samples = tf.transpose(dropout_samples)
    dropout_samples = tf.cast(dropout_samples, tf.float32)
    dropout_samples = tf.clip_by_value(dropout_samples, 1e-8, 1 - 1e-8)
    '''
    mix1 = mix1*dropout_samples[:,0:1]
    mix2 = mix2*dropout_samples[:,1:2]
    mix3 = mix3*dropout_samples[:,2:3]
    mix4 = mix4*dropout_samples[:,3:4]
    '''

    # Normalise the four encoder mixing outputs so they sum to one.
    sum1 = mix1 + mix2 + mix3 + mix4
    mix1 = mix1 / sum1
    mix2 = mix2 / sum1
    mix3 = mix3 / sum1
    mix4 = mix4 / sum1

    mix = tf.concat([mix1, mix2, mix3, mix4], 1)
    mix_parameters = mix
    # The normalised weights are used as Dirichlet concentration parameters.
    dist = tf.distributions.Dirichlet(mix)
    mix_samples = dist.sample()
    mix = mix_samples

    # Scale each sampled weight by its dropout factor, then renormalise.
    mix_dropout1 = dropout_samples[:, 0:1] * mix_samples[:, 0:1]
    mix_dropout2 = dropout_samples[:, 1:2] * mix_samples[:, 1:2]
    mix_dropout3 = dropout_samples[:, 2:3] * mix_samples[:, 2:3]
    mix_dropout4 = dropout_samples[:, 3:4] * mix_samples[:, 3:4]

    sum1 = mix_dropout1 + mix_dropout2 + mix_dropout3 + mix_dropout4
    mix_dropout1 = mix_dropout1 / sum1
    mix_dropout2 = mix_dropout2 / sum1
    mix_dropout3 = mix_dropout3 / sum1
    mix_dropout4 = mix_dropout4 / sum1

    # sampling by re-parameterization technique
    # z = mu + sigma * tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32)
    z1_samples = z1.sample()
    z2_samples = z2.sample()
    z3_samples = z3.sample()
    z4_samples = z4.sample()

    # dHSIC is computed over the four latent samples and added to the loss.
    ttf = []
    ttf.append(z1_samples)
    ttf.append(z2_samples)
    ttf.append(z3_samples)
    ttf.append(z4_samples)
    dHSIC_Value = dHSIC(ttf)

    # decoding
    y1 = Create_SubDecoder(z1_samples, n_hidden, dim_img, keep_prob,"decoder1")
    y2 = Create_SubDecoder(z2_samples, n_hidden, dim_img, keep_prob, "decoder2")
    y3 = Create_SubDecoder(z3_samples, n_hidden, dim_img, keep_prob, "decoder3")
    y4 = Create_SubDecoder(z4_samples, n_hidden, dim_img, keep_prob, "decoder4")

    # Weight each sub-decoder output by its (dropout-scaled) mixing coefficient.
    y1 = y1 * mix_dropout1
    y2 = y2 * mix_dropout2
    y3 = y3 * mix_dropout3
    y4 = y4 * mix_dropout4

    y = y1 + y2+y3+y4
    output = Create_FinalDecoder(y,n_hidden, dim_img, keep_prob, "final")
    y = output

    # Priors: zero-mean, unit-scale Normal of size dim_z, one per component.
    m1 = np.zeros(dim_z, dtype=np.float32)
    m1[:] = 0
    v1 = np.zeros(dim_z, dtype=np.float32)
    v1[:] = 1

    # p_z1 = distributions.Normal(loc=np.zeros(dim_z, dtype=np.float32),
    #                            scale=np.ones(dim_z, dtype=np.float32))
    p_z1 = distributions.Normal(loc=m1, scale=v1)

    m2 = np.zeros(dim_z, dtype=np.float32)
    m2[:] = 0
    v2 = np.zeros(dim_z, dtype=np.float32)
    v2[:] = 1
    p_z2 = distributions.Normal(loc=m2, scale=v2)

    m3 = np.zeros(dim_z, dtype=np.float32)
    m3[:] = 0
    v3 = np.zeros(dim_z, dtype=np.float32)
    v3[:] = 1
    p_z3 = distributions.Normal(loc=m3, scale=v3)

    m4 = np.zeros(dim_z, dtype=np.float32)
    m4[:] = 0
    v4 = np.zeros(dim_z, dtype=np.float32)
    v4[:] = 1
    p_z4 = distributions.Normal(loc=m4, scale=v4)

    # KL(posterior || prior): summed over axis 1, averaged over the batch.
    kl1 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z1, p_z1), 1))
    kl2 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z2, p_z2), 1))
    kl3 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z3, p_z3), 1))
    kl4 = tf.reduce_mean(tf.reduce_sum(distributions.kl_divergence(z4, p_z4), 1))

    # Self-assignments: no effect.
    kl1 = kl1
    kl2 = kl2
    kl3 = kl3
    kl4 = kl4

    KL_divergence = (kl1 + kl2 + kl3 + kl4) / 4.0

    # loss
    # Bernoulli log-likelihood of x under the reconstruction y.
    marginal_likelihood = tf.reduce_sum(x * tf.log(y) + (1 - x) * tf.log(1 - y), 1)
    marginal_likelihood = tf.reduce_mean(marginal_likelihood)

    # KL divergence between two Dirichlet distributions
    a1 = tf.clip_by_value(mix_parameters, 0.1, 0.8)
    a2 = tf.constant((0.25,0.25,0.25,0.25), shape=(batch_size, 4))

    # NOTE(review): the second digamma term is evaluated at the constant 1,
    # not at a sum of concentration parameters; verify against the intended
    # closed form.
    r = tf.reduce_sum((a1 - a2) * (tf.polygamma(0.0, a1) - tf.polygamma(0.0, 1)), axis=1)
    a = tf.lgamma(tf.reduce_sum(a1, axis=1)) - tf.lgamma(tf.reduce_sum(a2, axis=1)) + tf.reduce_sum(tf.lgamma(a2), axis=-1) - tf.reduce_sum(
        tf.lgamma(a1), axis=1) + r
    kl = a
    kl = tf.reduce_mean(kl)

    # Loss-term weights (all currently 1).
    p1 = 1
    p2 = 1
    p4 = 1

    ELBO = marginal_likelihood - KL_divergence * p2
    loss = -ELBO + kl * p1 + p4*dHSIC_Value + KL_Dropout2(dropout_a)

    z = z1_samples
    return y, z, loss, -marginal_likelihood,KL_divergence,dropout_a,dropout_samples
def create(limage, rimage, targets, state, net_type='win37_dep9'):
    """Assemble the two-branch siamese network, its loss and its train op.

    Args:
        limage: input tensor for the left branch.
        rimage: input tensor for the right branch.
        targets: targets passed to ``three_pixel_error``.
        state: architecture description; only used by the 'win37_dep9'
            network, where each branch receives its own deep copy.
        net_type: 'win37_dep9' or 'win19_dep9'; any other value terminates
            the process through ``sys.exit``.

    Returns:
        dict with keys 'lbranch', 'rbranch', 'loss', 'inner_product',
        'train_step', 'is_training', 'global_step' and 'lrate'.
    """
    is_training = tf.placeholder(tf.bool, [], name='is_training')

    with tf.name_scope('siamese_' + net_type):
        if net_type == 'win37_dep9':
            # The right-branch copy is taken from the left-branch copy, so
            # neither branch shares mutable state with the caller's object.
            left_state = copy.deepcopy(state)
            right_state = copy.deepcopy(left_state)
            lbranch = net37.create_network(left_state, limage, is_training,
                                           reuse=False)
            rbranch = net37.create_network(right_state, rimage, is_training,
                                           reuse=True)
        elif net_type == 'win19_dep9':
            lbranch = net19.create_network(limage, is_training, reuse=False)
            rbranch = net19.create_network(rimage, is_training, reuse=True)
        else:
            sys.exit('Valid net_type: win37_dep9 or win19_dep9')

        prod_flatten, loss = three_pixel_error(lbranch, rbranch, targets)

        lrate = tf.placeholder(tf.float32, [], name='lrate')
        with tf.name_scope("optimizer"):
            global_step = tf.get_variable(
                "global_step", [],
                initializer=tf.constant_initializer(0.0),
                trainable=False)
            optimizer = tf.train.AdagradOptimizer(lrate)
            train_step = slim.learning.create_train_op(
                loss, optimizer, global_step=global_step)

            # Make the reported loss depend on any pending update ops.
            pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            if pending_updates:
                grouped_updates = tf.group(*pending_updates)
                loss = control_flow_ops.with_dependencies([grouped_updates],
                                                          loss)

        net = dict(
            lbranch=lbranch,
            rbranch=rbranch,
            loss=loss,
            inner_product=prod_flatten,
            train_step=train_step,
            is_training=is_training,
            global_step=global_step,
            lrate=lrate,
        )

    return net
def discriminator(self, inputs, label, reuse=False):
    """Build the label-conditioned discriminator and its auxiliary Q head.

    The one-hot label is tiled to 28x28 planes and concatenated to the input
    on the channel axis. Two strided conv blocks are shared by two heads:
    "D" produces a sigmoid output, "Q" produces categorical logits.

    Args:
        inputs: image batch; assumed to be [batch_size, 28, 28, C] because
            the label planes are built at 28x28 -- TODO confirm.
        label: integer class labels, one per example.
        reuse: forwarded to every variable scope so that a second call can
            share the weights.

    Returns:
        (d_sigmoid, Q_cat_logit): the sigmoid of the D head and the
        per-class logits of the Q head, squeezed over the spatial axes.
    """
    with tf.variable_scope("discriminator", reuse=reuse):
        w_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.02)
        b_init = tf.constant_initializer(0.0)

        ont_hot_label = tf.one_hot(label, depth=self.num_class)
        # The label depth follows self.num_class instead of a hard-coded 10,
        # so the reshape agrees with the one-hot depth for any class count.
        label_fill = tf.reshape(
            ont_hot_label,
            shape=[self.batch_size, 1, 1, self.num_class]) * \
            tf.ones(shape=[self.batch_size, 28, 28, self.num_class])
        input_cat = tf.concat(values=[inputs, label_fill], axis=3)

        conv1 = tf.layers.conv2d(input_cat, 64, [4, 4], strides=[2, 2],
                                 padding="same",
                                 kernel_initializer=w_init,
                                 bias_initializer=b_init)
        lrelu1 = self.leakyReLU(
            tf.layers.batch_normalization(conv1, training=self.is_training))

        conv2 = tf.layers.conv2d(lrelu1, 128, [4, 4], strides=[2, 2],
                                 padding="same",
                                 kernel_initializer=w_init,
                                 bias_initializer=b_init)
        lrelu2 = self.leakyReLU(
            tf.layers.batch_normalization(conv2, training=self.is_training))

        with tf.variable_scope("D", reuse=reuse):
            d_logits = tf.layers.conv2d(lrelu2, 1, [7, 7], strides=(1, 1),
                                        padding='valid',
                                        kernel_initializer=w_init)
            d_sigmoid = tf.nn.sigmoid(d_logits)

        with tf.variable_scope("Q", reuse=reuse):
            q_share_conv1 = tf.layers.conv2d(lrelu2, 128, [4, 4],
                                             strides=(2, 2), padding='same',
                                             kernel_initializer=w_init)
            q_share_lrelu1 = self.leakyReLU(
                tf.layers.batch_normalization(q_share_conv1,
                                              training=self.is_training))
            feature_height, feature_width = q_share_lrelu1.get_shape(
            ).as_list()[1:3]
            # The kernel spans the full feature map (height x width). The
            # previous code used the height for both sides, which is only
            # correct for square maps.
            Q_cat_logit = tf.layers.conv2d(
                q_share_lrelu1, self.num_class,
                [feature_height, feature_width],
                strides=(1, 1), padding='valid',
                kernel_initializer=w_init)
            Q_cat_logit = tf.squeeze(Q_cat_logit, axis=[1, 2])

        return d_sigmoid, Q_cat_logit
def hwblock(inputs, num_filters, weighted_skip=False,
            biases_initializer=tf.constant_initializer(-1.0),
            kernel_size=(3, 3), stride=1, mid_stride=1, filters_ratio=4,
            padding='SAME', activation_fn=tf.nn.relu,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=None, trainable=True,
            scope=None, reuse=None, is_training=True):
    """Gated three-convolution block: ``outputs = (h - x) * t + x``.

    ``h`` and ``t`` are two parallel 1x1 -> kxk -> 1x1 conv stacks over
    ``inputs`` (each conv followed by batch norm). The last conv of the
    ``t`` stack uses a sigmoid activation, so ``t`` acts as a gate that
    blends ``h`` with the skip path ``x`` (algebraically
    ``t * h + (1 - t) * x``). Presumably a highway-style block -- the code
    does not name it.

    Args:
        inputs: input tensor for both stacks and for the skip path.
        num_filters: channel count of the first two convs in each stack; the
            last conv outputs ``filters_ratio * num_filters`` channels.
        weighted_skip: if true, the skip path is a 1x1 conv + batch norm
            instead of the raw input.
        biases_initializer: bias initializer of the gate's final conv only.
        kernel_size, stride, padding, activation_fn, weights_initializer,
            weights_regularizer, trainable, scope, reuse: defaults installed
            for every ``slim.conv2d`` call through ``slim.arg_scope``.
        mid_stride: stride of the middle conv in each stack; a value > 1
            also forces the convolutional skip path, which uses the same
            stride.
        filters_ratio: multiplier for the output channel count.
        is_training: forwarded to every ``slim.batch_norm``.

    Returns:
        The gated combination of ``h`` and ``x``.
    """
    with tf.variable_op_scope([inputs], scope, 'hwblock', reuse=reuse):
        with slim.arg_scope([slim.conv2d],
                            num_outputs=num_filters, kernel_size=kernel_size,
                            stride=stride, padding=padding,
                            activation_fn=activation_fn,
                            weights_initializer=weights_initializer,
                            weights_regularizer=weights_regularizer,
                            trainable=trainable,
                            scope=scope, reuse=reuse):
            # Transform path h: 1x1 -> kxk (mid_stride) -> 1x1 with no activation.
            h = slim.conv2d(inputs, kernel_size=(1, 1))
            h = slim.batch_norm(h, decay=0.9, epsilon=0.00005, is_training=is_training)
            h = slim.conv2d(h, stride=mid_stride)
            h = slim.batch_norm(h, decay=0.9, epsilon=0.00005, is_training=is_training)
            h = slim.conv2d(h, num_outputs=filters_ratio * num_filters, kernel_size=(1, 1),
                            activation_fn=None)
            h = slim.batch_norm(h, decay=0.9, epsilon=0.00005, is_training=is_training)

            # Gate path t: same layout, but the last conv is a sigmoid with
            # the caller-supplied bias initializer (default -1.0).
            t = slim.conv2d(inputs, kernel_size=(1, 1))
            t = slim.batch_norm(t, decay=0.9, epsilon=0.00005, is_training=is_training)
            t = slim.conv2d(t, stride=mid_stride)
            t = slim.batch_norm(t, decay=0.9, epsilon=0.00005, is_training=is_training)
            t = slim.conv2d(t, num_outputs=filters_ratio * num_filters, kernel_size=(1, 1),
                            activation_fn=tf.sigmoid,
                            biases_initializer=biases_initializer)
            t = slim.batch_norm(t, decay=0.9, epsilon=0.00005, is_training=is_training)

            # Skip path x: projected when requested or when the spatial size
            # changes (mid_stride > 1); otherwise the raw input.
            if weighted_skip or mid_stride > 1:
                x = slim.conv2d(inputs, num_outputs=filters_ratio * num_filters, kernel_size=(1, 1),
                                stride=mid_stride, activation_fn=None)
                x = slim.batch_norm(x, decay=0.9, epsilon=0.00005, is_training=is_training)
            else:
                x = inputs

            outputs = (h - x) * t + x
    return outputs
schedule_params = ScheduleParameters() # schedule_params.improve_steps = TrainingSteps(10000000000) schedule_params.improve_steps = TrainingSteps(400) # 400 epochs schedule_params.steps_between_evaluation_periods = TrainingSteps(1) schedule_params.evaluation_steps = EnvironmentEpisodes(10) schedule_params.heatup_steps = EnvironmentSteps(DATASET_SIZE) ######### # Agent # ######### agent_params = DDQNBCQAgentParameters() agent_params.network_wrappers['main'].batch_size = 128 # TODO cross-DL framework abstraction for a constant initializer? agent_params.network_wrappers['main'].heads_parameters = [ QHeadParameters(output_bias_initializer=tf.constant_initializer(-100)) ] agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps( 100) # agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(500) agent_params.algorithm.discount = 0.99 # agent_params.algorithm.action_drop_method_parameters = KNNParameters() agent_params.algorithm.action_drop_method_parameters = NNImitationModelParameters( ) # NN configuration agent_params.network_wrappers['main'].learning_rate = 0.0001 agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False agent_params.network_wrappers['main'].softmax_temperature = 0.2
def bias_variable(shape):
    """Create (or fetch) the float32 variable "biases", initialised to zero."""
    return tf.get_variable(
        "biases",
        shape,
        initializer=tf.constant_initializer(0.0),
        dtype=tf.float32,
    )
def train():
    """Build the training graph, then run the train/eval loop for MAX_EPOCH epochs.

    Reads its configuration from module-level names (GPU_INDEX, MODEL,
    BATCH_SIZE, NUM_POINT, FLAGS, WEIGHT_DECAY, OPTIMIZER, MOMENTUM,
    MAX_EPOCH, LOG_DIR). Restores from the latest checkpoint in LOG_DIR when
    one exists, otherwise initialises all variables. A checkpoint is written
    every 10th epoch, starting with epoch 0.

    Raises:
        ValueError: if OPTIMIZER is not 'momentum', 'adam' or 'sgd'.
    """
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(GPU_INDEX)):
            pointclouds_pl, labels_pl, direc_pl = MODEL.placeholder_inputs(
                BATCH_SIZE, NUM_POINT, FLAGS.normal)
            is_training_pl = tf.placeholder(tf.bool, shape=())

            # `batch` doubles as the global step incremented by minimize().
            batch = tf.get_variable('batch', [],
                                    initializer=tf.constant_initializer(0),
                                    trainable=False)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and loss
            pred = MODEL.get_model(pointclouds_pl, direc_pl, is_training_pl,
                                   bn_decay=bn_decay, use_normal=FLAGS.normal)
            loss = MODEL.get_loss(pred, labels_pl)
            reglosses = tf.get_collection('reglosses')
            total_loss = WEIGHT_DECAY * tf.add_n(reglosses, name='reg_loss') + loss
            tf.summary.scalar('total_loss', total_loss)

            correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / float(BATCH_SIZE)
            tf.summary.scalar('accuracy', accuracy)

            print("--- Get training operator")
            # Get training operator
            if OPTIMIZER == 'momentum':
                learning_rate = get_learning_rate(batch)
                optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                learning_rate = get_learning_rate(batch)
                optimizer = tf.train.AdamOptimizer(learning_rate)
            elif OPTIMIZER == 'sgd':
                # Piecewise-constant schedule keyed on the number of samples seen.
                boundaries = [400000.1, 800000.1, 1200000.1, 1600000.1,
                              2000000.1, 2400000.1, 2800000.1, 3200000.1]
                lr_sgd = [0.1, 0.05, 0.02, 0.01, 0.005, 0.002, 0.001, 0.0003, 0.0001]
                step = batch*BATCH_SIZE
                learning_rate = tf.train.piecewise_constant(step, boundaries=boundaries, values=lr_sgd)
                optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            else:
                # Without this branch an unknown value surfaced later as a
                # NameError on `learning_rate`.
                raise ValueError(
                    "Unsupported OPTIMIZER %r; expected 'momentum', 'adam' or 'sgd'"
                    % (OPTIMIZER,))
            tf.summary.scalar('learning_rate', learning_rate)
            train_op = optimizer.minimize(total_loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False

        # The context manager closes the session even if training raises.
        with tf.Session(config=config) as sess:
            # Add summary writers
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'), sess.graph)
            test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'), sess.graph)
            testp_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test_plus'), sess.graph)

            try:
                # Init variables
                init = tf.global_variables_initializer()
                ckpt = tf.train.get_checkpoint_state(LOG_DIR)
                if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                    saver.restore(sess, ckpt.model_checkpoint_path)
                else:
                    sess.run(init)

                ops = {'pointclouds_pl': pointclouds_pl,
                       'dir_pl': direc_pl,
                       'labels_pl': labels_pl,
                       'is_training_pl': is_training_pl,
                       'pred': pred,
                       'loss': total_loss,
                       'train_op': train_op,
                       'merged': merged,
                       'step': batch}

                for epoch in range(MAX_EPOCH):
                    log_string('**** EPOCH %03d ****' % (epoch))
                    sys.stdout.flush()

                    train_one_epoch(sess, ops, train_writer)
                    eval_one_epoch(sess, ops, test_writer, testp_writer)

                    # Save the variables to disk.
                    if epoch % 10 == 0:
                        save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"))
                        log_string("Model saved in file: %s" % save_path)
            finally:
                # Flush and release the event files.
                for writer in (train_writer, test_writer, testp_writer):
                    writer.close()
def _bias_variable(self, shape, name='biases'):
    """Create (or fetch) a bias variable initialised to the constant 0.1."""
    return tf.get_variable(
        name=name,
        shape=shape,
        initializer=tf.constant_initializer(0.1),
    )
IMG_SHORT_SIDE_LEN = [800, 600, 1000, 1200]
IMG_MAX_LENGTH = 1500
CLASS_NUM = 1
LABEL_TYPE = 0
RADUIUS = 6
OMEGA = 1

# Data-augmentation switches.
IMG_ROTATE = True
RGB2GRAY = True
VERTICAL_FLIP = True
HORIZONTAL_FLIP = True
IMAGE_PYRAMID = True

# --------------------------------------------- Network_config
SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None)
SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0)
PROBABILITY = 0.01
# Bias b = -log((1 - p) / p), chosen so that sigmoid(b) == PROBABILITY.
FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(value=-math.log((1.0 - PROBABILITY) / PROBABILITY))
WEIGHT_DECAY = 1e-4
USE_GN = False
FPN_CHANNEL = 256

# ---------------------------------------------Anchor config
LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7']
BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512]
ANCHOR_STRIDE = [8, 16, 32, 64, 128]
ANCHOR_SCALES = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]
# `1 / 2.` uses a float divisor like its siblings, so the ratio is 0.5 even
# under Python 2 integer-division rules (where `1 / 2` would be 0).
ANCHOR_RATIOS = [1, 1 / 2., 2., 1 / 3., 3., 5., 1 / 5.]
ANCHOR_ANGLES = [-90, -75, -60, -45, -30, -15]
ANCHOR_SCALE_FACTORS = None
USE_CENTER_OFFSET = True
def inference_nolx227(x, pkeep):
    """Build the 227x227 single-channel classification network.

    Three conv stages (each followed by max-pool and dropout; the first two
    also by local response normalisation) feed two 512-unit fully connected
    layers and a linear output layer. Dropout with keep probability
    ``pkeep`` is also applied to the input image itself. Tensors are printed
    at graph-construction time for debugging.

    Args:
        x: input reshaped to [-1, 227, 227, 1].
        pkeep: keep probability used by every dropout op.

    Returns:
        The un-normalised logits with NUM_CLASSES columns.
    """
    # Encoding phase
    # fer2013: we use 1 channel:
    #x = tf.image.rgb_to_hsv(x)

    # 1st conv stage
    with tf.variable_scope('conv1') as scope:
        x_image = tf.reshape(x, [-1, 227, 227, 1])
        # dropout on the input image
        x_image = tf.nn.dropout(x_image,keep_prob=pkeep)
        # NOTE(review): stddev is 1e-0 here, while the later conv layers use 1e-2 -- confirm.
        kernel = _variable_with_weight_decay('weights',shape=[7,7,1,96],stddev=1e-0,wd=None)
        conv = tf.nn.conv2d(x_image,kernel,[1,4,4,1],padding='SAME')
        biases = _variable_on_cpu('biases',[96],tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
        print('>> conv1=',conv1)

    pool1 = tf.nn.max_pool(conv1, ksize=[1,3,3,1], strides=[1,2,2,1],padding='SAME',name='pool1')
    print('>> pool1=', pool1)
    # dropout
    pool1 = tf.nn.dropout(pool1,keep_prob=pkeep)

    norm1 = tf.nn.local_response_normalization(pool1, bias = 1.0, depth_radius=5, alpha=0.0001, beta=0.75,name='norm1')
    print('>>>>> norm1',norm1)

    # 2nd conv stage
    with tf.variable_scope('conv2') as scope:
        kernel = _variable_with_weight_decay('weights',shape=[5,5,96,256],stddev=1e-2,wd=None)
        conv = tf.nn.conv2d(norm1, kernel, [1,1,1,1], padding='SAME')
        biases = _variable_on_cpu('biases',[256], tf.constant_initializer(0.0))
        pre_activation = tf.nn.bias_add(conv,biases)
        conv2 = tf.nn.relu(pre_activation,name=scope.name)
        print('>> conv2=', conv2)

    pool2= tf.nn.max_pool(conv2, ksize=[1,3,3,1], strides=[1,2,2,1],padding='SAME',name='pool2')
    print('>> pool2=', pool2)
    # dropout
    pool2 = tf.nn.dropout(pool2,keep_prob=pkeep)

    norm2 = tf.nn.local_response_normalization(pool2,bias=1.0,depth_radius=5,alpha=0.0001,beta=0.75,name='norm2')
    print('>>>>> norm2 =', norm2)

    # 3rd conv stage
    with tf.variable_scope('conv3') as scope:
        kernel = _variable_with_weight_decay('weights',shape=[3,3,256,384],stddev=1e-2,wd=None)
        conv = tf.nn.conv2d(norm2, kernel, [1,1,1,1], padding='SAME')
        biases = _variable_on_cpu('biases',[384], tf.constant_initializer(0.0))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(pre_activation, name=scope.name)
        print('>>>>>> conv3=',conv3)

    pool3 = tf.nn.max_pool(conv3, ksize=[1,3,3,1], strides=[1,2,2,1],padding='SAME',name='pool3')
    print('>>>>pool3 =', pool3)
    pool3 = tf.nn.dropout(pool3,keep_prob=pkeep)

    # first fully connected layer
    with tf.variable_scope('full4') as scope:
        # Flatten using x_image's static batch dimension.
        # NOTE(review): this presumably requires x to have a statically known
        # batch size -- confirm against the caller.
        reshape = tf.reshape(pool3, [x_image.get_shape()[0],-1 ] )
        dim = reshape.get_shape()[1].value
        print('>>>>> reshape = ', reshape,'dim=',dim)
        weights = _variable_with_weight_decay('weights',shape=[dim,512],stddev=5e-3,wd=0.004)  # wd??
        print('>>>>> weights =', weights)
        biases = _variable_on_cpu('biases',[512], tf.constant_initializer(1.0))
        full4 = tf.nn.relu(tf.matmul(reshape,weights) + biases, name=scope.name)
        print('>>>>> full4 = ', full4)

    drop4 = tf.nn.dropout(full4,keep_prob=pkeep)
    print('>>>> drop4 =', drop4)

    # second fully connected layer
    # fc5 = FullConnected(drop6, 512, 512, activation='relu')
    # fc5_out = fc5.output()
    with tf.variable_scope('full5') as scope:
        dim = drop4.get_shape()[1].value
        print('>>>> full5 >>>>> dim = ', dim)
        weights = _variable_with_weight_decay('weights',shape=[dim,512],stddev=5e-3,wd=0.004)  # wd??
        print('>>>> full5 >>>>> weights = ', weights)
        biases = _variable_on_cpu('biases',[512], tf.constant_initializer(1.0))
        full5 = tf.nn.relu(tf.matmul(drop4,weights) + biases, name=scope.name)
        print('>>>> full5 >>>>> full5 = ', full5)

    drop5 = tf.nn.dropout(full5,keep_prob=pkeep)

    # linear output layer (logits; no softmax is applied here)
    # fc8 = FullConnected(drop7, 512, 8, activation='relu')
    # fc8_out = fc8.output()
    with tf.variable_scope('softmax_linear') as scope:
        # `dim` is only printed here; the weight shape below hard-codes 512.
        dim = drop5.get_shape()[1].value
        print('>>>> softmax_linear >>>>> dim = ', dim)
        weights = _variable_with_weight_decay('weights',shape=[512,NUM_CLASSES],stddev=1e-2,wd=None)
        print('>>>> softmax_linear >>>>> weights = ', weights)
        biases = _variable_on_cpu('biases',[NUM_CLASSES], tf.constant_initializer(0.0))
        softmax_linear = tf.add(tf.matmul(drop5,weights), biases, name=scope.name)
        print('>>>> softmax_linear >>>>> = ', softmax_linear)
        _activation_summary(softmax_linear)

    return softmax_linear
def run_training():
    """Build the multi-GPU C3D graph, then train or evaluate it.

    The graph is created under the ``C3D`` variable scope. One tower is
    built per GPU (``gpu_num`` towers), each consuming a
    ``FLAGS.batch_size`` slice of the placeholders. The output layer
    (``weights['out']``, ``biases['out']``) is updated by ``opt_finetuning``
    (Adam, 1e-3); every other weight/bias is updated by ``opt_stable``
    (Adam, 1e-4).

    When ``FLAGS.mode == "train"`` the network is trained for
    ``FLAGS.epochs`` epochs; after every epoch the validation set is scored,
    summaries are written under ``./visual_logs`` and a checkpoint is saved
    in ``model_save_dir``. For any other mode the meta graph at
    ``FLAGS.model_metagraph`` is imported, the latest checkpoint from its
    directory is restored, and only the validation pass is run.
    """
    # Create model directory
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    model_filename = FLAGS.model_metagraph

    with tf.variable_scope("C3D"):
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        images_placeholder, labels_placeholder = placeholder_inputs()
        tower_grads1 = []
        tower_grads2 = []
        logits = []
        opt_stable = tf.train.AdamOptimizer(1e-4)
        opt_finetuning = tf.train.AdamOptimizer(1e-3)

        # NOTE(review): the leading kernel dimensions are taken from
        # c3d_model.CHANNELS, exactly as in the original code - confirm that
        # this is intended rather than a fixed kernel size.
        ch = c3d_model.CHANNELS
        num_classes = len(input_data.DS_CLASSES)
        with tf.variable_scope('var_name'):
            weights = {
                'wc1': _variable_with_weight_decay('wc1', [ch, ch, ch, ch, 64], 0.0005),
                'wc2': _variable_with_weight_decay('wc2', [ch, ch, ch, 64, 128], 0.0005),
                'wc3a': _variable_with_weight_decay('wc3a', [ch, ch, ch, 128, 256], 0.0005),
                'wc3b': _variable_with_weight_decay('wc3b', [ch, ch, ch, 256, 256], 0.0005),
                'wc4a': _variable_with_weight_decay('wc4a', [ch, ch, ch, 256, 512], 0.0005),
                'wc4b': _variable_with_weight_decay('wc4b', [ch, ch, ch, 512, 512], 0.0005),
                'wc5a': _variable_with_weight_decay('wc5a', [ch, ch, ch, 512, 512], 0.0005),
                'wc5b': _variable_with_weight_decay('wc5b', [ch, ch, ch, 512, 512], 0.0005),
                'wd1': _variable_with_weight_decay('wd1', [8192, 4096], 0.0005),
                'wd2': _variable_with_weight_decay('wd2', [4096, 4096], 0.0005),
                'out': _variable_with_weight_decay('wout', [4096, num_classes], 0.0005)
            }
            biases = {
                'bc1': _variable_with_weight_decay('bc1', [64], 0.000),
                'bc2': _variable_with_weight_decay('bc2', [128], 0.000),
                'bc3a': _variable_with_weight_decay('bc3a', [256], 0.000),
                'bc3b': _variable_with_weight_decay('bc3b', [256], 0.000),
                'bc4a': _variable_with_weight_decay('bc4a', [512], 0.000),
                'bc4b': _variable_with_weight_decay('bc4b', [512], 0.000),
                'bc5a': _variable_with_weight_decay('bc5a', [512], 0.000),
                'bc5b': _variable_with_weight_decay('bc5b', [512], 0.000),
                'bd1': _variable_with_weight_decay('bd1', [4096], 0.000),
                'bd2': _variable_with_weight_decay('bd2', [4096], 0.000),
                'out': _variable_with_weight_decay('bout', [num_classes], 0.000),
            }

        for gpu_index in range(0, gpu_num):
            with tf.device('/gpu:%d' % gpu_index):
                varlist2 = [weights['out'], biases['out']]
                # The union must be parenthesised: set difference binds
                # tighter than `|`, so without the parentheses only the
                # biases were filtered and weights['out'] ended up being
                # trained by both optimizers.
                varlist1 = list(
                    (set(weights.values()) | set(biases.values())) - set(varlist2))
                logit = c3d_model.inference_c3d(
                    images_placeholder[gpu_index * FLAGS.batch_size:(gpu_index + 1) * FLAGS.batch_size, :, :, :, :],
                    0.5,
                    weights,
                    biases)
                loss_name_scope = ('gpud_%d_loss' % gpu_index)
                loss = tower_loss(
                    loss_name_scope,
                    logit,
                    tf.cast(labels_placeholder[gpu_index * FLAGS.batch_size:(gpu_index + 1) * FLAGS.batch_size],
                            tf.float32))
                # Only the value from the last tower survives the loop and is
                # the one reported as "loss" further down.
                loss_rm = tf.reduce_mean(loss)
                grads1 = opt_stable.compute_gradients(loss, varlist1)
                grads2 = opt_finetuning.compute_gradients(loss, varlist2)
                tower_grads1.append(grads1)
                tower_grads2.append(grads2)
                logits.append(logit)
        logits = tf.concat(logits, 0)

        with tf.variable_scope("metrics"):
            predictions = _predict(logits, 1.5)
            accuracy, accuracy_update_op = tf.metrics.accuracy(labels_placeholder, predictions)
            precision, precision_update_op = tf.metrics.precision(labels_placeholder, predictions)
            recall, recall_update_op = tf.metrics.recall(labels_placeholder, predictions)
            f1score = 2 * precision * recall / (precision + recall)
            tf.summary.scalar("accuracy", accuracy)
            tf.summary.scalar("precision", precision)
            tf.summary.scalar("recall", recall)
            tf.summary.scalar("f1score", f1score)

        grads1 = average_gradients(tower_grads1)
        grads2 = average_gradients(tower_grads2)
        apply_gradient_op1 = opt_stable.apply_gradients(grads1)
        # Only the fine-tuning optimizer advances global_step.
        apply_gradient_op2 = opt_finetuning.apply_gradients(grads2, global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        train_op = tf.group(apply_gradient_op1, apply_gradient_op2, variables_averages_op)
        # Not used below; kept so the set of ops in the graph is unchanged.
        null_op = tf.no_op()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver(list(weights.values()) + list(biases.values()))
        init = tf.global_variables_initializer()
        # The tf.metrics counters are local variables created under
        # "C3D/metrics"; re-running their initializer resets the metrics.
        metrics_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="C3D/metrics")
        metrics_vars_init = tf.variables_initializer(var_list=metrics_vars)

        # Create a session for running Ops on the Graph.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)
        if FLAGS.mode == "train":
            print("Restoring [TRAIN]: starting from scratch.")
        else:
            print("Restoring [EVAL]", model_filename, os.path.isfile(model_filename))
            saver = tf.train.import_meta_graph(model_filename)
            checkpoint_dir = model_filename[:model_filename.rindex("/")]
            print(checkpoint_dir)
            saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))

        # Create summary writers (training mode only).
        if FLAGS.mode == "train":
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter('./visual_logs/train', sess.graph)
            test_writer = tf.summary.FileWriter('./visual_logs/test', sess.graph)

        # TODO: pass these as args:
        videos_folder = "datasets/Dataset_PatternRecognition/H3.6M"
        train_json_filename = "datasets/Dataset_PatternRecognition/json/dataset_training.json"
        train_npz_filename = "datasets/Dataset_PatternRecognition/npz/dataset_training.npz"
        val_json_filename = "datasets/Dataset_PatternRecognition/json/dataset_testing.json"
        val_npz_filename = "datasets/Dataset_PatternRecognition/npz/dataset_testing.npz"

        # Generate datasets:
        if FLAGS.mode == "train":
            train_X, train_y = _generate_dataset(train_npz_filename, sess, videos_folder, train_json_filename)
        val_X, val_y = _generate_dataset(val_npz_filename, sess, videos_folder, val_json_filename)

        # Train the network and compute metrics on the train and val sets:
        batch_size = FLAGS.batch_size * gpu_num
        if FLAGS.mode == "train":
            for epoch in range(FLAGS.epochs):
                print("Epoch {}/{}:".format(epoch+1, FLAGS.epochs))

                # Reset metrics:
                sess.run(metrics_vars_init)

                # Iterate through training set:
                # NOTE(review): randint draws indices with replacement, so an
                # epoch may repeat some samples and miss others - confirm
                # this is intended.
                rand_indices = np.random.randint(train_X.shape[0], size=train_X.shape[0])
                for idx in range(0, train_X.shape[0], batch_size):
                    # Extract the following batch_size indices:
                    L = min(idx+batch_size, train_X.shape[0])
                    train_images = _preprocess_data(train_X[rand_indices[idx:L]])
                    train_labels = train_y[rand_indices[idx:L]]
                    feed = {images_placeholder: train_images,
                            labels_placeholder: train_labels}

                    # Update metrics and get results:
                    sess.run([train_op, accuracy_update_op, precision_update_op, recall_update_op],
                             feed_dict=feed)
                    summary, train_curr_loss, train_curr_accuracy, train_curr_precision, \
                        train_curr_recall, train_curr_f1score = sess.run(
                            [merged, loss_rm, accuracy, precision, recall, f1score],
                            feed_dict=feed)

                    # Print results:
                    print("Progress: {}/{} - train_loss: {:2.3} - train_accuracy: {:2.3} - "
                          "train_precision: {:2.3} - train_recall: {:2.3} - train_f1score: {:2.3}"
                          .format(L, train_X.shape[0], train_curr_loss, train_curr_accuracy,
                                  train_curr_precision, train_curr_recall, train_curr_f1score),
                          end="\r")
                print("")

                # Save metrics to TensorBoard:
                train_writer.add_summary(summary, epoch+1)

                # Reset metrics:
                sess.run(metrics_vars_init)

                # Iterate through validation set:
                for idx in range(0, val_X.shape[0], batch_size):
                    # Extract the following batch_size indices:
                    L = min(idx+batch_size, val_X.shape[0])
                    val_images = _preprocess_data(val_X[idx:L])
                    val_labels = val_y[idx:L]
                    feed = {images_placeholder: val_images,
                            labels_placeholder: val_labels}

                    # Update metrics and get results:
                    sess.run([accuracy_update_op, precision_update_op, recall_update_op],
                             feed_dict=feed)
                    summary, val_curr_loss, val_curr_accuracy, val_curr_precision, \
                        val_curr_recall, val_curr_f1score = sess.run(
                            [merged, loss_rm, accuracy, precision, recall, f1score],
                            feed_dict=feed)

                    # Print results:
                    print("Progress: {}/{} - val_loss: {:2.3} - val_accuracy: {:2.3} - "
                          "val_precision: {:2.3} - val_recall: {:2.3} - val_f1score: {:2.3}"
                          .format(L, val_X.shape[0], val_curr_loss, val_curr_accuracy,
                                  val_curr_precision, val_curr_recall, val_curr_f1score),
                          end="\r")
                print("")

                # Save metrics to TensorBoard:
                test_writer.add_summary(summary, epoch+1)

                # Save checkpoint:
                saver.save(sess, os.path.join(model_save_dir, 'c3d_ucf_model'), global_step=epoch+1)
            print("done")
        else:
            # Reset metrics:
            sess.run(metrics_vars_init)

            # Iterate through validation set:
            for idx in range(0, val_X.shape[0], batch_size):
                # Extract the following batch_size indices:
                L = min(idx+batch_size, val_X.shape[0])
                val_images = _preprocess_data(val_X[idx:L])
                val_labels = val_y[idx:L]
                feed = {images_placeholder: val_images,
                        labels_placeholder: val_labels}

                # Update metrics and get results:
                sess.run([accuracy_update_op, precision_update_op, recall_update_op],
                         feed_dict=feed)
                val_curr_loss, val_curr_accuracy, val_curr_precision, \
                    val_curr_recall, val_curr_f1score = sess.run(
                        [loss_rm, accuracy, precision, recall, f1score],
                        feed_dict=feed)

                # Print results:
                print("Progress: {}/{} - val_loss: {:2.3} - val_accuracy: {:2.3} - "
                      "val_precision: {:2.3} - val_recall: {:2.3} - val_f1score: {:2.3}"
                      .format(L, val_X.shape[0], val_curr_loss, val_curr_accuracy,
                              val_curr_precision, val_curr_recall, val_curr_f1score),
                      end="\r")
            print("")
def spigot(x):
    """Return a constant initializer built from ``x``, or ``None``.

    When the outer-scope flag ``preinitialize`` is falsy nothing is built and
    ``None`` is returned. Otherwise ``x`` is converted to a NumPy array,
    multiplied by ``scale`` and wrapped in ``tf.constant_initializer``.
    """
    if not preinitialize:
        return None
    # changing this to 0.125 will still work, and learn. But, 0.0625 will _not_! (for sequence_sz=8)
    scale = 1
    scaled_values = scale * np.array(x)
    return tf.constant_initializer(scaled_values)
def run_training():
    """Fine-tune the embedding-augmented C3D classifier on multiple GPUs.

    A fresh graph is built with one tower per GPU (``gpu_num`` towers), each
    fed a ``FLAGS.batch_size`` slice of the image, embedding and label
    placeholders. The fully connected layers (``wd1``, ``wd2``, ``out`` and
    their biases) are trained by ``opt_finetuning`` (Adam, 1e-4); all
    remaining weights and biases are trained by ``opt_stable`` (Adam, 1e-5).

    If ``model_filename`` exists, the variables handled by ``opt_stable`` are
    restored from it. Training runs for ``FLAGS.max_steps`` steps; every 10
    steps (and on the last step) a checkpoint is written and accuracy is
    reported on the current training batch and on one batch read from the
    dev list.
    """
    # Create model directory
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    use_pretrained_model = True
    model_filename = "./conv3d_deepnetA_sport1m_iter_1900000_TF.model"

    with tf.Graph().as_default():
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        images_placeholder, labels_placeholder, embeddings_placeholder = placeholder_inputs(
            FLAGS.batch_size * gpu_num)

        def make_feed(images, labels, embeddings):
            # Same placeholder -> value mapping for every sess.run below.
            return {
                images_placeholder: images,
                labels_placeholder: labels,
                embeddings_placeholder: embeddings
            }

        tower_grads1 = []
        tower_grads2 = []
        logits = []
        opt_stable = tf.train.AdamOptimizer(1e-5)
        opt_finetuning = tf.train.AdamOptimizer(1e-4)

        with tf.variable_scope('var_name') as var_scope:
            weights = {
                'wc1': _variable_with_weight_decay('wc1', [3, 3, 3, 3, 64], 0.000),
                'wc2': _variable_with_weight_decay('wc2', [3, 3, 3, 64, 128], 0.000),
                'wc3a': _variable_with_weight_decay('wc3a', [3, 3, 3, 128, 256], 0.000),
                'wc3b': _variable_with_weight_decay('wc3b', [3, 3, 3, 256, 256], 0.000),
                'wc4a': _variable_with_weight_decay('wc4a', [3, 3, 3, 256, 512], 0.000),
                'wc4b': _variable_with_weight_decay('wc4b', [3, 3, 3, 512, 512], 0.000),
                'wc5a': _variable_with_weight_decay('wc5a', [3, 3, 3, 512, 512], 0.000),
                'wc5b': _variable_with_weight_decay('wc5b', [3, 3, 3, 512, 512], 0.000),
                'wd1': _variable_with_weight_decay(
                    'wd1', [8192 + c3d_model.TOTAL_EMBEDDING_DIM, 4096], 0.000),
                'wd2': _variable_with_weight_decay('wd2', [4096, 4096], 0.000),
                'out': _variable_with_weight_decay('wout', [4096, c3d_model.NUM_CLASSES], 0.000)
            }
            biases = {
                'bc1': _variable_with_weight_decay('bc1', [64], 0.000),
                'bc2': _variable_with_weight_decay('bc2', [128], 0.000),
                'bc3a': _variable_with_weight_decay('bc3a', [256], 0.000),
                'bc3b': _variable_with_weight_decay('bc3b', [256], 0.000),
                'bc4a': _variable_with_weight_decay('bc4a', [512], 0.000),
                'bc4b': _variable_with_weight_decay('bc4b', [512], 0.000),
                'bc5a': _variable_with_weight_decay('bc5a', [512], 0.000),
                'bc5b': _variable_with_weight_decay('bc5b', [512], 0.000),
                'bd1': _variable_with_weight_decay('bd1', [4096], 0.000),
                'bd2': _variable_with_weight_decay('bd2', [4096], 0.000),
                'out': _variable_with_weight_decay('bout', [c3d_model.NUM_CLASSES], 0.000),
            }

        for gpu_index in range(0, gpu_num):
            with tf.device('/gpu:%d' % gpu_index):
                # Fully connected layers are fine-tuned; the rest is "stable".
                varlist2 = [
                    weights['wd1'], weights['wd2'], weights['out'],
                    biases['bd1'], biases['bd2'], biases['out']
                ]
                every_variable = set(weights.values()) | set(biases.values())
                varlist1 = list(every_variable - set(varlist2))

                # Bounds of this tower's slice of the global batch.
                lo = gpu_index * FLAGS.batch_size
                hi = (gpu_index + 1) * FLAGS.batch_size
                logit = c3d_model.transfer_c3d(
                    images_placeholder[lo:hi, :, :, :, :],
                    embeddings_placeholder[lo:hi, :, :],  # FIX THIS?
                    0.5,
                    FLAGS.batch_size,
                    weights,
                    biases)
                loss_name_scope = ('gpud_%d_loss' % gpu_index)
                loss = tower_loss(loss_name_scope, logit, labels_placeholder[lo:hi])
                tower_grads1.append(opt_stable.compute_gradients(loss, varlist1))
                tower_grads2.append(opt_finetuning.compute_gradients(loss, varlist2))
                logits.append(logit)

        logits = tf.concat(logits, 0)
        accuracy = tower_acc(logits, labels_placeholder)
        tf.summary.scalar('accuracy', accuracy)

        grads1 = average_gradients(tower_grads1)
        grads2 = average_gradients(tower_grads2)
        apply_gradient_op1 = opt_stable.apply_gradients(grads1)
        apply_gradient_op2 = opt_finetuning.apply_gradients(
            grads2, global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        train_op = tf.group(apply_gradient_op1, apply_gradient_op2,
                            variables_averages_op)
        null_op = tf.no_op()

        # Create a saver for writing training checkpoints.
        # Only load pre-activation weights.
        saver = tf.train.Saver(varlist1)
        init = tf.global_variables_initializer()

        # Create a session for running Ops on the Graph.
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        sess.run(init)
        if os.path.isfile(model_filename) and use_pretrained_model:
            saver.restore(sess, model_filename)

        # Create summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(
            './visual_logs_classifier/train_strat_5fps_embed_%s' % experiment_id,
            sess.graph)
        test_writer = tf.summary.FileWriter(
            './visual_logs_classifier/test_strat_5fps_embed_%s' % experiment_id,
            sess.graph)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            train_images, train_labels, train_embeddings, _, _, _ = input_data.read_clip_and_label(
                filename='list/s5_train_%s.list' % experiment_id,
                batch_size=FLAGS.batch_size * gpu_num,
                num_frames_per_clip=c3d_model.NUM_FRAMES_PER_CLIP,
                crop_size=c3d_model.CROP_SIZE,
                embeddings=True,
                shuffle=True)
            train_feed = make_feed(train_images, train_labels, train_embeddings)
            sess.run(train_op, feed_dict=train_feed)
            duration = time.time() - start_time
            print('Step %d: %.3f sec' % (step, duration))

            # Save a checkpoint and evaluate the model periodically.
            is_last_step = (step + 1) == FLAGS.max_steps
            if step % 10 == 0 or is_last_step:
                saver.save(
                    sess,
                    os.path.join(
                        model_save_dir,
                        'strat_5_embed_ucf_model_%s/model' % experiment_id),
                    global_step=step)

                print('Training Data Eval:')
                summary, acc = sess.run([merged, accuracy], feed_dict=train_feed)
                print("accuracy: " + "{:.5f}".format(acc))
                train_writer.add_summary(summary, step)

                print('Validation Data Eval:')
                val_images, val_labels, val_embeddings, _, _, _ = input_data.read_clip_and_label(
                    filename='list/s5_dev_%s.list' % experiment_id,
                    batch_size=FLAGS.batch_size * gpu_num,
                    num_frames_per_clip=c3d_model.NUM_FRAMES_PER_CLIP,
                    crop_size=c3d_model.CROP_SIZE,
                    embeddings=True,
                    shuffle=True)
                summary, acc = sess.run(
                    [merged, accuracy],
                    feed_dict=make_feed(val_images, val_labels, val_embeddings))
                print("accuracy: " + "{:.5f}".format(acc))
                test_writer.add_summary(summary, step)
    print("done")
def prelu(x):
    """Apply a parametric ReLU to ``x``.

    Positive values pass through unchanged; negative values are multiplied
    by ``alpha``, a variable named 'alpha' whose shape is the last dimension
    of ``x``, with the dtype of ``x`` and an initial value of 0.1. The
    variable is obtained with ``tf.get_variable``, so it lives in whatever
    variable scope is active at call time.
    """
    last_dim = x.get_shape()[-1]
    alpha = tf.get_variable('alpha',
                            shape=last_dim,
                            dtype=x.dtype,
                            initializer=tf.constant_initializer(0.1))
    positive_part = tf.maximum(0.0, x)
    negative_part = alpha * tf.minimum(0.0, x)
    return positive_part + negative_part
def inference(images, eval=False):
    """Build the CIFAR-10 model.

    Two conv/pool/LRN stages are followed by two fully connected ReLU layers
    and a final linear layer. Intermediate shapes are printed while the
    graph is being built.

    Args:
      images: Images returned from distorted_inputs() or inputs().
      eval: When truthy, a softmax is applied to the logits before they are
        returned.

    Returns:
      Logits, or their softmax when ``eval`` is truthy.
    """
    # All variables are instantiated through tf.get_variable() rather than
    # tf.Variable() so they can be shared across multiple GPU training runs.
    unit_strides = [1, 1, 1, 1]
    pool_kwargs = dict(ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
    lrn_kwargs = dict(bias=1.0, alpha=0.001 / 9.0, beta=0.75)

    # conv1
    with tf.variable_scope('conv1') as scope:
        filters = _variable_with_weight_decay('weights',
                                              shape=[5, 5, 3, 64],
                                              stddev=5e-2,
                                              wd=0.0)
        conv_out = tf.nn.conv2d(images, filters, unit_strides, padding='SAME')
        print("*********Conv1 Shape Post Conv***********: \n", conv_out.get_shape())
        bias = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
        biased = tf.nn.bias_add(conv_out, bias)
        print("*********Conv1 Shape pre_activation***********: \n", biased.get_shape())
        conv1 = tf.nn.relu(biased, name=scope.name)
        _activation_summary(conv1)
        print("*********Conv1 Shape Post Activation***********: \n", conv1.get_shape())

    # pool1
    pool1 = tf.nn.max_pool(conv1, name='pool1', **pool_kwargs)
    print("*********Pool1 Shape***********: \n", pool1.get_shape())
    # norm1
    norm1 = tf.nn.lrn(pool1, 4, name='norm1', **lrn_kwargs)

    # conv2
    with tf.variable_scope('conv2') as scope:
        filters = _variable_with_weight_decay('weights',
                                              shape=[5, 5, 64, 64],
                                              stddev=5e-2,
                                              wd=0.0)
        conv_out = tf.nn.conv2d(norm1, filters, unit_strides, padding='SAME')
        bias = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
        biased = tf.nn.bias_add(conv_out, bias)
        conv2 = tf.nn.relu(biased, name=scope.name)
        _activation_summary(conv2)
        print("*********Conv2 Shape***********: \n", conv2.get_shape())

    # norm2
    norm2 = tf.nn.lrn(conv2, 4, name='norm2', **lrn_kwargs)
    # pool2
    pool2 = tf.nn.max_pool(norm2, name='pool2', **pool_kwargs)
    print("*********Pool2 Shape***********: \n", pool2.get_shape())

    # local3
    with tf.variable_scope('local3') as scope:
        # Move everything into depth so we can perform a single matrix multiply.
        flattened = tf.reshape(pool2, [FLAGS.batch_size, -1])
        dim = flattened.get_shape()[1].value
        print("*********Local3 Re-Shape***********: \n", dim)
        fc_weights = _variable_with_weight_decay('weights',
                                                 shape=[dim, 384],
                                                 stddev=0.04,
                                                 wd=0.004)
        fc_bias = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(flattened, fc_weights) + fc_bias,
                            name=scope.name)
        _activation_summary(local3)
        print("*********Local3 Out Shape***********: \n", local3.get_shape())

    # local4
    with tf.variable_scope('local4') as scope:
        fc_weights = _variable_with_weight_decay('weights',
                                                 shape=[384, 192],
                                                 stddev=0.04,
                                                 wd=0.004)
        fc_bias = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, fc_weights) + fc_bias,
                            name=scope.name)
        _activation_summary(local4)
        print("*********Local4 Shape***********: \n", local4.get_shape())

    # linear layer(WX + b),
    # Softmax is not applied here by default because
    # tf.nn.sparse_softmax_cross_entropy_with_logits accepts the unscaled
    # logits and performs the softmax internally for efficiency.
    with tf.variable_scope('softmax_linear') as scope:
        out_weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES],
                                                  stddev=1/192.0,
                                                  wd=0.0)
        out_bias = _variable_on_cpu('biases', [NUM_CLASSES],
                                    tf.constant_initializer(0.0))
        softmax_linear = tf.add(tf.matmul(local4, out_weights), out_bias,
                                name=scope.name)
        _activation_summary(softmax_linear)

    # During an eval step the logits are normalised with a softmax.
    if not eval:
        return softmax_linear
    return tf.nn.softmax(softmax_linear)
def inference(input_tensor, train, regularizer):
    """Build the conv-pool-conv-pool-fc-fc forward pass and return logits.

    Args:
      input_tensor: 4-D image batch fed to ``tf.nn.conv2d``; its channel
        count must match ``NUM_CHANNELS``. Height and width must be
        statically known; the batch dimension may be unknown.
      train: when truthy, dropout with keep probability 0.5 is applied to
        the first fully connected layer.
      regularizer: optional callable; when not ``None`` it is applied to
        both fully connected weight matrices and the results are added to
        the 'losses' collection.

    Returns:
      The un-normalised logits tensor produced by the last layer (``NUM_LABELS`` columns).
    """
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable(
            "weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable(
            "bias", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights,
                             strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    with tf.name_scope("layer2-pool1"):
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding="SAME")

    with tf.variable_scope("layer3-conv2"):
        conv2_weights = tf.get_variable(
            "weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable(
            "bias", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights,
                             strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

    with tf.name_scope("layer4-pool2"):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='SAME')
        pool_shape = pool2.get_shape().as_list()
        nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
        # A statically unknown batch size shows up as None, which tf.reshape
        # rejects; -1 asks TensorFlow to infer that dimension instead.
        batch_dim = pool_shape[0] if pool_shape[0] is not None else -1
        reshaped = tf.reshape(pool2, [batch_dim, nodes])

    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable(
            "weight", [nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Only the fully connected weights are regularised.
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable(
            "bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if train:
            fc1 = tf.nn.dropout(fc1, 0.5)

    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable(
            "weight", [FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable(
            "bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit
def variable_b(self, shape, initial = 0.01):
    """Return the variable named 'b' from the current variable scope.

    The variable is obtained with ``tf.get_variable`` using the given
    ``shape`` and a constant initializer set to ``initial``.
    """
    constant_init = tf.constant_initializer(initial)
    return tf.get_variable('b', shape = shape, initializer = constant_init)
def bias_variable(shape,name,dtype):
    """Return a zero-initialised variable obtained via ``tf.get_variable``.

    ``shape``, ``name`` and ``dtype`` are forwarded unchanged; the
    initializer is a constant 0.0.
    """
    zero_init = tf.constant_initializer(0.0)
    bias = tf.get_variable(name=name,
                           shape=shape,
                           dtype=dtype,
                           initializer=zero_init)
    return bias
return '/job:{}/replica:0/task:0/device:{}:0'.format( job_name, short_name.upper()) def device_fn(n): if 'Variable' in n.type: return long_device_name('CPU') else: return long_device_name('GPU') with tf.device(device_fn): x = tf.get_variable('x', shape=(), dtype=tf.int32, initializer=tf.constant_initializer(5)) y = tf.get_variable('y', shape=(), dtype=tf.int32, initializer=tf.constant_initializer(3)) z = x + y # Creates a single-process cluster, with an in-process server. server = tf.train.Server({job_name: ['localhost:1234']}) with tf.Session(server.target) as sess: sess.run(tf.global_variables_initializer()) print('Result: z = ', sess.run(z)) print('Available devices:') pprint(sess.list_devices())
def combine_noise(latent, ladder, name="default"):
    """Add ``ladder`` to ``latent``, scaled element-wise by a learned gate.

    The gate is a variable named "gate" whose shape is that of ``latent``
    without its first dimension, initialised to 0.1. The ``name`` argument
    is accepted but not used by this function.
    """
    gate_shape = latent.get_shape()[1:]
    gate = tf.get_variable("gate",
                           shape=gate_shape,
                           initializer=tf.constant_initializer(0.1))
    gated_ladder = tf.multiply(gate, ladder)
    return latent + gated_ladder
[x + '*' for x in ALL_GUIDES]))).flatten().tolist() print(TRAIN_MIRS) init_params = [ -4.0, # FREEAGO_INIT, 0.0, # GUIDE_OFFSET_INIT, -1.0, # PASS_OFFSET_INIT, -0.5, # DECAY_INIT, -8.5, # UTR_COEF_INIT ] _freeAGO_mean = tf.get_variable('freeAGO_mean', shape=(), initializer=tf.constant_initializer( init_params[0])) _freeAGO_guide_offset = tf.get_variable( 'freeAGO_guide_offset', shape=[NUM_TRAIN, 1], initializer=tf.constant_initializer(np.arange(NUM_TRAIN).reshape([-1, 1]))) _freeAGO_pass_offset = tf.get_variable('freeAGO_pass_offset', shape=[NUM_TRAIN, 1], initializer=tf.constant_initializer( init_params[2])) _freeAGO_all = tf.reshape(tf.concat([ _freeAGO_guide_offset + _freeAGO_mean, _freeAGO_pass_offset + _freeAGO_mean ], axis=1), [NUM_TRAIN * 2], name='freeAGO_all') filename = "/lab/bartel4_ata/kathyl/RNA_Seq/outputs/convnet/tfrecords/guide_passenger_only_canon.tfrecord"
def __init__(self, config, scope='MTNet'):
    """Build the model graph and expose its tensors as attributes.

    The prediction is the sum of a non-linear component (encoders over the
    memory input ``X`` and the query ``Q``, combined through a softmax
    attention) and, when ``config.highway_window > 0``, a linear
    autoregressive component over the last ``highway_window`` steps of
    ``Q``. The loss is the absolute difference between ``Y`` and the
    prediction, minimised with Adam at the learning rate fed through
    ``self.lr``.

    Args:
      config: object providing ``n``, ``T``, ``D``, ``K``,
        ``en_rnn_hidden_sizes`` and ``highway_window``.
      scope: name of the variable scope under which the model is built.
    """
    self.config = config
    cfg = self.config

    with tf.variable_scope(scope, reuse=False):
        X = tf.placeholder(tf.float32, shape=[None, cfg.n, cfg.T, cfg.D])
        Q = tf.placeholder(tf.float32, shape=[None, cfg.T, cfg.D])
        Y = tf.placeholder(tf.float32, shape=[None, cfg.K])
        lr = tf.placeholder(tf.float32)
        input_keep_prob = tf.placeholder(tf.float32)
        output_keep_prob = tf.placeholder(tf.float32)

        # ------- non-linear component ----------------
        last_rnn_hid_size = cfg.en_rnn_hidden_sizes[-1]

        # <batch_size, n, en_rnn_hidden_sizes>
        m_is = self.__encoder(X, cfg.n, scope='m')
        c_is = self.__encoder(X, cfg.n, scope='c')
        # <batch_size, 1, en_rnn_hidden_sizes>
        query_as_memory = tf.reshape(Q, shape=[-1, 1, cfg.T, cfg.D])
        u = self.__encoder(query_as_memory, 1, scope='in')

        # Attention weights: softmax over the n memory slots.
        p_is = tf.matmul(m_is, tf.transpose(u, perm=[0, 2, 1]))
        p_is = tf.squeeze(p_is, axis=[-1])
        p_is = tf.nn.softmax(p_is)
        # <batch_size, n, 1>
        p_is = tf.expand_dims(p_is, -1)

        # <batch_size, n, en_rnn_hidden_sizes> =
        #     <batch_size, n, en_rnn_hidden_sizes> * <batch_size, n, 1>
        o_is = tf.multiply(c_is, p_is)

        flat_width = last_rnn_hid_size * (cfg.n + 1)
        pred_w = tf.get_variable(
            'pred_w',
            shape=[flat_width, cfg.K],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        pred_b = tf.get_variable('pred_b',
                                 shape=[cfg.K],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.1))

        stacked = tf.concat([o_is, u], axis=1)
        pred_x = tf.reshape(stacked, shape=[-1, flat_width])
        # <batch_size, K>
        y_pred = tf.matmul(pred_x, pred_w) + pred_b

        # ------------ ar component ------------
        with tf.variable_scope('AutoRegression'):
            if cfg.highway_window > 0:
                ar_width = cfg.highway_window * cfg.D
                highway_ws = tf.get_variable(
                    'highway_ws',
                    shape=[ar_width, cfg.K],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=0.1))
                highway_b = tf.get_variable(
                    'highway_b',
                    shape=[cfg.K],
                    dtype=tf.float32,
                    initializer=tf.constant_initializer(0.1))
                # Last highway_window steps of the query, flattened.
                highway_x = tf.reshape(Q[:, -cfg.highway_window:],
                                       shape=[-1, ar_width])
                y_pred_l = tf.matmul(highway_x, highway_ws) + highway_b
                y_pred = y_pred + y_pred_l

    # metrics summary
    rmse_loss, _ = tf.metrics.root_mean_squared_error(
        Y, y_pred, updates_collections='summary_ops', name='rmse_metric')
    tf.summary.scalar("rmse_loss", rmse_loss)

    statistics_vars = tf.get_collection(tf.GraphKeys.METRIC_VARIABLES)
    statistics_vars_initializer = tf.variables_initializer(
        var_list=statistics_vars)

    loss = tf.losses.absolute_difference(Y, y_pred)
    with tf.name_scope('Train'):
        train_op = tf.train.AdamOptimizer(lr).minimize(loss)

    # Expose placeholders, outputs and ops on the instance.
    self.X = X
    self.Q = Q
    self.Y = Y
    self.input_keep_prob = input_keep_prob
    self.output_keep_prob = output_keep_prob
    self.lr = lr

    self.y_pred = y_pred
    self.loss = loss
    self.train_op = train_op

    self.reset_statistics_vars = statistics_vars_initializer
    self.merged_summary = tf.summary.merge_all()
    self.summary_updates = tf.get_collection('summary_ops')
def interfence(input_tensor, train, regularizer):
    """Build four conv/ReLU/pool stages plus two dense layers; return logits.

    Args:
      input_tensor: 4-D image batch fed to ``tf.nn.conv2d``; its channel
        count must match ``NUM_CHANNELS``. Height and width must be
        statically known; the batch dimension may be unknown.
      train: when truthy, dropout with keep probability 0.5 is applied to
        the first fully connected layer.
      regularizer: optional callable; when not ``None`` it is applied to
        both fully connected weight matrices and the results are added to
        the 'losses' collection.

    Returns:
      The un-normalised logits tensor produced by the last layer (``NUM_LABELS`` columns).
    """
    # (conv scope, pool scope, kernel size, input depth, output depth).
    # The scope names are irregular on purpose: they are kept exactly as
    # they were so existing variable names stay valid.
    stages = [
        ("layer1-conv1", 'layer1-pool1', CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP),
        ('layer2-conv1', 'layer2-pool2', CONV2_SIZE, CONV1_DEEP, CONV2_DEEP),
        ('layer3-conv1', 'layer3-pool1', CONV3_SIZE, CONV2_DEEP, CONV3_DEEP),
        ('layer4-conv1', 'layer4-pool1', CONV4_SIZE, CONV3_DEEP, CONV4_DEEP),
    ]
    features = input_tensor
    for conv_scope, pool_scope, kernel_size, in_depth, out_depth in stages:
        features = _conv_relu_pool(features, conv_scope, pool_scope,
                                   kernel_size, in_depth, out_depth)

    # layer5: flatten and first fully connected layer
    pool_shape = features.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    # A statically unknown batch size shows up as None, which tf.reshape
    # rejects; -1 asks TensorFlow to infer that dimension instead.
    batch_dim = pool_shape[0] if pool_shape[0] is not None else -1
    reshaped = tf.reshape(features, [batch_dim, nodes])

    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable(
            "weight", [nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=STD_DEV))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable(
            'bias', [FC_SIZE], initializer=tf.constant_initializer(STD_MEAN))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if train:
            fc1 = tf.nn.dropout(fc1, 0.5)

    # layer6: output layer
    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable(
            "weight", [FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=STD_DEV))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable(
            "bias", [NUM_LABELS], initializer=tf.constant_initializer(STD_MEAN))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit


def _conv_relu_pool(inputs, conv_scope, pool_scope, kernel_size, in_depth, out_depth):
    """Apply one SAME-padded conv + bias + ReLU stage followed by 2x2 max-pool."""
    with tf.variable_scope(conv_scope):
        conv_weights = tf.get_variable(
            "weight", [kernel_size, kernel_size, in_depth, out_depth],
            initializer=tf.truncated_normal_initializer(stddev=STD_DEV))
        conv_biases = tf.get_variable(
            "bias", [out_depth],
            initializer=tf.constant_initializer(STD_MEAN, dtype=tf.float32))
        conv = tf.nn.conv2d(inputs, conv_weights,
                            strides=[1, 1, 1, 1], padding='SAME')
        relu = tf.nn.relu(tf.nn.bias_add(conv, conv_biases))
    with tf.name_scope(pool_scope):
        return tf.nn.max_pool(relu, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')
def __init__(self,
             config,
             batch,
             word_mat=None,
             char_mat=None,
             trainable=True,
             opt=True,
             demo=False,
             graph=None):
    """Assemble the model graph: inputs, embeddings, masks, network, optimizer.

    Args:
        config: object providing the limits and hyper-parameters read below
            (``char_limit``, ``batch_size``, ``learning_rate``,
            ``grad_clip``, ...).
        batch: iterator whose ``get_next()`` yields the seven input tensors;
            only used when ``demo`` is false.
        word_mat: initial value of the non-trainable ``word_mat`` variable.
        char_mat: initial value of the ``char_mat`` variable.
        trainable: when true, the learning rate, Adam optimizer and
            ``train_op`` are created.
        opt: when true, inputs are sliced down to the longest context and
            question in the batch.
        demo: when true, inputs come from placeholders instead of ``batch``
            and the slice batch size is 1.
        graph: graph to build into; a new ``tf.Graph`` is created when None.
    """
    self.config = config
    self.demo = demo
    if graph is None:
        graph = tf.Graph()
    self.graph = graph

    with self.graph.as_default():
        self.global_step = tf.get_variable(
            'global_step',
            shape=[],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False)
        self.dropout = tf.placeholder_with_default(0.0, (), name="dropout")

        if not self.demo:
            (self.c, self.q, self.ch, self.qh,
             self.y1, self.y2, self.qa_id) = batch.get_next()
        else:
            self.c = tf.placeholder(
                tf.int32, [None, config.test_para_limit], "context")
            self.q = tf.placeholder(
                tf.int32, [None, config.test_ques_limit], "question")
            self.ch = tf.placeholder(
                tf.int32,
                [None, config.test_para_limit, config.char_limit],
                "context_char")
            self.qh = tf.placeholder(
                tf.int32,
                [None, config.test_ques_limit, config.char_limit],
                "question_char")
            self.y1 = tf.placeholder(
                tf.int32, [None, config.test_para_limit], "answer_index1")
            self.y2 = tf.placeholder(
                tf.int32, [None, config.test_para_limit], "answer_index2")

        # Embedding tables: the word table is frozen, the char table is not.
        word_init = tf.constant(word_mat, dtype=tf.float32)
        self.word_mat = tf.get_variable(
            "word_mat", initializer=word_init, trainable=False)
        char_init = tf.constant(char_mat, dtype=tf.float32)
        self.char_mat = tf.get_variable("char_mat", initializer=char_init)

        # Non-zero ids count as real tokens.
        self.c_mask = tf.cast(self.c, tf.bool)
        self.q_mask = tf.cast(self.q, tf.bool)
        self.c_len = tf.reduce_sum(tf.cast(self.c_mask, tf.int32), axis=1)
        self.q_len = tf.reduce_sum(tf.cast(self.q_mask, tf.int32), axis=1)

        if not opt:
            self.c_maxlen = config.para_limit
            self.q_maxlen = config.ques_limit
        else:
            # Trim every input to the longest text in the batch to save
            # resources.
            rows = 1 if self.demo else config.batch_size
            chars = config.char_limit
            self.c_maxlen = tf.reduce_max(self.c_len)
            self.q_maxlen = tf.reduce_max(self.q_len)
            # tf.slice(input, begin, size) extracts a slice from a tensor.
            self.c = tf.slice(self.c, [0, 0], [rows, self.c_maxlen])
            self.q = tf.slice(self.q, [0, 0], [rows, self.q_maxlen])
            self.c_mask = tf.slice(
                self.c_mask, [0, 0], [rows, self.c_maxlen])
            self.q_mask = tf.slice(
                self.q_mask, [0, 0], [rows, self.q_maxlen])
            self.ch = tf.slice(
                self.ch, [0, 0, 0], [rows, self.c_maxlen, chars])
            self.qh = tf.slice(
                self.qh, [0, 0, 0], [rows, self.q_maxlen, chars])
            self.y1 = tf.slice(self.y1, [0, 0], [rows, self.c_maxlen])
            self.y2 = tf.slice(self.y2, [0, 0], [rows, self.c_maxlen])

        # Number of non-zero character ids per word, flattened to 1-D.
        context_chars_set = tf.cast(tf.cast(self.ch, tf.bool), tf.int32)
        self.ch_len = tf.reshape(
            tf.reduce_sum(context_chars_set, axis=2), [-1])
        question_chars_set = tf.cast(tf.cast(self.qh, tf.bool), tf.int32)
        self.qh_len = tf.reshape(
            tf.reduce_sum(question_chars_set, axis=2), [-1])

        self.forward()
        total_params()

        if trainable:
            # Logarithmic warm-up, capped at config.learning_rate.
            warmup_scale = 0.001 / tf.log(999.)
            step_as_float = tf.cast(self.global_step, tf.float32)
            warmup_lr = warmup_scale * tf.log(step_as_float + 1)
            self.lr = tf.minimum(config.learning_rate, warmup_lr)
            # In deep learning notes, beta1, beta2 and epsilon are usually
            # 0.9, 0.999 and 1e-8.
            self.opt = tf.train.AdamOptimizer(
                learning_rate=self.lr, beta1=0.8, beta2=0.999, epsilon=1e-7)
            grads_and_vars = self.opt.compute_gradients(self.loss)
            raw_grads, grad_vars = zip(*grads_and_vars)
            clipped_grads, _ = tf.clip_by_global_norm(
                raw_grads, config.grad_clip)
            self.train_op = self.opt.apply_gradients(
                zip(clipped_grads, grad_vars), global_step=self.global_step)
def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
    """Build the AlexNet v2 network with TF-Slim layers.

    Described in: http://arxiv.org/pdf/1404.5997v2.pdf
    Parameters from:
    github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
    layers-imagenet-1gpu.cfg

    Note: every fully connected layer is expressed as a conv2d layer. For
    classification, resize the input to 224x224. For fully convolutional use,
    set spatial_squeeze to false. The LRN layers have been removed and the
    initializers changed from random_normal_initializer to
    xavier_initializer.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training.
      spatial_squeeze: whether to squeeze axes 1 and 2 out of the output.
        Useful to remove unnecessary dimensions for classification.
      scope: optional scope for the variables.

    Returns:
      A tuple (net, end_points): the output of the final 'fc8' layer
      (squeezed when spatial_squeeze is true) and the dict built from the
      end-points collection.
    """
    with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as var_scope:
        collection_name = var_scope.name + '_end_points'
        collected_layers = [slim.conv2d, slim.fully_connected,
                            slim.max_pool2d]
        # Record the output of every conv2d, fully_connected and max_pool2d.
        with slim.arg_scope(collected_layers,
                            outputs_collections=[collection_name]):
            # Convolutional trunk.
            conv1 = slim.conv2d(
                inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
            pool1 = slim.max_pool2d(conv1, [3, 3], 2, scope='pool1')
            conv2 = slim.conv2d(pool1, 192, [5, 5], scope='conv2')
            pool2 = slim.max_pool2d(conv2, [3, 3], 2, scope='pool2')
            conv3 = slim.conv2d(pool2, 384, [3, 3], scope='conv3')
            conv4 = slim.conv2d(conv3, 384, [3, 3], scope='conv4')
            conv5 = slim.conv2d(conv4, 256, [3, 3], scope='conv5')
            pool5 = slim.max_pool2d(conv5, [3, 3], 2, scope='pool5')

            # Classifier head: conv2d layers stand in for fully connected
            # ones.
            with slim.arg_scope(
                    [slim.conv2d],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=tf.constant_initializer(0.1)):
                fc6 = slim.conv2d(
                    pool5, 4096, [5, 5], padding='VALID', scope='fc6')
                drop6 = slim.dropout(
                    fc6, dropout_keep_prob, is_training=is_training,
                    scope='dropout6')
                fc7 = slim.conv2d(drop6, 4096, [1, 1], scope='fc7')
                drop7 = slim.dropout(
                    fc7, dropout_keep_prob, is_training=is_training,
                    scope='dropout7')
                net = slim.conv2d(
                    drop7,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=tf.zeros_initializer,
                    scope='fc8')

            # Turn the collected layer outputs into a dict.
            end_points = slim.utils.convert_collection_to_dict(
                collection_name)
            if spatial_squeeze:
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[var_scope.name + '/fc8'] = net
            return net, end_points
def __init__(self, args):
    """Build a GRU sequence autoencoder with a Gaussian latent state.

    Creates the input placeholders, an embedding table shared by encoder and
    decoder, a GRU encoder whose final state is projected to ``mu`` and
    ``log_sigma_sq``, a GRU decoder initialised from the latent ``z``, the
    reconstruction and latent losses, an Adam training op, a per-sequence
    likelihood score, and a saver.

    Args:
        args: object providing ``batch_size``, ``map_size``,
            ``x_latent_size``, ``rnn_size``, ``eval``, ``neg_size`` and
            ``learning_rate``.

    NOTE(review): the original text was flattened onto one line, so the
    extent of the "encoder" and "decoder" variable scopes below is a
    reconstruction. Confirm against existing checkpoints' variable names.
    """
    batch = args.batch_size

    tokens = tf.placeholder(
        shape=(batch, None), dtype=tf.int32, name='inputs')
    token_mask = tf.placeholder(
        shape=(batch, None), dtype=tf.float32, name='inputs_mask')
    lengths = tf.placeholder(
        shape=batch, dtype=tf.float32, name='seq_length')
    self.input_form = [tokens, token_mask, lengths]

    # Decoder inputs are the tokens shifted right by one zero column; the
    # targets and the mask get a zero column appended instead.
    encoder_inputs = tokens
    decoder_inputs = tf.concat(
        [tf.zeros(shape=(batch, 1), dtype=tf.int32), tokens], axis=1)
    decoder_targets = tf.concat(
        [tokens, tf.zeros(shape=(batch, 1), dtype=tf.int32)], axis=1)
    decoder_mask = tf.concat(
        [token_mask, tf.zeros(shape=(batch, 1), dtype=tf.float32)], axis=1)

    vocab_size = args.map_size[0] * args.map_size[1]
    out_size = vocab_size
    embedding_init = tf.random_uniform(
        [vocab_size, args.x_latent_size], -1.0, 1.0)
    embeddings = tf.Variable(embedding_init, dtype=tf.float32)
    encoder_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)
    decoder_embedded = tf.nn.embedding_lookup(embeddings, decoder_inputs)

    hidden = args.rnn_size
    with tf.variable_scope("encoder"):
        encoder_cell = tf.nn.rnn_cell.GRUCell(hidden)
        _, encoder_final_state = tf.nn.dynamic_rnn(
            encoder_cell,
            encoder_embedded,
            sequence_length=lengths,
            dtype=tf.float32,
        )

        # Linear heads producing the latent mean and log-variance.
        mu_w = tf.get_variable(
            "mu_w", [hidden, hidden], tf.float32,
            tf.random_normal_initializer(stddev=0.02))
        mu_b = tf.get_variable(
            "mu_b", [hidden], tf.float32,
            initializer=tf.constant_initializer(0.0))
        sigma_w = tf.get_variable(
            "sigma_w", [hidden, hidden], tf.float32,
            tf.random_normal_initializer(stddev=0.02))
        sigma_b = tf.get_variable(
            "sigma_b", [hidden], tf.float32,
            initializer=tf.constant_initializer(0.0))
        mu = tf.matmul(encoder_final_state, mu_w) + mu_b
        log_sigma_sq = tf.matmul(encoder_final_state, sigma_w) + sigma_b

        noise = tf.random_normal(
            shape=tf.shape(log_sigma_sq), mean=0, stddev=1,
            dtype=tf.float32)
        if not args.eval:
            # Reparameterised sample: mu + sigma * noise.
            z = mu + tf.sqrt(tf.exp(log_sigma_sq)) * noise
        else:
            z = tf.zeros(shape=(batch, hidden), dtype=tf.float32)
        self.batch_post_embedded = z

    with tf.variable_scope("decoder"):
        decoder_cell = tf.nn.rnn_cell.GRUCell(hidden)
        decoder_init_state = z
        decoder_outputs, _ = tf.nn.dynamic_rnn(
            decoder_cell,
            decoder_embedded,
            initial_state=decoder_init_state,
            sequence_length=lengths,
            dtype=tf.float32,
        )

        out_w = tf.get_variable(
            "out_w", [out_size, hidden], tf.float32,
            tf.random_normal_initializer(stddev=0.02))
        out_b = tf.get_variable(
            "out_b", [out_size], tf.float32,
            initializer=tf.constant_initializer(0.0))

    # Masked sampled-softmax reconstruction loss, averaged over time.
    flat_targets = tf.reshape(decoder_targets, [-1, 1])
    flat_outputs = tf.reshape(decoder_outputs, [-1, hidden])
    flat_step_loss = tf.nn.sampled_softmax_loss(
        weights=out_w,
        biases=out_b,
        labels=flat_targets,
        inputs=flat_outputs,
        num_sampled=args.neg_size,
        num_classes=out_size)
    step_loss = tf.reshape(flat_step_loss, [batch, -1])
    batch_rec_loss = tf.reduce_mean(decoder_mask * step_loss, axis=-1)

    # KL-style latent penalty per sequence.
    batch_latent_loss = -0.5 * tf.reduce_sum(
        1 + log_sigma_sq - tf.square(mu) - tf.exp(log_sigma_sq), axis=1)

    rec_loss = tf.reduce_mean(batch_rec_loss)
    self.rec_loss = rec_loss
    latent_loss = tf.reduce_mean(batch_latent_loss)
    self.latent_loss = latent_loss
    loss = tf.reduce_mean([rec_loss, latent_loss])
    self.loss = loss
    optimizer = tf.train.AdamOptimizer(args.learning_rate)
    self.train_op = optimizer.minimize(loss)

    # Score each sequence by the masked mean log-sigmoid of the target
    # token's output logit.
    target_out_w = tf.nn.embedding_lookup(out_w, decoder_targets)
    target_out_b = tf.nn.embedding_lookup(out_b, decoder_targets)
    target_logits = tf.reduce_sum(
        decoder_outputs * target_out_w, -1) + target_out_b
    self.batch_likelihood = tf.reduce_mean(
        decoder_mask * tf.log_sigmoid(target_logits),
        axis=-1,
        name="batch_likelihood")

    saver = tf.train.Saver(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES),
        max_to_keep=10)
    self.save = saver.save
    self.restore = saver.restore
def __encoder(self,
              origin_input_x,
              n,
              strides=(1, 1, 1, 1),
              padding='VALID',
              activation_func=tf.nn.relu,
              scope='Encoder'):
    '''
    Embed n series per sample with a CNN, an attention step and GRU layers.

    The batch_size dimension and the n dimension are treated as a single
    batch dimension (batch_size * n) for the convolution.

    :param origin_input_x: <batch_size, n, T, D>
    :param n: number of series per sample; must be a Python int because it
        drives a Python-level loop.
    :param strides: strides passed to tf.nn.conv2d. An immutable tuple is
        used as the default to avoid a shared mutable default argument; it
        is converted to a list before being handed to TensorFlow.
    :param padding: padding passed to tf.nn.conv2d.
    :param activation_func: activation applied after the convolution and
        used by the GRU cells.
    :param scope: suffix appended to 'Encoder_' to form the variable scope.
    :return: the embedding of the input <batch_size, n, last_rnn_hid_size>

    NOTE(review): Tc is computed as T - W + 1 regardless of `strides` and
    `padding`, so it only matches the conv output length for the defaults.
    NOTE(review): the original text was flattened onto one line; the extent
    of the 'CNN' variable scope below is a reconstruction. Confirm against
    existing checkpoints' variable names.
    '''
    # constants
    scope = 'Encoder_' + scope
    conv_hidden_size = self.config.en_conv_hidden_size
    Tc = self.config.T - self.config.W + 1
    last_rnn_hidden_size = self.config.en_rnn_hidden_sizes[-1]
    conv_strides = list(strides)

    # reshape input_x : <batch_size * n, T, D, 1>
    input_x = tf.reshape(
        origin_input_x, shape=[-1, self.config.T, self.config.D, 1])

    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        # cnn parameters
        with tf.variable_scope('CNN', reuse=tf.AUTO_REUSE):
            w_conv1 = tf.get_variable(
                'w_conv1',
                shape=[self.config.W, self.config.D, 1, conv_hidden_size],
                dtype=tf.float32,
                initializer=tf.truncated_normal_initializer(stddev=0.1))
            b_conv1 = tf.get_variable(
                'b_conv1',
                shape=[conv_hidden_size],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.1))
            # <batch_size_new, Tc, 1, en_conv_hidden_size>
            h_conv1 = activation_func(
                tf.nn.conv2d(input_x, w_conv1, conv_strides,
                             padding=padding) + b_conv1)
            if self.config.input_keep_prob < 1:
                h_conv1 = tf.nn.dropout(h_conv1,
                                        self.config.input_keep_prob)

        # temporal attention layer and gru layer
        # rnns
        rnns = [
            tf.nn.rnn_cell.GRUCell(h_size, activation=activation_func)
            for h_size in self.config.en_rnn_hidden_sizes
        ]
        # dropout
        if (self.config.input_keep_prob < 1
                or self.config.output_keep_prob < 1):
            rnns = [
                tf.nn.rnn_cell.DropoutWrapper(
                    rnn,
                    input_keep_prob=self.config.input_keep_prob,
                    output_keep_prob=self.config.output_keep_prob)
                for rnn in rnns
            ]
        if len(rnns) > 1:
            rnns = tf.nn.rnn_cell.MultiRNNCell(rnns)
        else:
            rnns = rnns[0]

        # attention layer
        # attention weights
        attr_v = tf.get_variable(
            'attr_v',
            shape=[Tc, 1],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        attr_w = tf.get_variable(
            'attr_w',
            shape=[last_rnn_hidden_size, Tc],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        attr_u = tf.get_variable(
            'attr_u',
            shape=[Tc, Tc],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        # rnn inputs
        # <batch_size, n, Tc, en_conv_hidden_size>
        rnn_input = tf.reshape(
            h_conv1, shape=[-1, n, Tc, conv_hidden_size])

        # n * <batch_size, last_rnns_size>
        res_hstates = tf.TensorArray(tf.float32, n)
        for k in range(n):
            # <batch_size, en_conv_hidden_size, Tc>
            attr_input = tf.transpose(rnn_input[:, k], perm=[0, 2, 1])

            # <batch_size, last_rnn_hidden_size>
            s_state = rnns.zero_state(self.config.batch_size, tf.float32)
            # With stacked cells the state is a tuple; attention uses the
            # state of the last layer.
            if len(self.config.en_rnn_hidden_sizes) > 1:
                h_state = s_state[-1]
            else:
                h_state = s_state

            for t in range(Tc):
                # h(t-1) dot attr_w
                h_part = tf.matmul(h_state, attr_w)

                # en_conv_hidden_size * <batch_size_new, 1>
                e_ks = tf.TensorArray(tf.float32, conv_hidden_size)
                # One attention score per conv channel i.
                _, output = tf.while_loop(
                    lambda i, _: tf.less(i, conv_hidden_size),
                    lambda i, output_ta: (i + 1, output_ta.write(
                        i,
                        tf.matmul(
                            tf.tanh(h_part + tf.matmul(
                                attr_input[:, i], attr_u)), attr_v))),
                    [0, e_ks])
                # <batch_size, en_conv_hidden_size, 1>
                e_ks = tf.transpose(output.stack(), perm=[1, 0, 2])
                e_ks = tf.reshape(e_ks, shape=[-1, conv_hidden_size])

                # <batch_size, en_conv_hidden_size>
                a_ks = tf.nn.softmax(e_ks)

                # Scale the step-t features by the attention weights
                # (row vector times a diagonal matrix).
                x_t = tf.matmul(
                    tf.expand_dims(attr_input[:, :, t], -2),
                    tf.matrix_diag(a_ks))
                # <batch_size, en_conv_hidden_size>
                x_t = tf.reshape(x_t, shape=[-1, conv_hidden_size])

                h_state, s_state = rnns(x_t, s_state)

            res_hstates = res_hstates.write(k, h_state)

    # <n, batch_size, hidden> -> <batch_size, n, hidden>
    return tf.transpose(res_hstates.stack(), perm=[1, 0, 2])