def __init__(self, distinctTagNum, w2vPath, c2vPath, numHidden):
    """Load both embedding tables and declare the model's variables and inputs.

    Args:
        distinctTagNum: number of distinct tags; used as the output width of
            the softmax weights and as the length of the bias.
        w2vPath: path passed to ``self.load_w2v`` for the word embeddings.
        c2vPath: path passed to ``self.load_w2v`` for the char embeddings.
        numHidden: hidden size; the softmax weights have ``numHidden * 2``
            rows.
    """
    self.distinctTagNum = distinctTagNum
    self.numHidden = numHidden

    word_dim = FLAGS.embedding_word_size
    char_dim = FLAGS.embedding_char_size
    sentence_len = FLAGS.max_sentence_len

    # Embedding tables as returned by load_w2v, wrapped in variables.
    self.w2v = self.load_w2v(w2vPath, word_dim)
    self.c2v = self.load_w2v(c2vPath, char_dim)
    self.words = tf.Variable(self.w2v, name="words")
    self.chars = tf.Variable(self.c2v, name="chars")

    with tf.variable_scope('Softmax'):
        self.W = tf.get_variable(
            name="weights",
            shape=[numHidden * 2, distinctTagNum],
            initializer=tf.truncated_normal_initializer(stddev=0.01),
            regularizer=tf.contrib.layers.l2_regularizer(0.001))
        # NOTE(review): name="bias" is an argument of tf.zeros here, so it
        # names the zeros op and not the tf.Variable. Left as-is because
        # moving it would change the variable's name.
        self.b = tf.Variable(tf.zeros([distinctTagNum], name="bias"))

    with tf.variable_scope('CNN_Layer'):
        self.filter = tf.get_variable(
            "filters_1",
            shape=[2, char_dim, 1, char_dim],
            regularizer=tf.contrib.layers.l2_regularizer(0.0001),
            initializer=tf.truncated_normal_initializer(stddev=0.01),
            dtype=tf.float32)

    self.trains_params = None

    # Integer id inputs: one row of word ids and one row of char ids per
    # sentence, with the widths fixed by FLAGS.
    self.inp_w = tf.placeholder(
        tf.int32, shape=[None, sentence_len], name="input_words")
    self.inp_c = tf.placeholder(
        tf.int32,
        shape=[None, sentence_len * FLAGS.max_chars_per_word],
        name="input_chars")
def _shared_encoder_network(self):
    """Build the shared-encoder graph: one RNN encodes both source and target.

    The encoder is created under the ``shared_encoder`` variable scope and run
    on ``self.src_input_distributed``. The scope is then re-entered with
    ``reuse=True`` so the same cell is run on ``self.tgt_input_distributed``.
    The last relevant output of each side is projected by its own matrix
    (``src_M`` / ``tgt_M``) and stored in ``self.src_seq_embedding`` and
    ``self.tgt_seq_embedding``.
    """

    def _make_cell():
        # TODO: need to play with forget gate and peepholes here
        if self.use_lstm:
            return tf.nn.rnn_cell.LSTMCell(
                self.src_cell_size, forget_bias=1.0, use_peepholes=False)
        return tf.nn.rnn_cell.GRUCell(self.src_cell_size)

    # config SSE network to be shared encoder mode
    with tf.variable_scope('shared_encoder'):
        if self.num_layers > 1:
            # Build one cell object per layer. Passing the same object
            # num_layers times ([cell] * n) makes every layer share a single
            # cell instance, which newer TensorFlow releases reject or turn
            # into unintended weight sharing between layers.
            src_cell = tf.nn.rnn_cell.MultiRNNCell(
                [_make_cell() for _ in range(self.num_layers)])
        else:
            src_cell = _make_cell()

        # compute source sequence related tensors
        src_output, _ = tf.nn.dynamic_rnn(
            src_cell, self.src_input_distributed,
            sequence_length=self._src_lens, dtype=tf.float32)
        src_last_output = self._last_relevant(src_output, self._src_lens)
        self.src_M = tf.get_variable(
            'src_M', shape=[self.src_cell_size, self.seq_embed_size],
            initializer=tf.truncated_normal_initializer())
        self.src_seq_embedding = tf.matmul(src_last_output, self.src_M)

        # tgt_M must be declared before the scope is reopened with
        # reuse=True, because get_variable cannot create new variables there.
        self.tgt_M = tf.get_variable(
            'tgt_M', shape=[self.src_cell_size, self.seq_embed_size],
            initializer=tf.truncated_normal_initializer())

    with tf.variable_scope('shared_encoder', reuse=True):
        # compute target sequence related tensors by reusing the encoder
        tgt_output, _ = tf.nn.dynamic_rnn(
            src_cell, self.tgt_input_distributed,
            sequence_length=self._tgt_lens, dtype=tf.float32)
        tgt_last_output = self._last_relevant(tgt_output, self._tgt_lens)
        self.tgt_seq_embedding = tf.matmul(tgt_last_output, self.tgt_M)
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes):
    """
    Create the layers for a fully convolutional network.

    Each VGG output is reduced to ``num_classes`` channels with a 1x1
    convolution; the results are merged through skip connections and
    upsampled with transposed convolutions.

    :param vgg_layer3_out: TF Tensor for VGG Layer 3 output
    :param vgg_layer4_out: TF Tensor for VGG Layer 4 output
    :param vgg_layer7_out: TF Tensor for VGG Layer 7 output
    :param num_classes: Number of classes to classify
    :return: The Tensor for the last layer of output
    """

    def _score(features):
        # 1x1 convolution projecting a VGG feature map to num_classes channels.
        return tf.layers.conv2d(
            features, num_classes, 1, strides=(1, 1), padding='same',
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
            kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))

    def _upsample(features, kernel, stride):
        # Transposed convolution that enlarges the map by `stride`.
        return tf.layers.conv2d_transpose(
            features, num_classes, kernel, strides=(stride, stride),
            padding='same',
            kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))

    # layer7: project, then upsample by 2
    upsampled7 = _upsample(_score(vgg_layer7_out), 4, 2)

    # layer4: project, add the skip connection, then upsample by 2
    fused4 = tf.add(_score(vgg_layer4_out), upsampled7)
    upsampled4 = _upsample(fused4, 4, 2)

    # layer3: project, add the skip connection, then upsample by 8
    fused3 = tf.add(_score(vgg_layer3_out), upsampled4)
    return _upsample(fused3, 32, 8)
def bottleneck(input_, feature_input, features, is_training, stride, name):
    """Three-convolution residual block (1x1 -> 3x3 -> 1x1) with batch norm.

    Args:
        input_: input tensor fed to both the main branch and the shortcut.
        feature_input: number of channels of ``input_``.
        features: triple ``(f1, f2, f3)`` of output channels for the three
            convolutions.
        is_training: forwarded to ``tf.layers.batch_normalization``.
        stride: spatial stride applied by the 3x3 convolution and by the
            projection shortcut.
        name: variable scope that holds the block's weights.

    Returns:
        ReLU of the sum of the shortcut and the main branch.
    """
    f1, f2, f3 = features
    unit_stride = [1, 1, 1, 1]
    strided = [1, stride, stride, 1]

    def _conv_bn(tensor, var_name, shape, strides):
        # Convolution without bias followed by batch normalisation.
        kernel = tf.get_variable(
            name=var_name, shape=shape,
            initializer=tf.truncated_normal_initializer(stddev=0.01))
        tensor = tf.nn.conv2d(tensor, kernel, strides, padding='SAME')
        return tf.layers.batch_normalization(tensor, training=is_training)

    with tf.variable_scope(name):
        branch = tf.nn.relu(
            _conv_bn(input_, 'conv1', [1, 1, feature_input, f1], unit_stride))
        branch = tf.nn.relu(
            _conv_bn(branch, 'conv2', [3, 3, f1, f2], strided))
        branch = _conv_bn(branch, 'conv3', [1, 1, f2, f3], unit_stride)

        shortcut = input_
        if feature_input != f3:
            # Channel counts differ, so project the shortcut with a 1x1
            # convolution using the same stride as the main branch.
            shortcut = _conv_bn(
                input_, 'convs', [1, 1, feature_input, f3], strided)

        return tf.nn.relu(tf.add(shortcut, branch))
def multiplicative_integration(list_of_inputs, output_size, initial_bias_value=0.0,
                               weights_already_calculated=False,
                               use_highway_gate=False, use_l2_loss=False,
                               scope=None, timestep=0):
    """Combine two inputs as ``alpha*Wx*Uz + beta1*Uz + beta2*Wx + bias``.

    Args:
        list_of_inputs: exactly two tensors.
        output_size: length of the learned ``alpha``, ``beta`` and bias
            vectors, and output width of the linear projections.
        initial_bias_value: mean of the bias initializer.
        weights_already_calculated: if True, the two inputs are used directly
            as ``Wx`` and ``Uz`` (useful for batch-normed inputs); otherwise
            each is first passed through ``linear.linear``.
        use_highway_gate: if True, the result is passed through
            ``highway_network.apply_highway_gate`` with the first input.
        use_l2_loss: forwarded to ``linear.linear``.
        scope: optional variable scope name.
        timestep: forwarded to ``linear.linear``.

    Returns:
        The integrated tensor.

    Raises:
        ValueError: if ``list_of_inputs`` does not hold exactly two items.
    """
    with tf.variable_scope(scope or 'double_inputs_multiple_integration'):
        if len(list_of_inputs) != 2:
            raise ValueError('list of inputs must be 2, you have:', len(list_of_inputs))

        if weights_already_calculated:
            # the caller already computed the projections (e.g. batch norm)
            Wx, Uz = list_of_inputs
        else:
            with tf.variable_scope('Calculate_Wx_mulint'):
                Wx = linear.linear(list_of_inputs[0], output_size, False,
                                   use_l2_loss=use_l2_loss, timestep=timestep)
            with tf.variable_scope("Calculate_Uz_mulint"):
                Uz = linear.linear(list_of_inputs[1], output_size, False,
                                   use_l2_loss=use_l2_loss, timestep=timestep)

        with tf.variable_scope("multiplicative_integration"):
            alpha = tf.get_variable(
                'mulint_alpha', [output_size],
                initializer=tf.truncated_normal_initializer(mean=1.0, stddev=0.1))
            betas = tf.get_variable(
                'mulint_params_betas', [output_size * 2],
                initializer=tf.truncated_normal_initializer(mean=0.5, stddev=0.1))
            # TF >= 1.0 signature: tf.split(value, num_or_size_splits, axis).
            # The former call tf.split(0, 2, value) used the pre-1.0 order.
            beta1, beta2 = tf.split(betas, 2, axis=0)
            original_bias = tf.get_variable(
                'mulint_original_bias', [output_size],
                initializer=tf.truncated_normal_initializer(
                    mean=initial_bias_value, stddev=0.1))

        final_output = alpha * Wx * Uz + beta1 * Uz + beta2 * Wx + original_bias

        if use_highway_gate:
            final_output = highway_network.apply_highway_gate(
                final_output, list_of_inputs[0])

    return final_output
def residual_block(input_, dilation, kwidth, num_kernels=1, bias_init=None,
                   stddev=0.02, do_skip=True, name='residual_block'):
    """Gated dilated-convolution block with residual and optional skip outputs.

    Two ``atrous_conv1d`` calls on ``input_`` produce a tanh activation and a
    sigmoid gate. Their element-wise product goes through a width-1 ``conv1d``
    and is added to ``input_``.

    Args:
        input_: input tensor.
        dilation: dilation passed to ``atrous_conv1d``.
        kwidth: kernel width passed to ``atrous_conv1d``.
        num_kernels: number of kernels for the two dilated convolutions.
        bias_init: bias initializer forwarded to ``atrous_conv1d``.
        stddev: standard deviation for the weight initializers.
        do_skip: when True, also compute and return the skip branch.
        name: variable scope of the block.

    Returns:
        ``(res, skip)`` when ``do_skip`` is True, otherwise ``res``.
    """
    print('input shape to residual block: ', input_.get_shape())
    with tf.variable_scope(name):
        h_a = atrous_conv1d(input_, dilation, kwidth, num_kernels,
                            bias_init=bias_init, stddev=stddev)
        h = tf.tanh(h_a)
        # apply gated activation
        z_a = atrous_conv1d(input_, dilation, kwidth, num_kernels,
                            name='conv_gate', bias_init=bias_init,
                            stddev=stddev)
        z = tf.nn.sigmoid(z_a)
        print('gate shape: ', z.get_shape())
        # Element-wise gate. The `*` operator replaces tf.mul, which was
        # removed in TensorFlow 1.0.
        gated_h = z * h
        print('gated h shape: ', gated_h.get_shape())
        # make res connection
        h_ = conv1d(gated_h, kwidth=1, num_kernels=1,
                    init=tf.truncated_normal_initializer(stddev=stddev),
                    name='residual_conv1')
        res = h_ + input_
        print('residual result: ', res.get_shape())
        if not do_skip:
            return res
        # make skip connection
        skip = conv1d(gated_h, kwidth=1, num_kernels=1,
                      init=tf.truncated_normal_initializer(stddev=stddev),
                      name='skip_conv1')
        return res, skip
def model(data, prev_outputs, image_size, n_channels, n_actions, n_prev_actions):
    """Build a conv + fully-connected network producing one value per action.

    Args:
        data: input tensor fed to the first ``conv_relu`` layer.
        prev_outputs: tensor reshaped to
            ``[-1, n_actions * n_prev_actions]`` and mixed into the first
            fully-connected layer.
        image_size: spatial size of the input; its product is used to compute
            the flattened feature count.
        n_channels: number of input channels.
        n_actions: width of the output layer.
        n_prev_actions: number of previous actions held in ``prev_outputs``.

    Returns:
        Tensor ``matmul(data, weights) + biases`` of the ``flat_out`` layer.
    """
    # each conv layer: (patch_side, n_kernels, stride)
    kernel_defs = [(8, 16, 4), (2, 32, 1)]
    fc_sizes = [256]

    n_input_kernels = n_channels
    for i, (patch_side, n_kernels, stride) in enumerate(kernel_defs):
        with tf.variable_scope("conv_%i" % i):
            kernel_shape = (patch_side, patch_side, n_input_kernels, n_kernels)
            data = conv_relu(data, kernel_shape, stride)
            n_input_kernels = n_kernels

    for i, n in enumerate(fc_sizes):
        with tf.variable_scope("fc_%i" % i):
            if i == 0:
                total_stride = np.prod([k[2] for k in kernel_defs])
                # Floor division and int(): tensor and variable shapes must be
                # integers, and plain `/` yields a float on Python 3.
                previous_n = int(
                    kernel_defs[-1][1] * np.prod(image_size) // total_stride ** 2)
                data = tf.reshape(data, [-1, previous_n])
                reshape_prev_outputs = tf.reshape(
                    prev_outputs, [-1, n_actions * n_prev_actions])
                prev_outputs_weights = tf.get_variable(
                    "prev_outputs_weights",
                    [n_actions * n_prev_actions, n],
                    initializer=tf.truncated_normal_initializer(
                        mean=0.,
                        stddev=0.01 / np.sqrt(n_prev_actions * n_actions)))
            else:
                previous_n = fc_sizes[i - 1]

            weights = tf.get_variable(
                "weights", [previous_n, n],
                initializer=tf.truncated_normal_initializer(
                    mean=0., stddev=0.01 / np.sqrt(previous_n)))
            biases = tf.get_variable(
                "biases", [n], initializer=tf.constant_initializer(0.0))
            relu_input = tf.matmul(data, weights) + biases
            if i == 0:
                # `//` keeps the integer scale factor that the original
                # integer `/` produced on Python 2.
                scale = 0.1 * (previous_n // n_actions // n_prev_actions)
                relu_input += scale * tf.matmul(
                    reshape_prev_outputs, prev_outputs_weights)
            data = tf.nn.relu(relu_input)

    with tf.variable_scope("flat_out"):
        weights = tf.get_variable(
            "weights", [fc_sizes[-1], n_actions],
            initializer=tf.truncated_normal_initializer(
                mean=0., stddev=0.01 / np.sqrt(fc_sizes[-1])))
        biases = tf.get_variable(
            "biases", [n_actions], initializer=tf.constant_initializer(0.0))
        return tf.matmul(data, weights) + biases
def Discriminator_with_Vanilla(input_Pattern, hidden_Unit_Size=128,
                               label_Unit_Size=10, is_Training=True,
                               reuse=False):
    """Dense discriminator with a real/fake head and a label head.

    Args:
        input_Pattern: input tensor of the hidden dense layer.
        hidden_Unit_Size: units of the shared hidden layer.
        label_Unit_Size: units of the label head.
        is_Training: accepted but not used by this function.
        reuse: forwarded to the ``discriminator`` variable scope.

    Returns:
        ``(discrimination_Logits, label_Logits, discrimination_Activation,
        label_Activation)``, where the activations are the sigmoid and the
        softmax of the respective logits.
    """

    def _dense(inputs, units, name, activation=None):
        # Dense layer with the initializers shared by all three layers.
        return tf.layers.dense(
            inputs=inputs,
            units=units,
            activation=activation,
            use_bias=True,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
            bias_initializer=tf.zeros_initializer(),
            name=name)

    with tf.variable_scope('discriminator', reuse=reuse):
        hidden_Activation = _dense(
            input_Pattern, hidden_Unit_Size, "hidden", activation=tf.nn.relu)

        discrimination_Logits = _dense(hidden_Activation, 1, "discrimination")
        discrimination_Activation = tf.nn.sigmoid(discrimination_Logits)

        label_Logits = _dense(hidden_Activation, label_Unit_Size, "label")
        label_Activation = tf.nn.softmax(label_Logits)

        return (discrimination_Logits, label_Logits,
                discrimination_Activation, label_Activation)
def discriminator(x_image, reuse=False):
    """Two conv/pool layers followed by two dense layers; returns one logit.

    Args:
        x_image: image tensor with one channel (the first kernel has a single
            input channel).
        reuse: when truthy, switch the current variable scope to reuse mode
            before fetching the variables.

    Returns:
        Output of the last dense layer; no activation is applied.
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    def _weights(name, shape):
        return tf.get_variable(
            name, shape,
            initializer=tf.truncated_normal_initializer(stddev=0.02))

    def _bias(name, size):
        return tf.get_variable(
            name, [size], initializer=tf.constant_initializer(0))

    def _conv_pool(inputs, kernel, bias):
        return avg_pool_2x2(tf.nn.relu(conv2d(inputs, kernel) + bias))

    # First and second conv + average-pool layers
    pooled1 = _conv_pool(
        x_image, _weights('d_wconv1', [5, 5, 1, 8]), _bias('d_bconv1', 8))
    pooled2 = _conv_pool(
        pooled1, _weights('d_wconv2', [5, 5, 8, 16]), _bias('d_bconv2', 16))

    # First fully connected layer
    flat_size = 7 * 7 * 16
    fc1_w = _weights('d_wfc1', [flat_size, 32])
    fc1_b = _bias('d_bfc1', 32)
    flattened = tf.reshape(pooled2, [-1, flat_size])
    hidden = tf.nn.relu(tf.matmul(flattened, fc1_w) + fc1_b)

    # Second fully connected layer, which is also the final layer
    fc2_w = _weights('d_wfc2', [32, 1])
    fc2_b = _bias('d_bfc2', 1)
    return tf.matmul(hidden, fc2_w) + fc2_b
def discriminator(images, reuse_variables=None):
    """Convolutional discriminator returning one unscaled value per example.

    Args:
        images: image tensor with one channel (the first kernel has a single
            input channel).
        reuse_variables: forwarded as ``reuse`` to the current variable scope.

    Returns:
        Output of the last dense layer; no activation is applied.
    """

    def _weights(name, shape):
        return tf.get_variable(
            name, shape,
            initializer=tf.truncated_normal_initializer(stddev=0.02))

    def _bias(name, size):
        return tf.get_variable(
            name, [size], initializer=tf.constant_initializer(0))

    def _conv_relu_pool(inputs, kernel, bias):
        # 5x5 convolution, bias, ReLU, then 2x2 average pooling.
        conv = tf.nn.conv2d(input=inputs, filter=kernel,
                            strides=[1, 1, 1, 1], padding='SAME')
        activated = tf.nn.relu(conv + bias)
        return tf.nn.avg_pool(activated, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')

    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
        # First conv/pool layer: 32 different 5 x 5 pixel features
        net = _conv_relu_pool(
            images, _weights('d_w1', [5, 5, 1, 32]), _bias('d_b1', 32))
        # Second conv/pool layer: 64 different 5 x 5 pixel features
        net = _conv_relu_pool(
            net, _weights('d_w2', [5, 5, 32, 64]), _bias('d_b2', 64))

        # First fully connected layer
        flat_size = 7 * 7 * 64
        fc_w = _weights('d_w3', [flat_size, 1024])
        fc_b = _bias('d_b3', 1024)
        net = tf.reshape(net, [-1, flat_size])
        net = tf.nn.relu(tf.matmul(net, fc_w) + fc_b)

        # Second fully connected layer; the result holds unscaled values
        out_w = _weights('d_w4', [1024, 1])
        out_b = _bias('d_b4', 1)
        return tf.matmul(net, out_w) + out_b
def inference(images):
    """Build four conv/pool layers and three dense layers; return the logits.

    Args:
        images: image tensor with 3 channels (the first kernel has three
            input channels). Its batch dimension is expected to equal the
            module-level ``BATCH_SIZE`` used by the flattening reshape.

    Returns:
        Output of the ``fc7`` layer, ``NUM_CLASSES`` wide, without activation.
    """

    def _variable_with_weight_decay(name, shape, stddev, wd):
        # Create a variable and, when wd is truthy, add wd * l2_loss(var)
        # to the 'losses' collection.
        var = tf.get_variable(
            name, shape=shape,
            initializer=tf.truncated_normal_initializer(stddev=stddev))
        if wd:
            # tf.multiply replaces tf.mul, which was removed in TF 1.0.
            weight_decay = tf.multiply(tf.nn.l2_loss(var), wd,
                                       name='weight_loss')
            tf.add_to_collection('losses', weight_decay)
        return var

    def _conv_relu_pool(inputs, index, in_channels, out_channels):
        # 3x3 convolution + bias + ReLU in scope 'conv<index>', followed by
        # a 3x3 max pool with stride 2 named 'pool<index>'.
        with tf.variable_scope('conv%d' % index) as scope:
            kernel = tf.get_variable(
                'weights', shape=[3, 3, in_channels, out_channels],
                initializer=tf.truncated_normal_initializer(stddev=1e-4))
            conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.get_variable(
                'biases', shape=[out_channels],
                initializer=tf.constant_initializer(0.0))
            activated = tf.nn.relu(tf.nn.bias_add(conv, biases),
                                   name=scope.name)
        return tf.nn.max_pool(activated, ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1], padding='SAME',
                              name='pool%d' % index)

    net = images
    in_channels = 3
    for index, out_channels in enumerate((32, 64, 128, 256), start=1):
        net = _conv_relu_pool(net, index, in_channels, out_channels)
        in_channels = out_channels
    pool4 = net

    with tf.variable_scope('fc5') as scope:
        # Flattened feature count: product of all non-batch dimensions.
        dim = 1
        for d in pool4.get_shape()[1:].as_list():
            dim *= d
        reshape = tf.reshape(pool4, [BATCH_SIZE, dim])
        weights = _variable_with_weight_decay(
            'weights', shape=[dim, 1024], stddev=0.05, wd=0.005)
        biases = tf.get_variable(
            'biases', shape=[1024], initializer=tf.constant_initializer(0.1))
        fc5 = tf.nn.relu_layer(reshape, weights, biases, name=scope.name)

    with tf.variable_scope('fc6') as scope:
        weights = _variable_with_weight_decay(
            'weights', shape=[1024, 256], stddev=0.05, wd=0.005)
        biases = tf.get_variable(
            'biases', shape=[256], initializer=tf.constant_initializer(0.1))
        fc6 = tf.nn.relu_layer(fc5, weights, biases, name=scope.name)

    with tf.variable_scope('fc7') as scope:
        weights = _variable_with_weight_decay(
            'weights', shape=[256, NUM_CLASSES], stddev=0.05, wd=0.005)
        biases = tf.get_variable(
            'biases', shape=[NUM_CLASSES],
            initializer=tf.constant_initializer(0.1))
        fc7 = tf.nn.xw_plus_b(fc6, weights, biases, name=scope.name)

    return fc7
def fc(self, input, num_out, name, relu=True, trainable=True):
    """Fully-connected layer created under the variable scope ``name``.

    Args:
        input: a tensor, or a tuple whose first element is the tensor to use.
        num_out: number of output units.
        name: variable scope; ``'bbox_pred'`` selects a smaller weight stddev.
        relu: use ``tf.nn.relu_layer`` when True, ``tf.nn.xw_plus_b`` otherwise.
        trainable: forwarded to ``self.make_var``.

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope(name) as scope:
        # only use the first input
        source = input[0] if isinstance(input, tuple) else input
        shape = source.get_shape()

        if shape.ndims == 4:
            # Flatten everything but the batch axis; axis 3 is moved in front
            # of axes 1 and 2 before flattening.
            dim = 1
            for extent in shape[1:].as_list():
                dim *= extent
            feed_in = tf.reshape(tf.transpose(source, [0, 3, 1, 2]), [-1, dim])
        else:
            feed_in = source
            dim = int(shape[-1])

        weight_stddev = 0.001 if name == 'bbox_pred' else 0.01
        init_weights = tf.truncated_normal_initializer(0.0, stddev=weight_stddev)
        init_biases = tf.constant_initializer(0.0)

        weights = self.make_var(
            'weights', [dim, num_out], init_weights, trainable,
            regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY))
        biases = self.make_var('biases', [num_out], init_biases, trainable)

        layer_op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
        return layer_op(feed_in, weights, biases, name=scope.name)
def Discriminator(image_Pattern, initial_Filter_Count=64, attribute_Count=10,
                  reuse=False):
    """Convolutional discriminator with a real/fake logit and attribute logits.

    Six stride-2 convolutions (filters doubling each layer) are followed by a
    final "valid" convolution whose kernel covers the remaining spatial
    extent; its output is squeezed and split into the two heads.

    Args:
        image_Pattern: input image tensor.
        initial_Filter_Count: filters of the first convolution.
        attribute_Count: width of the attribute head.
        reuse: forwarded to the ``discriminator`` variable scope.

    Returns:
        ``(discrimination_Logit, attribute_Logit)``.
    """
    with tf.variable_scope('discriminator', reuse=reuse):
        net = image_Pattern
        for index in range(6):
            conv = tf.layers.conv2d(
                inputs=net,
                filters=initial_Filter_Count * (2 ** index),
                kernel_size=4,
                strides=2,
                padding="same",
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.02))
            net = tf.nn.leaky_relu(
                conv, alpha=0.01, name="hidden_Layer{}".format(index))

        output_Activation = tf.layers.conv2d(
            inputs=net,
            filters=1 + attribute_Count,
            # kernel as large as the feature map
            kernel_size=net.get_shape()[1:3],
            strides=1,
            padding="valid",
            name="output_Layer",
            use_bias=False,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.02))

        flat_logits = tf.squeeze(output_Activation, axis=[1, 2])
        discrimination_Logit, attribute_Logit = tf.split(
            flat_logits, num_or_size_splits=[1, attribute_Count], axis=1)

        return discrimination_Logit, attribute_Logit
def build_graph(network_input, input_shape, output_shape, batch_size):
    """Build a three-layer CNN with one hidden dense layer and a linear output.

    Args:
        network_input: input tensor of the first convolution.
        input_shape: indexable ``(height, width, channels)``; used for the
            first kernel's input channels and the flattened feature count.
        output_shape: shape of the output bias; ``output_shape[0]`` is the
            output width.
        batch_size: leading dimension used when flattening the conv output.

    Returns:
        ``matmul(fc_out, out_weights) + out_bias``.
    """

    def _normal_var(var_name, shape, stddev=5e-2):
        return tf.get_variable(
            var_name, shape, tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=stddev))

    def _conv_layer(inputs, in_channels, out_channels):
        # 3x3 convolution + bias + ReLU, then a 2x2 max pool with stride 1,
        # so the spatial size is unchanged.
        kernel = _normal_var('weights', [3, 3, in_channels, out_channels])
        bias = _normal_var('bias', [out_channels])
        conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME')
        return tf.nn.max_pool(
            tf.nn.relu(tf.nn.bias_add(conv, bias)),
            ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='SAME')

    with tf.variable_scope('simple_cnn'):
        net = network_input
        channels = input_shape[2]
        for scope_name, out_channels in (('conv1', 32), ('conv2', 64),
                                         ('conv3', 128)):
            with tf.variable_scope(scope_name):
                net = _conv_layer(net, channels, out_channels)
            channels = out_channels

        with tf.variable_scope('fc4'):
            conv_out_shape = input_shape[0] * input_shape[1] * 128
            conv_out_flat = tf.reshape(net, [batch_size, conv_out_shape])
            fc4_weights = _normal_var('weights', [conv_out_shape, 1024])
            fc4_bias = _normal_var('bias', [1024])
            fc_out = tf.nn.relu(
                tf.nn.bias_add(tf.matmul(conv_out_flat, fc4_weights), fc4_bias))

            # NOTE(review): the output variables are kept inside the 'fc4'
            # scope; the flattened source does not show where that scope
            # ended - confirm against existing checkpoints.
            output_weight = _normal_var(
                'out_weights', [1024, output_shape[0]], stddev=6e-2)
            output_bias = _normal_var('out_bias', output_shape)
            return tf.matmul(fc_out, output_weight) + output_bias
def testCheckInitializers(self):
    """check_initializers rejects unknown keys and non-callable values."""

    def _normal():
        return tf.truncated_normal_initializer(mean=0, stddev=1)

    keys = ["key_a", "key_b"]
    initializers = {"key_a": _normal(), "key_c": _normal()}

    # "key_c" is not among the allowed keys.
    with self.assertRaisesRegexp(KeyError, "Invalid initializer keys.*"):
        snt.check_initializers(initializers=initializers, keys=keys)

    del initializers["key_c"]

    # A plain string is not a valid initializer, at the top level or nested
    # inside a dict.
    for bad_value in ("not a function", {"key_c": "not a function"}):
        initializers["key_b"] = bad_value
        with self.assertRaisesRegexp(TypeError, "Initializer for.*"):
            snt.check_initializers(initializers=initializers, keys=keys)

    # A nested dict of real initializers is accepted.
    initializers["key_b"] = {"key_c": _normal(), "key_d": _normal()}
    snt.check_initializers(initializers=initializers, keys=keys)
def discriminator(x_image, reuse=False):
    """Two conv/pool layers followed by two dense layers; returns one logit.

    Variables are fetched with ``tf.get_variable`` (get or create) rather
    than ``tf.Variable`` so they can be shared when ``reuse`` is set.

    Args:
        x_image: image tensor with one channel; per the original comment the
            first layer receives a 28x28 image.
        reuse: when truthy, switch the current variable scope to reuse mode.

    Returns:
        Output of the final dense layer; no activation is applied.
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    weight_stddev = 0.02  # standard deviation of every weight initializer

    # Each layer's input is the previous layer's output.
    conv_specs = (
        ('d_wconv1', 'd_bconv1', [5, 5, 1, 8]),
        ('d_wconv2', 'd_bconv2', [5, 5, 8, 16]),
    )
    net = x_image
    for w_name, b_name, kernel_shape in conv_specs:
        # W: the layer's weights; b: the bias term
        kernel = tf.get_variable(
            w_name, kernel_shape,
            initializer=tf.truncated_normal_initializer(stddev=weight_stddev))
        offset = tf.get_variable(
            b_name, [kernel_shape[-1]],
            initializer=tf.constant_initializer(0))
        # z = W*x + b, ReLU activation, then average pooling
        net = avg_pool_2x2(tf.nn.relu(conv2d(net, kernel) + offset))

    # First fully connected layer
    flat_size = 7 * 7 * 16
    fc1_w = tf.get_variable(
        'd_wfc1', [flat_size, 32],
        initializer=tf.truncated_normal_initializer(stddev=weight_stddev))
    fc1_b = tf.get_variable(
        'd_bfc1', [32], initializer=tf.constant_initializer(0))
    flattened = tf.reshape(net, [-1, flat_size])
    hidden = tf.nn.relu(tf.matmul(flattened, fc1_w) + fc1_b)

    # Second fully connected layer, which is also the final layer
    fc2_w = tf.get_variable(
        'd_wfc2', [32, 1],
        initializer=tf.truncated_normal_initializer(stddev=weight_stddev))
    fc2_b = tf.get_variable(
        'd_bfc2', [1], initializer=tf.constant_initializer(0))
    return tf.matmul(hidden, fc2_w) + fc2_b
def _build_network(self, is_training=True):
    """Assemble head, region proposal, RoI pooling, tail and classification.

    Args:
        is_training: forwarded to every sub-network builder.

    Returns:
        ``(rois, cls_prob, bbox_pred)``.

    Raises:
        NotImplementedError: if ``cfg.POOLING_MODE`` is not ``'crop'``.
    """
    # select initializers: truncated or plain normal, chosen by config
    if cfg.TRAIN.TRUNCATED:
        make_initializer = tf.truncated_normal_initializer
    else:
        make_initializer = tf.random_normal_initializer
    initializer = make_initializer(mean=0.0, stddev=0.01)
    initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

    net_conv = self._image_to_head(is_training)

    with tf.variable_scope(self._scope, self._scope):
        # build the anchors for the image
        self._anchor_component()
        # region proposal network
        rois = self._region_proposal(net_conv, is_training, initializer)
        # region of interest pooling
        if cfg.POOLING_MODE != 'crop':
            raise NotImplementedError
        pool5 = self._crop_pool_layer(net_conv, rois, "pool5")

    fc7 = self._head_to_tail(pool5, is_training)

    with tf.variable_scope(self._scope, self._scope):
        # region classification
        cls_prob, bbox_pred = self._region_classification(
            fc7, is_training, initializer, initializer_bbox)

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
def add_model(self, input_data):
    """Build a two-layer CNN whose dense layer also sees the raw input.

    The hidden dense layer sums a projection of the flattened second pooling
    output and a projection of the flattened raw input, then applies ReLU and
    dropout.

    Args:
        input_data: input tensor with one channel (the first kernel has a
            single input channel).

    Returns:
        Output of the readout layer, ``self.config.num_classes`` wide.
    """
    conf = self.config

    def _normal_var(var_name, shape):
        return tf.get_variable(
            var_name, shape,
            initializer=tf.truncated_normal_initializer(stddev=0.1))

    def _bias_var(var_name, size):
        return tf.get_variable(
            var_name, [size], initializer=tf.constant_initializer(0.1))

    def _conv_relu_pool(inputs, kernel, bias):
        # VALID convolution, bias, ReLU, then a 2x2 max pool with stride 2.
        conv = tf.nn.conv2d(inputs, kernel, strides=[1, 1, 1, 1],
                            padding='VALID')
        return tf.nn.max_pool(tf.nn.relu(conv + bias), ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='VALID')

    with tf.variable_scope("FirstConv"):
        h_pool1 = _conv_relu_pool(
            input_data,
            _normal_var("w_conv1", [11, 11, 1, 32]),
            _bias_var("b_conv1", 32))

    with tf.variable_scope("SecondConv"):
        h_pool2 = _conv_relu_pool(
            h_pool1,
            _normal_var("w_conv2", [11, 11, 32, 64]),
            _bias_var("b_conv2", 64))

    with tf.variable_scope("FullyConnected"):
        pooled_size = conf.final_size * 64
        flat_input = tf.reshape(input_data, [conf.batch_size, -1])
        w_input = _normal_var("w_input", [conf.DIM_ETA * conf.DIM_PHI, 32])
        wfc1 = _normal_var("wfc1", [pooled_size, 32])
        flat_pooled = tf.reshape(h_pool2, [-1, pooled_size])
        # This layer has no bias term.
        h_fc1 = tf.nn.relu(
            tf.matmul(flat_pooled, wfc1) + tf.matmul(flat_input, w_input))
        h_fc1_drop = tf.nn.dropout(h_fc1, self.dropout_placeholder)

    with tf.variable_scope("ReadoutLayer"):
        wfc2 = _normal_var("wfc2", [32, conf.num_classes])
        bfc2 = _bias_var("bfc2", conf.num_classes)
        y_conv = tf.matmul(h_fc1_drop, wfc2) + bfc2

    return y_conv
def model(x_crop, y_, reuse):
    """Build the binary-layer CNN and return ``(net, cost, acc)``.

    For more simplified CNN APIs, check tensorlayer.org

    Args:
        x_crop: input image tensor.
        y_: integer class labels, compared against ``argmax`` of the logits.
        reuse: forwarded to the ``model`` variable scope.

    Returns:
        The final layer object, the cost (cross entropy plus L2 on the
        variables matching ``'relu/W'``) and the mean accuracy.
    """
    conv_init = tf.truncated_normal_initializer(stddev=5e-2)
    dense_init = tf.truncated_normal_initializer(stddev=0.04)
    dense_bias_init = tf.constant_initializer(value=0.1)
    lrn_args = dict(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

    with tf.variable_scope("model", reuse=reuse):
        net = tl.layers.InputLayer(x_crop, name='input')
        net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu,
                               padding='SAME', W_init=conv_init, name='cnn1')
        net = tl.layers.SignLayer(net)
        net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME',
                                  name='pool1')
        net = tl.layers.LocalResponseNormLayer(net, name='norm1', **lrn_args)
        net = tl.layers.BinaryConv2d(net, 64, (5, 5), (1, 1), act=tf.nn.relu,
                                     padding='SAME', W_init=conv_init,
                                     name='cnn2')
        net = tl.layers.LocalResponseNormLayer(net, name='norm2', **lrn_args)
        net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME',
                                  name='pool2')
        net = tl.layers.FlattenLayer(net, name='flatten')
        net = tl.layers.SignLayer(net)
        net = tl.layers.BinaryDenseLayer(net, n_units=384, act=tf.nn.relu,
                                         W_init=dense_init,
                                         b_init=dense_bias_init,
                                         name='d1relu')
        net = tl.layers.SignLayer(net)
        net = tl.layers.BinaryDenseLayer(net, n_units=192, act=tf.nn.relu,
                                         W_init=dense_init,
                                         b_init=dense_bias_init,
                                         name='d2relu')
        net = tl.layers.DenseLayer(net, n_units=10, act=tf.identity,
                                   W_init=dense_init, name='output')
        logits = net.outputs

        ce = tl.cost.cross_entropy(logits, y_, name='cost')

        # L2 for the MLP, without this, the accuracy will be reduced by 15%.
        l2_penalty = tf.contrib.layers.l2_regularizer(0.004)
        L2 = sum(l2_penalty(p) for p in
                 tl.layers.get_variables_with_name('relu/W', True, True))
        cost = ce + L2

        predicted = tf.cast(tf.argmax(logits, 1), tf.int32)
        acc = tf.reduce_mean(tf.cast(tf.equal(predicted, y_), tf.float32))

        return net, cost, acc
def model_batch_norm(x_crop, y_, reuse, is_train):
    """Build the batch-normalised CNN and return ``(net, cost, acc)``.

    Batch normalization should be placed before rectifier, so the
    convolutions carry no bias or activation and ReLU is applied by the
    following ``BatchNormLayer``.

    Args:
        x_crop: input image tensor.
        y_: integer class labels, compared against ``argmax`` of the logits.
        reuse: forwarded to the ``model`` variable scope.
        is_train: forwarded to each ``BatchNormLayer``.

    Returns:
        The final layer object, the cost (cross entropy plus L2 on the
        variables matching ``'relu/W'``) and the mean accuracy.
    """
    W_init = tf.truncated_normal_initializer(stddev=5e-2)
    W_init2 = tf.truncated_normal_initializer(stddev=0.04)
    b_init2 = tf.constant_initializer(value=0.1)

    with tf.variable_scope("model", reuse=reuse):
        # Qualified name: the bare `InputLayer` only resolved through a
        # wildcard import, unlike every other layer in this function.
        net = tl.layers.InputLayer(x_crop, name='input')

        for index in (1, 2):
            net = tl.layers.Conv2d(net, 64, (5, 5), (1, 1), padding='SAME',
                                   W_init=W_init, b_init=None,
                                   name='cnn%d' % index)
            net = tl.layers.BatchNormLayer(net, is_train, act=tf.nn.relu,
                                           name='batch%d' % index)
            net = tl.layers.MaxPool2d(net, (3, 3), (2, 2), padding='SAME',
                                      name='pool%d' % index)

        net = tl.layers.FlattenLayer(net, name='flatten')
        net = tl.layers.DenseLayer(net, n_units=384, act=tf.nn.relu,
                                   W_init=W_init2, b_init=b_init2,
                                   name='d1relu')
        net = tl.layers.DenseLayer(net, n_units=192, act=tf.nn.relu,
                                   W_init=W_init2, b_init=b_init2,
                                   name='d2relu')
        net = tl.layers.DenseLayer(net, n_units=10, act=tf.identity,
                                   W_init=W_init2, name='output')
        y = net.outputs

        ce = tl.cost.cross_entropy(y, y_, name='cost')

        # L2 for the MLP, without this, the accuracy will be reduced by 15%.
        L2 = 0
        for p in tl.layers.get_variables_with_name('relu/W', True, True):
            L2 += tf.contrib.layers.l2_regularizer(0.004)(p)
        cost = ce + L2

        correct_prediction = tf.equal(tf.cast(tf.argmax(y, 1), tf.int32), y_)
        acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        return net, cost, acc
def build_network(self, images, class_num, is_training=True, keep_prob=0.5, scope='Fast-RCNN'):
    """Build five conv layers, RoI pooling and the two dense output heads.

    Args:
        images: input image tensor.
        class_num: number of classes; sizes both the classification head
            (``class_num``) and the box head (``(class_num - 1) * 4``).
        is_training: forwarded to the dropout layers.
        keep_prob: dropout keep probability.
        scope: accepted but not used by this method.

    Returns:
        ``(cls, bbox)``: softmax class scores and box regression outputs.
    """
    self.conv1 = self.convLayer(images, 11, 11, 4, 4, 96, "conv1", "VALID")
    lrn1 = self.LRN(self.conv1, 2, 2e-05, 0.75, "norm1")
    self.pool1 = self.maxPoolLayer(lrn1, 3, 3, 2, 2, "pool1", "VALID")

    self.conv2 = self.convLayer(self.pool1, 5, 5, 1, 1, 256, "conv2", groups=2)
    lrn2 = self.LRN(self.conv2, 2, 2e-05, 0.75, "lrn2")
    self.pool2 = self.maxPoolLayer(lrn2, 3, 3, 2, 2, "pool2", "VALID")

    self.conv3 = self.convLayer(self.pool2, 3, 3, 1, 1, 384, "conv3")
    self.conv4 = self.convLayer(self.conv3, 3, 3, 1, 1, 384, "conv4", groups=2)
    self.conv5 = self.convLayer(self.conv4, 3, 3, 1, 1, 256, "conv5", groups=2)

    self.roi_pool6 = roi_pooling(self.conv5, self.rois, pool_height=6, pool_width=6)

    with slim.arg_scope([slim.fully_connected, slim.conv2d],
                        activation_fn=nn_ops.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        flatten = slim.flatten(self.roi_pool6, scope='flat_32')
        self.fc1 = slim.fully_connected(flatten, 4096, scope='fc_6')
        drop6 = slim.dropout(self.fc1, keep_prob=keep_prob,
                             is_training=is_training, scope='dropout6')
        self.fc2 = slim.fully_connected(drop6, 4096, scope='fc_7')
        drop7 = slim.dropout(self.fc2, keep_prob=keep_prob,
                             is_training=is_training, scope='dropout7')

        cls = slim.fully_connected(drop7, class_num,
                                   activation_fn=nn_ops.softmax, scope='fc_8')
        # Size the box head from the `class_num` argument, the same value the
        # classification head uses; it previously read `self.class_num`,
        # which could disagree with (or be missing next to) the argument.
        bbox = slim.fully_connected(
            drop7, (class_num - 1) * 4,
            weights_initializer=tf.truncated_normal_initializer(0.0, 0.001),
            activation_fn=None, scope='fc_9')

    return cls, bbox
def Generator(image_Pattern, is_Training = True, name = "generator", reuse = False):
    """Build an encoder/decoder generator out of stride-2 4x4 convolutions.

    Four stride-2 convolutions (64, 128, 256, 512 filters) are followed by
    four stride-2 transposed convolutions (256, 128, 64, 3 filters). Every
    layer except the first and the last is batch-normalised, and every layer
    except the last uses leaky ReLU.

    Args:
        image_Pattern: Input tensor for the first convolution.
        is_Training: Forwarded to ``tf.layers.batch_normalization``.
        name: Variable scope name.
        reuse: Forwarded to ``tf.variable_scope``.

    Returns:
        Tuple ``(generator_Logit, generator_Activation)``: the last transposed
        convolution's output and its ``tanh``.
    """
    def _layer_options(filters):
        # Keyword arguments shared by every (transposed) convolution here.
        return dict(
            filters = filters,
            kernel_size = [4,4],
            strides = (2,2),
            padding = "same",
            use_bias = False,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
        )

    with tf.variable_scope(name, reuse=reuse):
        # First down-sampling layer: no batch normalisation.
        stem = tf.layers.conv2d(inputs = image_Pattern, **_layer_options(2 ** 6))
        hidden = tf.nn.leaky_relu(stem)

        # Remaining down-sampling layers: 128, 256, 512 filters.
        for exponent in (7, 8, 9):
            convolved = tf.layers.conv2d(
                inputs = hidden, **_layer_options(2 ** exponent))
            normalised = tf.layers.batch_normalization(
                convolved, training = is_Training)
            hidden = tf.nn.leaky_relu(normalised)

        # Up-sampling layers: 256, 128, 64 filters.
        for exponent in (8, 7, 6):
            upsampled = tf.layers.conv2d_transpose(
                inputs = hidden, **_layer_options(2 ** exponent))
            normalised = tf.layers.batch_normalization(
                upsampled, training = is_Training)
            hidden = tf.nn.leaky_relu(normalised)

        # Output layer: 3 channels (RGB), squashed with tanh.
        generator_Logit = tf.layers.conv2d_transpose(
            inputs = hidden, **_layer_options(3))
        generator_Activation = tf.nn.tanh(generator_Logit)

        return generator_Logit, generator_Activation
def lstm_fs_(xs, ys, batches, l, m, n):
    """Create LSTM parameters, unroll the cell over ``xs`` and score it.

    Args:
        xs: Input sequence handed to ``scan``.
        ys: Targets compared against the stacked step outputs.
        batches: Leading dimension of the zero initial cell and hidden states.
        l: Passed through to ``scan`` as its last argument.
        m: State width; gate weights are ``[m + n, m]``, gate biases ``[m]``.
        n: Output width; the output projection is ``[m, n]`` with bias ``[n]``.

    Returns:
        Dict with keys ``"loss"``, ``"inference"`` and ``"accuracy"``. The
        ``"accuracy"`` entry is the cross-entropy of the final step only, not
        an actual accuracy.
    """
    def _make(name, shape):
        # Every parameter shares the same small truncated-normal init.
        return variable_on_cpu(
            name, shape=shape,
            initializer=tf.truncated_normal_initializer(stddev=1e-2))

    # Creation order is kept: gate weights, output weight, gate biases,
    # output bias.
    Wf, Wi, WC, Wo = [_make(name, [m + n, m]) for name in ("Wf", "Wi", "WC", "Wo")]
    Wo1 = _make("Wo1", [m, n])
    bf, bi, bC, bo = [_make(name, [m]) for name in ("bf", "bi", "bC", "bo")]
    bo1 = _make("bo1", [n])

    cell_state = tf.zeros([batches, m])
    hidden_state = tf.zeros([batches, m])

    def _step(mem, x):
        return step_lstm1(x, mem, Wf, bf, Wi, bi, WC, bC, Wo, bo, Wo1, bo1)

    outs, _final_mem = scan(_step, (cell_state, hidden_state), xs, l)
    # NOTE(review): tf.pack is the pre-1.0 spelling of tf.stack; kept as-is.
    yhats = tf.pack(outs)

    loss = cross_entropy(ys, yhats, t=1e-6)
    # Not actually accuracy: cross-entropy on the last time step.
    last_step_xent = cross_entropy(ys[-1], yhats[-1])
    return {"loss": loss, "inference": yhats, "accuracy": last_step_xent}
def inference(input_tensor, train, regularizer):
    """Forward pass: conv -> pool -> conv -> pool -> fc -> fc.

    Args:
        input_tensor: Input batch. Its static batch dimension is used when
            flattening, so it must be known at graph-construction time.
        train: When truthy, dropout (keep probability 0.5) follows the first
            fully connected layer.
        regularizer: Callable applied to each fully connected weight matrix,
            with the result added to the ``'losses'`` collection; ``None``
            disables it.

    Returns:
        The logits of the final fully connected layer.
    """
    same_stride_1 = dict(strides=[1, 1, 1, 1], padding='SAME')
    pool_2x2 = dict(ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Layer 1: convolution.
    with tf.variable_scope('layer1-conv1'):
        kernel1 = tf.get_variable(
            "weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        offset1 = tf.get_variable(
            "biases", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor, kernel1, **same_stride_1)
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, offset1))

    # Layer 2: pooling.
    with tf.name_scope('layer2-pool1'):
        pool1 = tf.nn.max_pool(relu1, **pool_2x2)

    # Layer 3: convolution.
    with tf.variable_scope('layer3-conv2'):
        kernel2 = tf.get_variable(
            "weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        offset2 = tf.get_variable(
            "biases", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, kernel2, **same_stride_1)
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, offset2))

    # Layer 4: pooling.
    with tf.name_scope('layer4-pool2'):
        pool2 = tf.nn.max_pool(relu2, **pool_2x2)

    # Flatten height * width * depth into one feature axis.
    batch, height, width, depth = pool2.get_shape().as_list()
    nodes = height * width * depth
    reshaped = tf.reshape(pool2, [batch, nodes])

    # Layer 5: fully connected.
    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable(
            "weight", [nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Only the fully connected weights are regularised.
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable(
            "bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if train:
            fc1 = tf.nn.dropout(fc1, 0.5)

    # Layer 6: fully connected output.
    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable(
            "weight", [FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Only the fully connected weights are regularised.
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable(
            "bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit
def inference(images):
    """Build the two-conv, two-local-layer classifier and return its logits.

    Pipeline: conv1 -> pool1 -> norm1 -> conv2 -> pool2 -> norm2 -> local3
    -> local4 -> softmax_linear. The second convolution consumes ``norm1``
    and the first local layer consumes ``norm2``, so every stage feeds the
    next one.

    Args:
        images: Input batch with 3 channels (the first kernel is 5x5x3x64).
            The flatten step reshapes to ``BATCH_SIZE`` rows.

    Returns:
        Unnormalised logits with ``NUM_CLASSES`` columns.
    """
    # Conv1 layer.
    with tf.variable_scope('conv1') as scope:
        kernel = _variable_on_cpu(
            'weights', [5, 5, 3, 64],
            tf.truncated_normal_initializer(stddev=1e-4))
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
        conv1 = conv2d(scope.name, images, kernel, biases)
    pool1 = max_pool('pool1', conv1, 3)
    norm1 = norm('norm1', pool1)

    # Conv2 layer: takes the 64-channel output of the first stage. Feeding
    # `images` here, with a 3-channel kernel, would bypass conv1 entirely.
    with tf.variable_scope('conv2') as scope:
        kernel = _variable_on_cpu(
            'weights', [5, 5, 64, 64],
            tf.truncated_normal_initializer(stddev=1e-4))
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
        conv2 = conv2d(scope.name, norm1, kernel, biases)
    pool2 = max_pool('pool2', conv2, 3)
    norm2 = norm('norm2', pool2)

    # local3
    with tf.variable_scope('local3') as scope:
        # Move everything into depth so we can perform a single matrix
        # multiply. The normalised tensor is used so norm2 is not dead code.
        dim = 1
        for d in norm2.get_shape()[1:].as_list():
            dim *= d
        reshape = tf.reshape(norm2, [BATCH_SIZE, dim])
        weights = _variable_on_cpu(
            'weights', [dim, 384],
            tf.truncated_normal_initializer(stddev=0.04))
        biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
        local3 = tf.nn.relu_layer(reshape, weights, biases, name=scope.name)

    # local4
    with tf.variable_scope('local4') as scope:
        weights = _variable_on_cpu(
            'weights', [384, 192],
            tf.truncated_normal_initializer(stddev=0.04))
        biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
        local4 = tf.nn.relu_layer(local3, weights, biases, name=scope.name)

    # Linear layer producing the logits; softmax is not applied here.
    with tf.variable_scope('softmax_linear') as scope:
        weights = _variable_on_cpu(
            'weights', [192, NUM_CLASSES],
            tf.truncated_normal_initializer(stddev=1/192.0))
        biases = _variable_on_cpu(
            'biases', [NUM_CLASSES], tf.constant_initializer(0.0))
        softmax_linear = tf.nn.xw_plus_b(local4, weights, biases, name=scope.name)

    return softmax_linear
def inference(input_tensor, train, regularizer):
    """Forward pass: conv -> pool -> conv -> pool -> fc -> fc.

    Args:
        input_tensor: Input batch. Its static batch and channel dimensions
            are read at graph-construction time, so both must be known.
        train: When truthy, dropout (keep probability 0.5) follows the first
            fully connected layer.
        regularizer: Callable applied to each fully connected weight matrix,
            with the result added to the ``'losses'`` collection; ``None``
            disables it.

    Returns:
        The logits of the final fully connected layer (``NUM_LABELS`` wide).
    """
    # Filter in-channels must equal the depth of the tensor being convolved,
    # not the number of labels.
    in_channels = input_tensor.get_shape().as_list()[-1]

    # Conv layer 1, e.g. 28*28*1 -> 28*28*32.
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable(
            "weight", [CONV1_SIZE, CONV1_SIZE, in_channels, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_bias = tf.get_variable(
            "bias", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
        # Stride 1 with zero padding keeps the spatial size.
        conv1 = tf.nn.conv2d(
            input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_bias))

    # Pooling layer 1, e.g. 28*28*32 -> 14*14*32.
    # name_scope prefixes op names; variable_scope prefixes the names of
    # variables created through get_variable().
    with tf.name_scope('layer2-pool1'):
        pool1 = tf.nn.max_pool(
            relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Conv layer 2, e.g. 14*14*32 -> 14*14*64; its input depth is CONV1_DEEP.
    with tf.variable_scope('layer3-conv2'):
        conv2_weights = tf.get_variable(
            "weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_bias = tf.get_variable(
            "bias", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(
            pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_bias))

    # Pooling layer 2, e.g. 14*14*64 -> 7*7*64.
    with tf.name_scope('layer4-pool2'):
        pool2 = tf.nn.max_pool(
            relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Flatten before the fully connected layers; pool_shape[0] is the batch.
    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshaped = tf.reshape(pool2, [pool_shape[0], nodes])

    # FC1, with dropout during training to limit over-fitting.
    with tf.variable_scope("layer5-fc1"):
        fc1_weights = tf.get_variable(
            "weight", [nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        # The initializer must be passed by keyword: the third positional
        # parameter of tf.get_variable is dtype.
        fc1_bias = tf.get_variable(
            "bias", [FC_SIZE], initializer=tf.constant_initializer(0.0))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_bias)
        if train:
            fc1 = tf.nn.dropout(fc1, 0.5)

    # FC2: output logits.
    with tf.variable_scope("layer6-fc2"):
        fc2_weights = tf.get_variable(
            "weight", [FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_bias = tf.get_variable(
            "bias", [NUM_LABELS], initializer=tf.constant_initializer(0.0))
        logit = tf.matmul(fc1, fc2_weights) + fc2_bias

    return logit
def __init__(self, length, batch_size, voc_size, emb_dim, keep_prob, num_class, state_size, pretrained_emb=None):
    """Store the hyper-parameters and create the model variables.

    All variables are created under the ``'lstm'`` variable scope:
    ``embedding``, ``weight``, ``weight_softmax``, ``h_init`` and ``C_init``.

    Args:
        length: Stored as ``self.length``.
        batch_size: First dimension of ``h_init`` and ``C_init``.
        voc_size: Number of rows of the embedding matrix.
        emb_dim: Number of columns of the embedding matrix.
        keep_prob: Stored as ``self.keep_prob``.
        num_class: Number of columns of the softmax weight.
        state_size: Width of the recurrent state.
        pretrained_emb: Optional embedding values. When given, the embedding
            is initialised from it and is not trainable.
    """
    self.scope = 'lstm'
    self.pretrained_emb = pretrained_emb
    self.state_size = state_size
    self.num_class = num_class
    self.keep_prob = keep_prob
    self.emb_dim = emb_dim
    self.voc_size = voc_size
    self.batch_size = batch_size
    self.length = length

    def _from_pretrained(shape=None, dtype=None):
        # Ignores the requested shape/dtype and returns the stored matrix.
        return self.pretrained_emb

    def _orthogonal(shape=None, dtype=None):
        # Take the right singular vectors of a square Gaussian matrix and
        # crop them to the requested shape.
        side = max(shape)
        gaussian = np.random.randn(side, side)
        _, _, right_vectors = np.linalg.svd(gaussian)
        return right_vectors[:shape[0], :shape[1]].astype(np.float32)

    with tf.variable_scope(self.scope):
        embedding_shape = [self.voc_size, self.emb_dim]
        if self.pretrained_emb is None:
            tf.get_variable(
                'embedding', shape=embedding_shape,
                initializer=tf.truncated_normal_initializer(stddev=0.01))
        else:
            tf.get_variable(
                'embedding', shape=embedding_shape,
                initializer=_from_pretrained, trainable=False)

        # Single matrix holding all four gate blocks.
        tf.get_variable(
            'weight',
            shape=[self.state_size + self.emb_dim, 4 * self.state_size],
            initializer=_orthogonal)

        # Logistic regression layer to convert from h to logits.
        softmax_stddev = math.sqrt(6.0 / self.state_size)
        tf.get_variable(
            'weight_softmax', shape=[self.state_size, self.num_class],
            initializer=tf.truncated_normal_initializer(stddev=softmax_stddev))

        # Non-trainable zero initial hidden and cell states.
        for state_name in ('h_init', 'C_init'):
            tf.get_variable(
                state_name, shape=[self.batch_size, self.state_size],
                initializer=tf.constant_initializer(0.0), trainable=False)
def conv2d(
    x,
    n_filters,
    k_h=5,
    k_w=5,
    stride_h=2,
    stride_w=2,
    stddev=0.02,
    batch_norm=False,
    activation=lambda x: x,
    bias=True,
    padding="SAME",
    name="Conv2D",
):
    """Apply a 2D convolution with configurable kernel, stride and init.

    Parameters
    ----------
    x : Tensor
        Input tensor to convolve; its last dimension is the input depth.
    n_filters : int
        Number of output filters.
    k_h : int, optional
        Kernel height.
    k_w : int, optional
        Kernel width.
    stride_h : int, optional
        Stride in rows.
    stride_w : int, optional
        Stride in cols.
    stddev : float, optional
        Standard deviation of the truncated-normal initialiser used for both
        the kernel and the bias.
    batch_norm : bool, optional
        Whether to apply ``bn(-1)`` to the result.
    activation : callable, optional
        Accepted for interface compatibility. This function does not apply
        it; callers must apply any nonlinearity themselves.
    bias : bool, optional
        Whether to add a learned bias of shape ``[n_filters]``.
    padding : str, optional
        'SAME' or 'VALID'.
    name : str, optional
        Variable scope to use.

    Returns
    -------
    x : Tensor
        Convolved input.
    """
    in_depth = x.get_shape()[-1]
    with tf.variable_scope(name):
        kernel = tf.get_variable(
            "w",
            [k_h, k_w, in_depth, n_filters],
            initializer=tf.truncated_normal_initializer(stddev=stddev),
        )
        result = tf.nn.conv2d(
            x, kernel, strides=[1, stride_h, stride_w, 1], padding=padding)

        if bias:
            offset = tf.get_variable(
                "b",
                [n_filters],
                initializer=tf.truncated_normal_initializer(stddev=stddev),
            )
            result = result + offset

        if batch_norm:
            normaliser = bn(-1)
            result = normaliser(result)

        return result
def generator(z, batch_size, z_dim, reuse=False):
    """Map a latent batch ``z`` to 28x28x1 images with four deconvolutions.

    ``z`` is reshaped to ``[batch_size, 2, 2, 25]`` and upsampled through
    output shapes 3x3x256, 6x6x128, 12x12x64 and finally 28x28x1. The first
    three layers use batch normalisation and ReLU; the last uses tanh.

    Args:
        z: Latent tensor; must hold ``batch_size * 2 * 2 * 25`` values.
        batch_size: Static batch size used in every output shape.
        z_dim: Accepted for interface compatibility; not used here.
        reuse: When truthy, switch the current variable scope to reuse mode.

    Returns:
        Tensor of shape ``[batch_size, 28, 28, 1]`` with values in [-1, 1].
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    base_filters = 64   # Filter count that the hidden layers scale from.
    out_channels = 1    # MNIST is grayscale.
    size = 28           # Output image side.
    # Intermediate sizes that make the upscaling gradual.
    half, quarter, eighth, sixteenth = (
        int(size / 2), int(size / 4), int(size / 8), int(size / 16))

    def _deconv(index, inputs, out_shape, padding):
        # Transposed 5x5 stride-2 convolution plus bias for layer `index`.
        in_depth = int(inputs.get_shape()[-1])
        kernel = tf.get_variable(
            'g_wconv%d' % index, [5, 5, out_shape[-1], in_depth],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        offset = tf.get_variable(
            'g_bconv%d' % index, [out_shape[-1]],
            initializer=tf.constant_initializer(.1))
        return tf.nn.conv2d_transpose(
            inputs, kernel, output_shape=out_shape,
            strides=[1, 2, 2, 1], padding=padding) + offset

    # batch_size x 2 x 2 x 25
    net = tf.reshape(z, [batch_size, sixteenth + 1, sixteenth + 1, 25])
    net = tf.nn.relu(net)

    hidden_shapes = (
        [batch_size, eighth, eighth, base_filters * 4],            # 3 x 3 x 256
        [batch_size, quarter - 1, quarter - 1, base_filters * 2],  # 6 x 6 x 128
        [batch_size, half - 2, half - 2, base_filters * 1],        # 12 x 12 x 64
    )
    for index, out_shape in enumerate(hidden_shapes, start=1):
        net = _deconv(index, net, out_shape, 'SAME')
        net = tf.contrib.layers.batch_norm(
            inputs = net, center=True, scale=True, is_training=True,
            scope="g_bn%d" % index)
        net = tf.nn.relu(net)

    # Final layer: VALID padding, no batch norm, tanh output.
    net = _deconv(4, net, [batch_size, size, size, out_channels], 'VALID')
    return tf.nn.tanh(net)
def _construct_nn(self, use_batch_norm, seperate_validation):
    """Build the autoencoder graph, open a session and create summary writers.

    The default graph is reset, placeholders are created, and one dense
    layer per entry of ``self.structure[1:-1]`` is stacked (optionally with
    batch normalisation, then activation and dropout). A final dense layer of
    width ``self.structure[-1]`` produces the output. A structure without
    hidden layers connects the input directly to the output layer.

    Args:
        use_batch_norm: Whether to batch-normalise each hidden layer before
            its activation.
        seperate_validation: When truthy, separate ``train/`` and ``val/``
            summary writers are created under ``self._ld``; otherwise one
            writer on ``self._ld``.

    Sets ``_input_ph``, ``_dropout_keep_rate``, ``_train_mode``, ``_output``,
    ``_labels``, ``_train_writer`` (and ``_val_writer``), ``_sess`` and
    ``_network``; ``_encoding`` is set only when a hidden layer's 1-based
    index equals ``self.encoding_layer_index``.
    """
    tf.reset_default_graph()
    clear_start([self._ld])

    if self._random_state is not None:
        if self._verbose:
            print('seed is fixed to {}'.format(self._random_state))
        tf.set_random_seed(self._random_state)
        np.random.seed(self._random_state)

    layers = []
    self._input_ph = tf.placeholder(
        tf.float32, shape=[None, self.structure[0]], name='input')
    self._dropout_keep_rate = tf.placeholder_with_default(
        1., shape=None, name='keep_rate')
    self._train_mode = tf.placeholder_with_default(
        False, shape=None, name='train_mode')
    layers.append(self._input_ph)

    # NOTE(review): the source's indentation was lost; the output layer is
    # assumed to live inside the 'autoencoder' scope and the label
    # placeholder outside it. Confirm against existing checkpoints.
    with tf.variable_scope('autoencoder'):
        # Starting from the input makes the zero-hidden-layer case valid
        # instead of hitting an undefined `x` at the output layer.
        x = self._input_ph
        for j, n_neurons in enumerate(self.structure[1:-1], start=1):
            x = tf.layers.dense(
                x, n_neurons, name='hidden_%s' % j,
                kernel_initializer=tf.truncated_normal_initializer())
            if use_batch_norm:
                x = tf.layers.batch_normalization(
                    x, axis=1, training=self._train_mode, scale=False)
            layers.append(x)

            x = self.activation_fn(x)
            layers.append(x)

            # tf.layers.dropout takes a drop rate, hence 1 - keep rate.
            x = tf.layers.dropout(
                x, tf.subtract(1., self._dropout_keep_rate),
                name='dropout_%s' % j)
            layers.append(x)

            if j == self.encoding_layer_index:
                x = tf.identity(x, name='encoding')
                self._encoding = x

        self._output = tf.layers.dense(
            x, self.structure[-1], name='output',
            kernel_initializer=tf.truncated_normal_initializer())

    self._labels = tf.placeholder(
        tf.float32, shape=[None, self.structure[-1]], name='label')
    layers.append(self._output)

    if self._cpu_only:
        device = '/cpu:{}'.format(self._cpu_number)
    else:
        device = '/gpu:{}'.format(self._gpu_number)

    with tf.device(device):
        sess = tf.Session(config=self._config)
        if seperate_validation:
            self._train_writer = tf.summary.FileWriter(
                self._ld + 'train/', sess.graph)
            # NOTE(review): only the CPU path attaches the graph to the
            # validation writer; preserved as-is. Confirm whether intended.
            if self._cpu_only:
                self._val_writer = tf.summary.FileWriter(
                    self._ld + 'val/', sess.graph)
            else:
                self._val_writer = tf.summary.FileWriter(self._ld + 'val/')
        else:
            self._train_writer = tf.summary.FileWriter(self._ld, sess.graph)

    self._sess = sess
    self._network = layers
def discriminator(images, reuse=False):
    """
    Create the discriminator network
    :param images: Tensor of input image(s)
    :param reuse: Boolean if the weights should be reused
    :return: Tuple of (tensor output of the discriminator, tensor logits of the discriminator)
    """
    # Dropout is applied throughout the discriminator to weaken it relative
    # to the generator and to help it generalise.
    keep_probability = 0.8

    # (filters, apply batch norm) for each stride-2 convolution. The first
    # layer has no batch normalisation. For 28x28 inputs the spatial sizes
    # are 14x14, 7x7 and 4x4.
    conv_plan = ((128, False), (256, True), (512, True))

    with tf.variable_scope('discriminator', reuse=reuse):
        net = images
        for filters, with_batch_norm in conv_plan:
            net = tf.layers.conv2d(
                net, filters, 5, strides=2, padding='same',
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            net = tf.nn.dropout(net, keep_probability)
            if with_batch_norm:
                net = tf.layers.batch_normalization(net, training=True)
            net = LeakyReLU(net)

        # Flatten the 4x4x512 feature map and project it to a single logit.
        flat = tf.reshape(net, (-1, 4 * 4 * 512))
        logits = tf.layers.dense(
            flat, 1,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.02))
        logits = tf.nn.dropout(logits, keep_probability)
        out = tf.sigmoid(logits)

        return out, logits
def generator(z, out_channel_dim, is_train=True):
    """
    Create the generator network
    :param z: Input z
    :param out_channel_dim: The number of channels in the output image
    :param is_train: Boolean if generator is being used for training
    :return: The tensor output of the generator
    """
    # Variables are created when training and reused otherwise.
    with tf.variable_scope('generator', reuse=not is_train):
        # 1st fully connected layer
        x1 = tf.layers.dense(
            z, 7 * 7 * 512,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.02))
        # Reshape to start the convolutional stack. The channel count must
        # match the dense width (7 * 7 * 512); reshaping to 256 channels
        # would silently double the batch dimension instead.
        x1 = tf.reshape(x1, (-1, 7, 7, 512))
        x1 = tf.layers.batch_normalization(x1, training=is_train)
        x1 = LeakyReLU(x1)
        # 7x7x512 now

        # 2nd layer
        x2 = tf.layers.conv2d_transpose(
            x1, 256, 5, strides=2, padding='same',
            kernel_initializer=tf.contrib.layers.xavier_initializer())
        x2 = tf.layers.batch_normalization(x2, training=is_train)
        x2 = LeakyReLU(x2)
        # 14x14x256 now

        # 3rd layer
        x3 = tf.layers.conv2d_transpose(
            x2, 128, 5, strides=2, padding='same',
            kernel_initializer=tf.contrib.layers.xavier_initializer())
        x3 = tf.layers.batch_normalization(x3, training=is_train)
        x3 = LeakyReLU(x3)
        # 28x28x128 now

        # 4th, output layer; strides=1 because no upscaling is needed.
        logits = tf.layers.conv2d_transpose(
            x3, out_channel_dim, 5, strides=1, padding='same',
            kernel_initializer=tf.contrib.layers.xavier_initializer())
        # 28x28xout_channel_dim now

        out = tf.tanh(logits)

        return out
def create_initializer(initializer_range=0.02):
    """Return a truncated-normal initializer whose stddev is `initializer_range`."""
    initializer = tf.truncated_normal_initializer(stddev=initializer_range)
    return initializer
def get_weight(self, shape, name):
    """Get or create variable `name` of `shape`, truncated-normal (stddev 0.01) initialised."""
    weight_init = tf.truncated_normal_initializer(stddev=0.01)
    weight = tf.get_variable(name=name, shape=shape, initializer=weight_init)
    return weight
def __init__(
        self,
        inputs,
        inputs_depth,
        z,
        input_sequence_length,
        output_sequence_length,
        cell_type='gru',
        project_to_rnn_output=False,
        reverse_input=False,
        use_attention=False,
        use_residual=True,
        bias_initializer=tf.constant_initializer(0.),
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.001),
        reuse=False):
    '''
    Initialize the generative network.

    Args:
        inputs(tf.placeholder): The input variable containing current data.
        inputs_depth(int): input embed size.
        z(tf.placeholder, optional): A random generated input vector used as input.
        input_sequence_length(int): the length of the input sequence.
        output_sequence_length(int): the length of the resulted sequence.
        cell_type(str): The type of cell to use for the encoder and decoder.
        project_to_rnn_output(bool): project the input to the number of hidden
            units in the RNN. Forced to True when `use_residual` is set.
        reverse_input(bool): reverse the input sequence (axis 1) before feeding
            it to the network.
        use_attention(bool): true to use attention instead of the last state of the encoder.
        use_residual(bool): use a ResNet-like structure for the recurrent part.
        bias_initializer: initializer for the bias value.
        kernel_initializer: initializer for the `W` parameters.
        reuse(bool): True to reuse model parameters from a previously created model.
    '''
    # Input description.
    self._reuse = reuse
    self._batch_size = tf.shape(inputs)[0]  # dynamic batch size
    self._input_sequence_length = input_sequence_length
    self._output_sequence_length = output_sequence_length
    self._inputs_depth = inputs_depth
    self._inputs_shape = inputs.shape
    self._element_shape = inputs.shape[2:].as_list()

    # Containers filled while the network is built.
    self._output = None
    self._parameters = []
    self._weights = []

    # Fixed architecture sizes.
    self._num_neurons = 1024
    self._num_layers = 2
    self._num_nn_layers = 2

    # Cell type and initializers.
    self._cell_type = cell_type
    self._bias_initializer = bias_initializer
    self._kernel_initializer = kernel_initializer
    self._reccurent_bias_initializer = None
    self._reccurent_kernel_initializer = None

    # Residual connections require the input projected to the RNN width.
    self._use_attention = use_attention
    self._use_residual = use_residual
    self._project_to_rnn_output = (
        True if self._use_residual else project_to_rnn_output)

    # Similar to tf.zeros but supports a variable batch size.
    if self._project_to_rnn_output:
        zeros_width = self._num_neurons
    else:
        zeros_width = self._inputs_depth
    self._zeros_input = tf.fill(
        tf.stack([tf.shape(inputs)[0], zeros_width]), 0.0)

    if reverse_input:
        inputs = tf.reverse(inputs, axis=[1])

    self._build(inputs, z)
def __call__(self, inputs, state, scope=None):
    """Run one step of the cell and return ``(output, out_state)``.

    Notation used in the comments below:
        recur*: r
        state*: mu
        stats*: phi
        freq*: frequency vector

    Args:
        inputs: Input tensor for the current time step.
        state: Previous state; reshaped to
            ``[-1, self._nfreqs, self._num_stats]``.
        scope: Optional variable scope name; defaults to the class name.

    Returns:
        Tuple ``(output, out_state)``, i.e. o_t and mu_t.

    Side effects: advances the time step through ``self.next_time_step()``
    and, on the first call (while ``self.W`` is empty), collects the linear
    layers' variables into ``self.W`` / ``self.b`` and prints them.
    """
    has_recur_feats = self._recur_dims > 0

    with tf.variable_scope(scope or type(self).__name__):
        self._freqs = tf.reshape(
            tf.get_variable(
                "frequency", initializer=self._freqs_array, trainable=False),
            [1, -1, 1])
        self._phases = tf.reshape(
            tf.get_variable(
                "phase", [self._nfreqs],
                initializer=tf.truncated_normal_initializer(
                    stddev=0.1, dtype=tf.float32),
                trainable=True),
            [1, -1, 1])
        self._freqs_mask = tf.reshape(
            tf.get_variable(
                "frequency_mask", initializer=self._freqs_mask_array,
                trainable=False),
            [1, -1, 1])

        # Make statistics on input.
        if has_recur_feats:
            # r_t = f(W^r mu_{t-1} + b^r)
            recur_output = self._activation(_linear(
                state, self._recur_dims, True, scope='recur_feats'
            ), name='recur_feats_act')
            # phi_t = W^phi r_t + W^x x_t + b^phi
            stats = self._activation(_linear(
                [inputs, recur_output], self._num_stats, True, scope='stats',
            ), name='stats_act')
        else:
            stats = self._activation(_linear(
                inputs, self._num_stats, True, scope='stats'
            ), name='stats_act')

        # Compute moving averages of statistics for the state.
        with tf.variable_scope('out_state'):
            state_tensor = tf.reshape(
                state, [-1, self._nfreqs, self._num_stats], 'state_tensor'
            )
            stats_tensor = tf.reshape(
                stats, [-1, 1, self._num_stats], 'stats_tensor'
            )
            # mu_t = mask*mu_{t-1} + cos(2*pi*w*t/T + 2*pi*phase)*phi_t
            out_state = tf.reshape(
                self._freqs_mask*state_tensor
                + 1.0/self._seq_len*tf.cos(
                    2.0*math.pi/self._seq_len*self.cur_time_step*self._freqs
                    + 2.0*math.pi*self._phases)*stats_tensor,
                [-1, self.state_size], 'out_state')

        # Compute the output.
        if self._include_input:
            output_vars = [out_state, inputs]
        else:
            output_vars = out_state
        # o_t = W^o mu_t + b^o
        output = _linear(
            output_vars, self._output_dims, True, scope='output'
        )
        if not self._linear_out:
            output = self._activation(output, name='output_act')

        # update time step
        self.next_time_step()

        # Retrieve RNN variables once.
        if not self.W:
            retrieval_scopes = ['stats', 'output']
            # The 'recur_feats' layer only exists when recurrent features
            # were built; fetching it with reuse=True otherwise raises.
            if has_recur_feats:
                retrieval_scopes.insert(0, 'recur_feats')
            for layer_scope in retrieval_scopes:
                with tf.variable_scope(layer_scope, reuse=True):
                    self.W.append(tf.get_variable('Matrix'))
                    self.b.append(tf.get_variable('Bias'))
            print("W = ", self.W)
            print("b = ", self.b)

    # o_t and mu_t
    return (output, out_state)
def inference(images, batch_size, n_classes):
    """Build a two-conv, two-dense classifier and return its logits.

    Args:
        images: Input batch with 3 channels (first kernel is 3x3x3x16).
        batch_size: Static batch size used to flatten the feature maps.
        n_classes: Number of output logits.

    Returns:
        Tensor of shape ``[batch_size, n_classes]`` with unnormalised logits.
    """
    with tf.variable_scope('conv1') as scope:
        weights = tf.get_variable(
            'weight', shape=[3, 3, 3, 16], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(
                stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable(
            'biases', shape=[16], dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(images, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)

    with tf.variable_scope('pooling_lrn') as scope:
        pool1 = tf.nn.max_pool(
            conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
            padding='SAME', name='pooling1')
        # The keyword is `beta`; a misspelt keyword raises TypeError.
        norm1 = tf.nn.lrn(
            pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
            beta=0.75, name='norm1')

    with tf.variable_scope('conv2') as scope:
        # Input here is norm1, which has 16 channels.
        weights = tf.get_variable(
            'weight', shape=[3, 3, 16, 16], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(
                stddev=0.1, dtype=tf.float32))
        biases = tf.get_variable(
            'biases', shape=[16], dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(norm1, weights, strides=[1, 1, 1, 1], padding='SAME')
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name='conv2')

    with tf.variable_scope('pooling2_lrn') as scope:
        norm2 = tf.nn.lrn(
            conv2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0,
            beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(
            norm2, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1],
            padding='SAME', name='pooling2')

    with tf.variable_scope('local3') as scope:
        reshape = tf.reshape(pool2, shape=[batch_size, -1])
        dim = reshape.get_shape()[1].value
        # Weights need a random initializer: constant_initializer accepts no
        # `stddev`, and constant weights would not break symmetry.
        weights = tf.get_variable(
            'weights', shape=[dim, 128], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(
                stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable(
            'biases', shape=[128], dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)

    with tf.variable_scope('local4') as scope:
        weights = tf.get_variable(
            'weights', shape=[128, 128], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(
                stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable(
            'biases', shape=[128], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(
                stddev=0.005, dtype=tf.float32))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name='local4')

    with tf.variable_scope('softmax_linear') as scope:
        weights = tf.get_variable(
            'softmax_linear', shape=[128, n_classes], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(
                stddev=0.005, dtype=tf.float32))
        biases = tf.get_variable(
            'biases', shape=[n_classes], dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        # Linear logits only; softmax is left to the loss function.
        softmax_linear = tf.add(
            tf.matmul(local4, weights), biases, name='softmax_linear')

    return softmax_linear
# gamespace display = False game=Environment.GameV1(display) game.populateGameArray() prev_x = None xs, rs, rs2, ys, vel, velavg = [], [], [], [], [], [] running_reward = None reward_sum = 0 observation = np.zeros(shape=(200,300)) episode_number = 0 WillContinue = True # initialize model tf_model = {} with tf.variable_scope('layer_one', reuse=False): xavier_l1 = tf.truncated_normal_initializer(mean=0, stddev=1. / np.sqrt(n_obs), dtype=tf.float32) tf_model['W1'] = tf.get_variable("W1", [n_obs, h], initializer=xavier_l1) with tf.variable_scope('layer_two', reuse=False): xavier_l2 = tf.truncated_normal_initializer(mean=0, stddev=1. / np.sqrt(h), dtype=tf.float32) tf_model['W2'] = tf.get_variable("W2", [h, n_actions], initializer=xavier_l2) def discount_rewards(rewardarray, velocityarray): rewardarray.reverse() velocityarray.reverse() gamenumber = 0 for i in range(len(rewardarray)): if rewardarray[i] != 0: if rewardarray[i] > 0: rewardarray[i] = (len(rewardarray)-i)/300 * rewardarray[0] gamenumber +=1 else: rewardarray[i] = rewardarray[i] * math.pow(6-velocityarray[gamenumber], (300-len(rewardarray)+i)/300)
# -*- coding: utf-8 -* - ''' 使用arg scope在不同的层之间共享参数值 ''' import tensorflow.contrib.slim as slim import tensorflow as tf input = slim.variable( "input", [1, 28, 28, 3], weights_initializer=tf.truncated_normal_initializer(stddev=0.01)) # 以下三个卷积层共用很多超参数,读起来比较晦涩 net = slim.conv2d( input, 64, [11, 11], 4, padding='SAME', weights_initializer=tf.truncated_normal_initializer(stddev=0.01), weights_regularizer=slim.l2_regularizer(0.0005), scope='conv1') net = slim.conv2d( net, 128, [11, 11], padding='VALID', weights_initializer=tf.truncated_normal_initializer(stddev=0.01), weights_regularizer=slim.l2_regularizer(0.0005), scope='conv2') net = slim.conv2d( net, 256, [11, 11], padding='SAME',
# He : tf.contrib.layers.variance_scaling_initializer() # Normal : tf.random_normal_initializer(mean=0.0, stddev=0.02) # Truncated_normal : tf.truncated_normal_initializer(mean=0.0, stddev=0.02) # Orthogonal : tf.orthogonal_initializer(0.02) ################################################################################## # Regularization ################################################################################## # l2_decay : tf.contrib.layers.l2_regularizer(0.0001) # orthogonal_regularizer : orthogonal_regularizer(0.0001) # orthogonal_regularizer_fully(0.0001) # factor, mode, uniform = pytorch_xavier_weight_factor(gain=0.02, uniform=False) # weight_init = tf_contrib.layers.variance_scaling_initializer(factor=factor, mode=mode, uniform=uniform) weight_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.02) weight_regularizer = tf.contrib.layers.l2_regularizer(0.0001) weight_regularizer_fully = tf.contrib.layers.l2_regularizer(0.0001) ################################################################################## # Layers ################################################################################## # padding='SAME' ======> pad = floor[ (kernel - stride) / 2 ] def conv(x, channels, kernel=4, stride=2, pad=0, pad_type='zero', use_bias=True, sn=False, scope='conv_0'): with tf.variable_scope(scope): if pad > 0: h = x.get_shape().as_list()[1] if h % stride == 0: pad = pad * 2 else:
def generator2(input, is_train, reuse=False):
    """Build the encoder-decoder generator graph under the 'gene' scope.

    Four stride-2 5x5 convolutions (32, 64, 128 and 256 filters) are each
    followed by batch norm, ``lrelu`` and dropout. They are mirrored by four
    stride-2 5x5 transposed convolutions (128, 64, 32 and ``CHANNEL``
    filters), each followed by batch norm and ReLU.

    Args:
        input: tensor fed to the first convolution.
        is_train: forwarded to every batch-norm layer as ``is_training``.
        reuse: when truthy, the variables of the 'gene' scope are reused.

    Returns:
        The ReLU output of the last transposed-convolution stage ('act8').
    """
    # Each entry: (filters, conv layer name, batch-norm scope, activation name)
    down_stages = (
        (32, 'conv1', 'bn1', 'act1'),
        (64, 'conv2', 'bn2', 'act2'),
        (128, 'conv3', 'bn3', 'act3'),
        (256, 'conv4', 'bn4', 'act4'),
    )
    up_stages = (
        (128, 'conv5', 'bn5', 'act5'),
        (64, 'conv6', 'bn6', 'act6'),
        (32, 'conv7', 'bn7', 'act7'),
        (CHANNEL, 'conv8', 'bn8', 'act8'),
    )

    with tf.variable_scope('gene') as scope:
        if reuse:
            scope.reuse_variables()

        net = input

        # Encoder: convolution -> batch norm -> leaky ReLU -> dropout.
        for filters, conv_name, bn_scope, act_name in down_stages:
            net = tf.layers.conv2d(
                net, filters,
                kernel_size=[5, 5],
                strides=[2, 2],
                padding="SAME",
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                name=conv_name)
            net = tf.contrib.layers.batch_norm(
                net,
                is_training=is_train,
                epsilon=1e-5,
                decay=0.9,
                updates_collections=None,
                scope=bn_scope)
            net = lrelu(net, n=act_name)
            # Dropout is applied unconditionally; it does not look at is_train.
            net = tf.nn.dropout(net, keep_prob=0.5)

        # Decoder: transposed convolution -> batch norm -> ReLU.
        for filters, conv_name, bn_scope, act_name in up_stages:
            net = tf.layers.conv2d_transpose(
                net, filters,
                kernel_size=[5, 5],
                strides=[2, 2],
                padding="SAME",
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                name=conv_name)
            net = tf.contrib.layers.batch_norm(
                net,
                is_training=is_train,
                epsilon=1e-5,
                decay=0.9,
                updates_collections=None,
                scope=bn_scope)
            net = tf.nn.relu(net, name=act_name)

        return net
def msra_initializer(kl, dl):
    """Return a truncated-normal initializer scaled by kernel size and filters.

    The standard deviation is ``sqrt(2 / (kl^2 * dl))``.

    Args:
        kl: kernel size.
        dl: filter number.

    Returns:
        A ``tf.truncated_normal_initializer`` with the computed ``stddev``.
    """
    scaled_fan = kl ** 2 * dl
    return tf.truncated_normal_initializer(stddev=math.sqrt(2. / scaled_fan))
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Contains the definition for inception v1 classification network.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import tensorflow as tf slim = tf.contrib.slim trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) from tensorboxresnet.utils import tf_concat def inception_v1_base(inputs, final_endpoint='Mixed_5c', scope='InceptionV1'): """Defines the Inception V1 base architecture. This architecture is defined in: Going deeper with convolutions Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich. http://arxiv.org/pdf/1409.4842v1.pdf. Args: inputs: a tensor of size [batch_size, height, width, channels].
def generator(z, batch_size, z_dim, reuse=False):
    """Build a four-stage transposed-convolution generator for 28x28x1 images.

    ``z`` is reshaped to ``[batch_size, 2, 2, 25]`` and upsampled through
    stride-2 5x5 transposed convolutions to 3x3x256, 6x6x128, 12x12x64 and
    finally 28x28x1. The first three stages use 'SAME' padding followed by
    batch norm and ReLU; the last uses 'VALID' padding followed by tanh.

    Args:
        z: input tensor; must hold ``batch_size * 2 * 2 * 25`` values.
        batch_size: leading dimension used for every output shape.
        z_dim: not read by this function.
        reuse: when truthy, the current variable scope is switched to reuse.

    Returns:
        The tanh output of shape ``[batch_size, 28, 28, 1]``.
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    base_filters = 64   # number of filters of the last hidden stage
    out_channels = 1    # colour channels of the generated image
    side = 28           # height/width of the generated image
    half, quarter, eighth, sixteenth = (
        int(side / 2), int(side / 4), int(side / 8), int(side / 16))

    def _deconv(inputs, weight_name, bias_name, out_shape, padding):
        # One stride-2 5x5 transposed convolution plus bias.
        filters_out = out_shape[-1]
        filters_in = int(inputs.get_shape()[-1])
        kernel = tf.get_variable(
            weight_name, [5, 5, filters_out, filters_in],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        offset = tf.get_variable(
            bias_name, [filters_out],
            initializer=tf.constant_initializer(.1))
        upsampled = tf.nn.conv2d_transpose(
            inputs, kernel,
            output_shape=out_shape,
            strides=[1, 2, 2, 1],
            padding=padding)
        return upsampled + offset

    # batch_size x 2 x 2 x 25
    net = tf.reshape(z, [batch_size, sixteenth + 1, sixteenth + 1, 25])
    net = tf.nn.relu(net)

    # (weight name, bias name, batch-norm scope, output shape)
    hidden_stages = (
        ('g_wconv1', 'g_bconv1', "g_bn1",
         [batch_size, eighth, eighth, base_filters * 4]),            # 3 x 3 x 256
        ('g_wconv2', 'g_bconv2', "g_bn2",
         [batch_size, quarter - 1, quarter - 1, base_filters * 2]),  # 6 x 6 x 128
        ('g_wconv3', 'g_bconv3', "g_bn3",
         [batch_size, half - 2, half - 2, base_filters * 1]),        # 12 x 12 x 64
    )
    for weight_name, bias_name, bn_scope, out_shape in hidden_stages:
        net = _deconv(net, weight_name, bias_name, out_shape, 'SAME')
        net = tf.contrib.layers.batch_norm(
            inputs=net, center=True, scale=True, is_training=True,
            scope=bn_scope)
        net = tf.nn.relu(net)

    # Final stage: batch_size x 28 x 28 x 1, no batch norm, tanh output.
    net = _deconv(net, 'g_wconv4', 'g_bconv4',
                  [batch_size, side, side, out_channels], 'VALID')
    return tf.nn.tanh(net)
def inference(images, num_classes, dropout_keep_prob=0.6, is_training=True,
              reuse=False):
    """Build a small CNN classifier: two conv/pool stages, one fc layer, logits.

    Args:
        images: input batch with 3 channels (the first kernel is 3x3x3x16).
        num_classes: width of the final logits layer.
        dropout_keep_prob: keep probability; the dropout rate is
            ``1 - dropout_keep_prob``.
        is_training: forwarded to ``tf.layers.dropout`` as ``training``.
        reuse: not read by this function.

    Returns:
        A ``(logits, end_points)`` pair. ``end_points`` maps 'Classes' to the
        argmax over the logits and 'Predictions' to their softmax.
    """

    def _weights_and_biases(weight_shape, stddev):
        # Creates 'weights' and 'biases' in the current variable scope; the
        # bias length equals the last dimension of the weight shape.
        weights = tf.get_variable(
            'weights',
            shape=weight_shape,
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(
                stddev=stddev, dtype=tf.float32))
        biases = tf.get_variable(
            'biases',
            shape=[weight_shape[-1]],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        return weights, biases

    def _conv_relu(inputs, kernel_shape, op_name):
        # Stride-1 'SAME' convolution + bias + ReLU.
        kernel, offset = _weights_and_biases(kernel_shape, 0.1)
        conv = tf.nn.conv2d(inputs, kernel, strides=[1, 1, 1, 1],
                            padding='SAME')
        return tf.nn.relu(tf.nn.bias_add(conv, offset), name=op_name)

    def _max_pool(inputs, op_name):
        # 3x3 max pooling with stride 2.
        return tf.nn.max_pool(inputs,
                              ksize=[1, 3, 3, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name=op_name)

    # conv1: 16 feature maps from 3x3 kernels
    with tf.variable_scope('conv1') as scope:
        conv1 = _conv_relu(images, [3, 3, 3, 16], scope.name)

    with tf.variable_scope('pool1'):
        pool1 = _max_pool(conv1, 'pooling1')

    with tf.variable_scope('conv2'):
        conv2 = _conv_relu(pool1, [3, 3, 16, 16], 'conv2')

    with tf.variable_scope('pool2'):
        pool2 = _max_pool(conv2, 'pooling2')

    # local3: flatten and project to 128 units
    with tf.variable_scope('local3') as scope:
        _, height, width, depth = pool2.shape
        flat = tf.reshape(
            pool2, shape=[-1, height.value * width.value * depth.value])
        flat_dim = flat.get_shape()[1].value
        weights, biases = _weights_and_biases([flat_dim, 128], 0.005)
        local3 = tf.nn.relu(tf.matmul(flat, weights) + biases,
                            name=scope.name)

    # Dropout keeps each element with probability dropout_keep_prob.
    with tf.variable_scope('dropout'):
        dropout = tf.layers.dropout(inputs=local3,
                                    rate=1 - dropout_keep_prob,
                                    training=is_training)

    # Linear read-out; softmax is only applied in end_points.
    with tf.variable_scope('logits'):
        weights, biases = _weights_and_biases([128, num_classes], 0.005)
        logits = tf.add(tf.matmul(dropout, weights), biases, name='logits')

    end_points = {
        'Classes': tf.argmax(input=logits, axis=1, name='Classes'),
        'Predictions': tf.nn.softmax(logits, name='Predictions')
    }
    return logits, end_points
def __init__(self, hps, init_emb=None):
    """Store hyper-parameters and build the TensorFlow graph for the model.

    Args:
        hps: hyper-parameter object. This method reads ``bucket``, ``mode``,
            ``batch_size``, ``his_mem_slots``, ``his_mem_size``,
            ``learning_rate``, ``decay_rate``, ``vocab_size``, ``emb_size``,
            ``ph_emb_size``, ``len_emb_size``, ``key_slots``, ``sens_num``
            and ``hidden_size`` from it.
        init_emb: optional pre-trained word-embedding values. When given,
            the word embedding is initialised from them with a constant
            initializer; otherwise a truncated normal (stddev 1e-4) is used.
    """
    # Create the model
    self.hps = hps
    self.enc_len = hps.bucket[0]
    self.dec_len = hps.bucket[1]
    self.mode = self.hps.mode
    self.keep_prob = tf.placeholder(tf.float32)
    self.global_step = tf.Variable(0, trainable=False)
    self.zeros = np.zeros((self.hps.batch_size, self.hps.his_mem_slots), dtype=np.float32)
    self.gama = tf.constant(20.0)
    self.learning_rate = tf.Variable(float(self.hps.learning_rate), trainable=False)
    # Running this op multiplies the stored learning rate by hps.decay_rate.
    self.learning_rate_decay_op = \
        self.learning_rate.assign(self.learning_rate * self.hps.decay_rate)
    # Any mode other than 'train' uses an effective batch size of 1.
    self.b_size = self.hps.batch_size if self.mode == 'train' else 1

    # Random bias for memory writing
    # init_val is the geometric sequence 1, 1/5, 1/25, ... (one per slot).
    init_val = []
    v = 1.0
    for i in xrange(0, self.hps.his_mem_slots):
        init_val.append(v)
        v /= 5.0
    bias_val = []
    for i in xrange(0, self.b_size):
        # Each row gets its own in-place permutation of the slot biases;
        # with a single row the original order is kept.
        if self.b_size > 1:
            random.shuffle(init_val)
        # A trailing 0.0 is appended for one extra column.
        bias_val.append(np.array(init_val+[0.0]))
    bias_val = np.array(bias_val)
    print ("Shape of random bias: %s" % (str(np.shape(bias_val)))) #[b_size,his_mem_slots+1]
    self.random_bias = tf.constant(bias_val, dtype=tf.float32)

    # The null slot
    # Every entry of the null slot is -1e-2.
    null_mem = np.zeros([self.b_size, self.hps.his_mem_size], dtype=np.float32) - 1e-2
    self.null_mem = np.expand_dims(null_mem, 1) #[batch_size,1,his_mem_size] note: this is np.expand_dims, not the tf one

    # Build the graph
    self.__build_placeholders()

    # Build word embedding
    with tf.variable_scope('word_embedding'), tf.device('/cpu:0'):
        if init_emb is not None:
            print ("Initialize embedding with pre-trained word2vec.")
            initializer = tf.constant_initializer(init_emb)
        else:
            print ("Initialize embedding with normal distribution.")
            initializer = tf.truncated_normal_initializer(stddev=1e-4)
        word_emb = tf.get_variable('word_emb', [self.hps.vocab_size, self.hps.emb_size], dtype=tf.float32, initializer=initializer, trainable= True)

    # NOTE(review): the nesting of the lookups below relative to the `with`
    # blocks was reconstructed from a whitespace-mangled source - confirm.
    # The same word embedding table is used for encoder, decoder and keys.
    self.emb_enc_inps = [ [tf.nn.embedding_lookup(word_emb, x) for x in enc_inp] for enc_inp in self.enc_inps] #[None,emb_size]
    self.emb_dec_inps = [ [tf.nn.embedding_lookup(word_emb, x) for x in dec_inp] for dec_inp in self.dec_inps] #[None,emb_size]
    self.emb_key_inps = [ [tf.nn.embedding_lookup(word_emb, x) for x in self.key_inps[i] ] #[None,emb_size]
        for i in xrange(0, self.hps.key_slots)]

    # Build genre embedding
    with tf.variable_scope('ph_embedding'), tf.device('/cpu:0'):
        # NOTE: we set fixed 36 phonology categories
        ph_emb = tf.get_variable('ph_emb', [36, self.hps.ph_emb_size], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=1e-4))
    emb_ph_inps = [ [tf.nn.embedding_lookup(ph_emb, x) for x in ph_inp] for ph_inp in self.ph_inps] #[None,ph_emb_size]

    with tf.variable_scope('len_embedding'), tf.device('/cpu:0'):
        len_emb = tf.get_variable('len_emb', [self.dec_len+1, self.hps.len_emb_size], dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=1e-4))
    emb_len_inps = [ [tf.nn.embedding_lookup(len_emb, x) for x in len_inp] for len_inp in self.len_inps] #[None,len_emb_size]

    # Concatenate phonology embedding and length embedding to form the genre embedding
    self.emb_genre = [[] for x in xrange(self.hps.sens_num)]
    for step in xrange(0, self.hps.sens_num):
        for i in xrange(self.dec_len+1):
            self.emb_genre[step].append(array_ops.concat([emb_ph_inps[step][i], emb_len_inps[step][i]], 1) )

    # Cells for the encoder
    enc_cell_fw = tf.nn.rnn_cell.GRUCell(self.hps.hidden_size)
    enc_cell_bw = tf.nn.rnn_cell.GRUCell(self.hps.hidden_size)
    # Both directions apply dropout on inputs and outputs with the shared
    # keep_prob placeholder.
    self.enc_cell_fw = tf.nn.rnn_cell.DropoutWrapper(enc_cell_fw, output_keep_prob=self.keep_prob, input_keep_prob = self.keep_prob)
    self.enc_cell_bw = tf.nn.rnn_cell.DropoutWrapper(enc_cell_bw, output_keep_prob=self.keep_prob, input_keep_prob = self.keep_prob)

    if self.hps.mode == 'train':
        self.build_train_graph()
    else:
        self.build_gen_graph()

    # saver
    # Saves all global variables using the V1 checkpoint format.
    self.saver = tf.train.Saver(tf.global_variables() , write_version=tf.train.SaverDef.V1)
def inference(input_tensor, train, regularizer):
    """Build a conv-pool-conv-pool-fc-fc network and return its logits.

    Layer sizes come from the module-level constants ``CONV1_LENGTH``,
    ``CONV1_WIDE``, ``CONV1_DEEP``, ``CONV2_LENGTH``, ``CONV2_WIDE``,
    ``CONV2_DEEP``, ``NUM_CHANNELS``, ``FC_SIZE`` and ``NUM_LABELS``.

    Args:
        input_tensor: input batch; pooling output must have static height,
            width and depth because they are multiplied to size the first
            fully connected layer.
        train: when truthy, dropout is applied after the first fc layer.
        regularizer: optional callable. When not ``None`` it is applied to
            both fully connected weight matrices and the results are added
            to the 'losses' collection.

    Returns:
        The un-normalised logits tensor of the last fully connected layer.
    """
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable(
            "weight", [CONV1_LENGTH, CONV1_WIDE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable(
            "bias", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights,
                             strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    with tf.name_scope("layer2-pool1"):
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding="SAME")

    with tf.variable_scope("layer3-conv2"):
        conv2_weights = tf.get_variable(
            "weight", [CONV2_LENGTH, CONV2_WIDE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable(
            "bias", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights,
                             strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

    with tf.name_scope("layer4-pool2"):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='SAME')
        # Flatten everything except the batch dimension.
        pool_shape = pool2.get_shape().as_list()
        nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
        reshaped = tf.reshape(pool2, [-1, nodes])

    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable(
            "weight", [nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Identity test against None: `!= None` would go through the
        # object's rich-comparison hooks.
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable(
            "bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if train:
            # `dr` is not defined in this function; it is read from module
            # scope and passed as the second argument of tf.nn.dropout.
            fc1 = tf.nn.dropout(fc1, dr)

    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable(
            "weight", [FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable(
            "bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit
def linear(x, hidden, name='linear'):
    """Apply an affine map ``x @ weight + bias`` inside variable scope ``name``.

    Args:
        x: 2-D input whose last static dimension sizes the weight matrix.
        hidden: number of output units.
        name: variable scope holding the 'weight' and 'bias' variables.

    Returns:
        The result of ``tf.matmul(x, weight) + bias``.
    """
    with tf.variable_scope(name):
        in_dim = x.get_shape()[-1]
        kernel = tf.get_variable(
            'weight', [in_dim, hidden],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        offset = tf.get_variable(
            'bias', [hidden],
            initializer=tf.constant_initializer(0))
        return tf.matmul(x, kernel) + offset
dim_hop1 = L11 * 2 # add self-loop for i in range(n_steps): Graphs[i, :, :] += np.eye(n_node, dtype=np.int32) Data_idx = np.arange(n_node) X_train_idx, X_test_idx, y_train, y_test = train_test_split( Data_idx, Labels, test_size=0.1) #N_tr, N_te X_train_idx, X_val_idx, y_train, y_val = train_test_split( X_train_idx, y_train, test_size=0.1) #N_tr, N_te tf.reset_default_graph() weights_att = {'W': tf.get_variable('Weights_W', shape=[r,L], dtype=tf.float64, \ initializer=tf.truncated_normal_initializer(mean=-0.1, stddev=0.1)), \ 'V': tf.get_variable('Weights_V', shape=[L,M], dtype=tf.float64, \ initializer=tf.truncated_normal_initializer(mean=-0.1, stddev=0.1)), \ 'w1_h1': tf.get_variable('Weights_w1_h1', shape=[2*L11], dtype=tf.float64, \ initializer=tf.truncated_normal_initializer(mean=-0.1, stddev=0.1)), \ 'V1_h1': tf.get_variable('Weights_V1_h1', shape=[L11,n_dim], dtype=tf.float64, \ initializer=tf.truncated_normal_initializer(mean=-0.1, stddev=0.1)), \ 'w1_h0': tf.get_variable('Weights_w1_h0', shape=[2*L10], dtype=tf.float64, \ initializer=tf.truncated_normal_initializer(mean=-0.1, stddev=0.1)), \ 'V1_h0': tf.get_variable('Weights_V1_h0', shape=[L10,L11], dtype=tf.float64, \ initializer=tf.truncated_normal_initializer(mean=-0.1, stddev=0.1))} W, V = weights_att['W'], weights_att['V'] w1_h0, V1_h0 = weights_att['w1_h0'], weights_att['V1_h0'] w1_h1, V1_h1 = weights_att['w1_h1'], weights_att['V1_h1']
def conv2d(x, output_dim, filter_height=5, filter_width=5, stride_hor=2,
           stride_ver=2, name='conv2d'):
    """Apply a 'SAME'-padded 2-D convolution plus bias in scope ``name``.

    Args:
        x: input tensor; its last static dimension is the input channel count.
        output_dim: number of output channels.
        filter_height: kernel height.
        filter_width: kernel width.
        stride_hor: stride placed at index 1 of the ``strides`` list.
        stride_ver: stride placed at index 2 of the ``strides`` list.
        name: variable scope holding the 'filter' and 'bias' variables.

    Returns:
        The convolution output with the bias added.
    """
    with tf.variable_scope(name):
        in_channels = x.get_shape()[-1]
        kernel = tf.get_variable(
            'filter',
            [filter_height, filter_width, in_channels, output_dim],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        # NOTE(review): stride_hor lands in position 1 and stride_ver in
        # position 2 - confirm this matches the intended axes of x.
        step = [1, stride_hor, stride_ver, 1]
        feature_map = tf.nn.conv2d(x, kernel, strides=step, padding='SAME')
        offset = tf.get_variable(
            'bias', [output_dim],
            initializer=tf.constant_initializer(0))
        return feature_map + offset
def generator(input, is_train, reuse=False):
    """Build the encoder-decoder generator graph under the 'gen' scope.

    The encoder halves the spatial size four times with 5x5 stride-2
    convolutions (32, 64, 128, 256 filters). The decoder doubles it four
    times with 5x5 stride-2 transposed convolutions (128, 64, 32 and
    ``CHANNEL`` filters).

    Args:
        input: tensor fed to the first convolution.
        is_train: forwarded to every batch-norm layer as ``is_training``.
        reuse: when truthy, the variables of the 'gen' scope are reused.

    Returns:
        The generated image tensor: the ReLU output named 'act8'.
    """
    output_dim = CHANNEL

    def _normalise(tensor, scope_name):
        # Batch norm with in-place moving-average updates.
        return tf.contrib.layers.batch_norm(
            tensor, is_training=is_train, epsilon=1e-5, decay=0.9,
            updates_collections=None, scope=scope_name)

    def _encode(tensor, filters, conv_name, bn_scope, act_name):
        # Convolution -> batch norm -> leaky ReLU -> dropout.
        tensor = tf.layers.conv2d(
            tensor, filters, kernel_size=[5, 5], strides=[2, 2],
            padding="SAME",
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
            name=conv_name)
        tensor = lrelu(_normalise(tensor, bn_scope), n=act_name)
        # The activation is immediately replaced by its dropped-out version;
        # dropout is applied regardless of is_train.
        return tf.nn.dropout(tensor, keep_prob=0.5)

    def _decode(tensor, filters, conv_name, bn_scope, act_name):
        # Transposed convolution -> batch norm -> ReLU.
        tensor = tf.layers.conv2d_transpose(
            tensor, filters, kernel_size=[5, 5], strides=[2, 2],
            padding="SAME",
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
            name=conv_name)
        return tf.nn.relu(_normalise(tensor, bn_scope), name=act_name)

    with tf.variable_scope('gen') as scope:
        if reuse:
            scope.reuse_variables()

        net = _encode(input, 32, 'conv1', 'bn1', 'act1')
        net = _encode(net, 64, 'conv2', 'bn2', 'act2')
        net = _encode(net, 128, 'conv3', 'bn3', 'act3')
        net = _encode(net, 256, 'conv4', 'bn4', 'act4')

        net = _decode(net, 128, 'conv5', 'bn5', 'act5')
        net = _decode(net, 64, 'conv6', 'bn6', 'act6')
        net = _decode(net, 32, 'conv7', 'bn7', 'act7')
        net = _decode(net, output_dim, 'conv8', 'bn8', 'act8')

        return net
def main(args):
    """Train a softmax face classifier and return the model directory.

    The function creates timestamped log and model directories, builds a
    queue-based input pipeline, builds the network imported from
    ``args.model_def`` with a 'Bottleneck' embedding layer and a 'Logits'
    layer, then alternates training epochs, checkpointing and (when
    ``args.lfw_dir`` is set) LFW evaluation until ``args.max_nrof_epochs``
    is reached.

    Args:
        args: parsed command-line namespace holding all options read below.

    Returns:
        The path of the directory the model checkpoints are written to.

    Raises:
        AssertionError: if the dataset contains no images.
    """
    logging.info('###### all args #####: %s' % args)
    network = importlib.import_module(args.model_def)

    # Log and model directories share one timestamped sub-directory name.
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Store some git revision info in a text file in the log directory
    if not args.no_store_revision_info:
        src_path, _ = os.path.split(os.path.realpath(__file__))
        facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    train_set = facenet.get_dataset(args.data_dir)
    if args.filter_filename:
        train_set = filter_dataset(train_set, args.filter_filename,
                                   args.filter_percentile,
                                   args.filter_min_nrof_images_per_class)
    nrof_classes = len(train_set)

    logging.info('Model directory: %s' % model_dir)
    logging.info('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.expanduser(args.pretrained_model)
        logging.info('Pre-trained model: %s' % pretrained_model)

    if args.lfw_dir:
        logging.info('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(
            os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # Get a list of image paths and their labels
        image_list, label_list = facenet.get_image_paths_and_labels(train_set)
        assert len(image_list) > 0, 'The dataset should not be empty'
        logging.info('image_list size %d, label_list size %d' % (len(image_list), len(label_list)))

        # Create a queue that produces indices into the image_list and label_list
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        logging.info('labels shape %s, range size: %s' % (labels.shape, str(range_size)))
        index_queue = tf.train.range_input_producer(
            range_size, num_epochs=None, shuffle=True, seed=None, capacity=32)

        logging.info('batch size:%d, epoch size:%d' % (args.batch_size, args.epoch_size))
        # One dequeue yields the indices for a whole epoch.
        index_dequeue_op = index_queue.dequeue_many(
            args.batch_size * args.epoch_size, 'index_dequeue')

        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths')
        labels_placeholder = tf.placeholder(tf.int64, shape=(None, 1), name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(1,), (1,)],
                                              shared_name=None, name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder], name='enqueue_op')

        nrof_preprocess_threads = 4
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            logging.info('# filenames len:%s' % filenames.shape)
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_jpeg(file_contents)
                logging.info('#image shape:%s' % image.shape)
                if args.random_rotate:
                    image = tf.py_func(facenet.random_rotate_image, [image], tf.uint8)
                # Always bring the image to image_size x image_size. The
                # resize used to sit in an `else:` left over from a disabled
                # random-crop branch, so it was skipped whenever
                # random_rotate was on, although set_shape and batch_join
                # below require exactly this size.
                image = tf.image.resize_image_with_crop_or_pad(
                    image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)

                #pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_labels.append([images, label])

        image_batch, label_batch = tf.train.batch_join(
            images_and_labels, batch_size=batch_size_placeholder,
            shapes=[(args.image_size, args.image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        # The image batch is exposed under both 'image_batch' and 'input'.
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        label_batch = tf.identity(label_batch, 'label_batch')

        logging.info('Total number of classes: %d' % nrof_classes)
        logging.info('Total number of examples: %d' % len(image_list))

        logging.info('Building training graph')

        batch_norm_params = {
            # Decay for the moving averages
            'decay': 0.995,
            # epsilon to prevent 0s in variance
            'epsilon': 0.001,
            # force in-place updates of mean and variance estimates
            'updates_collections': None,
            # Moving averages ends up in the trainable variables collection
            'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
            # Only update statistics during training mode
            'is_training': phase_train_placeholder
        }
        # Build the inference graph
        prelogits, _ = network.inference(image_batch, args.keep_probability,
                                         phase_train=phase_train_placeholder,
                                         weight_decay=args.weight_decay)
        bottleneck = slim.fully_connected(
            prelogits, args.embedding_size, activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params,
            scope='Bottleneck', reuse=False)

        logits = slim.fully_connected(
            bottleneck, len(train_set), activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
            weights_regularizer=slim.l2_regularizer(args.weight_decay),
            scope='Logits', reuse=False)

        # Embeddings are the L2-normalised bottleneck activations.
        embeddings = tf.nn.l2_normalize(bottleneck, 1, 1e-10, name='embeddings')

        # Add center loss
        if args.center_loss_factor > 0.0:
            prelogits_center_loss, _ = facenet.center_loss(
                prelogits, label_batch, args.center_loss_alfa, nrof_classes)
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                                 prelogits_center_loss * args.center_loss_factor)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder, global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor, staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_batch, logits=logits,
            name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        # Calculate the total losses
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses,
                              name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer,
                                 learning_rate, args.moving_average_decay,
                                 tf.global_variables(), args.log_histograms)

        # Create a saver
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=10)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default(), tf.device('/gpu:1'):

            if pretrained_model:
                logging.info('Restoring pretrained model: %s' % pretrained_model)
                saver.restore(sess, pretrained_model)

            # Training and validation loop
            logging.info('Running training')
            epoch = 0
            while epoch < args.max_nrof_epochs:
                # The epoch counter is derived from the global step.
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, epoch, image_list, label_list,
                      index_dequeue_op, enqueue_op, image_paths_placeholder,
                      labels_placeholder, learning_rate_placeholder,
                      phase_train_placeholder, batch_size_placeholder,
                      global_step, total_loss, train_op, summary_op,
                      summary_writer, regularization_losses,
                      args.learning_rate_schedule_file)

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)

                # Evaluate on LFW
                if args.lfw_dir:
                    evaluate(sess, enqueue_op, image_paths_placeholder,
                             labels_placeholder, phase_train_placeholder,
                             batch_size_placeholder, embeddings, label_batch,
                             lfw_paths, actual_issame, args.lfw_batch_size,
                             args.lfw_nrof_folds, log_dir, step,
                             summary_writer)
        sess.close()
    return model_dir
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              store_non_strided_activations=False,
              reuse=None,
              scope=None):
    """Build a ResNet-v1 style backbone with two 1x1-conv heads ("type" and "color").

    Args:
        inputs: Input tensor fed to the network.
        blocks: Block descriptions forwarded to `resnet_utils.stack_blocks_dense`.
        num_classes: Accepted for interface compatibility; not read by this
            implementation (the head widths are fixed at 4 and 3).
        is_training: Forwarded to the `slim.batch_norm` arg scope. When None,
            no batch-norm arg scope is installed (`NoOpScope()` is used).
        global_pool: If true, each head output is averaged over axes [1, 2]
            (dimensions kept) before the optional squeeze.
        output_stride: Optional requested output stride. When the root block
            is included it must be a multiple of 4 and is divided by 4 before
            being handed to `stack_blocks_dense`.
        include_root_block: If true, prepend batch-norm, a 7x7 stride-2
            convolution and a 3x3 stride-2 max-pool to the block stack.
        spatial_squeeze: If true, axes [1, 2] of each head output are squeezed.
        store_non_strided_activations: Accepted for interface compatibility;
            not read by this implementation.
        reuse: Passed to `tf.variable_scope`.
        scope: Optional variable scope name (default name 'resnet_v1').

    Returns:
        A tuple `(net, end_points)`. `net` is the output of
        `stack_blocks_dense` (the backbone features, not a head output);
        the head outputs are available through `end_points`.

    Raises:
        ValueError: If the root block is included and `output_stride` is not
            a multiple of 4.
    """
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
                [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with (slim.arg_scope([slim.batch_norm],
                                 decay=0.99,
                                 zero_debias_moving_mean=True,
                                 is_training=is_training)
                  if is_training is not None else NoOpScope()):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError(
                                'The output_stride needs to be a multiple of 4.'
                            )
                        # Integer division keeps the stride an int on
                        # Python 3 (true division would yield a float).
                        output_stride //= 4
                    net = slim.batch_norm(net,
                                          decay=0.99,
                                          zero_debias_moving_mean=True,
                                          scale=True)
                    net = slim.conv2d(net,
                                      64,
                                      7,
                                      stride=2,
                                      padding='SAME',
                                      normalizer_fn=slim.batch_norm,
                                      normalizer_params={
                                          'decay': 0.99,
                                          'zero_debias_moving_mean': True
                                      },
                                      activation_fn=tf.nn.relu,
                                      scope='conv1')
                    net = slim.max_pool2d(net, [3, 3],
                                          stride=2,
                                          padding='SAME',
                                          scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks,
                                                      output_stride)
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)

                num_types = 4
                num_color = 3

                # "type" head: 1x1 convolution over the backbone features.
                types = slim.conv2d(
                    net,
                    num_types, [1, 1],
                    activation_fn=tf.nn.relu,
                    weights_initializer=tf.truncated_normal_initializer(
                        mean=0, stddev=0.01),
                    normalizer_fn=None,
                    scope='logits_type')
                # Record the head output itself (previously the backbone
                # tensor `net` was stored under this key, unlike the colour
                # head below which stores its own output).
                end_points[sc.name + '/logits_type'] = types
                if global_pool:
                    types = tf.reduce_mean(types, [1, 2],
                                           name='pool5',
                                           keepdims=True)
                    end_points['global_pool_types'] = types
                if spatial_squeeze:
                    types = tf.squeeze(types, [1, 2], name='type')
                    end_points[sc.name + '/type'] = types
                end_points['predictions_types'] = types

                # "color" head: same structure, fed from the same features.
                color = slim.conv2d(
                    net,
                    num_color, [1, 1],
                    activation_fn=tf.nn.relu,
                    weights_initializer=tf.truncated_normal_initializer(
                        mean=0, stddev=0.01),
                    normalizer_fn=None,
                    scope='logits')
                end_points[sc.name + '/logits'] = color
                if global_pool:
                    color = tf.reduce_mean(color, [1, 2],
                                           name='pool6',
                                           keepdims=True)
                    end_points['global_pool_color'] = color
                if spatial_squeeze:
                    color = tf.squeeze(color, [1, 2], name='color')
                    end_points[sc.name + '/color'] = color
                end_points['predictions_color'] = color
                return net, end_points
# Build the input placeholders and the first two convolution stages of a
# TensorLayer network sized from the `patches` array.

# Only `x` and `y` are used in this excerpt (to size the input placeholder).
n,x,y,c = patches.shape
batch_size = 200

# Input placeholder: any batch size, x-by-y spatial extent, one channel.
xi=tf.placeholder(tf.float32, shape=[None, x, y, 1])
# Target placeholder with two columns per example.
y_=tf.placeholder(tf.float32, shape=[None, 2])

network=tl.layers.InputLayer(xi,name='input_layer')

# Stage 1: 3x3 convolution (1 -> 32 channels, ReLU) then 2x2 max-pool, stride 2.
conv1=tl.layers.Conv2dLayer(network, act=tf.nn.relu,shape=[3,3,1,32], strides=[1,1,1,1], padding='SAME', W_init=tf.truncated_normal_initializer(stddev=0.1), b_init=tf.constant_initializer(value=0.1), name='conv1')
pool1=tl.layers.PoolLayer(conv1,ksize=[1, 2, 2 ,1],strides=[1,2,2,1],padding='SAME',pool=tf.nn.max_pool,name='pool1')

# Stage 2: 3x3 convolution (32 -> 64 channels, ReLU) on the pooled output.
conv2=tl.layers.Conv2dLayer(pool1, act=tf.nn.relu,shape=[3,3,32,64], strides=[1,1,1,1], padding='SAME', W_init=tf.truncated_normal_initializer(stddev=0.1), b_init=tf.constant_initializer(value=0.1), name='conv2')
def _add_seq2seq(self):
    """Add the whole sequence-to-sequence model to the graph.

    Builds, in order: the shared embedding lookup, the encoder, the reduced
    initial decoder state, the decoder, the output projection and the final
    per-step distributions. In 'train'/'eval' mode it also builds the loss
    (plus coverage and total loss when coverage is enabled); in 'decode'
    mode it builds the top-k outputs used for one beam-search step.

    Attributes set on self:
        rand_unif_init, trunc_norm_init: initializers.
        _enc_states, _dec_in_state, _dec_out_state.
        attn_dists, p_gens, coverage: as returned by self._add_decoder.
        _loss (train/eval); _coverage_loss and _total_loss (train/eval with
        hps.coverage); _topk_ids and _topk_log_probs (decode).
    """
    # Stray string statement (a Chinese restatement of the docstring); it is
    # evaluated and discarded, so it has no runtime effect.
    """在图中添加完整的seq2seq模型"""
    # Fetch the hyperparameters and the vocabulary size.
    hps = self._hps
    vsize = self._vocab.size()

    with tf.variable_scope('seq2seq'):
        # Some initializers.
        self.rand_unif_init = tf.random_uniform_initializer(-hps.rand_unif_init_mag, hps.rand_unif_init_mag, seed=123)
        self.trunc_norm_init = tf.truncated_normal_initializer(stddev=hps.trunc_norm_init_std)

        # Add embedding matrix (shared by the encoder and decoder inputs)
        with tf.variable_scope('embedding'):
            embedding = tf.get_variable('embedding', [vsize, hps.emb_dim], dtype=tf.float32, initializer=self.trunc_norm_init)
            if hps.mode=="train": self._add_emb_vis(embedding) # add to tensorboard
            emb_enc_inputs = tf.nn.embedding_lookup(embedding, self._enc_batch) # tensor with shape (batch_size, max_enc_steps, emb_size)
            emb_dec_inputs = [tf.nn.embedding_lookup(embedding, x) for x in tf.unstack(self._dec_batch, axis=1)] # list length max_dec_steps containing shape (batch_size, emb_size)

        # Add the encoder.
        enc_outputs, fw_st, bw_st = self._add_encoder(emb_enc_inputs, self._enc_lens)
        self._enc_states = enc_outputs

        # Our encoder is bidirectional and our decoder is unidirectional so we need to reduce the final encoder hidden state to the right size to be the initial decoder hidden state
        self._dec_in_state = self._reduce_states(fw_st, bw_st)

        # Add the decoder.
        with tf.variable_scope('decoder'):
            decoder_outputs, self._dec_out_state, self.attn_dists, self.p_gens, self.coverage = self._add_decoder(emb_dec_inputs)

        # Add the output projection to obtain the vocabulary distribution
        with tf.variable_scope('output_projection'):
            w = tf.get_variable('w', [hps.hidden_dim, vsize], dtype=tf.float32, initializer=self.trunc_norm_init)
            # NOTE(review): w_t is not read anywhere else in this method.
            w_t = tf.transpose(w)
            v = tf.get_variable('v', [vsize], dtype=tf.float32, initializer=self.trunc_norm_init)
            vocab_scores = [] # vocab_scores is the vocabulary distribution before applying softmax. Each entry on the list corresponds to one decoder step
            for i,output in enumerate(decoder_outputs):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                # Linear transform: output * w + v.
                vocab_scores.append(tf.nn.xw_plus_b(output, w, v)) # apply the linear layer

            vocab_dists = [tf.nn.softmax(s) for s in vocab_scores] # The vocabulary distributions. List length max_dec_steps of (batch_size, vsize) arrays. The words are in the order they appear in the vocabulary file.

        # For pointer-generator model, calc final distribution from copy distribution and vocabulary distribution
        # (i.e. combine the vocabulary distributions with the attention distributions).
        if FLAGS.pointer_gen:
            final_dists = self._calc_final_dist(vocab_dists, self.attn_dists)
        else: # final distribution is just vocabulary distribution
            final_dists = vocab_dists

        # Training and evaluation both need the loss.
        if hps.mode in ['train', 'eval']:
            # Calculate the loss
            with tf.variable_scope('loss'):
                if FLAGS.pointer_gen:
                    # Calculate the loss per step
                    # This is fiddly; we use tf.gather_nd to pick out the probabilities of the gold target words
                    loss_per_step = [] # will be list length max_dec_steps containing shape (batch_size)
                    batch_nums = tf.range(0, limit=hps.batch_size) # shape (batch_size)
                    for dec_step, dist in enumerate(final_dists):
                        targets = self._target_batch[:,dec_step] # The indices of the target words. shape (batch_size)
                        indices = tf.stack( (batch_nums, targets), axis=1) # shape (batch_size, 2)
                        gold_probs = tf.gather_nd(dist, indices) # shape (batch_size). prob of correct words on this step
                        losses = -tf.log(gold_probs)
                        loss_per_step.append(losses)

                    # Apply dec_padding_mask and get loss
                    self._loss = _mask_and_avg(loss_per_step, self._dec_padding_mask)

                else: # baseline model
                    self._loss = tf.contrib.seq2seq.sequence_loss(tf.stack(vocab_scores, axis=1), self._target_batch, self._dec_padding_mask) # this applies softmax internally

                tf.summary.scalar('loss', self._loss)

                # Calculate coverage loss from the attention distributions
                # (only when coverage mode is switched on).
                if hps.coverage:
                    with tf.variable_scope('coverage_loss'):
                        self._coverage_loss = _coverage_loss(self.attn_dists, self._dec_padding_mask)
                        tf.summary.scalar('coverage_loss', self._coverage_loss)
                    self._total_loss = self._loss + hps.cov_loss_wt * self._coverage_loss
                    tf.summary.scalar('total_loss', self._total_loss)

    if hps.mode == "decode":
        # We run decode beam search mode one decoder step at a time
        assert len(final_dists)==1 # final_dists is a singleton list containing shape (batch_size, extended_vsize)
        final_dists = final_dists[0]
        topk_probs, self._topk_ids = tf.nn.top_k(final_dists, hps.batch_size*2) # take the k largest probs. note batch_size=beam_size in decode mode
        self._topk_log_probs = tf.log(topk_probs)
def deconv2d(x, output_shape, filter_height=5, filter_width=5, stride_hor=2, stride_ver=2, name='deconv2d'):
    """Apply a transposed 2-D convolution and add a learned bias.

    Args:
        x: Input tensor; its last dimension is used as the kernel's input
            channel count.
        output_shape: Shape passed to `tf.nn.conv2d_transpose`; its last
            entry is used as the output channel count.
        filter_height: Kernel height.
        filter_width: Kernel width.
        stride_hor: Stride placed at index 1 of the strides list.
        stride_ver: Stride placed at index 2 of the strides list.
        name: Variable scope under which 'filter' and 'bias' are created.

    Returns:
        The transposed-convolution output plus the bias.
    """
    out_channels = output_shape[-1]
    in_channels = x.get_shape()[-1]

    with tf.variable_scope(name):
        # Kernel layout follows conv2d_transpose: [h, w, out_ch, in_ch].
        kernel = tf.get_variable(
            'filter',
            [filter_height, filter_width, out_channels, in_channels],
            initializer=tf.truncated_normal_initializer(stddev=0.02))

        # NOTE(review): with the default NHWC layout index 1 is the height
        # axis, so `stride_hor` strides over height here -- confirm this is
        # the intended meaning (the defaults are equal, so it is latent).
        upsampled = tf.nn.conv2d_transpose(
            x,
            kernel,
            output_shape=output_shape,
            strides=[1, stride_hor, stride_ver, 1])

        offset = tf.get_variable(
            'bias',
            [out_channels],
            initializer=tf.constant_initializer(0))

        return upsampled + offset
def _build_initializer(initializer, build_for_keras=False):
    """Build a tf initializer from config.

    Args:
        initializer: Config message exposing an `initializer_oneof` oneof
            (queried through `WhichOneof`) with one of the fields
            `truncated_normal_initializer`, `random_normal_initializer` or
            `variance_scaling_initializer`.
        build_for_keras: Whether the initializer should be built for Keras
            operators. If false, builds for Slim. Only affects the
            variance-scaling case.

    Returns:
        tf initializer.

    Raises:
        ValueError: On unknown initializer.
    """
    kind = initializer.WhichOneof('initializer_oneof')

    if kind == 'truncated_normal_initializer':
        cfg = initializer.truncated_normal_initializer
        return tf.truncated_normal_initializer(mean=cfg.mean,
                                               stddev=cfg.stddev)

    if kind == 'random_normal_initializer':
        cfg = initializer.random_normal_initializer
        return tf.random_normal_initializer(mean=cfg.mean,
                                            stddev=cfg.stddev)

    if kind != 'variance_scaling_initializer':
        raise ValueError(
            'Unknown initializer function: {}'.format(kind))

    cfg = initializer.variance_scaling_initializer
    # Translate the numeric enum value into its symbolic name.
    mode_enum = (hyperparams_pb2.VarianceScalingInitializer.
                 DESCRIPTOR.enum_types_by_name['Mode'])
    mode = mode_enum.values_by_number[cfg.mode].name

    if not build_for_keras:
        return slim.variance_scaling_initializer(
            factor=cfg.factor,
            mode=mode,
            uniform=cfg.uniform)

    keras_mode = mode.lower()
    if cfg.uniform:
        return tf.variance_scaling_initializer(
            scale=cfg.factor,
            mode=keras_mode,
            distribution='uniform')

    # In TF 1.9 release and earlier, the truncated_normal distribution was
    # not supported correctly. In those versions a ValueError is raised and
    # the scale is truncated manually instead.
    #
    # Starting with distribution `normal` is not enough, because `normal`
    # creates a truncated distribution in newer Tensorflow versions but an
    # untruncated one in older versions.
    try:
        return tf.variance_scaling_initializer(
            scale=cfg.factor,
            mode=keras_mode,
            distribution='truncated_normal')
    except ValueError:
        truncate_constant = 0.87962566103423978
        rescaled = cfg.factor / (truncate_constant * truncate_constant)
        return tf.variance_scaling_initializer(
            scale=rescaled,
            mode=keras_mode,
            distribution='normal')
def inception_v3(images, trainable=True, is_training=True, weight_decay=0.00004, stddev=0.1, dropout_keep_prob=0.8, use_batch_norm=True, batch_norm_params=None, add_summaries=True, scope="InceptionV3"):
    """Builds an Inception V3 subgraph for image embeddings.

    Args:
        images: A float32 Tensor of shape [batch, height, width, channels].
        trainable: Whether the inception submodel should be trainable or not.
        is_training: Boolean indicating training mode or not.
        weight_decay: Coefficient for weight regularization.
        stddev: The standard deviation of the truncated normal weight
            initializer.
        dropout_keep_prob: Dropout keep probability.
        use_batch_norm: Whether to use batch normalization.
        batch_norm_params: Parameters for batch normalization. See
            tf.contrib.layers.batch_norm for details. Ignored (reset to None)
            when `use_batch_norm` is false.
        add_summaries: Whether to add activation summaries.
        scope: Optional Variable scope.

    Returns:
        The flattened output of the "logits" scope (average pool, dropout,
        flatten) applied to the result of `inception_v3_base`.
    """
    # The submodel only counts as "training" when it is also trainable.
    is_inception_model_training = trainable and is_training

    if not use_batch_norm:
        batch_norm_params = None
    elif not batch_norm_params:
        # Default parameters for batch normalization.
        batch_norm_params = {
            "is_training": is_inception_model_training,
            "trainable": trainable,
            # Decay for the moving averages.
            "decay": 0.9997,
            # Epsilon to prevent 0s in variance.
            "epsilon": 0.001,
            # Collection containing the moving mean and moving variance.
            "variables_collections": {
                "beta": None,
                "gamma": None,
                "moving_mean": ["moving_vars"],
                "moving_variance": ["moving_vars"],
            }
        }

    weights_regularizer = (
        tf.contrib.layers.l2_regularizer(weight_decay) if trainable else None)

    with tf.variable_scope(scope, "InceptionV3", [images]) as var_scope:
        # The outer arg scope applies to conv and fully-connected layers;
        # the inner one adds the conv-only defaults.
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                weights_regularizer=weights_regularizer,
                trainable=trainable), slim.arg_scope(
                    [slim.conv2d],
                    weights_initializer=tf.truncated_normal_initializer(
                        stddev=stddev),
                    activation_fn=tf.nn.relu,
                    normalizer_fn=slim.batch_norm,
                    normalizer_params=batch_norm_params):
            net, end_points = inception_v3_base(images, scope=var_scope)
            with tf.variable_scope("logits"):
                spatial_dims = net.get_shape()[1:3]
                net = slim.avg_pool2d(net, spatial_dims, padding="VALID",
                                      scope="pool")
                net = slim.dropout(net,
                                   keep_prob=dropout_keep_prob,
                                   is_training=is_inception_model_training,
                                   scope="dropout")
                net = slim.flatten(net, scope="flatten")

    # Add summaries.
    if add_summaries:
        for activation in end_points.values():
            tf.contrib.layers.summaries.summarize_activation(activation)

    return net
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids, labels, num_labels, use_one_hot_embeddings):
    """Creates a multi-aspect classification model on top of BERT.

    Twenty independent 4-way softmax classifiers are stacked on the pooled
    BERT output. Classifier `i` is scored against columns
    `[4 * i, 4 * i + 4)` of `labels`.

    Args:
        bert_config: Configuration passed to `modeling.BertModel`.
        is_training: Forwarded to BERT; also enables dropout on the pooled
            output.
        input_ids: Forwarded to BERT.
        input_mask: Forwarded to BERT.
        segment_ids: Forwarded to BERT as `token_type_ids`.
        labels: Label tensor; cast to float32 and sliced four columns per
            aspect.
        num_labels: Not used; every aspect has a fixed width of 4.
        use_one_hot_embeddings: Forwarded to BERT.

    Returns:
        A tuple `(loss, per_example_loss, logits, probabilities)`.
        `per_example_loss` is the sum over aspects of each aspect's mean
        cross-entropy, and `loss` is `tf.reduce_mean` of that value. `logits`
        and `probabilities` are the per-aspect tensors concatenated on axis 1.
    """
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # Whole-segment classification uses the pooled output; token-level tasks
    # would use model.get_sequence_output() instead.
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value

    aspect_count = 20
    classes_per_aspect = 4

    with tf.variable_scope("loss"):
        if is_training:
            # keep_prob=0.6, i.e. 40% of the activations are dropped.
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.6)

        labels = tf.cast(labels, tf.float32)

        losses = tf.constant(0.0)
        aspect_probs = []
        aspect_logits = []
        for i in range(aspect_count):
            first_col = i * classes_per_aspect
            with tf.name_scope('aspect_{}'.format(i)):
                # Per-aspect classification layer parameters.
                output_weights = tf.get_variable(
                    "output_weights_{}".format(i),
                    [classes_per_aspect, hidden_size],
                    initializer=tf.truncated_normal_initializer(stddev=0.02))
                output_bias = tf.get_variable(
                    "output_bias_{}".format(i),
                    [classes_per_aspect],
                    initializer=tf.zeros_initializer())

                logits = tf.nn.bias_add(
                    tf.matmul(output_layer, output_weights, transpose_b=True),
                    output_bias)
                aspect_logits.append(logits)
                aspect_probs.append(tf.nn.softmax(logits, axis=-1))

                # Columns of `labels` that belong to this aspect.
                label = labels[:, first_col: first_col + 4]
                aspect_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=logits,
                    labels=label,
                    name='aspect_{}_loss'.format(i))
                aspect_loss = tf.reduce_mean(aspect_loss,
                                             name='{}_loss'.format(i))
                losses = losses + aspect_loss

        probabilities = tf.concat(aspect_probs, 1, name='probabilities')
        logits = tf.concat(aspect_logits, 1, name='logits')

        per_example_loss = losses
        loss = tf.reduce_mean(per_example_loss)

        return (loss, per_example_loss, logits, probabilities)
print('Current step: {}'.format(current_step)) print('Test accuracy {:g}'.format( accuracy.eval( feed_dict={ images_placeholder: data_sets['images_test'], labels_placeholder: data_sets['labels_test'] }))) print(tf.global_variables()) # print("all values %s" % sess.run(tf.global_variables())) endTime = time.time() print('Total time: {:5.2f}s'.format(endTime - beginTime)) #### Illustrated Variable #### demo_weights = tf.get_variable(name='weights', shape=[128, 64], initializer=tf.truncated_normal_initializer( stddev=1.0 / np.sqrt(float(128)))) print("demo_weights") print(demo_weights) """ # checkpoint_path = os.path.join(FLAGS.train_dir, "tf_logs") checkpoint_path = FLAGS.train_dir reader = pywrap_tensorflow.NewCheckpointReader(checkpoint_path) var_to_shape_map = reader.get_variable_to_shape_map() for key in var_to_shape_map: print("tensor_name: ", key) print(reader.get_tensor(key)) """