def mlp_param_init(dim, scheme = 'zero'):
    """
    @note: Initializes parameters to build a multi-layer perceptron with tensorflow.
        The shapes are:
            W1: [n1, n_x]
            B1: [n1, 1]
            W2: [n2, n1]
            B2: [n2, 1]
            ...
            Wl: [n_y, nl-1]
            Bl: [n_y, 1]
        
    @param dim: the number of units in each layer -- dim = [n_x, n1, n2, ..., n(l-1), n_y]
    @param scheme: the initialization scheme for the weights, one of {'zero', 'xavier'}
    
    @return: parameters -- a dictionary of tensors containing W1, B1, W2, B2, ..., Wl, Bl
    """
    parameters = {}
    l = len(dim)  # number of layer sizes (input layer included)
    
    # parameter initialization (Xavier or zeros for the weights, zeros for the biases)
    # (layer 0 is the input; layer l-1 is the output)
    for i in range(1, l):
        if scheme == 'xavier':
            parameters['W'+str(i)] = tf.get_variable('W'+str(i), [dim[i], dim[i-1]], \
                                                     initializer = tf.contrib.layers.xavier_initializer())
        else:
            parameters['W'+str(i)] = tf.get_variable('W'+str(i), [dim[i], dim[i-1]], \
                                                     initializer = tf.zeros_initializer())     
        parameters['B'+str(i)] = tf.get_variable('B'+str(i), [dim[i], 1], \
                                                 initializer = tf.zeros_initializer())

    return parameters
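A minimal usage sketch (assuming TensorFlow 1.x, where tf.contrib is available; the layer sizes below are made up):

import tensorflow as tf

tf.reset_default_graph()
# Hypothetical 3-layer MLP for 784-dim inputs and 10 classes.
params = mlp_param_init(dim=[784, 64, 32, 10], scheme='xavier')
print(sorted(params.keys()))  # ['B1', 'B2', 'B3', 'W1', 'W2', 'W3']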
Example #2
File: ops.py  Project: gdahia/DLF
def conv2d_zeros(x,
                 width,
                 filter_size=[3, 3],
                 stride=[1, 1],
                 pad="SAME",
                 logscale_factor=3,
                 skip=1,
                 edge_bias=True,
                 name=None):
    with tf.variable_scope(name, "conv2d"):
        if edge_bias and pad == "SAME":
            x = add_edge_padding(x, filter_size)
            pad = 'VALID'

        n_in = int(x.get_shape()[3])
        stride_shape = [1] + stride + [1]
        filter_shape = filter_size + [n_in, width]
        w = tf.get_variable("W", filter_shape, tf.float32,
                            initializer=tf.zeros_initializer())
        if skip == 1:
            x = tf.nn.conv2d(x, w, stride_shape, pad, data_format='NHWC')
        else:
            assert stride[0] == 1 and stride[1] == 1
            x = tf.nn.atrous_conv2d(x, w, skip, pad)
        x += tf.get_variable("b", [1, 1, 1, width],
                             initializer=tf.ones_initializer())
        x *= tf.exp(tf.get_variable("logs",
                                    [1, width], initializer=tf.zeros_initializer()) * logscale_factor)
    return x
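A sketch of how conv2d_zeros might be called (TensorFlow 1.x assumed; edge_bias=False avoids the add_edge_padding dependency, and the input shape is illustrative):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 32, 32, 3])
# With zero-initialized weights and logs, the convolution contributes nothing at first,
# so the initial output is just the bias scaled by exp(logs * logscale_factor).
y = conv2d_zeros(x, width=16, edge_bias=False, name="conv_out")
print(y.get_shape())  # (?, 32, 32, 16)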
Example #3
def Discriminator_with_Vanilla(input_Pattern, hidden_Unit_Size = 128, label_Unit_Size = 10, is_Training = True, reuse = False):
    with tf.variable_scope('discriminator', reuse=reuse):
        hidden_Activation = tf.layers.dense(
            inputs = input_Pattern,
            units = hidden_Unit_Size,
            activation = tf.nn.relu,
            use_bias = True,
            kernel_initializer = tf.truncated_normal_initializer(stddev=0.1),
            bias_initializer = tf.zeros_initializer(),
            name = "hidden"
            )
        discrimination_Logits = tf.layers.dense(
            inputs = hidden_Activation,
            units = 1,
            activation = None,
            use_bias = True,
            kernel_initializer = tf.truncated_normal_initializer(stddev=0.1),
            bias_initializer = tf.zeros_initializer(),
            name = "discrimination"
            )
        discrimination_Activation = tf.nn.sigmoid(discrimination_Logits)

        label_Logits = tf.layers.dense(
            inputs = hidden_Activation,
            units = label_Unit_Size,
            activation = None,
            use_bias = True,
            kernel_initializer = tf.truncated_normal_initializer(stddev=0.1),
            bias_initializer = tf.zeros_initializer(),
            name = "label"
            )
        label_Activation = tf.nn.softmax(label_Logits)

        return discrimination_Logits, label_Logits, discrimination_Activation, label_Activation
def initialize_parameters():
    """
    Initializes parameters to build a neural network with tensorflow. The shapes are:
                        W1 : [n_hidden_1, n_input]
                        b1 : [n_hidden_1, 1]
                        W2 : [n_hidden_2, n_hidden_1]
                        b2 : [n_hidden_2, 1]
                        W3 : [n_classes, n_hidden_2]
                        b3 : [n_classes, 1]
    """
    tf.set_random_seed(42)
    # First hidden layer
    W1 = tf.get_variable("W1", [n_hidden_1, n_input], initializer=tf.contrib.layers.xavier_initializer(seed=42))
    b1 = tf.get_variable("b1", [n_hidden_1, 1], initializer=tf.zeros_initializer())

    # Second hidden layer
    W2 = tf.get_variable("W2", [n_hidden_2, n_hidden_1], initializer=tf.contrib.layers.xavier_initializer(seed=42))
    b2 = tf.get_variable("b2", [n_hidden_2, 1], initializer=tf.zeros_initializer())

    # Output layer
    W3 = tf.get_variable("W3", [n_classes, n_hidden_2], initializer=tf.contrib.layers.xavier_initializer(seed=42))
    b3 = tf.get_variable("b3", [n_classes, 1], initializer=tf.zeros_initializer())

    # Store initializations as a dictionary of parameters
    parameters = {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2,
        "W3": W3,
        "b3": b3
    }

    return parameters
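A hedged usage sketch (TensorFlow 1.x; the globals n_input, n_hidden_1, n_hidden_2, and n_classes must already exist, so the values below are illustrative):

import tensorflow as tf

n_input, n_hidden_1, n_hidden_2, n_classes = 784, 128, 64, 10

tf.reset_default_graph()
parameters = initialize_parameters()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(parameters["b1"]).shape)  # (128, 1)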
Example #5
 def query_encoder(self, v_q, is_training=True, scope="query_encoder"):
     """Encode query image feature
     
     Args:
         v_q: query image feature (batch_size, img_dim)
         is_training: True - training model / False - inference model
     Returns:
         phi_q: query vector
         v_qr: reconstructed v_q
     """
     with tf.variable_scope(scope):
         h1 = tf.contrib.layers.fully_connected(inputs=v_q, 
             num_outputs=256,
             activation_fn=tf.nn.tanh,
             weights_regularizer=tf.contrib.layers.l2_regularizer(self.weight_decay),
             biases_initializer=tf.zeros_initializer())
         phi_q = tf.contrib.layers.fully_connected(inputs=h1, 
             num_outputs=128,
             activation_fn=tf.nn.tanh,
             weights_regularizer=tf.contrib.layers.l2_regularizer(self.weight_decay),
             biases_initializer=tf.zeros_initializer())
         h2 = tf.contrib.layers.fully_connected(inputs=phi_q, 
             num_outputs=256,
             activation_fn=tf.nn.tanh,
             weights_regularizer=tf.contrib.layers.l2_regularizer(self.weight_decay),
             biases_initializer=tf.zeros_initializer())
         v_qr = tf.contrib.layers.fully_connected(inputs=h2, 
             num_outputs=self.img_dim,
             activation_fn=tf.nn.tanh,
             weights_regularizer=tf.contrib.layers.l2_regularizer(self.weight_decay),
             biases_initializer=tf.zeros_initializer())
         return phi_q, v_qr
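Since v_qr is a reconstruction of v_q, an autoencoder-style reconstruction term is one plausible (assumed, not shown in this excerpt) way to train the encoder alongside whatever loss consumes phi_q:

# Hypothetical reconstruction penalty, using the phi_q/v_qr returned above.
recon_loss = tf.reduce_mean(tf.squared_difference(v_q, v_qr))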
Example #6
def auxnet(embedding, size, dropout_rate=.5, std=.2, is_training=True, scope='auxnet'):
    """
    Defines the fully connected layers for the auxnet: 
        -- so far, one layer to batch norm to relu to dropout
    Args:
        embedding: the histogram embedding matrix
        size: int size of each hidden layer
        dropout_rate: rate to dropout (usually .5)
        std: standard deviation used for the initializer
        is_training: bool--used to turn off dropout for inference
        scope: name the op/tensor
    Returns:
        fc: the fully connected network as a tensor of size (p x size)
    """
    # make lower/upper bounds for the uniform initializer
    a, b = -np.sqrt(3) * std, np.sqrt(3) * std

    with tf.variable_scope(scope, 'Aux'):
        # note: if you use dropout and batch norm there is no need for a regularizer
        with slim.arg_scope([slim.fully_connected],
                weights_initializer=tf.random_uniform_initializer(minval=a, maxval=b),
                # weights_initializer=tf.truncated_normal_initializer(std),
                weights_regularizer=slim.l2_regularizer(.005),
                activation_fn=tf.nn.relu):

            # alternative single-block version (kept commented out):
            # net = slim.fully_connected(embedding, size, scope='hidden')
            # net = slim.dropout(net, dropout_rate,
            #         is_training=is_training, scope='dropout')
            # net = slim.fully_connected(net, size, scope='output',
            #         activation_fn=None)

            fc = slim.fully_connected(embedding, size,
                                      biases_initializer=tf.zeros_initializer(),
                                      activation_fn=None,  # tf.nn.relu,
                                      scope='hidden')
            # tf.summary.histogram('beforebn/%s' % scope, fc, collections=['train'])

            fc = slim.batch_norm(fc, center=True,
                                 scale=True,
                                 zero_debias_moving_mean=True,
                                 is_training=is_training,
                                 scope='bn')

            # mod option: add another layer here:
            fc = tf.nn.relu(fc, 'relu')

            # now apply the dropout:
            fc = slim.dropout(fc, dropout_rate,
                              is_training=is_training,
                              scope='dropout')

            # add another layer:
            fc = slim.fully_connected(fc, size, biases_initializer=tf.zeros_initializer(),
                                      activation_fn=tf.nn.tanh, scope="hidden2")
    #tf.summary.histogram('activations/auxnet/%s' % scope, fc, collections=['train'])

    return fc 
Example #7
  def create_slots(self, var):
    """Create the factorized Adam accumulators for diet variables."""
    params = self.params
    shape = var.get_shape().as_list()

    if not hasattr(params, "slots"):
      params.slots = defaultdict(dict)

    name = var.op.name
    slots = params.slots[name]

    if params.factored_second_moment_accumulator and len(shape) == 2:
      slots["adam_vr"] = tf.get_variable(
          name + "_adam_vr", [shape[0], 1],
          trainable=False,
          initializer=tf.zeros_initializer())
      slots["adam_vc"] = tf.get_variable(
          name + "_adam_vc", [1, shape[1]],
          trainable=False,
          initializer=tf.zeros_initializer())
    else:
      slots["adam_v"] = tf.get_variable(
          name + "_adam_v",
          shape,
          trainable=False,
          initializer=tf.zeros_initializer())
    if params.beta1 != 0.0:
      slots["adam_m"] = tf.get_variable(
          name + "_adam_m",
          shape,
          trainable=False,
          initializer=tf.zeros_initializer())
Example #8
def basic_fc_discriminator(x):
    """Compute discriminator score for a batch of input images.
    
    Inputs:
    - x: TensorFlow Tensor of flattened input images, shape [batch_size, 784]
    
    Returns:
    TensorFlow Tensor with shape [batch_size, 1], containing the score 
    for an image being real for each input image.
    """

    with tf.variable_scope("bfcdiscriminator"):

        W1 = tf.get_variable("W1", (784, 256))
        b1 = tf.get_variable("b1", (256, ), initializer=tf.zeros_initializer())
        W2 = tf.get_variable("W2", (256, 256))
        b2 = tf.get_variable("b2", (256, ), initializer=tf.zeros_initializer())
        W3 = tf.get_variable("W3", (256, 1), )
        b3 = tf.get_variable("b3", (1, ), initializer=tf.zeros_initializer())

        H1 = tf.matmul(x, W1) + b1
        H1L = leaky_relu(H1)
        H2 = tf.matmul(H1L, W2) + b2
        H2L = leaky_relu(H2)
        logits = tf.matmul(H2L, W3) + b3

        return logits
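A usage sketch (TensorFlow 1.x assumed; leaky_relu must already be defined, for example as tf.nn.leaky_relu):

import tensorflow as tf

leaky_relu = tf.nn.leaky_relu  # assumption about the helper used above
images = tf.placeholder(tf.float32, [None, 784])
scores = basic_fc_discriminator(images)
print(scores.get_shape())  # (?, 1)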
def initialize_parameters():
    '''
    Initializes the parameters of the neural network. The shapes are:
    W1: [25, 12288]
    b1: [25, 1]
    W2: [12, 25]
    b2: [12, 1]
    W3: [6, 12]
    b3: [6, 1]
    :return:
    parameters - a dictionary containing the W and b tensors
    '''

    tf.set_random_seed(1)  # fix the random seed

    W1= tf.get_variable('W1',[25,12288],initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b1 = tf.get_variable('b1',[25,1],initializer=tf.zeros_initializer())
    W2 = tf.get_variable('W2',[12,25],initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b2 = tf.get_variable('b2',[12,1],initializer=tf.zeros_initializer())
    W3 = tf.get_variable('W3',[6,12],initializer=tf.contrib.layers.xavier_initializer(seed=1))
    b3 = tf.get_variable('b3',[6,1],initializer=tf.zeros_initializer())

    parameters = {
        'W1':W1,
        'b1':b1,
        'W2':W2,
        'b2':b2,
        'W3':W3,
        'b3':b3
    }
    return parameters
Example #10
    def project_layer(self, lstm_outputs, name=None):
        """
        """
        with tf.variable_scope("project" if not name else name):
            with tf.variable_scope("hidden"):
                w_tanh = tf.get_variable("w_tanh", shape=[self.lstm_dim * 2, self.lstm_dim],
                                    dtype=tf.float32, initializer=self.initializer, regularizer=tf.contrib.layers.l2_regularizer(0.001))

                b_tanh = tf.get_variable("b_tanh", shape=[self.lstm_dim], dtype=tf.float32,
                                    initializer=tf.zeros_initializer())

                output = tf.reshape(lstm_outputs, shape=[-1, self.lstm_dim * 2])
                hidden = tf.tanh(tf.nn.xw_plus_b(output, w_tanh, b_tanh))

                drop_hidden = tf.nn.dropout(hidden, self.dropout)


            # project to score of tags
            with tf.variable_scope("output"):
                w_out = tf.get_variable("w_out", shape=[self.lstm_dim, self.num_tags],
                                    dtype=tf.float32, initializer=self.initializer, regularizer=tf.contrib.layers.l2_regularizer(0.001))

                b_out = tf.get_variable("b_out", shape=[self.num_tags], dtype=tf.float32,
                                    initializer=tf.zeros_initializer() )
                pred = tf.nn.xw_plus_b(drop_hidden, w_out, b_out, name="pred")
            self.logits = tf.reshape(pred, [-1, self.num_steps, self.num_tags], name="logits")
    def project_bilstm_layer(self, lstm_outputs, name=None):
        """
        hidden layer between lstm layer and logits
        :param lstm_outputs: [batch_size, num_steps, emb_size] 
        :return: [batch_size, num_steps, num_tags]
        """
        with tf.variable_scope("project" if not name else name):
            with tf.variable_scope("hidden"):
                W = tf.get_variable("W", shape=[self.hidden_unit * 2, self.hidden_unit],
                                    dtype=tf.float32, initializer=self.initializers.xavier_initializer())

                b = tf.get_variable("b", shape=[self.hidden_unit], dtype=tf.float32,
                                    initializer=tf.zeros_initializer())
                output = tf.reshape(lstm_outputs, shape=[-1, self.hidden_unit * 2])
                hidden = tf.tanh(tf.nn.xw_plus_b(output, W, b))

            # project to score of tags
            with tf.variable_scope("logits"):
                W = tf.get_variable("W", shape=[self.hidden_unit, self.num_labels],
                                    dtype=tf.float32, initializer=self.initializers.xavier_initializer())

                b = tf.get_variable("b", shape=[self.num_labels], dtype=tf.float32,
                                    initializer=tf.zeros_initializer())

                pred = tf.nn.xw_plus_b(hidden, W, b)
            return tf.reshape(pred, [-1, self.seq_length, self.num_labels])
Example #12
def bacthnorm(inputs, scope, epsilon=1e-05, momentum=0.99, is_training=True):
    inputs_shape = inputs.get_shape().as_list()  # shape of the input
    params_shape = inputs_shape[-1:]  # per-channel parameter shape (last dimension)
    axis = list(range(len(inputs_shape) - 1))

    with tf.variable_scope(scope):
        beta = create_variable("beta", params_shape,
                               initializer=tf.zeros_initializer())
        gamma = create_variable("gamma", params_shape,
                                initializer=tf.ones_initializer())
        # moving mean: not trainable, used at inference time
        moving_mean = create_variable("moving_mean", params_shape,
                            initializer=tf.zeros_initializer(), trainable=False)
        # moving variance: not trainable, used at inference time
        moving_variance = create_variable("moving_variance", params_shape,
                            initializer=tf.ones_initializer(), trainable=False)
    if is_training:
        mean, variance = tf.nn.moments(inputs, axes=axis)  # compute batch mean and variance
        # exponential moving average of mean and variance: x_t = a * x_{t-1} + (1 - a) * x_now
        update_move_mean = moving_averages.assign_moving_average(moving_mean,
                                                mean, decay=momentum)
        update_move_variance = moving_averages.assign_moving_average(moving_variance,
                                                variance, decay=momentum)
        tf.add_to_collection(UPDATE_OPS_COLLECTION, update_move_mean)
        tf.add_to_collection(UPDATE_OPS_COLLECTION, update_move_variance)
    else:
        mean, variance = moving_mean, moving_variance
    return tf.nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon)
Example #13
def batch_norm(x, decay=0.999, epsilon=1e-03, is_training=True,
               scope="scope"):
    x_shape = x.get_shape()
    num_inputs = x_shape[-1]
    reduce_dims = list(range(len(x_shape) - 1))
    with tf.variable_scope(scope):
        beta = create_var("beta", [num_inputs,],
                               initializer=tf.zeros_initializer())
        gamma = create_var("gamma", [num_inputs,],
                                initializer=tf.ones_initializer())
        # for inference
        moving_mean = create_var("moving_mean", [num_inputs,],
                                 initializer=tf.zeros_initializer(),
                                 trainable=False)
        moving_variance = create_var("moving_variance", [num_inputs],
                                     initializer=tf.ones_initializer(),
                                     trainable=False)
    if is_training:
        mean, variance = tf.nn.moments(x, axes=reduce_dims)
        update_move_mean = moving_averages.assign_moving_average(moving_mean,
                                                mean, decay=decay)
        update_move_variance = moving_averages.assign_moving_average(moving_variance,
                                                variance, decay=decay)
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_move_mean)
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_move_variance)
    else:
        mean, variance = moving_mean, moving_variance
    return tf.nn.batch_normalization(x, mean, variance, beta, gamma, epsilon)
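For intuition, the bookkeeping above follows the standard exponential-moving-average rule; a tiny NumPy sketch of that rule (made-up values, ignoring zero-debiasing):

import numpy as np

decay = 0.999
moving_mean = np.zeros(3)
for batch_mean in [np.array([1.0, 2.0, 3.0])] * 5:
    # the same rule assign_moving_average applies: x <- decay * x + (1 - decay) * x_new
    moving_mean = decay * moving_mean + (1 - decay) * batch_mean
print(moving_mean)  # creeps slowly toward the batch statistics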
    def initializeParameters(self, m, n):
        """
        Arguments:
            m -- number of users
            n -- number of items

        Returns:
            parameters -- parameters['b'], global bias, scalar
                          parameters['u'], users bias, shape (m, 1)
                          parameters['d'], item bias, shape (1, n)
                          parameters['P'], users feature matrix, shape (m, K)
                          parameters['Q'], items feature matrix, shape (n, K)        
        """
        k = self.K
        
        parameters = {}
        parameters['b'] = tf.get_variable(name='b', dtype=tf.float64, shape=[],
                                          initializer=tf.zeros_initializer())

        parameters['u'] = tf.get_variable(name='u', dtype=tf.float64, shape=[m, 1],
                                          initializer=tf.zeros_initializer())

        parameters['d'] = tf.get_variable(name='d', dtype=tf.float64, shape=[1, n],
                                          initializer=tf.zeros_initializer())

        parameters['P'] = tf.get_variable(name='P', dtype=tf.float64, shape=[m, k],
                                          initializer=tf.random_normal_initializer())

        parameters['Q'] = tf.get_variable(name='Q', dtype=tf.float64, shape=[n, k],
                                          initializer=tf.random_normal_initializer())

        return parameters
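Given these shapes, the usual biased matrix-factorization prediction would presumably be formed as below (an assumption about how the surrounding class combines the parameters; this is not shown in the excerpt):

# Hypothetical rating prediction: global bias + user bias + item bias + latent interaction.
# Broadcasting yields an (m, n) matrix of predicted ratings.
predictions = (parameters['b']
               + parameters['u']
               + parameters['d']
               + tf.matmul(parameters['P'], parameters['Q'], transpose_b=True))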
def initialize_parameters():
    """
    Initializes parameters to build a neural network with tensorflow. The shapes are:
                        W1 : [25, 12288]
                        b1 : [25, 1]
                        W2 : [12, 25]
                        b2 : [12, 1]
                        W3 : [6, 12]
                        b3 : [6, 1]
    
    Returns:
    parameters -- a dictionary of tensors containing W1, b1, W2, b2, W3, b3
    """
    
    tf.set_random_seed(1)                   # so that your "random" numbers match ours
        
    ### START CODE HERE ### (approx. 6 lines of code)
    W1 = tf.get_variable("W1", [25,12288], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b1 = tf.get_variable("b1", [25,1], initializer = tf.zeros_initializer())
    W2 = tf.get_variable("W2", [12,25], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b2 = tf.get_variable("b2", [12,1], initializer = tf.zeros_initializer())
    W3 = tf.get_variable("W3", [6,12], initializer = tf.contrib.layers.xavier_initializer(seed = 1))
    b3 = tf.get_variable("b3", [6,1], initializer = tf.zeros_initializer())
    ### END CODE HERE ###

    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2,
                  "W3": W3,
                  "b3": b3}
    
    return parameters
Example #16
def main(_):
  ed.set_seed(42)

  # DATA
  x_data = build_toy_dataset(FLAGS.N)

  # MODEL
  pi = Dirichlet(concentration=tf.ones(FLAGS.K))
  mu = Normal(0.0, 1.0, sample_shape=[FLAGS.K, FLAGS.D])
  sigma = InverseGamma(concentration=1.0, rate=1.0,
                       sample_shape=[FLAGS.K, FLAGS.D])
  c = Categorical(logits=tf.log(pi) - tf.log(1.0 - pi), sample_shape=FLAGS.N)
  x = Normal(loc=tf.gather(mu, c), scale=tf.gather(sigma, c))

  # INFERENCE
  qpi = Empirical(params=tf.get_variable(
      "qpi/params",
      [FLAGS.T, FLAGS.K],
      initializer=tf.constant_initializer(1.0 / FLAGS.K)))
  qmu = Empirical(params=tf.get_variable("qmu/params",
                                         [FLAGS.T, FLAGS.K, FLAGS.D],
                                         initializer=tf.zeros_initializer()))
  qsigma = Empirical(params=tf.get_variable("qsigma/params",
                                            [FLAGS.T, FLAGS.K, FLAGS.D],
                                            initializer=tf.ones_initializer()))
  qc = Empirical(params=tf.get_variable("qc/params",
                                        [FLAGS.T, FLAGS.N],
                                        initializer=tf.zeros_initializer(),
                                        dtype=tf.int32))

  gpi = Dirichlet(concentration=tf.constant([1.4, 1.6]))
  gmu = Normal(loc=tf.constant([[1.0, 1.0], [-1.0, -1.0]]),
               scale=tf.constant([[0.5, 0.5], [0.5, 0.5]]))
  gsigma = InverseGamma(concentration=tf.constant([[1.1, 1.1], [1.1, 1.1]]),
                        rate=tf.constant([[1.0, 1.0], [1.0, 1.0]]))
  gc = Categorical(logits=tf.zeros([FLAGS.N, FLAGS.K]))

  inference = ed.MetropolisHastings(
      latent_vars={pi: qpi, mu: qmu, sigma: qsigma, c: qc},
      proposal_vars={pi: gpi, mu: gmu, sigma: gsigma, c: gc},
      data={x: x_data})

  inference.initialize()

  sess = ed.get_session()
  tf.global_variables_initializer().run()

  for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)

    t = info_dict['t']
    if t == 1 or t % inference.n_print == 0:
      qpi_mean, qmu_mean = sess.run([qpi.mean(), qmu.mean()])
      print("")
      print("Inferred membership probabilities:")
      print(qpi_mean)
      print("Inferred cluster means:")
      print(qmu_mean)
Example #17
def fc(inputs, w_shape, b_shape):
    weight = tf.get_variable("weights",
                             w_shape,
                             initializer=tf.zeros_initializer(tf.float32))
    bias = tf.get_variable("bias",
                           b_shape,
                           initializer=tf.zeros_initializer(tf.float32))
    return tf.matmul(inputs, weight) + bias
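Because fc calls tf.get_variable with fixed names, each layer needs its own variable scope; a hedged usage sketch (TensorFlow 1.x, illustrative shapes):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4])
with tf.variable_scope("layer1"):
    h = fc(x, w_shape=[4, 8], b_shape=[8])
with tf.variable_scope("layer2"):
    y = fc(h, w_shape=[8, 2], b_shape=[2])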
def evaluate_precision_recall(
    input_, labels, threshold=0.5, per_example_weights=None, name=PROVIDED, phase=Phase.train
):
    """Computes the precision and recall of the prediction vs the labels.

  Args:
    input_: A rank 2 Tensor or a Pretty Tensor holding the result of the model.
    labels: The target labels to learn as a float tensor.
    threshold: The threshold to use to decide if the prediction is true.
    per_example_weights: A Tensor with a weight per example.
    name: An optional name.
    phase: The phase of this model; non training phases compute a total across
      all examples.
  Returns:
    Precision and Recall.
  """
    _ = name  # Eliminate warning, name used for namescoping by PT.
    selected, sum_retrieved, sum_relevant = _compute_precision_recall(input_, labels, threshold, per_example_weights)

    if phase != Phase.train:
        dtype = tf.float32
        # Create the variables in all cases so that the load logic is easier.
        relevant_count = tf.get_variable(
            "relevant_count",
            [],
            dtype,
            tf.zeros_initializer(),
            collections=[bookkeeper.GraphKeys.TEST_VARIABLES],
            trainable=False,
        )
        retrieved_count = tf.get_variable(
            "retrieved_count",
            [],
            dtype,
            tf.zeros_initializer(),
            collections=[bookkeeper.GraphKeys.TEST_VARIABLES],
            trainable=False,
        )
        selected_count = tf.get_variable(
            "selected_count",
            [],
            dtype,
            tf.zeros_initializer(),
            collections=[bookkeeper.GraphKeys.TEST_VARIABLES],
            trainable=False,
        )

        with input_.g.device(selected_count.device):
            selected = tf.assign_add(selected_count, selected)
        with input_.g.device(retrieved_count.device):
            sum_retrieved = tf.assign_add(retrieved_count, sum_retrieved)
        with input_.g.device(relevant_count.device):
            sum_relevant = tf.assign_add(relevant_count, sum_relevant)

    return (
        tf.where(tf.equal(sum_retrieved, 0), tf.zeros_like(selected), selected / sum_retrieved),
        tf.where(tf.equal(sum_relevant, 0), tf.zeros_like(selected), selected / sum_relevant),
    )
Example #19
 def _batch_norm_without_layers(self, input_layer, decay, use_scale,
                                epsilon):
     """Batch normalization on `input_layer` without tf.layers."""
     shape = input_layer.shape
     num_channels = shape[3] if self.data_format == 'NHWC' else shape[1]
     beta = self.get_variable(
         'beta', [num_channels],
         tf.float32,
         tf.float32,
         initializer=tf.zeros_initializer())
     if use_scale:
         gamma = self.get_variable(
             'gamma', [num_channels],
             tf.float32,
             tf.float32,
             initializer=tf.ones_initializer())
     else:
         gamma = tf.constant(1.0, tf.float32, [num_channels])
     moving_mean = tf.get_variable(
         'moving_mean', [num_channels],
         tf.float32,
         initializer=tf.zeros_initializer(),
         trainable=False)
     moving_variance = tf.get_variable(
         'moving_variance', [num_channels],
         tf.float32,
         initializer=tf.ones_initializer(),
         trainable=False)
     if self.phase_train:
         bn, batch_mean, batch_variance = tf.nn.fused_batch_norm(
             input_layer,
             gamma,
             beta,
             epsilon=epsilon,
             data_format=self.data_format,
             is_training=True)
         mean_update = moving_averages.assign_moving_average(
             moving_mean, batch_mean, decay=decay, zero_debias=False)
         variance_update = moving_averages.assign_moving_average(
             moving_variance,
             batch_variance,
             decay=decay,
             zero_debias=False)
         tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, mean_update)
         tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, variance_update)
     else:
         bn, _, _ = tf.nn.fused_batch_norm(
             input_layer,
             gamma,
             beta,
             mean=moving_mean,
             variance=moving_variance,
             epsilon=epsilon,
             data_format=self.data_format,
             is_training=False)
     return bn
Example #20
def mnist_model(train_data_flat, train_labels, x0):
  """Creates a simple linear model that evaluates cross-entropy loss and
  gradient on MNIST dataset. Mirrors 'linear' model from train-on-mnist.lua

  Result is a Python callable that accepts ITensor parameter vector and returns
  ITensor loss and gradient.
  """
  
  #  batchSize = 60000
  batchSize = 1
  x_size = 10
  x_offset = 512

  # reshape flat parameter vector into W and b parameter matrices
  x_placeholder, param = tf.get_session_tensor(x0.tf_handle, x0.dtype)
  W_flat = tf.slice(param, [0], [x_size*10])
  W = tf.reshape(W_flat, [x_size, 10])
  b_flat = tf.slice(param, [x_size*10], [10])
  b = tf.reshape(b_flat, [1, 10])

  # create model
  data = tf.Variable(tf.zeros_initializer((batchSize, x_size), dtype=dtype))
  targets = tf.Variable(tf.zeros_initializer((batchSize, x_size), dtype=dtype))
  logits = tf.matmul(data, W) + b
  cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, targets)

  # create loss and gradient ops
  cross_entropy_loss = tf.reduce_mean(cross_entropy)
  Wnorm = tf.reduce_sum(tf.square(W))
  bnorm = tf.reduce_sum(tf.square(b))
  loss = cross_entropy_loss + (bnorm + Wnorm)/2
  [grad] = tf.gradients(loss, [param])

  # get handle ops that will be used to initialize ITensors
  loss_handle_tensor = tf.get_session_handle(loss)
  grad_handle_tensor = tf.get_session_handle(grad)

  # initialize data and targets
  data_placeholder = tf.placeholder(dtype=dtype)
  data_init = data.assign(data_placeholder)
  labels_placeholder = tf.placeholder(shape=(batchSize), dtype=tf.int32)
  labels_onehot = tf.one_hot(labels_placeholder - 1, 10, dtype=dtype)
  targets_init = targets.assign(labels_onehot)
  sess.run(data_init, feed_dict={data_placeholder:train_data_flat[:batchSize,x_offset:x_offset+x_size]})
  sess.run(targets_init, feed_dict={labels_placeholder:
                                    train_labels[:batchSize]})

  # Create our callable that works on persistent Tensors
  def eval_model(x):
    loss_handle, grad_handle = sess.run([loss_handle_tensor,
                                         grad_handle_tensor],
                                        feed_dict={x_placeholder: x.tf_handle})
    return [env.handle_to_itensor(loss_handle),
            env.handle_to_itensor(grad_handle)]

  return eval_model
Example #21
def module_with_variables():
  tf.get_variable(
      name="weights",
      shape=[3],
      initializer=tf.zeros_initializer())
  tf.get_variable(
      name="partition",
      shape=[4],
      initializer=tf.zeros_initializer(),
      partitioner=tf.fixed_size_partitioner(3))
Example #22
def mnistCost(train_data_flat, train_labels, x0, env):
  """Creates a simple linear model that evaluates cross-entropy loss and
  gradient on MNIST dataset. Mirrors 'linear' model from train-on-mnist.lua

  Result is a Python callable that accepts ITensor parameter vector and
  returns ITensor loss and gradient. It works as a plug-in replacement of
  "opfunc" in train-on-mnist

  IE, you can do:
  x = ti.ones(...)
  opfunc=mnist_model(x0)
  loss, grad = opfunc(x0)
  x1 = lbfgs(opfunc,...)
  """

  batchSize = 100

  # create our input end-point, this is where ITensor->Tensor conversion
  # happens
  param = env.make_input(x0)

  # reshape flat parameter vector into W and b parameter matrices
  W_flat = tf.slice(param, [0], [10240])
  W = tf.reshape(W_flat, [1024, 10])
  b_flat = tf.slice(param, [10240], [10])
  b = tf.reshape(b_flat, [1, 10])

  # create model
  data = tf.Variable(tf.zeros_initializer((batchSize, 1024)))
  targets = tf.Variable(tf.zeros_initializer((batchSize, 10)))
  logits = tf.matmul(data, W) + b
  cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, targets)

  # create loss and gradient ops
  cross_entropy_loss = tf.reduce_mean(cross_entropy)
  Wnorm = tf.reduce_sum(tf.square(W))
  bnorm = tf.reduce_sum(tf.square(b))
  loss = cross_entropy_loss + (bnorm + Wnorm)/2
  [grad] = tf.gradients(loss, [param])

  # initialize data and targets. Load entire dataset into tf Variable
  data_placeholder = tf.placeholder(dtype=tf.float32)
  data_init = data.assign(data_placeholder)
  labels_placeholder = tf.placeholder(shape=(batchSize), dtype=tf.int32)
  labels_onehot = tf.one_hot(labels_placeholder - 1, 10)
  targets_init = targets.assign(labels_onehot)
  env.sess.run(data_init, feed_dict={data_placeholder:
                                     train_data_flat[:batchSize]})
  env.sess.run(targets_init, feed_dict={labels_placeholder:
                                        train_labels[:batchSize]})

  # create imperative wrapper of tensorflow graph we just constructed
  # ITensor input is automatically converted and fed into param
  # and outputs are converted to ITensor objects and returned
  return env.make_function(inputs=[param], outputs=[loss, grad])
Example #23
def linear_zeros(name, x, width, logscale_factor=3):
    with tf.variable_scope(name):
        n_in = int(x.get_shape()[1])
        w = tf.get_variable("W", [n_in, width], tf.float32,
                            initializer=tf.zeros_initializer())
        x = tf.matmul(x, w)
        x += tf.get_variable("b", [1, width],
                             initializer=tf.zeros_initializer())
        x *= tf.exp(tf.get_variable("logs",
                                    [1, width], initializer=tf.zeros_initializer()) * logscale_factor)
        return x
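A usage sketch (TensorFlow 1.x; the scope name and input size are illustrative):

import tensorflow as tf

h = tf.placeholder(tf.float32, [None, 64])
# All three variables start at zero, so the layer initially outputs zeros for any input.
out = linear_zeros("prior_head", h, width=10)
print(out.get_shape())  # (?, 10)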
Example #24
def module_with_variables():
  tf.get_variable(
      name="weights",
      shape=[3],
      initializer=tf.zeros_initializer())
  tf.get_variable(
      name="partition",
      shape=[4],
      initializer=tf.zeros_initializer(),
      partitioner=tf.fixed_size_partitioner(3))
  hub.add_signature(outputs=tf.constant(1.0))
Example #25
File: GAN.py  Project: CODEJIN/GAN
 def Weight_Generate(self, initializer_Std = 0.1):
     self.weightMatrix_Dict = {}
     self.biasMatrix_Dict = {}
     
     self.weightMatrix_Dict["Generator","IH"] = tf.get_variable(
         name = "Weight_Generator_IH",
         shape = (self.noise_Size, self.hidden_Size),
         dtype = tf.float32,
         initializer = tf.truncated_normal_initializer(stddev=initializer_Std)
     )
     self.weightMatrix_Dict["Generator","HO"] = tf.get_variable(
         name = "Weight_Generator_HO",
         shape = (self.hidden_Size, 28*28),
         dtype = tf.float32,
         initializer = tf.truncated_normal_initializer(stddev=initializer_Std)
     )
     self.weightMatrix_Dict["Discriminator","IH"] = tf.get_variable(
         name = "Weight_Discriminator_IH",
         shape = (28*28, self.hidden_Size),
         dtype = tf.float32,
         initializer = tf.truncated_normal_initializer(stddev=initializer_Std)
     )
     self.weightMatrix_Dict["Discriminator","HO"] = tf.get_variable(
         name = "Weight_Discriminator_HO",
         shape = (self.hidden_Size, 1),
         dtype = tf.float32,
         initializer = tf.truncated_normal_initializer(stddev=initializer_Std)
     )
     
     self.biasMatrix_Dict["Generator","H"] = tf.get_variable(
         name = "Bias_Generator_IH",
         shape = (1, self.hidden_Size),
         dtype = tf.float32,
         initializer = tf.zeros_initializer()
     )
     self.biasMatrix_Dict["Generator","O"] = tf.get_variable(
         name = "Bias_Generator_HO",
         shape = (1, 28*28),
         dtype = tf.float32,
         initializer = tf.zeros_initializer()
     )
     self.biasMatrix_Dict["Discriminator","H"] = tf.get_variable(
         name = "Bias_Discriminator_IH",
         shape = (1, self.hidden_Size),
         dtype = tf.float32,
         initializer = tf.zeros_initializer()
     )
     self.biasMatrix_Dict["Discriminator","O"] = tf.get_variable(
         name = "Bias_Discriminator_HO",
         shape = (1, 1),
         dtype = tf.float32,
         initializer = tf.zeros_initializer()
     )
Example #26
    def __init__(self, size, eps=1e-2, default_clip_range=np.inf, sess=None):
        """A normalizer that ensures that observations are approximately distributed according to
        a standard Normal distribution (i.e. have mean zero and variance one).

        Args:
            size (int): the size of the observation to be normalized
            eps (float): a small constant that avoids underflows
            default_clip_range (float): normalized observations are clipped to be in
                [-default_clip_range, default_clip_range]
            sess (object): the TensorFlow session to be used
        """
        self.size = size
        self.eps = eps
        self.default_clip_range = default_clip_range
        self.sess = sess if sess is not None else tf.get_default_session()

        self.local_sum = np.zeros(self.size, np.float32)
        self.local_sumsq = np.zeros(self.size, np.float32)
        self.local_count = np.zeros(1, np.float32)

        self.sum_tf = tf.get_variable(
            initializer=tf.zeros_initializer(), shape=self.local_sum.shape, name='sum',
            trainable=False, dtype=tf.float32)
        self.sumsq_tf = tf.get_variable(
            initializer=tf.zeros_initializer(), shape=self.local_sumsq.shape, name='sumsq',
            trainable=False, dtype=tf.float32)
        self.count_tf = tf.get_variable(
            initializer=tf.ones_initializer(), shape=self.local_count.shape, name='count',
            trainable=False, dtype=tf.float32)
        self.mean = tf.get_variable(
            initializer=tf.zeros_initializer(), shape=(self.size,), name='mean',
            trainable=False, dtype=tf.float32)
        self.std = tf.get_variable(
            initializer=tf.ones_initializer(), shape=(self.size,), name='std',
            trainable=False, dtype=tf.float32)
        self.count_pl = tf.placeholder(name='count_pl', shape=(1,), dtype=tf.float32)
        self.sum_pl = tf.placeholder(name='sum_pl', shape=(self.size,), dtype=tf.float32)
        self.sumsq_pl = tf.placeholder(name='sumsq_pl', shape=(self.size,), dtype=tf.float32)

        self.update_op = tf.group(
            self.count_tf.assign_add(self.count_pl),
            self.sum_tf.assign_add(self.sum_pl),
            self.sumsq_tf.assign_add(self.sumsq_pl)
        )
        self.recompute_op = tf.group(
            tf.assign(self.mean, self.sum_tf / self.count_tf),
            tf.assign(self.std, tf.sqrt(tf.maximum(
                tf.square(self.eps),
                self.sumsq_tf / self.count_tf - tf.square(self.sum_tf / self.count_tf)
            ))),
        )
        self.lock = threading.Lock()
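The class stores the mean/std variables and a clip range, so the normalization step itself (not part of this excerpt) would presumably look like the sketch below, following the docstring's description:

    # Hypothetical method: subtract the mean, divide by the std, then clip to the allowed range.
    def normalize(self, v, clip_range=None):
        if clip_range is None:
            clip_range = self.default_clip_range
        return tf.clip_by_value((v - self.mean) / self.std, -clip_range, clip_range)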
  def state(self, state_name):
    """Returns, creating if necessary, a state variable with the given name."""
    if state_name not in self.requested_tensors:
      count = tf.get_variable('count_%s' % state_name,
                              [],
                              tf.int32,
                              tf.zeros_initializer(),
                              trainable=False)
      value = tf.get_variable(state_name, [self.batch_size, self.node_depth],
                              tf.float32, tf.zeros_initializer())
      self.requested_tensors[state_name] = (count, value)

    return self.requested_tensors[state_name][1]
Example #28
 def _init_parameters(self):
     if self.Bmean is None:
         self.Bmean = vs.get_variable("Bmean", [self._num_units], initializer = tf.zeros_initializer())
     if self.Bvar is None:
         self.Bvar = vs.get_variable("Bvar", [self._num_units], initializer = tf.zeros_initializer())
     if self.Wmean is None:
         self.Wmean = vs.get_variable("Wmean", [self._num_units, self._num_units], initializer = tf.uniform_unit_scaling_initializer(factor=weight_init_factor))
     if self.Wvar is None:
         self.Wvar = vs.get_variable("Wvar", [self._num_units, self._num_units], initializer = tf.uniform_unit_scaling_initializer(factor=weight_init_factor))
     if self.Wmean_r is None:
         self.Wmean_r = vs.get_variable("Wmean_r", [self._num_units, self._num_units], initializer = tf.uniform_unit_scaling_initializer(factor=weight_init_factor))
     if self.Wvar_r is None:
         self.Wvar_r = vs.get_variable("Wvar_r", [self._num_units, self._num_units], initializer = tf.uniform_unit_scaling_initializer(factor=weight_init_factor))
Example #29
    def __init__(self,dim,name=None):
        super(CrossCompressUnit,self).__init__(name)
        self.dim = dim
        with tf.variable_scope(self.name):
            self.weight_vv = tf.get_variable(name='weight_vv',shape=(dim,1),dtype=tf.float32)
            self.weight_ev = tf.get_variable(name='weight_ev',shape=(dim,1),dtype=tf.float32)
            self.weight_ve = tf.get_variable(name='weight_ve',shape=(dim,1),dtype=tf.float32)
            self.weight_ee = tf.get_variable(name='weight_ee',shape=(dim,1),dtype=tf.float32)

            self.bias_v = tf.get_variable(name='bias_v',shape=dim,initializer=tf.zeros_initializer())
            self.bias_e = tf.get_variable(name='bias_e',shape=dim,initializer=tf.zeros_initializer())

        self.vars = [self.weight_vv,self.weight_ev,self.weight_ve,self.weight_ee]
Example #30
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """See base class."""
    assignments = []
    for (grad, param) in grads_and_vars:
      if grad is None or param is None:
        continue

      param_name = self._get_variable_name(param.name)

      m = tf.get_variable(
          name=param_name + "/adam_m",
          shape=param.shape.as_list(),
          dtype=tf.float32,
          trainable=False,
          initializer=tf.zeros_initializer())
      v = tf.get_variable(
          name=param_name + "/adam_v",
          shape=param.shape.as_list(),
          dtype=tf.float32,
          trainable=False,
          initializer=tf.zeros_initializer())

      # Standard Adam update.
      next_m = (
          tf.multiply(self.beta_1, m) + tf.multiply(1.0 - self.beta_1, grad))
      next_v = (
          tf.multiply(self.beta_2, v) + tf.multiply(1.0 - self.beta_2,
                                                    tf.square(grad)))

      update = next_m / (tf.sqrt(next_v) + self.epsilon)

      # Just adding the square of the weights to the loss function is *not*
      # the correct way of using L2 regularization/weight decay with Adam,
      # since that will interact with the m and v parameters in strange ways.
      #
      # Instead we want to decay the weights in a manner that doesn't interact
      # with the m/v parameters. This is equivalent to adding the square
      # of the weights to the loss with plain (non-momentum) SGD.
      if self._do_use_weight_decay(param_name):
        update += self.weight_decay_rate * param

      update_with_lr = self.learning_rate * update

      next_param = param - update_with_lr

      assignments.extend(
          [param.assign(next_param),
           m.assign(next_m),
           v.assign(next_v)])
    return tf.group(*assignments, name=name)
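A scalar walk-through of one update step, mirroring the code above (made-up numbers and hyperparameters; plain Python, no TensorFlow needed):

beta_1, beta_2, eps = 0.9, 0.999, 1e-6
weight_decay_rate, learning_rate = 0.01, 1e-3
m, v, param, grad = 0.0, 0.0, 0.5, 0.2

next_m = beta_1 * m + (1.0 - beta_1) * grad
next_v = beta_2 * v + (1.0 - beta_2) * grad ** 2
update = next_m / (next_v ** 0.5 + eps)
update += weight_decay_rate * param  # decay applied to the weights, not folded into the loss
next_param = param - learning_rate * update
print(next_param)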
Example #31
def _create_cqa_modules(is_training, predictions, update_num):
    num_labels = 2
    input_extract = predictions["input_extract"]
    input1_extract = predictions["input1_extract"]
    input2_extract = predictions["input2_extract"]
    input3_extract = predictions["input3_extract"]
    embedding = predictions["embedding"]
    input_mask = predictions["input_mask"]
    q_type = predictions["q_type"]
    labels = predictions["label_ids"]
    encoder_output1 = predictions["last_layer"]
    # encoder_output = encoder_output1 + encoder_output2 + \
    #     encoder_output3 + encoder_output4
    encoder_output = predictions["output_layer"]



    sent1 = None
    sent2 = None
    sent3 = None

    sent1_mask = None
    sent2_mask = None
    sent3_mask = None

    mark0 = None
    mark1 = None
    mark2 = None
    mark3 = None

    if input_extract is None and input3_extract is None:
        sent1_mask = tf.cast(tf.not_equal(input1_extract, 0), tf.float32)
        sent2_mask = tf.cast(tf.not_equal(input2_extract, 0), tf.float32)

        sent1 = tf.batch_gather(encoder_output, input1_extract)
        sent2 = tf.batch_gather(encoder_output, input2_extract)
    elif input3_extract is None:
        sent1_mask = tf.cast(tf.equal(input_extract, 1), tf.float32)
        sent2_mask = tf.cast(tf.equal(input_extract, 2), tf.float32)

        sent1 = encoder_output * tf.expand_dims(sent1_mask, axis=-1)
        sent2 = encoder_output * tf.expand_dims(sent2_mask, axis=-1)
    else:
        sent1_mask = tf.cast(tf.not_equal(input1_extract, 0), tf.float32)
        sent2_mask = tf.cast(tf.not_equal(input2_extract, 0), tf.float32)
        sent3_mask = tf.cast(tf.not_equal(input3_extract, 0), tf.float32)

        sent1 = tf.batch_gather(encoder_output, input1_extract)
        sent2 = tf.batch_gather(encoder_output, input2_extract)
        sent3 = tf.batch_gather(encoder_output, input3_extract)

    mark0 = tf.squeeze(encoder_output1[:, 0:1, :], axis=1)

    model = CQAMODEL(is_training=is_training,
                      all_sent=encoder_output, input_mask=input_mask,
                      sent1=sent1, sent2=sent2, sent3=sent3,
                      sent1_mask=sent1_mask, sent2_mask=sent2_mask, sent3_mask=sent3_mask,
                      mark0=mark0, mark1=mark1, mark2=mark2, mark3=mark3,
                      embedding=embedding, update_num=update_num)
    # model = Baseline(is_training=is_training,
    #                  sent1=sent1, sent2=sent2, sent3=sent3,
    #                  sent1_mask=sent1_mask, sent2_mask=sent2_mask, sent3_mask=sent3_mask,
    #                  mark0=mark0, mark1=mark1, mark2=mark2, mark3=mark3)

    result = model.get_output()  # (B, dim)
    # mark0 = tf.layers.dense(mark0, 768, activation=tf.tanh)
    # result = mark0

    hidden_size = result.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights_v2", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "output_bias_v2", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            result = tf.nn.dropout(result, keep_prob=0.9)

        logits = tf.matmul(result, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        prob = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        total_loss = tf.reduce_mean(per_example_loss)

    return total_loss, logits, prob
def mobilenet(inputs,
              num_classes=1001,
              prediction_fn=slim.softmax,
              reuse=None,
              scope='Mobilenet',
              base_only=False,
              **mobilenet_args):
    """Mobilenet model for classification, supports both V1 and V2.

  Note: default mode is inference, use mobilenet.training_scope to create
  training network.


  Args:
    inputs: a tensor of shape [batch_size, height, width, channels].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    prediction_fn: a function to get predictions out of logits
      (default softmax).
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.
    base_only: if True will only create the base of the network (no pooling
    and no logits).
    ## definition of the convolution layers
    **mobilenet_args: passed to mobilenet_base verbatim.
      - conv_defs: list of conv defs
      - multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
      - output_stride: will ensure that the last layer has at most total stride.
      If the architecture calls for more stride than that provided
      (e.g. output_stride=16, but the architecture has 5 stride=2 operators),
      it will replace output_stride with fractional convolutions using Atrous
      Convolutions.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the corresponding
      activation tensor.

  Raises:
    ValueError: Input rank is invalid.
  """
    is_training = mobilenet_args.get('is_training', False)
    input_shape = inputs.get_shape().as_list()
    if len(input_shape) != 4:
        raise ValueError('Expected rank 4 input, was: %d' % len(input_shape))

    with tf.variable_scope(scope, 'Mobilenet', reuse=reuse) as scope:
        inputs = tf.identity(inputs, 'input')
        net, end_points, netFirst = mobilenet_base(inputs,
                                                   scope=scope,
                                                   **mobilenet_args)
        if base_only:
            return net, end_points, netFirst

        net = tf.identity(net, name='embedding')

        with tf.variable_scope('Logits'):
            net = global_pool(net)
            end_points['global_pool'] = net
            if not num_classes:
                return net, end_points
            net = slim.dropout(net, scope='Dropout', is_training=is_training)
            # 1 x 1 x num_classes
            # Note: legacy scope name.
            logits = slim.conv2d(net,
                                 num_classes, [1, 1],
                                 activation_fn=None,
                                 normalizer_fn=None,
                                 biases_initializer=tf.zeros_initializer(),
                                 scope='Conv2d_1c_1x1')

            logits = tf.squeeze(logits, [1, 2])

            logits = tf.identity(logits, name='output')
        end_points['Logits'] = logits
        if prediction_fn:
            end_points['Predictions'] = prediction_fn(logits, 'Predictions')
    return logits, end_points
Example #33
def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.2,
               spatial_squeeze=True,
               scope='alexnet_v2',
               global_pool=False):
    """AlexNet version 2.

    Described in: http://arxiv.org/pdf/1404.5997v2.pdf
    Parameters from:
    github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
    layers-imagenet-1gpu.cfg

    Note: All the fully_connected layers have been transformed to conv2d layers.
          To use in classification mode, resize input to 224x224 or set
          global_pool=True. To use in fully convolutional mode, set
          spatial_squeeze to false.
          The LRN layers have been removed and the initializers changed from
          random_normal_initializer to xavier_initializer.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: the number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer are returned instead.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the dropout
        layers during training.
      spatial_squeeze: whether or not should squeeze the spatial dimensions of the
        logits. Useful to remove unnecessary dimensions for classification.
      scope: Optional scope for the variables.
      global_pool: Optional boolean flag. If True, the input to the classification
        layer is avgpooled to size 1x1, for any input size. (This is not part
        of the original AlexNet.)

    Returns:
      net: the output of the logits layer (if num_classes is a non-zero integer),
        or the non-dropped-out input to the logits layer (if num_classes is 0
        or None).
      end_points: a dict of tensors with intermediate activations.
    """
    with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=[end_points_collection]):
            net = slim.conv2d(inputs,
                              64, [11, 11],
                              4,
                              padding='VALID',
                              scope='conv1')
            net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
            net = slim.conv2d(net, 192, [5, 5], scope='conv2')
            net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
            net = slim.conv2d(net, 384, [3, 3], scope='conv3')
            net = slim.conv2d(net, 384, [3, 3], scope='conv4')
            net = slim.conv2d(net, 256, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')
            # Use conv2d instead of fully_connected layers.
            with slim.arg_scope(
                [slim.conv2d],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=tf.constant_initializer(0.1)):
                net = slim.conv2d(net,
                                  4096, [5, 5],
                                  padding='VALID',
                                  scope='fc6')
                net = slim.dropout(net,
                                   dropout_keep_prob,
                                   is_training=is_training,
                                   scope='dropout6')
                net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
                # Convert end_points_collection into a end_point dict.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)
                if global_pool:
                    net = tf.reduce_mean(net, [1, 2],
                                         keep_dims=True,
                                         name='global_pool')
                    end_points['global_pool'] = net
                if num_classes:
                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       is_training=is_training,
                                       scope='dropout7')
                    net = slim.conv2d(
                        net,
                        num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        biases_initializer=tf.zeros_initializer(),
                        scope='fc8')
                    net = slim.dropout(net,
                                       dropout_keep_prob,
                                       is_training=is_training,
                                       scope='dropout8')
                    if spatial_squeeze:
                        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                    end_points[sc.name + '/fc8'] = net
            return net, end_points
Example #34
    def _init(self,
              ob_space,
              ac_space,
              hid_size,
              num_hid_layers,
              gaussian_fixed_var=True):
        assert isinstance(ob_space, gym.spaces.Box)

        # Add the variable to track layers
        self.num_hid_layers = num_hid_layers
        self.pdtype = pdtype = make_pdtype(ac_space)
        sequence_length = None

        ob = U.get_placeholder(name="ob",
                               dtype=tf.float32,
                               shape=[sequence_length] + list(ob_space.shape))

        with tf.variable_scope("obfilter"):
            self.ob_rms = RunningMeanStd(shape=ob_space.shape)

        with tf.variable_scope('vf'):
            obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std,
                                   -5.0, 5.0)
            last_out = obz
            for i in range(num_hid_layers):
                last_out = tf.nn.tanh(
                    tf.layers.dense(
                        last_out,
                        hid_size,
                        name="fc%i" % (i + 1),
                        kernel_initializer=U.normc_initializer(1.0)))
            self.vpred = tf.layers.dense(
                last_out,
                1,
                name='final',
                kernel_initializer=U.normc_initializer(0.1))[:, 0]

        with tf.variable_scope('pol'):
            last_out = obz
            for i in range(num_hid_layers):
                last_out = tf.nn.tanh(
                    tf.layers.dense(
                        last_out,
                        hid_size,
                        name='fc%i' % (i + 1),
                        kernel_initializer=U.normc_initializer(1.0)))
            if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
                mean = tf.layers.dense(
                    last_out,
                    pdtype.param_shape()[0] // 2,
                    name='final',
                    kernel_initializer=U.normc_initializer(0.01))
                logstd = tf.get_variable(
                    name="logstd",
                    shape=[1, pdtype.param_shape()[0] // 2],
                    initializer=tf.zeros_initializer())
                pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
            else:
                pdparam = tf.layers.dense(
                    last_out,
                    pdtype.param_shape()[0],
                    name='final',
                    kernel_initializer=U.normc_initializer(0.01))

        pdparam = tf.clip_by_value(pdparam, -5.0, 5.0)
        self.pd = pdtype.pdfromflat(pdparam)

        self.state_in = []
        self.state_out = []

        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self._act = U.function(
            [stochastic, ob],
            [ac, self.vpred, tf.exp(self.pd.logp(ac))])
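The `mean * 0.0 + logstd` expression above is a broadcasting trick: it tiles the state-independent [1, dim] logstd row across the batch dimension of mean before the concat. A tiny NumPy sketch with assumed shapes:

import numpy as np

mean = np.zeros((32, 6), dtype=np.float32)        # assumed batch of action means
logstd = np.zeros((1, 6), dtype=np.float32)       # single state-independent logstd row
tiled = mean * 0.0 + logstd                       # broadcasts logstd to shape (32, 6)
pdparam = np.concatenate([mean, tiled], axis=1)   # (32, 12): [means | logstds]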
Exemplo n.º 35
0
def P_Net(inputs,
          label=None,
          bbox_target=None,
          landmark_target=None,
          training=True):
    # why is PReLU used as the activation? (see the note below)
    '''
    leaky relu vs prelu:
      https://datascience.stackexchange.com/questions/18583/what-is-the-difference-between-leakyrelu-and-prelu
      Leaky ReLUs: allow a small, non-zero gradient when the unit is not active.
      Parametric ReLUs: take this idea further by making the coefficient of leakage into a parameter
                        that is learned along with the other neural network parameters.
    '''
    with slim.arg_scope(
        [slim.conv2d],
            activation_fn=prelu,
            weights_initializer=slim.xavier_initializer(),
            biases_initializer=tf.zeros_initializer(),  # slim does not provide a zeros initializer
            weights_regularizer=slim.l2_regularizer(0.0005),
            padding='valid'):
        print("PNet input shape: ", inputs.get_shape())
        net = slim.conv2d(inputs,
                          num_outputs=10,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv1')
        print("PNet conv1 shape: ", net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              padding='SAME',
                              scope='pool1')
        print("PNet pool1 shape: ", net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=16,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv2')
        print("PNet conv2 shape: ", net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=32,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv3')
        print("PNet conv3 shape: ", net.get_shape())
        # final three 1x1 convs produce the H*W*2 classifier, H*W*4 bbox and H*W*10 landmark predictions
        conv4_1 = slim.conv2d(net,
                              num_outputs=2,
                              kernel_size=[1, 1],
                              stride=1,
                              scope='conv4_1',
                              activation_fn=tf.nn.softmax)
        print('P_Net conv4_1 shape ', conv4_1.get_shape())
        bbox_pred = slim.conv2d(
            net,
            num_outputs=4,
            kernel_size=[1, 1],
            stride=1,
            scope='conv4_2',
            activation_fn=None
        )  # important: the scope name should not be the same as the variable name
        print('P_Net bbox_pred conv layer shape ', bbox_pred.get_shape())
        landmark_pred = slim.conv2d(net,
                                    num_outputs=10,
                                    kernel_size=[1, 1],
                                    stride=1,
                                    scope='conv4_3',
                                    activation_fn=None)
        print('P_Net landmark conv layer shape', landmark_pred.get_shape())

        if training:
            # batch*2 scores determining whether each sample is a face
            # squeezing drops the 1x1 spatial dims left by the VALID convolutions
            cls_prob = tf.squeeze(conv4_1, [1, 2], name='cls_prob')
            cls_loss = cls_ohem(cls_prob, label)
            #check bbox_loss
            bbox_pred = tf.squeeze(bbox_pred, [1, 2], name='bbox_pred')
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            #landmark loss
            landmark_pred = tf.squeeze(landmark_pred, [1, 2],
                                       name='landmark_pred')
            landmark_loss = landmark_ohem(landmark_pred, landmark_target,
                                          label)
            accuracy = cal_accuracy(cls_prob, label)

            #tf.add_n: Adds all input tensors element-wise.
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
        else:
            #test, batch_size=1
            cls_prob_test = tf.squeeze(conv4_1, axis=0)
            bbox_pred_test = tf.squeeze(bbox_pred, axis=0)
            landmark_pred_test = tf.squeeze(landmark_pred, axis=0)
            return cls_prob_test, bbox_pred_test, landmark_pred_test
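The `prelu` activation used in the arg_scope above is defined elsewhere in the project; a minimal sketch of a per-channel PReLU consistent with the docstring (hypothetical helper, not necessarily the project's own version):

def prelu(x):
    # hypothetical sketch: one learnable leakage coefficient per channel
    alpha = tf.get_variable('alpha', shape=[x.get_shape().as_list()[-1]],
                            dtype=tf.float32,
                            initializer=tf.constant_initializer(0.25))
    return tf.nn.relu(x) - alpha * tf.nn.relu(-x)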
Exemplo n.º 36
0
test_lbls = np.array([int(lbl) for (lbl, img) in data], np.int32)

##########################################################
#Train model
##########################################################
graph = tf.Graph()
with graph.as_default():
    images = tf.placeholder(tf.float32, [None, 28 * 28], 'images')
    labels = tf.placeholder(tf.int32, [None], 'labels')

    #Define the encoder model
    with tf.variable_scope('encoder'):
        W = tf.get_variable('W', [28 * 28, thought_vector_size], tf.float32,
                            tf.random_normal_initializer(stddev=0.1))
        b = tf.get_variable('b', [thought_vector_size], tf.float32,
                            tf.zeros_initializer())
        thoughts = tf.tanh(tf.matmul(images, W) + b)  # The thought vector

    #Define the decoder model
    with tf.variable_scope('decoder'):
        # The transpose of the weight matrix of the encoder layer is used as weight for this layer
        # W = tf.get_variable('W', [thought_vector_size, 28*28], tf.float32, tf.random_normal_initializer(stddev=0.1))
        b = tf.get_variable('b', [28 * 28], tf.float32, tf.zeros_initializer())
        dec_logits = tf.matmul(thoughts, tf.transpose(W)) + b
        dec_outs = tf.sigmoid(dec_logits)  # The output image

    #Define the classifier model
    with tf.variable_scope('classifier'):
        W = tf.get_variable('W', [thought_vector_size, 10], tf.float32,
                            tf.random_normal_initializer(stddev=0.1))
        b = tf.get_variable('b', [10], tf.float32, tf.zeros_initializer())
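The example stops after declaring the classifier variables; a hedged sketch of how the remaining losses are typically wired for this tied-weight autoencoder plus classifier (the continuation below is an assumption, not part of the original):

    # hypothetical continuation, still inside `with graph.as_default():`
    cls_logits = tf.matmul(thoughts, W) + b        # W, b from the classifier scope
    rec_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=images, logits=dec_logits))
    cls_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=cls_logits))
    train_op = tf.train.AdamOptimizer().minimize(rec_loss + cls_loss)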
Exemplo n.º 37
0
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=tf.nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=tf.zeros_initializer(),
                    biases_regularizer=None,
                    do_spec_norm=False,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
    """Adds support for spectral normalization following https://arxiv.org/abs/1802.05957.

  For non-spectral normed fc layer, See tensorflow.contrib.layers.python.layers.fully_connected for doc.
  """
    # ***Added section***
    layer_class = layers.core_layers.Dense
    if do_spec_norm:
        layer_class = SpectralNormedDense
    # ***Added section ends***

    if not isinstance(num_outputs, layers.six.integer_types):
        raise ValueError('num_outputs should be int or long, got %s.' %
                         (num_outputs, ))

    layer_variable_getter = layers._build_variable_getter({
        'bias': 'biases',
        'kernel': 'weights'
    })

    with tf.variable_scope(scope,
                           'fully_connected', [inputs],
                           reuse=reuse,
                           custom_getter=layer_variable_getter) as sc:
        inputs = tf.convert_to_tensor(inputs)
        layer = layer_class(units=num_outputs,
                            activation=None,
                            use_bias=not normalizer_fn and biases_initializer,
                            kernel_initializer=weights_initializer,
                            bias_initializer=biases_initializer,
                            kernel_regularizer=weights_regularizer,
                            bias_regularizer=biases_regularizer,
                            activity_regularizer=None,
                            trainable=trainable,
                            name=sc.name,
                            dtype=inputs.dtype.base_dtype,
                            _scope=sc,
                            _reuse=reuse)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers._add_variable_to_collections(layer.kernel,
                                            variables_collections, 'weights')
        if layer.bias is not None:
            layers._add_variable_to_collections(layer.bias,
                                                variables_collections,
                                                'biases')

        # Apply normalizer function / layer.
        if normalizer_fn is not None:
            if not normalizer_params:
                normalizer_params = {}
            outputs = normalizer_fn(outputs, **normalizer_params)

        if activation_fn is not None:
            outputs = activation_fn(outputs)

        return layer_utils.collect_named_outputs(outputs_collections, sc.name,
                                                 outputs)
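`SpectralNormedDense` is assumed to be defined elsewhere; a minimal sketch of the spectral-normalization step from the referenced paper (arXiv:1802.05957), which rescales a 2-D kernel by a power-iteration estimate of its largest singular value (hypothetical helper):

def spectral_norm_sketch(w, name='u'):
    # w: 2-D kernel of shape [in_dim, out_dim]; one power-iteration step per call
    u = tf.get_variable(name, [1, w.shape.as_list()[-1]],
                        initializer=tf.random_normal_initializer(),
                        trainable=False)
    v = tf.nn.l2_normalize(tf.matmul(u, tf.transpose(w)), 1)   # [1, in_dim]
    u_new = tf.nn.l2_normalize(tf.matmul(v, w), 1)             # [1, out_dim]
    sigma = tf.squeeze(tf.matmul(tf.matmul(v, w), tf.transpose(u_new)))
    with tf.control_dependencies([u.assign(u_new)]):
        return w / sigma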
Exemplo n.º 38
0
def deform_conv_2d(inputs, num_outputs, kernel_size=3, stride=1, dilate_rate=1, deformable_group=1, data_format='channels_first', no_bias=True, name=None):
    with tf.variable_scope(name, 'deform_conv'):
        if 'channels_last' == data_format:
            inputs = tf.transpose(inputs, [0, 3, 1, 2], name='trans')
        offset = tf.layers.conv2d(inputs, 2 * deformable_group * kernel_size**2, kernel_size, padding='SAME', dilation_rate=(dilate_rate, dilate_rate), strides=(stride, stride), data_format='channels_first')

        kernel = tf.get_variable(name='kernel', shape=(num_outputs, inputs.get_shape().as_list()[1], kernel_size, kernel_size), initializer=tf.glorot_uniform_initializer())
        if not no_bias:
            bias_var = tf.get_variable(name='bias', shape=(1, num_outputs, 1, 1), initializer=tf.zeros_initializer())
        res = deform_conv_op(inputs, filter=kernel, offset=offset, rates=[1, 1, dilate_rate, dilate_rate], padding='SAME', strides=[1, 1, stride, stride], num_groups=1, deformable_group=deformable_group)
        if 'channels_last' == data_format:
            res = tf.transpose(res, [0, 2, 3, 1], name='trans_inv')
        if not no_bias:
            res = res + bias_var
    return res
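A hedged usage sketch for the wrapper above, assuming the custom `deform_conv_op` kernel is available and an NCHW feature map:

x = tf.placeholder(tf.float32, [None, 64, 56, 56])          # assumed NCHW input
y = deform_conv_2d(x, num_outputs=128, kernel_size=3, stride=1, name='dcn1')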
Exemplo n.º 39
0
    def _one_step(self, x):
        """one time-step execution which follows
           1. from top to bottom, 'R's are updated
           2. predict the next frame, and the next input-data is fed
           3. from bottom to top, 'E's are updated
        Args:
            x : 4-dim (batch_size, height, width, num_channels) tensor
                'None' if no input frame
        """
        if x is not None:
            assert len(x.shape) == 4, "the dimension of the input tensor must be {}, but {}.".format(4, len(x.shape))
        
        # initialize 'R's and 'E's as 0 when started
        if self.stack_E is None:
            
            self.stack_E = []
            self.stack_R = []
            
            # the bookkeeping below is messy in much the same way as a hand-rolled convLSTM
            tmp_A = tf.zeros_like(x, tf.float32)
            
            for l in range(self.num_layers):
                # calculation to double the number of channels
                chx2 = tf.layers.Conv2D(filters=2*self.stack_channels[l], kernel_size=1, trainable=False, kernel_initializer=tf.zeros_initializer())
                # E.shape == 2 * A.shape
                tmp_E = chx2(tmp_A)
                self.stack_E.append(tf.zeros_like(tmp_E))
                # R.shape == A.shape
                self.stack_R.append(tf.zeros_like(tmp_A))
                
                getattr(self, "R_block"+str(l)).reset_state()
                
                if l != self.num_layers - 1:
                    # compute the shape used by the layer above:
                    # spatial dims are halved while the channel count increases
                    goup = tf.layers.Conv2D(filters=self.stack_channels[l+1], kernel_size=2, strides=2, trainable=False, kernel_initializer=tf.zeros_initializer())
                    # A[l].shape -> A[l+1].shape
                    tmp_A = goup(tmp_A)
                    
        # update R-block from top to bottom
        for l in reversed(range(self.num_layers)):
            if l != self.num_layers-1:
                new_R = getattr(self, "R_block"+str(l))(self.stack_R[l], self.stack_E[l], self.stack_R[l+1])
            else:
                new_R = getattr(self, "R_block"+str(l))(self.stack_R[l], self.stack_E[l])
            self.stack_R[l] = new_R
            
        # update E-block from bottom to top
        for l in range(self.num_layers):
            if l != 0:
                new_E, _ = getattr(self, "E_block"+str(l))(self.stack_R[l], self.stack_E[l-1])
            else:
                new_E, pred = getattr(self, "E_block"+str(l))(self.stack_R[l], x)     
            
            self.stack_E[l] = new_E

        tmp_loss = tf.reduce_mean(self.stack_E[0]) # loss for this time step
        
        return tmp_loss, pred
    slopes_t = tf.reduce_max(slopes) if cfg.bMaxGrad else slopes

    if cfg.oReg == 'cp':
        dis_lip_loss = cfg.fWeightLip * tf.reduce_mean(tf.square(slopes_t))
    elif cfg.oReg == 'gp':
        dis_lip_loss = cfg.fWeightLip * tf.reduce_mean(
            tf.square(slopes_t - cfg.fLipTarget))
    elif cfg.oReg == 'lp':
        dis_lip_loss = cfg.fWeightLip * tf.reduce_mean(
            tf.square(tf.maximum(0.0, slopes_t - cfg.fLipTarget)))
    elif cfg.oReg == 'al':
        if not cfg.bMaxGrad:
            slopes_t = tf.reduce_mean(slopes)
        al_lambda = tf.get_variable('lambda', [],
                                    initializer=tf.zeros_initializer(),
                                    trainable=False)
        constraint = slopes_t - cfg.fLipTarget
        dis_lip_loss = cfg.fWeightLip * tf.square(
            constraint) + al_lambda * constraint
        if cfg.fLrAL != 0:
            al_lambda_update_op = tf.assign(al_lambda,
                                            al_lambda + cfg.fLrAL * constraint)
        else:
            al_lambda_update_op = tf.assign(
                al_lambda, al_lambda + 2 * cfg.fWeightLip * constraint)
    elif cfg.oReg == 'ali':
        if not cfg.bMaxGrad:
            slopes_t = tf.reduce_mean(slopes)
        al_lambda = tf.get_variable('lambda', [],
                                    initializer=tf.zeros_initializer(),
Exemplo n.º 41
0
    def __init__(self, modelpp):
        path = modelpp.get_remote_date(
            "https://www.flyai.com/m/multi_cased_L-12_H-768_A-12.zip")
        data_root = os.path.splitext(path)[0]
        bert_config_file = os.path.join(data_root, 'bert_config.json')
        bert_config = modeling.BertConfig.from_json_file(bert_config_file)
        init_checkpoint = os.path.join(data_root, 'bert_model.ckpt')
        bert_vocab_file = os.path.join(data_root, 'vocab.txt')

        self.input_ids = tf.placeholder(tf.int32,
                                        shape=[None, None],
                                        name='input_ids')
        self.input_mask = tf.placeholder(tf.int32,
                                         shape=[None, None],
                                         name='input_masks')
        self.segment_ids = tf.placeholder(tf.int32,
                                          shape=[None, None],
                                          name='segment_ids')

        self.labels = tf.placeholder(tf.int32, shape=[
            None,
        ], name='labels')
        self.keep_prob = tf.placeholder(tf.float32, name='keep_prob')
        self.learning_rate = tf.placeholder(tf.float32, name='learning_rate')

        # Build the BERT model
        with tf.name_scope('Bert'):
            model = modeling.BertModel(
                config=bert_config,
                is_training=True,
                input_ids=self.input_ids,
                input_mask=self.input_mask,
                token_type_ids=self.segment_ids,
                # set use_one_hot_embeddings=True on TPU for speed; False is faster on CPU/GPU
                use_one_hot_embeddings=False)
            # get_sequence_output() returns the per-token outputs with shape
            # [batch_size, seq_length, embedding_size]; use it for seq2seq or NER
            # output_layer = model.get_sequence_output()
            tvars = tf.trainable_variables()
            # Load the pretrained BERT weights
            (assignment_map, initialized_variable_names) = \
                modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            output_layer = model.get_pooled_output()  # pooled sentence-level output
            hidden_size = output_layer.shape[-1].value  # output dimensionality

        # Build W and b for the classification head
        output_weights = tf.get_variable(
            "output_weights", [hidden_size, num_labels],
            initializer=tf.truncated_normal_initializer(stddev=0.02))

        output_bias = tf.get_variable("output_bias", [num_labels],
                                      initializer=tf.zeros_initializer())

        with tf.variable_scope("predict"):
            output_layer = tf.nn.dropout(output_layer,
                                         keep_prob=self.keep_prob)
            logits = tf.nn.bias_add(tf.matmul(output_layer, output_weights),
                                    output_bias)
            log_probs = tf.nn.log_softmax(logits, axis=-1)
            self.pred = tf.argmax(log_probs, 1, name='pred')

        with tf.name_scope("accuracy"):
            # accuracy
            correct_pred = tf.equal(self.labels, tf.cast(self.pred, tf.int32))
            self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32),
                                           name='acc')

        with tf.name_scope("optimize"):
            # convert the labels to one-hot
            one_hot_labels = tf.one_hot(self.labels,
                                        depth=num_labels,
                                        dtype=tf.float32)
            # build the loss function
            per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs,
                                              axis=-1)
            self.loss = tf.reduce_mean(per_example_loss)

            # optimizer
            self.train_op = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate).minimize(self.loss)
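A hedged sketch of one training step against the placeholders defined in this constructor; `net` (an instance of this class), `sess`, and the batch arrays `ids`, `masks`, `segments`, `y` are assumptions from the surrounding training code:

feed = {net.input_ids: ids, net.input_mask: masks, net.segment_ids: segments,
        net.labels: y, net.keep_prob: 0.9, net.learning_rate: 5e-5}
_, step_loss, step_acc = sess.run([net.train_op, net.loss, net.accuracy],
                                  feed_dict=feed)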
    def build_net(self, ob, ac_shape, hid_size, num_hid_layers):
        self.ob = ob
        self.ob_shape = ob.shape.as_list()[1:]

        with tf.variable_scope("ob_filter"):
            self.ob_rms = RunningMeanStd(ob.shape.as_list()[1:])

        # normalized observation
        obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0,
                               5.0)

        # net to fit value function
        net = obz
        for i in range(num_hid_layers):
            net = tf.layers.dense(
                inputs=net,
                units=hid_size,
                activation=tf.nn.tanh,
                kernel_initializer=tf.random_normal_initializer(mean=0,
                                                                stddev=1),
                name="vffc%i" % (i + 1))
        self.vpred = tf.layers.dense(
            inputs=net,
            units=1,
            activation=None,
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=1),
            name="vffinal")
        # train value function
        self.vreal = tf.placeholder(dtype=tf.float32,
                                    shape=(None, ),
                                    name="vreal")
        vloss = tf.reduce_mean(tf.square(self.vreal - self.vpred))
        valueFunctionVars = [
            v for v in self.get_trainable_variables()
            if v.name.startswith("%s/vff" % self.scope)
        ]
        self.vadam = tf.train.AdamOptimizer().minimize(
            vloss, var_list=valueFunctionVars)

        # net to predict mean and standard deviation of action
        net = obz
        for i in range(num_hid_layers):
            net = tf.layers.dense(
                inputs=net,
                units=hid_size,
                activation=tf.nn.tanh,
                kernel_initializer=tf.random_normal_initializer(mean=0,
                                                                stddev=1),
                name="polc%i" % (i + 1))
        mean = tf.layers.dense(inputs=net,
                               units=ac_shape[0],
                               activation=None,
                               kernel_initializer=tf.random_normal_initializer(
                                   mean=0, stddev=0.01))
        logstd = mean * 0.0 + tf.get_variable(
            name="logstd",
            shape=[1, ac_shape[0]],
            initializer=tf.zeros_initializer(),
            dtype=tf.float32)  # std not related to observation

        # action is normally distributed
        self.pd = DiagGaussianPd(mean, logstd)
        self.stochastic = tf.placeholder(dtype=tf.bool,
                                         shape=(),
                                         name="stochastic")
        self.action = tf.cond(self.stochastic, lambda: self.pd.sample(),
                              lambda: self.pd.mode())
Exemplo n.º 43
0
def XinNingNetwork2(input,
                    heatmap,
                    is_training,
                    weight_decay,
                    batch_norm_params,
                    num_labels,
                    depth_multi,
                    min_depth=8):
    print("labels; ", num_labels)
    time.sleep(3)

    def depth(d):
        return max(int(d * depth_multi), min_depth)

    with tf.variable_scope('pfld_inference2'):
        features = {}
        # normalizer_fn=slim.batch_norm,
        with slim.arg_scope(
            [slim.conv2d],
                activation_fn=tf.nn.relu6,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.01),
                biases_initializer=tf.zeros_initializer(),
                weights_regularizer=slim.l2_regularizer(weight_decay),
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                padding='SAME',
                # trainable=is_training
        ):
            print('PFLD input shape({}): {}'.format(input.name,
                                                    input.get_shape()))
            print("=== start stage 2 ===")
            print(heatmap.name, heatmap.get_shape())
            # 112*112*1*2 / concat / 112*112*2
            concatted_2 = tf.concat([input, heatmap], 3)
            print(concatted_2.name, concatted_2.get_shape())
            # 112*112*2 / conv3*3 / c:8,n:1,s:2
            conv2_1 = conv2d(concatted_2,
                             stride=2,
                             channel=8,
                             kernel=3,
                             depth=depth,
                             scope='conv2_1')
            print(conv2_1.name, conv2_1.get_shape())
            # 56*56*8 / pool3*3 / c:8,n:1,s:2
            pool2_1 = slim.max_pool2d(conv2_1,
                                      kernel_size=[3, 3],
                                      stride=2,
                                      scope='pool2_1',
                                      padding='SAME')
            print(pool2_1.name, pool2_1.get_shape())
            # 28*28*8 / conv3*3 / c:16,n:1,s:1
            conv2_2 = conv2d(pool2_1,
                             stride=1,
                             channel=16,
                             kernel=3,
                             depth=depth,
                             scope='conv2_2')
            print(conv2_2.name, conv2_2.get_shape())
            # 28*28*16 / pool3*3 / c:16,n:1,s:2
            pool2_2 = slim.max_pool2d(conv2_2,
                                      kernel_size=[3, 3],
                                      stride=2,
                                      scope='pool2_2',
                                      padding='SAME')
            print(pool2_2.name, pool2_2.get_shape())
            # 14*14*16 / conv3*3 / c:64,n:1,s:1
            conv2_2_1 = conv2d(pool2_2,
                               stride=1,
                               channel=64,
                               kernel=3,
                               depth=depth,
                               scope='conv2_2.1')
            print(conv2_2_1.name, conv2_2_1.get_shape())
            # 14*14*64 / global_pool / c:64,n:1
            pool2_2_1 = slim.avg_pool2d(conv2_2_1, [14, 14],
                                        stride=[14, 14],
                                        scope='pool2_2.1',
                                        padding='SAME')
            print(pool2_2_1.name, pool2_2_1.get_shape())
            # 14*14*16 / conv3*3 / c:64,n:1,s:2
            conv2_3 = conv2d(pool2_2,
                             stride=2,
                             channel=64,
                             kernel=3,
                             depth=depth,
                             scope='conv2_3')
            print(conv2_3.name, conv2_3.get_shape())
            # 7*7*64 / pool3*3 / c:64,n:1,s:2
            pool2_3 = slim.max_pool2d(conv2_3,
                                      kernel_size=[3, 3],
                                      stride=2,
                                      scope='pool2_3',
                                      padding='SAME')
            print(pool2_3.name, pool2_3.get_shape())
            # 3*3*64 / conv3*3 / c:64,n:1,s:1
            conv2_3_1 = conv2d(pool2_3,
                               stride=2,
                               channel=64,
                               kernel=3,
                               depth=depth,
                               scope='conv2_3.1')
            print(conv2_3_1.name, conv2_3_1.get_shape())
            # 3*3*64 / global_pool / c:64,n:1
            pool2_3_1 = slim.avg_pool2d(conv2_3_1, [3, 3],
                                        stride=[3, 3],
                                        scope='pool2_3.1',
                                        padding='SAME')
            print(pool2_3_1.name, pool2_3_1.get_shape())
            # 3*3*64 / conv3*3 / c:64,n:1,s:2
            conv2_4 = conv2d(pool2_3,
                             stride=2,
                             channel=64,
                             kernel=3,
                             depth=depth,
                             scope='conv2_4')
            print(conv2_4.name, conv2_4.get_shape())
            # 2*2*64 / global_pool / c:64,n:1
            pool2_4_1 = slim.avg_pool2d(conv2_4, [2, 2],
                                        stride=[2, 2],
                                        scope='pool2_4.1',
                                        padding='SAME')
            print(pool2_4_1.name, pool2_4_1.get_shape())
            # 1*1*64*3 / concat / 1*1*192
            concatted_2 = tf.concat([pool2_2_1, pool2_3_1, pool2_4_1], 3)
            print(concatted_2.name, concatted_2.get_shape())
            flattened = slim.flatten(concatted_2)
            print(flattened.name, flattened.get_shape())
            # 1*1*192 / fc / 1*136
            output_2 = slim.fully_connected(flattened,
                                            num_outputs=num_labels * 2,
                                            scope='fc_2')
            print("last layer name")
            print(output_2.name, output_2.get_shape())

            return output_2
Exemplo n.º 44
0
def convolution(inputs,
                num_outputs,
                kernel_size,
                stride=1,
                padding='SAME',
                data_format=None,
                rate=1,
                activation_fn=tf.nn.relu,
                normalizer_fn=None,
                normalizer_params=None,
                weights_initializer=initializers.xavier_initializer(),
                weights_regularizer=None,
                biases_initializer=tf.zeros_initializer(),
                biases_regularizer=None,
                do_spec_norm=False,
                reuse=None,
                variables_collections=None,
                outputs_collections=None,
                trainable=True,
                scope=None):
    """Adds support for spectral normalization following https://arxiv.org/abs/1802.05957.

  For non-spectral normed convolution, See tensorflow.contrib.layers.python.layers.convolution for doc.
  """
    if data_format not in [
            None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW'
    ]:
        raise ValueError('Invalid data_format: %r' % (data_format, ))

    layer_variable_getter = layers._build_variable_getter({
        'bias': 'biases',
        'kernel': 'weights'
    })

    with tf.variable_scope(scope,
                           'Conv', [inputs],
                           reuse=reuse,
                           custom_getter=layer_variable_getter) as sc:
        inputs = tf.convert_to_tensor(inputs)
        input_rank = inputs.get_shape().ndims

        # ***Modified section***
        if input_rank == 3:
            layer_class = convolutional_layers.Convolution1D
            if do_spec_norm:
                raise NotImplementedError(
                    'only supports 2d conv for spectral norm.')
        elif input_rank == 4:
            layer_class = convolutional_layers.Convolution2D
            if do_spec_norm:
                layer_class = SpecNormConv2d
        elif input_rank == 5:
            layer_class = convolutional_layers.Convolution3D
            if do_spec_norm:
                raise NotImplementedError(
                    'only supports 2d conv for spectral norm.')
        else:
            raise ValueError('Convolution not supported for input with rank',
                             input_rank)
        # ***Modified section ends***

        df = ('channels_first' if data_format and data_format.startswith('NC')
              else 'channels_last')
        layer = layer_class(filters=num_outputs,
                            kernel_size=kernel_size,
                            strides=stride,
                            padding=padding,
                            data_format=df,
                            dilation_rate=rate,
                            activation=None,
                            use_bias=not normalizer_fn and biases_initializer,
                            kernel_initializer=weights_initializer,
                            bias_initializer=biases_initializer,
                            kernel_regularizer=weights_regularizer,
                            bias_regularizer=biases_regularizer,
                            activity_regularizer=None,
                            trainable=trainable,
                            name=sc.name,
                            dtype=inputs.dtype.base_dtype,
                            _scope=sc,
                            _reuse=reuse)
        outputs = layer.apply(inputs)

        # Add variables to collections.
        layers._add_variable_to_collections(layer.kernel,
                                            variables_collections, 'weights')
        if layer.use_bias:
            layers._add_variable_to_collections(layer.bias,
                                                variables_collections,
                                                'biases')

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return layer_utils.collect_named_outputs(outputs_collections, sc.name,
                                                 outputs)
Exemplo n.º 45
0
def get_bn(zero_init=False):
    # zero_init sets gamma to 0 so the residual branch this BN terminates starts
    # out as an identity mapping (the usual "zero-init gamma" trick for ResNets)
    if zero_init:
        return lambda x, name: BatchNorm(
            'bn', x, gamma_init=tf.zeros_initializer())
    else:
        return lambda x, name: BatchNorm('bn', x)
Exemplo n.º 46
0
    def build_model(self, height, width, states_per_action):
        with tf.device('/gpu:0'):
            state = tf.placeholder('float32',
                                   shape=(None, height, width,
                                          states_per_action),
                                   name='states')
            self.layers['state'] = state

            # First convolutional layer
            with tf.variable_scope('conv1'):
                conv1 = tf.contrib.layers.convolution2d(
                    inputs=state,
                    num_outputs=16,
                    kernel_size=[8, 8],
                    stride=[4, 4],
                    padding="VALID",
                    activation_fn=tf.nn.relu,
                    weights_initializer=tf.contrib.layers.
                    xavier_initializer_conv2d(),
                    biases_initializer=tf.zeros_initializer())
                self.layers['conv1'] = conv1

            # Second convolutional layer
            with tf.variable_scope('conv2'):
                conv2 = tf.contrib.layers.convolution2d(
                    inputs=conv1,
                    num_outputs=32,
                    kernel_size=[4, 4],
                    stride=[2, 2],
                    padding="VALID",
                    activation_fn=tf.nn.relu,
                    weights_initializer=tf.contrib.layers.
                    xavier_initializer_conv2d(),
                    biases_initializer=tf.zeros_initializer())
                self.layers['conv2'] = conv2

            # Flatten the network
            with tf.variable_scope('flatten'):
                flatten = tf.contrib.layers.flatten(inputs=conv2)
                self.layers['flatten'] = flatten

            # Fully connected layer with 256 hidden units
            with tf.variable_scope('fc1'):
                fc1 = tf.contrib.layers.fully_connected(
                    inputs=flatten,
                    num_outputs=256,
                    activation_fn=tf.nn.relu,
                    weights_initializer=tf.contrib.layers.xavier_initializer(),
                    biases_initializer=tf.zeros_initializer())
                self.layers['fc1'] = fc1

            # The policy output
            with tf.variable_scope('policy'):
                policy = tf.contrib.layers.fully_connected(
                    inputs=fc1,
                    num_outputs=self.action_size,
                    activation_fn=tf.nn.softmax,
                    weights_initializer=tf.contrib.layers.xavier_initializer(),
                    biases_initializer=None)
                self.layers['policy'] = policy

            # The value output
            with tf.variable_scope('value'):
                value = tf.contrib.layers.fully_connected(
                    inputs=fc1,
                    num_outputs=1,
                    activation_fn=None,
                    weights_initializer=tf.contrib.layers.xavier_initializer(),
                    biases_initializer=None)
                self.layers['value'] = value

        return state, policy, value
Exemplo n.º 47
0
# Section One: Define the layers of the neural network itself

# Input Layer
with tf.variable_scope('input'):
    X = tf.placeholder(tf.float32, shape=(None, number_of_inputs))

# Layer 1
with tf.variable_scope('layer_1'):
    weights = tf.get_variable(
        name="weights1",
        shape=[number_of_inputs, layer_1_nodes],
        initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.get_variable(name="biases1",
                             shape=[layer_1_nodes],
                             initializer=tf.zeros_initializer())
    layer_1_output = tf.nn.relu(tf.matmul(X, weights) + biases)

# Layer 2
with tf.variable_scope('layer_2'):
    weights = tf.get_variable(
        name="weights2",
        shape=[layer_1_nodes, layer_2_nodes],
        initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.get_variable(name="biases2",
                             shape=[layer_2_nodes],
                             initializer=tf.zeros_initializer())
    layer_2_output = tf.nn.relu(tf.matmul(layer_1_output, weights) + biases)

# Layer 3
with tf.variable_scope('layer_3'):
Exemplo n.º 48
0
def cifarnet_bn(images,
                num_classes=10,
                is_training=False,
                dropout_keep_prob=0.5,
                prediction_fn=slim.softmax,
                scope='CifarNet'):

    end_points = {}

    with tf.variable_scope(scope, 'CifarNet', [images]):
        # Stem Network
        # Input: 3x28x28
        # Output: 96x28x28
        net = slim.conv2d(images,
                          32, [3, 3],
                          padding='SAME',
                          scope='stem_conv1')
        net = slim.conv2d(net, 96, [3, 3], padding='SAME', scope='stem_conv2')

        # Inception Module 1
        # Input: 96x28x28
        # Output: 128x28x28
        net = inception_module(net, [32, 96, 16, 64, 16, 16])
        end_points['inception1'] = net

        # Inception Module 2
        # Input: 128x28x28
        # Output: 240x28x28
        net = inception_module(net, [64, 128, 32, 96, 48, 32])
        end_points['inception2'] = net

        # Maxpool
        # Input: 240x28x28
        # Output: 240x14x14
        net = slim.max_pool2d(net, [3, 3],
                              stride=2,
                              padding='SAME',
                              scope='maxpool')

        end_points['maxpool'] = net

        # Inception Module 3
        # Input: 240x14x14
        # Output: 256x14x14
        net = inception_module(net, [96, 96, 16, 104, 24, 32])
        end_points['inception3'] = net

        # Avgpool
        # Input: 256x14x14
        # Output: 256x4x4
        net = slim.avg_pool2d(net, [5, 5],
                              stride=3,
                              padding='SAME',
                              scope='avgpool')
        end_points['avgpool'] = net

        # Flatten:
        # Input: 256x4x4
        # Output: 4096
        net = slim.flatten(net)
        end_points['flatten'] = net

        net = slim.dropout(net,
                           dropout_keep_prob,
                           is_training=is_training,
                           scope='dropout1')

        # FC
        # Input: 4096
        # Output: 10
        logits = slim.fully_connected(
            net,
            num_classes,
            biases_initializer=tf.zeros_initializer(),
            # weights_initializer=trunc_normal(1/50.0),
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            weights_regularizer=None,
            activation_fn=None,
            scope='logits')

        end_points['Logits'] = logits
        end_points['Predictions'] = prediction_fn(logits, scope='Predictions')

    return logits, end_points
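`inception_module` is defined elsewhere; judging from the output-channel comments (e.g. 32 + 64 + 16 + 16 = 128), the six numbers appear to be the [1x1, 3x3-reduce, 5x5-reduce, 3x3, 5x5, pool-proj] channel counts. A hedged GoogLeNet-style sketch under that assumption:

def inception_module_sketch(net, dims):
    # dims = [c1x1, c3x3_reduce, c5x5_reduce, c3x3, c5x5, c_pool_proj]  (assumed ordering)
    c1, c3r, c5r, c3, c5, cp = dims
    branch1 = slim.conv2d(net, c1, [1, 1])
    branch3 = slim.conv2d(slim.conv2d(net, c3r, [1, 1]), c3, [3, 3])
    branch5 = slim.conv2d(slim.conv2d(net, c5r, [1, 1]), c5, [5, 5])
    branch_pool = slim.conv2d(
        slim.max_pool2d(net, [3, 3], stride=1, padding='SAME'), cp, [1, 1])
    return tf.concat([branch1, branch3, branch5, branch_pool], axis=3)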
Exemplo n.º 49
0
    def __init__(self,
                 env_spec,
                 name='CategoricalGRUPolicy',
                 hidden_dim=32,
                 hidden_nonlinearity=tf.nn.tanh,
                 hidden_w_init=tf.initializers.glorot_uniform(),
                 hidden_b_init=tf.zeros_initializer(),
                 recurrent_nonlinearity=tf.nn.sigmoid,
                 recurrent_w_init=tf.initializers.glorot_uniform(),
                 output_nonlinearity=None,
                 output_w_init=tf.initializers.glorot_uniform(),
                 output_b_init=tf.zeros_initializer(),
                 hidden_state_init=tf.zeros_initializer(),
                 hidden_state_init_trainable=False,
                 state_include_action=True,
                 layer_normalization=False):
        if not isinstance(env_spec.action_space, akro.Discrete):
            raise ValueError('CategoricalGRUPolicy only works '
                             'with akro.Discrete action space.')

        super().__init__(name, env_spec)
        self._obs_dim = env_spec.observation_space.flat_dim
        self._action_dim = env_spec.action_space.n

        self._hidden_dim = hidden_dim
        self._hidden_nonlinearity = hidden_nonlinearity
        self._hidden_w_init = hidden_w_init
        self._hidden_b_init = hidden_b_init
        self._recurrent_nonlinearity = recurrent_nonlinearity
        self._recurrent_w_init = recurrent_w_init
        self._output_nonlinearity = output_nonlinearity
        self._output_w_init = output_w_init
        self._output_b_init = output_b_init
        self._hidden_state_init = hidden_state_init
        self._hidden_state_init_trainable = hidden_state_init_trainable
        self._layer_normalization = layer_normalization
        self._state_include_action = state_include_action

        if state_include_action:
            self._input_dim = self._obs_dim + self._action_dim
        else:
            self._input_dim = self._obs_dim

        self._f_step_prob = None

        self.model = CategoricalGRUModel(
            output_dim=self._action_dim,
            hidden_dim=self._hidden_dim,
            name='prob_network',
            hidden_nonlinearity=hidden_nonlinearity,
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            recurrent_nonlinearity=recurrent_nonlinearity,
            recurrent_w_init=recurrent_w_init,
            hidden_state_init=hidden_state_init,
            hidden_state_init_trainable=hidden_state_init_trainable,
            output_nonlinearity=output_nonlinearity,
            output_w_init=output_w_init,
            output_b_init=output_b_init,
            layer_normalization=layer_normalization)

        self._prev_actions = None
        self._prev_hiddens = None
        self._dist = None
        self._init_hidden = None

        self._initialize()
Exemplo n.º 50
0
    def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0):
        mean = fc(latent_vector, 'pi', self.size, init_scale=init_scale, init_bias=init_bias)
        logstd = tf.get_variable(name='logstd', shape=[1, self.size], initializer=tf.zeros_initializer())
        pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        return self.pdfromflat(pdparam), mean
mnist = input_data.read_data_sets('data/mnist', one_hot=True)
X_batch, Y_batch = mnist.train.next_batch(batch_size)

# Step 2: create placeholders for the features (X) and labels (Y)
# each MNIST image is 28*28 = 784 pixels
X = tf.placeholder(tf.float32, [batch_size, 784], name='image')
# each image belongs to one of 10 classes, corresponding to the digits 0~9
Y = tf.placeholder(tf.int32, [batch_size, 10], name='label')

# Step 3: create the weights and bias
# w is randomly initialized with mean 0 and stddev 0.01
# w's shape maps X (784) -> Y (10)
w = tf.get_variable(name='weight', shape=(784, 10), initializer=tf.random_normal_initializer())
# b is initialized to 0
# b has the same shape as Y
b = tf.get_variable(name='bias', shape=(1, 10), initializer=tf.zeros_initializer())


# Step 4: build the model
logits = tf.matmul(X, w) + b

# Step 5: define the loss function
entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y, name='loss')
# average the loss over the whole batch
loss = tf.reduce_mean(entropy)
# loss = tf.reduce_mean(-tf.reduce_sum(tf.nn.softmax(logits) * tf.log(Y), reduction_indices=[1]))

# Step 6: define the training op
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
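
# A hedged sketch of the training loop that would typically follow; n_epochs is an
# assumption, everything else (X, Y, loss, optimizer, batch_size, mnist) is defined above.
n_epochs = 30                                            # assumed
n_batches = mnist.train.num_examples // batch_size
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(n_epochs):
        total_loss = 0.0
        for _ in range(n_batches):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            _, batch_loss = sess.run([optimizer, loss],
                                     feed_dict={X: X_batch, Y: Y_batch})
            total_loss += batch_loss
        print('Epoch {}: average loss {:.4f}'.format(epoch, total_loss / n_batches))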

Exemplo n.º 52
0
def initialize_parameters():

    
    tf.set_random_seed(1)
     
    x1_encoder_h1 = tf.get_variable("x1_encoder_h1", [144,16], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x1_encoder_b1 = tf.get_variable("x1_encoder_b1", [16], initializer = tf.zeros_initializer())
    x2_encoder_h1 = tf.get_variable("x2_encoder_h1", [21,16], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x2_encoder_b1 = tf.get_variable("x2_encoder_b1", [16], initializer = tf.zeros_initializer())
    
    x1_encoder_h2 = tf.get_variable("x1_encoder_h2", [16,32], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x1_encoder_b2 = tf.get_variable("x1_encoder_b2", [32], initializer = tf.zeros_initializer())
    x2_encoder_h2 = tf.get_variable("x2_encoder_h2", [16,32], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x2_encoder_b2 = tf.get_variable("x2_encoder_b2", [32], initializer = tf.zeros_initializer())

    x1_encoder_h3 = tf.get_variable("x1_encoder_h3", [32,64], initializer =tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x1_encoder_b3 = tf.get_variable("x1_encoder_b3", [64], initializer = tf.zeros_initializer())
    x2_encoder_h3 = tf.get_variable("x2_encoder_h3", [32,64], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x2_encoder_b3 = tf.get_variable("x2_encoder_b3", [64], initializer = tf.zeros_initializer()) 

    x1_encoder_h4 = tf.get_variable("x1_encoder_h4", [64,128], initializer =tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x1_encoder_b4 = tf.get_variable("x1_encoder_b4", [128], initializer = tf.zeros_initializer())
    x2_encoder_h4 = tf.get_variable("x2_encoder_h4", [64,128], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x2_encoder_b4 = tf.get_variable("x2_encoder_b4", [128], initializer = tf.zeros_initializer()) 
    
    joint_encoder_h1 = tf.get_variable("joint_encoder_h1", [256,128], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    joint_encoder_b1 = tf.get_variable("joint_encoder_b1", [128], initializer = tf.zeros_initializer())
    joint_encoder_h2 = tf.get_variable("joint_encoder_h2", [128,64], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    joint_encoder_b2 = tf.get_variable("joint_encoder_b2", [64], initializer = tf.zeros_initializer())
    joint_encoder_h3 = tf.get_variable("joint_encoder_h3", [64,15], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    joint_encoder_b3 = tf.get_variable("joint_encoder_b3", [15], initializer = tf.zeros_initializer())       

    x1_decoder_h1 = tf.get_variable("x1_decoder_h1", [128,64], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x1_decoder_b1 = tf.get_variable("x1_decoder_b1", [64], initializer = tf.zeros_initializer())
    x2_decoder_h1 = tf.get_variable("x2_decoder_h1", [128,64], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x2_decoder_b1 = tf.get_variable("x2_decoder_b1", [64], initializer = tf.zeros_initializer())
    
    x1_decoder_h2 = tf.get_variable("x1_decoder_h2", [64,32], initializer =tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x1_decoder_b2 = tf.get_variable("x1_decoder_b2", [32], initializer = tf.zeros_initializer())
    x2_decoder_h2 = tf.get_variable("x2_decoder_h2", [64,32], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x2_decoder_b2 = tf.get_variable("x2_decoder_b2", [32], initializer = tf.zeros_initializer())
    
    x1_decoder_h3 = tf.get_variable("x1_decoder_h3", [32,16], initializer =tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x1_decoder_b3 = tf.get_variable("x1_decoder_b3", [16], initializer = tf.zeros_initializer())
    x2_decoder_h3 = tf.get_variable("x2_decoder_h3", [32,16], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x2_decoder_b3 = tf.get_variable("x2_decoder_b3", [16], initializer = tf.zeros_initializer())

    x1_decoder_h4 = tf.get_variable("x1_decoder_h4", [16,144], initializer =tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x1_decoder_b4 = tf.get_variable("x1_decoder_b4", [144], initializer = tf.zeros_initializer())
    x2_decoder_h4 = tf.get_variable("x2_decoder_h4", [16,21], initializer = tf.contrib.layers.variance_scaling_initializer(seed = 1))
    x2_decoder_b4 = tf.get_variable("x2_decoder_b4", [21], initializer = tf.zeros_initializer())
    
    parameters = {"x1_encoder_h1": x1_encoder_h1,
                  "x1_encoder_b1": x1_encoder_b1,
                  "x2_encoder_h1": x2_encoder_h1,
                  "x2_encoder_b1": x2_encoder_b1,
                  "x1_encoder_h2": x1_encoder_h2,
                  "x1_encoder_b2": x1_encoder_b2,
                  "x2_encoder_h2": x2_encoder_h2,
                  "x2_encoder_b2": x2_encoder_b2,
                  "x1_encoder_h3": x1_encoder_h3,
                  "x1_encoder_b3": x1_encoder_b3,
                  "x2_encoder_h3": x2_encoder_h3,
                  "x2_encoder_b3": x2_encoder_b3,
                  "x1_encoder_h4": x1_encoder_h4,
                  "x1_encoder_b4": x1_encoder_b4,
                  "x2_encoder_h4": x2_encoder_h4,
                  "x2_encoder_b4": x2_encoder_b4,
                  "joint_encoder_h1": joint_encoder_h1,
                  "joint_encoder_b1": joint_encoder_b1,
                  "joint_encoder_h2": joint_encoder_h2,
                  "joint_encoder_b2": joint_encoder_b2,
                  "joint_encoder_h3": joint_encoder_h3,
                  "joint_encoder_b3": joint_encoder_b3,
                  "x1_decoder_h1": x1_decoder_h1,
                  "x1_decoder_b1": x1_decoder_b1,
                  "x2_decoder_h1": x2_decoder_h1,
                  "x2_decoder_b1": x2_decoder_b1,
                  "x1_decoder_h2": x1_decoder_h2,
                  "x1_decoder_b2": x1_decoder_b2,
                  "x2_decoder_h2": x2_decoder_h2,
                  "x2_decoder_b2": x2_decoder_b2,
                  "x1_decoder_h3": x1_decoder_h3,
                  "x1_decoder_b3": x1_decoder_b3,
                  "x2_decoder_h3": x2_decoder_h3,
                  "x2_decoder_b3": x2_decoder_b3,
                  "x1_decoder_h4": x1_decoder_h4,
                  "x1_decoder_b4": x1_decoder_b4,
                  "x2_decoder_h4": x2_decoder_h4,
                  "x2_decoder_b4": x2_decoder_b4}

    return parameters
Exemplo n.º 53
0
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings, extras):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings,
      extras=extras)

  output_layer = model.get_sequence_output()

  from_seq_length = output_layer.shape[1].value
  hidden_size = output_layer.shape[2].value

  # B 10 F 768
  output_layer = tf.stack([output_layer] * FLAGS.max_num_relations, axis=1)
  # B 10 F 1
  e1_mas = tf.reshape(extras.e1_mas, [-1, FLAGS.max_num_relations, from_seq_length, 1])
  # B 10 F 768
  e1 = tf.multiply(output_layer, tf.to_float(e1_mas))
  # B 10 768
  e1 = tf.reduce_sum(e1, axis=-2) / tf.maximum(1.0, tf.reduce_sum(tf.to_float(e1_mas), axis=-2))
  # B*10 768
  e1 = tf.reshape(e1, [-1, hidden_size])
    # B 10 F 1
  e2_mas = tf.reshape(extras.e2_mas, [-1, FLAGS.max_num_relations, from_seq_length, 1])
  # B 10 F 768
  e2 = tf.multiply(output_layer, tf.to_float(e2_mas))
  # B 10 768
  e2 = tf.reduce_sum(e2, axis=-2) / tf.maximum(1.0, tf.reduce_sum(tf.to_float(e2_mas), axis=-2))
  # B*10 768
  e2 = tf.reshape(e2, [-1, hidden_size])
  # B*10 768*2
  output_layer = tf.concat([e1, e2], axis=-1)

  output_weights = tf.get_variable(
      "cls/entity/output_weights", [num_labels, hidden_size*2],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "cls/entity/output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):
    if is_training:
      # I.e., 0.1 dropout
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
    # B*10 num_label
    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    # B*10 num_label
    logits = tf.nn.bias_add(logits, output_bias)
    # B*10 num_label
    probabilities = tf.nn.softmax(logits, axis=-1)
    # B*10 num_label
    log_probs = tf.nn.log_softmax(logits, axis=-1)
    # B*10
    labels = tf.reshape(labels, [-1])
    # B*10 num_label
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    # B*10
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    # B*10
    cls_mask = tf.reshape(tf.to_float(extras.cls_mask), [-1])
    # B*10
    per_example_loss = per_example_loss * cls_mask

    loss = tf.reduce_sum(per_example_loss) / tf.reduce_sum(cls_mask)

    return (loss, per_example_loss, logits, probabilities)
Exemplo n.º 54
0
def create_bias_initializer(unused_bias_shape, dtype=tf.float32):
    """Returns a default initializer for the biases of a linear/AddBias module."""
    return tf.zeros_initializer(dtype=dtype)
Exemplo n.º 55
0
def batch_norm(
        x,
        center=True,
        scale=True,
        training=True,
        trainable=True,
        epsilon=1e-6,
        gamma_initializer=tf.ones_initializer(),
        beta_initializer=tf.zeros_initializer(),
):
    """Batch Norm function that is compatible with pipelining.

    The normal batch norm function does not work correctly with pipelining as it relies
    on assign ops in the forward pass to update the moving averages which are not allowed.

    This function instead represents the moving averages as trainable variables but with
    a custom gradient that defines its gradient as the moving average update step. This
    means they can be correctly accumulated over the pipeline micro-batches.

    To ensure the moving average updates are correctly applied the Optimizer class must be
    augmented with the 'add_bn_moving_average_updates' function.

    Args:
      x: A Tensor with at least 2 dimensions in NHWC format. All
       shape dimensions must be fully defined.
      center: If True, add offset of `beta` to normalized tensor. If False, `beta`
        is ignored.
      scale: If True, multiply by `gamma`. If False, `gamma` is
        not used. When the next layer is linear (also e.g. `nn.relu`), this can be
        disabled since the scaling can be done by the next layer.
      epsilon: Small float added to variance to avoid dividing by zero.
      training: Whether this is operation is being used in a training network.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      gamma_initializer: Optional initializer for  gamma.
      beta_initializer: Optional initializer for beta.

    Returns:
      A `Tensor` representing the output of the operation.
    """
    with tf.variable_scope('batch_norm'):
        num_channels = x.get_shape().as_list()[3]

        if center:
            beta = tf.get_variable('beta',
                                   shape=(num_channels),
                                   dtype=x.dtype,
                                   initializer=beta_initializer,
                                   trainable=trainable)
        else:
            beta = tf.constant(0.0, shape=(num_channels), dtype=x.dtype)

        if scale:
            gamma = tf.get_variable('gamma',
                                    shape=(num_channels),
                                    dtype=x.dtype,
                                    initializer=gamma_initializer,
                                    trainable=trainable)
        else:
            gamma = tf.constant(1.0, shape=(num_channels,), dtype=x.dtype)  # shape must be iterable

        moving_mean = tf.get_variable('moving_mean',
                                      shape=(num_channels),
                                      dtype=x.dtype,
                                      initializer=tf.zeros_initializer(),
                                      trainable=trainable)

        moving_variance = tf.get_variable('moving_variance',
                                          shape=(num_channels),
                                          dtype=x.dtype,
                                          initializer=tf.ones_initializer(),
                                          trainable=trainable)

        if training:
            x, mean, variance = tf.nn.fused_batch_norm(x,
                                                       gamma,
                                                       beta,
                                                       epsilon=epsilon,
                                                       data_format='NHWC')
        else:
            x, mean, variance = tf.nn.fused_batch_norm(
                x,
                gamma,
                beta,
                mean=moving_mean,
                variance=moving_variance,
                epsilon=epsilon,
                is_training=False,
                data_format='NHWC')

        @tf.custom_gradient
        def moving_avg_updates(X, moving_m, moving_v):
            def bw(dx):
                return dx, moving_m - mean, moving_v - variance

            return X, bw

        x = moving_avg_updates(x, moving_mean, moving_variance)

    return x
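# A rough sketch (assumption; the 'add_bn_moving_average_updates' hook referred to in the
# docstring is not shown in this snippet). Because moving_avg_updates defines the
# "gradient" of each moving statistic as (moving_stat - batch_stat), applying
# assign_sub(var, (1 - momentum) * grad) performs the usual exponential-moving-average
# update, and these "gradients" accumulate correctly across pipeline micro-batches.
import tensorflow as tf

def split_bn_moving_average_updates(grads_and_vars, momentum=0.99):
    """Separate BN moving-statistic 'gradients' from the regular ones and apply them as EMA steps."""
    bn_update_ops = []
    remaining = []
    for grad, var in grads_and_vars:
        if 'moving_mean' in var.name or 'moving_variance' in var.name:
            # grad == moving_stat - batch_stat, so this implements
            # moving_stat <- momentum * moving_stat + (1 - momentum) * batch_stat
            bn_update_ops.append(tf.assign_sub(var, (1.0 - momentum) * grad))
        else:
            remaining.append((grad, var))
    # The remaining (grad, var) pairs would then go to optimizer.apply_gradients as usual.
    return bn_update_ops, remaining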
# Model architecture parameters
n_stocks = 500
n_neurons_1 = 1024
n_neurons_2 = 512
n_neurons_3 = 256
n_neurons_4 = 128
n_target = 1

# Placeholder
X = tf.placeholder(dtype=tf.float32, shape=[None, n_stocks])
Y = tf.placeholder(dtype=tf.float32, shape=[None])

# Initializers
sigma = 1
weight_initializer = tf.variance_scaling_initializer(mode="fan_avg", distribution="uniform", scale=sigma)
bias_initializer = tf.zeros_initializer()


# Layer 1: Variables for hidden weights and biases
W_hidden_1 = tf.Variable(weight_initializer([n_stocks, n_neurons_1]))
bias_hidden_1 = tf.Variable(bias_initializer([n_neurons_1]))

# Layer 2: Variables for hidden weights and biases
W_hidden_2 = tf.Variable(weight_initializer([n_neurons_1, n_neurons_2]))
bias_hidden_2 = tf.Variable(bias_initializer([n_neurons_2]))

# Layer 3: Variables for hidden weights and biases
W_hidden_3 = tf.Variable(weight_initializer([n_neurons_2, n_neurons_3]))
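# The snippet above stops after the layer-3 weights. A minimal sketch of how the remaining
# variables and the forward pass are typically wired in this style of network (hypothetical
# continuation, not the original code; ReLU activations and an MSE loss are assumptions):
bias_hidden_3 = tf.Variable(bias_initializer([n_neurons_3]))

# Layer 4: Variables for hidden weights and biases
W_hidden_4 = tf.Variable(weight_initializer([n_neurons_3, n_neurons_4]))
bias_hidden_4 = tf.Variable(bias_initializer([n_neurons_4]))

# Output layer: Variables for output weights and biases
W_out = tf.Variable(weight_initializer([n_neurons_4, n_target]))
bias_out = tf.Variable(bias_initializer([n_target]))

# Forward pass: fully connected layers with ReLU, linear output
hidden_1 = tf.nn.relu(tf.add(tf.matmul(X, W_hidden_1), bias_hidden_1))
hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, W_hidden_2), bias_hidden_2))
hidden_3 = tf.nn.relu(tf.add(tf.matmul(hidden_2, W_hidden_3), bias_hidden_3))
hidden_4 = tf.nn.relu(tf.add(tf.matmul(hidden_3, W_hidden_4), bias_hidden_4))
out = tf.squeeze(tf.add(tf.matmul(hidden_4, W_out), bias_out), axis=1)

# Mean squared error against the target placeholder
mse = tf.reduce_mean(tf.squared_difference(out, Y))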
    def _build_graph(self):
        config = self.config
        x_size = config.dim_input_ctrl
        h_size = config.dim_hidden_ctrl
        a_size = config.dim_output_ctrl
        lr = self.lr_plh
        with self.graph.as_default():
            model_name = config.controller_model_name
            initializer = tf.contrib.layers.xavier_initializer(uniform=True)
            if model_name == '2layer':
                hidden = slim.fully_connected(self.state_plh, h_size,
                                              weights_initializer=initializer,
                                              activation_fn=tf.nn.leaky_relu)
                self.logits = slim.fully_connected(hidden, a_size,
                                                   weights_initializer=initializer,
                                                   activation_fn=None)
                self.output = tf.nn.softmax(self.logits)
            elif model_name == '2layer_logits_clipping':
                hidden = slim.fully_connected(self.state_plh, h_size,
                                              weights_initializer=initializer,
                                              activation_fn=tf.nn.leaky_relu)
                self.logits = slim.fully_connected(hidden, a_size,
                                                   weights_initializer=initializer,
                                                   activation_fn=None)
                self.output = tf.nn.softmax(self.logits /
                                            config.logit_clipping_c)
            elif model_name == 'linear':
                self.logits = slim.fully_connected(self.state_plh, a_size,
                                                   weights_initializer=initializer,
                                                   activation_fn=None)
                self.output = tf.nn.softmax(self.logits)
            elif model_name == 'linear_logits_clipping':
                #self.logits = slim.fully_connected(self.state_plh, a_size,
                #                                   weights_initializer=initializer,
                #                                   activation_fn=None)
                # ----Old version----
                w = tf.get_variable('w', shape=[x_size, a_size], dtype=tf.float32,
                                    initializer=initializer)
                b = tf.get_variable('b', shape=[a_size], dtype=tf.float32,
                                    initializer=tf.zeros_initializer())
                self.logits = tf.matmul(self.state_plh, w) + b
                self.output = tf.nn.softmax(self.logits /
                                            config.logit_clipping_c)
            else:
                raise Exception('Invalid controller_model_name')

            self.chosen_action = tf.argmax(self.output, 1)
            self.action = tf.cast(tf.argmax(self.action_plh, 1), tf.int32)
            self.indexes = tf.range(0, tf.shape(self.output)[0])\
                * tf.shape(self.output)[1] + self.action
            self.responsible_outputs = tf.gather(tf.reshape(self.output, [-1]),
                                                self.indexes)
            self.loss = -tf.reduce_mean(tf.log(self.responsible_outputs)
                                        * self.reward_plh)

            # ----Store gradients and apply them after several iterations.----
            optimizer = tf.train.AdamOptimizer(learning_rate=lr)
            self.tvars = tf.trainable_variables()
            tvars = self.tvars
            self.gradient_plhs = []
            for idx, var in enumerate(tvars):
                placeholder = tf.placeholder(tf.float32, name=str(idx) + '_plh')
                self.gradient_plhs.append(placeholder)

            gvs = optimizer.compute_gradients(self.loss, tvars)
            self.grads = [grad for grad, _ in gvs]
            self.train_op = optimizer.apply_gradients(zip(self.gradient_plhs, tvars))
            #self.train_op = optimizer.apply_gradients(gvs)
            self.init = tf.global_variables_initializer()
            self.saver = tf.train.Saver()
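# A minimal driver sketch (assumption, not part of the original class): gradients are
# evaluated per rollout via `self.grads`, accumulated outside the graph, and then fed
# back through `self.gradient_plhs` so that `train_op` applies the averaged update.
# `controller`, `sess`, `states`, `actions` and `rewards` are hypothetical names.
grad_buffer = None
num_rollouts = 5
for _ in range(num_rollouts):
    feed = {controller.state_plh: states,
            controller.action_plh: actions,
            controller.reward_plh: rewards}
    grads = sess.run(controller.grads, feed_dict=feed)
    grad_buffer = grads if grad_buffer is None else [a + g for a, g in zip(grad_buffer, grads)]

# Feed the averaged gradients into the placeholders and apply them once.
feed = {plh: g / num_rollouts for plh, g in zip(controller.gradient_plhs, grad_buffer)}
feed[controller.lr_plh] = 1e-3
sess.run(controller.train_op, feed_dict=feed)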
Example #58
def XinNingNetwork1(input,
                    is_training,
                    weight_decay,
                    batch_norm_params,
                    num_labels,
                    depth_multi,
                    min_depth=8):
    print("labels; ", num_labels)
    time.sleep(3)

    def depth(d):
        return max(int(d * depth_multi), min_depth)

    with tf.variable_scope('pfld_inference1'):
        features = {}
        # normalizer_fn=slim.batch_norm,
        with slim.arg_scope(
            [slim.conv2d],
                activation_fn=tf.nn.relu6,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.01),
                biases_initializer=tf.zeros_initializer(),
                weights_regularizer=slim.l2_regularizer(weight_decay),
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                padding='SAME',
                # trainable=is_training
        ):
            print('PFLD input shape({}): {}'.format(input.name,
                                                    input.get_shape()))
            # 112*112*3(1) / conv3*3 / c:16,n:1,s:2
            conv1_1 = conv2d(input,
                             stride=2,
                             channel=16,
                             kernel=3,
                             depth=depth,
                             scope='conv1_1')
            print(conv1_1.name, conv1_1.get_shape())
            # 56*56*16 / conv3*3 / c:32,n:1,s:2
            conv1_2 = conv2d(conv1_1,
                             stride=2,
                             channel=32,
                             kernel=3,
                             depth=depth,
                             scope='conv1_2')
            print(conv1_2.name, conv1_2.get_shape())
            # 28*28*32 / pool2*2 / c:32,n:1,s:2
            pool1_2 = slim.max_pool2d(conv1_2,
                                      kernel_size=[2, 2],
                                      stride=2,
                                      scope='pool1_2',
                                      padding='SAME')
            print(pool1_2.name, pool1_2.get_shape())
            # 14*14*32 / conv3*3 / c:64,n:1,s:2
            conv1_2_1 = conv2d(pool1_2,
                               stride=2,
                               channel=64,
                               kernel=3,
                               depth=depth,
                               scope='conv1_2.1')
            print(conv1_2_1.name, conv1_2_1.get_shape())
            # 7*7*64 / global_pool / c:64,n:1
            pool1_2_1 = slim.avg_pool2d(conv1_2_1, [7, 7],
                                        stride=[7, 7],
                                        scope='pool1_2.1',
                                        padding='SAME')
            print(pool1_2_1.name, pool1_2_1.get_shape())
            # 14*14*32 / conv3*3 / c:64,n:1,s:2
            conv1_3 = conv2d(pool1_2,
                             stride=2,
                             channel=64,
                             kernel=3,
                             depth=depth,
                             scope='conv1_3')
            print(conv1_3.name, conv1_3.get_shape())
            # 7*7*64 / pool2*2 / c:64,n:1,s:2
            pool1_3 = slim.max_pool2d(conv1_3,
                                      kernel_size=[2, 2],
                                      stride=2,
                                      scope='pool1_3',
                                      padding='SAME')
            print(pool1_3.name, pool1_3.get_shape())
            # 4*4*64 / conv3*3 / c:64,n:1,s:2
            conv1_3_1 = conv2d(pool1_3,
                               stride=2,
                               channel=64,
                               kernel=3,
                               depth=depth,
                               scope='conv1_3.1')
            print(conv1_3_1.name, conv1_3_1.get_shape())
            # 2*2*64 / global_pool / c:64,n:1
            pool1_3_1 = slim.avg_pool2d(conv1_3_1, [2, 2],
                                        stride=[2, 2],
                                        scope='pool1_3.1',
                                        padding='SAME')
            print(pool1_3_1.name, pool1_3_1.get_shape())
            # 4*4*64 / conv3*3 / c:64,n:1,s:2
            conv1_4 = conv2d(pool1_3,
                             stride=2,
                             channel=64,
                             kernel=3,
                             depth=depth,
                             scope='conv1_4')
            print(conv1_4.name, conv1_4.get_shape())
            # 2*2*64 / global_pool / c:64,n:1
            pool1_4_1 = slim.avg_pool2d(conv1_4, [2, 2],
                                        stride=[2, 2],
                                        scope='pool1_4.1',
                                        padding='SAME')
            print(pool1_4_1.name, pool1_4_1.get_shape())
            # 1*1*64*3() / concat / 1*1*192
            concatted_1 = tf.concat([pool1_2_1, pool1_3_1, pool1_4_1], 3)
            print(concatted_1.name, concatted_1.get_shape())
            flattened_1 = slim.flatten(concatted_1)
            print(flattened_1.name, flattened_1.get_shape())
            # 1*1*192 / fc / 1*136
            output_1 = slim.fully_connected(flattened_1,
                                            num_outputs=num_labels * 2,
                                            scope='fc_1')
            print(output_1.name, output_1.get_shape())
            # 1*136 / transform / 112*112*1
            heatmap, _heat_values = _HeatMap(output_1, input, num_labels)
            print(heatmap.name, heatmap.get_shape())
            print("=== finish stage 1 ===")

            return output_1, heatmap, _heat_values
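# A minimal call sketch (assumption, not from the original source): typical slim
# batch-norm parameters and a 112x112 RGB input, matching the shape comments above.
# num_labels=68 (68 facial landmarks) and the other hyperparameters are assumptions.
batch_norm_params = {
    'is_training': True,
    'decay': 0.997,
    'epsilon': 1e-5,
    'scale': True,
}
images = tf.placeholder(tf.float32, [None, 112, 112, 3], name='image_batch')
landmarks, heatmap, heat_values = XinNingNetwork1(images,
                                                  is_training=True,
                                                  weight_decay=5e-5,
                                                  batch_norm_params=batch_norm_params,
                                                  num_labels=68,
                                                  depth_multi=1.0)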
Example #59
def O_Net(inputs,
          label=None,
          bbox_target=None,
          landmark_target=None,
          training=True):
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print("ONet input shape: ", inputs.get_shape())
        net = slim.conv2d(inputs,
                          num_outputs=32,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv1')
        print("ONet conv1 shape: ", net.get_shape())
        # in the original model, all pooling layers in O-Net use a stride of 2
        net = slim.max_pool2d(net,
                              kernel_size=[3, 3],
                              stride=2,
                              scope='pool1',
                              padding='SAME')
        print("ONet pool1 shape: ", net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=64,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv2')
        print("ONet conv2 shape: ", net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope='pool2')
        print("ONet pool2 shape: ", net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=64,
                          kernel_size=[3, 3],
                          stride=1,
                          scope='conv3')
        print("ONet conv3 shape: ", net.get_shape())
        net = slim.max_pool2d(net,
                              kernel_size=[2, 2],
                              stride=2,
                              scope='pool3',
                              padding='SAME')
        print("ONet pool3 shape: ", net.get_shape())
        net = slim.conv2d(net,
                          num_outputs=128,
                          kernel_size=[2, 2],
                          stride=1,
                          scope='conv4')
        print("ONet conv4 shape: ", net.get_shape())
        fc_flatten = slim.flatten(net)
        print("ONet fc input shape: ", fc_flatten.get_shape())
        fc1 = slim.fully_connected(fc_flatten,
                                   num_outputs=256,
                                   scope='fc1',
                                   activation_fn=tf.nn.relu)
        #cls
        print('ONet fc shape after flattening: ', fc1.get_shape())
        cls_prob = slim.fully_connected(fc1,
                                        num_outputs=2,
                                        scope='cls_fc',
                                        activation_fn=tf.nn.softmax)
        print('ONet cls_prob fc shape ', cls_prob.get_shape())
        #bbox
        bbox_pred = slim.fully_connected(fc1,
                                         num_outputs=4,
                                         scope='bbox_fc',
                                         activation_fn=None)
        print('ONet bbox_pred fc shape ', bbox_pred.get_shape())
        #landmark
        landmark_pred = slim.fully_connected(fc1,
                                             num_outputs=10,
                                             scope='landmark_fc',
                                             activation_fn=None)
        print('ONet landmark fc shape ', landmark_pred.get_shape())
        if training:
            cls_loss = cls_ohem(cls_prob, label)
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            accuracy = cal_accuracy(cls_prob, label)
            landmark_loss = landmark_ohem(landmark_pred, landmark_target,
                                          label)
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
        else:
            return cls_prob, bbox_pred, landmark_pred
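# A minimal inference sketch (assumption, not from the original source): in MTCNN,
# O-Net takes 48x48 RGB crops, and with training=False the function returns class
# probabilities, bounding-box offsets and landmark predictions.
candidates = tf.placeholder(tf.float32, [None, 48, 48, 3], name='onet_input')
cls_prob, bbox_pred, landmark_pred = O_Net(candidates, training=False)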
Example #60
    def __init__(self, size, eps=1e-2, default_clip_range=np.inf, sess=None):
        """A normalizer that ensures that observations are approximately distributed according to
        a standard Normal distribution (i.e. have mean zero and variance one).

        Args:
            size (int): the size of the observation to be normalized
            eps (float): a small constant that avoids underflows
            default_clip_range (float): normalized observations are clipped to be in
                [-default_clip_range, default_clip_range]
            sess (object): the TensorFlow session to be used
        """
        self.size = size
        self.eps = eps
        self.default_clip_range = default_clip_range
        self.sess = sess if sess is not None else tf.get_default_session()
        self.local_sum = np.zeros(self.size, np.float32)
        self.local_sumsq = np.zeros(self.size, np.float32)
        self.local_count = np.zeros(1, np.float32)

        self.sum_tf = tf.get_variable(initializer=tf.zeros_initializer(),
                                      shape=self.local_sum.shape,
                                      name='sum',
                                      trainable=False,
                                      dtype=tf.float32)
        self.sumsq_tf = tf.get_variable(initializer=tf.zeros_initializer(),
                                        shape=self.local_sumsq.shape,
                                        name='sumsq',
                                        trainable=False,
                                        dtype=tf.float32)
        self.count_tf = tf.get_variable(initializer=tf.ones_initializer(),
                                        shape=self.local_count.shape,
                                        name='count',
                                        trainable=False,
                                        dtype=tf.float32)
        self.mean = tf.get_variable(initializer=tf.zeros_initializer(),
                                    shape=(self.size, ),
                                    name='mean',
                                    trainable=False,
                                    dtype=tf.float32)
        self.std = tf.get_variable(initializer=tf.ones_initializer(),
                                   shape=(self.size, ),
                                   name='std',
                                   trainable=False,
                                   dtype=tf.float32)
        self.count_pl = tf.placeholder(name='count_pl',
                                       shape=(1, ),
                                       dtype=tf.float32)
        self.sum_pl = tf.placeholder(name='sum_pl',
                                     shape=(self.size, ),
                                     dtype=tf.float32)
        self.sumsq_pl = tf.placeholder(name='sumsq_pl',
                                       shape=(self.size, ),
                                       dtype=tf.float32)

        self.update_op = tf.group(self.count_tf.assign_add(self.count_pl),
                                  self.sum_tf.assign_add(self.sum_pl),
                                  self.sumsq_tf.assign_add(self.sumsq_pl))
        self.recompute_op = tf.group(
            tf.assign(self.mean, self.sum_tf / self.count_tf),
            tf.assign(
                self.std,
                tf.sqrt(
                    tf.maximum(
                        tf.square(self.eps), self.sumsq_tf / self.count_tf -
                        tf.square(self.sum_tf / self.count_tf)))),
        )
        self.lock = threading.Lock()
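    # A minimal sketch (assumption; these companion methods are not shown in the snippet
    # above): accumulate local statistics, push them into the TF variables via update_op,
    # recompute mean/std via recompute_op, and normalize observations with the result.
    def update(self, v):
        v = v.reshape(-1, self.size)
        with self.lock:
            self.local_sum += v.sum(axis=0)
            self.local_sumsq += np.square(v).sum(axis=0)
            self.local_count[0] += v.shape[0]

    def recompute_stats(self):
        with self.lock:
            local_sum = self.local_sum.copy()
            local_sumsq = self.local_sumsq.copy()
            local_count = self.local_count.copy()
            self.local_sum[...] = 0
            self.local_sumsq[...] = 0
            self.local_count[...] = 0
        self.sess.run(self.update_op, feed_dict={self.sum_pl: local_sum,
                                                 self.sumsq_pl: local_sumsq,
                                                 self.count_pl: local_count})
        self.sess.run(self.recompute_op)

    def normalize(self, v, clip_range=None):
        if clip_range is None:
            clip_range = self.default_clip_range
        mean, std = self.sess.run([self.mean, self.std])
        return np.clip((v - mean) / std, -clip_range, clip_range)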