Example #1
    def batch_norm_layer(x, batch_norm_decay, train_phase, scope_bn):
        # Build both a training-mode and an inference-mode batch-norm op over the
        # same variables, then select one at run time via the train_phase tensor.
        # (batch_normalization is presumably tf.compat.v1.layers.batch_normalization,
        # which accepts the reuse/name arguments used here.)
        bn_train = batch_normalization(inputs=x,
                                       momentum=batch_norm_decay,
                                       center=True,
                                       scale=True,
                                       training=True,
                                       reuse=None,
                                       trainable=True,
                                       name=scope_bn)
        # bn_train = batch_norm(x, decay=batch_norm_decay, center=True, scale=True, updates_collections=None,
        #                       is_training=True, reuse=None, trainable=True, scope=scope_bn)

        # bn_inference = batch_norm(x, decay=batch_norm_decay, center=True, scale=True, updates_collections=None,
        #                           is_training=False, reuse=True, trainable=True, scope=scope_bn)

        # reuse=True shares the variables created by the training branch above.
        bn_inference = batch_normalization(inputs=x,
                                           momentum=batch_norm_decay,
                                           center=True,
                                           scale=True,
                                           training=False,
                                           reuse=True,
                                           trainable=True,
                                           name=scope_bn)

        # Pick the training or inference branch depending on the boolean train_phase.
        z = tf.compat.v1.cond(train_phase, lambda: bn_train,
                              lambda: bn_inference)
        return z
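A minimal usage sketch (not part of the original example), assuming a TF 1.x / tf.compat.v1 graph-mode environment, that batch_normalization is tf.compat.v1.layers.batch_normalization, and treating batch_norm_layer as a standalone function; the shapes and names are illustrative only:

# Hypothetical usage sketch: feed a feature tensor and a boolean phase flag.
import tensorflow as tf

batch_normalization = tf.compat.v1.layers.batch_normalization  # assumed alias

x = tf.compat.v1.placeholder(tf.float32, [None, 128], name="features")
train_phase = tf.compat.v1.placeholder(tf.bool, name="train_phase")
normed = batch_norm_layer(x, batch_norm_decay=0.99,
                          train_phase=train_phase, scope_bn="bn1")
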
Example #2
def fully_connected(inputs,
                    num_outputs,
                    is_training=True,
                    is_bn=False,
                    activation=None):
    """
    Fully connected layer with non-linear operation
    :param inputs: (B, N)
    :param num_outputs: int
    :param is_training: bool
    :param is_bn: bool
    :param activation: activation function, such as tf.nn.relu
    :return: outputs: (B, num_outputs)
    """

    num_input_units = inputs.get_shape()[-1].value
    weights = tf.Variable(
        tf.truncated_normal([num_input_units, num_outputs],
                            dtype=tf.float32,
                            stddev=0.1))
    outputs = tf.matmul(inputs, weights)
    biases = tf.Variable(
        tf.constant(0.1, shape=[num_outputs], dtype=tf.float32))
    outputs = tf.nn.bias_add(outputs, biases)
    if is_bn:
        outputs = batch_normalization(outputs, training=is_training)
    if activation is not None:
        outputs = activation(outputs)
    return outputs
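A minimal usage sketch (not part of the original example), assuming a TF 1.x environment where tf.layers.batch_normalization is available under the name batch_normalization; the layer sizes below are illustrative only:

# Hypothetical usage sketch: a small MLP head built from fully_connected.
import tensorflow as tf

batch_normalization = tf.layers.batch_normalization  # assumed alias used by the example

features = tf.placeholder(tf.float32, [None, 256], name="features")   # (B, 256)
hidden = fully_connected(features, 128, is_training=True, is_bn=True,
                         activation=tf.nn.relu)                       # (B, 128)
logits = fully_connected(hidden, 10, activation=None)                 # (B, 10)
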
Example #3
def conv_2d(inputs,
            kernel_size,
            output_channel,
            stride,
            name,
            is_reuse=False,
            padding='SAME',
            data_format='NHWC',
            is_bn=False,
            is_training=True,
            activation=None):
    """
    2D Conv Layer
    :param name: scope name
    :param inputs: (B, H, W, C)
    :param kernel_size: [kernel_h, kernel_w]
    :param output_channel: feature num
    :param stride: a list of 2 ints
    :param is_reuse: bool, whether to reuse the variables in this scope
    :param padding: type of padding, str
    :param data_format: str, the format of input data
    :param is_bn: bool, is batch normalization
    :param is_training: bool, is training
    :param activation: activation function, such as tf.nn.relu
    :return: outputs
    """
    with tf.variable_scope(name) as scope:
        if is_reuse:
            scope.reuse_variables()
        kernel_h, kernel_w = kernel_size
        stride_h, stride_w = stride
        if data_format == 'NHWC':
            kernel_shape = [
                kernel_h, kernel_w,
                inputs.get_shape()[-1].value, output_channel
            ]
            strides = [1, stride_h, stride_w, 1]
        else:
            kernel_shape = [
                kernel_h, kernel_w,
                inputs.get_shape()[1].value, output_channel
            ]
            # NCHW keeps the spatial strides in the last two positions
            strides = [1, 1, stride_h, stride_w]
        init = tf.keras.initializers.he_normal()
        kernel = tf.get_variable(name='conv_kernel',
                                 shape=kernel_shape,
                                 initializer=init,
                                 dtype=tf.float32)
        # kernel = tf.Variable(tf.truncated_normal(kernel_shape, dtype=tf.float32, stddev=0.1))
        outputs = tf.nn.conv2d(input=inputs,
                               filter=kernel,
                               strides=strides,
                               padding=padding,
                               data_format=data_format)
        biases = tf.Variable(
            tf.constant(0.1, shape=[output_channel], dtype=tf.float32))
        # bias_add broadcasts over the channel axis given by data_format
        outputs = tf.nn.bias_add(outputs, biases, data_format=data_format)
        if is_bn:
            outputs = batch_normalization(outputs, training=is_training)
        if activation is not None:
            outputs = activation(outputs)
        return outputs
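A minimal usage sketch (not part of the original example), assuming a TF 1.x environment; the input shape, channel counts, and scope names are illustrative only:

# Hypothetical usage sketch: two stacked conv layers built with conv_2d.
import tensorflow as tf

batch_normalization = tf.layers.batch_normalization  # assumed alias used by the example

images = tf.placeholder(tf.float32, [None, 224, 224, 3], name="images")  # NHWC input
net = conv_2d(images, kernel_size=[3, 3], output_channel=64, stride=[1, 1],
              name="conv1", is_bn=True, is_training=True, activation=tf.nn.relu)
net = conv_2d(net, kernel_size=[3, 3], output_channel=128, stride=[2, 2],
              name="conv2", is_bn=True, is_training=True, activation=tf.nn.relu)
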
Example #4
def DBL(input, filters, kernel_size, strides=1):
    """Darknet-style conv block: Conv2D + BatchNormalization + LeakyReLU."""
    padding = 'same'
    if strides > 1:
        # Strided (down-sampling) convs use explicit fixed padding instead of 'same'.
        padding = 'valid'
        input = pad(input, kernel_size)
    input = layers.conv2d(input,
                          filters,
                          kernel_size,
                          strides=strides,
                          padding=padding,
                          use_bias=False)  # bias is redundant before batch norm
    input = layers.batch_normalization(input,
                                       momentum=DECAY_BATCH_NORM,
                                       epsilon=EPSILON)
    input = tf.nn.leaky_relu(input, alpha=LEAKY_RELU)
    return input
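A minimal usage sketch (not part of the original example). DBL depends on module-level names that are not shown in the excerpt (the layers alias, the pad helper, and the DECAY_BATCH_NORM / EPSILON / LEAKY_RELU constants); the definitions below are assumptions about how they might look in a TF 1.x YOLOv3-style codebase:

# Hypothetical module-level dependencies and usage for DBL.
import tensorflow as tf

layers = tf.layers          # assumed alias
DECAY_BATCH_NORM = 0.99     # assumed value
EPSILON = 1e-5              # assumed value
LEAKY_RELU = 0.1            # assumed value

def pad(x, kernel_size):
    # Assumed fixed-padding helper applied before strided convolutions.
    total = kernel_size - 1
    beg, end = total // 2, total - total // 2
    return tf.pad(x, [[0, 0], [beg, end], [beg, end], [0, 0]])

x = tf.placeholder(tf.float32, [None, 416, 416, 3])
x = DBL(x, filters=32, kernel_size=3)              # -> 416x416x32
x = DBL(x, filters=64, kernel_size=3, strides=2)   # -> 208x208x64
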
Example #5
    def __init__(self, state_size, action_size, learning_rate, name='DQLearner'):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate

        with v1.variable_scope(name):
            # Create the placeholders.
            # *state_size unpacks each element of the state_size tuple, so this is
            # equivalent to writing [None, 84, 84, 4].
            self.inputs_ = v1.placeholder(tf.float32, [None, *state_size], name="inputs")
            self.actions_ = v1.placeholder(tf.float32, [None, 3], name="actions_")

            # Remember that target_Q is R(s, a) + gamma * max_a' Q_hat(s', a')
            self.target_Q = v1.placeholder(tf.float32, [None], name="target")

            """
            First convnet:
            CNN
            BatchNormalization
            ELU
            """
            # Input is 84x84x4
            self.conv1 = v1l.conv2d(inputs=self.inputs_,
                                    filters=32,
                                    kernel_size=[8, 8],
                                    strides=[4, 4],
                                    padding="VALID",
                                    kernel_initializer=v1.initializers.glorot_uniform(),
                                    name="conv1")

            self.conv1_batchnorm = v1l.batch_normalization(self.conv1,
                                                           training=True,
                                                           epsilon=1e-5,
                                                           name='batch_norm1')

            self.conv1_out = tf.nn.elu(self.conv1_batchnorm, name="conv1_out")
            ## --> [20, 20, 32]

            """
            Second convnet:
            CNN
            BatchNormalization
            ELU
            """
            self.conv2 = v1l.conv2d(inputs=self.conv1_out,
                                    filters=64,
                                    kernel_size=[4, 4],
                                    strides=[2, 2],
                                    padding="VALID",
                                    kernel_initializer=v1.initializers.glorot_uniform(),
                                    name="conv2")

            self.conv2_batchnorm = v1l.batch_normalization(self.conv2,
                                                           training=True,
                                                           epsilon=1e-5,
                                                           name='batch_norm2')

            self.conv2_out = tf.nn.elu(self.conv2_batchnorm, name="conv2_out")
            ## --> [9, 9, 64]

            """
            Third convnet:
            CNN
            BatchNormalization
            ELU
            """
            self.conv3 = v1l.conv2d(inputs=self.conv2_out,
                                    filters=128,
                                    kernel_size=[4, 4],
                                    strides=[2, 2],
                                    padding="VALID",
                                    kernel_initializer=v1.initializers.glorot_uniform(),
                                    name="conv3")

            self.conv3_batchnorm = v1l.batch_normalization(self.conv3,
                                                           training=True,
                                                           epsilon=1e-5,
                                                           name='batch_norm3')

            self.conv3_out = tf.nn.elu(self.conv3_batchnorm, name="conv3_out")
            ## --> [3, 3, 128]

            self.flatten = v1l.flatten(self.conv3_out)
            ## --> [1152]

            self.fc = v1l.dense(inputs=self.flatten,
                                units=512,
                                activation=tf.nn.elu,
                                kernel_initializer=v1.initializers.glorot_uniform(),
                                name="fc1")

            self.output = v1l.dense(inputs=self.fc,
                                    kernel_initializer=v1.initializers.glorot_uniform(),
                                    units=3,
                                    activation=None)

            # Q is our predicted Q value.
            self.Q = tf.math.reduce_sum(tf.math.multiply(self.output, self.actions_), axis=1)

            # The loss is the mean squared error between the predicted Q values
            # and the Q targets: mean((Q_target - Q)^2)
            self.loss = tf.math.reduce_mean(tf.math.square(self.target_Q - self.Q))

            self.optimizer = v1.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)
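A minimal usage sketch (not part of the original example). The excerpt shows only __init__, so the enclosing class name (DQNetwork below), the v1/v1l aliases, and the hyperparameter and batch values are assumptions:

# Hypothetical setup for the aliases the excerpt relies on, plus one training step.
import numpy as np
import tensorflow as tf
import tensorflow.compat.v1 as v1

v1l = v1.layers                 # assumed alias used by the excerpt
v1.disable_eager_execution()    # required when running under TF 2.x

net = DQNetwork(state_size=(84, 84, 4), action_size=3, learning_rate=2.5e-4)  # assumed class name

with v1.Session() as sess:
    sess.run(v1.global_variables_initializer())
    states = np.zeros((32, 84, 84, 4), dtype=np.float32)                 # dummy batch of stacked frames
    actions = np.eye(3, dtype=np.float32)[np.random.randint(0, 3, 32)]   # one-hot actions
    targets = np.zeros(32, dtype=np.float32)                             # dummy Q targets
    loss, _ = sess.run([net.loss, net.optimizer],
                       feed_dict={net.inputs_: states,
                                  net.actions_: actions,
                                  net.target_Q: targets})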