Example #1
    def _fc_layers(self):
        '''
        All FC layers of VGG16 (+custom layers)
        '''
        # fc1
        self.fc1, weights, biases = layers.fc(name='fc1',
                                              input=self.pool5,
                                              units=4096,
                                              activation=self.FC_ACTIVATION)
        self.parameters += [weights, biases]

        # fc2
        self.fc2, weights, biases = layers.fc(name='fc2',
                                              input=self.fc1,
                                              units=4096,
                                              activation=self.FC_ACTIVATION)
        self.parameters += [weights, biases]

        # fc3
        self.fc3, weights, biases = layers.fc(name='fc3',
                                              input=self.fc2,
                                              units=FLAGS.num_classes,
                                              activation='linear')

        # Softmax
        self.predictions = tf.nn.softmax(self.fc3)
Example #2
  def build(self):
    """Create the network graph."""
    # 1st Layer: Conv (w ReLu) -> Lrn -> Pool
    conv1 = conv(self.x, 11, 11, 96, 4, 4, padding='VALID', name='conv1')
    norm1 = lrn(conv1, 2, 1e-05, 0.75, name='norm1')
    pool1 = max_pool(norm1, 3, 3, 2, 2, padding='VALID', name='pool1')

    # 2nd Layer: Conv (w ReLu)  -> Lrn -> Pool with 2 groups
    conv2 = conv(pool1, 5, 5, 256, 1, 1, groups=2, name='conv2')
    norm2 = lrn(conv2, 2, 1e-05, 0.75, name='norm2')
    pool2 = max_pool(norm2, 3, 3, 2, 2, padding='VALID', name='pool2')

    # 3rd Layer: Conv (w ReLu)
    conv3 = conv(pool2, 3, 3, 384, 1, 1, name='conv3')

    # 4th Layer: Conv (w ReLu) split into two groups
    conv4 = conv(conv3, 3, 3, 384, 1, 1, groups=2, name='conv4')

    # 5th Layer: Conv (w ReLu) -> Pool split into two groups
    conv5 = conv(conv4, 3, 3, 256, 1, 1, groups=2, name='conv5')
    pool5 = max_pool(conv5, 3, 3, 2, 2, padding='VALID', name='pool5')

    # 6th Layer: Flatten -> FC (w ReLu) -> Dropout
    flattened = tf.reshape(pool5, [-1, 6 * 6 * 256])
    fc6 = fc(flattened, 6 * 6 * 256, 4096, name='fc6')

    # 7th Layer: FC (w ReLu) -> Dropout
    fc7 = fc(fc6, 4096, 4096, name='fc7')

    # 8th Layer: FC and return unscaled activations
    self.fc8 = fc(fc7, 4096, self.num_classes, relu=False, name='fc8')
Example #3
    def _build_q_head(self, input_state):
        self.w_value, self.b_value, self.value = layers.fc('fc_value',
                                                           input_state,
                                                           1,
                                                           activation='linear')
        self.w_L, self.b_L, self.L_full = layers.fc('L_full',
                                                    input_state,
                                                    self.num_actions,
                                                    activation='linear')
        self.w_mu, self.b_mu, self.mu = layers.fc('mu',
                                                  input_state,
                                                  self.num_actions,
                                                  activation='linear')

        # elements above the main diagonal in L_full are unused
        D = tf.matrix_band_part(tf.exp(self.L_full) - self.L_full, 0, 0)
        L = tf.matrix_band_part(self.L_full, -1, 0) + D

        # L^T (u - mu), then the quadratic form (u - mu)^T L L^T (u - mu)
        LT_u_minus_mu = tf.einsum('ikj,ik->ij', L,
                                  self.selected_action_ph - self.mu)
        self.advantage = tf.einsum('ij,ij->i', LT_u_minus_mu, LT_u_minus_mu)

        q_selected_action = self.value + self.advantage
        diff = tf.subtract(self.target_ph, q_selected_action)
        return self._huber_loss(diff)
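The quadratic form above is the NAF-style advantage: L is lower-triangular with a positive diagonal (hence the exp on the diagonal of L_full), and an action is scored by how far it lies from mu under L L^T. A minimal NumPy sketch of that computation, assuming L has shape [batch, num_actions, num_actions] (shapes and values are made up for illustration):

import numpy as np

batch, num_actions = 2, 3
rng = np.random.RandomState(0)
# Per-sample lower-triangular matrices, selected actions, and action means
L = np.stack([np.tril(rng.randn(num_actions, num_actions)) for _ in range(batch)])
u = rng.randn(batch, num_actions)
mu = rng.randn(batch, num_actions)

# L^T (u - mu), then its squared norm per sample: (u - mu)^T L L^T (u - mu)
lt_diff = np.einsum('ikj,ik->ij', L, u - mu)
quad = np.einsum('ij,ij->i', lt_diff, lt_diff)
print(quad.shape)  # (2,)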
Example #4
def fc_network(x, pretrained=False, weights=None, biases=None, activation='swish', scope='fc_network', bn_phaze=False,
               keep_prob=0.5):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        else:
            act_func = tf.nn.sigmoid

        g_fc_layer1 = layers.fc(x, g_fc_layer1_dim, use_bias=False, scope='g_fc_layer1')
        g_fc_layer1 = layers.batch_norm(g_fc_layer1, bn_phaze, scope='g_fc_layer1_bn')
        g_fc_layer1 = act_func(g_fc_layer1)
        g_fc_layer1 = tf.nn.dropout(g_fc_layer1, keep_prob=keep_prob)

        g_fc_layer2 = layers.fc(g_fc_layer1, g_fc_layer2_dim, use_bias=False, scope='g_fc_layer2')
        g_fc_layer2 = layers.batch_norm(g_fc_layer2, bn_phaze, scope='g_fc_layer2_bn')
        g_fc_layer2 = act_func(g_fc_layer2)
        g_fc_layer2 = tf.nn.dropout(g_fc_layer2, keep_prob=keep_prob)

        g_fc_layer3 = layers.fc(g_fc_layer2, g_fc_layer3_dim, use_bias=False, scope='g_fc_layer3')
        g_fc_layer3 = layers.batch_norm(g_fc_layer3, bn_phaze, scope='g_fc_layer3_bn')
        g_fc_layer3 = act_func(g_fc_layer3)
        g_fc_layer3 = tf.nn.dropout(g_fc_layer3, keep_prob=keep_prob)

        return g_fc_layer3
Example #5
    def __compute_qkv(queries, keys, values, num_heads):
        """
        Add linear projection to queries, keys, and values.

        Args:
            queries(Tensor): a 3-D input Tensor.
            keys(Tensor): a 3-D input Tensor.
            values(Tensor): a 3-D input Tensor.
            num_heads(int): The number of heads. Linearly project the inputs
                            ONLY when num_heads > 1.

        Returns:
            Tensor: linearly projected output Tensors: queries', keys' and
                    values'. They have the same shapes with queries, keys and
                    values.
        """

        if num_heads == 1:
            return queries, keys, values

        q = layers.fc(input=queries,
                      size=queries.shape[-1],
                      num_flatten_dims=2)
        k = layers.fc(input=keys, size=keys.shape[-1], num_flatten_dims=2)
        v = layers.fc(input=values, size=values.shape[-1], num_flatten_dims=2)
        return q, k, v
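The projected queries, keys and values above are exactly what a scaled dot-product attention step consumes. For reference, a minimal NumPy sketch of that step for a single head (shapes are hypothetical and unrelated to any particular model):

import numpy as np

batch, seq_len, d_model = 2, 4, 8
rng = np.random.RandomState(0)
q = rng.randn(batch, seq_len, d_model)
k = rng.randn(batch, seq_len, d_model)
v = rng.randn(batch, seq_len, d_model)

# softmax(Q K^T / sqrt(d)) V
scores = np.einsum('bqd,bkd->bqk', q, k) / np.sqrt(d_model)
weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
weights /= weights.sum(axis=-1, keepdims=True)
out = np.einsum('bqk,bkd->bqd', weights, v)
print(out.shape)  # (2, 4, 8)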
Example #6
 def _build_network(self):
     # target_xxx refers to tensors in the target Q network, e.g. tf.assign(target_w, w)
     # xxx_target refers to training target values, e.g. loss = q_target - q_current
     weights = {}
     target_weights = {}
     with tf.name_scope('input'):
         state = tf.placeholder(dtype=tf.float32, shape=[None, self.STATE_SPACE])
     with tf.name_scope('Q_Net'):
         with tf.name_scope('hidden'):
             y1, weights['W1'], weights['b1'] = layers.fc(state, n_neurons=self.HIDDEN_NEURONS,
                                                          activation=tf.nn.tanh)
         with tf.name_scope('q_value'):
             q_values, weights['W2'], weights['b2'] = layers.fc(y1, n_neurons=self.ACTION_SPACE)
     with tf.name_scope('Q_Target'):
         with tf.name_scope('hidden'):
             target_y1, target_weights['W1'], target_weights['b1'] = layers.fc(state, n_neurons=self.HIDDEN_NEURONS,
                                                                               activation=tf.nn.tanh)
         with tf.name_scope('q_value'):
             target_q_values, target_weights['W2'], target_weights['b2'] = layers.fc(target_y1,
                                                                                     n_neurons=self.ACTION_SPACE)
         with tf.name_scope('update'):
             update_ops = []
             for name in weights:
                 update_ops.append(tf.assign(target_weights[name], weights[name]))
     # loss
     with tf.name_scope('loss'):
         action = tf.placeholder(tf.int32, [None])
         action_mask = tf.one_hot(action, depth=self.ACTION_SPACE, on_value=1.0, off_value=0.0, dtype=tf.float32)
         q_current = tf.reduce_sum(tf.multiply(q_values, action_mask), axis=1)
         q_target = tf.placeholder(tf.float32, [None])
         loss = tf.reduce_mean(tf.squared_difference(q_current, q_target))
         tf.summary.scalar('loss', loss)
     # train
     with tf.name_scope('train'):
         global_step = tf.Variable(0, trainable=False, name='global_step')
         train_step = tf.train.AdamOptimizer().minimize(loss, global_step=global_step)
     # tensor board
     merged = tf.summary.merge_all()
     train_writer = tf.summary.FileWriter('/tmp/tensorflow-drl/dqn/train', self._sess.graph)
     test_writer = tf.summary.FileWriter('/tmp/tensorflow-drl/dqn/test')
     #
     self._sess.run(tf.global_variables_initializer())
     #
     return {'state': state,
             'q_values': q_values,
             'action': action,
             'q_current': q_current,
             'q_target': q_target,
             'loss': loss,
             'train_step': train_step,
             'global_step': global_step,
             'merged': merged,
             'train_writer': train_writer,
             'test_writer': test_writer}, {'state': state,
                                           'q_values': target_q_values,
                                           'update_ops': update_ops}
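The loss block picks out Q(s, a) for the action actually taken by masking the Q-value vector with a one-hot encoding and summing over the action axis. A tiny NumPy illustration of that selection (the numbers are made up):

import numpy as np

q_values = np.array([[1.0, 2.0, 3.0],
                     [4.0, 5.0, 6.0]])          # [batch, ACTION_SPACE]
action = np.array([2, 0])                        # chosen action indices
action_mask = np.eye(q_values.shape[1])[action]  # one-hot rows
q_current = (q_values * action_mask).sum(axis=1)
print(q_current)  # [3. 4.]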
Example #7
    def __init__(self, ob_shape, ac_shape, reuse=False, **kwargs):
        self.sess = tf.get_default_session()
        nbatch, nenvs = kwargs.values()

        obs_ph = tf.placeholder(tf.uint8, [nbatch,*ob_shape], 'obs_ph')

        with tf.variable_scope('model', reuse=reuse):

            x = obs_ph
            if x.dtype != tf.float32:
                # cast uint8 observations to float32 and rescale to [0, 1]
                x = tf.cast(x, tf.float32) / 255.

            with tf.variable_scope('cnn'):
                h = layers.conv2d_block(x)

            with tf.variable_scope('actor'):
                logits = layers.fc(h, ac_shape[-1], 'logits', activate=False, gain=0.01)

            with tf.variable_scope('critic'):
                vf = layers.fc(h, 1, 'vf', activate=False, gain=1.0)[:,0]

        def sample():
            u = tf.random_uniform(tf.shape(logits))
            return tf.argmax(logits - tf.log(-tf.log(u)), axis=-1)

        def entropy():
            a0 = logits - tf.reduce_max(logits, axis=-1, keepdims=True)
            ea0 = tf.exp(a0)
            z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
            p0 = ea0 / z0
            return tf.reduce_sum(p0 * (tf.log(z0) - a0), axis=-1)

        def kl(other):
            a0 = logits - tf.reduce_max(logits, axis=-1, keepdims=True)
            a1 = other.logits - tf.reduce_max(other.logits, axis=-1, keepdims=True)
            ea0 = tf.exp(a0)
            ea1 = tf.exp(a1)
            z0 = tf.reduce_sum(ea0, axis=-1, keepdims=True)
            z1 = tf.reduce_sum(ea1, axis=-1, keepdims=True)
            p0 = ea0 / z0
            return tf.reduce_sum(p0 * (a0 - tf.log(z0) - a1 + tf.log(z1)), axis=-1)

        def neglogp(x):
            one_hot_actions = tf.one_hot(x, logits.get_shape().as_list()[-1])
            return tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits,
                labels=one_hot_actions)

        self.a0 = sample()
        self.neglogp0 = neglogp(self.a0)
        self.vf0 = vf
        self.entropy = entropy
        self.kl = kl
        self.neglogp = neglogp
        self.obs_ph = obs_ph
        self.initial_state = None
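sample() draws actions via the Gumbel-max trick: adding -log(-log(u)) noise to the logits and taking the argmax is equivalent to sampling from the softmax distribution over those logits. A small NumPy check with arbitrary logits:

import numpy as np

rng = np.random.RandomState(0)
logits = np.array([1.0, 0.0, -1.0])
probs = np.exp(logits) / np.exp(logits).sum()

draws = 100000
u = rng.uniform(size=(draws, logits.size))
samples = np.argmax(logits - np.log(-np.log(u)), axis=-1)
freqs = np.bincount(samples, minlength=logits.size) / draws
print(np.round(probs, 3))  # theoretical softmax probabilities
print(np.round(freqs, 3))  # empirical frequencies, should be close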
Example #8
def vgg16(inputs, num_classes, keep_prob, is_training):
    """vgg16 network

    """
    # x = tf.reshape(inputs, shape=[-1, 28, 28, 3])
    x = tf.nn.lrn(inputs, depth_radius=5, bias=1.0, alpha=0.0001, beta=0.75, name='inputs')

    # first conv block
    conv1_1 = conv2d(x, shape=[3, 3, 3, 64], strides=[1, 1, 1, 1], padding='SAME', is_training=is_training, name='conv1_1')
    conv1_2 = conv2d(conv1_1, shape=[3, 3, 64, 64], strides=[1, 1, 1, 1], padding='SAME', is_training=is_training, name='conv1_2')
    pool1 = max_pooling(conv1_2, ksize=[2, 2], strides=[2, 2], padding='SAME', name='pool1')

    # second conv block
    conv2_1 = conv2d(pool1, shape=[3, 3, 64, 128], strides=[1, 1, 1, 1], padding='SAME', is_training=is_training, name='conv2_1')
    conv2_2 = conv2d(conv2_1, shape=[3, 3, 128, 128], strides=[1, 1, 1, 1], padding='SAME', is_training=is_training, name='conv2_2')
    pool2 = max_pooling(conv2_2, ksize=[2, 2], strides=[2, 2], padding='SAME', name='pool2')

    # 3rd conv block
    conv3_1 = conv2d(pool2, shape=[3, 3, 128, 256], strides=[1, 1, 1, 1], padding='SAME', is_training=is_training, name='conv3_1')
    conv3_2 = conv2d(conv3_1, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', is_training=is_training, name='conv3_2')
    conv3_3 = conv2d(conv3_2, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', is_training=is_training, name='conv3_3')
    pool3 = max_pooling(conv3_3, ksize=[2, 2], strides=[2, 2], padding='SAME', name='pool3')

    # 4th conv block
    conv4_1 = conv2d(pool3, shape=[3, 3, 256, 512], strides=[1, 1, 1, 1], padding='SAME', is_training=is_training, name='conv4_1')
    conv4_2 = conv2d(conv4_1, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', is_training=is_training, name='conv4_2')
    conv4_3 = conv2d(conv4_2, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', is_training=is_training, name='conv4_3')
    pool4 = max_pooling(conv4_3, ksize=[2, 2], strides=[2, 2], padding='SAME', name='pool4')

    # 5th conv block
    conv5_1 = conv2d(pool4, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', is_training=is_training, name='conv5_1')
    conv5_2 = conv2d(conv5_1, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', is_training=is_training, name='conv5_2')
    conv5_3 = conv2d(conv5_2, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', is_training=is_training, name='conv5_3')
    pool5 = max_pooling(conv5_3, ksize=[2, 2], strides=[2, 2], padding='SAME', name='pool5')

    # fully connected block
    # flatten outputs of the previous layer as a one dimension vector
    # flatten_shape = tf.shape(pool5)[1] * tf.shape(pool5)[2] * tf.shape(pool5)[3]

    flatten_shape = pool5.get_shape()[1].value * pool5.get_shape()[2].value * pool5.get_shape()[3].value
    fc1 = tf.reshape(pool5, shape=[-1, flatten_shape])
    fc1 = fc(fc1, shape=[flatten_shape, 4096], name='fc1')
    fc1 = dropout(fc1, keep_prob=0.5, name='dropout1')

    fc2 = fc(fc1, shape=[4096, 4096], name='fc2')
    fc2 = dropout(fc2, keep_prob=0.5, name='dropout2')

    fc3 = fc(fc2, shape=[4096, num_classes], name='fc3')
    fc3 = dropout(fc3, keep_prob=0.5, name='dropout3')

    # softmax over the class scores (probabilities, not raw logits)
    logits = tf.nn.softmax(fc3, name="softmax")

    return logits
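With a 224x224 input, the five stride-2 poolings shrink the spatial size to 7x7, so the flattened vector before fc1 has 7*7*512 = 25088 elements (the same constant that Example #10 hard-codes). A quick arithmetic check:

size = 224
for _ in range(5):              # five 2x2, stride-2 max-pool layers
    size //= 2
print(size, size * size * 512)  # 7 25088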
Example #9
 def __build_net(self):
     """
     Introduction
     ------------
         Build the ONet model structure.
     """
     with tf.variable_scope('onet'):
         self.input = tf.placeholder(shape=[None, 48, 48, 3],
                                     dtype=tf.float32,
                                     name='input_data')
         layer = conv('conv1',
                      self.input,
                      kernel_size=(3, 3),
                      channels_output=32,
                      stride=(1, 1),
                      padding='VALID',
                      relu=False)
         layer = prelu('prelu1', layer)
         layer = max_pool('pool1', layer, kernel_size=(3, 3), stride=(2, 2))
         layer = conv('conv2',
                      layer,
                      kernel_size=(3, 3),
                      channels_output=64,
                      stride=(1, 1),
                      padding='VALID',
                      relu=False)
         layer = prelu('prelu2', layer)
         layer = max_pool('pool2',
                          layer,
                          kernel_size=(3, 3),
                          stride=(2, 2),
                          padding='VALID')
         layer = conv('conv3',
                      layer,
                      kernel_size=(3, 3),
                      channels_output=64,
                      stride=(1, 1),
                      padding='VALID',
                      relu=False)
         layer = prelu('prelu3', layer)
         layer = max_pool('pool3', layer, kernel_size=(2, 2), stride=(2, 2))
         layer = conv('conv4',
                      layer,
                      kernel_size=(2, 2),
                      channels_output=128,
                      stride=(1, 1),
                      padding='VALID',
                      relu=False)
         layer = prelu('prelu4', layer)
         layer = fc('fc1', layer, channels_output=256, relu=False)
         layer = prelu('prelu5', layer)
         fc2 = fc('fc2-1', layer, channels_output=2, relu=False)
         self.prob = tf.nn.softmax(fc2, axis=1, name='prob')
         self.loc = fc('fc2-2', layer, channels_output=4, relu=False)
Example #10
    def build_graph(self):
        self.iterator = tf.data.Iterator.from_structure(
            (tf.float32, tf.int32),
            (tf.TensorShape([None, 224, 224, 3]), tf.TensorShape([None]))
        )
        self.inputs, self.labels = self.iterator.get_next()

        sp, st = [3, 3], [1, 1]
        mp = [2, 2]

        self.conv1_1 = layers.conv(self.inputs, sp, 64, st, name='conv1_1')
        self.conv1_2 = layers.conv(self.conv1_1, sp, 64, st, name='conv1_2')

        pool1 = layers.max_pool(self.conv1_2, mp, mp, name='pool1')

        self.conv2_1 = layers.conv(pool1, sp, 128, st, name='conv2_1')
        self.conv2_2 = layers.conv(self.conv2_1, sp, 128, st, name='conv2_2')

        pool2 = layers.max_pool(self.conv2_2, mp, mp, name='pool2')

        self.conv3_1 = layers.conv(pool2, sp, 256, st, name='conv3_1')
        self.conv3_2 = layers.conv(self.conv3_1, sp, 256, st, name='conv3_2')
        self.conv3_3 = layers.conv(self.conv3_2, sp, 256, st, name='conv3_3')
        self.conv3_4 = layers.conv(self.conv3_3, sp, 256, st, name='conv3_4')

        pool3 = layers.max_pool(self.conv3_4, mp, mp, name='pool3')

        self.conv4_1 = layers.conv(pool3, sp, 512, st, name='conv4_1')
        self.conv4_2 = layers.conv(self.conv4_1, sp, 512, st, name='conv4_2')
        self.conv4_3 = layers.conv(self.conv4_2, sp, 512, st, name='conv4_3')
        self.conv4_4 = layers.conv(self.conv4_3, sp, 512, st, name='conv4_4')

        pool4 = layers.max_pool(self.conv4_4, mp, mp, name='pool4')

        self.conv5_1 = layers.conv(pool4, sp, 512, st, name='conv5_1')
        self.conv5_2 = layers.conv(self.conv5_1, sp, 512, st, name='conv5_2')
        self.conv5_3 = layers.conv(self.conv5_2, sp, 512, st, name='conv5_3')
        self.conv5_4 = layers.conv(self.conv5_3, sp, 512, st, name='conv5_4')

        pool5 = layers.max_pool(self.conv5_4, mp, mp, name='pool5')
        flattened = tf.reshape(pool5, [-1, 25088])

        fc6 = layers.fc(flattened, 4096, name='fc6')
        fc7 = layers.fc(fc6, 4096, name='fc7')

        self.logits = layers.fc(fc7, self.num_classes, relu=False,
                                name='fc8')
        self.probs_op = tf.nn.softmax(self.logits)
        self.pred_op = tf.argmax(input=self.logits, axis=1)
        corrects_op = tf.equal(tf.cast(self.pred_op, tf.int32),
                               self.labels)
        self.acc_op = tf.reduce_mean(tf.cast(corrects_op, tf.float32))
Example #11
    def _build_encoder(self, vd):
        with tf.variable_scope(self.name):
            if self.arch == 'FC':
                layer_i = layers.flatten(self.input_ph)
                for i, layer_size in enumerate(self.fc_layer_sizes):
                    layer_i = layers.fc('fc{}'.format(i+1), layer_i, layer_size, activation=self.activation)[-1]
                self.ox = layer_i
            elif self.arch == 'ATARI-TRPO':
                self.w1, self.b1, self.o1 = layers.conv2d('conv1', self.input_ph, 16, 4, self.input_channels, 2, activation=self.activation)
                self.w2, self.b2, self.o2 = layers.conv2d('conv2', self.o1, 16, 4, 16, 2, activation=self.activation)
                self.w3, self.b3, self.o3 = layers.fc('fc3', layers.flatten(self.o2), 20, activation=self.activation)
                self.ox = self.o3
            elif self.arch == 'NIPS':
                self.w1, self.b1, self.o1 = layers.conv2d('conv1', vd, self.input_ph, 16, 8, self.input_channels, 4, activation=self.activation)
                self.w2, self.b2, self.o2 = layers.conv2d('conv2', vd, self.o1, 32, 4, 16, 2, activation=self.activation)
                self.w3, self.b3, self.o3 = layers.fc('fc3', vd, layers.flatten(self.o2), 256, activation=self.activation)
                self.ox = self.o3
            elif self.arch == 'NATURE':
                self.w1, self.b1, self.o1 = layers.conv2d('conv1', self.input_ph, 32, 8, self.input_channels, 4, activation=self.activation)
                self.w2, self.b2, self.o2 = layers.conv2d('conv2', self.o1, 64, 4, 32, 2, activation=self.activation)
                self.w3, self.b3, self.o3 = layers.conv2d('conv3', self.o2, 64, 3, 64, 1, activation=self.activation)
                self.w4, self.b4, self.o4 = layers.fc('fc4', layers.flatten(self.o3), 512, activation=self.activation)
                self.ox = self.o4
            else:
                raise Exception('Invalid architecture `{}`'.format(self.arch))

            if self.use_recurrent:
                with tf.variable_scope('lstm_layer') as vs:
                    self.lstm_cell = tf.contrib.rnn.BasicLSTMCell(
                        self.hidden_state_size, state_is_tuple=True, forget_bias=1.0)
                    
                    batch_size = tf.shape(self.step_size)[0]
                    self.ox_reshaped = tf.reshape(self.ox,
                        [batch_size, -1, self.ox.get_shape().as_list()[-1]])
                    state_tuple = tf.contrib.rnn.LSTMStateTuple(
                        *tf.split(self.initial_lstm_state, 2, 1))

                    self.lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(
                        self.lstm_cell,
                        self.ox_reshaped,
                        initial_state=state_tuple,
                        sequence_length=self.step_size,
                        time_major=False)

                    self.lstm_state = tf.concat(self.lstm_state, 1)
                    self.ox = tf.reshape(self.lstm_outputs, [-1,self.hidden_state_size], name='reshaped_lstm_outputs')

                    # Get all LSTM trainable params
                    self.lstm_trainable_variables = [v for v in 
                        tf.trainable_variables() if v.name.startswith(vs.name)]

            return self.ox
Example #12
 def net(self, X, reuse=None):
     with tf.variable_scope('EyeNet', reuse=reuse):
         conv1 = conv2d(X,output_dims=20,k_h=5,k_w=5,s_h=1,s_w=1,padding='VALID',name='conv1')   
         pool1 = max_pool(conv1,k_h=2,k_w=2,s_h=2,s_w=2,padding='SAME',name='pool1')
         conv2 = conv2d(pool1,output_dims=50,k_h=5,k_w=5,s_h=1,s_w=1,padding='VALID',name='conv2')              
         pool2 = max_pool(conv2,k_h=2,k_w=2,s_h=2,s_w=2,padding='SAME',name='pool2') 
         flatten = tf.reshape(pool2,[-1, pool2.get_shape().as_list()[1]
                                         *pool2.get_shape().as_list()[2]
                                         *pool2.get_shape().as_list()[3]], name='conv_reshape')
         fc1 = fc(flatten, output_dims=500, name='fc1')
         relu1 = relu(fc1, name='relu1')
         out = fc(relu1, output_dims=2, name='output')
         return out
Example #13
    def _create_model(self):
        inputs = self.x

        for idx, n_hidden in enumerate(self._n_hidden):
            outputs = layers.fc(inputs,
                                n_hidden,
                                nl=tf.tanh,
                                name='hidden_{}'.format(idx))
            inputs = outputs

        n_out_dim = self._n_mix * (1 + self._y_dim + self._y_dim)
        outputs = layers.fc(inputs, n_out_dim, name='out_layer')

        self.p, self.mu, self.sigma = self.get_mix_model_params(outputs)
Example #14
 def _build_network(self):
     with tf.name_scope('input'):
         state = tf.placeholder(dtype=tf.float32,
                                shape=[None, self.STATE_SPACE])
     with tf.name_scope('hidden'):
         y1, _, _ = layers.fc(state,
                              n_neurons=self.HIDDEN_NEURONS,
                              activation=tf.nn.tanh)
     with tf.name_scope('q_value'):
         q_values, _, _ = layers.fc(y1, n_neurons=self.ACTION_SPACE)
     # loss
     with tf.name_scope('loss'):
         action = tf.placeholder(tf.int32, [None])
         action_mask = tf.one_hot(action,
                                  depth=self.ACTION_SPACE,
                                  on_value=1.0,
                                  off_value=0.0,
                                  dtype=tf.float32)
         q_current = tf.reduce_sum(tf.multiply(q_values, action_mask),
                                   axis=1)
         q_target = tf.placeholder(tf.float32, [None])
         loss = tf.reduce_mean(tf.squared_difference(q_current, q_target))
         tf.summary.scalar('loss', loss)
     # train
     with tf.name_scope('train'):
         global_step = tf.Variable(0, trainable=False, name='global_step')
         train_step = tf.train.AdamOptimizer().minimize(
             loss, global_step=global_step)
     # tensor board
     merged = tf.summary.merge_all()
     train_writer = tf.summary.FileWriter('/tmp/tensorflow-drl/dqn/train',
                                          self._sess.graph)
     test_writer = tf.summary.FileWriter('/tmp/tensorflow-drl/dqn/test')
     #
     self._sess.run(tf.global_variables_initializer())
     #
     return {
         'state': state,
         'q_values': q_values,
         'action': action,
         'q_current': q_current,
         'q_target': q_target,
         'loss': loss,
         'train_step': train_step,
         'global_step': global_step,
         'merged': merged,
         'train_writer': train_writer,
         'test_writer': test_writer
     }
Example #15
def latent_discriminator(input_data, activation='swish', scope='ldiscriminator', reuse=False, bn_phaze=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        #if reuse:
        #    tf.get_variable_scope().reuse_variables()

        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'tanh':
            act_func = tf.nn.tanh
        else:
            act_func = tf.nn.sigmoid

        l = tf.reshape(input_data, shape=[-1, 4, 4, 8])

        l = layers.conv(l, scope='conv1', filter_dims=[3, 3, g_dense_block_depth/2], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False)

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_0')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_1')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_2')

        l = layers.global_avg_pool(l, representation_dim)
        dc_final_layer = l

        dc_output = layers.fc(dc_final_layer, scope='g_enc_z_fc', out_dim=1, non_linear_fn=None)

    return dc_final_layer, dc_output, tf.sigmoid(dc_output)
Example #16
    def test_W_gradient(self):

        # Test the gradients with respect to W
        W = np.random.randint(low=-5, high=5, size=(4, 5))
        b = np.random.randint(low=-5, high=5, size=(4, 1))
        x = np.random.randint(low=-5, high=5, size=(5, 1))
        layer = layers.fc(InputSize=5, NumHidden=4, W=W, b=b)
        for i in range(4):
            for j in range(5):
                dydWij_act = layer.W_gradient(x, i, j)
                dydWij_exp = np.zeros((4, 1))
                dydWij_exp[i] = x[j]
                nptest.assert_array_equal(
                    dydWij_act,
                    dydWij_exp,
                    err_msg=
                    "fc layer W_gradient incorrect for \n W= \n {} \n and \n b= \n {}"
                    .format(W, b))

        # Test return as tensor - dy(k)/dW(i,j) = x(j) if i==k, otherwise 0.
        dydW_act = layer.W_gradient(x)
        dydW_exp = np.zeros((4, 4, 5))
        for i in range(4):
            for j in range(5):
                dydW_exp[i, i, j] = x[j]
        nptest.assert_array_equal(
            dydW_act,
            dydW_exp,
            err_msg=
            "fc layer W_gradient tensor return incorrect for \n W= \n {} \n and \n b = \n {}"
            .format(W, b))
Example #17
    def test_x_gradient(self):

        # Test the gradients with respect to x
        W = np.random.randint(low=-5, high=5, size=(4, 5))
        b = np.random.randint(low=-5, high=5, size=(4, 1))
        x = np.random.randint(low=-5, high=5, size=(5, 1))
        layer = layers.fc(InputSize=5, NumHidden=4, W=W, b=b)
        for i in range(4):
            dydxi_act = layer.x_gradient(x, i)
            dydxi_exp = W[:, i]
            nptest.assert_array_equal(
                dydxi_act,
                dydxi_exp,
                err_msg=
                "fc layer x_gradient incorrect for \n W= \n {} \n and \n b= \n {}"
                .format(W, b))

        # Test return as tensor - dy/dx = W
        dydx_act = layer.x_gradient(x)
        dydx_exp = W
        nptest.assert_array_equal(
            dydx_act,
            dydx_exp,
            err_msg=
            "fc layer x_gradient tensor return incorrect for \n W = \n {} and \n b = \n {}"
            .format(W, b))
Example #18
    def test_b_gradient(self):

        # Test the gradients with respect to b
        W = np.random.randint(low=-5, high=5, size=(4, 5))
        b = np.random.randint(low=-5, high=5, size=(4, 1))
        x = np.random.randint(low=-5, high=5, size=(5, 1))
        layer = layers.fc(InputSize=5, NumHidden=4, W=W, b=b)
        for i in range(4):
            dydb_act = layer.b_gradient(x, i)
            dydb_exp = np.zeros((4, 1))
            dydb_exp[i] = 1
            nptest.assert_array_equal(
                dydb_act,
                dydb_exp,
                err_msg=
                "fc layer b_gradient incorrect for \n W= \n {} \n and \n b= \n {}"
                .format(W, b))

        # Test return as tensor - dy/db = Identity matrix in hidden dimension
        dydb_act = layer.b_gradient(x)
        dydb_exp = np.eye(4)
        nptest.assert_array_equal(
            dydb_act,
            dydb_exp,
            err_msg=
            "fc layer b_gradient tensor return incorrect for \n W = \n {} \n and \n b = \n {}"
            .format(W, b))
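These tests encode the closed-form derivatives of the affine map y = Wx + b: dy_k/dW_ij equals x_j when i == k (and 0 otherwise), dy/dx = W, and dy/db = I. A short NumPy sketch, independent of the layers.fc class under test, that verifies dy/dx = W by central finite differences:

import numpy as np

rng = np.random.RandomState(0)
W = rng.randn(4, 5)
b = rng.randn(4, 1)
x = rng.randn(5, 1)

def y(x):
    return W @ x + b

eps = 1e-6
jac = np.zeros((4, 5))
for j in range(5):              # numerical Jacobian dy/dx, column by column
    dx = np.zeros_like(x)
    dx[j] = eps
    jac[:, j] = ((y(x + dx) - y(x - dx)) / (2 * eps)).ravel()

print(np.allclose(jac, W))      # True: dy/dx is exactly W for an affine layer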
Example #19
    def __call__(self, input):
        with tf.variable_scope(self.name, reuse=self.reuse):
            input = ly.fc(input, 7 * 7 * 128, name='fc_0')
            input = ly.bn_layer(input, name='bn_0')
            input = tf.nn.leaky_relu(input)

            input = tf.reshape(input, (-1, 7, 7, 128))

            input = ly.deconv2d(input,
                                output_channel=64,
                                output_size=14,
                                strides=2,
                                name='deconv_0')
            input = ly.bn_layer(input, name='bn_1')
            input = tf.nn.leaky_relu(input)

            input = ly.deconv2d(input,
                                output_channel=1,
                                output_size=28,
                                strides=2,
                                name='deconv_1')
            input = ly.bn_layer(input, name='bn_2')
            input = tf.nn.sigmoid(input)

        return input  ## (-1,28,28,1)
Example #20
    def _decoder(self, z):

        with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):

            z = fc(z, 3 * 3 * self._nd_latent, 'fc-z')
            z = tf.reshape(z, (-1, 3, 3, self._nd_latent))

            h1 = conv2d_transpose(z,
                                  256,
                                  name='up1',
                                  padding='VALID',
                                  training=self.is_training,
                                  use_bn=True)

            h2 = conv2d_transpose(h1,
                                  128,
                                  name='up2',
                                  training=self.is_training,
                                  use_bn=True)

            h3 = conv2d_transpose(h2, 64, name='up3')

            recon = conv2d(h3, self._img_shape[-1], kernel_size=1, name='out')

            return tf.nn.sigmoid(recon)
Example #21
    def _build_q_head(self, input_state):
        self.w_value, self.b_value, self.value = layers.fc('fc_value', input_state, 1, activation='linear')
        self.w_adv, self.b_adv, self.advantage = layers.fc('fc_advantage', input_state, self.num_actions, activation='linear')

        self.output_layer = (
            self.value + self.advantage
            - tf.reduce_mean(
                self.advantage,
                axis=1,
                keepdims=True
            )
        )

        q_selected_action = tf.reduce_sum(self.output_layer * self.selected_action_ph, axis=1)
        diff = tf.subtract(self.target_ph, q_selected_action)
        return self._huber_loss(diff)
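This is the dueling-network aggregation Q(s, a) = V(s) + A(s, a) - mean_a A(s, a); subtracting the mean advantage removes the otherwise unidentifiable offset between V and A. A tiny NumPy illustration with made-up numbers:

import numpy as np

value = np.array([[10.0], [2.0]])                 # V(s), shape [batch, 1]
advantage = np.array([[1.0, -1.0, 0.0],
                      [3.0,  0.0, 0.0]])          # A(s, a), shape [batch, num_actions]
q = value + advantage - advantage.mean(axis=1, keepdims=True)
print(q)
# [[11.  9. 10.]
#  [ 4.  1.  1.]]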
Example #22
    def _build_value_head(self):
        self.critic_target_ph = tf.placeholder('float32', [None],
                                               name='target')
        self.wv, self.bv, self.output_layer_v = layers.fc('fc_value4',
                                                          self.ox,
                                                          1,
                                                          activation='linear')

        # Advantage critic
        self.adv_critic = tf.subtract(self.critic_target_ph,
                                      tf.reshape(self.output_layer_v, [-1]))
        # Critic loss
        if self.clip_loss_delta > 0:
            quadratic_part = tf.reduce_mean(
                tf.pow(
                    tf.minimum(tf.abs(self.adv_critic), self.clip_loss_delta),
                    2))
            linear_part = tf.subtract(tf.abs(self.adv_critic), quadratic_part)
            # Note: a standard L2 loss would include a factor of 0.5; the paper's authors
            # also recommend scaling the value-function gradients by 0.5, hence the 0.5 here.
            self.critic_loss = tf.multiply(tf.constant(0.5), tf.nn.l2_loss(quadratic_part) + \
                self.clip_loss_delta * linear_part)
        else:
            self.critic_loss = 0.5 * tf.reduce_mean(tf.pow(self.adv_critic, 2))

        return self.critic_loss
Example #23
    def _discriminator(self, img, scope, reuse=False):

        with tf.variable_scope(scope, reuse=reuse):
            x = img
            h1 = conv2d(x,
                        128,
                        strides=2,
                        name='down1',
                        training=self.is_training,
                        use_bn=True)
            h2 = conv2d(h1,
                        256,
                        strides=2,
                        name='down2',
                        training=self.is_training,
                        use_bn=True)
            h3 = conv2d(h2,
                        512,
                        strides=2,
                        name='down3',
                        training=self.is_training,
                        use_bn=True)

            h4_flat = tf.layers.flatten(h3)

            logits = fc(h4_flat, 1, name='out', activation_fn=lambda x: x)

            return logits, tf.nn.sigmoid(logits)
Example #24
    def _similarity(self, sim1, sim2, scope, reuse=False):

        with tf.variable_scope(scope, reuse=reuse):
            l1_dist = tf.abs(sim1 - sim2)
            logits = fc(l1_dist, 1, name='logits', activation_fn=lambda x: x)

        return logits, tf.nn.sigmoid(logits)
Example #25
def inference(input, batch_size, num_segments, lstm_keep_prob=0.5, conv_keep_prob=1.0, train_conv123=False, train_conv45=False, train_fc67=False):
    # input size is [num_segments, batch_size, 224, 224, num_length*3/2]
    fc6_per_step = []
    with tf.variable_scope("conv"):
        for time_step in range(num_segments):
            if time_step > 0: tf.get_variable_scope().reuse_variables()
            fc8 = vgg16.inference(input[time_step, :, :, :, :], conv_keep_prob, train_conv123, train_conv45, train_fc67, False)
            fc7 = tf.get_default_graph().get_tensor_by_name("conv/fc7/fc7:0")
            fc6 = tf.get_default_graph().get_tensor_by_name("conv/fc6/fc6:0")
            # output is [batch_size*num_segments, 4096]
            fc6_per_step.append(fc6)

    with tf.variable_scope("lstm"):
        hidden_size = 512
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
        lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, input_keep_prob=lstm_keep_prob, output_keep_prob=lstm_keep_prob)
        cell = lstm_cell
        _initial_state = cell.zero_state(batch_size, tf.float32)

        outputs = []
        state = _initial_state
        for time_step in range(num_segments):
            if time_step > 0: tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(fc6_per_step[time_step], state)
            outputs.append(cell_output)
        final_state = state
        lstm_params = [var for var in tf.global_variables() if var.name.startswith("lstm")]
        for var in lstm_params:
            tf.add_to_collection("params", var)
    logits = layers.fc(tf.concat(outputs, 0, name='concat'), 101, relu=False, name='cls')

    return logits
Example #26
def rep_cnn(unscaled_images, SPIN_N, scope, **kwargs):
    """
    CNN with spin_N x k kernels
    """

    logging.debug("rep_cnn called")

    with tf.variable_scope(scope):
        scaled_images = tf.cast(unscaled_images, tf.float32)
        activ = tf.nn.leaky_relu
        h = scaled_images
        if len(h.shape) < 4:
            h = tf.expand_dims(h, axis=-1)
        hA = activ(
            conv_pseudo_1d(h,
                           f'conv_over_spins',
                           nf=64,
                           rf=(1, h.shape[2]),
                           stride=(1, h.shape[2]),
                           pad='VALID',
                           **kwargs))
        hB = activ(
            conv_pseudo_1d(h,
                           f'conv_over_reads',
                           nf=64,
                           rf=(h.shape[1], 1),
                           stride=(h.shape[1], 1),
                           pad='VALID',
                           **kwargs))

        h = tf.concat((tf.layers.flatten(hA), tf.layers.flatten(hB)), axis=1)

        h3 = conv_to_fc(h)
        return activ(fc(h3, 'fc1', nh=64))
Example #27
    def build_graph(self):
        self.iterator = tf.data.Iterator.from_structure(
            (tf.float32, tf.int32),
            (tf.TensorShape([None, 227, 227, 3]), tf.TensorShape([None]))
        )
        self.inputs, self.labels = self.iterator.get_next()

        self.conv1 = layers.conv(self.inputs, [11, 11], 96, [4, 4],
                                 padding='VALID', name='conv1', mask=True)
        norm1 = layers.lrn(self.conv1, 2, 1e-05, 0.75, name='norm1')
        pool1 = layers.max_pool(norm1, [3, 3], [2, 2], padding='VALID',
                                name='pool1')

        self.conv2 = layers.conv(pool1, [5, 5], 256, [1, 1], groups=2,
                                 name='conv2', mask=True)
        norm2 = layers.lrn(self.conv2, 2, 1e-05, 0.75, name='norm2')
        pool2 = layers.max_pool(norm2, [3, 3], [2, 2], padding='VALID',
                                name='pool2')

        self.conv3 = layers.conv(pool2, [3, 3], 384, [1, 1], name='conv3',
                                 mask=True)

        self.conv4 = layers.conv(self.conv3, [3, 3], 384, [1, 1], groups=2,
                                 name='conv4', mask=True)

        self.conv5 = layers.conv(self.conv4, [3, 3], 256, [1, 1], groups=2,
                                 name='conv5', mask=True)
        pool5 = layers.max_pool(self.conv5, [3, 3], [2, 2], padding='VALID',
                                name='pool5')

        self.keep_prob = tf.get_variable('keep_prob', shape=(),
                                         trainable=False)

        flattened = tf.reshape(pool5, [-1, 6 * 6 * 256])
        fc6 = layers.fc(flattened, 4096, name='fc6')
        dropout6 = layers.dropout(fc6, self.keep_prob)

        fc7 = layers.fc(dropout6, 4096, name='fc7')
        dropout7 = layers.dropout(fc7, self.keep_prob)

        self.logits = layers.fc(dropout7, self.num_classes, relu=False,
                                name='fc8')
        self.probs_op = tf.nn.softmax(self.logits)
        self.pred_op = tf.argmax(input=self.logits, axis=1)
        corrects_op = tf.equal(tf.cast(self.pred_op, tf.int32),
                               self.labels)
        self.acc_op = tf.reduce_mean(tf.cast(corrects_op, tf.float32))
Example #28
def discriminator(input_data, activation='swish', scope='discriminator', reuse=False, bn_phaze=False):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        #if reuse:
        #    tf.get_variable_scope().reuse_variables()

        if activation == 'swish':
            act_func = util.swish
        elif activation == 'relu':
            act_func = tf.nn.relu
        elif activation == 'tanh':
            act_func = tf.nn.tanh
        else:
            act_func = tf.nn.sigmoid

        l = layers.conv(input_data, scope='conv1', filter_dims=[3, 3, g_dense_block_depth/2], stride_dims=[1, 1],
                        non_linear_fn=None, bias=False)

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_0')

        l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_1')

        l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
                                     act_func=act_func, bn_phaze=bn_phaze, scope='block_2')

        #l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        #l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
        #                             act_func=act_func, bn_phaze=bn_phaze, scope='block_3')

        #l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        #l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
        #                             act_func=act_func, bn_phaze=bn_phaze, scope='block_4')

        #l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth/2], num_layers=3,
        #                             act_func=act_func, bn_phaze=bn_phaze, scope='block_5')

        #l = tf.nn.avg_pool(l, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

        #l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth], num_layers=3,
        #                             act_func=act_func, bn_phaze=bn_phaze, scope='block_6')

        #l = add_residual_dense_block(l, filter_dims=[3, 3, g_dense_block_depth], num_layers=3,
        #                             act_func=act_func, bn_phaze=bn_phaze, scope='block_7')

        # dc_final_layer = batch_norm_conv(last_dense_layer, b_train=bn_phaze, scope='last_dense_layer')

        l = layers.global_avg_pool(l, representation_dim)
        dc_final_layer = l

        dc_output = layers.fc(dc_final_layer, scope='g_enc_z_fc', out_dim=1, non_linear_fn=None)

    return dc_final_layer, dc_output, tf.sigmoid(dc_output)
Example #29
def logit_small(x,
                num_classes,
                is_training=True,
                update_batch_stats=True,
                stochastic=True,
                seed=1234):

    if is_training:
        scope = tf.name_scope("Training")

    else:
        scope = tf.name_scope("Testing")

    with scope:
        h = x

        rng = np.random.RandomState(seed)

        h = L.fc(h,
                 dim_in=x.shape[1],
                 dim_out=64,
                 seed=rng.randint(123456),
                 name="fc1")
        h = L.lrelu(
            L.bn(h,
                 64,
                 is_training=is_training,
                 update_batch_stats=update_batch_stats,
                 name='fc1_normalized'), FLAGS.lrelu_a)
        h = L.fc(h,
                 dim_in=64,
                 dim_out=64,
                 seed=rng.randint(123456),
                 name="fc2")
        h = L.lrelu(
            L.bn(h,
                 64,
                 is_training=is_training,
                 update_batch_stats=update_batch_stats,
                 name='fc2_normalized'), FLAGS.lrelu_a)
        h = L.fc(h,
                 dim_in=64,
                 dim_out=num_classes,
                 seed=rng.randint(123456),
                 name="fc3")
        return h
Example #30
 def test_fc_shape(self):
     with self.test_session() as sess:
         # TODO Come up with better/useful testcase
         x = tf.zeros((50,10), dtype=tf.float32)
         expected_fc_out = tf.zeros((50,4),dtype=tf.float32, name='expectedout')
         actual_fc_out, weights, biases = layers.fc(x, 10, 4, name='fc', relu=False)
         sess.run(tf.initializers.variables([weights, biases]))
         self.assertAllEqual(tf.shape(actual_fc_out), tf.shape(expected_fc_out))
Example #31
    def _build_q_head(self, vd, input_state):
        self.w_out, self.b_out, self.output_layer = layers.fc(
            'fc_out', vd, input_state, self.num_actions, activation="linear")
        self.q_selected_action = tf.reduce_sum(self.output_layer *
                                               self.selected_action_ph,
                                               axis=1)

        diff = tf.subtract(self.target_ph, self.q_selected_action)
        return self._value_function_loss(diff)
Example #32
 def __init__(self):
     self.lr = 0.01
     # conv net
     self.c1 = conv(1, 6, kernel=5, learning_rate=self.lr)
     self.relu1 = relu()
     self.s2 = max_pool(kernel=2, stride=2)
     self.c3 = conv(6, 16, kernel=5, learning_rate=self.lr)
     self.relu3 = relu()
     self.s4 = max_pool(kernel=2, stride=2)
     self.c5 = conv(16, 120, kernel=4, learning_rate=self.lr)
     self.relu5 = relu()
     # fc net
     self.f6 = fc(120, 84, learning_rate=self.lr)
     self.relu6 = relu()
     self.f7 = fc(84, 10)
     self.sig7 = softmax()
     # record the shape between the conv net and fc net
     self.conv_out_shape = None
Example #33
 def _build_network(self):
     with tf.name_scope('input'):
         state = tf.placeholder(dtype=tf.float32, shape=[None, self.STATE_SPACE])
     with tf.name_scope('hidden'):
         y1, _, _ = layers.fc(state, n_neurons=self.HIDDEN_NEURONS, activation=tf.nn.tanh)
     with tf.name_scope('q_value'):
         q_values, _, _ = layers.fc(y1, n_neurons=self.ACTION_SPACE)
     # loss
     with tf.name_scope('loss'):
         action = tf.placeholder(tf.int32, [None])
         action_mask = tf.one_hot(action, depth=self.ACTION_SPACE, on_value=1.0, off_value=0.0, dtype=tf.float32)
         q_current = tf.reduce_sum(tf.multiply(q_values, action_mask), axis=1)
         q_target = tf.placeholder(tf.float32, [None])
         loss = tf.reduce_mean(tf.squared_difference(q_current, q_target))
         tf.summary.scalar('loss', loss)
     # train
     with tf.name_scope('train'):
         global_step = tf.Variable(0, trainable=False, name='global_step')
         train_step = tf.train.AdamOptimizer().minimize(loss, global_step=global_step)
     # tensor board
     merged = tf.summary.merge_all()
     train_writer = tf.summary.FileWriter('/tmp/tensorflow-drl/dqn/train', self._sess.graph)
     test_writer = tf.summary.FileWriter('/tmp/tensorflow-drl/dqn/test')
     #
     self._sess.run(tf.global_variables_initializer())
     #
     return {'state': state,
             'q_values': q_values,
             'action': action,
             'q_current': q_current,
             'q_target': q_target,
             'loss': loss,
             'train_step': train_step,
             'global_step': global_step,
             'merged': merged,
             'train_writer': train_writer,
             'test_writer': test_writer}