Exemplo n.º 1
0
    def _get_DQN_prediction(self, image):
        """Build the Q-value network on top of ``image``.

        The original docstring states that ``image`` holds values in
        [0, 255]. The input is fed to the first convolution as-is (the
        ``/ 255.0`` rescaling is disabled in this variant).

        Returns the Q-value tensor wrapped in ``tf.identity`` under the
        name ``'Qvalue'``.
        """

        def leaky_relu(x, name):
            # LeakyReLU with a fixed slope of 0.01 for the hidden FC layer.
            return LeakyReLU.f(x, 0.01, name)

        # (out_channel, kernel_shape) for each conv stage followed by a 2x pool.
        pooled_stages = ((32, 5), (32, 5), (64, 4))

        with argscope(Conv2D, nl=PReLU.f, use_bias=True):
            features = image
            for idx, (channels, kernel) in enumerate(pooled_stages):
                features = Conv2D('conv%d' % idx, features,
                                  out_channel=channels, kernel_shape=kernel)
                features = MaxPooling('pool%d' % idx, features, 2)
            # Last convolution has no pooling after it.
            features = Conv2D('conv3', features, out_channel=64, kernel_shape=3)

            features = FullyConnected('fc0', features, 512, nl=leaky_relu)
            # For reference, the original architecture was:
            #   conv0: 32 channels, kernel 8, stride 4
            #   conv1: 64 channels, kernel 4, stride 2
            #   conv2: 64 channels, kernel 3

        if DUELING:
            # Dueling head: a scalar state value plus per-action advantages,
            # with the mean advantage subtracted before they are combined.
            V = FullyConnected('fctV', features, 1, nl=tf.identity)
            As = FullyConnected('fctA', features, NUM_ACTIONS, nl=tf.identity)
            # NOTE(review): ``keep_dims`` looks like the older TensorFlow
            # spelling of ``keepdims`` -- confirm against the TF version in use.
            mean_advantage = tf.reduce_mean(As, 1, keep_dims=True)
            centered_value = V - mean_advantage
            Q = tf.add(As, centered_value)
        else:
            Q = FullyConnected('fct', features, NUM_ACTIONS, nl=tf.identity)
        return tf.identity(Q, name='Qvalue')
Exemplo n.º 2
0
    def _get_DQN_prediction(self, image):
        """Build the Q-value network on top of ``image``.

        The original docstring states that ``image`` holds values in
        [0, 255]; it is divided by 255.0 before entering the network.

        Returns the tensor produced by the final ``'fct'`` fully connected
        layer, which has ``NUM_ACTIONS`` outputs.
        """

        def leaky_relu(x, name):
            # LeakyReLU with a fixed slope of 0.01 for the hidden FC layer.
            return LeakyReLU.f(x, 0.01, name)

        scaled = image / 255.0
        with argscope(Conv2D, nl=PReLU.f, use_bias=True):
            # Each step wraps the previous output, so the chain below is the
            # same sequence of layers as one long fluent expression.
            net = LinearWrap(scaled)
            net = net.Conv2D('conv0', out_channel=32, kernel_shape=5)
            net = net.MaxPooling('pool0', 2)
            net = net.Conv2D('conv1', out_channel=32, kernel_shape=5)
            net = net.MaxPooling('pool1', 2)
            net = net.Conv2D('conv2', out_channel=64, kernel_shape=4)
            net = net.MaxPooling('pool2', 2)
            net = net.Conv2D('conv3', out_channel=64, kernel_shape=3)
            # For reference, the original architecture was:
            #   conv0: 32 channels, kernel 8, stride 4
            #   conv1: 64 channels, kernel 4, stride 2
            #   conv2: 64 channels, kernel 3
            net = net.FullyConnected('fc0', 512, nl=leaky_relu)
            net = net.FullyConnected('fct', NUM_ACTIONS, nl=tf.identity)
            # Calling the wrapper unwraps it to the underlying tensor.
            return net()
Exemplo n.º 3
0
 def _get_DQN_prediction(self, image):
     """Build the convolutional Q-value layers on top of ``image``.

     ``image`` is divided by 255.0, passed through four ReLU convolutions
     (the first three each followed by a 2x max-pool), a 512-unit fully
     connected layer with LeakyReLU (slope 0.01), and a final linear
     fully connected layer with ``NUM_ACTIONS`` outputs.
     """
     # TODO: Do we need to add other pre-processing? e.g., subtract mean
     image = image / 255.0
     # TODO: The network structure can be improved?
     with argscope(Conv2D, nl=tf.nn.relu,
                   use_bias=True):  # Activation for each conv layer
         l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
         l = MaxPooling('pool0', l, 2)
         l = Conv2D('conv1', l, out_channel=32, kernel_shape=5)
         l = MaxPooling('pool1', l, 2)
         l = Conv2D('conv2', l, out_channel=64, kernel_shape=4)
         l = MaxPooling('pool2', l, 2)
         # Fourth convolution gets its own name: reusing 'conv2' here would
         # make two layers share one variable scope.
         l = Conv2D('conv3', l, out_channel=64, kernel_shape=3)
         # For reference, the original architecture was:
         #   conv0: 32 channels, kernel 8, stride 4
         #   conv1: 64 channels, kernel 4, stride 2
         #   conv2: 64 channels, kernel 3
         l = FullyConnected('fc0',
                            l,
                            512,
                            nl=lambda x, name: LeakyReLU.f(x, 0.01, name))
         # ``nl`` expects a callable; pass ``tf.identity`` itself rather than
         # calling it with no arguments (which raises TypeError).
         l = FullyConnected('fct', l, NUM_ACTIONS, nl=tf.identity)
         # NOTE(review): no return statement is visible in this excerpt --
         # confirm that the Q-value tensor ``l`` is returned further down.