示例#1
0
    def _get_NN_prediction(self, image):
        """Build the convolutional network and return ``(policy, value)``.

        The slice ``image[:, 0, :, 0]`` is first passed to
        ``self._create_unnary_variables_with_summary`` together with the
        size and name tuples below (that helper is defined elsewhere). The
        input is then divided by 255 and fed through four ReLU ``Conv2D``
        layers, the first three each followed by ``MaxPooling`` with
        argument 2, then a 512-unit ``FullyConnected`` layer with ``PReLU``.

        Args:
            image: input tensor; it is indexed with four subscripts here.
                Presumably holds 0-255 pixel values -- TODO confirm.

        Returns:
            Tuple ``(policy, value)``: the ``fc-pi`` output with
            ``self.number_of_actions`` units and the single-unit ``fc-v``
            output, both built with ``nl=tf.identity``.
        """
        summary_sizes = (10, 10, 6, 6, 6)
        summary_names = ("rewards", "levels", "lives0", "lives1", "lives2")
        self._create_unnary_variables_with_summary(
            image[:, 0, :, 0], summary_sizes, summary_names)

        net = image / 255.0
        # (out_channel, kernel_shape) of each conv layer that is followed by
        # a pooling layer; the last conv layer has no pooling after it.
        pooled_convs = ((32, 5), (32, 5), (64, 4))
        with argscope(Conv2D, nl=tf.nn.relu):
            for idx, (channels, kernel) in enumerate(pooled_convs):
                net = Conv2D('conv{}'.format(idx),
                             net,
                             out_channel=channels,
                             kernel_shape=kernel)
                net = MaxPooling('pool{}'.format(idx), net, 2)
            net = Conv2D('conv3', net, out_channel=64, kernel_shape=3)

        hidden = FullyConnected('fc0', net, 512, nl=tf.identity)
        hidden = PReLU('prelu', hidden)
        policy = FullyConnected('fc-pi',
                                hidden,
                                out_dim=self.number_of_actions,
                                nl=tf.identity)
        value = FullyConnected('fc-v', hidden, 1, nl=tf.identity)

        # NOTE: activation summaries (summary.add_activation_summary) for the
        # conv, fc0, policy and value tensors used to be sketched here behind
        # a DEBUGING_INFO flag; they are intentionally not emitted.
        return policy, value
示例#2
0
 def _get_NN_prediction(self, image):
     """Build the network selected by ``NETWORK_ARCH``; return ``(policy, value)``.

     ``NETWORK_ARCH`` and ``NUM_ACTIONS`` are module-level names defined
     outside this method. The input is divided by 255 and passed through
     one of two ReLU convolutional stacks, then a 512-unit fully connected
     layer with PReLU and two linear heads.

     Args:
         image: input tensor fed to the first convolution.
             Presumably holds 0-255 pixel values -- TODO confirm.

     Returns:
         Tuple ``(policy, value)``: the ``fc-pi`` output with
         ``NUM_ACTIONS`` units and the single-unit ``fc-v`` output.

     Raises:
         ValueError: if ``NETWORK_ARCH`` is neither ``'1'`` nor
             ``'nature'``. Previously this case surfaced later as an
             unbound-local error on ``l``.
     """
     image = image / 255.0
     with argscope(Conv2D, nl=tf.nn.relu):
         if NETWORK_ARCH == '1':
             l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
             l = MaxPooling('pool0', l, 2)
             l = Conv2D('conv1', l, out_channel=32, kernel_shape=5)
             l = MaxPooling('pool1', l, 2)
             l = Conv2D('conv2', l, out_channel=64, kernel_shape=4)
             l = MaxPooling('pool2', l, 2)
             l = Conv2D('conv3', l, out_channel=64, kernel_shape=3)
             # conv3 output: [None, 10, 10, 64]
         elif NETWORK_ARCH == 'nature':
             l = Conv2D('conv0',
                        image,
                        out_channel=32,
                        kernel_shape=8,
                        stride=4)
             l = Conv2D('conv1',
                        l,
                        out_channel=64,
                        kernel_shape=4,
                        stride=2)
             l = Conv2D('conv2', l, out_channel=64, kernel_shape=3)
             # conv2 output: [None, 11, 11, 64]
         else:
             # Fail with a clear message instead of leaving `l` unbound.
             raise ValueError(
                 "Unknown NETWORK_ARCH: {!r}".format(NETWORK_ARCH))

     # Named pass-through ops. Their Python handles are not used in this
     # method; presumably the tensors are looked up by name elsewhere.
     tf.identity(l, name='convolutional-2')
     l = FullyConnected('fc0', l, 512, nl=tf.identity)
     l = PReLU('prelu', l)
     tf.identity(l, name='fully-connected')
     policy = FullyConnected('fc-pi',
                             l,
                             out_dim=NUM_ACTIONS,
                             nl=tf.identity)
     value = FullyConnected('fc-v', l, 1, nl=tf.identity)
     return policy, value
示例#3
0
    def _get_NN_prediction(self, image):
        """Build a fully connected network with one extra predicted feature.

        The input is flattened to rows of 24 values. One additional feature
        is appended per row: an extrapolated y coordinate, computed from
        four entries of ``image`` and clamped to [-1, 1]. According to the
        original author's note it estimates where the ball will be when it
        reaches the agent's plane. The result goes through
        ``self.number_of_layers`` blocks of ``FullyConnected`` + ``PReLU``
        with ``self.number_of_neurons`` units each.

        Args:
            image: input tensor; it is indexed with four subscripts and
                reshaped to ``[-1, 24]``.

        Returns:
            Tuple ``(policy, value)``: the ``fc-pi`` output with
            ``self.number_of_actions`` units and the single-unit ``fc-v``
            output.
        """
        flat_input = tf.reshape(image, [-1, 24])

        # Newer and older samples of the two coordinates used for the
        # extrapolation (index 3 is "new", index 2 is "old").
        x_new = image[:, 0, 1, 3]
        y_new = image[:, 0, 0, 3]
        x_old = image[:, 0, 1, 2]
        y_old = image[:, 0, 0, 2]

        # Linear extrapolation of y to x == 0.125; the 0.005 term keeps the
        # denominator away from zero when x barely changed.
        y_delta = y_new - y_old
        x_remaining = 0.125 - x_new
        numerator = y_delta * x_remaining
        denominator = x_new - x_old + 0.005
        y_hit = y_new + numerator / denominator

        y_hit_clamped = tf.maximum(tf.minimum(y_hit, 1), -1)
        y_hit_clamped = tf.expand_dims(y_hit_clamped, 1)
        summary.add_activation_summary(y_hit_clamped, "yPredicted")

        hidden = tf.concat(1, [flat_input, y_hit_clamped])

        for layer_idx in xrange(self.number_of_layers):
            dense = FullyConnected('fc{}'.format(layer_idx),
                                   hidden,
                                   self.number_of_neurons,
                                   nl=tf.identity)
            hidden = PReLU('prelu{}'.format(layer_idx), dense)

        policy = FullyConnected('fc-pi',
                                hidden,
                                out_dim=self.number_of_actions,
                                nl=tf.identity)
        value = FullyConnected('fc-v', hidden, 1, nl=tf.identity)
        return policy, value
示例#4
0
    def _get_NN_prediction(self, image):
        """Build per-reward-event heads and return their weighted sum.

        The slice ``image[:, 0, :, 0]`` is passed to
        ``self._create_unnary_variables_with_summary`` (defined elsewhere).
        The first ``NUMBER_OF_REWARD_EVENTS`` entries of that slice are
        taken from the raw input, before it is divided by 255, and used as
        per-example weights. A shared convolutional stack feeds one
        ``FullyConnected`` + ``PReLU`` + policy/value head group per reward
        event. The returned policy and value are the weighted sums of the
        per-event heads.

        Args:
            image: input tensor; it is indexed with four subscripts here.
                Presumably holds 0-255 pixel values -- TODO confirm.

        Returns:
            Tuple ``(policy, value)`` of summed, weighted head outputs;
            each policy head has ``self.number_of_actions`` units and each
            value head one unit.
        """
        self._create_unnary_variables_with_summary(
            image[:, 0, :, 0], (10, 10, 6, 6, 6),
            ("rewards", "levels", "lives0", "lives1", "lives2"))
        NUMBER_OF_REWARD_EVENTS = 10

        # One column vector of weights per reward event, read from the
        # unscaled input.
        rewards_events = [
            tf.reshape(image[:, 0, event, 0], (-1, 1))
            for event in range(NUMBER_OF_REWARD_EVENTS)
        ]

        image = image / 255.0
        with argscope(Conv2D, nl=tf.nn.relu):
            lc0 = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
            lc0 = MaxPooling('pool0', lc0, 2)
            lc1 = Conv2D('conv1', lc0, out_channel=32, kernel_shape=5)
            lc1 = MaxPooling('pool1', lc1, 2)
            lc2 = Conv2D('conv2', lc1, out_channel=64, kernel_shape=4)
            lc2 = MaxPooling('pool2', lc2, 2)
            lc3 = Conv2D('conv3', lc2, out_channel=64, kernel_shape=3)

        policies = []
        values = []
        # Use the same constant as the weights above so the two lists can
        # never get out of step (zip below would silently truncate).
        for x in range(NUMBER_OF_REWARD_EVENTS):
            lfc0 = FullyConnected('fc0{}'.format(x), lc3, 512, nl=tf.identity)
            lfc0 = PReLU('prelu{}'.format(x), lfc0)
            policy = FullyConnected('fc-pi{}'.format(x),
                                    lfc0,
                                    out_dim=self.number_of_actions,
                                    nl=tf.identity)
            value = FullyConnected('fc-v{}'.format(x), lfc0, 1, nl=tf.identity)

            policies.append(policy)
            values.append(value)

        weighted_policies = []
        weighted_values = []
        for weight, policy, value in zip(rewards_events, policies, values):
            weighted_policies.append(tf.multiply(weight, policy))
            weighted_values.append(tf.multiply(weight, value))

        policy = tf.add_n(weighted_policies)
        value = tf.add_n(weighted_values)
        return policy, value
示例#5
0
 def _get_NN_prediction(self, image):
     """Build a fully connected network and return ``(policy, value)``.

     Stacks ``self.numberOfLayers`` blocks of ``FullyConnected`` (with
     ``self.numberOfNeurons`` units and ``nl=tf.identity``) followed by
     ``PReLU``, then adds two linear heads on the last hidden output.

     Args:
         image: input tensor fed directly to the first dense layer.

     Returns:
         Tuple ``(policy, value)``: the ``fc-pi`` output with
         ``self.number_of_actions`` units and the single-unit ``fc-v``
         output.
     """
     hidden = image
     for layer_idx in xrange(self.numberOfLayers):
         dense = FullyConnected('fc{}'.format(layer_idx),
                                hidden,
                                self.numberOfNeurons,
                                nl=tf.identity)
         hidden = PReLU('prelu{}'.format(layer_idx), dense)

     policy = FullyConnected('fc-pi',
                             hidden,
                             out_dim=self.number_of_actions,
                             nl=tf.identity)
     value = FullyConnected('fc-v', hidden, 1, nl=tf.identity)
     return policy, value
示例#6
0
 def _get_NN_prediction(self, image):
     """Build a fully connected network and return ``(policy, value)``.

     Applies ``self.number_of_layers`` rounds of ``FullyConnected`` (with
     ``self.number_of_neurons`` units, no built-in nonlinearity) followed
     by ``PReLU``, then two linear output heads.

     Args:
         image: input tensor fed directly to the first dense layer.

     Returns:
         Tuple ``(policy, value)``: the ``fc-pi`` output with
         ``self.number_of_actions`` units and the single-unit ``fc-v``
         output.
     """
     features = image
     for depth in xrange(self.number_of_layers):
         pre_activation = FullyConnected('fc{}'.format(depth),
                                         features,
                                         self.number_of_neurons,
                                         nl=tf.identity)
         features = PReLU('prelu{}'.format(depth), pre_activation)
         # An activation summary per hidden layer could be added here
         # (summary.add_activation_summary); it is currently disabled.

     policy = FullyConnected('fc-pi',
                             features,
                             out_dim=self.number_of_actions,
                             nl=tf.identity)
     value = FullyConnected('fc-v', features, 1, nl=tf.identity)
     return policy, value
示例#7
0
    def _get_NN_prediction(self, image):
        """Build the convolutional network and return ``(logits, value)``.

        The input is cast to ``tf.float32`` and divided by 255, then passed
        through four ReLU ``Conv2D`` layers (the first three each followed
        by ``MaxPooling`` with argument 2), a 512-unit ``FullyConnected``
        layer with ``PReLU``, and two output heads.

        Args:
            image: input tensor of any dtype castable to float32.
                Presumably holds 0-255 pixel values -- TODO confirm.

        Returns:
            Tuple ``(logits, value)``: the unnormalized policy from
            ``fc-pi`` with ``self.num_actions`` units, and the single-unit
            ``fc-v`` output.
        """
        net = tf.cast(image, tf.float32) / 255.0

        # (filters, kernel size) of each conv layer that is followed by a
        # pooling layer; the final conv layer is added separately.
        pooled_convs = ((32, 5), (32, 5), (64, 4))
        with argscope(Conv2D, activation=tf.nn.relu):
            for idx, (filters, kernel) in enumerate(pooled_convs):
                net = Conv2D('conv{}'.format(idx), net, filters, kernel)
                net = MaxPooling('pool{}'.format(idx), net, 2)
            net = Conv2D('conv3', net, 64, 3)

        hidden = PReLU('prelu', FullyConnected('fc0', net, 512))
        logits = FullyConnected('fc-pi', hidden, self.num_actions)
        value = FullyConnected('fc-v', hidden, 1)
        return logits, value
示例#8
0
    def _get_NN_prediction(self, image):
        """Build the convolutional network and return ``(policy, value)``.

        The input is divided by 255 and passed through four ReLU ``Conv2D``
        layers (the first three each followed by ``MaxPooling`` with
        argument 2), a 512-unit ``FullyConnected`` layer with ``PReLU``,
        and two linear heads.

        Args:
            image: input tensor fed to the first convolution.
                Presumably holds 0-255 pixel values -- TODO confirm.

        Returns:
            Tuple ``(policy, value)``: the ``fc-pi`` output with
            ``self.number_of_actions`` units and the single-unit ``fc-v``
            output.
        """
        scaled = image / 255.0
        with argscope(Conv2D, nl=tf.nn.relu):
            features = Conv2D('conv0', scaled, out_channel=32, kernel_shape=5)
            features = MaxPooling('pool0', features, 2)
            features = Conv2D('conv1', features, out_channel=32, kernel_shape=5)
            features = MaxPooling('pool1', features, 2)
            features = Conv2D('conv2', features, out_channel=64, kernel_shape=4)
            features = MaxPooling('pool2', features, 2)
            features = Conv2D('conv3', features, out_channel=64, kernel_shape=3)

        # The dense layer is linear; PReLU supplies the nonlinearity.
        hidden = PReLU('prelu',
                       FullyConnected('fc0', features, 512, nl=tf.identity))
        policy = FullyConnected('fc-pi',
                                hidden,
                                out_dim=self.number_of_actions,
                                nl=tf.identity)
        value = FullyConnected('fc-v', hidden, 1, nl=tf.identity)
        return policy, value
示例#9
0
    def add_column(self, previous_column_layers, column_num, trainable=True):
        """Build one column of dense layers that also reads a previous column.

        The new column starts from ``previous_column_layers[0]`` (the
        input). For each layer index ``i`` from 1 to
        ``self.number_of_layers[self.stage]`` a ``FullyConnected`` + ``PReLU``
        block with ``self.number_of_neurons[self.stage]`` units is applied
        to the column's latest entry. When the previous column has an entry
        at index ``i``, that entry is concatenated (previous first) with
        the new activation along axis 1. Policy and value heads are built
        on the last hidden entry, and their concatenation is appended as
        the column's final entry.

        Args:
            previous_column_layers: sequence of tensors from an earlier
                column; index 0 must be the network input.
            column_num: identifier formatted into every layer name of this
                column.
            trainable: forwarded to every ``FullyConnected`` layer created
                here.

        Returns:
            Tuple ``(new_column, policy, value)`` where ``new_column`` is
            the list of layer tensors described above.
        """
        # The single-argument parenthesized form prints the same text on
        # Python 2 and is valid syntax on Python 3.
        print("Creating column:{}".format(column_num))
        column_suffix = "-column-{}".format(column_num)

        # Index 0 of every column is the shared input.
        new_column = [previous_column_layers[0]]
        hidden_layer_count = self.number_of_layers[self.stage]
        for i in range(1, hidden_layer_count + 1):
            l = FullyConnected('fc-{}{}'.format(i, column_suffix),
                               new_column[-1],
                               self.number_of_neurons[self.stage],
                               nl=tf.identity,
                               trainable=trainable)
            l = PReLU('prelu-{}{}'.format(i, column_suffix), l)

            if i < len(previous_column_layers):
                # NOTE(review): tf.concat(1, values) is the argument order
                # used before TensorFlow 1.0 (axis first) -- confirm it
                # matches the TensorFlow version this file targets.
                new_layer = tf.concat(1, [previous_column_layers[i], l])
            else:
                new_layer = l
            new_column.append(new_layer)

        last_hidden_layer = new_column[-1]
        policy = FullyConnected('fc-pi{}'.format(column_suffix),
                                last_hidden_layer,
                                out_dim=self.number_of_actions,
                                nl=tf.identity,
                                trainable=trainable)
        value = FullyConnected('fc-v{}'.format(column_suffix),
                               last_hidden_layer,
                               1,
                               nl=tf.identity,
                               trainable=trainable)

        # Expose the heads as the column's last entry.
        visible_layer = tf.concat(1, [policy, value])
        new_column.append(visible_layer)
        return new_column, policy, value