def _get_NN_prediction(self, image):
    """Build the shared conv trunk and the policy/value heads.

    Also registers summary variables for the unary game-state channels
    (rewards, levels and three lives counters) taken from
    ``image[:, 0, :, 0]`` before the pixels are normalized.

    Returns:
        (policy, value): unnormalized action logits and the scalar
        state-value estimate.
    """
    self._create_unnary_variables_with_summary(
        image[:, 0, :, 0],
        (10, 10, 6, 6, 6),
        ("rewards", "levels", "lives0", "lives1", "lives2"))
    # Scale raw pixel values into [0, 1].
    net = image / 255.0
    with argscope(Conv2D, nl=tf.nn.relu):
        net = Conv2D('conv0', net, out_channel=32, kernel_shape=5)
        net = MaxPooling('pool0', net, 2)
        net = Conv2D('conv1', net, out_channel=32, kernel_shape=5)
        net = MaxPooling('pool1', net, 2)
        net = Conv2D('conv2', net, out_channel=64, kernel_shape=4)
        net = MaxPooling('pool2', net, 2)
        net = Conv2D('conv3', net, out_channel=64, kernel_shape=3)
    net = FullyConnected('fc0', net, 512, nl=tf.identity)
    net = PReLU('prelu', net)
    policy = FullyConnected('fc-pi', net, out_dim=self.number_of_actions, nl=tf.identity)
    value = FullyConnected('fc-v', net, 1, nl=tf.identity)
    return policy, value
def _get_NN_prediction(self, image):
    """Build the policy/value network for the configured architecture.

    Args:
        image: input state tensor with raw pixel values (normalized here).

    Returns:
        (policy, value): unnormalized action logits and the scalar
        state-value estimate.

    Raises:
        ValueError: if ``NETWORK_ARCH`` is not a recognized name.
    """
    image = image / 255.0
    with argscope(Conv2D, nl=tf.nn.relu):
        if NETWORK_ARCH == '1':
            l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
            l = MaxPooling('pool0', l, 2)
            l = Conv2D('conv1', l, out_channel=32, kernel_shape=5)
            l = MaxPooling('pool1', l, 2)
            l = Conv2D('conv2', l, out_channel=64, kernel_shape=4)
            l = MaxPooling('pool2', l, 2)
            l = Conv2D('conv3', l, out_channel=64, kernel_shape=3)
            # conv3 output: [None, 10, 10, 64]
        elif NETWORK_ARCH == 'nature':
            l = Conv2D('conv0', image, out_channel=32, kernel_shape=8, stride=4)
            l = Conv2D('conv1', l, out_channel=64, kernel_shape=4, stride=2)
            l = Conv2D('conv2', l, out_channel=64, kernel_shape=3)
            # conv2 output: [None, 11, 11, 64]
        else:
            # Fail fast with a clear message instead of hitting a
            # NameError on `l` a few lines below.
            raise ValueError("Unknown NETWORK_ARCH: {!r}".format(NETWORK_ARCH))
    # Named identity ops so these activations can be fetched by name later
    # (e.g. for visualization); the locals themselves are intentionally unused.
    conv2 = tf.identity(l, name='convolutional-2')
    l = FullyConnected('fc0', l, 512, nl=tf.identity)
    l = PReLU('prelu', l)
    fc = tf.identity(l, name='fully-connected')
    policy = FullyConnected('fc-pi', l, out_dim=NUM_ACTIONS, nl=tf.identity)
    value = FullyConnected('fc-v', l, 1, nl=tf.identity)
    return policy, value
def _get_NN_prediction(self, image): l = tf.reshape(image, [-1, 24]) # This calculates the position of ball when hitting the plane of the agent xNew = image[:, 0, 1, 3] yNew = image[:, 0, 0, 3] xOld = image[:, 0, 1, 2] yOld = image[:, 0, 0, 2] yPredicted = yNew + (yNew - yOld) * (0.125 - xNew) / (xNew - xOld + 0.005) yPredictedTruncated = tf.maximum(tf.minimum(yPredicted, 1), -1) yPredictedTruncated = tf.expand_dims(yPredictedTruncated, 1) summary.add_activation_summary(yPredictedTruncated, "yPredicted") l = tf.concat(1, [l, yPredictedTruncated]) for i in xrange(0, self.number_of_layers): l = FullyConnected('fc{}'.format(i), l, self.number_of_neurons, nl=tf.identity) l = PReLU('prelu{}'.format(i), l) # summary.add_activation_summary(l, "fc {} relu output".format(i)) policy = FullyConnected('fc-pi', l, out_dim=self.number_of_actions, nl=tf.identity) value = FullyConnected('fc-v', l, 1, nl=tf.identity) return policy, value
def _get_NN_prediction(self, image):
    """Mixture-of-experts policy/value network gated by reward events.

    A shared conv trunk feeds one fully-connected head per reward event;
    the final policy/value are the per-sample, reward-event-weighted sums
    of the expert outputs.

    Returns:
        (policy, value): weighted-sum action logits and state-value.
    """
    self._create_unnary_variables_with_summary(
        image[:, 0, :, 0],
        (10, 10, 6, 6, 6),
        ("rewards", "levels", "lives0", "lives1", "lives2"))
    NUMBER_OF_REWARD_EVENTS = 10
    # Per-sample gating weights, one [batch, 1] column per reward event,
    # read from the raw (un-normalized) observation.
    rewards_events = [tf.reshape(image[:, 0, x, 0], (-1, 1))
                      for x in xrange(NUMBER_OF_REWARD_EVENTS)]
    # Scale raw pixel values into [0, 1] for the conv trunk.
    image = image / 255.0
    with argscope(Conv2D, nl=tf.nn.relu):
        lc0 = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
        lc0 = MaxPooling('pool0', lc0, 2)
        lc1 = Conv2D('conv1', lc0, out_channel=32, kernel_shape=5)
        lc1 = MaxPooling('pool1', lc1, 2)
        lc2 = Conv2D('conv2', lc1, out_channel=64, kernel_shape=4)
        lc2 = MaxPooling('pool2', lc2, 2)
        lc3 = Conv2D('conv3', lc2, out_channel=64, kernel_shape=3)
    policies = []
    values = []
    # Use the named constant (the previous code hard-coded 10 here) so the
    # number of expert heads always matches the number of gating weights.
    for x in xrange(NUMBER_OF_REWARD_EVENTS):
        lfc0 = FullyConnected('fc0{}'.format(x), lc3, 512, nl=tf.identity)
        lfc0 = PReLU('prelu{}'.format(x), lfc0)
        policy = FullyConnected('fc-pi{}'.format(x), lfc0, out_dim=self.number_of_actions, nl=tf.identity)
        value = FullyConnected('fc-v{}'.format(x), lfc0, 1, nl=tf.identity)
        policies.append(policy)
        values.append(value)
    # Blend the experts: each head's output is scaled by its reward-event
    # weight, then summed across all heads.
    weighted_policies = []
    weighted_values = []
    for weight, policy, value in zip(rewards_events, policies, values):
        weighted_policies.append(tf.multiply(weight, policy))
        weighted_values.append(tf.multiply(weight, value))
    policy = tf.add_n(weighted_policies)
    value = tf.add_n(weighted_values)
    return policy, value
def _get_NN_prediction(self, image):
    """Feed the observation through a configurable stack of FC+PReLU
    layers and emit policy logits and a state-value estimate.

    Returns:
        (policy, value): unnormalized action logits and scalar value.
    """
    hidden = image
    for layer_idx in xrange(self.numberOfLayers):
        fc = FullyConnected('fc{}'.format(layer_idx), hidden,
                            self.numberOfNeurons, nl=tf.identity)
        hidden = PReLU('prelu{}'.format(layer_idx), fc)
    policy = FullyConnected('fc-pi', hidden, out_dim=self.number_of_actions, nl=tf.identity)
    value = FullyConnected('fc-v', hidden, 1, nl=tf.identity)
    return policy, value
def _get_NN_prediction(self, image):
    """Plain MLP policy/value network.

    Applies ``self.number_of_layers`` FullyConnected+PReLU blocks to the
    raw observation, then two linear heads.

    Returns:
        (policy, value): unnormalized action logits and scalar value.
    """
    net = image
    layer = 0
    while layer < self.number_of_layers:
        net = FullyConnected('fc{}'.format(layer), net,
                             self.number_of_neurons, nl=tf.identity)
        net = PReLU('prelu{}'.format(layer), net)
        layer += 1
    policy = FullyConnected('fc-pi', net, out_dim=self.number_of_actions, nl=tf.identity)
    value = FullyConnected('fc-v', net, 1, nl=tf.identity)
    return policy, value
def _get_NN_prediction(self, image):
    """Conv trunk + FC head producing unnormalized policy logits and a
    state-value estimate.

    Returns:
        (logits, value): raw action scores (no softmax) and scalar value.
    """
    # Cast to float and normalize pixels into [0, 1].
    x = tf.cast(image, tf.float32) / 255.0
    with argscope(Conv2D, activation=tf.nn.relu):
        x = Conv2D('conv0', x, 32, 5)
        x = MaxPooling('pool0', x, 2)
        x = Conv2D('conv1', x, 32, 5)
        x = MaxPooling('pool1', x, 2)
        x = Conv2D('conv2', x, 64, 4)
        x = MaxPooling('pool2', x, 2)
        x = Conv2D('conv3', x, 64, 3)
    x = FullyConnected('fc0', x, 512)
    x = PReLU('prelu', x)
    logits = FullyConnected('fc-pi', x, self.num_actions)  # unnormalized policy
    value = FullyConnected('fc-v', x, 1)
    return logits, value
def _get_NN_prediction(self, image):
    """Standard conv trunk followed by an FC layer and the policy/value
    heads.

    Returns:
        (policy, value): unnormalized action logits and scalar value.
    """
    net = image / 255.0  # scale raw pixels into [0, 1]
    # (name, out_channel, kernel_shape) for the pooled conv layers.
    pooled_convs = (('conv0', 32, 5), ('conv1', 32, 5), ('conv2', 64, 4))
    with argscope(Conv2D, nl=tf.nn.relu):
        for conv_name, channels, kernel in pooled_convs:
            net = Conv2D(conv_name, net, out_channel=channels, kernel_shape=kernel)
            # 'convN' -> 'poolN': each conv is followed by a 2x2 max-pool.
            net = MaxPooling('pool' + conv_name[-1], net, 2)
        net = Conv2D('conv3', net, out_channel=64, kernel_shape=3)
    net = FullyConnected('fc0', net, 512, nl=tf.identity)
    net = PReLU('prelu', net)
    policy = FullyConnected('fc-pi', net, out_dim=self.number_of_actions, nl=tf.identity)
    value = FullyConnected('fc-v', net, 1, nl=tf.identity)
    return policy, value
def add_column(self, previous_column_layers, column_num, trainable=True):
    """Add one column to a progressive-network-style architecture.

    Builds a fresh stack of FC+PReLU layers named with ``column_num`` and,
    where the previous column has a layer at the same depth, concatenates
    that layer's activations as lateral input to the next level.

    Args:
        previous_column_layers: list of tensors from the prior column;
            index 0 is the shared input, later indices are hidden layers.
        column_num: identifier embedded in every layer name of this column.
        trainable: whether this column's weights are trainable (frozen
            columns keep earlier-stage knowledge fixed).

    Returns:
        (new_column, policy, value): the new column's layer list (input,
        hidden layers, and a final concat of policy+value), plus the
        policy and value heads individually.
    """
    print "Creating column:{}".format(column_num)
    column_prefix = "-column-"
    # column_num = ""
    # print "Adding column:{}".format(column_num)
    new_column = []
    # We append this as this is input
    new_column.append(previous_column_layers[0])
    # NOTE(review): depth/width come from self.number_of_layers[self.stage]
    # and self.number_of_neurons[self.stage] -- per-stage configuration,
    # presumably set by the training curriculum; confirm against callers.
    for i in xrange(1, self.number_of_layers[self.stage] + 1):
        input_neurons = new_column[-1]
        l = FullyConnected('fc-{}{}{}'.format(i, column_prefix, column_num),
                           input_neurons, self.number_of_neurons[self.stage],
                           nl=tf.identity, trainable=trainable)
        l = PReLU('prelu-{}{}{}'.format(i, column_prefix, column_num), l)
        if len(previous_column_layers) > i:
            # Lateral connection: feed the previous column's layer at this
            # depth alongside the new layer (concat on the feature axis,
            # old TF arg order: tf.concat(axis, values)).
            new_layer = tf.concat(1, [previous_column_layers[i], l])
        else:
            new_layer = l
        new_column.append(new_layer)
    last_hidden_layer = new_column[-1]
    policy = FullyConnected('fc-pi{}{}'.format(column_prefix, column_num),
                            last_hidden_layer, out_dim=self.number_of_actions,
                            nl=tf.identity, trainable=trainable)
    value = FullyConnected('fc-v{}{}'.format(column_prefix, column_num),
                           last_hidden_layer, 1, nl=tf.identity,
                           trainable=trainable)
    # Expose policy+value together as this column's visible output layer.
    visible_layer = tf.concat(1, [policy, value])
    new_column.append(visible_layer)
    return new_column, policy, value