def _get_DQN_prediction(self, image):
    """ image: [0,255]"""
    # image = image / 255.0
    with argscope(Conv2D, nl=PReLU.f, use_bias=True):
        l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
        l = MaxPooling('pool0', l, 2)
        l = Conv2D('conv1', l, out_channel=32, kernel_shape=5)
        l = MaxPooling('pool1', l, 2)
        l = Conv2D('conv2', l, out_channel=64, kernel_shape=4)
        l = MaxPooling('pool2', l, 2)
        l = Conv2D('conv3', l, out_channel=64, kernel_shape=3)
        l = FullyConnected('fc0', l, 512, nl=lambda x, name: LeakyReLU.f(x, 0.01, name))

    # the original arch
    # .Conv2D('conv0', image, out_channel=32, kernel_shape=8, stride=4)
    # .Conv2D('conv1', out_channel=64, kernel_shape=4, stride=2)
    # .Conv2D('conv2', out_channel=64, kernel_shape=3)

    if not DUELING:
        Q = FullyConnected('fct', l, NUM_ACTIONS, nl=tf.identity)
    else:
        V = FullyConnected('fctV', l, 1, nl=tf.identity)
        As = FullyConnected('fctA', l, NUM_ACTIONS, nl=tf.identity)
        Q = tf.add(As, V - tf.reduce_mean(As, 1, keep_dims=True))
    return tf.identity(Q, name='Qvalue')
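# A minimal NumPy sketch (not part of the model) of the dueling aggregation
# used above: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)), with made-up values.
import numpy as np

_V = np.array([[1.0], [2.0]])                    # state values, shape (2, 1)
_A = np.array([[0.5, -0.5, 0.0],
               [1.0, 0.0, -1.0]])                # advantages, shape (2, 3)
_Q = _V + (_A - _A.mean(axis=1, keepdims=True))  # same formula as the graph above
# _Q == [[1.5, 0.5, 1.0],
#        [3.0, 2.0, 1.0]]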
def _get_NN_prediction(self, image):
    self._create_unnary_variables_with_summary(
        image[:, 0, :, 0], (10, 10, 6, 6, 6),
        ("rewards", "levels", "lives0", "lives1", "lives2"))
    image = image / 255.0
    with argscope(Conv2D, nl=tf.nn.relu):
        lc0 = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
        lc0 = MaxPooling('pool0', lc0, 2)
        lc1 = Conv2D('conv1', lc0, out_channel=32, kernel_shape=5)
        lc1 = MaxPooling('pool1', lc1, 2)
        lc2 = Conv2D('conv2', lc1, out_channel=64, kernel_shape=4)
        lc2 = MaxPooling('pool2', lc2, 2)
        lc3 = Conv2D('conv3', lc2, out_channel=64, kernel_shape=3)

    lfc0 = FullyConnected('fc0', lc3, 512, nl=tf.identity)
    lfc0 = PReLU('prelu', lfc0)
    policy = FullyConnected('fc-pi', lfc0, out_dim=self.number_of_actions, nl=tf.identity)
    value = FullyConnected('fc-v', lfc0, 1, nl=tf.identity)

    # if DEBUGING_INFO:
    #     summary.add_activation_summary(lc0, "conv_0")
    #     summary.add_activation_summary(lc1, "conv_1")
    #     summary.add_activation_summary(lc2, "conv_2")
    #     summary.add_activation_summary(lc3, "conv_3")
    #     summary.add_activation_summary(lfc0, "fc0")
    #     summary.add_activation_summary(policy, "policy")
    #     summary.add_activation_summary(value, "fc-v")
    return policy, value
def _get_NN_prediction(self, image):
    image = image / 255.0
    with argscope(Conv2D, nl=tf.nn.relu):
        if NETWORK_ARCH == '1':
            l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
            l = MaxPooling('pool0', l, 2)
            l = Conv2D('conv1', l, out_channel=32, kernel_shape=5)
            l = MaxPooling('pool1', l, 2)
            l = Conv2D('conv2', l, out_channel=64, kernel_shape=4)
            l = MaxPooling('pool2', l, 2)
            l = Conv2D('conv3', l, out_channel=64, kernel_shape=3)
            # conv3 output: [None, 10, 10, 64]
        elif NETWORK_ARCH == 'nature':
            l = Conv2D('conv0', image, out_channel=32, kernel_shape=8, stride=4)
            l = Conv2D('conv1', l, out_channel=64, kernel_shape=4, stride=2)
            l = Conv2D('conv2', l, out_channel=64, kernel_shape=3)
            # conv2 output: [None, 11, 11, 64]

    conv2 = tf.identity(l, name='convolutional-2')
    l = FullyConnected('fc0', l, 512, nl=tf.identity)
    l = PReLU('prelu', l)
    fc = tf.identity(l, name='fully-connected')
    policy = FullyConnected('fc-pi', l, out_dim=NUM_ACTIONS, nl=tf.identity)
    value = FullyConnected('fc-v', l, 1, nl=tf.identity)
    return policy, value
def _get_NN_prediction(self, image):
    l = tf.reshape(image, [-1, 24])

    # This calculates the y-position of the ball when it reaches the plane of the agent.
    xNew = image[:, 0, 1, 3]
    yNew = image[:, 0, 0, 3]
    xOld = image[:, 0, 1, 2]
    yOld = image[:, 0, 0, 2]
    yPredicted = yNew + (yNew - yOld) * (0.125 - xNew) / (xNew - xOld + 0.005)
    yPredictedTruncated = tf.maximum(tf.minimum(yPredicted, 1), -1)
    yPredictedTruncated = tf.expand_dims(yPredictedTruncated, 1)
    summary.add_activation_summary(yPredictedTruncated, "yPredicted")
    l = tf.concat(1, [l, yPredictedTruncated])

    for i in xrange(0, self.number_of_layers):
        l = FullyConnected('fc{}'.format(i), l, self.number_of_neurons, nl=tf.identity)
        l = PReLU('prelu{}'.format(i), l)
        # summary.add_activation_summary(l, "fc {} relu output".format(i))

    policy = FullyConnected('fc-pi', l, out_dim=self.number_of_actions, nl=tf.identity)
    value = FullyConnected('fc-v', l, 1, nl=tf.identity)
    return policy, value
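# A minimal NumPy sketch (not part of the model) of the extrapolation above,
# with made-up coordinates: the ball's last two positions are extrapolated
# linearly to the agent's plane at x = 0.125, and the +0.005 term guards
# against division by zero when the ball moves vertically.
import numpy as np

_xOld, _yOld = 0.50, 0.20
_xNew, _yNew = 0.40, 0.30
_yPredicted = _yNew + (_yNew - _yOld) * (0.125 - _xNew) / (_xNew - _xOld + 0.005)
# np.clip mirrors the tf.maximum/tf.minimum truncation to [-1, 1]
print(np.clip(_yPredicted, -1, 1))  # ~0.589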
def _get_NN_prediction(self, image):
    self._create_unnary_variables_with_summary(
        image[:, 0, :, 0], (10, 10, 6, 6, 6),
        ("rewards", "levels", "lives0", "lives1", "lives2"))

    NUMBER_OF_REWARD_EVENTS = 10
    rewards_events = []
    for x in xrange(NUMBER_OF_REWARD_EVENTS):
        rewards_events.append(tf.reshape(image[:, 0, x, 0], (-1, 1)))

    image = image / 255.0
    with argscope(Conv2D, nl=tf.nn.relu):
        lc0 = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
        lc0 = MaxPooling('pool0', lc0, 2)
        lc1 = Conv2D('conv1', lc0, out_channel=32, kernel_shape=5)
        lc1 = MaxPooling('pool1', lc1, 2)
        lc2 = Conv2D('conv2', lc1, out_channel=64, kernel_shape=4)
        lc2 = MaxPooling('pool2', lc2, 2)
        lc3 = Conv2D('conv3', lc2, out_channel=64, kernel_shape=3)

    # One policy/value head per reward event.
    policies = []
    values = []
    for x in xrange(NUMBER_OF_REWARD_EVENTS):
        lfc0 = FullyConnected('fc0{}'.format(x), lc3, 512, nl=tf.identity)
        lfc0 = PReLU('prelu{}'.format(x), lfc0)
        policy = FullyConnected('fc-pi{}'.format(x), lfc0, out_dim=self.number_of_actions, nl=tf.identity)
        value = FullyConnected('fc-v{}'.format(x), lfc0, 1, nl=tf.identity)
        policies.append(policy)
        values.append(value)

    # Mix the heads, weighting each one by its reward-event indicator.
    weighted_policies = []
    weighted_values = []
    for weight, policy, value in zip(rewards_events, policies, values):
        weighted_policies.append(tf.multiply(weight, policy))
        weighted_values.append(tf.multiply(weight, value))
    policy = tf.add_n(weighted_policies)
    value = tf.add_n(weighted_values)

    # if DEBUGING_INFO:
    #     summary.add_activation_summary(lc0, "conv_0")
    #     summary.add_activation_summary(lc1, "conv_1")
    #     summary.add_activation_summary(lc2, "conv_2")
    #     summary.add_activation_summary(lc3, "conv_3")
    #     summary.add_activation_summary(lfc0, "fc0")
    #     summary.add_activation_summary(policy, "policy")
    #     summary.add_activation_summary(value, "fc-v")
    return policy, value
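# A minimal NumPy sketch (not part of the model) of the head mixing above,
# with made-up numbers: 3 heads, 2 actions, batch of 1. Each head's output is
# scaled by its reward-event indicator and the scaled outputs are summed, so
# only heads whose event is active contribute to the final policy.
import numpy as np

_weights = [np.array([[1.0]]), np.array([[0.0]]), np.array([[0.0]])]
_policies = [np.array([[0.2, 0.8]]), np.array([[0.9, 0.1]]), np.array([[0.5, 0.5]])]
_policy = sum(w * p for w, p in zip(_weights, _policies))
# _policy == [[0.2, 0.8]]: only the first (active) head contributes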
def _get_NN_prediction(self, image):
    l = image
    for i in xrange(0, self.numberOfLayers):
        l = FullyConnected('fc{}'.format(i), l, self.numberOfNeurons, nl=tf.identity)
        l = PReLU('prelu{}'.format(i), l)
    policy = FullyConnected('fc-pi', l, out_dim=self.number_of_actions, nl=tf.identity)
    value = FullyConnected('fc-v', l, 1, nl=tf.identity)
    return policy, value
def _get_NN_prediction(self, image):
    l = image
    for i in xrange(0, self.number_of_layers):
        l = FullyConnected('fc{}'.format(i), l, self.number_of_neurons, nl=tf.identity)
        l = PReLU('prelu{}'.format(i), l)
        # summary.add_activation_summary(l, "fc {} relu output".format(i))
    policy = FullyConnected('fc-pi', l, out_dim=self.number_of_actions, nl=tf.identity)
    value = FullyConnected('fc-v', l, 1, nl=tf.identity)
    return policy, value
def _get_NN_prediction(self, image):
    image = tf.cast(image, tf.float32) / 255.0
    with argscope(Conv2D, activation=tf.nn.relu):
        l = Conv2D('conv0', image, 32, 5)
        l = MaxPooling('pool0', l, 2)
        l = Conv2D('conv1', l, 32, 5)
        l = MaxPooling('pool1', l, 2)
        l = Conv2D('conv2', l, 64, 4)
        l = MaxPooling('pool2', l, 2)
        l = Conv2D('conv3', l, 64, 3)

    l = FullyConnected('fc0', l, 512)
    l = PReLU('prelu', l)
    logits = FullyConnected('fc-pi', l, self.num_actions)  # unnormalized policy
    value = FullyConnected('fc-v', l, 1)
    return logits, value
def _get_NN_prediction(self, image):
    image = image / 255.0
    with argscope(Conv2D, nl=tf.nn.relu):
        l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
        l = MaxPooling('pool0', l, 2)
        l = Conv2D('conv1', l, out_channel=32, kernel_shape=5)
        l = MaxPooling('pool1', l, 2)
        l = Conv2D('conv2', l, out_channel=64, kernel_shape=4)
        l = MaxPooling('pool2', l, 2)
        l = Conv2D('conv3', l, out_channel=64, kernel_shape=3)

    l = FullyConnected('fc0', l, 512, nl=tf.identity)
    l = PReLU('prelu', l)
    policy = FullyConnected('fc-pi', l, out_dim=NUM_ACTIONS, nl=tf.identity)
    return policy
def _get_DQN_prediction(self, image):
    # TODO: Do we need other pre-processing? e.g., subtracting the mean
    image = image / 255.0
    # TODO: Can the network structure be improved?
    with argscope(Conv2D, nl=tf.nn.relu, use_bias=True):  # activation for each layer
        l = Conv2D('conv0', image, out_channel=32, kernel_shape=5)
        l = MaxPooling('pool0', l, 2)
        l = Conv2D('conv1', l, out_channel=32, kernel_shape=5)
        l = MaxPooling('pool1', l, 2)
        l = Conv2D('conv2', l, out_channel=64, kernel_shape=4)
        l = MaxPooling('pool2', l, 2)
        l = Conv2D('conv3', l, out_channel=64, kernel_shape=3)

    # the original arch
    # .Conv2D('conv0', image, out_channel=32, kernel_shape=8, stride=4)
    # .Conv2D('conv1', out_channel=64, kernel_shape=4, stride=2)
    # .Conv2D('conv2', out_channel=64, kernel_shape=3)

    l = FullyConnected('fc0', l, 512, nl=lambda x, name: LeakyReLU.f(x, 0.01, name))
    l = FullyConnected('fct', l, NUM_ACTIONS, nl=tf.identity)
    return l
def add_column(self, previous_column_layers, column_num, trainable=True):
    print "Creating column: {}".format(column_num)
    column_prefix = "-column-"
    # column_num = ""
    # print "Adding column:{}".format(column_num)

    new_column = []
    # The first entry is the shared input.
    new_column.append(previous_column_layers[0])
    for i in xrange(1, self.number_of_layers[self.stage] + 1):
        input_neurons = new_column[-1]
        l = FullyConnected('fc-{}{}{}'.format(i, column_prefix, column_num),
                           input_neurons, self.number_of_neurons[self.stage],
                           nl=tf.identity, trainable=trainable)
        l = PReLU('prelu-{}{}{}'.format(i, column_prefix, column_num), l)
        # Lateral connection: concatenate the previous column's layer at the
        # same depth, if that column is deep enough.
        if len(previous_column_layers) > i:
            new_layer = tf.concat(1, [previous_column_layers[i], l])
        else:
            new_layer = l
        new_column.append(new_layer)

    last_hidden_layer = new_column[-1]
    policy = FullyConnected('fc-pi{}{}'.format(column_prefix, column_num),
                            last_hidden_layer, out_dim=self.number_of_actions,
                            nl=tf.identity, trainable=trainable)
    value = FullyConnected('fc-v{}{}'.format(column_prefix, column_num),
                           last_hidden_layer, 1, nl=tf.identity, trainable=trainable)
    visible_layer = tf.concat(1, [policy, value])
    new_column.append(visible_layer)
    return new_column, policy, value
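# A minimal NumPy sketch (not part of the model) of the lateral connection
# above, with made-up shapes: the frozen previous column's hidden layer is
# concatenated feature-wise with the new column's, so the next layer of the
# new column sees both.
import numpy as np

_prev_hidden = np.zeros((2, 4))  # from the frozen, earlier column
_new_hidden = np.ones((2, 4))    # from the trainable, new column
_combined = np.concatenate([_prev_hidden, _new_hidden], axis=1)
# _combined.shape == (2, 8); this widened layer feeds the next fc of the column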
def build_graph(self, pc, pc_feature):
    pc_symmetry = tf.stack([-pc[..., 0], pc[..., 1], pc[..., 2]], -1)  # mirror along -x
    dist2sym = tf.reduce_sum((pc[:, :, None] - pc_symmetry[:, None])**2, -1)
    nearest_idx = tf.argmin(dist2sym, -1, output_type=tf.int32)

    # smoothnet encoder, only local features are used
    embedding = SmoothNet(pc_feature, self.cfg)
    with tf.variable_scope('encoder'):
        z = tf.sigmoid(embedding[:, :, -1], name='z')
        output_x = tf.nn.l2_normalize(embedding[:, :, :-1], axis=-1, name='feature')

    gp_loss = 0.
    loss_d = 0.
    loss_g = 0.
    if get_current_tower_context().is_training:
        beta_dist = tf.distributions.Beta(
            concentration1=self.cfg.beta.concentration1,
            concentration0=self.cfg.beta.concentration0)
        with tf.variable_scope('GAN'):
            real_z = beta_dist.sample(tf.shape(z))
            fake_val = self.discriminator(tf.stop_gradient(z))
            real_val = self.discriminator(real_z)
            loss_d = tf.reduce_mean(fake_val - real_val, name='loss_d')
            with varreplace.freeze_variables(stop_gradient=True):
                loss_g = tf.reduce_mean(-self.discriminator(z), name='loss_g')
            z_interp = z + tf.random_uniform(
                (tf.shape(fake_val)[0], 1)) * (real_z - z)
            gradient_f = tf.gradients(self.discriminator(z_interp), [z_interp])[0]
            gp_loss = tf.reduce_mean(tf.maximum(
                tf.norm(gradient_f, axis=-1) - 1, 0)**2, name='gp_loss')

    code = tf.concat([
        tf.reduce_max(tf.nn.relu(output_x) * z[..., None], 1),
        tf.reduce_max(tf.nn.relu(-output_x) * z[..., None], 1)
    ], axis=-1, name='code')
    code = FullyConnected('fc_global', code, self.cfg.topnet.code_nfts, activation=None)

    # topnet decoder
    tarch = get_arch(self.cfg.topnet.nlevels, self.cfg.num_points)

    def create_level(level, input_channels, output_channels, inputs, bn):
        with tf.variable_scope('level_%d' % level, reuse=tf.AUTO_REUSE):
            features = mlp_conv(inputs, [
                input_channels,
                int(input_channels / 2),
                int(input_channels / 4),
                int(input_channels / 8),
                output_channels * int(tarch[level])
            ], get_current_tower_context().is_training, bn)
            features = tf.reshape(
                features, [tf.shape(features)[0], -1, output_channels])
        return features

    Nin = self.cfg.topnet.nfeat + self.cfg.topnet.code_nfts
    Nout = self.cfg.topnet.nfeat
    bn = True
    N0 = int(tarch[0])
    nlevels = len(tarch)
    with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
        level0 = mlp(code, [256, 64, self.cfg.topnet.nfeat * N0],
                     get_current_tower_context().is_training, bn=True)
        level0 = tf.tanh(level0, name='tanh_0')
        level0 = tf.reshape(level0, [-1, N0, self.cfg.topnet.nfeat])
        outs = [level0]
        for i in range(1, nlevels):
            if i == nlevels - 1:
                Nout = 3
                bn = False
            inp = outs[-1]
            y = tf.expand_dims(code, 1)
            y = tf.tile(y, [1, tf.shape(inp)[1], 1])
            y = tf.concat([inp, y], 2)
            outs.append(
                tf.tanh(create_level(i, Nin, Nout, y, bn), name='tanh_%d' % i))

    reconstruction = tf.reshape(outs[-1], [-1, self.cfg.num_points, 3], name='recon_pc')
    loss_recon = chamfer(reconstruction, pc)
    loss_recon = tf.identity(self.cfg.recon_factor * tf.reduce_mean(loss_recon),
                             name='recon_loss')

    batch_size = tf.shape(output_x)[0]
    batch_idx = tf.tile(
        tf.range(batch_size)[:, None], [1, tf.shape(nearest_idx)[1]])
    feature_sym = tf.gather_nd(embedding, tf.stack([batch_idx, nearest_idx], -1))
    loss_sym = tf.identity(
        self.cfg.symmetry_factor *
        tf.reduce_mean(tf.reduce_sum(tf.abs(feature_sym - embedding), -1)),
        'symmetry_loss')

    wd_cost = tf.multiply(1e-4,
                          regularize_cost('.*(_W|kernel)', tf.nn.l2_loss),
                          name='regularize_loss')
    loss_gan = loss_d + loss_g + gp_loss
    total_cost = tf.add_n([loss_recon, wd_cost, loss_gan, loss_sym],
                          name='total_cost')

    summary.add_moving_summary(loss_recon, loss_sym)
    summary.add_param_summary(['.*(_W|kernel)', ['histogram', 'rms']])
    return total_cost
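# A minimal NumPy sketch (not part of the model) of the one-sided gradient
# penalty above, with made-up gradient norms: norms above 1 are penalized
# quadratically, while norms at or below 1 are not (the tf.maximum(..., 0) term).
import numpy as np

_grad_norms = np.array([0.8, 1.0, 1.7])            # per-sample gradient norms of the discriminator
_gp = np.mean(np.maximum(_grad_norms - 1, 0)**2)   # only the 1.7 sample contributes
# _gp == 0.49 / 3 ~= 0.1633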