# Assumed module-level imports for the functions in this listing (not shown in
# the original): tensorflow as tf, the project-local model_utils helpers, and
# LSTMStateTuple from tf.contrib.rnn.

def Planner(self, training_input, testing_input, label_status, length, mask):
    with tf.variable_scope('planner'):
        batch_size = self.batch_size // self.gpu_num
        rnn_cell = model_utils._lstm_cell(self.n_hidden, self.n_layers)
        w_status = tf.get_variable(
            'w_status', [self.n_hidden, 2],
            initializer=tf.contrib.layers.xavier_initializer())
        b_status = tf.get_variable(
            'b_status', [2],
            initializer=tf.contrib.layers.xavier_initializer())

        # training
        training_input_dropout = tf.nn.dropout(training_input, self.keep_prob)  # b*l, h
        shape = training_input_dropout.get_shape().as_list()
        training_input_reshape = tf.reshape(
            training_input_dropout, [batch_size, self.max_step, shape[1]])  # b, l, h
        rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                          training_input_reshape,
                                          sequence_length=length,
                                          dtype=tf.float32)  # b, l, h
        rnn_output_dropout = tf.nn.dropout(rnn_output, self.keep_prob)
        rnn_output_reshape = tf.reshape(rnn_output_dropout, [-1, self.n_hidden])  # b*l, h
        logits = tf.matmul(rnn_output_reshape, w_status) + b_status  # b*l, 2
        label_status_reshape = tf.reshape(label_status, [-1])
        loss_status = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_status_reshape, logits=logits)
        loss_status_scalar = tf.reduce_sum(loss_status * mask)

        # testing
        prev_state = []
        for l in range(self.n_layers):
            prev_state.append(
                LSTMStateTuple(
                    tf.placeholder(tf.float32, shape=[None, self.n_hidden],
                                   name='initial_state{0}.c'.format(l)),
                    tf.placeholder(tf.float32, shape=[None, self.n_hidden],
                                   name='initial_state{0}.h'.format(l))))
        if self.n_layers == 1:
            prev_state = prev_state[0]
        rnn_output_test, state = rnn_cell(testing_input, prev_state)  # b, h
        prob = tf.nn.softmax(tf.matmul(rnn_output_test, w_status) + b_status)  # b, 2
        # pred_status_test = tf.argmax(prob, axis=1)
    return loss_status_scalar, prob, state, prev_state

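# Usage sketch (an assumption, not from the original source): driving the
# single-step test branch of Planner above in a tf.Session loop.  The
# single-layer case (n_layers == 1) is shown, where `prev_state` is a lone
# LSTMStateTuple of placeholders; `sess`, `episode_features`, and the tensor
# handles are illustrative names.
def run_planner_episode(sess, testing_input, prob, state, prev_state,
                        episode_features, n_hidden):
    import numpy as np
    c = np.zeros((1, n_hidden), dtype=np.float32)
    h = np.zeros((1, n_hidden), dtype=np.float32)
    probs = []
    for feat in episode_features:  # one fused feature vector per timestep
        prob_val, (c, h) = sess.run(
            [prob, state],
            feed_dict={testing_input: feat[None, :],
                       prev_state.c: c,
                       prev_state.h: h})
        probs.append(prob_val[0])  # [p(stay), p(advance)] for this step
    return probs
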
def Model(self, inputs):
    input_depth, input_cmd, input_prev_a, rnn_h_in = inputs
    # encode depth image
    conv1 = model_utils.conv2d(input_depth, 4, 5, 4, scope='conv1', max_pool=False)
    conv2 = model_utils.conv2d(conv1, 16, 5, 4, scope='conv2', max_pool=False)
    conv3 = model_utils.conv2d(conv2, 32, 3, 2, scope='conv3', max_pool=False)
    shape = conv3.get_shape().as_list()
    depth_vect = tf.reshape(conv3, shape=[-1, shape[1] * shape[2] * shape[3]])  # b, d
    # encode cmd
    embedding_cmd = tf.get_variable('cmd_embedding', [self.n_cmd_type, self.dim_emb])
    cmd_vect = tf.reshape(tf.nn.embedding_lookup(embedding_cmd, input_cmd),
                          [-1, self.dim_emb])
    # encode prev action
    embedding_w_action = tf.get_variable('embedding_w_action',
                                         [self.dim_action, self.dim_emb])
    embedding_b_action = tf.get_variable('embedding_b_action', [self.dim_emb])
    prev_a_vect = tf.matmul(input_prev_a, embedding_w_action) + embedding_b_action
    input_vect = tf.concat([depth_vect, cmd_vect, prev_a_vect], axis=1)
    # rnn
    if self.rnn_type == 'lstm':
        rnn_cell = model_utils._lstm_cell(self.n_hidden, 1, name='rnn_cell')
    else:
        rnn_cell = model_utils._gru_cell(self.n_hidden, 1, name='rnn_cell')
    # use the state unpacked from `inputs`; the original read self.rnn_h_in,
    # which left the unpacked rnn_h_in argument unused
    rnn_output, rnn_h_out = rnn_cell(input_vect, rnn_h_in)
    # action
    a_linear = model_utils.dense_layer(
        rnn_output, 1, 'a_linear', activation=tf.nn.sigmoid) * self.action_range[0]
    a_angular = model_utils.dense_layer(
        rnn_output, 1, 'a_angular', activation=tf.nn.tanh) * self.action_range[1]
    pred_action = tf.concat([a_linear, a_angular], axis=1)
    return pred_action, rnn_h_out

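# Wiring sketch (an assumption, not part of the original source): placeholders
# that could feed Model above.  `depth_h`, `depth_w`, `dim_action`, and
# `n_hidden` stand in for the class attributes; the GRU branch is shown since
# its recurrent state is a single [batch, n_hidden] tensor (the LSTM branch
# would take an LSTMStateTuple of two such placeholders instead).
def build_policy_inputs(depth_h, depth_w, dim_action, n_hidden):
    input_depth = tf.placeholder(tf.float32, [None, depth_h, depth_w, 1],
                                 name='input_depth')
    input_cmd = tf.placeholder(tf.int32, [None, 1], name='input_cmd')
    input_prev_a = tf.placeholder(tf.float32, [None, dim_action], name='input_prev_a')
    rnn_h_in = tf.placeholder(tf.float32, [None, n_hidden], name='rnn_h_in')
    return input_depth, input_cmd, input_prev_a, rnn_h_in
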
def Model(self, inputs):
    laser, cmd, cmd_next, cmd_skip, prev_action, obj_goal, prev_state_2 = inputs
    with tf.variable_scope('encoder'):
        embedding_w_goal = tf.get_variable('embedding_w_goal',
                                           [self.dim_action, self.dim_emb])
        embedding_b_goal = tf.get_variable('embedding_b_goal', [self.dim_emb])
        embedding_status = tf.get_variable('embedding_status',
                                           [self.n_cmd_type**2, self.dim_emb])
        embedding_w_action = tf.get_variable('embedding_w_action',
                                             [self.dim_action, self.dim_emb])
        embedding_b_action = tf.get_variable('embedding_b_action', [self.dim_emb])
        embedding_w_status = tf.get_variable('embedding_w_status',
                                             [self.dim_cmd, self.dim_emb])
        embedding_b_status = tf.get_variable('embedding_b_status', [self.dim_emb])
        # training input
        conv1 = model_utils.Conv1D(laser, 2, 5, 4, scope='conv1')
        conv2 = model_utils.Conv1D(conv1, 4, 5, 4, scope='conv2')
        conv3 = model_utils.Conv1D(conv2, 8, 5, 4, scope='conv3')
        shape = conv3.get_shape().as_list()
        vector_laser = tf.reshape(conv3, (-1, shape[1] * shape[2]))
        curr_status = cmd * self.n_cmd_type + cmd_next
        next_status = cmd_next * self.n_cmd_type + cmd_skip
        vector_curr_status = tf.reshape(
            tf.nn.embedding_lookup(embedding_status, curr_status), (-1, self.dim_emb))
        vector_prev_action = tf.matmul(prev_action,
                                       embedding_w_action) + embedding_b_action
        vector_obj_goal = tf.matmul(obj_goal, embedding_w_goal) + embedding_b_goal
        input_vector = tf.concat(
            [vector_laser, vector_curr_status, vector_prev_action, vector_obj_goal],
            axis=1)
    with tf.variable_scope('controller'):
        shape = input_vector.get_shape().as_list()
        w_hidden = tf.get_variable(
            'w_hidden', [shape[1], self.n_hidden],
            initializer=tf.contrib.layers.xavier_initializer())
        b_hidden = tf.get_variable(
            'b_hidden', [self.n_hidden],
            initializer=tf.contrib.layers.xavier_initializer())
        w_action_linear = tf.get_variable(
            'w_action_linear', [self.n_hidden, self.dim_action // 2],
            initializer=tf.contrib.layers.xavier_initializer())
        b_action_linear = tf.get_variable(
            'b_action_linear', [self.dim_action // 2],
            initializer=tf.contrib.layers.xavier_initializer())
        w_action_angular = tf.get_variable(
            'w_action_angular', [self.n_hidden, self.dim_action // 2],
            initializer=tf.contrib.layers.xavier_initializer())
        b_action_angular = tf.get_variable(
            'b_action_angular', [self.dim_action // 2],
            initializer=tf.contrib.layers.xavier_initializer())
        hidden = tf.nn.leaky_relu(tf.matmul(input_vector, w_hidden) + b_hidden)
        a_linear = tf.nn.sigmoid(
            tf.matmul(hidden, w_action_linear) + b_action_linear) * self.action_range[0]
        a_angular = tf.nn.tanh(
            tf.matmul(hidden, w_action_angular) + b_action_angular) * self.action_range[1]
        pred_action = tf.concat([a_linear, a_angular], axis=1)
    with tf.variable_scope('planner'):
        rnn_cell_2 = model_utils._lstm_cell(self.n_hidden, self.n_layers,
                                            name='rnn/basic_lstm_cell')
        w_status_matrix = tf.get_variable(
            'w_status_matrix', [self.n_cmd_type**2, self.n_hidden],
            initializer=tf.contrib.layers.xavier_initializer())
        b_status_matrix = tf.get_variable(
            'b_status_matrix', [self.n_cmd_type**2],
            initializer=tf.contrib.layers.xavier_initializer())
        status_curr = tf.reshape(cmd * self.n_cmd_type + cmd_next, [-1])  # b
        status_next = tf.reshape(cmd_next * self.n_cmd_type + cmd_skip, [-1])
        w_status_curr = tf.reshape(tf.gather(w_status_matrix, status_curr),
                                   [-1, self.n_hidden, 1])  # b, h, 1
        w_status_next = tf.reshape(tf.gather(w_status_matrix, status_next),
                                   [-1, self.n_hidden, 1])
        b_status_curr = tf.reshape(tf.gather(b_status_matrix, status_curr),
                                   [-1, 1])  # b, 1
        b_status_next = tf.reshape(tf.gather(b_status_matrix, status_next),
                                   [-1, 1])
        w_status = tf.concat([w_status_curr, w_status_next], axis=2)  # b, h, 2
        b_status = tf.concat([b_status_curr, b_status_next], axis=1)  # b, 2
        rnn_output_2, state_2 = rnn_cell_2(input_vector, prev_state_2)
        rnn_output_expand = tf.expand_dims(rnn_output_2, 1)  # b, 1, h
        logits = tf.reshape(tf.matmul(rnn_output_expand, w_status),
                            [-1, 2]) + b_status  # b, 2
    return pred_action, logits, state_2

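# Shape illustration (an assumption, for clarity only): the planner head above
# scores just two rows of w_status_matrix per sample -- "stay on the current
# command" vs. "advance to the next" -- via a batched matmul.  A numpy
# equivalent of that gather-and-contract:
def two_way_status_logits(rnn_out, w_matrix, b_vector, status_curr, status_next):
    import numpy as np
    w = np.stack([w_matrix[status_curr], w_matrix[status_next]], axis=2)  # b, h, 2
    b = np.stack([b_vector[status_curr], b_vector[status_next]], axis=1)  # b, 2
    return np.einsum('bh,bht->bt', rnn_out, w) + b  # b, 2 logits
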
def ControllerLSTM(self, training_input, testing_input, label_action, length, mask):
    with tf.variable_scope('controller'):
        batch_size = self.batch_size // self.gpu_num
        rnn_cell = model_utils._lstm_cell(self.n_hidden, self.n_layers)
        w_action_linear = tf.get_variable(
            'w_action_linear', [self.n_hidden, self.dim_action // 2],
            initializer=tf.contrib.layers.xavier_initializer())
        b_action_linear = tf.get_variable(
            'b_action_linear', [self.dim_action // 2],
            initializer=tf.contrib.layers.xavier_initializer())
        w_action_angular = tf.get_variable(
            'w_action_angular', [self.n_hidden, self.dim_action // 2],
            initializer=tf.contrib.layers.xavier_initializer())
        b_action_angular = tf.get_variable(
            'b_action_angular', [self.dim_action // 2],
            initializer=tf.contrib.layers.xavier_initializer())

        # training
        training_input_dropout = tf.nn.dropout(training_input, self.keep_prob)  # b*l, h
        shape = training_input_dropout.get_shape().as_list()
        training_input_reshape = tf.reshape(
            training_input_dropout, [batch_size, self.max_step, shape[1]])  # b, l, h
        rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                          training_input_reshape,
                                          sequence_length=length,
                                          dtype=tf.float32)  # b, l, h
        rnn_output_dropout = tf.nn.dropout(rnn_output, self.keep_prob)
        rnn_output_reshape = tf.reshape(rnn_output_dropout, [-1, self.n_hidden])  # b*l, h
        a_linear = tf.nn.sigmoid(
            tf.matmul(rnn_output_reshape, w_action_linear)
            + b_action_linear) * self.action_range[0]
        a_angular = tf.nn.tanh(
            tf.matmul(rnn_output_reshape, w_action_angular)
            + b_action_angular) * self.action_range[1]
        pred_action = tf.concat([a_linear, a_angular], axis=1)

        # mean and variance of the masked error, averaged over valid steps only
        mask_reshape = tf.reshape(mask, [batch_size * self.max_step, 1])  # b*l, 1
        mask_tile = tf.tile(mask_reshape, [1, 2])  # b*l, 2
        masked_error = (pred_action - label_action) * mask_tile  # b*l, 2
        mean = tf.reduce_sum(masked_error, axis=0) / tf.cast(
            tf.reduce_sum(length), tf.float32)  # 2
        mean_expand = tf.expand_dims(mean, axis=0)  # 1, 2
        mean_tile = tf.tile(mean_expand, [batch_size * self.max_step, 1])  # b*l, 2
        # square each deviation before summing; the original summed first and
        # then squared, which always evaluates to zero for mean-centred errors
        variance = tf.reduce_sum(
            tf.square((masked_error - mean_tile) * mask_tile),
            axis=0) / tf.cast(tf.reduce_sum(length), tf.float32)  # 2
        loss_action = tf.losses.mean_squared_error(
            labels=label_action,
            predictions=pred_action,
            reduction=tf.losses.Reduction.NONE)
        loss_action = tf.reduce_sum(loss_action, axis=1)
        loss_action_scalar = tf.reduce_sum(loss_action * mask)

        # testing
        prev_state = []
        for l in range(self.n_layers):
            prev_state.append(
                LSTMStateTuple(
                    tf.placeholder(tf.float32, shape=[None, self.n_hidden],
                                   name='initial_state{0}.c'.format(l)),
                    tf.placeholder(tf.float32, shape=[None, self.n_hidden],
                                   name='initial_state{0}.h'.format(l))))
        if self.n_layers == 1:
            prev_state = prev_state[0]
        rnn_output_test, state = rnn_cell(testing_input, prev_state)  # b, h
        a_linear_test = tf.nn.sigmoid(
            tf.matmul(rnn_output_test, w_action_linear)
            + b_action_linear) * self.action_range[0]
        a_angular_test = tf.nn.tanh(
            tf.matmul(rnn_output_test, w_action_angular)
            + b_action_angular) * self.action_range[1]
        pred_action_test = tf.concat([a_linear_test, a_angular_test], axis=1)
    return loss_action_scalar, pred_action_test, state, prev_state, mean, variance

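# Numerical mirror (an assumption, for clarity only) of the masked statistics
# above: the error is averaged over the sum(length) valid timesteps rather
# than over all batch_size * max_step padded rows.
def masked_error_stats(pred, label, mask, lengths):
    import numpy as np
    n = float(np.sum(lengths))
    err = (pred - label) * mask[:, None]  # zero out padded steps, shape (b*l, 2)
    mean = err.sum(axis=0) / n            # per-dimension mean, shape (2,)
    var = (((err - mean) * mask[:, None]) ** 2).sum(axis=0) / n
    return mean, var
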
def Model(self, inputs):
    laser, cmd, cmd_next, prev_action, obj_goal, action = inputs
    with tf.variable_scope('encoder'):
        embedding_w_goal = tf.get_variable('embedding_w_goal',
                                           [self.dim_action, self.dim_emb])
        embedding_b_goal = tf.get_variable('embedding_b_goal', [self.dim_emb])
        embedding_status = tf.get_variable('embedding_status',
                                           [self.n_cmd_type**2, self.dim_emb])
        embedding_w_action = tf.get_variable('embedding_w_action',
                                             [self.dim_action, self.dim_emb])
        embedding_b_action = tf.get_variable('embedding_b_action', [self.dim_emb])
        embedding_w_status = tf.get_variable('embedding_w_status',
                                             [self.dim_cmd, self.dim_emb])
        embedding_b_status = tf.get_variable('embedding_b_status', [self.dim_emb])
        # encode the unpacked `laser` input (the original read self.input_laser,
        # which bypassed the argument unpacked above)
        conv1 = model_utils.Conv1D(laser, 2, 5, 4, scope='conv1')
        conv2 = model_utils.Conv1D(conv1, 4, 5, 4, scope='conv2')
        conv3 = model_utils.Conv1D(conv2, 8, 5, 4, scope='conv3')
        shape = conv3.get_shape().as_list()
        vector_laser = tf.reshape(conv3, (-1, shape[1] * shape[2]))
        curr_status = cmd * self.n_cmd_type + cmd_next
        vector_curr_status = tf.reshape(
            tf.nn.embedding_lookup(embedding_status, curr_status), (-1, self.dim_emb))
        vector_prev_action = tf.matmul(prev_action,
                                       embedding_w_action) + embedding_b_action
        vector_obj_goal = tf.matmul(obj_goal, embedding_w_goal) + embedding_b_goal
        vector_action = tf.matmul(action, embedding_w_action) + embedding_b_action
        input_vector = tf.concat([
            vector_laser, vector_curr_status, vector_prev_action,
            vector_obj_goal, vector_action
        ], axis=1)
    with tf.variable_scope('q'):
        rnn_cell = model_utils._lstm_cell(self.n_hidden, self.n_layers,
                                          name='rnn/basic_lstm_cell')
        w_q = tf.get_variable(
            'w_q', [self.n_hidden, 1],
            initializer=tf.initializers.random_uniform(-0.003, 0.003))
        b_q = tf.get_variable(
            'b_q', [1],
            initializer=tf.initializers.random_uniform(-0.003, 0.003))
        shape = input_vector.get_shape().as_list()
        input_vector_reshape = tf.reshape(
            input_vector, [self.batch_size, self.max_step, shape[1]])
        rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                          input_vector_reshape,
                                          sequence_length=self.length,
                                          dtype=tf.float32)  # b, l, h
        rnn_output_reshape = tf.reshape(rnn_output, [-1, self.n_hidden])  # b*l, h
        q = tf.matmul(rnn_output_reshape, w_q) + b_q
    return q

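# Training sketch (an assumption, not from the original source): one plausible
# masked TD(0) regression target for the recurrent critic above.  `q_target`,
# `reward`, and `done` would come from a target network and the replay data;
# none of these names appear in the original listing.
def critic_loss(q, q_target, reward, done, mask, gamma=0.99):
    td_target = tf.stop_gradient(reward + gamma * q_target * (1.0 - done))  # b*l, 1
    return tf.reduce_sum(tf.square(td_target - q) * tf.reshape(mask, [-1, 1]))
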
def Planner(self, training_input, testing_input, input_cmd, input_cmd_next,
            input_cmd_skip, label_status, length, mask):
    with tf.variable_scope('planner'):
        batch_size = self.batch_size // self.gpu_num
        rnn_cell = model_utils._lstm_cell(self.n_hidden, self.n_layers)
        w_status_matrix = tf.get_variable(
            'w_status_matrix', [self.n_cmd_type**2, self.n_hidden],
            initializer=tf.contrib.layers.xavier_initializer())
        b_status_matrix = tf.get_variable(
            'b_status_matrix', [self.n_cmd_type**2],
            initializer=tf.contrib.layers.xavier_initializer())
        status_curr = tf.reshape(
            input_cmd * self.n_cmd_type + input_cmd_next, [-1])  # b*l
        status_next = tf.reshape(
            input_cmd_next * self.n_cmd_type + input_cmd_skip, [-1])
        w_status_curr = tf.reshape(tf.gather(w_status_matrix, status_curr),
                                   [-1, self.n_hidden, 1])  # b*l, h, 1
        w_status_next = tf.reshape(tf.gather(w_status_matrix, status_next),
                                   [-1, self.n_hidden, 1])
        b_status_curr = tf.reshape(tf.gather(b_status_matrix, status_curr),
                                   [-1, 1])  # b*l, 1
        b_status_next = tf.reshape(tf.gather(b_status_matrix, status_next),
                                   [-1, 1])
        w_status = tf.concat([w_status_curr, w_status_next], axis=2)  # b*l, h, 2
        b_status = tf.concat([b_status_curr, b_status_next], axis=1)  # b*l, 2

        # training
        training_input_dropout = tf.nn.dropout(training_input, self.keep_prob)  # b*l, h
        shape = training_input_dropout.get_shape().as_list()
        training_input_reshape = tf.reshape(
            training_input_dropout, [batch_size, self.max_step, shape[1]])  # b, l, h
        rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                          training_input_reshape,
                                          sequence_length=length,
                                          dtype=tf.float32)  # b, l, h
        rnn_output_dropout = tf.nn.dropout(rnn_output, self.keep_prob)
        rnn_output_reshape = tf.reshape(rnn_output_dropout, [-1, self.n_hidden])  # b*l, h
        rnn_output_expand = tf.expand_dims(rnn_output_reshape, 1)  # b*l, 1, h

        # 1. dot-product distance
        logits = tf.reshape(tf.matmul(rnn_output_expand, w_status),
                            [-1, 2]) + b_status  # b*l, 2
        self.training_logits = logits
        self.training_pred = tf.argmax(logits, axis=1)

        # 2. Euclidean distance
        # w_status_curr_reshape = tf.reshape(w_status_curr, [-1, self.n_hidden])  # b*l, h
        # w_status_next_reshape = tf.reshape(w_status_next, [-1, self.n_hidden])  # b*l, h
        # squared_dist_curr = tf.reduce_sum(
        #     tf.square(rnn_output_reshape - w_status_curr_reshape),
        #     axis=1, keepdims=True)  # b*l, 1
        # squared_dist_next = tf.reduce_sum(
        #     tf.square(rnn_output_reshape - w_status_next_reshape),
        #     axis=1, keepdims=True)  # b*l, 1
        # logits = tf.concat([squared_dist_curr, squared_dist_next], axis=1)

        # 3. binary prediction
        # w_binary = tf.get_variable('w_status', [self.n_hidden, 2],
        #                            initializer=tf.contrib.layers.xavier_initializer())
        # b_binary = tf.get_variable('b_status', [2],
        #                            initializer=tf.contrib.layers.xavier_initializer())
        # logits = tf.matmul(rnn_output_reshape, w_binary) + b_binary

        # 4. n^2-way prediction
        # w_square = tf.get_variable('w_square', [self.n_hidden, self.n_cmd_type**2 * 2],
        #                            initializer=tf.contrib.layers.xavier_initializer())
        # b_square = tf.get_variable('b_square', [self.n_cmd_type**2 * 2],
        #                            initializer=tf.contrib.layers.xavier_initializer())
        # logits = tf.matmul(rnn_output_reshape, w_square) + b_square
        # label_status_reshape = tf.reshape(label_status, [-1])
        # label_status = (1 - label_status_reshape) * status_curr \
        #                + label_status_reshape * status_next

        label_status_reshape = tf.reshape(label_status, [-1])
        loss_status = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_status_reshape, logits=logits)
        loss_status_scalar = tf.reduce_sum(loss_status * mask)

        # testing
        prev_state = []
        for l in range(self.n_layers):
            prev_state.append(
                LSTMStateTuple(
                    tf.placeholder(tf.float32, shape=[None, self.n_hidden],
                                   name='initial_state{0}.c'.format(l)),
                    tf.placeholder(tf.float32, shape=[None, self.n_hidden],
                                   name='initial_state{0}.h'.format(l))))
        if self.n_layers == 1:
            prev_state = prev_state[0]
        rnn_output_test, state = rnn_cell(testing_input, prev_state)  # b, h
        rnn_output_test_expand = tf.expand_dims(rnn_output_test, 1)  # b, 1, h
        w_status_curr = tf.reshape(
            tf.gather(w_status_matrix, self.test_status),
            [-1, self.n_hidden, 1])  # b, h, 1
        w_status_next = tf.reshape(
            tf.gather(w_status_matrix, self.test_status_next),
            [-1, self.n_hidden, 1])
        b_status_curr = tf.reshape(
            tf.gather(b_status_matrix, self.test_status), [-1, 1])  # b, 1
        b_status_next = tf.reshape(
            tf.gather(b_status_matrix, self.test_status_next), [-1, 1])
        w_status = tf.concat([w_status_curr, w_status_next], axis=2)  # b, h, 2
        b_status = tf.concat([b_status_curr, b_status_next], axis=1)  # b, 2
        logits = tf.reshape(tf.matmul(rnn_output_test_expand, w_status),
                            [-1, 2]) + b_status
        pred_done = tf.argmax(logits, axis=1)
    return loss_status_scalar, pred_done, logits, state, prev_state

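# Inference sketch (an assumption; single-layer case, mirroring the Planner
# usage shown earlier): pred_done above selects column 0 ("keep the current
# command") or column 1 ("switch to the next"), so a caller can advance its
# command pointer whenever a step returns 1.
def planner_step(sess, pred_done, state, testing_input, prev_state,
                 test_status, test_status_next, feat, c, h, status, status_next):
    done, (c, h) = sess.run(
        [pred_done, state],
        feed_dict={testing_input: feat,
                   prev_state.c: c,
                   prev_state.h: h,
                   test_status: status,
                   test_status_next: status_next})
    return done[0], c, h  # caller: if done == 1, move to the next command
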