def Model(self, inputs):
    """Build the single-step recurrent actor graph.

    Args:
        inputs: four graph inputs, unpacked in order as
            ``(input_depth, input_cmd, input_prev_a, rnn_h_in)``.
            ``input_depth`` goes through the conv stack, ``input_cmd`` holds
            indices into the command embedding table, ``input_prev_a`` is
            multiplied by a ``[dim_action, dim_emb]`` matrix and ``rnn_h_in``
            is the recurrent state handed to the cell.

    Returns:
        ``(pred_action, rnn_h_out)``: the linear and angular heads
        concatenated on axis 1, and the state returned by the cell.
    """
    input_depth, input_cmd, input_prev_a, rnn_h_in = inputs

    # encode depth image
    conv1 = model_utils.conv2d(input_depth, 4, 5, 4, scope='conv1',
                               max_pool=False)
    conv2 = model_utils.conv2d(conv1, 16, 5, 4, scope='conv2',
                               max_pool=False)
    conv3 = model_utils.conv2d(conv2, 32, 3, 2, scope='conv3',
                               max_pool=False)
    shape = conv3.get_shape().as_list()
    # flatten everything but the leading (batch) axis
    depth_vect = tf.reshape(
        conv3, shape=[-1, shape[1] * shape[2] * shape[3]])  # b, d

    # encode cmd
    embedding_cmd = tf.get_variable('cmd_embedding',
                                    [self.n_cmd_type, self.dim_emb])
    cmd_vect = tf.reshape(tf.nn.embedding_lookup(embedding_cmd, input_cmd),
                          [-1, self.dim_emb])

    # encode prev action (affine projection into the embedding space)
    embedding_w_action = tf.get_variable('embedding_w_action',
                                         [self.dim_action, self.dim_emb])
    embedding_b_action = tf.get_variable('embedding_b_action',
                                         [self.dim_emb])
    prev_a_vect = (tf.matmul(input_prev_a, embedding_w_action)
                   + embedding_b_action)

    input_vect = tf.concat([depth_vect, cmd_vect, prev_a_vect], axis=1)

    # rnn
    if self.rnn_type == 'lstm':
        rnn_cell = model_utils._lstm_cell(self.n_hidden, 1, name='rnn_cell')
    else:
        rnn_cell = model_utils._gru_cell(self.n_hidden, 1, name='rnn_cell')
    # Feed the state that came in through `inputs`; previously the unpacked
    # value was ignored and `self.rnn_h_in` was read instead.
    rnn_output, rnn_h_out = rnn_cell(input_vect, rnn_h_in)

    # action: each head is scaled by the matching entry of action_range
    a_linear = model_utils.dense_layer(
        rnn_output, 1, 'a_linear',
        activation=tf.nn.sigmoid) * self.action_range[0]
    a_angular = model_utils.dense_layer(
        rnn_output, 1, 'a_angular',
        activation=tf.nn.tanh) * self.action_range[1]
    pred_action = tf.concat([a_linear, a_angular], axis=1)

    return pred_action, rnn_h_out
def model(self, inputs):
    """Build the training (whole-sequence) and testing (single-step) Q graphs.

    Args:
        inputs: list of eleven graph inputs, unpacked in order as
            ``input_demo_img, input_demo_cmd, input_img, input_depth,
            input_prev_action, input_img_test, input_depth_test,
            input_prev_action_test, rnn_h_in, demo_len, seq_len``.

    Returns:
        ``(q, q_test, rnn_h_out, att_pos)``: Q values for the flattened
        training sequence, Q values for the single test step, the GRU state
        after the test step, and the attention position from the test path
        (a zero tensor in ``'sum'`` mode).

    Raises:
        ValueError: if ``self.att_mode`` is neither ``'hard'`` nor ``'sum'``.
    """
    [
        input_demo_img, input_demo_cmd, input_img, input_depth,
        input_prev_action, input_img_test, input_depth_test,
        input_prev_action_test, rnn_h_in, demo_len, seq_len
    ] = inputs

    # ---- training ----
    input_img = tf.reshape(input_img, [-1] + self.dim_img)  # b*l, dim_img
    img_vect = self.encode_image(input_img)  # b*l, dim_img_feat
    input_depth = tf.reshape(input_depth,
                             [-1] + self.dim_depth)  # b*l, dim_depth
    depth_vect = self.encode_image(input_depth,
                                   scope='depth')  # b*l, dim_depth_feat
    input_prev_action = tf.reshape(
        input_prev_action, [-1, self.dim_action])  # b*l, dim_action
    # the previous action is turned into an index via argmax, then embedded
    prev_action_idx = tf.argmax(input_prev_action, axis=1)
    prev_a_vect = tf.reshape(
        tf.nn.embedding_lookup(self.embedding_a, prev_action_idx),
        [-1, self.dim_emb])  # b*l, dim_emb

    if self.att_mode == 'hard':
        demo_dense_seq, att_pos = self.process_demo_hard_att(
            input_demo_img, input_demo_cmd, img_vect, False, demo_len)
    elif self.att_mode == 'sum':
        demo_dense_seq, _ = self.process_demo_sum(
            input_demo_img, input_demo_cmd, demo_len)
    else:
        raise ValueError(
            'unsupported att_mode: {!r}'.format(self.att_mode))

    all_inputs = tf.concat(
        [demo_dense_seq, img_vect, depth_vect, prev_a_vect],
        axis=1)  # b*l, n_hidden+dim_img_feat+dim_depth_feat+dim_emb
    inputs_dense = model_utils.dense_layer(
        all_inputs, self.n_hidden, scope='inputs_dense')  # b*l, n_hidden
    rnn_input = tf.reshape(inputs_dense,
                           [-1, self.max_step, self.n_hidden])
    rnn_cell = model_utils._gru_cell(self.n_hidden, 1, name='rnn_cell')
    rnn_output, _ = tf.nn.dynamic_rnn(rnn_cell,
                                      rnn_input,
                                      sequence_length=seq_len,
                                      dtype=tf.float32)  # b, l, n_hidden
    rnn_output = tf.reshape(rnn_output,
                            [-1, self.n_hidden])  # b*l, n_hidden
    q = model_utils.dense_layer(rnn_output,
                                self.dim_action,
                                scope='q',
                                activation=None)  # b*l, dim_action

    # ---- testing ----
    input_img_test = tf.reshape(input_img_test,
                                [-1] + self.dim_img)  # b, dim_img
    img_vect_test = self.encode_image(input_img_test)  # b, dim_img_feat
    input_depth_test = tf.reshape(input_depth_test,
                                  [-1] + self.dim_depth)  # b, dim_depth
    depth_vect_test = self.encode_image(
        input_depth_test, scope='depth')  # b, dim_depth_feat
    input_prev_action_test = tf.reshape(
        input_prev_action_test, [-1, self.dim_action])  # b, dim_action
    prev_action_idx_test = tf.argmax(input_prev_action_test, axis=1)
    prev_a_vect_test = tf.reshape(
        tf.nn.embedding_lookup(self.embedding_a, prev_action_idx_test),
        [-1, self.dim_emb])  # b, dim_emb

    if self.att_mode == 'hard':
        demo_dense, att_pos = self.process_demo_hard_att(
            input_demo_img, input_demo_cmd, img_vect_test, True, demo_len)
    else:  # 'sum' (validated above)
        _, demo_dense = self.process_demo_sum(
            input_demo_img, input_demo_cmd, demo_len)
        # no attention position exists in 'sum' mode; return a dummy tensor
        att_pos = tf.zeros([1], dtype=tf.int32)

    all_inputs_test = tf.concat(
        [demo_dense, img_vect_test, depth_vect_test, prev_a_vect_test],
        axis=1)
    inputs_dense_test = model_utils.dense_layer(all_inputs_test,
                                                self.n_hidden,
                                                scope='inputs_dense')
    # 'rnn/rnn_cell' presumably mirrors the scope dynamic_rnn used for the
    # training cell so the weights are shared -- TODO confirm against
    # model_utils._gru_cell.
    rnn_cell = model_utils._gru_cell(self.n_hidden, 1, name='rnn/rnn_cell')
    rnn_output, rnn_h_out = rnn_cell(
        inputs_dense_test, rnn_h_in)  # b, n_hidden | b, n_hidden
    # Use the same 'q' scope as the training head. The old 'q_test' scope
    # gave the test path its own output layer that the loss on `q` never
    # updates. This relies on the enclosing scope allowing reuse, as the
    # duplicated 'inputs_dense' call already does.
    q_test = model_utils.dense_layer(rnn_output,
                                     self.dim_action,
                                     scope='q',
                                     activation=None)  # b, dim_action

    return q, q_test, rnn_h_out, att_pos
def __init__(self,
             sess,
             batch_size,
             max_step,
             demo_len,
             n_layers,
             n_hidden,
             dim_a=2,
             dim_img=[64, 64, 3],
             action_range=[0.3, np.pi / 6],
             learning_rate=1e-3,
             test_only=False,
             use_demo_action=False,
             use_demo_image=False):
    """Build the demonstration-following network with a soft position `eta`.

    At every step the demo features are mixed with weights
    ``exp(-|eta - j|)`` (``j`` is the demo index), concatenated with the
    current image feature and fed to a GRU cell. The GRU output drives the
    action heads and an increment ``1 + tanh(.)`` that advances ``eta``.

    Args:
        sess: stored on the instance; not used while building the graph.
        batch_size, n_layers: stored on the instance only.
        max_step: length of the training observation sequence.
        demo_len: length of the training demonstration sequence.
        n_hidden: width of the demo feature layers and of the GRU state.
        dim_a: action dimension; each head outputs ``dim_a // 2`` values.
        dim_img: image shape as a list (it is concatenated with lists).
            The default list is never mutated here.
        action_range: scale factors for the linear / angular heads.
        learning_rate: Adam learning rate.
        test_only: when true, the training graph and optimizer are skipped.
        use_demo_action, use_demo_image: which demo modalities to use; at
            least one must be set.

    Graph handles exposed on ``self``: the ``input_*`` / ``label_a`` /
    ``gru_h_in`` placeholders; ``action_seq``, ``eta_array``, ``loss`` and
    ``opt`` (training only); ``action``, ``eta`` and ``gru_h_out``
    (single-step test graph).

    Raises:
        AssertionError: if neither demo modality is enabled.
    """
    self.sess = sess
    self.batch_size = batch_size
    self.max_step = max_step
    self.demo_len = demo_len
    self.n_layers = n_layers
    self.n_hidden = n_hidden
    self.dim_a = dim_a
    self.dim_img = dim_img
    self.action_range = action_range
    self.learning_rate = learning_rate
    self.test_only = test_only
    self.use_demo_action = use_demo_action
    self.use_demo_image = use_demo_image

    # Checked up-front so that test_only construction is validated as well;
    # it used to run only inside the training branch.
    assert use_demo_action or use_demo_image, \
        'use demo image or action or both!'

    # Floor division keeps the unit count an int under true division too.
    half_dim_a = dim_a // 2

    with tf.variable_scope('network', reuse=tf.AUTO_REUSE):
        # training input
        self.input_demo_img = tf.placeholder(
            tf.float32,
            shape=[None, demo_len] + dim_img,
            name='input_demo_img')  # b, l of demo, h, d, c
        self.input_demo_a = tf.placeholder(
            tf.float32, shape=[None, demo_len, dim_a],
            name='input_demo_a')  # b, l of demo, 2
        self.input_eta = tf.placeholder(tf.float32,
                                        shape=[None],
                                        name='input_eta')  # b
        self.input_img = tf.placeholder(
            tf.float32,
            shape=[None, max_step, dim_img[0], dim_img[1], dim_img[2]],
            name='input_img')  # b, l, h, d, c
        self.label_a = tf.placeholder(tf.float32,
                                      shape=[None, max_step, dim_a],
                                      name='label_a')  # b, l, 2
        self.gru_h_in = tf.placeholder(tf.float32,
                                       shape=[None, n_hidden],
                                       name='gru_h_in')  # b, n_hidden

        # testing input
        self.input_demo_img_test = tf.placeholder(
            tf.float32, shape=[None] + dim_img,
            name='input_demo_img_test')  # l of demo, h, d, c
        self.input_demo_a_test = tf.placeholder(
            tf.float32, shape=[None, dim_a],
            name='input_demo_a_test')  # l of demo, 2
        self.input_img_test = tf.placeholder(
            tf.float32, shape=[1] + dim_img,
            name='input_img_test')  # 1, h, d, c
        self.input_eta_test = tf.placeholder(tf.float32,
                                             shape=[],
                                             name='input_eta_test')

        # create gru cell (the same object serves both graphs)
        gru_cell = model_utils._gru_cell(n_hidden, 1, name='gru_cell')

        # ---- training ----
        if not test_only:
            # process demo seq
            input_demo_img_reshape = tf.reshape(
                self.input_demo_img,
                [-1] + dim_img)  # b * l of demo, h, d, c
            input_demo_a_reshape = tf.reshape(
                self.input_demo_a, [-1, dim_a])  # b * l of demo, 2
            demo_img_vect = self.encode_image(
                input_demo_img_reshape)  # b * l of demo, -1
            if use_demo_action and use_demo_image:
                print('use demo action and image')
                demo_vect = tf.concat(
                    [demo_img_vect, input_demo_a_reshape],
                    axis=1)  # b * l of demo, -1
            elif use_demo_image:
                print('only use demo image')
                demo_vect = demo_img_vect
            elif use_demo_action:
                print('only use demo action')
                demo_vect = input_demo_a_reshape
            hidden1 = model_utils.dense_layer(demo_vect,
                                              n_hidden,
                                              scope='dense1_demo')
            demo_feat = model_utils.dense_layer(
                hidden1, n_hidden,
                scope='dense2_demo')  # b * l of demo, n_hidden
            demo_feat_reshape = tf.reshape(
                demo_feat,
                [-1, demo_len, n_hidden])  # b, l of demo, n_hidden
            demo_feat_list = tf.unstack(
                demo_feat_reshape, axis=1)  # l of demo [b, n_hidden]

            # process observation seq
            input_img_reshape = tf.reshape(
                self.input_img, [-1] + dim_img)  # b * l, h, d, c
            img_vect = self.encode_image(input_img_reshape)  # b * l, -1
            shape = img_vect.get_shape().as_list()
            img_vect_reshape = tf.reshape(
                img_vect, [-1, max_step, shape[1]])  # b, l, -1
            img_vect_list = tf.unstack(img_vect_reshape,
                                       axis=1)  # l [b, -1]

            action_list = []
            eta = tf.identity(self.input_eta, name='init_eta')
            eta_list = []
            gru_h_in = self.gru_h_in
            for t, img_vect_t in enumerate(img_vect_list):
                # soft read of the demo around the current position eta
                mu_t_list = []
                for j, demo_feat_j in enumerate(demo_feat_list):
                    w_j = tf.exp(-tf.abs(eta - j))  # b
                    w_j_expand = tf.expand_dims(w_j, axis=1)  # b, 1
                    w_j_tile = tf.tile(
                        w_j_expand, multiples=[1, n_hidden])  # b, n_hidden
                    mu_t_list.append(demo_feat_j * w_j_tile)
                mu_t = tf.add_n(mu_t_list)
                input_t = tf.concat(
                    [mu_t, img_vect_t],
                    axis=1)  # b, n_hidden + dim of img vect
                gru_output, self.gru_h_out = gru_cell(input_t, gru_h_in)
                gru_h_in = self.gru_h_out
                # increment = 1 + tanh(.)
                increment = 1. + model_utils.dense_layer(
                    gru_output,
                    1,
                    activation=tf.nn.tanh,
                    scope='dense_increment')  # b, 1
                increment = tf.squeeze(increment, axis=[1])  # b
                eta = tf.identity(eta + increment,
                                  name='eta_{}'.format(t))
                eta_list.append(eta)
                action_linear = model_utils.dense_layer(
                    gru_output,
                    half_dim_a,
                    activation=tf.nn.sigmoid,
                    scope='dense_a_linear') * action_range[0]  # b, 1
                action_angular = model_utils.dense_layer(
                    gru_output,
                    half_dim_a,
                    activation=tf.nn.tanh,
                    scope='dense_a_angular') * action_range[1]  # b, 1
                action_list.append(
                    tf.concat([action_linear, action_angular],
                              axis=1))  # l [b, 2]
            self.action_seq = tf.stack(action_list, axis=1)  # b, l, 2
            self.eta_array = tf.stack(eta_list, axis=1)  # b, l
            self.loss = tf.losses.mean_squared_error(
                labels=self.label_a, predictions=self.action_seq)
            start_time = time.time()
            self.opt = tf.train.AdamOptimizer(
                learning_rate=learning_rate).minimize(self.loss)
            print('construct opt time: {:.3f}'.format(time.time() -
                                                      start_time))

        # ---- testing (one observation, variable-length demo) ----
        # process demo seq
        demo_img_vect = self.encode_image(
            self.input_demo_img_test)  # l of demo, -1
        if use_demo_action and use_demo_image:
            demo_vect = tf.concat(
                [demo_img_vect, self.input_demo_a_test],
                axis=1)  # l of demo, -1
        elif use_demo_image:
            demo_vect = demo_img_vect
        elif use_demo_action:
            demo_vect = self.input_demo_a_test
        hidden1 = model_utils.dense_layer(demo_vect,
                                          n_hidden,
                                          scope='dense1_demo')
        demo_feat = model_utils.dense_layer(
            hidden1, n_hidden, scope='dense2_demo')  # l of demo, n_hidden
        # The demo length is only known at run time, so the weighted sum is
        # accumulated with a TensorArray + while_loop instead of unstack.
        tensor_array = tf.TensorArray(tf.float32,
                                      0,
                                      dynamic_size=True,
                                      infer_shape=True,
                                      element_shape=[n_hidden])
        demo_feat_array = tensor_array.unstack(demo_feat)
        seq_len = tf.shape(demo_feat)[0]
        mu_t = tf.zeros([n_hidden], name='mu_t')

        # process observation
        img_vect = self.encode_image(self.input_img_test)  # 1, -1
        eta = tf.identity(self.input_eta_test, name='eta_in')
        gru_h_in = self.gru_h_in

        def body(demo_idx, mu_t_in):
            # add the contribution of demo step `demo_idx` to the running sum
            w_j = tf.exp(-tf.abs(eta - tf.cast(demo_idx, tf.float32)))
            w_j_expand = tf.expand_dims(w_j, axis=0)  # 1
            w_j_tile = tf.tile(w_j_expand, multiples=[n_hidden])  # n_hidden
            demo_feat_t = demo_feat_array.read(demo_idx)  # n_hidden
            return (demo_idx + 1, mu_t_in + demo_feat_t * w_j_tile)

        def condition(demo_idx, output):
            return demo_idx < seq_len

        demo_idx = 0
        t_final, mu_t_final = tf.while_loop(cond=condition,
                                            body=body,
                                            loop_vars=[demo_idx, mu_t])
        mu_t_expand = tf.expand_dims(mu_t_final, axis=0)  # 1, n_hidden
        input_t = tf.concat([mu_t_expand, img_vect],
                            axis=1)  # 1, n_hidden + dim of img vect
        # NOTE: this overwrites the training-graph value of self.gru_h_out,
        # exactly as before.
        gru_output, self.gru_h_out = gru_cell(input_t, gru_h_in)
        increment = 1. + model_utils.dense_layer(
            gru_output, 1, activation=tf.nn.tanh,
            scope='dense_increment')  # 1, 1
        increment = tf.squeeze(increment, axis=[1])  # 1
        self.eta = eta + increment
        action_linear = model_utils.dense_layer(
            gru_output,
            half_dim_a,
            activation=tf.nn.sigmoid,
            scope='dense_a_linear') * action_range[0]  # 1, 1
        action_angular = model_utils.dense_layer(
            gru_output,
            half_dim_a,
            activation=tf.nn.tanh,
            scope='dense_a_angular') * action_range[1]  # 1, 1
        self.action = tf.concat([action_linear, action_angular], axis=1)
def __init__(self,
             sess,
             batch_size,
             max_step,
             demo_len,
             n_layers,
             n_hidden,
             dim_a=2,
             dim_img=[64, 64, 3],
             action_range=[0.3, np.pi/6],
             learning_rate=1e-3,
             test_only=False,
             use_demo_action=False,
             use_demo_image=False,
             use_flownet=False):
    """Build the paired-image (observation + demo) GRU imitation network.

    Observation and demo frames are concatenated along the channel axis,
    encoded per frame, run through a GRU over ``max_step`` steps, and mapped
    to a linear and an angular action head. The graph is trained with an MSE
    loss against ``label_a`` using Adam.

    Args:
        sess: stored on the instance; not used while building the graph.
        batch_size: number of sequences per batch. The sequence-length
            vector is created with this static length, so fed batches must
            contain exactly this many sequences.
        max_step: number of time steps in every sequence.
        demo_len, n_layers, test_only, use_demo_action: stored only.
        n_hidden: GRU state width.
        dim_a: action dimension; each head outputs ``dim_a // 2`` values.
        dim_img: image shape as a list (it is concatenated with lists).
            The default list is never mutated here.
        action_range: scale factors for the linear / angular heads.
        learning_rate: Adam learning rate.
        use_demo_image: accepted for signature compatibility; unused here.
        use_flownet: encode the image pair with ``get_flownet_feature``
            instead of ``self.encode_image``.

    Graph handles exposed on ``self``: ``input_ob``, ``input_demo``,
    ``label_a``, ``gru_h_in``, ``action_seq``, ``loss`` and ``opt``.
    """
    self.sess = sess
    self.batch_size = batch_size
    self.max_step = max_step
    self.demo_len = demo_len
    self.n_layers = n_layers
    self.n_hidden = n_hidden
    self.dim_a = dim_a
    self.dim_img = dim_img
    self.action_range = action_range
    self.learning_rate = learning_rate
    self.test_only = test_only
    self.use_demo_action = use_demo_action

    # Floor division keeps the unit count an int under true division too.
    half_dim_a = dim_a // 2

    # training input
    self.input_ob = tf.placeholder(tf.float32,
                                   shape=[None, max_step] + dim_img,
                                   name='input_observation')  # b, l, h, d, c
    self.input_demo = tf.placeholder(tf.float32,
                                     shape=[None, max_step] + dim_img,
                                     name='input_demo')  # b, l, h, d, c
    self.label_a = tf.placeholder(tf.float32,
                                  shape=[None, max_step, dim_a],
                                  name='label_a')  # b, l, 2
    self.gru_h_in = tf.placeholder(tf.float32,
                                   shape=[None, n_hidden],
                                   name='gru_h_in')  # b, n_hidden
    # Every sequence is max_step long. The vector used to be filled with
    # batch_size, which makes dynamic_rnn zero the outputs past step
    # `batch_size` whenever batch_size < max_step.
    seq_lens = tf.constant(max_step, dtype=tf.int32, shape=[batch_size])

    # create gru cell
    gru_cell = model_utils._gru_cell(n_hidden, 1, name='gru_cell')

    # pair each observation frame with its demo frame along the channels
    input_ob_reshape = tf.reshape(self.input_ob, [-1] + dim_img)
    input_demo_reshape = tf.reshape(self.input_demo, [-1] + dim_img)
    concat_inputs = tf.concat([input_ob_reshape, input_demo_reshape],
                              axis=3)  # b*l, h, w, c*2
    if use_flownet:
        img_vector = get_flownet_feature(concat_inputs)  # b*l, d
    else:
        img_vector = self.encode_image(concat_inputs)  # b*l, d

    # NOTE(review): indentation was lost in the reviewed source; the
    # 'memory' scope is assumed to cover only the recurrent part -- confirm
    # against existing checkpoints before relying on variable names.
    with tf.variable_scope('memory', reuse=tf.AUTO_REUSE):
        shape = img_vector.get_shape().as_list()
        img_vector_seqs = tf.reshape(img_vector, [-1, max_step, shape[-1]])
        gru_outputs, gru_state = tf.nn.dynamic_rnn(
            gru_cell,
            img_vector_seqs,
            initial_state=self.gru_h_in,
            sequence_length=seq_lens)

    # Flatten time into the batch axis before the heads. Feeding the 3-D
    # gru_outputs (as before) breaks the axis=1 concat / reshape below.
    gru_outputs_reshape = tf.reshape(gru_outputs, [-1, n_hidden])
    action_linear = model_utils.dense_layer(
        gru_outputs_reshape,
        half_dim_a,
        activation=tf.nn.sigmoid,
        scope='dense_a_linear') * action_range[0]  # b*l, 1
    action_angular = model_utils.dense_layer(
        gru_outputs_reshape,
        half_dim_a,
        activation=tf.nn.tanh,
        scope='dense_a_angular') * action_range[1]  # b*l, 1
    action = tf.concat([action_linear, action_angular], axis=1)  # b*l, 2
    self.action_seq = tf.reshape(
        action, [-1, max_step, 2 * half_dim_a])  # b, l, 2
    self.loss = tf.losses.mean_squared_error(labels=self.label_a,
                                             predictions=self.action_seq)
    self.opt = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(self.loss)
def testing_model(self, inputs):
    """Build the single-step inference graph for command prediction.

    Args:
        inputs: seven graph inputs, unpacked in order as
            ``input_demo_img, input_demo_cmd, input_img, input_prev_cmd,
            input_prev_action, rnn_h_in, demo_len``.

    Returns:
        ``(predict, rnn_h_out, att_pos, max_prob, min_norm)``: the argmax
        command, the next recurrent state (``rnn_h_in`` passed through when
        ``post_att_model == 'dense'``), the attention position, and the
        max / min reductions of the attention probability and L2 norm
        tensors (zeros in ``'sum'`` mode).

    Raises:
        ValueError: for an unsupported ``demo_mode``, ``inputs_num`` or
            ``post_att_model``.
    """
    (input_demo_img, input_demo_cmd, input_img, input_prev_cmd,
     input_prev_action, rnn_h_in, demo_len) = inputs

    # process observation
    input_img = tf.reshape(input_img, [-1] + self.dim_img)  # b, dim_img
    img_vect = self.encode_image(input_img)  # b, dim_img_feat
    prev_cmd_vect = tf.reshape(
        tf.nn.embedding_lookup(self.embedding_cmd, input_prev_cmd),
        [-1, self.dim_emb])  # b, dim_emb
    input_prev_action = tf.reshape(input_prev_action,
                                   [-1, self.dim_a])  # b, dim_a
    prev_a_vect = model_utils.dense_layer(input_prev_action,
                                          self.dim_emb,
                                          scope='a_embedding',
                                          activation=None)  # b, dim_emb

    # process demo
    if self.demo_mode == 'sum':
        _, demo_dense = self.process_demo_sum(input_demo_img,
                                              input_demo_cmd,
                                              demo_len)  # b, n_hidden
        # 'sum' mode has no attention; emit zero tensors as stand-ins
        att_pos = tf.zeros([1, 1], dtype=tf.int32)
        prob = tf.zeros([1, self.max_n_demo], dtype=tf.float32)
        l2_norm = tf.zeros([1, self.max_n_demo], dtype=tf.float32)
    elif self.demo_mode == 'hard':
        demo_dense, att_pos, _, prob, l2_norm = self.process_demo_hard_att(
            input_demo_img, input_demo_cmd, img_vect, True, demo_len)
    else:
        raise ValueError(
            'unsupported demo_mode: {!r}'.format(self.demo_mode))

    # inputs_num selects how many feature groups feed the classifier
    if self.inputs_num <= 2:
        all_inputs = demo_dense
    elif self.inputs_num == 3:
        all_inputs = tf.concat([demo_dense, img_vect],
                               axis=1)  # b, n_hidden+dim_img_feat
    elif self.inputs_num == 4:
        all_inputs = tf.concat(
            [demo_dense, img_vect, prev_cmd_vect],
            axis=1)  # b, n_hidden+dim_img_feat+dim_emb
    elif self.inputs_num == 5:
        all_inputs = tf.concat(
            [demo_dense, img_vect, prev_cmd_vect, prev_a_vect],
            axis=1)  # b, n_hidden+dim_img_feat+dim_emb*2
    else:
        raise ValueError(
            'unsupported inputs_num: {!r}'.format(self.inputs_num))

    inputs_dense = model_utils.dense_layer(
        all_inputs, self.n_hidden, scope='inputs_dense')  # b, n_hidden

    if self.post_att_model == 'gru':
        # 'rnn/rnn_cell' presumably mirrors the scope dynamic_rnn gives the
        # training cell so the weights are shared -- TODO confirm against
        # model_utils._gru_cell.
        rnn_cell = model_utils._gru_cell(self.n_hidden,
                                         1,
                                         name='rnn/rnn_cell')
        rnn_output, rnn_h_out = rnn_cell(
            inputs_dense, rnn_h_in)  # b, n_hidden | b, n_hidden
        logits = model_utils.dense_layer(rnn_output,
                                         self.n_cmd_type,
                                         scope='logits',
                                         activation=None)  # b, n_cmd_type
    elif self.post_att_model == 'dense':
        # Same width as the 'dense' layer built by training_model. The old
        # `self.n_hidden / 2` disagreed with that layer's shape and is a
        # float unit count under true division.
        dense = model_utils.dense_layer(inputs_dense,
                                        self.n_hidden,
                                        scope='dense')  # b, n_hidden
        logits = model_utils.dense_layer(dense,
                                         self.n_cmd_type,
                                         scope='logits',
                                         activation=None)  # b, n_cmd_type
        # no recurrence in this variant: pass the state through unchanged
        rnn_h_out = rnn_h_in
    else:
        raise ValueError(
            'unsupported post_att_model: {!r}'.format(self.post_att_model))

    predict = tf.argmax(logits, axis=1)  # b
    max_prob = tf.reduce_max(prob)  # scalar
    min_norm = tf.reduce_min(l2_norm)  # scalar

    return predict, rnn_h_out, att_pos, max_prob, min_norm
def training_model(self, inputs):
    """Build the sequence training graph for command prediction.

    Args:
        inputs: eight graph inputs, unpacked in order as
            ``input_demo_img, input_demo_cmd, input_img, input_prev_cmd,
            input_prev_action, label_cmd, demo_len, seq_len``.

    Returns:
        ``[all_accuracy, cmd_loss, att_loss, pred, att_pos]``: accuracy over
        all valid steps, masked cross-entropy averaged over valid steps,
        the attention loss (zero in ``'sum'`` mode), masked predictions
        ``(b, l)`` and masked attention positions ``(b, l)``.

    Raises:
        ValueError: for an unsupported ``demo_mode``, ``inputs_num`` or
            ``post_att_model``.
    """
    (input_demo_img, input_demo_cmd, input_img, input_prev_cmd,
     input_prev_action, label_cmd, demo_len, seq_len) = inputs

    # process observation
    input_img = tf.reshape(input_img, [-1] + self.dim_img)  # b*l, dim_img
    img_vect = self.encode_image(input_img)  # b*l, dim_img_feat
    prev_cmd_vect = tf.reshape(
        tf.nn.embedding_lookup(self.embedding_cmd, input_prev_cmd),
        [-1, self.dim_emb])  # b*l, dim_emb
    input_prev_action = tf.reshape(input_prev_action,
                                   [-1, self.dim_a])  # b*l, dim_a
    prev_a_vect = model_utils.dense_layer(input_prev_action,
                                          self.dim_emb,
                                          scope='a_embedding',
                                          activation=None)  # b*l, dim_emb

    # process demo
    if self.demo_mode == 'sum':
        demo_dense_seq, _ = self.process_demo_sum(
            input_demo_img, input_demo_cmd, demo_len)  # b*l, n_hidden
        # no attention in 'sum' mode: zero position and zero loss
        att_pos = tf.zeros([self.batch_size, self.max_step], dtype=tf.int32)
        att_loss = tf.zeros([], dtype=tf.float32)
    elif self.demo_mode == 'hard':
        demo_dense_seq, att_pos, att_logits, prob, _ = \
            self.process_demo_hard_att(input_demo_img, input_demo_cmd,
                                       img_vect, False, demo_len)
    else:
        raise ValueError(
            'unsupported demo_mode: {!r}'.format(self.demo_mode))

    # post-attention inputs
    # dropouts
    if not self.test:
        drop_rate = 1. - self.keep_prob
        demo_dense_seq = tf.nn.dropout(demo_dense_seq, rate=drop_rate)
        img_vect = tf.nn.dropout(img_vect, rate=drop_rate)
        prev_cmd_vect = tf.nn.dropout(prev_cmd_vect, rate=drop_rate)
        prev_a_vect = tf.nn.dropout(prev_a_vect, rate=drop_rate)

    # inputs_num selects how many feature groups feed the classifier
    if self.inputs_num <= 2:
        all_inputs = demo_dense_seq
    elif self.inputs_num == 3:
        all_inputs = tf.concat([demo_dense_seq, img_vect],
                               axis=1)  # b*l, n_hidden+dim_img_feat
    elif self.inputs_num == 4:
        all_inputs = tf.concat(
            [demo_dense_seq, img_vect, prev_cmd_vect],
            axis=1)  # b*l, n_hidden+dim_img_feat+dim_emb
    elif self.inputs_num == 5:
        all_inputs = tf.concat(
            [demo_dense_seq, img_vect, prev_cmd_vect, prev_a_vect],
            axis=1)  # b*l, n_hidden+dim_img_feat+dim_emb*2
    else:
        raise ValueError(
            'unsupported inputs_num: {!r}'.format(self.inputs_num))

    inputs_dense = model_utils.dense_layer(
        all_inputs, self.n_hidden, scope='inputs_dense')  # b*l, n_hidden

    # post-attention model
    if self.post_att_model == 'gru':
        print('post attention model: gru')
        rnn_input = tf.reshape(inputs_dense,
                               [-1, self.max_step, self.n_hidden])
        rnn_cell = model_utils._gru_cell(self.n_hidden, 1, name='rnn_cell')
        rnn_output, _ = tf.nn.dynamic_rnn(
            rnn_cell, rnn_input, sequence_length=seq_len,
            dtype=tf.float32)  # b, l, n_hidden
        rnn_output = tf.reshape(rnn_output,
                                [-1, self.n_hidden])  # b*l, n_hidden
        logits = model_utils.dense_layer(
            rnn_output, self.n_cmd_type, scope='logits',
            activation=None)  # b*l, n_cmd_type
    elif self.post_att_model == 'dense':
        print('post attention model: dense')
        dense_output = model_utils.dense_layer(
            inputs_dense, self.n_hidden, scope='dense')  # b*l, n_hidden
        logits = model_utils.dense_layer(
            dense_output, self.n_cmd_type, scope='logits',
            activation=None)  # b*l, n_cmd_type
    else:
        raise ValueError(
            'unsupported post_att_model: {!r}'.format(self.post_att_model))

    # predict (padded steps are forced to class 0 by the mask)
    pred_mask = tf.sequence_mask(seq_len,
                                 maxlen=self.max_step,
                                 dtype=tf.int32)  # b, l
    pred = tf.argmax(
        tf.reshape(logits, [-1, self.max_step, self.n_cmd_type]),
        axis=2,
        output_type=tf.int32) * pred_mask  # b, l

    # cmd_loss: cross-entropy on valid steps, averaged over their count
    label_cmd = tf.reshape(label_cmd, [-1])  # b*l
    loss_mask = tf.reshape(
        tf.sequence_mask(seq_len, maxlen=self.max_step, dtype=tf.float32),
        [-1])  # b*l
    cmd_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=label_cmd, logits=logits) * loss_mask  # b*l
    cmd_loss = tf.reduce_sum(cmd_loss) / tf.cast(tf.reduce_sum(seq_len),
                                                 tf.float32)

    # accuracy: the number of padded steps is subtracted from the match
    # count, which presumably relies on padded labels being 0 like the
    # masked predictions -- TODO confirm with the data pipeline.
    correct_pred = tf.equal(
        pred, tf.reshape(label_cmd, [-1, self.max_step]))  # b, l
    batch_correct_num = tf.reduce_sum(tf.cast(correct_pred, tf.int32),
                                      axis=1)  # b
    # per-sequence accuracy; computed as in the original but not returned
    batch_accuracy = tf.cast(
        (batch_correct_num - tf.reduce_sum(1 - pred_mask, axis=1)),
        tf.float32) / tf.cast(tf.reduce_sum(pred_mask, axis=1),
                              tf.float32)  # b
    all_correct_num = tf.reduce_sum(tf.cast(correct_pred,
                                            tf.int32))  # scalar
    all_accuracy = tf.cast(
        (all_correct_num - tf.reduce_sum(1 - pred_mask)),
        tf.float32) / tf.cast(tf.reduce_sum(pred_mask), tf.float32)

    if self.demo_mode == 'hard':
        # reinforce
        # NOTE(review): sample_loss is reduced to a scalar before being
        # multiplied by the per-step reward estimate; kept as-is, but worth
        # confirming this is the intended estimator.
        sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=att_logits, labels=att_pos) * loss_mask  # b*l
        sample_loss = tf.reduce_sum(sample_loss) / tf.reduce_sum(loss_mask)
        reward_estimate = model_utils.reward_estimate(
            all_inputs, all_accuracy) * loss_mask  # b*l
        select_loss = sample_loss * tf.stop_gradient(
            reward_estimate)  # b*l
        select_loss = tf.reduce_sum(
            select_loss / tf.reduce_sum(loss_mask))  # scalar
        baseline_loss = tf.reduce_sum(
            tf.square(reward_estimate)) / tf.reduce_sum(loss_mask)
        att_loss = select_loss + baseline_loss

        # Zero out attention positions on padded steps. In 'sum' mode
        # att_pos is already an all-zero (batch_size, max_step) tensor, so
        # masking it would not change its values.
        att_mask = tf.sequence_mask(seq_len,
                                    maxlen=self.max_step,
                                    dtype=att_pos.dtype)  # b, l
        att_pos = tf.reshape(att_pos,
                             [-1, self.max_step]) * att_mask  # b, l

    return [all_accuracy, cmd_loss, att_loss, pred, att_pos]
def Model(self, inputs):
    """Build the sequence critic: one scalar Q value per time step.

    Args:
        inputs: six graph inputs, in order ``(input_depth, input_cmd,
            input_prev_a, input_action, gru_h_in, length)``. ``gru_h_in`` is
            accepted but not used by this graph; ``length`` is the
            per-sequence length handed to ``dynamic_rnn``.

    Returns:
        A one-element list ``[q]`` where ``q`` is the dense output computed
        from the flattened GRU outputs.
    """
    depth_in, cmd_in, prev_a_in, action_in, _unused_h_in, seq_length = inputs

    # depth encoder: three conv layers, then flatten all non-batch axes
    feat = depth_in
    for conv_scope, conv_args in (('conv1', (4, 5, 4)),
                                  ('conv2', (16, 5, 4)),
                                  ('conv3', (32, 3, 2))):
        feat = model_utils.conv2d(feat, *conv_args, scope=conv_scope,
                                  max_pool=False)
    conv_shape = feat.get_shape().as_list()
    flat_dim = conv_shape[1] * conv_shape[2] * conv_shape[3]
    depth_feat = tf.reshape(feat, shape=[-1, flat_dim])  # b*l, d

    # command embedding lookup
    cmd_table = tf.get_variable('cmd_embedding',
                                [self.n_cmd_type, self.dim_emb])
    cmd_feat = tf.reshape(tf.nn.embedding_lookup(cmd_table, cmd_in),
                          [-1, self.dim_emb])

    # the previous and current action share one affine embedding
    act_w = tf.get_variable('embedding_w_action',
                            [self.dim_action, self.dim_emb])
    act_b = tf.get_variable('embedding_b_action', [self.dim_emb])
    prev_a_feat = tf.matmul(prev_a_in, act_w) + act_b
    action_feat = tf.matmul(action_in, act_w) + act_b

    step_feat = tf.concat(
        [depth_feat, cmd_feat, prev_a_feat, action_feat], axis=1)

    # recurrent part: regroup the flat rows into (b, l, feat) sequences
    cell = model_utils._gru_cell(self.n_hidden, 1, name='gru_cell')
    feat_dim = step_feat.get_shape().as_list()[-1]
    seq_feat = tf.reshape(step_feat, [-1, self.max_step, feat_dim])
    seq_out, _ = tf.nn.dynamic_rnn(cell,
                                   seq_feat,
                                   sequence_length=seq_length,
                                   dtype=tf.float32)  # b, l, h
    flat_out = tf.reshape(seq_out, [-1, self.n_hidden])  # b*l, h

    # q head, initialised uniformly in [-0.003, 0.003]
    q = model_utils.dense_layer(
        flat_out,
        1,
        'q',
        activation=None,
        w_init=tf.initializers.random_uniform(-0.003, 0.003),
        b_init=tf.initializers.random_uniform(-0.003, 0.003))

    return [q]
def Model(self, inputs):
    """Build the actor with a sequence (training) and a one-step (testing) path.

    Args:
        inputs: five graph inputs, in order ``(input_depth, input_cmd,
            input_prev_a, gru_h_in, length)``. ``length`` is the
            per-sequence length for ``dynamic_rnn``; ``gru_h_in`` is the
            state fed to the cell on the one-step path.

    Returns:
        ``[action, action_test, gru_h_out]``: actions for every flattened
        training step, actions from the one-step path, and the cell state
        after that single step.
    """
    depth_in, cmd_in, prev_a_in, h_in, seq_length = inputs

    def action_head(features):
        # Linear and angular heads scaled by action_range; both paths call
        # this with the same scope names ('a_linear', 'a_angular').
        linear = model_utils.dense_layer(
            features, 1, 'a_linear',
            activation=tf.nn.sigmoid) * self.action_range[0]
        angular = model_utils.dense_layer(
            features, 1, 'a_angular',
            activation=tf.nn.tanh) * self.action_range[1]
        return tf.concat([linear, angular], axis=1)

    # depth encoder: three conv layers, then flatten all non-batch axes
    feat = depth_in
    for conv_scope, conv_args in (('conv1', (4, 5, 4)),
                                  ('conv2', (16, 5, 4)),
                                  ('conv3', (32, 3, 2))):
        feat = model_utils.conv2d(feat, *conv_args, scope=conv_scope,
                                  max_pool=False)
    conv_shape = feat.get_shape().as_list()
    flat_dim = conv_shape[1] * conv_shape[2] * conv_shape[3]
    depth_feat = tf.reshape(feat, shape=[-1, flat_dim])  # b, d

    # command embedding lookup
    cmd_table = tf.get_variable('cmd_embedding',
                                [self.n_cmd_type, self.dim_emb])
    cmd_feat = tf.reshape(tf.nn.embedding_lookup(cmd_table, cmd_in),
                          [-1, self.dim_emb])

    # previous action through an affine embedding
    act_w = tf.get_variable('embedding_w_action',
                            [self.dim_action, self.dim_emb])
    act_b = tf.get_variable('embedding_b_action', [self.dim_emb])
    prev_a_feat = tf.matmul(prev_a_in, act_w) + act_b

    step_feat = tf.concat([depth_feat, cmd_feat, prev_a_feat], axis=1)

    # one cell object serves both the sequence and the single-step path
    cell = model_utils._gru_cell(self.n_hidden, 1, name='gru_cell')

    # training: regroup rows into (b, l, feat) and unroll with dynamic_rnn
    feat_dim = step_feat.get_shape().as_list()[-1]
    seq_feat = tf.reshape(step_feat, [-1, self.max_step, feat_dim])
    seq_out, _ = tf.nn.dynamic_rnn(cell,
                                   seq_feat,
                                   sequence_length=seq_length,
                                   dtype=tf.float32)  # b, l, h
    flat_out = tf.reshape(seq_out, [-1, self.n_hidden])  # b*l, h
    action = action_head(flat_out)

    # testing: a single cell step from the supplied state
    step_out, gru_h_out = cell(step_feat, h_in)
    action_test = action_head(step_out)

    return [action, action_test, gru_h_out]