def __init__(self, sess, reuse=True, num_unrolls=1, game_state=None, net_scope=None):
    """Create the agent's FreeSpaceNetwork and attach it to a game state.

    Args:
        sess: Session object; stored on the agent and passed to ``GameState``
            when no ``game_state`` is supplied.
        reuse: ``reuse`` flag for the ``nav_global_network`` variable scope.
            Only consulted when ``net_scope`` is None.
        num_unrolls: Unroll count handed to the network when ``net_scope`` is
            None; always recorded on ``self.num_unrolls``.
        game_state: Existing game state to use; a new ``GameState`` is
            created when this is None.
        net_scope: Existing variable scope to re-enter. When given, the
            network is built inside it with ``reuse=True``, a single unroll
            and ``add_loss=False``.
    """
    if net_scope is not None:
        # A scope was handed in: build inside it, sharing its variables.
        with tf.variable_scope(net_scope, reuse=True):
            self.network = FreeSpaceNetwork(constants.GRU_SIZE, 1, 1)
            self.network.create_net(add_loss=False)
    else:
        with tf.name_scope('agent'), \
                tf.variable_scope('nav_global_network', reuse=reuse):
            self.network = FreeSpaceNetwork(constants.GRU_SIZE, 1, num_unrolls)
            self.network.create_net()

    self.game_state = GameState(sess=sess) if game_state is None else game_state
    self.action_util = self.game_state.action_util

    self.sess = sess
    self.gt_graph = None
    self.num_unrolls = num_unrolls
    self.num_steps = 0
    self.global_step_id = 0

    # Square window with TERMINAL_CHECK_PADDING cells on each side of the centre.
    window = constants.TERMINAL_CHECK_PADDING * 2 + 1
    self.pose_indicator = np.zeros((window, window))
    self.times = np.zeros(2)
def run():
    """Train the free-space navigation network from a threaded replay buffer.

    Builds a ``FreeSpaceNetwork`` under the ``nav_global_network`` variable
    scope, starts ``constants.PARALLEL_SIZE`` daemon threads that keep the
    module-level ``data_buffer`` / ``data_counts`` filled with generated
    episodes, and then runs training iterations from the restored checkpoint
    step up to ``constants.MAX_TIME_STEP``.

    Takes no arguments and returns ``None``. Exceptions raised during setup
    or training (including Ctrl-C) are printed as a traceback instead of
    being propagated. Unless ``constants.DEBUG`` or ``constants.DRAWING`` is
    set, the model is checkpointed on the way out, provided a saver exists
    and at least one iteration was started.
    """
    # Pre-bind everything the ``finally`` clause touches so that a failure
    # during setup cannot turn into a NameError that hides the real error.
    sess = None
    saver = None
    iteration = None
    try:
        with tf.variable_scope('nav_global_network'):
            network = FreeSpaceNetwork(constants.GRU_SIZE,
                                       constants.BATCH_SIZE,
                                       constants.NUM_UNROLLS)
            network.create_net()
            training_step = network.training_op

        with tf.variable_scope('loss'):
            loss_summary_op = tf.summary.merge([
                tf.summary.scalar('loss', network.loss),
            ])
        summary_full = tf.summary.merge_all()
        conv_var_list = [
            v for v in tf.trainable_variables()
            if 'conv' in v.name and 'weight' in v.name and
            (v.get_shape().as_list()[0] != 1 or
             v.get_shape().as_list()[1] != 1)
        ]
        for var in conv_var_list:
            tf_util.conv_variable_summaries(
                var, scope=var.name.replace('/', '_')[:-2])
        summary_with_images = tf.summary.merge_all()

        # prepare session
        sess = tf_util.Session()

        # Per batch slot: [next start index, sequence length].
        seq_inds = np.zeros((constants.BATCH_SIZE, 2), dtype=np.int32)

        sequence_generators = []
        for thread_index in range(constants.PARALLEL_SIZE):
            sequence_generator = SequenceGenerator(sess)
            sequence_generators.append(sequence_generator)

        sess.run(tf.global_variables_initializer())

        if not (constants.DEBUG or constants.DRAWING):
            from utils import py_util
            time_str = py_util.get_time_str()
            summary_writer = tf.summary.FileWriter(
                os.path.join(constants.LOG_FILE, time_str), sess.graph)
        else:
            summary_writer = None

        saver = tf.train.Saver(max_to_keep=3)

        # init or load checkpoint
        start_it = tf_util.restore_from_dir(sess, constants.CHECKPOINT_DIR)

        sess.graph.finalize()

        data_lock = threading.Lock()

        def load_new_data(thread_index):
            """Producer loop: generate episodes and store them in the buffer."""
            global data_buffer
            global data_counts

            sequence_generator = sequence_generators[thread_index]

            counter = 0
            while True:
                # Wait until the buffer has room or some entry has been used.
                while not (len(data_buffer) < constants.REPLAY_BUFFER_SIZE or
                           np.max(data_counts) > 0):
                    time.sleep(1)
                counter += 1
                if constants.DEBUG:
                    print('\nThread %d' % thread_index)
                episode, _, goal_pose = sequence_generator.generate_episode()
                # Turn the list of per-step dicts into a dict of per-key lists.
                new_data = {
                    key: [step[key] for step in episode]
                    for key in episode[0]
                }
                new_data['goal_pose'] = goal_pose
                new_data['memory'] = np.zeros(
                    (constants.SPATIAL_MAP_HEIGHT,
                     constants.SPATIAL_MAP_WIDTH,
                     constants.MEMORY_SIZE))
                new_data['gru_state'] = np.zeros(constants.GRU_SIZE)
                if constants.DRAWING:
                    new_data['debug_images'] = sequence_generator.debug_images
                # ``with`` guarantees the lock is released even on an error.
                with data_lock:
                    if len(data_buffer) < constants.REPLAY_BUFFER_SIZE:
                        data_counts[len(data_buffer)] = 0
                        data_buffer.append(new_data)
                        counts = data_counts[:len(data_buffer)]
                        if counter % 10 == 0:
                            print('Buffer size %d Num used %d Max used amount %d'
                                  % (len(data_buffer),
                                     len(counts[counts > 0]),
                                     np.max(counts)))
                    else:
                        # Buffer full: overwrite the most-used entry.
                        max_count_ind = np.argmax(data_counts)
                        data_buffer[max_count_ind] = new_data
                        data_counts[max_count_ind] = 0
                        if counter % 10 == 0:
                            print('Num used %d Max used amount %d'
                                  % (len(data_counts[data_counts > 0]),
                                     np.max(data_counts)))

        threads = []
        for i in range(constants.PARALLEL_SIZE):
            load_data_thread = threading.Thread(target=load_new_data,
                                                args=(i, ))
            load_data_thread.daemon = True
            load_data_thread.start()
            threads.append(load_data_thread)
            time.sleep(1)

        sequences = [None] * constants.BATCH_SIZE

        curr_it = 0
        # Small non-zero starting values for the accumulated timers.
        dataTimeTotal = 0.00001
        solverTimeTotal = 0.00001
        summaryTimeTotal = 0.00001
        totalTimeTotal = 0.00001

        chosen_inds = set()
        loc_to_chosen_ind = {}
        for iteration in range(start_it, constants.MAX_TIME_STEP):
            if iteration == start_it or iteration % 10 == 1:
                currentTimeStart = time.time()
            tStart = time.time()
            batch_data = []
            batch_action = []
            batch_memory = []
            batch_gru_state = []
            batch_labels = []
            batch_pose = []
            batch_mask = []
            batch_goal_pose = []
            batch_pose_indicator = []
            batch_possible_label = []
            batch_debug_images = []
            for bb in range(constants.BATCH_SIZE):
                if seq_inds[bb, 0] == seq_inds[bb, 1]:
                    # Pick a new random sequence
                    pickable_inds = set(
                        np.where(data_counts < 100)[0]) - chosen_inds
                    while not pickable_inds:
                        pickable_inds = set(
                            np.where(data_counts < 100)[0]) - chosen_inds
                        time.sleep(1)
                    # random.sample() rejects sets on Python >= 3.11, so
                    # sample from a sorted list of the candidates instead.
                    random_ind = random.sample(sorted(pickable_inds), 1)[0]
                    with data_lock:
                        sequences[bb] = data_buffer[random_ind]
                        sequences[bb]['memory'] = np.zeros(
                            (constants.SPATIAL_MAP_HEIGHT,
                             constants.SPATIAL_MAP_WIDTH,
                             constants.MEMORY_SIZE))
                        sequences[bb]['gru_state'] = np.zeros(
                            constants.GRU_SIZE)
                        data_counts[random_ind] += 1
                        if bb in loc_to_chosen_ind:
                            chosen_inds.remove(loc_to_chosen_ind[bb])
                        loc_to_chosen_ind[bb] = random_ind
                        chosen_inds.add(random_ind)
                    seq_inds[bb, 0] = 0
                    seq_inds[bb, 1] = len(sequences[bb]['color'])
                data_len = min(constants.NUM_UNROLLS,
                               seq_inds[bb, 1] - seq_inds[bb, 0])
                ind0 = seq_inds[bb, 0]
                ind1 = seq_inds[bb, 0] + data_len
                data = sequences[bb]['color'][ind0:ind1]
                action = sequences[bb]['action'][ind0:ind1]
                labels = sequences[bb]['label'][ind0:ind1]
                memory = sequences[bb]['memory'].copy()
                gru_state = sequences[bb]['gru_state'].copy()
                pose = sequences[bb]['pose'][ind0:ind1]
                goal_pose = sequences[bb]['goal_pose']
                mask = sequences[bb]['weight'][ind0:ind1]
                pose_indicator = sequences[bb]['pose_indicator'][ind0:ind1]
                possible_label = sequences[bb]['possible_label'][ind0:ind1]
                if constants.DRAWING:
                    batch_debug_images.append(
                        sequences[bb]['debug_images'][ind0:ind1])
                if data_len < (constants.NUM_UNROLLS):
                    # Sequence ended inside this unroll window: mark the slot
                    # for a fresh sequence and pad up to NUM_UNROLLS steps.
                    seq_inds[bb, :] = 0
                    num_pad = constants.NUM_UNROLLS - data_len
                    data.extend(
                        [np.zeros_like(data[0]) for _ in range(num_pad)])
                    action.extend(
                        [np.zeros_like(action[0]) for _ in range(num_pad)])
                    labels.extend(
                        [np.zeros_like(labels[0]) for _ in range(num_pad)])
                    pose.extend([pose[-1] for _ in range(num_pad)])
                    mask.extend(
                        [np.zeros_like(mask[0]) for _ in range(num_pad)])
                    pose_indicator.extend(
                        [np.zeros_like(pose_indicator[0])
                         for _ in range(num_pad)])
                    possible_label.extend(
                        [np.zeros_like(possible_label[0])
                         for _ in range(num_pad)])
                else:
                    seq_inds[bb, 0] += constants.NUM_UNROLLS
                batch_data.append(data)
                batch_action.append(action)
                batch_memory.append(memory)
                batch_gru_state.append(gru_state)
                batch_pose.append(pose)
                batch_goal_pose.append(goal_pose)
                batch_labels.append(labels)
                batch_mask.append(mask)
                batch_pose_indicator.append(pose_indicator)
                batch_possible_label.append(possible_label)

            feed_dict = {
                network.image_placeholder:
                    np.ascontiguousarray(batch_data),
                network.action_placeholder:
                    np.ascontiguousarray(batch_action),
                network.gru_placeholder:
                    np.ascontiguousarray(batch_gru_state),
                network.pose_placeholder:
                    np.ascontiguousarray(batch_pose),
                network.goal_pose_placeholder:
                    np.ascontiguousarray(batch_goal_pose),
                network.labels_placeholder:
                    np.ascontiguousarray(batch_labels)[..., np.newaxis],
                network.mask_placeholder:
                    np.ascontiguousarray(batch_mask),
                network.pose_indicator_placeholder:
                    np.ascontiguousarray(batch_pose_indicator),
                network.possible_label_placeholder:
                    np.ascontiguousarray(batch_possible_label),
                network.memory_placeholders:
                    np.ascontiguousarray(batch_memory),
            }
            dataTEnd = time.time()
            summaryTime = 0
            if constants.DEBUG or constants.DRAWING:
                outputs = sess.run([
                    training_step, network.loss, network.gru_state,
                    network.patch_weights_sigm, network.gru_outputs_full,
                    network.is_possible_sigm,
                    network.pose_indicator_placeholder,
                    network.terminal_patches, network.gru_outputs
                ], feed_dict=feed_dict)
            else:
                if iteration == start_it + 10:
                    # One fully traced step, recorded as run metadata.
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    outputs = sess.run([
                        training_step, network.loss, network.gru_state,
                        summary_with_images, network.gru_outputs
                    ], feed_dict=feed_dict,
                        options=run_options,
                        run_metadata=run_metadata)
                    loss_summary = outputs[3]
                    summary_writer.add_run_metadata(
                        run_metadata, 'step_%07d' % iteration)
                    summary_writer.add_summary(loss_summary, iteration)
                    summary_writer.flush()
                elif iteration % 10 == 0:
                    if iteration % 100 == 0:
                        outputs = sess.run([
                            training_step, network.loss, network.gru_state,
                            summary_with_images, network.gru_outputs
                        ], feed_dict=feed_dict)
                    elif iteration % 10 == 0:
                        outputs = sess.run([
                            training_step, network.loss, network.gru_state,
                            loss_summary_op, network.gru_outputs
                        ], feed_dict=feed_dict)
                    loss_summary = outputs[3]
                    summaryTStart = time.time()
                    summary_writer.add_summary(loss_summary, iteration)
                    summary_writer.flush()
                    summaryTime = time.time() - summaryTStart
                else:
                    outputs = sess.run([
                        training_step, network.loss, network.gru_state,
                        network.gru_outputs
                    ], feed_dict=feed_dict)

            # Carry the recurrent outputs over to the next unroll window.
            gru_state_out = outputs[2]
            memory_out = outputs[-1]
            for mm in range(constants.BATCH_SIZE):
                sequences[mm]['memory'] = memory_out[mm, ...]
                sequences[mm]['gru_state'] = gru_state_out[mm, ...]

            loss = outputs[1]
            solverTEnd = time.time()
            if constants.DEBUG or constants.DRAWING:
                # Look at outputs
                patch_weights = outputs[3]
                is_possible = outputs[5]
                pose_indicator = outputs[6]
                terminal_patches = outputs[7]
                with data_lock:
                    for bb in range(constants.BATCH_SIZE):
                        for tt in range(constants.NUM_UNROLLS):
                            if batch_mask[bb][tt] == 0:
                                break
                            if constants.DRAWING:
                                import cv2
                                from utils import drawing
                                label = np.flipud(batch_labels[bb][tt])
                                debug_images = batch_debug_images[bb][tt]
                                state_image = debug_images['state_image']
                                print('Possible pred %.3f'
                                      % is_possible[bb, tt])
                                print('Possible label %.3f'
                                      % batch_possible_label[bb][tt])
                                patch = np.flipud(patch_weights[bb, tt, ...])
                                patch_occupancy = patch[:, :, 0]
                                print('occ', patch_occupancy)
                                print('label', label)
                                terminal_patch = np.flipud(
                                    np.sum(terminal_patches[bb, tt, ...],
                                           axis=2))
                                image_list = [
                                    debug_images['color'],
                                    state_image,
                                    debug_images['label_memory'][:, :, 0],
                                    debug_images['memory_map'][:, :, 0],
                                    label[:, :],
                                    patch_occupancy,
                                    np.flipud(pose_indicator[bb, tt]),
                                    terminal_patch,
                                ]
                                image = drawing.subplot(
                                    image_list, 4, 2,
                                    constants.SCREEN_WIDTH,
                                    constants.SCREEN_HEIGHT)
                                cv2.imshow('image', image[:, :, ::-1])
                                cv2.waitKey(0)
                            else:
                                pdb.set_trace()

            if not (constants.DEBUG or constants.DRAWING) and (
                    iteration % 500 == 0 or
                    iteration == constants.MAX_TIME_STEP - 1):
                saverTStart = time.time()
                tf_util.save(saver, sess, constants.CHECKPOINT_DIR, iteration)
                saverTEnd = time.time()
                print('Saver: %.3f' % (saverTEnd - saverTStart))

            curr_it += 1

            dataTimeTotal += dataTEnd - tStart
            summaryTimeTotal += summaryTime
            solverTimeTotal += solverTEnd - dataTEnd - summaryTime
            totalTimeTotal += time.time() - tStart

            if iteration == start_it or (iteration) % 10 == 0:
                print('Iteration: %d' % (iteration))
                print('Loss: %.3f' % loss)
                print('Data: %.3f' % (dataTimeTotal / curr_it))
                print('Solver: %.3f' % (solverTimeTotal / curr_it))
                print('Summary: %.3f' % (summaryTimeTotal / curr_it))
                print('Total: %.3f' % (totalTimeTotal / curr_it))
                print('Current: %.3f\n' % (
                    (time.time() - currentTimeStart) / min(10, curr_it)))

    except (Exception, KeyboardInterrupt):
        # Top-level boundary: report the failure (Ctrl-C included) and fall
        # through to the final checkpoint below.
        import traceback
        traceback.print_exc()
    finally:
        # Save final model
        if not (constants.DEBUG or constants.DRAWING):
            # Only possible once a saver exists and an iteration has started.
            if saver is not None and iteration is not None:
                tf_util.save(saver, sess, constants.CHECKPOINT_DIR, iteration)
def run():
    """Train the question-answering planner with parallel A3C worker threads.

    Builds the global network (``EndToEndBaselineNetwork`` or
    ``QAPlannerNetwork``, chosen by ``constants.END_TO_END_BASELINE``) and,
    when ``constants.USE_NAVIGATION_AGENT`` is set, a ``FreeSpaceNetwork``
    under ``nav_global_network``. It then creates ``constants.PARALLEL_SIZE``
    ``A3CTrainingThread`` workers, restores checkpoints, and runs one Python
    thread per worker until the module-level ``global_t`` reaches
    ``constants.MAX_TIME_STEP``. With ``constants.RUN_TEST`` an extra thread
    periodically evaluates on the test datasets; with
    ``constants.RECORD_FEED_DICT`` the feed dicts are dumped to an HDF5 file.

    Takes no arguments and returns ``None``. Uses and rebinds the module
    globals ``global_t``, ``dataset`` and ``data_ind``. Ctrl-C and other
    exceptions are reported instead of propagated; unless ``constants.DEBUG``
    is set, a checkpoint is written on the way out when a saver exists.
    """
    global global_t
    global dataset
    global data_ind
    if constants.OBJECT_DETECTION:
        from darknet_object_detection import detector
        detector.setup_detectors(constants.PARALLEL_SIZE)

    os.environ["CUDA_VISIBLE_DEVICES"] = str(constants.GPU_ID)

    # Pre-bind what ``finally`` uses so an early failure is not masked by a
    # NameError raised from the cleanup code.
    sess = None
    saver = None
    summary_writer = None
    try:
        with tf.variable_scope('global_network'):
            if constants.END_TO_END_BASELINE:
                global_network = EndToEndBaselineNetwork()
            else:
                global_network = QAPlannerNetwork(constants.RL_GRU_SIZE, 1, 1)
            global_network.create_net()
        if constants.USE_NAVIGATION_AGENT:
            with tf.variable_scope('nav_global_network') as net_scope:
                free_space_network = FreeSpaceNetwork(constants.GRU_SIZE, 1, 1)
                free_space_network.create_net()
        else:
            net_scope = None

        conv_var_list = [
            v for v in tf.trainable_variables()
            if 'conv' in v.name and 'weight' in v.name and
            (v.get_shape().as_list()[0] != 1 or
             v.get_shape().as_list()[1] != 1)
        ]
        for var in conv_var_list:
            tf_util.conv_variable_summaries(
                var, scope=var.name.replace('/', '_')[:-2])
        conv_image_summary = tf.summary.merge_all()

        # prepare session
        sess = tf_util.Session()

        # Instantiate singletons without scope.
        if constants.PREDICT_DEPTH:
            from depth_estimation_network import depth_estimator
            with tf.variable_scope('') as depth_scope:
                depth_estimator = depth_estimator.get_depth_estimator(sess)
        else:
            depth_scope = None

        training_threads = []

        learning_rate_input = tf.placeholder(tf.float32, name='learning_rate')
        grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                                      decay=constants.RMSP_ALPHA,
                                      momentum=0.0,
                                      epsilon=constants.RMSP_EPSILON,
                                      clip_norm=constants.GRAD_NORM_CLIP)

        for i in range(constants.PARALLEL_SIZE):
            training_thread = A3CTrainingThread(
                i, sess, learning_rate_input, grad_applier,
                constants.MAX_TIME_STEP, net_scope, depth_scope)
            training_threads.append(training_thread)

        if constants.RUN_TEST:
            testing_thread = A3CTestingThread(constants.PARALLEL_SIZE + 1,
                                              sess, net_scope, depth_scope)

        sess.run(tf.global_variables_initializer())

        # Initialize pretrained weights after init.
        if constants.PREDICT_DEPTH:
            depth_estimator.load_weights()

        episode_reward_input = tf.placeholder(tf.float32, name='ep_reward')
        episode_length_input = tf.placeholder(tf.float32, name='ep_length')
        exist_answer_correct_input = tf.placeholder(
            tf.float32, name='exist_ans')
        count_answer_correct_input = tf.placeholder(
            tf.float32, name='count_ans')
        contains_answer_correct_input = tf.placeholder(
            tf.float32, name='contains_ans')
        percent_invalid_actions_input = tf.placeholder(tf.float32, name='inv')

        scalar_summaries = [
            tf.summary.scalar("Episode Reward", episode_reward_input),
            tf.summary.scalar("Episode Length", episode_length_input),
            tf.summary.scalar("Percent Invalid Actions",
                              percent_invalid_actions_input),
        ]
        # Plain summary tensors (the original wrapped each one in a 1-tuple
        # through a stray trailing comma).
        exist_summary = tf.summary.scalar(
            "Answer Correct Existence", exist_answer_correct_input)
        count_summary = tf.summary.scalar(
            "Answer Correct Counting", count_answer_correct_input)
        contains_summary = tf.summary.scalar(
            "Answer Correct Containing", contains_answer_correct_input)
        exist_summary_op = tf.summary.merge(
            scalar_summaries + [exist_summary])
        count_summary_op = tf.summary.merge(
            scalar_summaries + [count_summary])
        contains_summary_op = tf.summary.merge(
            scalar_summaries + [contains_summary])
        summary_ops = [exist_summary_op, count_summary_op,
                       contains_summary_op]

        summary_placeholders = {
            "episode_reward_input": episode_reward_input,
            "episode_length_input": episode_length_input,
            "exist_answer_correct_input": exist_answer_correct_input,
            "count_answer_correct_input": count_answer_correct_input,
            "contains_answer_correct_input": contains_answer_correct_input,
            "percent_invalid_actions_input": percent_invalid_actions_input,
        }

        if constants.RUN_TEST:
            test_episode_reward_input = tf.placeholder(
                tf.float32, name='test_ep_reward')
            test_episode_length_input = tf.placeholder(
                tf.float32, name='test_ep_length')
            test_exist_answer_correct_input = tf.placeholder(
                tf.float32, name='test_exist_ans')
            test_count_answer_correct_input = tf.placeholder(
                tf.float32, name='test_count_ans')
            test_contains_answer_correct_input = tf.placeholder(
                tf.float32, name='test_contains_ans')
            test_percent_invalid_actions_input = tf.placeholder(
                tf.float32, name='test_inv')

            test_scalar_summaries = [
                tf.summary.scalar("Test Episode Reward",
                                  test_episode_reward_input),
                tf.summary.scalar("Test Episode Length",
                                  test_episode_length_input),
                tf.summary.scalar("Test Percent Invalid Actions",
                                  test_percent_invalid_actions_input),
            ]
            exist_summary = tf.summary.scalar(
                "Test Answer Correct Existence",
                test_exist_answer_correct_input)
            count_summary = tf.summary.scalar(
                "Test Answer Correct Counting",
                test_count_answer_correct_input)
            contains_summary = tf.summary.scalar(
                "Test Answer Correct Containing",
                test_contains_answer_correct_input)
            test_exist_summary_op = tf.summary.merge(
                test_scalar_summaries + [exist_summary])
            test_count_summary_op = tf.summary.merge(
                test_scalar_summaries + [count_summary])
            test_contains_summary_op = tf.summary.merge(
                test_scalar_summaries + [contains_summary])
            test_summary_ops = [test_exist_summary_op, test_count_summary_op,
                                test_contains_summary_op]

        if not constants.DEBUG:
            time_str = py_util.get_time_str()
            summary_writer = tf.summary.FileWriter(
                os.path.join(constants.LOG_FILE, time_str), sess.graph)
        else:
            summary_writer = None

        # init or load checkpoint with saver
        vars_to_save = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                         scope='global_network')
        saver = tf.train.Saver(vars_to_save, max_to_keep=3)
        print('-------------- Looking for checkpoints in ',
              constants.CHECKPOINT_DIR)
        global_t = tf_util.restore_from_dir(sess, constants.CHECKPOINT_DIR)

        if constants.USE_NAVIGATION_AGENT:
            print('now trying to restore nav model')
            tf_util.restore_from_dir(sess, 'logs/checkpoints/navigation', True)

        sess.graph.finalize()

        for i in range(constants.PARALLEL_SIZE):
            sess.run(training_threads[i].sync)
            training_threads[i].agent.reset()

        times = []

        if not constants.DEBUG and constants.RECORD_FEED_DICT:
            import h5py
            NUM_RECORD = 10000 * len(constants.USED_QUESTION_TYPES)
            time_str = py_util.get_time_str()
            if not os.path.exists('question_data_dump'):
                os.mkdir('question_data_dump')
            dataset = h5py.File(
                'question_data_dump/maps_' + time_str + '.h5', 'w')
            dataset.create_dataset(
                'question_data/existence_answer_placeholder',
                (NUM_RECORD, 1), dtype=np.int32)
            dataset.create_dataset(
                'question_data/counting_answer_placeholder',
                (NUM_RECORD, 1), dtype=np.int32)
            dataset.create_dataset(
                'question_data/question_type_placeholder',
                (NUM_RECORD, 1), dtype=np.int32)
            dataset.create_dataset(
                'question_data/question_object_placeholder',
                (NUM_RECORD, 1), dtype=np.int32)
            dataset.create_dataset(
                'question_data/question_container_placeholder',
                (NUM_RECORD, 21), dtype=np.int32)
            dataset.create_dataset(
                'question_data/pose_placeholder',
                (NUM_RECORD, 3), dtype=np.int32)
            dataset.create_dataset(
                'question_data/image_placeholder',
                (NUM_RECORD, 300, 300, 3), dtype=np.uint8)
            dataset.create_dataset(
                'question_data/map_mask_placeholder',
                (NUM_RECORD, constants.SPATIAL_MAP_HEIGHT,
                 constants.SPATIAL_MAP_WIDTH, 27), dtype=np.uint8)
            dataset.create_dataset(
                'question_data/meta_action_placeholder',
                (NUM_RECORD, 7), dtype=np.int32)
            dataset.create_dataset(
                'question_data/possible_move_placeholder',
                (NUM_RECORD, 31), dtype=np.int32)
            dataset.create_dataset(
                'question_data/answer_weight',
                (NUM_RECORD, 1), dtype=np.int32)
            dataset.create_dataset(
                'question_data/taken_action',
                (NUM_RECORD, 32), dtype=np.int32)
            dataset.create_dataset(
                'question_data/new_episode',
                (NUM_RECORD, 1), dtype=np.int32)

        time_lock = threading.Lock()

        data_ind = 0

        # Feed-dict entries whose key contains one of these are not recorded.
        unrecorded_key_parts = (
            'gru_placeholder',
            'num_unrolls',
            'reward_placeholder',
            'td_placeholder',
            'learning_rate',
            'phi_hat_prev_placeholder',
            'next_map_mask_placeholder',
            'next_pose_placeholder',
            'episode_length_placeholder',
            'question_count_placeholder',
            'supervised_action_labels',
            'supervised_action_weights_sigmoid',
            'supervised_action_weights',
            'question_direction_placeholder',
        )

        def train_function(parallel_index):
            """Worker loop for one training thread."""
            global global_t
            global dataset
            global data_ind
            print('----------------------------------------thread',
                  parallel_index, 'global_t', global_t)
            training_thread = training_threads[parallel_index]
            last_global_t = global_t
            last_global_t_image = global_t

            while global_t < constants.MAX_TIME_STEP:
                (diff_global_t, ep_length, ep_reward, num_unrolls,
                 feed_dict) = training_thread.process(
                     global_t, summary_writer, summary_ops,
                     summary_placeholders)
                # ``with`` releases the lock even when the exit exception
                # below is raised; the original left it held, so the other
                # workers blocked on it forever.
                with time_lock:
                    if not constants.DEBUG and constants.RECORD_FEED_DICT:
                        print(' NEW ENTRY: %d %s' % (
                            data_ind,
                            training_thread.agent.game_state.scene_name))
                        dataset['question_data/new_episode'][data_ind] = 1
                        for k, v in feed_dict.items():
                            key = ('question_data/' +
                                   k.name.split('/')[-1].split(':')[0])
                            if any(part in key
                                   for part in unrecorded_key_parts):
                                continue
                            v = np.ascontiguousarray(v)
                            if v.shape[0] == 1:
                                v = v[0, ...]
                            if len(v.shape) == 1 and num_unrolls > 1:
                                v = v[:, np.newaxis]

                            if 'map_mask_placeholder' in key:
                                v[:, :, :, :2] *= constants.STEPS_AHEAD
                                v[:, :, :, :2] += 2
                            # Clip to the rows actually left in the dataset.
                            data_loc = dataset[key][
                                data_ind:data_ind + num_unrolls, ...]
                            data_len = data_loc.shape[0]
                            dataset[key][
                                data_ind:data_ind + num_unrolls,
                                ...] = v[:data_len, ...]
                        dataset.flush()

                        data_ind += num_unrolls
                        if data_ind >= NUM_RECORD:
                            # Everything is done
                            dataset.close()
                            raise Exception(
                                'Fake exception to exit the process. '
                                'Don\'t worry, everything is fine.')
                    if ep_length > 0:
                        times.append((ep_length, ep_reward))
                        print('Num episodes', len(times), 'Episode means',
                              np.mean(times, axis=0))
                global_t += diff_global_t
                # periodically save checkpoints to disk
                if (not (constants.DEBUG or constants.RECORD_FEED_DICT) and
                        parallel_index == 0):
                    if global_t - last_global_t_image > 10000:
                        print('Ran conv image summary')
                        summary_im_str = sess.run(conv_image_summary)
                        summary_writer.add_summary(summary_im_str, global_t)
                        last_global_t_image = global_t
                    if global_t - last_global_t > 10000:
                        print('Save checkpoint at timestamp %d' % global_t)
                        tf_util.save(saver, sess, constants.CHECKPOINT_DIR,
                                     global_t)
                        last_global_t = global_t

        def test_function():
            """Evaluate periodically while training is still running."""
            global global_t
            last_global_t_test = 0
            while global_t < constants.MAX_TIME_STEP:
                time.sleep(1)
                if global_t - last_global_t_test > 10000:
                    # RUN TEST
                    sess.run(testing_thread.sync)
                    from game_state import QuestionGameState
                    if testing_thread.agent.game_state is None:
                        testing_thread.agent.game_state = QuestionGameState(
                            sess=sess)
                    for q_type in constants.USED_QUESTION_TYPES:
                        answers_correct = 0.0
                        ep_lengths = 0.0
                        ep_rewards = 0.0
                        invalid_percents = 0.0
                        rows = list(range(len(
                            testing_thread.agent.game_state
                            .test_datasets[q_type])))
                        random.shuffle(rows)
                        # Evaluate on at most 16 randomly chosen rows.
                        rows = rows[:16]
                        print('()()()()()()()rows', rows)
                        for rr, row in enumerate(rows):
                            (answer_correct, answer, gt_answer, ep_length,
                             ep_reward, invalid_percent, scene_num, seed,
                             required_interaction) = testing_thread.process(
                                 (row, q_type))
                            answers_correct += int(answer_correct)
                            ep_lengths += ep_length
                            ep_rewards += ep_reward
                            invalid_percents += invalid_percent
                            print('############################### TEST ITERATION ##################################')
                            print('ep ', (rr + 1))
                            print('average correct',
                                  answers_correct / (rr + 1))
                            print('#################################################################################')
                        answers_correct /= len(rows)
                        ep_lengths /= len(rows)
                        ep_rewards /= len(rows)
                        invalid_percents /= len(rows)

                        # Write the summary
                        test_summary_str = sess.run(
                            test_summary_ops[q_type],
                            feed_dict={
                                test_episode_reward_input: ep_rewards,
                                test_episode_length_input: ep_lengths,
                                test_exist_answer_correct_input:
                                    answers_correct,
                                test_count_answer_correct_input:
                                    answers_correct,
                                test_contains_answer_correct_input:
                                    answers_correct,
                                test_percent_invalid_actions_input:
                                    invalid_percents,
                            })
                        summary_writer.add_summary(test_summary_str, global_t)
                        summary_writer.flush()
                        last_global_t_test = global_t
                    testing_thread.agent.game_state.env.stop()
                    testing_thread.agent.game_state = None

        train_threads = []
        for i in range(constants.PARALLEL_SIZE):
            train_threads.append(
                threading.Thread(target=train_function, args=(i,)))
            train_threads[i].daemon = True

        for t in train_threads:
            t.start()

        if constants.RUN_TEST:
            test_thread = threading.Thread(target=test_function)
            test_thread.daemon = True
            test_thread.start()

        for t in train_threads:
            t.join()

        if constants.RUN_TEST:
            test_thread.join()

        if not constants.DEBUG:
            if not os.path.exists(constants.CHECKPOINT_DIR):
                os.makedirs(constants.CHECKPOINT_DIR)
            saver.save(sess, constants.CHECKPOINT_DIR + '/' + 'checkpoint',
                       global_step=global_t)
            summary_writer.close()
            print('Saved.')

    except KeyboardInterrupt:
        print('Press Ctrl+C to stop')
    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        if not constants.DEBUG:
            # ``saver`` / ``summary_writer`` stay None when setup failed
            # before they were created; skip the step that needs them.
            if saver is not None:
                print('Now saving data. Please wait')
                tf_util.save(saver, sess, constants.CHECKPOINT_DIR, global_t)
            if summary_writer is not None:
                summary_writer.close()
            if saver is not None:
                print('Saved.')
def main():
    """Evaluate a trained agent on the test questions with parallel workers.

    Builds the global network (and the navigation network when
    ``constants.USE_NAVIGATION_AGENT`` is set), restores checkpoints, then
    lets ``constants.PARALLEL_SIZE`` ``A3CTestingThread`` workers consume a
    shuffled list of ``(row, question_type)`` pairs. One CSV line per
    episode is written to a ``results_<TEST_SET>_<time>.csv`` file under
    ``constants.LOG_FILE`` and running statistics are printed.

    Takes no arguments and returns ``None``.
    """
    if constants.OBJECT_DETECTION:
        from darknet_object_detection import detector
        detector.setup_detectors(constants.PARALLEL_SIZE)

    with tf.device('/gpu:' + str(constants.GPU_ID)):
        with tf.variable_scope('global_network'):
            if constants.END_TO_END_BASELINE:
                global_network = EndToEndBaselineNetwork()
            else:
                global_network = QAPlannerNetwork(constants.RL_GRU_SIZE, 1, 1)
            global_network.create_net()
        if constants.USE_NAVIGATION_AGENT:
            with tf.variable_scope('nav_global_network') as net_scope:
                free_space_network = FreeSpaceNetwork(constants.GRU_SIZE, 1, 1)
                free_space_network.create_net()
        else:
            net_scope = None

    # prepare session
    sess = tf_util.Session()

    if constants.PREDICT_DEPTH:
        from depth_estimation_network import depth_estimator
        with tf.variable_scope('') as depth_scope:
            depth_estimator = depth_estimator.get_depth_estimator(sess)
    else:
        depth_scope = None

    sess.run(tf.global_variables_initializer())

    # Initialize pretrained weights after init.
    if constants.PREDICT_DEPTH:
        depth_estimator.load_weights()

    testing_threads = [
        A3CTestingThread(i, sess, net_scope, depth_scope)
        for i in range(constants.PARALLEL_SIZE)
    ]

    tf_util.restore_from_dir(sess, constants.CHECKPOINT_DIR, True)

    if constants.USE_NAVIGATION_AGENT:
        print('now trying to restore nav model')
        tf_util.restore_from_dir(
            sess, os.path.join(constants.CHECKPOINT_PREFIX, 'navigation'),
            True)

    sess.graph.finalize()

    # Dataset sizes are read from the last worker created, which is the
    # object the original reached through its leaked loop variable.
    test_datasets = testing_threads[-1].agent.game_state.test_datasets
    rows = []
    for q_type in constants.USED_QUESTION_TYPES:
        rows.extend(
            (row_ind, q_type) for row_ind in range(len(test_datasets[q_type])))

    random.shuffle(rows)

    answers_correct = []
    ep_lengths = []
    ep_rewards = []
    invalid_percents = []

    # Guards the shared work list, the result lists and the output file.
    time_lock = threading.Lock()
    if not os.path.exists(constants.LOG_FILE):
        os.makedirs(constants.LOG_FILE)
    results_path = (constants.LOG_FILE + '/results_' + constants.TEST_SET +
                    '_' + py_util.get_time_str() + '.csv')

    # The context manager closes the file even if starting or joining the
    # workers raises.
    with open(results_path, 'w') as out_file:
        out_file.write(constants.LOG_FILE + '\n')
        out_file.write(
            'question_type, answer_correct, answer, gt_answer, episode_length, invalid_action_percent, scene number, seed, required_interaction\n'
        )

        def test_function(thread_ind):
            """Worker loop: pop rows until none are left, logging each one."""
            testing_thread = testing_threads[thread_ind]
            sess.run(testing_thread.sync)
            while True:
                # Check and pop under a single lock. The original broke out
                # of the loop with the lock still held once the list was
                # empty, deadlocking any worker waiting on it.
                with time_lock:
                    if not rows:
                        break
                    row = rows.pop()

                (answer_correct, answer, gt_answer, ep_length, ep_reward,
                 invalid_percent, scene_num, seed,
                 required_interaction) = testing_thread.process(row)
                question_type = row[1] + 1

                with time_lock:
                    output_str = (
                        '%d, %d, %d, %d, %d, %f, %d, %d, %d\n' %
                        (question_type, answer_correct, answer, gt_answer,
                         ep_length, invalid_percent, scene_num, seed,
                         required_interaction))
                    out_file.write(output_str)
                    out_file.flush()
                    answers_correct.append(int(answer_correct))
                    ep_lengths.append(ep_length)
                    ep_rewards.append(ep_reward)
                    invalid_percents.append(invalid_percent)
                    print('###############################')
                    print('ep ', row)
                    print('num episodes', len(answers_correct))
                    print('average correct', np.mean(answers_correct))
                    print('invalid percents', np.mean(invalid_percents),
                          np.median(invalid_percents))
                    print('###############################')

        test_threads = [
            threading.Thread(target=test_function, args=(i, ))
            for i in range(constants.PARALLEL_SIZE)
        ]

        for t in test_threads:
            t.start()

        for t in test_threads:
            t.join()
class GraphAgent(object):
    """Navigation agent that plans on a graph updated by a FreeSpaceNetwork.

    Each call to ``inference`` runs the network on the current observation and
    writes the predicted patch weights into ``game_state.graph``; ``get_plan``
    then asks that graph for a shortest path to ``game_state.end_point``.
    """

    # Grid offset (dx, dy) of the cell directly in front of the agent, keyed by
    # the heading index stored in pose[2].
    _FORWARD_OFFSETS = {0: (0, 1), 1: (1, 0), 2: (0, -1), 3: (-1, 0)}

    def __init__(self,
                 sess,
                 reuse=True,
                 num_unrolls=1,
                 game_state=None,
                 net_scope=None):
        """Create the network and attach (or create) the game state.

        Args:
            sess: session used for every ``sess.run`` call of this agent.
            reuse: ``reuse`` flag for the ``nav_global_network`` variable
                scope; only consulted when ``net_scope`` is None.
            num_unrolls: unroll count passed to the network when ``net_scope``
                is None; also stored on the instance.
            game_state: existing game state to drive; a new ``GameState`` is
                created when None.
            net_scope: variable scope of an already-built network.  When given,
                the network is built inside it with ``reuse=True``, one unroll
                and no loss.
        """
        if net_scope is None:
            with tf.name_scope('agent'):
                with tf.variable_scope('nav_global_network', reuse=reuse):
                    self.network = FreeSpaceNetwork(constants.GRU_SIZE, 1,
                                                    num_unrolls)
                    self.network.create_net()
        else:
            with tf.variable_scope(net_scope, reuse=True):
                self.network = FreeSpaceNetwork(constants.GRU_SIZE, 1, 1)
                self.network.create_net(add_loss=False)

        if game_state is None:
            self.game_state = GameState(sess=sess)
        else:
            self.game_state = game_state
        self.action_util = self.game_state.action_util
        self.gt_graph = None
        self.sess = sess
        self.num_steps = 0
        self.global_step_id = 0
        self.num_unrolls = num_unrolls
        self.pose_indicator = np.zeros(
            (constants.TERMINAL_CHECK_PADDING * 2 + 1,
             constants.TERMINAL_CHECK_PADDING * 2 + 1))
        # times[0]: time spent in the goto loop; times[1]: time spent in
        # game-state steps.
        self.times = np.zeros(2)

    def _teleport_with_horizon(self, horizon):
        """Teleport in place (current pose) with the camera horizon set to *horizon*."""
        look_action = {
            'action': 'TeleportFull',
            'x': self.game_state.pose[0] * constants.AGENT_STEP_SIZE,
            'y': self.game_state.agent_height,
            'z': self.game_state.pose[1] * constants.AGENT_STEP_SIZE,
            'rotateOnTeleport': True,
            'rotation': self.game_state.pose[2] * 90,
            'horizon': horizon,
        }
        super(QuestionGameState, self.game_state).step(look_action)

    def goto(self, action, step_num):
        """Navigate toward the location described by *action*.

        Args:
            action: dict with ``'x'``, ``'z'`` and ``'rotation'`` (degrees)
                entries describing the target.
            step_num: step offset used only to number the debug images written
                when ``constants.DRAWING`` is set.

        Returns:
            ``(steps, invalid_actions)``: the number of steps executed (at most
            20) and how many of them the environment reported as failed.
        """
        # Look down
        start_angle = self.game_state.pose[3]
        if start_angle != 60:
            self._teleport_with_horizon(60)

        self.game_state.end_point = (action['x'], action['z'],
                                     action['rotation'] / 90)
        self.goal_pose = np.array([
            self.game_state.end_point[0] - self.game_state.graph.xMin,
            self.game_state.end_point[1] - self.game_state.graph.yMin
        ],
                                  dtype=np.int32)[:2]
        self.pose = self.game_state.pose
        self.inference()
        plan, path = self.get_plan()
        steps = 0
        invalid_actions = 0

        self.reset(self.game_state.scene_name)

        self.game_state.board = None
        while (steps < 20 and len(plan) > 0 and
               self.is_possible >= constants.POSSIBLE_THRESH):
            t_start = time.time()
            next_action = plan[0]
            self.step(next_action)
            invalid_actions += 1 - int(
                self.game_state.event.metadata['lastActionSuccess'])

            # Re-plan after every step because the graph was just updated.
            plan, path = self.get_plan()
            steps += 1
            if constants.DRAWING:
                image = self.draw_state()
                if not os.path.exists('visualizations/images'):
                    os.makedirs('visualizations/images')
                # Channel order is reversed before handing the image to cv2.
                cv2.imwrite(
                    'visualizations/images/state_%05d.jpg' %
                    (step_num + steps), image[:, :, ::-1])
            self.times[0] += time.time() - t_start

        print('step time %.3f' % (self.times[0] / max(steps, 1)))
        self.times[0] = 0

        # Look back
        if start_angle != 60:
            self._teleport_with_horizon(start_angle)
        return steps, invalid_actions

    def inference(self):
        """Run the network on the current observation and update the graph.

        Updates ``pose_indicator``, ``feed_dict``, ``map_weights``,
        ``gru_state``, ``occupancy``, ``memory`` and ``is_possible``, and
        pushes the predicted patch weights into ``game_state.graph``.
        """
        image = self.game_state.s_t[np.newaxis, np.newaxis, ...]

        padding = constants.TERMINAL_CHECK_PADDING
        self.pose_indicator = np.zeros((padding * 2 + 1, padding * 2 + 1))
        end_point = self.game_state.end_point
        # Mark the agent's offset from the end point, but only when it lies
        # inside the padded window around the end point.
        if (abs(self.pose[0] - end_point[0]) <= padding and
                abs(self.pose[1] - end_point[1]) <= padding):
            self.pose_indicator[self.pose[1] - end_point[1] + padding,
                                self.pose[0] - end_point[0] + padding] = 1

        self.feed_dict = {
            self.network.image_placeholder:
            image,
            self.network.action_placeholder:
            self.action[np.newaxis, np.newaxis, :],
            self.network.pose_placeholder:
            np.array(self.gt_graph.get_shifted_pose(
                self.pose))[np.newaxis, np.newaxis, :3],
            self.network.memory_placeholders:
            self.memory[np.newaxis, ...],
            self.network.gru_placeholder:
            self.gru_state,
            self.network.pose_indicator_placeholder:
            self.pose_indicator[np.newaxis, np.newaxis, ...],
            self.network.goal_pose_placeholder:
            self.goal_pose[np.newaxis, ...],
        }
        if self.num_unrolls is None:
            self.feed_dict[self.network.num_unrolls] = 1

        (patch_weights, gru_state, occupancy, gru_outputs_full,
         is_possible) = self.sess.run([
             self.network.patch_weights_clipped,
             self.network.gru_state,
             self.network.occupancy,
             self.network.gru_outputs_full,
             self.network.is_possible_sigm,
         ],
                                      feed_dict=self.feed_dict)

        self.map_weights = patch_weights[0, 0, ...]
        self.game_state.graph.update_graph(
            (self.map_weights, [1 + graph_obj.EPSILON]), self.pose, rows=[0])
        self.gru_state = gru_state
        # Crop to the scene bounds and mask with cells whose graph memory
        # value exceeds 1 (read after the graph update above).
        self.occupancy = (
            occupancy[0, :self.bounds[3], :self.bounds[2], 0] *
            (self.game_state.graph.memory[:, :, 0] > 1))
        self.memory = gru_outputs_full[0, 0, ...]
        self.is_possible = is_possible[0, 0]

    def reset(self, scene_name=None, seed=None):
        """Reset per-episode state and run one inference pass.

        Args:
            scene_name: when given, the ground-truth layout for that scene is
                loaded and all per-episode buffers are re-initialised; the
                game state itself is reset only if it is exactly a
                ``GameState`` with a live ``env``.  When None, only
                ``game_state.reset()`` is called.
            seed: forwarded to ``game_state.reset`` in the scene branch.
        """
        if scene_name is not None:
            # Exact type check on purpose: subclasses are not reset here.
            if (self.game_state.env is not None and
                    type(self.game_state) is GameState):
                self.game_state.reset(scene_name, use_gt=False, seed=seed)
            self.gt_graph = graph_obj.Graph(
                'layouts/%s-layout.npy' % scene_name, use_gt=True)
            graph = self.game_state.graph
            self.bounds = [
                graph.xMin, graph.yMin, graph.xMax - graph.xMin + 1,
                graph.yMax - graph.yMin + 1
            ]
            if len(self.game_state.end_point) == 0:
                self.game_state.end_point = (
                    graph.xMin + constants.TERMINAL_CHECK_PADDING,
                    graph.yMin + constants.TERMINAL_CHECK_PADDING, 0)
            self.action = np.zeros(self.action_util.num_actions)
            self.memory = np.zeros(
                (constants.SPATIAL_MAP_HEIGHT, constants.SPATIAL_MAP_WIDTH,
                 constants.MEMORY_SIZE))
            self.gru_state = np.zeros((1, constants.GRU_SIZE))
            self.pose = self.game_state.pose
            self.is_possible = 1
            self.num_steps = 0
            self.times = np.zeros(2)
            self.impossible_spots = set()
            self.visited_spots = set()
        else:
            self.game_state.reset()

        self.goal_pose = np.array([
            self.game_state.end_point[0] - self.game_state.graph.xMin,
            self.game_state.end_point[1] - self.game_state.graph.yMin
        ],
                                  dtype=np.int32)[:2]
        self.inference()

    def step(self, action):
        """Execute *action*, run inference, and record blocked/visited cells.

        Raises:
            ValueError: if the action failed and ``pose[2]`` is not one of the
                four known heading indices, so the blocked cell cannot be
                determined.
        """
        t_start = time.time()
        # Exact type check on purpose: for any other game-state type the
        # parent-class step of QuestionGameState is invoked instead.
        if type(self.game_state) is GameState:
            self.game_state.step(action)
        else:
            super(QuestionGameState, self.game_state).step(action)
        self.times[1] += time.time() - t_start
        if self.num_steps % 100 == 0:
            print('game state step time %.3f' %
                  (self.times[1] / (self.num_steps + 1)))

        self.pose = self.game_state.pose
        # One-hot encode the action that was just taken.
        self.action[:] = 0
        self.action[self.action_util.action_dict_to_ind(action)] = 1
        self.inference()
        self.num_steps += 1
        self.global_step_id += 1

        if not self.game_state.event.metadata['lastActionSuccess']:
            # Can't traverse here, make sure the weight is correct.
            offset = self._FORWARD_OFFSETS.get(self.pose[2])
            if offset is None:
                raise ValueError('unexpected heading %r in pose %r' %
                                 (self.pose[2], self.pose))
            spot = (self.pose[0] + offset[0], self.pose[1] + offset[1])
            self.gt_graph.update_weight(spot[0], spot[1],
                                        graph_obj.MAX_WEIGHT)
            self.impossible_spots.add(spot)
        else:
            self.visited_spots.add((self.pose[0], self.pose[1]))

        if self.impossible_spots:
            # Re-apply every known blocked cell after each inference pass.
            # The maximum is read once: the loop only writes to
            # game_state.graph, never to gt_graph.
            graph_max = self.gt_graph.memory[:, :, 0].max()
            for blocked in self.impossible_spots:
                self.game_state.graph.update_weight(blocked[0], blocked[1],
                                                    graph_max)
                self.occupancy[blocked[1], blocked[0]] = 1

    def get_plan(self):
        """Return ``(plan, path)`` from the current pose to the end point.

        Both values come straight from ``game_state.graph.get_shortest_path``
        and are also cached on ``self.plan`` / ``self.path``.
        """
        self.plan, self.path = self.game_state.graph.get_shortest_path(
            self.pose, self.game_state.end_point)
        return self.plan, self.path

    def get_label(self):
        """Return a binary patch from the ground-truth graph around the pose.

        Channel 0 of the patch is thresholded in place: values below 2 become
        0 and the remaining values become 1.
        """
        patch, _ = self.gt_graph.get_graph_patch(self.pose)
        patch = patch[:, :, 0]
        patch[patch < 2] = 0
        patch[patch > 1] = 1
        return patch

    def draw_state(self, return_list=False):
        """Render a debug visualisation of the agent state.

        Args:
            return_list: when True, return the list of individual panels
                instead of the composed image.

        Returns:
            None when ``constants.DRAWING`` is off; otherwise the list of
            panels or the image produced by ``drawing.subplot``.
        """
        if not constants.DRAWING:
            return
        from utils import drawing

        curr_image = self.game_state.detection_image.copy()
        # Panels are flipped vertically for display.
        label = np.flipud(self.get_label())
        patch = np.flipud(self.game_state.graph.get_graph_patch(self.pose)[0])
        state_image = self.game_state.draw_state().copy()
        memory_map = np.flipud(self.game_state.graph.memory.copy())
        # Insert an all-zero channel right after channel 0.
        memory_map = np.concatenate(
            (memory_map[:, :, [0]], np.zeros(memory_map[:, :, [0]].shape),
             memory_map[:, :, 1:]),
            axis=2)

        images = [
            curr_image,
            state_image,
            np.minimum(memory_map[:, :, 0], 200),
            np.argmax(memory_map[:, :, 1:], axis=2),
            label[:, :],
            np.minimum(patch[:, :, 0], 10),
        ]
        if return_list:
            return images

        action_str = 'action: %s possible %.3f' % (
            self.action_util.actions[np.where(
                self.action == 1)[0].squeeze()]['action'], self.is_possible)
        titles = [
            '%07d' % self.num_steps, action_str, 'Occupancy Map',
            'Objects Map', 'Label Patch', 'Learned Patch'
        ]
        image = drawing.subplot(images,
                                4,
                                3,
                                curr_image.shape[1],
                                curr_image.shape[0],
                                titles=titles,
                                border=3)
        return image
goal_pose = np.array([ self.game_state.end_point[0] - self.game_state.graph.xMin, self.game_state.end_point[1] - self.game_state.graph.yMin ], dtype=np.int32)[:2] return (self.states, self.bounds, goal_pose) if __name__ == '__main__': from networks.free_space_network import FreeSpaceNetwork from utils import tf_util import tensorflow as tf sess = tf_util.Session() with tf.variable_scope('nav_global_network'): network = FreeSpaceNetwork(constants.GRU_SIZE, 1, 1) network.create_net() sess.run(tf.global_variables_initializer()) start_it = tf_util.restore_from_dir(sess, constants.CHECKPOINT_DIR) import cv2 sequence_generator = SequenceGenerator(sess) sequence_generator.planner_prob = 1 counter = 0 while True: states, bounds, goal_pose = sequence_generator.generate_episode() images = sequence_generator.debug_images for im_dict in images: counter += 1