Example #1
class GraphAgent(object):
    def __init__(self,
                 sess,
                 reuse=True,
                 num_unrolls=1,
                 game_state=None,
                 net_scope=None):
        if net_scope is None:
            with tf.name_scope('agent'):
                with tf.variable_scope('nav_global_network', reuse=reuse):
                    self.network = FreeSpaceNetwork(constants.GRU_SIZE, 1,
                                                    num_unrolls)
                    self.network.create_net()
        else:
            with tf.variable_scope(net_scope, reuse=True):
                self.network = FreeSpaceNetwork(constants.GRU_SIZE, 1, 1)
                self.network.create_net(add_loss=False)

        if game_state is None:
            self.game_state = GameState(sess=sess)
        else:
            self.game_state = game_state
        self.action_util = self.game_state.action_util
        self.gt_graph = None
        self.sess = sess
        self.num_steps = 0
        self.global_step_id = 0
        self.num_unrolls = num_unrolls
        self.pose_indicator = np.zeros(
            (constants.TERMINAL_CHECK_PADDING * 2 + 1,
             constants.TERMINAL_CHECK_PADDING * 2 + 1))
        self.times = np.zeros(2)
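
For context, a minimal construction sketch (an assumption, not code from the original source; it reuses the tf_util session helper imported in the later examples). When net_scope is None the agent builds its own 'nav_global_network' scope, so it only needs a session; reuse=False is required the first time the variables are created.

from utils import tf_util
import tensorflow as tf

sess = tf_util.Session()
agent = GraphAgent(sess, reuse=False, num_unrolls=1)  # builds 'nav_global_network'
sess.run(tf.global_variables_initializer())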
Example #2
def run():
    try:
        with tf.variable_scope('nav_global_network'):
            network = FreeSpaceNetwork(constants.GRU_SIZE,
                                       constants.BATCH_SIZE,
                                       constants.NUM_UNROLLS)
            network.create_net()
            training_step = network.training_op

        with tf.variable_scope('loss'):
            loss_summary_op = tf.summary.merge([
                tf.summary.scalar('loss', network.loss),
            ])
        summary_full = tf.summary.merge_all()
        conv_var_list = [
            v for v in tf.trainable_variables()
            if 'conv' in v.name and 'weight' in v.name
            and (v.get_shape().as_list()[0] != 1 or v.get_shape().as_list()[1] != 1)
        ]
        for var in conv_var_list:
            tf_util.conv_variable_summaries(var, scope=var.name.replace('/', '_')[:-2])
        summary_with_images = tf.summary.merge_all()

        # prepare session
        sess = tf_util.Session()

        seq_inds = np.zeros((constants.BATCH_SIZE, 2), dtype=np.int32)

        sequence_generators = []
        for thread_index in range(constants.PARALLEL_SIZE):
            gpus = str(constants.GPU_ID).split(',')
            sequence_generator = SequenceGenerator(sess)
            sequence_generators.append(sequence_generator)

        sess.run(tf.global_variables_initializer())

        if not (constants.DEBUG or constants.DRAWING):
            from utils import py_util
            time_str = py_util.get_time_str()
            summary_writer = tf.summary.FileWriter(
                os.path.join(constants.LOG_FILE, time_str), sess.graph)
        else:
            summary_writer = None

        saver = tf.train.Saver(max_to_keep=3)

        # init or load checkpoint
        start_it = tf_util.restore_from_dir(sess, constants.CHECKPOINT_DIR)

        sess.graph.finalize()

        data_lock = threading.Lock()

        def load_new_data(thread_index):
            global data_buffer
            global data_counts

            sequence_generator = sequence_generators[thread_index]
            counter = 0
            while True:
                while not (len(data_buffer) < constants.REPLAY_BUFFER_SIZE
                           or np.max(data_counts) > 0):
                    time.sleep(1)
                counter += 1
                if constants.DEBUG:
                    print('\nThread %d' % thread_index)
                new_data, bounds, goal_pose = sequence_generator.generate_episode()
                # Convert the per-step list of dicts into a dict of lists.
                new_data = {
                    key: [new_data[ii][key] for ii in range(len(new_data))]
                    for key in new_data[0]
                }
                new_data['goal_pose'] = goal_pose
                new_data['memory'] = np.zeros(
                    (constants.SPATIAL_MAP_HEIGHT, constants.SPATIAL_MAP_WIDTH,
                     constants.MEMORY_SIZE))
                new_data['gru_state'] = np.zeros(constants.GRU_SIZE)
                if constants.DRAWING:
                    new_data['debug_images'] = sequence_generator.debug_images
                data_lock.acquire()
                if len(data_buffer) < constants.REPLAY_BUFFER_SIZE:
                    data_counts[len(data_buffer)] = 0
                    data_buffer.append(new_data)
                    counts = data_counts[:len(data_buffer)]
                    if counter % 10 == 0:
                        print('Buffer size %d  Num used %d  Max used amount %d' %
                              (len(data_buffer), len(counts[counts > 0]), np.max(counts)))
                else:
                    max_count_ind = np.argmax(data_counts)
                    data_buffer[max_count_ind] = new_data
                    data_counts[max_count_ind] = 0
                    if counter % 10 == 0:
                        print('Num used %d  Max used amount %d' %
                              (len(data_counts[data_counts > 0]),
                               np.max(data_counts)))
                data_lock.release()

        threads = []
        for i in range(constants.PARALLEL_SIZE):
            load_data_thread = threading.Thread(target=load_new_data,
                                                args=(i, ))
            load_data_thread.daemon = True
            load_data_thread.start()
            threads.append(load_data_thread)
            time.sleep(1)

        sequences = [None] * constants.BATCH_SIZE

        curr_it = 0
        dataTimeTotal = 0.00001
        solverTimeTotal = 0.00001
        summaryTimeTotal = 0.00001
        totalTimeTotal = 0.00001

        chosen_inds = set()
        loc_to_chosen_ind = {}
        for iteration in range(start_it, constants.MAX_TIME_STEP):
            if iteration == start_it or iteration % 10 == 1:
                currentTimeStart = time.time()
            tStart = time.time()
            batch_data = []
            batch_action = []
            batch_memory = []
            batch_gru_state = []
            batch_labels = []
            batch_pose = []
            batch_mask = []
            batch_goal_pose = []
            batch_pose_indicator = []
            batch_possible_label = []
            batch_debug_images = []
            for bb in range(constants.BATCH_SIZE):
                if seq_inds[bb, 0] == seq_inds[bb, 1]:
                    # Pick a new random sequence
                    pickable_inds = set(
                        np.where(data_counts < 100)[0]) - chosen_inds
                    count_size = len(pickable_inds)
                    while count_size == 0:
                        pickable_inds = set(
                            np.where(data_counts < 100)[0]) - chosen_inds
                        count_size = len(pickable_inds)
                        time.sleep(1)
                    random_ind = random.sample(pickable_inds, 1)[0]
                    data_lock.acquire()
                    sequences[bb] = data_buffer[random_ind]
                    goal_pose = sequences[bb]['goal_pose']
                    sequences[bb]['memory'] = np.zeros(
                        (constants.SPATIAL_MAP_HEIGHT,
                         constants.SPATIAL_MAP_WIDTH, constants.MEMORY_SIZE))
                    sequences[bb]['gru_state'] = np.zeros(constants.GRU_SIZE)
                    data_counts[random_ind] += 1
                    if bb in loc_to_chosen_ind:
                        chosen_inds.remove(loc_to_chosen_ind[bb])
                    loc_to_chosen_ind[bb] = random_ind
                    chosen_inds.add(random_ind)
                    data_lock.release()
                    seq_inds[bb, 0] = 0
                    seq_inds[bb, 1] = len(sequences[bb]['color'])
                data_len = min(constants.NUM_UNROLLS,
                               seq_inds[bb, 1] - seq_inds[bb, 0])
                ind0 = seq_inds[bb, 0]
                ind1 = seq_inds[bb, 0] + data_len
                data = sequences[bb]['color'][ind0:ind1]
                action = sequences[bb]['action'][ind0:ind1]
                labels = sequences[bb]['label'][ind0:ind1]
                memory = sequences[bb]['memory'].copy()
                gru_state = sequences[bb]['gru_state'].copy()
                pose = sequences[bb]['pose'][ind0:ind1]
                goal_pose = sequences[bb]['goal_pose']
                mask = sequences[bb]['weight'][ind0:ind1]
                pose_indicator = sequences[bb]['pose_indicator'][ind0:ind1]
                possible_label = sequences[bb]['possible_label'][ind0:ind1]
                if constants.DRAWING:
                    batch_debug_images.append(
                        sequences[bb]['debug_images'][ind0:ind1])
                # Pad a finished sequence out to NUM_UNROLLS with zeros/repeats.
                if data_len < constants.NUM_UNROLLS:
                    seq_inds[bb, :] = 0
                    data.extend([
                        np.zeros_like(data[0])
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                    action.extend([
                        np.zeros_like(action[0])
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                    labels.extend([
                        np.zeros_like(labels[0])
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                    pose.extend([
                        pose[-1]
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                    mask.extend([
                        np.zeros_like(mask[0])
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                    pose_indicator.extend([
                        np.zeros_like(pose_indicator[0])
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                    possible_label.extend([
                        np.zeros_like(possible_label[0])
                        for _ in range(constants.NUM_UNROLLS - data_len)
                    ])
                else:
                    seq_inds[bb, 0] += constants.NUM_UNROLLS
                batch_data.append(data)
                batch_action.append(action)
                batch_memory.append(memory)
                batch_gru_state.append(gru_state)
                batch_pose.append(pose)
                batch_goal_pose.append(goal_pose)
                batch_labels.append(labels)
                batch_mask.append(mask)
                batch_pose_indicator.append(pose_indicator)
                batch_possible_label.append(possible_label)

            feed_dict = {
                network.image_placeholder: np.ascontiguousarray(batch_data),
                network.action_placeholder: np.ascontiguousarray(batch_action),
                network.gru_placeholder: np.ascontiguousarray(batch_gru_state),
                network.pose_placeholder: np.ascontiguousarray(batch_pose),
                network.goal_pose_placeholder: np.ascontiguousarray(batch_goal_pose),
                network.labels_placeholder: np.ascontiguousarray(batch_labels)[..., np.newaxis],
                network.mask_placeholder: np.ascontiguousarray(batch_mask),
                network.pose_indicator_placeholder: np.ascontiguousarray(batch_pose_indicator),
                network.possible_label_placeholder: np.ascontiguousarray(batch_possible_label),
                network.memory_placeholders: np.ascontiguousarray(batch_memory),
            }
            dataTEnd = time.time()
            summaryTime = 0
            if constants.DEBUG or constants.DRAWING:
                outputs = sess.run([
                    training_step, network.loss, network.gru_state,
                    network.patch_weights_sigm, network.gru_outputs_full,
                    network.is_possible_sigm,
                    network.pose_indicator_placeholder,
                    network.terminal_patches, network.gru_outputs
                ], feed_dict=feed_dict)
            else:
                if iteration == start_it + 10:
                    # One-off full trace to record runtime metadata in TensorBoard.
                    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    outputs = sess.run([
                        training_step, network.loss, network.gru_state,
                        summary_with_images, network.gru_outputs
                    ], feed_dict=feed_dict, options=run_options, run_metadata=run_metadata)
                    loss_summary = outputs[3]
                    summary_writer.add_run_metadata(run_metadata, 'step_%07d' % iteration)
                    summary_writer.add_summary(loss_summary, iteration)
                    summary_writer.flush()
                elif iteration % 10 == 0:
                    if iteration % 100 == 0:
                        # Every 100th iteration also writes the image summaries.
                        outputs = sess.run([
                            training_step, network.loss, network.gru_state,
                            summary_with_images, network.gru_outputs
                        ], feed_dict=feed_dict)
                    else:
                        outputs = sess.run([
                            training_step, network.loss, network.gru_state,
                            loss_summary_op, network.gru_outputs
                        ], feed_dict=feed_dict)
                    loss_summary = outputs[3]
                    summaryTStart = time.time()
                    summary_writer.add_summary(loss_summary, iteration)
                    summary_writer.flush()
                    summaryTime = time.time() - summaryTStart
                else:
                    outputs = sess.run([
                        training_step, network.loss, network.gru_state,
                        network.gru_outputs
                    ], feed_dict=feed_dict)

            gru_state_out = outputs[2]
            memory_out = outputs[-1]
            for mm in range(constants.BATCH_SIZE):
                sequences[mm]['memory'] = memory_out[mm, ...]
                sequences[mm]['gru_state'] = gru_state_out[mm, ...]

            loss = outputs[1]
            solverTEnd = time.time()

            if constants.DEBUG or constants.DRAWING:
                # Look at outputs
                patch_weights = outputs[3]
                is_possible = outputs[5]
                pose_indicator = outputs[6]
                terminal_patches = outputs[7]
                data_lock.acquire()
                for bb in range(constants.BATCH_SIZE):
                    for tt in range(constants.NUM_UNROLLS):
                        if batch_mask[bb][tt] == 0:
                            break
                        if constants.DRAWING:
                            import cv2
                            import scipy.misc
                            from utils import drawing
                            curr_image = batch_data[bb][tt]
                            label = np.flipud(batch_labels[bb][tt])
                            debug_images = batch_debug_images[bb][tt]
                            color_image = debug_images['color']
                            state_image = debug_images['state_image']
                            label_memory_image = debug_images['label_memory'][:, :, 0]
                            label_memory_image_class = np.argmax(
                                debug_images['label_memory'][:, :, 1:], axis=2)
                            label_memory_image_class[0, 0] = constants.NUM_CLASSES

                            label_patch = debug_images['label']

                            print('Possible pred %.3f' % is_possible[bb, tt])
                            print('Possible label %.3f' %
                                  batch_possible_label[bb][tt])
                            patch = np.flipud(patch_weights[bb, tt, ...])
                            patch_occupancy = patch[:, :, 0]
                            print('occ', patch_occupancy)
                            print('label', label)
                            terminal_patch = np.flipud(
                                np.sum(terminal_patches[bb, tt, ...], axis=2))
                            image_list = [
                                debug_images['color'],
                                state_image,
                                debug_images['label_memory'][:, :, 0],
                                debug_images['memory_map'][:, :, 0],
                                label[:, :],
                                patch_occupancy,
                                np.flipud(pose_indicator[bb, tt]),
                                terminal_patch,
                            ]

                            image = drawing.subplot(image_list, 4, 2,
                                                    constants.SCREEN_WIDTH,
                                                    constants.SCREEN_HEIGHT)
                            cv2.imshow('image', image[:, :, ::-1])
                            cv2.waitKey(0)
                        else:
                            pdb.set_trace()
                data_lock.release()

            if not (constants.DEBUG or constants.DRAWING) and (
                    iteration % 500 == 0
                    or iteration == constants.MAX_TIME_STEP - 1):
                saverTStart = time.time()
                tf_util.save(saver, sess, constants.CHECKPOINT_DIR, iteration)
                saverTEnd = time.time()
                print('Saver:     %.3f' % (saverTEnd - saverTStart))

            curr_it += 1

            dataTimeTotal += dataTEnd - tStart
            summaryTimeTotal += summaryTime
            solverTimeTotal += solverTEnd - dataTEnd - summaryTime
            totalTimeTotal += time.time() - tStart

            if iteration == start_it or iteration % 10 == 0:
                print('Iteration: %d' % iteration)
                print('Loss:      %.3f' % loss)
                print('Data:      %.3f' % (dataTimeTotal / curr_it))
                print('Solver:    %.3f' % (solverTimeTotal / curr_it))
                print('Summary:   %.3f' % (summaryTimeTotal / curr_it))
                print('Total:     %.3f' % (totalTimeTotal / curr_it))
                print('Current:   %.3f\n' %
                      ((time.time() - currentTimeStart) / min(10, curr_it)))

    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        # Save final model
        if not (constants.DEBUG or constants.DRAWING):
            tf_util.save(saver, sess, constants.CHECKPOINT_DIR, iteration)
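
run() above relies on module-level shared state that the loader threads mutate under data_lock. A minimal sketch of those globals and the entry point (the initial fill value of data_counts is an assumption; it just needs to be >= 100 so unfilled slots are never sampled, and it is overwritten with 0 when a slot is filled):

import numpy as np

data_buffer = []  # replay buffer of episode dicts, capped at REPLAY_BUFFER_SIZE
data_counts = np.full(constants.REPLAY_BUFFER_SIZE, 1000, dtype=np.int32)  # per-slot sample counts

if __name__ == '__main__':
    run()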
Example #3
def run():
    global global_t
    global dataset
    global data_ind
    if constants.OBJECT_DETECTION:
        from darknet_object_detection import detector
        detector.setup_detectors(constants.PARALLEL_SIZE)

    os.environ["CUDA_VISIBLE_DEVICES"] = str(constants.GPU_ID)

    try:
        with tf.variable_scope('global_network'):
            if constants.END_TO_END_BASELINE:
                global_network = EndToEndBaselineNetwork()
            else:
                global_network = QAPlannerNetwork(constants.RL_GRU_SIZE, 1, 1)
            global_network.create_net()
        if constants.USE_NAVIGATION_AGENT:
            with tf.variable_scope('nav_global_network') as net_scope:
                free_space_network = FreeSpaceNetwork(constants.GRU_SIZE, 1, 1)
                free_space_network.create_net()
        else:
            net_scope = None

        conv_var_list = [v for v in tf.trainable_variables() if 'conv' in v.name and 'weight' in v.name and
                        (v.get_shape().as_list()[0] != 1 or v.get_shape().as_list()[1] != 1)]
        for var in conv_var_list:
            tf_util.conv_variable_summaries(var, scope=var.name.replace('/', '_')[:-2])
        conv_image_summary = tf.summary.merge_all()

        # prepare session
        sess = tf_util.Session()

        # Instantiate singletons without scope.
        if constants.PREDICT_DEPTH:
            from depth_estimation_network import depth_estimator
            with tf.variable_scope('') as depth_scope:
                depth_estimator = depth_estimator.get_depth_estimator(sess)
        else:
            depth_scope = None

        training_threads = []

        learning_rate_input = tf.placeholder(tf.float32, name='learning_rate')
        grad_applier = RMSPropApplier(learning_rate=learning_rate_input,
                decay=constants.RMSP_ALPHA,
                momentum=0.0,
                epsilon=constants.RMSP_EPSILON,
                clip_norm=constants.GRAD_NORM_CLIP)

        for i in range(constants.PARALLEL_SIZE):
            training_thread = A3CTrainingThread(
                    i, sess,
                    learning_rate_input,
                    grad_applier,
                    constants.MAX_TIME_STEP,
                    net_scope,
                    depth_scope)
            training_threads.append(training_thread)

        if constants.RUN_TEST:
            testing_thread = A3CTestingThread(constants.PARALLEL_SIZE + 1, sess, net_scope, depth_scope)

        sess.run(tf.global_variables_initializer())

        # Initialize pretrained weights after init.
        if constants.PREDICT_DEPTH:
            depth_estimator.load_weights()


        episode_reward_input = tf.placeholder(tf.float32, name='ep_reward')
        episode_length_input = tf.placeholder(tf.float32, name='ep_length')
        exist_answer_correct_input = tf.placeholder(tf.float32, name='exist_ans')
        count_answer_correct_input = tf.placeholder(tf.float32, name='count_ans')
        contains_answer_correct_input = tf.placeholder(tf.float32, name='contains_ans')
        percent_invalid_actions_input = tf.placeholder(tf.float32, name='inv')

        scalar_summaries = [
            tf.summary.scalar("Episode Reward", episode_reward_input),
            tf.summary.scalar("Episode Length", episode_length_input),
            tf.summary.scalar("Percent Invalid Actions", percent_invalid_actions_input),
        ]
        exist_summary = tf.summary.scalar("Answer Correct Existence", exist_answer_correct_input)
        count_summary = tf.summary.scalar("Answer Correct Counting", count_answer_correct_input)
        contains_summary = tf.summary.scalar("Answer Correct Containing", contains_answer_correct_input)
        exist_summary_op = tf.summary.merge(scalar_summaries + [exist_summary])
        count_summary_op = tf.summary.merge(scalar_summaries + [count_summary])
        contains_summary_op = tf.summary.merge(scalar_summaries + [contains_summary])
        summary_ops = [exist_summary_op, count_summary_op, contains_summary_op]

        summary_placeholders = {
            "episode_reward_input": episode_reward_input,
            "episode_length_input": episode_length_input,
            "exist_answer_correct_input": exist_answer_correct_input,
            "count_answer_correct_input": count_answer_correct_input,
            "contains_answer_correct_input": contains_answer_correct_input,
            "percent_invalid_actions_input" : percent_invalid_actions_input,
            }

        if constants.RUN_TEST:
            test_episode_reward_input = tf.placeholder(tf.float32, name='test_ep_reward')
            test_episode_length_input = tf.placeholder(tf.float32, name='test_ep_length')
            test_exist_answer_correct_input = tf.placeholder(tf.float32, name='test_exist_ans')
            test_count_answer_correct_input = tf.placeholder(tf.float32, name='test_count_ans')
            test_contains_answer_correct_input = tf.placeholder(tf.float32, name='test_contains_ans')
            test_percent_invalid_actions_input = tf.placeholder(tf.float32, name='test_inv')

            test_scalar_summaries = [
                tf.summary.scalar("Test Episode Reward", test_episode_reward_input),
                tf.summary.scalar("Test Episode Length", test_episode_length_input),
                tf.summary.scalar("Test Percent Invalid Actions", test_percent_invalid_actions_input),
            ]
            exist_summary = tf.summary.scalar("Test Answer Correct Existence", test_exist_answer_correct_input),
            count_summary = tf.summary.scalar("Test Answer Correct Counting", test_count_answer_correct_input),
            contains_summary = tf.summary.scalar("Test Answer Correct Containing", test_contains_answer_correct_input),
            test_exist_summary_op = tf.summary.merge(test_scalar_summaries + [exist_summary])
            test_count_summary_op = tf.summary.merge(test_scalar_summaries + [count_summary])
            test_contains_summary_op = tf.summary.merge(test_scalar_summaries + [contains_summary])
            test_summary_ops = [test_exist_summary_op, test_count_summary_op, test_contains_summary_op]

        if not constants.DEBUG:
            time_str = py_util.get_time_str()
            summary_writer = tf.summary.FileWriter(os.path.join(constants.LOG_FILE, time_str), sess.graph)
        else:
            summary_writer = None

        # init or load checkpoint with saver
        vars_to_save = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='global_network')
        saver = tf.train.Saver(vars_to_save, max_to_keep=3)
        print('-------------- Looking for checkpoints in ', constants.CHECKPOINT_DIR)
        global_t = tf_util.restore_from_dir(sess, constants.CHECKPOINT_DIR)

        if constants.USE_NAVIGATION_AGENT:
            print('now trying to restore nav model')
            tf_util.restore_from_dir(sess, 'logs/checkpoints/navigation', True)

        sess.graph.finalize()

        for i in range(constants.PARALLEL_SIZE):
            sess.run(training_threads[i].sync)
            training_threads[i].agent.reset()

        times = []

        if not constants.DEBUG and constants.RECORD_FEED_DICT:
            import h5py
            NUM_RECORD = 10000 * len(constants.USED_QUESTION_TYPES)
            time_str = py_util.get_time_str()
            if not os.path.exists('question_data_dump'):
                os.mkdir('question_data_dump')
            dataset = h5py.File('question_data_dump/maps_' + time_str + '.h5', 'w')

            dataset.create_dataset('question_data/existence_answer_placeholder', (NUM_RECORD, 1), dtype=np.int32)
            dataset.create_dataset('question_data/counting_answer_placeholder', (NUM_RECORD, 1), dtype=np.int32)

            dataset.create_dataset('question_data/question_type_placeholder', (NUM_RECORD, 1), dtype=np.int32)
            dataset.create_dataset('question_data/question_object_placeholder', (NUM_RECORD, 1), dtype=np.int32)
            dataset.create_dataset('question_data/question_container_placeholder', (NUM_RECORD, 21), dtype=np.int32)

            dataset.create_dataset('question_data/pose_placeholder', (NUM_RECORD, 3), dtype=np.int32)
            dataset.create_dataset('question_data/image_placeholder', (NUM_RECORD, 300, 300, 3), dtype=np.uint8)
            dataset.create_dataset('question_data/map_mask_placeholder',
                    (NUM_RECORD, constants.SPATIAL_MAP_HEIGHT, constants.SPATIAL_MAP_WIDTH, 27), dtype=np.uint8)
            dataset.create_dataset('question_data/meta_action_placeholder', (NUM_RECORD, 7), dtype=np.int32)
            dataset.create_dataset('question_data/possible_move_placeholder', (NUM_RECORD, 31), dtype=np.int32)

            dataset.create_dataset('question_data/answer_weight', (NUM_RECORD, 1), dtype=np.int32)
            dataset.create_dataset('question_data/taken_action', (NUM_RECORD, 32), dtype=np.int32)
            dataset.create_dataset('question_data/new_episode', (NUM_RECORD, 1), dtype=np.int32)

        time_lock = threading.Lock()

        data_ind = 0

        def train_function(parallel_index):
            global global_t
            global dataset
            global data_ind
            print('----------------------------------------thread', parallel_index, 'global_t', global_t)
            training_thread = training_threads[parallel_index]
            last_global_t = global_t
            last_global_t_image = global_t

            while global_t < constants.MAX_TIME_STEP:
                diff_global_t, ep_length, ep_reward, num_unrolls, feed_dict = training_thread.process(global_t, summary_writer,
                        summary_ops, summary_placeholders)
                time_lock.acquire()

                if not constants.DEBUG and constants.RECORD_FEED_DICT:
                    print('    NEW ENTRY: %d %s' % (data_ind, training_thread.agent.game_state.scene_name))
                    dataset['question_data/new_episode'][data_ind] = 1
                    for k, v in feed_dict.items():
                        key = 'question_data/' + k.name.split('/')[-1].split(':')[0]
                        if any([s in key for s in {
                            'gru_placeholder', 'num_unrolls',
                            'reward_placeholder', 'td_placeholder',
                            'learning_rate', 'phi_hat_prev_placeholder',
                            'next_map_mask_placeholder', 'next_pose_placeholder',
                            'episode_length_placeholder', 'question_count_placeholder',
                            'supervised_action_labels', 'supervised_action_weights_sigmoid',
                            'supervised_action_weights', 'question_direction_placeholder',
                        }]):
                            continue
                        v = np.ascontiguousarray(v)
                        if v.shape[0] == 1:
                            v = v[0, ...]
                        if len(v.shape) == 1 and num_unrolls > 1:
                            v = v[:, np.newaxis]
                        if 'map_mask_placeholder' in key:
                            v[:, :, :, :2] *= constants.STEPS_AHEAD
                            v[:, :, :, :2] += 2
                        data_loc = dataset[key][data_ind:data_ind + num_unrolls, ...]
                        data_len = data_loc.shape[0]
                        dataset[key][data_ind:data_ind + num_unrolls, ...] = v[:data_len, ...]
                    dataset.flush()

                    data_ind += num_unrolls
                    if data_ind >= NUM_RECORD:
                        # Everything is done
                        dataset.close()
                        raise Exception('Fake exception to exit the process. Don\'t worry, everything is fine.')
                if ep_length > 0:
                    times.append((ep_length, ep_reward))
                    print('Num episodes', len(times), 'Episode means', np.mean(times, axis=0))
                time_lock.release()
                global_t += diff_global_t
                # periodically save checkpoints to disk
                if not (constants.DEBUG or constants.RECORD_FEED_DICT) and parallel_index == 0:
                    if global_t - last_global_t_image > 10000:
                        print('Ran conv image summary')
                        summary_im_str = sess.run(conv_image_summary)
                        summary_writer.add_summary(summary_im_str, global_t)
                        last_global_t_image = global_t
                    if global_t - last_global_t > 10000:
                        print('Save checkpoint at timestamp %d' % global_t)
                        tf_util.save(saver, sess, constants.CHECKPOINT_DIR, global_t)
                        last_global_t = global_t

        def test_function():
            global global_t
            last_global_t_test = 0
            while global_t < constants.MAX_TIME_STEP:
                time.sleep(1)
                if global_t - last_global_t_test > 10000:
                    # RUN TEST
                    sess.run(testing_thread.sync)
                    from game_state import QuestionGameState
                    if testing_thread.agent.game_state is None:
                        testing_thread.agent.game_state = QuestionGameState(sess=sess)
                    for q_type in constants.USED_QUESTION_TYPES:
                        answers_correct = 0.0
                        ep_lengths = 0.0
                        ep_rewards = 0.0
                        invalid_percents = 0.0
                        rows = list(range(len(testing_thread.agent.game_state.test_datasets[q_type])))
                        random.shuffle(rows)
                        rows = rows[:16]
                        print('()()()()()()()rows', rows)
                        for rr, row in enumerate(rows):
                            (answer_correct, answer, gt_answer, ep_length,
                             ep_reward, invalid_percent, scene_num, seed,
                             required_interaction) = testing_thread.process((row, q_type))
                            answers_correct += int(answer_correct)
                            ep_lengths += ep_length
                            ep_rewards += ep_reward
                            invalid_percents += invalid_percent
                            print('############################### TEST ITERATION ##################################')
                            print('ep ', (rr + 1))
                            print('average correct', answers_correct / (rr + 1))
                            print('#################################################################################')
                        answers_correct /= len(rows)
                        ep_lengths /= len(rows)
                        ep_rewards /= len(rows)
                        invalid_percents /= len(rows)

                        # Write the summary
                        test_summary_str = sess.run(test_summary_ops[q_type], feed_dict={
                            test_episode_reward_input : ep_rewards,
                            test_episode_length_input : ep_lengths,
                            test_exist_answer_correct_input : answers_correct,
                            test_count_answer_correct_input : answers_correct,
                            test_contains_answer_correct_input : answers_correct,
                            test_percent_invalid_actions_input : invalid_percents,
                            })
                        summary_writer.add_summary(test_summary_str, global_t)
                        summary_writer.flush()
                        last_global_t_test = global_t
                    testing_thread.agent.game_state.env.stop()
                    testing_thread.agent.game_state = None



        train_threads = []
        for i in range(constants.PARALLEL_SIZE):
            train_threads.append(threading.Thread(target=train_function, args=(i,)))
            train_threads[i].daemon = True

        for t in train_threads:
            t.start()

        if constants.RUN_TEST:
            test_thread = threading.Thread(target=test_function)
            test_thread.daemon = True
            test_thread.start()

        for t in train_threads:
            t.join()

        if constants.RUN_TEST:
            test_thread.join()

        if not constants.DEBUG:
            if not os.path.exists(constants.CHECKPOINT_DIR):
                os.makedirs(constants.CHECKPOINT_DIR)
            saver.save(sess, constants.CHECKPOINT_DIR + '/' + 'checkpoint', global_step=global_t)
            summary_writer.close()
            print('Saved.')

    except KeyboardInterrupt:
        print('Interrupted; shutting down')
    except:
        import traceback
        traceback.print_exc()
    finally:
        if not constants.DEBUG:
            print('Now saving data. Please wait')
            tf_util.save(saver, sess, constants.CHECKPOINT_DIR, global_t)
            summary_writer.close()
            print('Saved.')
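
run() here also expects a few module-level globals (declared with the global statement inside the function). A minimal sketch of their definitions and the entry point; the exact initial values are assumptions consistent with how they are used:

global_t = 0    # shared A3C step counter, advanced by every training thread
dataset = None  # h5py handle, opened inside run() when RECORD_FEED_DICT is set
data_ind = 0    # write cursor into the recorded feed_dict dataset

if __name__ == '__main__':
    run()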
Example #4
def main():
    if constants.OBJECT_DETECTION:
        from darknet_object_detection import detector
        detector.setup_detectors(constants.PARALLEL_SIZE)

    with tf.device('/gpu:' + str(constants.GPU_ID)):
        with tf.variable_scope('global_network'):
            if constants.END_TO_END_BASELINE:
                global_network = EndToEndBaselineNetwork()
            else:
                global_network = QAPlannerNetwork(constants.RL_GRU_SIZE, 1, 1)
            global_network.create_net()
        if constants.USE_NAVIGATION_AGENT:
            with tf.variable_scope('nav_global_network') as net_scope:
                free_space_network = FreeSpaceNetwork(constants.GRU_SIZE, 1, 1)
                free_space_network.create_net()
        else:
            net_scope = None

        # prepare session
        sess = tf_util.Session()

        if constants.PREDICT_DEPTH:
            from depth_estimation_network import depth_estimator
            with tf.variable_scope('') as depth_scope:
                depth_estimator = depth_estimator.get_depth_estimator(sess)
        else:
            depth_scope = None

        sess.run(tf.global_variables_initializer())

        # Initialize pretrained weights after init.
        if constants.PREDICT_DEPTH:
            depth_estimator.load_weights()

        testing_threads = []

        for i in range(constants.PARALLEL_SIZE):
            testing_thread = A3CTestingThread(i, sess, net_scope, depth_scope)
            testing_threads.append(testing_thread)

        tf_util.restore_from_dir(sess, constants.CHECKPOINT_DIR, True)

        if constants.USE_NAVIGATION_AGENT:
            print('now trying to restore nav model')
            tf_util.restore_from_dir(
                sess, os.path.join(constants.CHECKPOINT_PREFIX, 'navigation'),
                True)

    sess.graph.finalize()

    question_types = constants.USED_QUESTION_TYPES
    rows = []
    for q_type in question_types:
        curr_rows = list(
            range(len(testing_thread.agent.game_state.test_datasets[q_type])))
        #curr_rows = list(range(8))
        rows.extend(list(zip(curr_rows, [q_type] * len(curr_rows))))

    random.shuffle(rows)

    answers_correct = []
    ep_lengths = []
    ep_rewards = []
    invalid_percents = []
    time_lock = threading.Lock()
    if not os.path.exists(constants.LOG_FILE):
        os.makedirs(constants.LOG_FILE)
    out_file = open(
        constants.LOG_FILE + '/results_' + constants.TEST_SET + '_' +
        py_util.get_time_str() + '.csv', 'w')
    out_file.write(constants.LOG_FILE + '\n')
    out_file.write(
        'question_type, answer_correct, answer, gt_answer, episode_length, invalid_action_percent, scene number, seed, required_interaction\n'
    )

    def test_function(thread_ind):
        testing_thread = testing_threads[thread_ind]
        sess.run(testing_thread.sync)
        #from game_state import QuestionGameState
        #if testing_thread.agent.game_state is None:
        #testing_thread.agent.game_state = QuestionGameState(sess=sess)
        while len(rows) > 0:
            time_lock.acquire()
            if len(rows) == 0:
                # Another thread drained the queue; release the lock before exiting.
                time_lock.release()
                break
            row = rows.pop()
            time_lock.release()

            (answer_correct, answer, gt_answer, ep_length, ep_reward,
             invalid_percent, scene_num, seed,
             required_interaction) = testing_thread.process(row)
            question_type = row[1] + 1

            time_lock.acquire()
            output_str = (
                '%d, %d, %d, %d, %d, %f, %d, %d, %d\n' %
                (question_type, answer_correct, answer, gt_answer, ep_length,
                 invalid_percent, scene_num, seed, required_interaction))
            out_file.write(output_str)
            out_file.flush()
            answers_correct.append(int(answer_correct))
            ep_lengths.append(ep_length)
            ep_rewards.append(ep_reward)
            invalid_percents.append(invalid_percent)
            print('###############################')
            print('ep ', row)
            print('num episodes', len(answers_correct))
            print('average correct', np.mean(answers_correct))
            print('invalid percents', np.mean(invalid_percents),
                  np.median(invalid_percents))
            print('###############################')
            time_lock.release()

    test_threads = []
    for i in range(constants.PARALLEL_SIZE):
        test_threads.append(threading.Thread(target=test_function, args=(i, )))

    for t in test_threads:
        t.start()

    for t in test_threads:
        t.join()

    out_file.close()
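
A likely entry point for this evaluation script (an assumption; the original module presumably ends the same way):

if __name__ == '__main__':
    main()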
Example #5
class GraphAgent(object):
    def __init__(self,
                 sess,
                 reuse=True,
                 num_unrolls=1,
                 game_state=None,
                 net_scope=None):
        if net_scope is None:
            with tf.name_scope('agent'):
                with tf.variable_scope('nav_global_network', reuse=reuse):
                    self.network = FreeSpaceNetwork(constants.GRU_SIZE, 1,
                                                    num_unrolls)
                    self.network.create_net()
        else:
            with tf.variable_scope(net_scope, reuse=True):
                self.network = FreeSpaceNetwork(constants.GRU_SIZE, 1, 1)
                self.network.create_net(add_loss=False)

        if game_state is None:
            self.game_state = GameState(sess=sess)
        else:
            self.game_state = game_state
        self.action_util = self.game_state.action_util
        self.gt_graph = None
        self.sess = sess
        self.num_steps = 0
        self.global_step_id = 0
        self.num_unrolls = num_unrolls
        self.pose_indicator = np.zeros(
            (constants.TERMINAL_CHECK_PADDING * 2 + 1,
             constants.TERMINAL_CHECK_PADDING * 2 + 1))
        self.times = np.zeros(2)

    def goto(self, action, step_num):
        # Look down
        start_angle = self.game_state.pose[3]
        if start_angle != 60:
            look_action = {
                'action': 'TeleportFull',
                'x': self.game_state.pose[0] * constants.AGENT_STEP_SIZE,
                'y': self.game_state.agent_height,
                'z': self.game_state.pose[1] * constants.AGENT_STEP_SIZE,
                'rotateOnTeleport': True,
                'rotation': self.game_state.pose[2] * 90,
                'horizon': 60,
            }
            super(QuestionGameState, self.game_state).step(look_action)

        self.game_state.end_point = (action['x'], action['z'],
                                     action['rotation'] / 90)
        self.goal_pose = np.array([
            self.game_state.end_point[0] - self.game_state.graph.xMin,
            self.game_state.end_point[1] - self.game_state.graph.yMin
        ], dtype=np.int32)[:2]
        self.pose = self.game_state.pose
        self.inference()
        plan, path = self.get_plan()
        steps = 0
        invalid_actions = 0

        self.reset(self.game_state.scene_name)

        self.game_state.board = None
        while (steps < 20 and len(plan) > 0
               and self.is_possible >= constants.POSSIBLE_THRESH):
            t_start = time.time()
            action = plan[0]
            self.step(action)
            invalid_actions += 1 - int(
                self.game_state.event.metadata['lastActionSuccess'])

            plan, path = self.get_plan()
            steps += 1
            if constants.DRAWING:
                image = self.draw_state()
                if not os.path.exists('visualizations/images'):
                    os.makedirs('visualizations/images')
                cv2.imwrite(
                    'visualizations/images/state_%05d.jpg' %
                    (step_num + steps), image[:, :, ::-1])
            self.times[0] += time.time() - t_start

        print('step time %.3f' % (self.times[0] / max(steps, 1)))
        self.times[0] = 0

        # Look back
        if start_angle != 60:
            look_action = {
                'action': 'TeleportFull',
                'x': self.game_state.pose[0] * constants.AGENT_STEP_SIZE,
                'y': self.game_state.agent_height,
                'z': self.game_state.pose[1] * constants.AGENT_STEP_SIZE,
                'rotateOnTeleport': True,
                'rotation': self.game_state.pose[2] * 90,
                'horizon': start_angle,
            }
            super(QuestionGameState, self.game_state).step(look_action)
        return steps, invalid_actions

    def inference(self):
        image = self.game_state.s_t[np.newaxis, np.newaxis, ...]

        self.pose_indicator = np.zeros(
            (constants.TERMINAL_CHECK_PADDING * 2 + 1,
             constants.TERMINAL_CHECK_PADDING * 2 + 1))
        if (abs(self.pose[0] - self.game_state.end_point[0]) <= constants.TERMINAL_CHECK_PADDING and
                abs(self.pose[1] - self.game_state.end_point[1]) <= constants.TERMINAL_CHECK_PADDING):
            self.pose_indicator[
                self.pose[1] - self.game_state.end_point[1] + constants.TERMINAL_CHECK_PADDING,
                self.pose[0] - self.game_state.end_point[0] + constants.TERMINAL_CHECK_PADDING] = 1

        self.feed_dict = {
            self.network.image_placeholder: image,
            self.network.action_placeholder: self.action[np.newaxis, np.newaxis, :],
            self.network.pose_placeholder: np.array(
                self.gt_graph.get_shifted_pose(self.pose))[np.newaxis, np.newaxis, :3],
            self.network.memory_placeholders: self.memory[np.newaxis, ...],
            self.network.gru_placeholder: self.gru_state,
            self.network.pose_indicator_placeholder: self.pose_indicator[np.newaxis, np.newaxis, ...],
            self.network.goal_pose_placeholder: self.goal_pose[np.newaxis, ...],
        }
        if self.num_unrolls is None:
            self.feed_dict[self.network.num_unrolls] = 1

        outputs = self.sess.run([
            self.network.patch_weights_clipped,
            self.network.gru_state,
            self.network.occupancy,
            self.network.gru_outputs_full,
            self.network.is_possible_sigm,
        ], feed_dict=self.feed_dict)

        self.map_weights = outputs[0][0, 0, ...]
        self.game_state.graph.update_graph(
            (self.map_weights, [1 + graph_obj.EPSILON]), self.pose, rows=[0])

        self.gru_state = outputs[1]
        self.occupancy = outputs[2][0, :self.bounds[3], :self.bounds[2], 0] * (
            self.game_state.graph.memory[:, :, 0] > 1)
        self.memory = outputs[3][0, 0, ...]
        self.is_possible = outputs[4][0, 0]

    def reset(self, scene_name=None, seed=None):
        if scene_name is not None:
            if self.game_state.env is not None and type(
                    self.game_state) == GameState:
                self.game_state.reset(scene_name, use_gt=False, seed=seed)
            self.gt_graph = graph_obj.Graph('layouts/%s-layout.npy' % scene_name, use_gt=True)
            self.bounds = [
                self.game_state.graph.xMin, self.game_state.graph.yMin,
                self.game_state.graph.xMax - self.game_state.graph.xMin + 1,
                self.game_state.graph.yMax - self.game_state.graph.yMin + 1
            ]
            if len(self.game_state.end_point) == 0:
                self.game_state.end_point = (self.game_state.graph.xMin +
                                             constants.TERMINAL_CHECK_PADDING,
                                             self.game_state.graph.yMin +
                                             constants.TERMINAL_CHECK_PADDING,
                                             0)
            self.action = np.zeros(self.action_util.num_actions)
            self.memory = np.zeros(
                (constants.SPATIAL_MAP_HEIGHT, constants.SPATIAL_MAP_WIDTH,
                 constants.MEMORY_SIZE))
            self.gru_state = np.zeros((1, constants.GRU_SIZE))
            self.pose = self.game_state.pose
            self.is_possible = 1
            self.num_steps = 0
            self.times = np.zeros(2)
            self.impossible_spots = set()
            self.visited_spots = set()
        else:
            self.game_state.reset()

        self.goal_pose = np.array([
            self.game_state.end_point[0] - self.game_state.graph.xMin,
            self.game_state.end_point[1] - self.game_state.graph.yMin
        ], dtype=np.int32)[:2]
        self.inference()

    def step(self, action):
        t_start = time.time()
        if type(self.game_state) == GameState:
            self.game_state.step(action)
        else:
            super(QuestionGameState, self.game_state).step(action)
        self.times[1] += time.time() - t_start
        if self.num_steps % 100 == 0:
            print('game state step time %.3f' % (self.times[1] /
                                                 (self.num_steps + 1)))
        self.pose = self.game_state.pose
        self.action[:] = 0
        self.action[self.action_util.action_dict_to_ind(action)] = 1
        self.inference()
        self.num_steps += 1
        self.global_step_id += 1

        if not self.game_state.event.metadata['lastActionSuccess']:
            # Can't traverse here; mark the cell directly ahead (based on the
            # heading self.pose[2]) with MAX_WEIGHT so the planner routes around it.
            if self.pose[2] == 0:
                self.gt_graph.update_weight(self.pose[0], self.pose[1] + 1,
                                            graph_obj.MAX_WEIGHT)
                spot = (self.pose[0], self.pose[1] + 1)
            elif self.pose[2] == 1:
                self.gt_graph.update_weight(self.pose[0] + 1, self.pose[1],
                                            graph_obj.MAX_WEIGHT)
                spot = (self.pose[0] + 1, self.pose[1])
            elif self.pose[2] == 2:
                self.gt_graph.update_weight(self.pose[0], self.pose[1] - 1,
                                            graph_obj.MAX_WEIGHT)
                spot = (self.pose[0], self.pose[1] - 1)
            elif self.pose[2] == 3:
                self.gt_graph.update_weight(self.pose[0] - 1, self.pose[1],
                                            graph_obj.MAX_WEIGHT)
                spot = (self.pose[0] - 1, self.pose[1])
            self.impossible_spots.add(spot)
        else:
            self.visited_spots.add((self.pose[0], self.pose[1]))
        for spot in self.impossible_spots:
            graph_max = self.gt_graph.memory[:, :, 0].max()
            self.game_state.graph.update_weight(spot[0], spot[1], graph_max)
            self.occupancy[spot[1], spot[0]] = 1

    def get_plan(self):
        self.plan, self.path = self.game_state.graph.get_shortest_path(
            self.pose, self.game_state.end_point)
        return self.plan, self.path

    def get_label(self):
        patch, curr_point = self.gt_graph.get_graph_patch(self.pose)
        patch = patch[:, :, 0]
        # Binarize: values below 2 become 0, values of 2 or more become 1.
        patch[patch < 2] = 0
        patch[patch > 1] = 1
        return patch

    def draw_state(self, return_list=False):
        if not constants.DRAWING:
            return
        from utils import drawing
        curr_image = self.game_state.detection_image.copy()
        curr_depth = self.game_state.s_t_depth
        if curr_depth is not None:
            curr_depth = self.game_state.s_t_depth.copy()
            curr_depth[0, 0] = 0
            curr_depth[0, 1] = constants.MAX_DEPTH

        label = np.flipud(self.get_label())
        patch = np.flipud(self.game_state.graph.get_graph_patch(self.pose)[0])
        state_image = self.game_state.draw_state().copy()
        memory_map = np.flipud(self.game_state.graph.memory.copy())
        memory_map = np.concatenate(
            (memory_map[:, :, [0]], np.zeros(
                memory_map[:, :, [0]].shape), memory_map[:, :, 1:]),
            axis=2)

        images = [
            curr_image,
            state_image,
            np.minimum(memory_map[:, :, 0], 200),
            np.argmax(memory_map[:, :, 1:], axis=2),
            label[:, :],
            np.minimum(patch[:, :, 0], 10),
        ]
        if return_list:
            return images
        action_str = 'action: %s possible %.3f' % (
            self.action_util.actions[np.where(
                self.action == 1)[0].squeeze()]['action'], self.is_possible)
        titles = [
            '%07d' % self.num_steps, action_str, 'Occupancy Map',
            'Objects Map', 'Label Patch', 'Learned Patch'
        ]
        image = drawing.subplot(images,
                                4,
                                3,
                                curr_image.shape[1],
                                curr_image.shape[0],
                                titles=titles,
                                border=3)

        return image


if __name__ == '__main__':
    from networks.free_space_network import FreeSpaceNetwork
    from utils import tf_util
    import tensorflow as tf
    sess = tf_util.Session()

    with tf.variable_scope('nav_global_network'):
        network = FreeSpaceNetwork(constants.GRU_SIZE, 1, 1)
        network.create_net()
    sess.run(tf.global_variables_initializer())
    start_it = tf_util.restore_from_dir(sess, constants.CHECKPOINT_DIR)

    import cv2

    sequence_generator = SequenceGenerator(sess)
    sequence_generator.planner_prob = 1
    counter = 0
    while True:
        states, bounds, goal_pose = sequence_generator.generate_episode()
        images = sequence_generator.debug_images
        for im_dict in images:
            counter += 1
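            # Hypothetical continuation (the listing appears truncated here):
            # show each episode's color frame the way Example #2 displays
            # debug images; 'color' is a key used by debug_images above.
            cv2.imshow('color', im_dict['color'][:, :, ::-1])
            cv2.waitKey(1)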