Пример #1
0
def main(args):
    """Run the DAgger loop: aggregate data, train, predict and evaluate.

    Each iteration asks the aggregator for the current training/validation
    split, trains a model on it, runs inference on the part of the
    aggregated list that still has to be evaluated, evaluates those
    predictions and stores the results for the next iteration. The loop
    ends after a fixed maximum number of iterations, or earlier when
    either the aggregator (``dag_done``) or the evaluator (``stop_dagger``)
    signals that DAgger should stop.

    Args:
        args: Object exposing the attributes ``base_dir``, ``img_dir``,
            ``label_dir``, ``inf_dir``, ``dag_dir`` and ``poses_dir``,
            which are forwarded to the ``Aggregator``.

    Returns:
        None.
    """
    max_iterations = 100

    aggregator = Aggregator(_base_dir=args.base_dir,
                            _img_dir=args.img_dir,
                            _label_dir=args.label_dir,
                            _inf_dir=args.inf_dir,
                            _dag_dir=args.dag_dir,
                            _poses_dir=args.poses_dir)
    print('Num of ground truth labeled images %d\n\n' % len(aggregator.agg_list))

    for _ in range(max_iterations):
        print('\nDAgger Iteration: %d\n' % aggregator.dag_it_num)
        # Creates new directories for this iteration.
        aggregator.on_new_iter()
        # Training list, validation list, and the index in agg_list at which
        # the not-yet-evaluated entries start.
        train, val, idx_eval = aggregator.get_training_data()
        # Directory in which the predictions of this iteration are expected.
        inf_dir = aggregator.dag_dir + '%02d/inf/' % aggregator.dag_it_num
        evaluator = Evaluator(aggregator.base_dir, inf_dir, aggregator.label_dir,
                              _agg_list=aggregator.agg_list)
        # Estimate the batch each worker evaluates later, so that a given
        # maximum number of workers is not exceeded.
        evaluator.estimate_batch_size(len(aggregator.agg_list[idx_eval:]))

        print('Evaluating %d images in %d threads' % (evaluator.batch_size, evaluator.num_max_threads))

        if aggregator.dag_done or evaluator.stop_dagger:
            # Persist the final aggregated list before leaving the loop.
            aggregator.save_list()

            print('DAgger stopped!')
            break

        aggregator.save_list(train, 'train')
        aggregator.save_list(val, 'val')

        # --- Training and prediction ---

        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        # A fresh graph per iteration keeps the models of different
        # iterations separate from each other.
        with tf.Graph().as_default():
            session = tf.Session('')
            try:
                KTF.set_session(session)
                KTF.set_learning_phase(1)
                # The trainer receives the already separated training and
                # validation lists; inference is done for every entry of
                # agg_list from idx_eval onwards.
                # NOTE(review): the trainer is given aggregator.inf_dir while
                # the evaluator reads from the per-iteration inf_dir above --
                # confirm that both resolve to the same location.
                trainer = Trainer(_train_list=train,
                                  _val_list=val,
                                  _inf_list=aggregator.agg_list[idx_eval:],
                                  _base_dir=aggregator.base_dir,
                                  _img_dir=aggregator.img_dir,
                                  _label_dir=aggregator.label_dir,
                                  _inf_dir=aggregator.inf_dir,
                                  _dag_dir=aggregator.dag_dir,
                                  _log_dir='log/')
                # Hyper parameters.
                trainer.batch_size = 8
                # Size the epoch so the net gets the chance to see every
                # training image once per epoch; otherwise the validation
                # loss was observed not to decrease and training stopped.
                trainer.epoch_steps = len(train) // trainer.batch_size
                trainer.val_steps = len(val) // trainer.batch_size
                trainer.n_epochs = 25
                trainer.dag_it = aggregator.dag_it_num
                trainer.update_callback()
                # Train for the configured number of epochs on the current
                # dataset.
                trainer.train()
                print('\nTraining done!\nStarting Prediction\n')
                # Save predictions for the images the net has not seen.
                trainer.predict()
            finally:
                # Always release the session, even when training or
                # prediction raises, so its resources are not leaked.
                session.close()

        print('\nInference done!\n')
        print('Evaluating %d images' % len(aggregator.agg_list[idx_eval:]))

        # --- Evaluation ---

        aggregator.agg_list = evaluator.process_prediction(agg_chunk=aggregator.agg_list,
                                                           idx_eval=idx_eval)
        print('Evaluation done. Saving evaluated data.')
        # Store the evaluated entries for the next iteration.
        aggregator.save_list(aggregator.agg_list[idx_eval:], 'eval')

        # Save the full aggregation list, with the information of all
        # iterations so far, in this iteration's folder.
        aggregator.save_list()
        # Delete the images of the inference step to save space on the drive.
        aggregator.delete_inf()
        aggregator.prepare_next_it()