# Common imports assumed by all of the examples below; `utils` and
# `ImitationLearning` follow the BabyAI codebase these snippets appear to
# come from. Helpers such as generate_demos, grow_training_set, get_bot_mean
# and EvalLearner are defined elsewhere in the same scripts.
import csv
import json
import logging
import os

import numpy as np
import torch

import babyai.utils as utils
from babyai.imitation import ImitationLearning


def main(args):
    # Verify the arguments when we train on multiple environments.
    # We deliberately don't check these against len(args.multi_env), in case
    # we need to validate on environments other than the training ones.
    if args.multi_env is not None:
        assert len(args.multi_demos) == len(args.multi_episodes)

    args.model = args.model or ImitationLearning.default_model_name(args)
    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    il_learn = ImitationLearning(args)

    # Define the csv log header and the Tensorboard writer
    header = [
        "update", "frames", "FPS", "duration", "entropy", "policy_loss",
        "train_accuracy", "validation_accuracy"
    ]
    if args.multi_env is None:
        header.extend(["validation_return", "validation_success_rate"])
    else:
        header.extend(
            ["validation_return_{}".format(env) for env in args.multi_env])
        header.extend([
            "validation_success_rate_{}".format(env) for env in args.multi_env
        ])

    if args.weigh_corrections:
        header.extend(["correction_weight_loss"])

    if args.compute_cic:
        header.extend(["val_cic"])

    writer = None
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))

    # Define csv writer
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # Don't buffer writes to the csv log (buffering=1 line-buffers the file):
    # we assume one update takes much longer than one write to the log.
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Get the status path
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')

    # Log command, availability of CUDA, and model
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(il_learn.model)

    il_learn.train(il_learn.train_demos, writer, csv_writer, status_path,
                   header)
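
# A minimal sketch, not from the original script, of the contract between
# il_learn.train and the csv log above: each update appends one row whose
# columns follow `header`. `write_log_row` and `stats` are hypothetical names.
def write_log_row(csv_writer, header, stats):
    # `stats` maps a column name from `header` to this update's value;
    # columns the caller did not compute are written as empty cells.
    csv_writer.writerow([stats.get(col, '') for col in header])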
Example #2
def main(args):
    args.model = args.model or ImitationLearning.default_model_name(args)
    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)
    il_learn = ImitationLearning(args)

    # Define the csv log header and the Tensorboard writer
    header = ["update", "frames", "FPS", "duration", "entropy", "policy_loss",
              "train_accuracy", "validation_accuracy", "validation_return",
              "validation_success_rate"]
    writer = None
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))

    # Define csv writer
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # Don't buffer writes to the csv log (buffering=1 line-buffers the file):
    # we assume one update takes much longer than one write to the log.
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Get the status path
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')

    # Log command, availability of CUDA, and model
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(il_learn.acmodel)
    train_demos = []

    # Generate the initial set of training demos
    if not args.dagger or args.dagger_start_with_bot_demos:
        train_demos += generate_demos(args.env, range(args.seed, args.seed + args.start_demos))
    # Seed at which evaluation will begin
    eval_seed = args.seed + args.start_demos

    model_name = args.model

    if args.dagger:
        mean_steps = get_bot_mean(args.env, args.episodes_to_evaluate_mean, args.seed)
    else:
        mean_steps = None

    for phase_no in range(args.phases):
        logger.info("Starting phase {} with {} demos".format(phase_no, len(train_demos)))

        if not args.finetune:
            # Create a new model to be trained from scratch
            logger.info("Creating new model to be trained from scratch")
            args.model = model_name + ('_phase_%d' % phase_no)
            il_learn = ImitationLearning(args)

        # Train the imitation learning agent
        if len(train_demos) > 0:
            il_learn.train(train_demos, writer, csv_writer, status_path, header, reset_status=True)

        # Stopping criterion
        valid_log = il_learn.validate(args.val_episodes)
        success_rate = np.mean([1 if r > 0 else 0 for r in valid_log[0]['return_per_episode']])

        if success_rate >= 0.99:
            logger.info("Reached target success rate with {} demos, stopping".format(len(train_demos)))
            break

        eval_seed = grow_training_set(il_learn, train_demos, eval_seed, args.demo_grow_factor, args.num_eval_demos,
                                      args.dagger, mean_steps)
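
# A hypothetical sketch, not the BabyAI implementation, of the seed
# bookkeeping grow_training_set is assumed to do above: consume one fresh
# seed per generated demo and return the first seed left unused, so that
# eval_seed advances monotonically across phases. `make_demo` is illustrative.
def grow_training_set_sketch(train_demos, eval_seed, grow_factor, make_demo):
    # Grow the demo set to grow_factor times its current size.
    target = max(int(len(train_demos) * grow_factor), len(train_demos) + 1)
    while len(train_demos) < target:
        train_demos.append(make_demo(eval_seed))  # one demo per seed
        eval_seed += 1
    return eval_seed  # the caller persists this for later phases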
Example #3

def main(args):
    args.model = args.model or ImitationLearning.default_model_name(args)
    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)
    il_learn = ImitationLearning(args)

    # Define the csv log header and the Tensorboard writer
    header = [
        "update", "frames", "FPS", "duration", "entropy", "policy_loss",
        "train_accuracy", "validation_accuracy", "validation_return",
        "validation_success_rate"
    ]
    writer = None
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))

    # Define csv writer
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # Don't buffer writes to the csv log (buffering=1 line-buffers the file):
    # we assume one update takes much longer than one write to the log.
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Log command, availability of CUDA, and model
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(il_learn.acmodel)

    # Seed at which demo evaluation/generation will begin
    eval_seed = args.seed + len(il_learn.train_demos)

    # Phase at which we start
    cur_phase = 0

    # Try to load the status (if resuming)
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
            eval_seed = status.get('eval_seed', eval_seed)
            cur_phase = status.get('cur_phase', cur_phase)

    model_name = args.model

    for phase_no in range(cur_phase, args.phases):
        logger.info("Starting phase {} with {} demos, eval_seed={}".format(
            phase_no, len(il_learn.train_demos), eval_seed))

        # Each phase trains a different model from scratch
        args.model = model_name + ('_phase_%d' % phase_no)
        il_learn = ImitationLearning(args)

        # Train the imitation learning agent
        if len(il_learn.train_demos) > 0:
            train_status_path = os.path.join(utils.get_log_dir(args.model),
                                             'status.json')
            il_learn.train(il_learn.train_demos, writer, csv_writer,
                           train_status_path, header)

        # Stopping criterion
        valid_log = il_learn.validate(args.val_episodes)
        success_rate = np.mean(
            [1 if r > 0 else 0 for r in valid_log[0]['return_per_episode']])

        if success_rate >= 0.99:
            logger.info(
                "Reached target success rate with {} demos, stopping".format(
                    len(il_learn.train_demos)))
            break

        eval_seed = grow_training_set(il_learn, il_learn.train_demos,
                                      eval_seed, args.demo_grow_factor,
                                      args.num_eval_demos)

        # Save the current demo generation seed
        with open(status_path, 'w') as dst:
            status = {'eval_seed': eval_seed, 'cur_phase': phase_no + 1}
            json.dump(status, dst)

        # Save the demos
        demos_path = utils.get_demos_path(args.demos,
                                          args.env,
                                          args.demos_origin,
                                          valid=False)
        logger.info("Saving demos to {}".format(demos_path))
        utils.save_demos(il_learn.train_demos, demos_path)
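
# An optional hardening, not done by the script above: write status.json to
# a temporary file and rename it, so a crash mid-write cannot leave a
# truncated file that breaks resumption. `save_status_atomically` is a
# hypothetical helper.
def save_status_atomically(status_path, status):
    tmp_path = status_path + '.tmp'
    with open(tmp_path, 'w') as dst:
        json.dump(status, dst)
    os.replace(tmp_path, status_path)  # the rename is atomic on POSIX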
Example #4
parser.add_argument(
    "--val-interval",  # assumed flag name; the opening of this call was truncated
    type=int,
    default=1,
    help="number of epochs between two validation checks (default: 1)")
parser.add_argument(
    "--val-episodes",
    type=int,
    default=500,
    help="number of episodes used to evaluate the agent, and to evaluate "
         "validation accuracy")

if __name__ == '__main__':

    args = parser.parse_args()

    torch.manual_seed(222)
    torch.cuda.manual_seed_all(222)
    np.random.seed(222)
    args.model = args.model or ImitationLearning.default_model_name(args)
    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    maml = EvalLearner(args).to(device)

    num_params = sum(
        int(np.prod(p.shape)) for p in maml.parameters() if p.requires_grad)
    print(maml)
    print('Total trainable parameters:', num_params)

    logs = maml.validate(maml.val_demos)

    # Average validation entropy and policy loss across the evaluation logs
    H = sum([log['entropy'] for log in logs]) / float(len(logs))
    PL = sum([log['policy_loss'] for log in logs]) / float(len(logs))
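    # The excerpt ends without using H and PL; a hedged completion in the
    # style of the rest of the script would simply log them:
    logger.info("validation entropy: {:.4f}, policy loss: {:.4f}".format(H, PL))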