예제 #1
0
def offpolicy_main(variant):
    """Train an off-policy agent (SAC or TD3) on the configured environment.

    Builds the exploration/evaluation environments, policies and trainer,
    optionally attaches a vision model and a previously saved replay
    buffer, then runs ``TorchBatchRLAlgorithm.train()``. Reads run
    configuration from the module-level ``args`` and ``env_kwargs``.

    Args:
        variant: dict of hyperparameters; must contain
            'replay_buffer_size' and 'algorithm_kwargs'.

    Raises:
        ValueError: if ``args.algo`` is neither 'sac' nor 'td3'.
    """
    print("offpolicy main")

    # Map the CLI flag to the trainer name and fail fast on unknown
    # algorithms (the original fell through and crashed later with a
    # NameError on the unbound `algo`).
    algo_names = {'sac': 'SAC', 'td3': 'TD3'}
    if args.algo not in algo_names:
        raise ValueError("Unsupported algorithm: {}".format(args.algo))
    algo = algo_names[args.algo]

    setup_logger('{0}_{1}'.format(args.env_name, args.save_name),
                 variant=variant)
    ptu.set_gpu_mode(True)  # optionally set the GPU (default=True)

    expl_env, eval_env, env_obj = prepare_env(args.env_name,
                                              args.visionmodel_path,
                                              **env_kwargs)
    obs_dim = expl_env.observation_space.low.size
    action_dim = expl_env.action_space.low.size
    expl_policy, eval_policy, trainer = prepare_trainer(
        algo, expl_env, obs_dim, action_dim, args.pretrained_policy_load,
        variant)

    # Hoist the repeated substring test; door environments need extra
    # policy configuration mirrored onto both policies.
    is_doorenv = args.env_name.find('doorenv') > -1
    if is_doorenv:
        expl_policy.knob_noisy = eval_policy.knob_noisy = args.knob_noisy
        expl_policy.nn = eval_policy.nn = env_obj.nn
        expl_policy.visionnet_input = eval_policy.visionnet_input = env_obj.visionnet_input

    if args.visionnet_input:
        # The vision network estimates the door-knob position from camera
        # input; it is used in eval mode only (no gradient updates).
        visionmodel = load_visionmodel(expl_env._wrapped_env.xml_path,
                                       args.visionmodel_path, VisionModelXYZ())
        visionmodel.to(ptu.device)
        expl_policy.visionmodel = visionmodel.eval()
    else:
        expl_policy.visionmodel = None

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
        doorenv=is_doorenv,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
        doorenv=is_doorenv,
    )

    if not args.replaybuffer_load:
        replay_buffer = EnvReplayBuffer(
            variant['replay_buffer_size'],
            expl_env,
        )
    else:
        # Resume from a pickled replay buffer; use a context manager so the
        # file handle is closed (the original leaked it via open() inline).
        with open(args.replaybuffer_load, "rb") as f:
            replay_buffer = pickle.load(f)
        replay_buffer._env_info_keys = replay_buffer.env_info_sizes.keys()
        print("Loaded the replay buffer that has length of {}".format(
            replay_buffer.get_diagnostics()))

    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **variant['algorithm_kwargs'])

    # Attach bookkeeping attributes consumed by the (customized) algorithm
    # for checkpointing and TensorBoard logging.
    algorithm.save_interval = args.save_interval
    algorithm.save_dir = args.save_dir
    algorithm.algo = args.algo
    algorithm.env_name = args.env_name
    algorithm.save_name = args.save_name
    algorithm.env_kwargs = env_kwargs
    summary_name = args.log_dir + '{0}_{1}'
    writer = SummaryWriter(summary_name.format(args.env_name, args.save_name))
    algorithm.writer = writer

    algorithm.to(ptu.device)
    algorithm.train()
예제 #2
0
def offpolicy_inference():
    """Roll out a policy saved by off-policy training, optionally evaluating it.

    Loads the policy snapshot from ``args.load_name`` and runs rollouts in a
    loop. When the module-level ``evaluation`` flag is set, door-opening
    success is counted over ``test_num`` episodes and summary statistics are
    printed via ``eval_print``. Otherwise the loop renders episodes forever
    (unless running under Unity, which does its own rendering).
    """
    import time

    env, _, _ = prepare_env(args.env_name, args.visionmodel_path, **env_kwargs)

    snapshot = torch.load(args.load_name)
    policy = snapshot['evaluation/policy']
    if args.env_name.find('doorenv') > -1:
        # Door environments need extra policy configuration.
        policy.knob_noisy = args.knob_noisy
        policy.nn = env._wrapped_env.nn
        policy.visionnet_input = env_kwargs['visionnet_input']

    epi_counter = 1
    dooropen_counter = 0
    total_time = 0
    test_num = 100  # episodes to run when evaluating

    # Render only for interactive runs: not while evaluating, and not when
    # Unity provides its own rendering. (Replaces the nested if/else chain.)
    render = not evaluation and not args.unity

    start_time = int(time.mktime(time.localtime()))

    # GPU inference is always enabled (the old `gpu = True` flag was dead).
    set_gpu_mode(True)
    while True:
        if args.env_name.find('doorenv') > -1:
            path, door_opened, opening_time = rollout(
                env,
                policy,
                max_path_length=512,
                doorenv=True,
                render=render,
                evaluate=True,
            )
            print("done first")
            if hasattr(env, "log_diagnostics"):
                env.log_diagnostics([path])
            logger.dump_tabular()
            if evaluation:
                # Fresh environment each episode so evaluation episodes
                # are independent.
                env, _, _ = prepare_env(args.env_name, args.visionmodel_path,
                                        **env_kwargs)
                if door_opened:
                    dooropen_counter += 1
                    total_time += opening_time
                    eval_print(dooropen_counter, epi_counter, start_time,
                               total_time)

        else:
            path = rollout(
                env,
                policy,
                max_path_length=512,
                doorenv=False,
                render=render,
            )
            if hasattr(env, "log_diagnostics"):
                env.log_diagnostics([path])
            logger.dump_tabular()

        if evaluation:
            print("{} ep end >>>>>>>>>>>>>>>>>>>>>>>>".format(epi_counter))
            epi_counter += 1

            # Stop after test_num evaluation episodes on door tasks.
            if args.env_name.find('door') > -1 and epi_counter > test_num:
                eval_print(dooropen_counter, epi_counter, start_time,
                           total_time)
                break
예제 #3
0
파일: enjoy.py 프로젝트: jonygao621/DoorGym
def offpolicy_inference(seed,
                        env_name,
                        det,
                        load_name,
                        evaluation,
                        render,
                        knob_noisy,
                        visionnet_input,
                        env_kwargs,
                        actor_critic=None,
                        verbose=True,
                        pos_control=True,
                        step_skip=4):
    """Roll out an off-policy agent and report door-opening statistics.

    Either loads a policy snapshot from ``load_name`` or uses the supplied
    ``actor_critic`` directly, then runs rollouts in a loop. In evaluation
    mode the loop counts door-opening success over ``test_num`` episodes
    and returns aggregate statistics from ``eval_print``.

    Args:
        seed: random seed (accepted for interface compatibility; not used
            here — seeding presumably happens in the caller; TODO confirm).
        env_name: environment id; 'doorenv' substring selects door logic.
        det: deterministic-action flag (accepted for interface
            compatibility; not used in this code path).
        load_name: path to a torch snapshot; used when ``actor_critic``
            is not provided.
        evaluation: when truthy, count successes and terminate after
            ``test_num`` episodes.
        render: whether rollouts are rendered.
        knob_noisy: forwarded to the policy for door environments.
        visionnet_input: unused directly; the value is taken from
            ``env_kwargs['visionnet_input']`` instead.
        env_kwargs: keyword arguments for ``prepare_env``.
        actor_critic: optional in-memory policy overriding ``load_name``.
        verbose: print per-episode progress.
        pos_control, step_skip: forwarded to ``rollout`` for door envs.

    Returns:
        (opening_rate, opening_timeavg) as computed by ``eval_print``.
    """
    import time

    print("evaluation started!")  # fixed typo ("evaluatin")

    env, _, _ = prepare_env(env_name, **env_kwargs)

    if not actor_critic:
        # Load the policy saved by training; otherwise use the one passed in.
        snapshot = torch.load(load_name)
        policy = snapshot['evaluation/policy']
    else:
        policy = actor_critic
    if env_name.find('doorenv') > -1:
        # Door environments need extra policy configuration.
        policy.knob_noisy = knob_noisy
        policy.nn = env._wrapped_env.nn
        policy.visionnet_input = env_kwargs['visionnet_input']

    epi_counter = 1
    dooropen_counter = 0
    total_time = 0
    test_num = 100  # episodes to run when evaluating

    start_time = int(time.mktime(time.localtime()))

    # GPU inference is always enabled (the old `gpu = True` flag was dead).
    set_gpu_mode(True)
    while True:
        if env_name.find('doorenv') > -1:
            if evaluation:
                path, door_opened, opening_time = rollout(
                    env,
                    policy,
                    max_path_length=512,
                    render=render,
                    evaluate=evaluation,
                    verbose=True,
                    doorenv=True,
                    pos_control=pos_control,
                    step_skip=step_skip,
                )
                if hasattr(env, "log_diagnostics"):
                    env.log_diagnostics([path])
                logger.dump_tabular()
                # Fresh environment each episode so evaluation episodes
                # are independent.
                env, _, _ = prepare_env(env_name, **env_kwargs)
                if door_opened:
                    dooropen_counter += 1
                    total_time += opening_time
                    if verbose:
                        print("{} ep end >>>>>>>>>>>>>>>>>>>>>>>>".format(
                            epi_counter))
                        eval_print(dooropen_counter, epi_counter, start_time,
                                   total_time)
            else:
                # Non-evaluation rollout returns only the path.
                path = rollout(
                    env,
                    policy,
                    max_path_length=512,
                    render=render,
                    evaluate=evaluation,
                    verbose=True,
                    doorenv=True,
                    pos_control=pos_control,
                    step_skip=step_skip,
                )
                if hasattr(env, "log_diagnostics"):
                    env.log_diagnostics([path])
                logger.dump_tabular()

        else:
            path = rollout(
                env,
                policy,
                max_path_length=512,
                doorenv=False,
                render=render,
            )
            if hasattr(env, "log_diagnostics"):
                env.log_diagnostics([path])
            logger.dump_tabular()

        if evaluation:
            if verbose:
                print("{} ep end >>>>>>>>>>>>>>>>>>>>>>>>".format(epi_counter))
                eval_print(dooropen_counter, epi_counter, start_time,
                           total_time)
            epi_counter += 1

            # Stop after test_num evaluation episodes on door tasks.
            if env_name.find('door') > -1 and epi_counter > test_num:
                if verbose:
                    print("dooropening counter:", dooropen_counter,
                          " epi counter:", epi_counter)
                    eval_print(dooropen_counter, epi_counter, start_time,
                               total_time)
                break

    # epi_counter was incremented past the last completed episode.
    opening_rate, opening_timeavg = eval_print(dooropen_counter,
                                               epi_counter - 1, start_time,
                                               total_time)
    return opening_rate, opening_timeavg