Example #1
def main(exp_traj_fn, rep_as_str, from_scratch):
    env_name = f"zelda-{rep_as_str}-v0"
    log_dir = f'runs/{rep_as_str}'

    kwargs_dict = {'resume': False, 'render': True}

    if rep_as_str == 'wide':
        policy = FullyConvPolicyBigMap
    else:
        policy = CustomPolicyBigMap

    env = make_vec_envs(env_name, rep_as_str, log_dir, n_cpu=1, **kwargs_dict)

    model = PPO2(policy,
                 env,
                 verbose=1,
                 tensorboard_log=f"./runs/{rep_as_str}")
    if not from_scratch:
        # PPO2.load is a classmethod that returns a new model, so the result must be
        # assigned; calling model.load(...) alone would discard the loaded weights.
        model = PPO2.load(f'models/{rep_as_str}/zelda_{rep_as_str}', env=env)

    # Behavioural-cloning pretraining on the recorded expert trajectories.
    dataset = ExpertDataset(
        expert_path=f'expert_trajectories/{rep_as_str}/{exp_traj_fn}.npz',
        traj_limitation=-1,
        batch_size=15)
    start_time = time.process_time()
    model.set_env(env)
    model.pretrain(dataset, n_epochs=15)
    end_time = time.process_time()
    print(f"training took {end_time - start_time} seconds")
    model.save(f'models/{rep_as_str}/zelda_{rep_as_str}')
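A minimal sketch of how this pretraining entry point might be invoked; the trajectory name and the models/ layout are assumptions taken from the paths used above, not confirmed by the source.
# Hypothetical invocation (file names assumed from the f-strings above): pretrain the
# 'wide' representation from scratch on the 'expert_wide' trajectories; a later run
# could pass from_scratch=False to continue from the saved checkpoint.
if __name__ == '__main__':
    main(exp_traj_fn='expert_wide', rep_as_str='wide', from_scratch=True)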
Example #2
def infer(game, representation, model_path, **kwargs):
    """
     - max_trials: The number of trials per evaluation.
     - infer_kwargs: Args to pass to the environment.
    """
    env_name = '{}-{}-v0'.format(game, representation)
    if game == "binary":
        model.FullyConvPolicy = model.FullyConvPolicyBigMap
        kwargs['cropped_size'] = 28
    elif game == "zelda":
        model.FullyConvPolicy = model.FullyConvPolicyBigMap
        kwargs['cropped_size'] = 22
    elif game == "sokoban":
        model.FullyConvPolicy = model.FullyConvPolicySmallMap
        kwargs['cropped_size'] = 10
    kwargs['render'] = True

    agent = PPO2.load(model_path)
    env = make_vec_envs(env_name, representation, None, 1, **kwargs)
    for i in range(kwargs.get('trials', 1)):
        # Reset once per trial; otherwise 'dones' stays True after the first
        # trial and the remaining trials are skipped.
        obs = env.reset()
        dones = False
        while not dones:
            action, _ = agent.predict(obs)
            obs, _, dones, info = env.step(action)
            if kwargs.get('verbose', False):
                print(info[0])
            if dones:
                break
        time.sleep(0.2)
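A short usage sketch, assuming a Zelda checkpoint saved as in Example #1; the model path is hypothetical.
# Hypothetical call (checkpoint path is an assumption): render three 'wide' trials
# and print the per-step info dicts.
if __name__ == '__main__':
    infer('zelda', 'wide', 'models/wide/zelda_wide', trials=3, verbose=True)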
Example #3
def infer(game, representation, model_path, **kwargs):
    """
     - max_trials: The number of trials per evaluation.
     - infer_kwargs: Args to pass to the environment.
    """
    env_name = '{}-{}-v0'.format(game, representation)
    if "small" in game:
        model.FullyConvPolicy = model.FullyConvPolicySmallMap
        kwargs['cropped_size'] = 8
    elif "medium" in game:
        model.FullyConvPolicy = model.FullyConvPolicySmallMap
        kwargs['cropped_size'] = 12
    elif "large" in game:
        model.FullyConvPolicy = model.FullyConvPolicyBigMap
        kwargs['cropped_size'] = 16

    kwargs['render'] = False
    # agent = PPO2.load(model_path)
    agent = getattr(settings, model_path, None)
    fixed_tiles = process(kwargs.get('tiles', []))
    initial_map = createMap(kwargs['cropped_size'], fixed_tiles)
    kwargs['old_map'] = initial_map
    change_limit = kwargs.get('change_limit', 5000)
    # if not canCreateMap(fixed_tiles, game.split("_")[0], game.split("_")[1]):
    #     return False
    sug_info = {}
    for i in range(kwargs.get('trials', 1)):
        sug_info[i] = {}
        env = make_vec_envs(env_name, representation, None, 1, **kwargs)
        info = None
        obs = env.reset()
        dones = False
        cur_pos = {'x': None, 'y': None}
        while not dones:
            if i == 0:
                action, _ = agent.predict(obs)
                obs, _, dones, info = env.step(action)
            else:
                obs, _, dones, info = step(cur_pos, fixed_tiles, representation, env, agent, obs)
            cur_pos['x'] = info[0]['pos'][0]
            cur_pos['y'] = info[0]['pos'][1]
            if kwargs.get('verbose', False):
                print(info[0])
            if dones:
                break
            # if info[0]['changes'] > change_limit:
            #     return False
        sug_info[i]["info"] = info[0]
    sug_info["range"] = get_range(game.split("_")[0], game.split("_")[1])
    return sug_info
Example #4
def main(game, representation, experiment, steps, n_cpu, render, logging,
         **kwargs):
    env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    resume = kwargs.get('resume', False)
    if representation == 'wide':
        policy = FullyConvPolicyBigMap
        if game == "sokoban":
            policy = FullyConvPolicySmallMap
    else:
        policy = CustomPolicyBigMap
        if game == "sokoban":
            policy = CustomPolicySmallMap
    if game == "binary":
        kwargs['cropped_size'] = 28
    elif game == "zelda":
        kwargs['cropped_size'] = 22
    elif game == "sokoban":
        kwargs['cropped_size'] = 10
    n = max_exp_idx(exp_name)
    global log_dir
    if not resume:
        n = n + 1
    log_dir = 'runs/{}_{}_{}'.format(exp_name, n, 'log')
    if not resume:
        os.mkdir(log_dir)
    else:
        model = load_model(log_dir)
    kwargs = {
        **kwargs,
        'render_rank': 0,
        'render': render,
    }
    used_dir = log_dir
    if not logging:
        used_dir = None
    # Pass used_dir so monitor logging is disabled when logging is False.
    env = make_vec_envs(env_name, representation, used_dir, n_cpu, **kwargs)
    if not resume or model is None:
        model = PPO2(policy, env, verbose=1, tensorboard_log="./runs")
    else:
        model.set_env(env)
    if not logging:
        model.learn(total_timesteps=int(steps), tb_log_name=exp_name)
    else:
        model.learn(total_timesteps=int(steps),
                    tb_log_name=exp_name,
                    callback=callback)
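A sketch of a call into this training entry point, assuming get_exp_name, max_exp_idx, load_model and callback exist in the surrounding module; the experiment name and budgets are illustrative.
# Hypothetical invocation (experiment name and step budget are assumptions): train a
# 'wide' Zelda generator for 1M timesteps on 8 workers with logging enabled.
if __name__ == '__main__':
    main('zelda', 'wide', 'example_experiment', steps=1e6, n_cpu=8,
         render=False, logging=True, resume=False)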
Example #5
def infer(game, representation, model_path, **kwargs):
    """
     - max_trials: The number of trials per evaluation.
     - infer_kwargs: Args to pass to the environment.
    """
    env_name = '{}-{}-v0'.format(game, representation)
    if game == "binary":
        model.FullyConvPolicy = model.FullyConvPolicyBigMap
        kwargs['cropped_size'] = 28
    elif game == "zelda":
        model.FullyConvPolicy = model.FullyConvPolicyBigMap
        kwargs['cropped_size'] = 22
    elif game == "sokoban":
        model.FullyConvPolicy = model.FullyConvPolicySmallMap
        kwargs['cropped_size'] = 10

    env = make_vec_envs(env_name, representation, None, 1, **kwargs)
    agent = PPO2.load(model_path, env=env)
    successful_levels = 0.0
    total_iterations = 0.0
    for i in range(kwargs.get('trials', 1)):
        # Reset once per trial; otherwise 'dones' stays True after the first
        # trial and the remaining trials are skipped.
        obs = env.reset()
        dones = False
        while not dones:
            total_iterations += 1
            action, _ = agent.predict(obs)
            obs, _, dones, info = env.step(action)
            if kwargs.get('verbose', False):
                # print(info[0])
                pass
            if info[0]['solved']:
                successful_levels += 1
                dones = True
            if dones:
                break
    return successful_levels / total_iterations
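A usage sketch under the same assumptions as Example #2; note that the returned value is a per-step ratio.
# Hypothetical evaluation call (checkpoint path assumed); the return value is the
# per-environment-step solve ratio computed above, not a per-trial success rate.
if __name__ == '__main__':
    solve_rate = infer('zelda', 'wide', 'models/wide/zelda_wide', trials=20)
    print(f"solve rate: {solve_rate}")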
Example #6
from model import FullyConvPolicyBigMap, CustomPolicyBigMap

from utils import make_vec_envs

from stable_baselines.gail import generate_expert_traj, ExpertDataset
from stable_baselines import PPO2
import time
import numpy as np

# THIS SECTION IS FOR GEN EXP TRAJ
kwargs_dict = {'resume': False, 'render': False}
log_dir = 'runs/wide'

env_name = "zelda-wide-v0"
policy = FullyConvPolicyBigMap
env = make_vec_envs(env_name, "wide", log_dir, n_cpu=1, **kwargs_dict)

model = PPO2(policy, env, verbose=1, tensorboard_log="./runs/wide")
# Record one episode from the freshly initialised model as an expert trajectory.
a_dict = generate_expert_traj(model,
                              'expert_wide',
                              n_timesteps=0,
                              n_episodes=1)
print(a_dict)

numpy_dict = np.load('expert_wide.npz')
print(type(numpy_dict))
print(list(numpy_dict.keys()))

# ['actions', 'obs', 'rewards', 'episode_returns', 'episode_starts']
print(f"ACTIONS")
print(f"=============================")
Example #7
def bootstrap_envs_and_buffer(args: Namespace):
    """Method to bootstrap the envs, buffer and related objects"""

    logbook = make_logbook(args=args)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    utils.make_dir(args.work_dir)

    with open(os.path.join(args.work_dir, "args.json"), "w") as f:
        json.dump(vars(args), f, sort_keys=True, indent=4)

    dummy_env = utils.make_dummy_env(args=args)

    pixel_space_obs = dummy_env.env.env._get_observation_space_for_pixel_space(
        args.image_size, args.image_size
    )
    state_space_obs = dummy_env.env.env._get_observation_space_for_state_space()

    action_size = dummy_env.action_space.shape[0]

    train_replay_buffer = create_multi_env_replay_buffer(
        args=args, env=dummy_env, device=device, num_envs=args.num_train_envs
    )

    eval_replay_buffer = create_multi_env_replay_buffer(
        args=args, env=dummy_env, device=device, num_envs=args.num_eval_envs
    )

    (
        fns_to_make_train_envs,
        fns_to_make_eval_envs,
    ) = make_fns_to_make_train_and_eval_envs(args=args)

    max_episode_steps = dummy_env._max_episode_steps

    vec_train_envs = utils.make_vec_envs(
        fns_to_make_envs=fns_to_make_train_envs, device=None,
    )

    vec_eval_envs = utils.make_vec_envs(
        fns_to_make_envs=fns_to_make_eval_envs, device=None,
    )

    logging_dict = {
        "steps": [],
        "model_error_in_latent_state": [],
        "model_error_in_eta_state": [],
        "reward_error": [],
        "decoding_error": [],
        "test_model_error_in_latent_state": [],
        "test_model_error_in_eta_state": [],
        "test_reward_error": [],
        "test_decoding_error": [],
        "discriminator_loss": [],
        "encoder_discriminator_loss": [],
        "test_encoder_discriminator_loss": [],
    }

    return (
        logbook,
        device,
        vec_train_envs,
        vec_eval_envs,
        state_space_obs,
        pixel_space_obs,
        action_size,
        train_replay_buffer,
        eval_replay_buffer,
        logging_dict,
        max_episode_steps,
    )
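A brief sketch of how a caller might unpack the returned tuple; the argparse Namespace is an assumption.
# Hypothetical caller (the args Namespace is assumed to come from the project's arg parsing).
(logbook, device, vec_train_envs, vec_eval_envs,
 state_space_obs, pixel_space_obs, action_size,
 train_replay_buffer, eval_replay_buffer,
 logging_dict, max_episode_steps) = bootstrap_envs_and_buffer(args)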
Example #8
def bootstrap_expert(args: Namespace):

    utils.set_seed_everywhere(args.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    fns_to_make_train_envs = [
        utils.fn_to_make_env(args=args,
                             seed=seed,
                             resource_files=None,
                             camera_id=0)
        for seed in range(args.num_train_envs)
    ]

    fns_to_make_eval_envs = [
        utils.fn_to_make_env(args=args,
                             seed=seed,
                             resource_files=None,
                             camera_id=0) for seed in range(args.num_eval_envs)
    ]

    vec_train_envs = utils.make_vec_envs(
        fns_to_make_envs=fns_to_make_train_envs, device=None)

    vec_eval_envs = utils.make_vec_envs(fns_to_make_envs=fns_to_make_eval_envs,
                                        device=None)

    dummy_env = utils.make_env(args, 0, resource_files=None, camera_id=0)

    video_dir, model_dir, buffer_dir, video = make_dirs_and_recorders(
        args=args)

    validate_env(dummy_env)

    replay_buffer = utils.MultiEnvReplayBuffer(
        obs_shape=dummy_env.observation_space.shape,
        action_shape=dummy_env.action_space.shape,
        capacity=args.replay_buffer_capacity,
        batch_size=args.batch_size,
        device=device,
        num_envs=args.num_train_envs,
    )

    agent = make_expert(
        obs_shape=dummy_env.observation_space.shape,
        action_shape=dummy_env.action_space.shape,
        args=args,
        device=device,
    )

    L = VecLogger(args.work_dir,
                  use_tb=args.save_tb,
                  num_envs=args.num_train_envs)

    max_episode_steps = dummy_env._max_episode_steps
    return (
        vec_train_envs,
        vec_eval_envs,
        max_episode_steps,
        video_dir,
        model_dir,
        buffer_dir,
        video,
        device,
        replay_buffer,
        agent,
        L,
    )
Example #9
def infer(game, representation, experiment, infer_kwargs, **kwargs):
    """
     - max_trials: The number of trials per evaluation.
     - infer_kwargs: Args to pass to the environment.
    """
    infer_kwargs = {
            **infer_kwargs,
            'inference': True,
            'render': True,
            }
    max_trials = kwargs.get('max_trials', -1)
    n = kwargs.get('n', None)
    env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    if n is None:
        n = max_exp_idx(exp_name)
    if n == 0:
        raise Exception('Did not find ranked saved model of experiment: {}'.format(exp_name))
    log_dir = 'runs/{}_{}_{}'.format(exp_name, n, 'log')
    model = load_model(log_dir)
    # no log dir; the number of parallel environments comes from infer_kwargs (default 12)
    n_cpu = infer_kwargs.get('n_cpu', 12)
    env = make_vec_envs(env_name, representation, None, n_cpu, **infer_kwargs)
    obs = env.reset()
    # Record final values of each trial
    # For non-binary games there is nothing to record, so an empty dict is returned.
    infer_info = {}
    if 'binary' in env_name:
        path_lengths = []
        changes = []
        regions = []
        infer_info = {
            'path_lengths': [],
            'changes': [],
            'regions': [],
            }
    n_trials = 0
    while n_trials != max_trials:
        # action = get_action(obs, env, model)
        action, _ = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        reward = rewards[0]
        n_regions = info[0]['regions']
        readouts = []
        if 'binary' in env_name:
            curr_path_length = info[0]['path-length']
            readouts.append('path length: {}'.format(curr_path_length) )
            path_lengths.append(curr_path_length)
            changes.append(info[0]['changes'])
            regions.append(info[0]['regions'])

        readouts += ['regions: {}'.format(n_regions), 'reward: {}'.format(reward)]
        stringexec = ""
        m=0
        y0, dy = 50, 40
        img = np.zeros((256,512,3), np.uint8)
        scale_percent = 60 # percent of original size
        width = int(img.shape[1] * scale_percent / 100)
        height = int(img.shape[0] * scale_percent / 100)
        dim = (width, height)
        # resize image
        for i, line in enumerate(readouts):
            y = y0 + i*dy
            cv2.putText(img, line, (50, y), font, fontScale, fontColor, lineType)
           #stringexec ="cv2.putText(img, TextList[" + str(TextList.index(i))+"], (100, 100+"+str(m)+"), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 100, 100), 1, cv2.LINE_AA)\n"
           #m += 100
        #cv2.putText(
        #    img,readout,
        #    topLeftCornerOfText,
        #    font,
        #    fontScale,
        #    fontColor,
        #    lineType)
        #Display the image
        resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
        cv2.imshow("img",resized)
        cv2.waitKey(1)
       #for p, v in model.get_parameters().items():
       #    print(p, v.shape)
        if dones:
            # show_state(env, path_lengths, changes, regions, n_step)
            if 'binary' in env_name:
                # Record the final values of this trial.
                infer_info['path_lengths'].append(path_lengths[-1])
                infer_info['changes'].append(changes[-1])
                infer_info['regions'].append(regions[-1])
            n_trials += 1
    return infer_info
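A minimal sketch of invoking this evaluation loop; the experiment name is an assumption, and max_trials must be positive for the loop to terminate.
# Hypothetical call (experiment name is an assumption). max_trials must be positive:
# with the default of -1, n_trials never equals max_trials and the loop never exits.
if __name__ == '__main__':
    results = infer('binary', 'wide', 'example_experiment',
                    infer_kwargs={'n_cpu': 1}, max_trials=5)
    print(results)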
Example #10
def bootstrap_setup_for_rl(args: argparse.Namespace):
    """Method to bootstrap the setup"""

    utils.set_seed_everywhere(args.seed)

    (
        logbook,
        device,
        train_envs,
        eval_envs,
        obs_shape,
        action_size,
        train_replay_buffer,
        eval_replay_buffer,
        logging_dict,
    ) = bootstrap_setup(args)

    args.video_dir = utils.make_dir(os.path.join(args.work_dir, "video"))
    args.model_dir = utils.make_dir(os.path.join(args.work_dir, "model"))
    args.buffer_dir = utils.make_dir(os.path.join(args.work_dir, "buffer"))

    # video = VideoRecorder(video_dir if args.save_video else None)

    logging_dict = {
        "steps": [],
        "model_error_in_latent_state": [],
        "model_error_in_eta_state": [],
        "reward_error": [],
        "decoding_error": [],
        "test_model_error_in_latent_state": [],
        "test_model_error_in_eta_state": [],
        "test_reward_error": [],
        "test_decoding_error": [],
    }

    logger = Logger(args.work_dir, use_tb=args.save_tb, logbook=logbook)

    # train_envs =  utils.make_vec_envs(envs = train_envs,
    #               device=None,
    #               num_frame_stack=args.frame_stack)

    # eval_envs =  utils.make_vec_envs(envs = eval_envs,
    #               device=None,
    #               num_frame_stack=args.frame_stack)

    (
        fns_to_make_train_envs,
        fns_to_make_eval_envs,
    ) = make_fns_to_make_train_and_eval_envs(args=args)

    max_episode_steps = train_envs[0]._max_episode_steps

    train_envs = utils.make_vec_envs(
        fns_to_make_envs=fns_to_make_train_envs,
        device=None,
        num_frame_stack=args.frame_stack,
    )

    eval_envs = utils.make_vec_envs(
        fns_to_make_envs=fns_to_make_eval_envs,
        device=None,
        num_frame_stack=args.frame_stack,
    )

    return (
        logbook,
        device,
        train_envs,
        eval_envs,
        obs_shape,
        action_size,
        train_replay_buffer,
        eval_replay_buffer,
        logging_dict,
        logger,
        max_episode_steps,
    )