Example #1
def run_task(vv, log_dir=None, exp_name=None):
    if log_dir or logger.get_dir() is None:
        logger.configure(dir=log_dir, exp_name=exp_name, format_strs=['csv'])
    logdir = logger.get_dir()
    assert logdir is not None
    os.makedirs(logdir, exist_ok=True)
    updated_vv = copy.copy(DEFAULT_CONFIG)
    updated_vv.update(**vv)
    main(vv_to_args(updated_vv))
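
The vv_to_args helper used here (and in the later examples) is not shown; a minimal sketch, assuming it only wraps the variant dict in an attribute-style namespace for main(), could look like this:

# Hypothetical vv_to_args: expose each variant entry as an attribute,
# so downstream code can read args.seed instead of vv['seed'].
import argparse

def vv_to_args(vv):
    args = argparse.Namespace()
    for key, value in vv.items():
        setattr(args, key, value)
    return args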
Example #2
def run_task(vv, log_dir=None, exp_name=None):
    if log_dir or logger.get_dir() is None:
        logger.configure(dir=log_dir, exp_name=exp_name, format_strs=['csv'])
    logdir = logger.get_dir()
    assert logdir is not None
    os.makedirs(logdir, exist_ok=True)

    with open('drq/config.yml', 'r') as f:
        default_cfg = yaml.safe_load(f)
    cfg = update_config(default_cfg, vv)
    cfg = update_env_kwargs(cfg)
    workspace = Workspace(vv_to_args(cfg))
    workspace.run()
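
update_config is likewise defined elsewhere in the repository; a plausible sketch, assuming it recursively overrides the defaults loaded from drq/config.yml with the variant values, is:

# Hypothetical update_config: deep-merge the variant dict into the
# default config, letting nested dicts override key by key.
import copy

def update_config(default_cfg, custom_cfg):
    cfg = copy.deepcopy(default_cfg)
    for key, value in custom_cfg.items():
        if isinstance(value, dict) and isinstance(cfg.get(key), dict):
            cfg[key] = update_config(cfg[key], value)
        else:
            cfg[key] = value
    return cfg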
Example #3
def run_task(arg_vv, log_dir, exp_name):
    if arg_vv['algorithm'] == 'planet':
        from planet.config import DEFAULT_PARAMS
    elif arg_vv['algorithm'] == 'dreamer':
        from dreamer.config import DEFAULT_PARAMS
    else:
        raise NotImplementedError

    vv = DEFAULT_PARAMS
    vv.update(**arg_vv)
    vv = update_env_kwargs(vv)
    vv['max_episode_length'] = vv['env_kwargs']['horizon']

    # Configure logger
    logger.configure(dir=log_dir, exp_name=exp_name)
    logdir = logger.get_dir()
    assert logdir is not None
    os.makedirs(logdir, exist_ok=True)

    # Configure torch
    if torch.cuda.is_available():
        device = torch.device('cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0')
        torch.cuda.manual_seed(vv['seed'])
    else:
        device = torch.device('cpu')

    # Dump parameters
    with open(osp.join(logger.get_dir(), 'variant.json'), 'w') as f:
        json.dump(vv, f, indent=2, sort_keys=True)
    env = Env(vv['env_name'],
              vv['symbolic_env'],
              vv['seed'],
              vv['max_episode_length'],
              vv['action_repeat'],
              vv['bit_depth'],
              vv['image_dim'],
              env_kwargs=vv['env_kwargs'])

    if vv['algorithm'] == 'planet':
        from planet.planet_agent import PlaNetAgent
        agent = PlaNetAgent(env, vv, device)
        agent.train(train_epoch=vv['train_epoch'])
        env.close()
    elif vv['algorithm'] == 'dreamer':
        from dreamer.dreamer_agent import DreamerAgent
        agent = DreamerAgent(env, vv, device)
        agent.train(train_episode=vv['train_episode'])
        env.close()
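
update_env_kwargs appears in Examples 2-4 without a definition; a minimal sketch, assuming it folds flat env_kwargs_* entries of the variant into the nested env_kwargs dict, might be:

# Hypothetical update_env_kwargs: any 'env_kwargs_<name>' key in the
# variant overrides '<name>' inside vv['env_kwargs'].
def update_env_kwargs(vv):
    for key in [k for k in vv if k.startswith('env_kwargs_')]:
        vv['env_kwargs'][key[len('env_kwargs_'):]] = vv.pop(key)
    return vv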
Example #4
def run_task(vv, log_dir, exp_name):
    mp.set_start_method('spawn')
    env_name = vv['env_name']
    vv['algorithm'] = 'CEM'
    vv['env_kwargs'] = env_arg_dict[env_name]  # Default env parameters
    vv['plan_horizon'] = cem_plan_horizon[env_name]  # Planning horizon

    vv['population_size'] = vv['timestep_per_decision'] // vv['max_iters']
    if vv['use_mpc']:
        vv['population_size'] = vv['population_size'] // vv['plan_horizon']
    vv['num_elites'] = vv['population_size'] // 10
    vv = update_env_kwargs(vv)

    # Configure logger
    logger.configure(dir=log_dir, exp_name=exp_name)
    logdir = logger.get_dir()
    assert logdir is not None
    os.makedirs(logdir, exist_ok=True)

    # Configure torch
    if torch.cuda.is_available():
        torch.cuda.manual_seed(vv['seed'])

    # Dump parameters
    with open(osp.join(logger.get_dir(), 'variant.json'), 'w') as f:
        json.dump(vv, f, indent=2, sort_keys=True)

    env_symbolic = vv['env_kwargs']['observation_mode'] != 'cam_rgb'

    env_class = Env
    env_kwargs = {
        'env': vv['env_name'],
        'symbolic': env_symbolic,
        'seed': vv['seed'],
        'max_episode_length': 200,
        'action_repeat': 1,  # Action repeat for the env wrapper is 1, as it is already handled inside the env
        'bit_depth': 8,
        'image_dim': None,
        'env_kwargs': vv['env_kwargs']
    }
    env = env_class(**env_kwargs)

    env_kwargs_render = copy.deepcopy(env_kwargs)
    env_kwargs_render['env_kwargs']['render'] = True
    env_render = env_class(**env_kwargs_render)

    policy = CEMPolicy(env,
                       env_class,
                       env_kwargs,
                       vv['use_mpc'],
                       plan_horizon=vv['plan_horizon'],
                       max_iters=vv['max_iters'],
                       population_size=vv['population_size'],
                       num_elites=vv['num_elites'])
    # Run policy
    initial_states, action_trajs, configs, all_infos = [], [], [], []
    for i in range(vv['test_episodes']):
        logger.log('episode ' + str(i))
        obs = env.reset()
        policy.reset()
        initial_state = env.get_state()
        action_traj = []
        infos = []
        for j in range(env.horizon):
            logger.log('episode {}, step {}'.format(i, j))
            action = policy.get_action(obs)
            action_traj.append(copy.copy(action))
            obs, reward, _, info = env.step(action)
            infos.append(info)
        all_infos.append(infos)
        initial_states.append(initial_state.copy())
        action_trajs.append(action_traj.copy())
        configs.append(env.get_current_config().copy())

        # Log for each episode
        transformed_info = transform_info([infos])
        for info_name in transformed_info:
            logger.record_tabular('info_' + 'final_' + info_name,
                                  transformed_info[info_name][0, -1])
            logger.record_tabular('info_' + 'average_' + info_name,
                                  np.mean(transformed_info[info_name][0, :]))
            logger.record_tabular(
                'info_' + 'sum_' + info_name,
                np.sum(transformed_info[info_name][0, :], axis=-1))
        logger.dump_tabular()

    # Dump trajectories
    traj_dict = {
        'initial_states': initial_states,
        'action_trajs': action_trajs,
        'configs': configs
    }
    with open(osp.join(log_dir, 'cem_traj.pkl'), 'wb') as f:
        pickle.dump(traj_dict, f)

    # Dump video
    cem_make_gif(env_render, initial_states, action_trajs, configs,
                 logger.get_dir(), vv['env_name'] + '.gif')
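
transform_info is not shown either; the indexing transformed_info[info_name][0, -1] above suggests it stacks each scalar field of the per-step info dicts into an array indexed as [episode, step]. A sketch under that assumption:

# Hypothetical transform_info: turn a list of per-episode info-dict
# lists into {field: np.ndarray of shape (episodes, steps)}.
import numpy as np

def transform_info(all_infos):
    keys = all_infos[0][0].keys()
    return {k: np.array([[info[k] for info in infos] for infos in all_infos])
            for k in keys}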
Example #5
parser.add_argument('--test-interval', type=int, default=25, metavar='I', help='Test interval (episodes)')
parser.add_argument('--test-episodes', type=int, default=10, metavar='E', help='Number of test episodes')
parser.add_argument('--checkpoint-interval', type=int, default=50, metavar='I', help='Checkpoint interval (episodes)')
parser.add_argument('--checkpoint-experience', action='store_true', help='Checkpoint experience replay')
parser.add_argument('--models', type=str, default='', metavar='M', help='Load model checkpoint')
parser.add_argument('--experience-replay', type=str, default='', metavar='ER', help='Load experience replay')
parser.add_argument('--render', action='store_true', help='Render environment')
args = parser.parse_args()
args.overshooting_distance = min(args.chunk_size,
                                 args.overshooting_distance)  # Overshooting distance cannot be greater than chunk size
print(' ' * 26 + 'Options')
for k, v in vars(args).items():
    print(' ' * 26 + k + ': ' + str(v))

# Setup chester logging
logger.configure('./data', exp_name='test_logger_debug')

# Setup
results_dir = os.path.join('results', args.id)
os.makedirs(results_dir, exist_ok=True)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available() and not args.disable_cuda:
    args.device = torch.device('cuda')
    torch.cuda.manual_seed(args.seed)
else:
    args.device = torch.device('cpu')
metrics = {'steps': [], 'episodes': [], 'train_rewards': [], 'test_episodes': [], 'test_rewards': [],
           'observation_loss': [], 'reward_loss': [], 'kl_loss': []}

# Initialise training environment and experience replay memory
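
The snippet above stops where the training environment and replay memory would be created. The chester logger configured with logger.configure('./data', exp_name='test_logger_debug') is then used through record_tabular/dump_tabular, as in the other examples; an illustrative sketch of that logging pattern (values are placeholders):

# Illustrative only: record a few scalars per episode with the chester
# logger configured above, then flush the row.
for episode in range(1, 4):
    logger.record_tabular('episodes', episode)
    logger.record_tabular('train_rewards', 0.0)  # placeholder value
    logger.dump_tabular()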
Example #6
def run_task(vv, log_dir, exp_name):
    import torch
    import numpy as np
    import copy
    import os, sys
    import time
    import math
    import random
    import json

    from get_args import get_args
    from DDPG.train_util import DDPG_train, DDPG_test
    from DDPG.DDPG_new import DDPG
    from DDPG.util import GaussNoise
    from chester import logger
    from BurgersEnv.Burgers import Burgers
    import utils.ptu as ptu

    if torch.cuda.is_available():
        ptu.set_gpu_mode(True)

    ### dump vv
    logger.configure(dir=log_dir, exp_name=exp_name)
    with open(os.path.join(logger.get_dir(), 'variant.json'), 'w') as f:
        json.dump(vv, f, indent=2, sort_keys=True)

    ### load vv
    ddpg_load_epoch = None
    if vv['load_path'] is not None:
        solution_data_path = vv['solution_data_path']
        dx = vv['dx']
        test_interval = vv['test_interval']
        load_path = os.path.join('data/local', vv['load_path'])
        ddpg_load_epoch = str(vv['load_epoch'])
        with open(os.path.join(load_path, 'variant.json'), 'r') as f:
            vv = json.load(f)
        vv['noise_beg'] = 0.1
        vv['solution_data_path'] = solution_data_path
        vv['test_interval'] = test_interval
        if vv.get('dx') is None:
            vv['dx'] = dx

    ### Important: fix numpy and torch seed!
    seed = vv['seed']
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

    ### Initialize RL agents
    ddpg = DDPG(
        vv, GaussNoise(initial_sig=vv['noise_beg'], final_sig=vv['noise_end']))
    agent = ddpg
    if ddpg_load_epoch is not None:
        print("load ddpg models from {}".format(
            os.path.join(load_path, ddpg_load_epoch)))
        agent.load(os.path.join(load_path, ddpg_load_epoch))

    ### Initialize training and testing environments
    env = Burgers(vv, agent=agent)

    ### train models
    print('beginning training!')
    DDPG_train(vv, env, agent)
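
All of these run_task functions are meant to be launched by an experiment runner that supplies the variant dict, log directory and experiment name; a hypothetical direct invocation of this last one (the keys shown are illustrative only, the real variant has many more entries consumed by DDPG and Burgers) would be:

# Hypothetical direct call; the real variant dict is much larger.
example_vv = {
    'seed': 100,
    'load_path': None,
    'noise_beg': 0.8,
    'noise_end': 0.1,
}
run_task(example_vv, log_dir='./data/local/ddpg-burgers-debug',
         exp_name='ddpg-burgers-debug')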