def _define_planner(planner, horizon, config, params):
  """Return a bound planner constructor for the requested strategy.

  'cem' builds a cross-entropy-method planner; the remaining names all map
  onto the action-head policy with a different sampling strategy.
  """
  if planner == 'cem':
    return tools.bind(
        control.planning.cross_entropy_method,
        beams=params.get('planner_beams', 1000),
        iterations=params.get('planner_iterations', 10),
        topk=params.get('planner_topk', 100),
        horizon=horizon)
  # All policy-based planners differ only in their sampling strategy.
  strategies = {
      'policy_sample': 'sample',
      'policy_mode': 'mode',
      'curious_sample': 'curious_sample',
      'random_sample': 'random_sample',
  }
  if planner in strategies:
    return tools.bind(
        control.planning.action_head_policy,
        strategy=strategies[planner],
        config=config)
  raise NotImplementedError(planner)
def gym_cheetah(config, params):
  """HalfCheetah-v3 Gym task. Works with `isolate_envs: process`."""
  repeat = params.get('action_repeat', 1)
  ctor = tools.bind(_gym_env, 'HalfCheetah-v3', config, params, repeat)
  return Task('gym_cheetah', ctor, ['state'])
def _tasks(config, params):
  """Instantiate train/test tasks and register per-component prediction heads.

  Mutates `config` in place (tasks, heads, loss scales) and returns it.
  """
  config.isolate_envs = params.get('isolate_envs', 'thread')
  train_tasks, test_tasks = [], []
  for name in params.get('tasks', ['cheetah_run']):
    # Some task factories take a train/test mode argument; fall back to the
    # two-argument signature for those that do not.
    try:
      train_tasks.append(getattr(tasks_lib, name)(config, params, 'train'))
      test_tasks.append(getattr(tasks_lib, name)(config, params, 'test'))
    except TypeError:
      train_tasks.append(getattr(tasks_lib, name)(config, params))
      test_tasks.append(getattr(tasks_lib, name)(config, params))
  def common_spaces_ctor(task, action_spaces):
    # For multi-task training: restrict observations to images and pad the
    # action space so all tasks share one shape.
    env = task.env_ctor()
    env = control.wrappers.SelectObservations(env, ['image'])
    env = control.wrappers.PadActions(env, action_spaces)
    return env
  if len(train_tasks) > 1:
    action_spaces = [task.env_ctor().action_space for task in train_tasks]
    for index, task in enumerate(train_tasks):
      env_ctor = tools.bind(common_spaces_ctor, task, action_spaces)
      train_tasks[index] = tasks_lib.Task(task.name, env_ctor, [])
  if len(test_tasks) > 1:
    action_spaces = [task.env_ctor().action_space for task in test_tasks]
    for index, task in enumerate(test_tasks):
      env_ctor = tools.bind(common_spaces_ctor, task, action_spaces)
      test_tasks[index] = tasks_lib.Task(task.name, env_ctor, [])
  if config.gradient_heads == 'all_but_image':
    config.gradient_heads = train_tasks[0].state_components
  diags = params.get('state_diagnostics', True)
  # One head per state component plus reward and pcont; components outside
  # the gradient heads are optional diagnostics and can be disabled.
  for name in train_tasks[0].state_components + ['reward', 'pcont']:
    if name not in config.gradient_heads + ['reward', 'pcont'] and not diags:
      continue
    kwargs = {}
    # Heads outside the gradient set are trained without backprop into the model.
    kwargs['stop_gradient'] = name not in config.gradient_heads
    if name == 'pcont':
      kwargs['dist'] = 'binary'
    # Reward head defaults to 2 layers; others use the configured depth.
    # Both can be overridden via a '<name>_layers' param.
    default = dict(reward=2).get(name, config.num_layers)
    kwargs['num_layers'] = params.get(name + '_layers', default)
    config.heads[name] = tools.bind(config.head_network, **kwargs)
    config.loss_scales[name] = 1.0
    if config.vanilla_curious_run:
      # Prediction-error runs mirror the reward head as an intrinsic head.
      if name == 'reward':
        config.heads['reward_int'] = tools.bind(config.head_network, **kwargs)
        config.loss_scales['reward_int'] = 1.0
  config.train_tasks = train_tasks
  config.test_tasks = test_tasks
  return config
def finger_turn_hard(config, params):
  """DM Control finger turn_hard task with full state diagnostics."""
  repeat = params.get('action_repeat', 2)
  components = [
      'reward', 'position', 'velocity', 'touch', 'target_position',
      'dist_to_target']
  ctor = tools.bind(
      _dm_control_env, 'finger', 'turn_hard', config, params, repeat)
  return Task('finger_turn_hard', ctor, components)
def humanoid_walk(config, params):
  """DM Control humanoid walk task with full state diagnostics."""
  repeat = params.get('action_repeat', 2)
  components = [
      'reward', 'com_velocity', 'extremities', 'head_height', 'joint_angles',
      'torso_vertical', 'velocity']
  ctor = tools.bind(
      _dm_control_env, 'humanoid', 'walk', config, params, repeat)
  return Task('humanoid_walk', ctor, components)
def quadruped_fetch(config, params):
  """DM Control quadruped fetch task (no state diagnostics)."""
  repeat = params.get('action_repeat', 2)
  ctor = tools.bind(
      _dm_control_env, 'quadruped', 'fetch', config, params, repeat,
      camera_id=2, normalize_actions=True)
  return Task('quadruped_fetch', ctor, [])
def gym_racecar(config, params):
  """CarRacing-v0 Gym task. Works with `isolate_envs: thread`."""
  repeat = params.get('action_repeat', 1)
  ctor = tools.bind(
      _gym_env, 'CarRacing-v0', config, params, repeat,
      select_obs=[], obs_is_image=False, render_mode='state_pixels')
  # NOTE(review): the registered task name is 'gym_racing', not
  # 'gym_racecar' — presumably intentional; verify against configs that
  # reference this task by name before changing it.
  return Task('gym_racing', ctor, [])
def _active_collection(tasks, collects, defaults, config, params):
  """Build the named collection simulations from user collect configs.

  Each entry of `collects` overrides `defaults`; the result holds one
  AttrDict per (collect, task) pair, keyed '<prefix>_<name>_<task>'.
  """
  sims = tools.AttrDict()
  for task in tasks:
    for user_collect in collects:
      # Reject unknown keys early so typos do not silently pick up defaults.
      for key in user_collect:
        if key not in defaults:
          message = 'Invalid key {} in activation collection config.'
          raise KeyError(message.format(key))
      collect = tools.AttrDict(defaults, _unlocked=True)
      collect.update(user_collect)
      # Resolve the planner/objective names into bound constructors.
      collect.planner = _define_planner(
          collect.planner, collect.horizon, config, params)
      collect.objective = tools.bind(
          getattr(objectives_lib, collect.objective), params=params)
      # Curious/random/prediction-error runs with adaptation enabled attach
      # a secondary planner and objective for train-time collection.
      adaptation_condition = (collect.prefix=='train' and config.curious_run and config.adaptation) or (collect.prefix=='train' and config.random_run and config.adaptation) or (collect.prefix=='train' and config.vanilla_curious_run and config.adaptation)
      if adaptation_condition:
        collect.secondary_planner = _define_planner(
            collect.secondary_planner, collect.horizon, config, params)
        collect.secondary_objective = tools.bind(
            getattr(objectives_lib, collect.secondary_objective),
            params=params)
      if collect.give_rewards:
        collect.task = task
      else:
        # Hide task rewards from the agent during this collection.
        env_ctor = tools.bind(
            lambda ctor: control.wrappers.NoRewardHint(ctor()),
            task.env_ctor)
        collect.task = tasks_lib.Task(task.name, env_ctor)
      # Action noise settings, with a linear ramp schedule.
      collect.exploration = tools.AttrDict(
          scale=collect.action_noise_scale,
          type=collect.action_noise_type,
          schedule=tools.bind(
              tools.schedule.linear,
              ramp=collect.action_noise_ramp,
              min=collect.action_noise_min),
          factors=collect.action_noise_factors)
      name = '{}_{}_{}'.format(collect.prefix, collect.name, task.name)
      assert name not in sims, (set(sims.keys()), name)
      sims[name] = collect
  return sims
def _data_processing(config, params):
  """Configure batching, image preprocessing, and the episode loader."""
  # Default batch shape was (50, 50) in earlier experiments.
  config.batch_shape = params.get('batch_shape', (10, 10))
  config.num_chunks = params.get('num_chunks', 1)
  bits = params.get('image_bits', 8)
  config.preprocess_fn = tools.bind(tools.preprocess.preprocess, bits=bits)
  config.postprocess_fn = tools.bind(tools.preprocess.postprocess, bits=bits)
  config.open_loop_context = 5
  config.data_reader = tools.bind(
      tools.numpy_episodes.episode_reader,
      clip_rewards=params.get('clip_rewards', False),
      pcont_scale=params.get('pcont_scale', 0.99))
  # Pick the episode loading strategy by name.
  loaders = {
      'cache': tools.bind(
          tools.numpy_episodes.cache_loader,
          every=params.get('loader_every', 1000)),
      'recent': tools.bind(
          tools.numpy_episodes.recent_loader,
          every=params.get('loader_every', 1000)),
      'window': tools.bind(
          tools.numpy_episodes.window_loader,
          window=params.get('loader_window', 400),
          every=params.get('loader_every', 1000)),
      'reload': tools.numpy_episodes.reload_loader,
      'dummy': tools.numpy_episodes.dummy_loader,
  }
  config.data_loader = loaders[params.get('loader', 'cache')]
  config.gpu_prefetch = params.get('gpu_prefetch', False)
  return config
def carbongym_pickplace(config, params):
  """Franka pick-and-place CarbonGym task. Works with `isolate_envs: process`.

  The environment config YAML path was previously hard-coded to one
  machine's home directory; it is now overridable via the
  'carbongym_cfg_path' param, keeping the old value as the default for
  backward compatibility.
  """
  cfg_path = params.get(
      'carbongym_cfg_path',
      '/home/pathak-visitor1/projects/hrl-exp-proj/hrl-exp/cfg/'
      'run_franka_pick_and_place.yaml')
  env_ctor = tools.bind(
      _carbongym_env, '', config, params, GymFrankaPickAndPlaceVecEnv,
      cfg_path)
  return Task('carbongym_pickplace', env_ctor, ['state'])
def quadruped_walk(config, params):
  """DM Control quadruped walk task with proprioceptive diagnostics."""
  repeat = params.get('action_repeat', 2)
  components = [
      'reward', 'egocentric_state', 'force_torque', 'imu', 'torso_upright',
      'torso_velocity']
  ctor = tools.bind(
      _dm_control_env, 'quadruped', 'walk', config, params, repeat,
      camera_id=2, normalize_actions=True)
  return Task('quadruped_walk', ctor, components)
def process(logdir, rolloutdir, args):
  """Run one training process end to end, yielding evaluation scores.

  Builds the experiment config from `args.params`, collects initial
  episodes, constructs the episode dataset and training graph, and drives
  the training loop as a generator.
  """
  with args.params.unlocked:
    args.params.logdir = logdir
    args.params.rolloutdir = rolloutdir
  config = configs.make_config(args.params)
  logdir = pathlib.Path(logdir)
  # Metrics are flushed asynchronously by a small worker pool.
  metrics = tools.Metrics(logdir / 'metrics', workers=5)
  training.utility.collect_initial_episodes(metrics, config)
  # TF1-style graph mode: start every process from a clean graph.
  tf.reset_default_graph()
  dataset = tools.numpy_episodes.numpy_episodes(
      config.train_dir, config.test_dir, config.batch_shape,
      reader=config.data_reader,
      loader=config.data_loader,
      config=config,
      num_chunks=config.num_chunks,
      preprocess_fn=config.preprocess_fn,
      gpu_prefetch=config.gpu_prefetch)
  # Wrap the file-based metrics so they can be updated from inside the graph.
  metrics = tools.InGraphMetrics(metrics)
  build_graph = tools.bind(training.define_model, logdir, metrics)
  for score in training.utility.train(build_graph, dataset, logdir, config):
    yield score
def reacher_hard(config, params):
  """DM Control reacher hard task."""
  repeat = params.get('action_repeat', 2)
  ctor = tools.bind(
      _dm_control_env, 'reacher', 'hard', config, params, repeat)
  return Task('reacher_hard', ctor, ['position', 'velocity', 'to_target'])
def _loss_functions(config, params):
  """Configure imagination, loss scales, and optimizers on `config`.

  Mutates `config` in place and returns it. Every value can be overridden
  via `params`; the defaults are listed inline.
  """
  # Every gradient head must have been registered before this runs.
  for head in config.gradient_heads:
    assert head in config.heads, head
  config.imagination_horizon = params.get('imagination_horizon', 15)
  if config.curious_run or config.vanilla_curious_run:
    config.exploration_imagination_horizon = params.get(
        'exploration_imagination_horizon', 15)
  config.imagination_skip_last = params.get('imagination_skip_last', None)
  config.imagination_include_initial = params.get(
      'imagination_include_initial', True)
  # Actor (action head) settings.
  config.action_source = params.get('action_source', 'model')
  config.action_model_horizon = params.get('action_model_horizon', None)
  config.action_bootstrap = params.get('action_bootstrap', True)
  if config.curious_run or config.vanilla_curious_run:
    config.curious_action_bootstrap = params.get(
        'curious_action_bootstrap', True)
  config.action_discount = params.get('action_discount', 0.99)
  config.action_lambda = params.get('action_lambda', 0.95)
  config.action_target_update = params.get('action_target_update', 1)
  config.action_target_period = params.get('action_target_period', 50000)
  config.action_loss_pcont = params.get('action_loss_pcont', False)
  config.action_pcont_stop_grad = params.get('action_pcont_stop_grad', False)
  config.action_pcont_weight = params.get('action_pcont_weight', True)
  config.action_lr = params.get('action_lr', 8e-5)
  # Critic (value head) settings.
  config.value_source = params.get('value_source', 'model')
  config.value_model_horizon = params.get('value_model_horizon', None)
  config.value_discount = params.get('value_discount', 0.99)
  config.value_lambda = params.get('value_lambda', 0.95)
  config.value_bootstrap = params.get('value_bootstrap', True)
  if config.curious_run or config.vanilla_curious_run:
    config.curious_value_bootstrap = params.get(
        'curious_value_bootstrap', True)
  config.value_target_update = params.get('value_target_update', 1)
  config.value_target_period = params.get('value_target_period', 50000)
  config.value_loss_pcont = params.get('value_loss_pcont', False)
  config.value_pcont_weight = params.get('value_pcont_weight', True)
  config.value_maxent = params.get('value_maxent', False)
  config.value_lr = params.get('value_lr', 8e-5)
  # Regularizers and auxiliary loss scales.
  config.action_beta = params.get('action_beta', 0.0)
  config.action_beta_dims_value = params.get('action_beta_dims_value', None)
  config.state_beta = params.get('state_beta', 0.0)
  config.stop_grad_pre_action = params.get('stop_grad_pre_action', True)
  config.pcont_label_weight = params.get('pcont_label_weight', None)
  config.loss_scales.divergence = params.get('divergence_scale', 1.0)
  config.loss_scales.global_divergence = params.get('global_div_scale', 0.0)
  config.loss_scales.overshooting = params.get('overshooting_scale', 0.0)
  for head in config.heads:
    if head in ('value_target', 'action_target'):  # Untrained.
      continue
    config.loss_scales[head] = params.get(head + '_loss_scale', 1.0)
  # Disagreement ensembles get one loss scale per one-step model.
  if config.curious_run or config.combination_run:
    for mdl in range(config.num_models):
      config.loss_scales['one_step_model_'+str(mdl)] = params.get(
          'one_step_model_scale', 1.0)
  config.free_nats = params.get('free_nats', 3.0)
  config.overshooting_distance = params.get('overshooting_distance', 0)
  config.os_stop_posterior_grad = params.get('os_stop_posterior_grad', True)
  config.cpc_contrast = params.get('cpc_contrast', 'window')
  config.cpc_batch_amount = params.get('cpc_batch_amount', 10)
  config.cpc_time_amount = params.get('cpc_time_amount', 30)
  # Optimizers: one default, then per-component copies with their own
  # learning rates and gradient clipping.
  optimizer_cls = tools.bind(
      tf.train.AdamOptimizer,
      epsilon=params.get('optimizer_epsilon', 1e-4))
  config.optimizers = tools.AttrDict()
  config.optimizers.default = tools.bind(
      tools.CustomOptimizer,
      optimizer_cls=optimizer_cls,
      # schedule=tools.bind(tools.schedule.linear, ramp=0),
      learning_rate=params.get('default_lr', 1e-3),
      clipping=params.get('default_gradient_clipping', 1000.0))
  config.optimizers.model = config.optimizers.default.copy(
      learning_rate=params.get('model_lr', 6e-4),
      clipping=params.get('model_gradient_clipping', 100.0))
  config.optimizers.value = config.optimizers.default.copy(
      learning_rate=params.get('value_lr', 8e-5),
      clipping=params.get('value_gradient_clipping', 100.0))
  if params.get('curious_value_head', False):
    config.optimizers.curious_value = config.optimizers.default.copy(
        learning_rate=params.get('curious_value_lr', 8e-5),
        clipping=params.get('value_gradient_clipping', 100.0))
  config.optimizers.action = config.optimizers.default.copy(
      learning_rate=params.get('action_lr', 8e-5),
      clipping=params.get('action_gradient_clipping', 100.0))
  if params.get('curious_action_head', False):
    config.optimizers.curious_action = config.optimizers.default.copy(
        learning_rate=params.get('curious_action_lr', 8e-5),
        clipping=params.get('action_gradient_clipping', 100.0))
  return config
def task_fn(name, game, config, params, mode):
  """Construct an Atari task for `game` in the given train/test mode."""
  repeat = params.get('action_repeat', 4)
  ctor = tools.bind(_atari_env, game, mode, config, params, repeat)
  return Task(name, ctor, [])
def task_fn(name, level, config, params):
  """Construct a DM Lab task for the given level name."""
  repeat = params.get('action_repeat', 4)
  ctor = tools.bind(_dm_lab_env, level, config, params, repeat)
  return Task(name, ctor, [])
def acrobot_swingup(config, params):
  """DM Control acrobot swingup task."""
  repeat = params.get('action_repeat', 2)
  ctor = tools.bind(
      _dm_control_env, 'acrobot', 'swingup', config, params, repeat)
  return Task('acrobot_swingup', ctor, ['orientations', 'velocity'])
def manipulator_bring(config, params):
  """DM Control manipulator bring_ball task (no state diagnostics)."""
  repeat = params.get('action_repeat', 2)
  ctor = tools.bind(
      _dm_control_env, 'manipulator', 'bring_ball', config, params, repeat)
  return Task('manipulator_bring_ball', ctor, [])
def _model_components(config, params):
  """Configure encoder, heads, run-type flags, and the dynamics model.

  Mutates `config` in place and returns it. The 'defaults' param selects
  the run type: 'prediction_error', 'disagree', 'combination', 'random',
  or plain 'dreamer'.
  """
  config.gradient_heads = params.get('gradient_heads', ['image', 'reward'])
  if 'prediction_error' in params.get('defaults', ['dreamer']):
    config.gradient_heads.append('reward_int')
  config.activation = ACTIVATIONS[params.get('activation', 'elu')]
  config.num_layers = params.get('num_layers', 3)
  config.num_units = params.get('num_units', 400)
  # Observation encoder: convolutional (images) or proprioceptive MLP.
  encoder = params.get('encoder', 'conv')
  if encoder == 'conv':
    config.encoder = networks.conv.encoder
  elif encoder == 'proprio':
    config.encoder = tools.bind(
        networks.proprio.encoder,
        keys=params.get('proprio_encoder_keys'),
        num_layers=params.get('proprio_encoder_num_layers', 3),
        units=params.get('proprio_encoder_units', 300))
  else:
    raise NotImplementedError(encoder)
  # Shared feed-forward template for all prediction heads.
  config.head_network = tools.bind(
      networks.feed_forward,
      num_layers=config.num_layers,
      units=config.num_units,
      activation=config.activation)
  config.heads = tools.AttrDict()
  # Run-type flags; the blocks below may overwrite them.
  config.vanilla_curious_run = False
  config.curious_run = False
  config.combination_run = False
  config.adaptation = params.get('adaptation', False)
  config.encoder_feature_shape = params.get('encoder_feature_shape', 1024)
  # Prediction-error curiosity run.
  if 'prediction_error' in params.get('defaults', ['dreamer']):
    config.vanilla_curious_run = True
    config.freeze_extrinsic_heads = params.get('freeze_extrinsic_heads', True)
    config.adaptation = params.get('adaptation', False)
    config.use_data_ratio = params.get('use_data_ratio', False)
    config.encoder_feature_shape = params.get('encoder_feature_shape', 1024)
    config.exploration_episodes = params.get('exploration_episodes', 0)
    config.use_max_objective = params.get('use_max_objective', False)
    config.use_reinforce = params.get('use_reinforce', False)
    if config.adaptation:
      config.adaptation_step = params.get('adaptation_step', 5e6)
      config.secondary_horizon = params.get('secondary_horizon', 15)
      config.secondary_action_lr = params.get('secondary_action_lr', 8e-5)
      config.secondary_value_lr = params.get('secondary_value_lr', 8e-5)
      config.secondary_train_step = params.get('secondary_train_step', 50000)
  # Ensemble-disagreement curiosity run.
  if 'disagree' in params.get('defaults', ['dreamer']):
    config.one_step_model = getattr(networks, 'one_step_model').one_step_model
    config.num_models = params.get('num_models', 5)
    config.curious_run = True
    config.bootstrap = params.get('bootstrap', True)
    config.intrinsic_reward_scale = params.get('intrinsic_reward_scale', 10000)
    config.ensemble_loss_scale = params.get('ensemble_loss_scale', 1)
    config.freeze_extrinsic_heads = params.get('freeze_extrinsic_heads', True)
    config.ensemble_model_type = params.get('ensemble_model_type', 1)
    config.model_width_factor = params.get('model_width_factor', 1)
    config.extrinsic_train_frequency = params.get(
        'extrinsic_train_frequency', 1)
    config.slower_extrinsic_train = config.extrinsic_train_frequency>1
    config.adaptation = params.get('adaptation', False)
    config.use_data_ratio = params.get('use_data_ratio', False)
    config.exploration_episodes = params.get('exploration_episodes', 0)
    config.encoder_feature_shape = params.get('encoder_feature_shape', 1024)
    config.normalize_reward = params.get('normalize_reward', False)
    config.use_max_objective = params.get('use_max_objective', False)
    config.use_reinforce = params.get('use_reinforce', False)
    # REINFORCE is only meaningful together with the max objective.
    if not config.use_max_objective:
      assert config.use_reinforce == False
    if config.normalize_reward:
      config.moving_average_stats = params.get('moving_average_stats', False)
    if config.adaptation:
      config.adaptation_step = params.get('adaptation_step', 5e6)
      config.secondary_horizon = params.get('secondary_horizon', 15)
      config.secondary_action_lr = params.get('secondary_action_lr', 8e-5)
      config.secondary_value_lr = params.get('secondary_value_lr', 8e-5)
      config.secondary_train_step = params.get('secondary_train_step', 50000)
  # Combined extrinsic + intrinsic reward run.
  if 'combination' in params.get('defaults', ['dreamer']):
    config.combination_run = True
    config.extrinsic_coeff = params.get('extrinsic_coeff', 1.0)
    config.intrinsic_coeff = params.get('intrinsic_coeff', 0.01)
    config.one_step_model = getattr(networks, 'one_step_model').one_step_model
    config.num_models = params.get('num_models', 5)
    config.bootstrap = params.get('bootstrap', True)
    config.intrinsic_reward_scale = params.get('intrinsic_reward_scale', 10000)
    config.ensemble_loss_scale = params.get('ensemble_loss_scale', 1)
    config.freeze_extrinsic_heads = params.get('freeze_extrinsic_heads', True)
    config.ensemble_model_type = params.get('ensemble_model_type', 1)
    config.model_width_factor = params.get('model_width_factor', 1)
    config.adaptation = params.get('adaptation', False)
    config.encoder_feature_shape = params.get('encoder_feature_shape', 1024)
    config.normalize_reward = params.get('normalize_reward', False)
    config.use_max_objective = params.get('use_max_objective', False)
    config.use_reinforce = params.get('use_reinforce', False)
    if not config.use_max_objective:
      assert config.use_reinforce == False
  # Random-exploration baseline run.
  config.random_run = False
  if 'random' in params.get('defaults', ['dreamer']):
    config.random_run = True
    config.freeze_extrinsic_heads = params.get('freeze_extrinsic_heads', True)
    config.encoder_feature_shape = params.get('encoder_feature_shape', 1024)
    config.adaptation = params.get('adaptation', False)
    config.use_data_ratio = params.get('use_data_ratio', False)
    config.exploration_episodes = params.get('exploration_episodes', 0)
    if config.adaptation:
      config.adaptation_step = params.get('adaptation_step', 5e6)
      config.secondary_horizon = params.get('secondary_horizon', 15)
      config.secondary_action_lr = params.get('secondary_action_lr', 8e-5)
      config.secondary_value_lr = params.get('secondary_value_lr', 8e-5)
      config.secondary_train_step = params.get('secondary_train_step', 50000)
  # Value heads.
  if params.get('value_head', True):
    config.heads.value = tools.bind(
        config.head_network,
        num_layers=params.get('value_layers', 3),
        data_shape=[],
        dist=params.get('value_dist', 'normal'))
  if params.get('curious_value_head', False):
    config.heads.curious_value = tools.bind(
        config.head_network,
        num_layers=params.get('value_layers', 3),
        data_shape=[],
        dist=params.get('value_dist', 'normal'))
  if params.get('value_target_head', False):
    config.heads.value_target = tools.bind(
        config.head_network,
        num_layers=params.get('value_layers', 3),
        data_shape=[],
        stop_gradient=True,
        dist=params.get('value_dist', 'normal'))
  if params.get('return_head', False):
    config.heads['return'] = tools.bind(
        config.head_network, activation=config.activation)
  # Action heads.
  if params.get('action_head', True):
    config.heads.action = tools.bind(
        config.head_network,
        num_layers=params.get('action_layers', 4),
        mean_activation=ACTIVATIONS[
            params.get('action_mean_activation', 'none')],
        dist=params.get('action_head_dist', 'tanh_normal_tanh'),
        std=params.get('action_head_std', 'learned'),
        min_std=params.get('action_head_min_std', 1e-4),
        init_std=params.get('action_head_init_std', 5.0))
  if params.get('curious_action_head', False):
    config.heads.curious_action = tools.bind(
        config.head_network,
        num_layers=params.get('action_layers', 4),
        mean_activation=ACTIVATIONS[
            params.get('action_mean_activation', 'none')],
        dist=params.get('action_head_dist', 'tanh_normal_tanh'),
        std=params.get('action_head_std', 'learned'),
        min_std=params.get('action_head_min_std', 1e-4),
        init_std=params.get('action_head_init_std', 5.0))
  if params.get('action_target_head', False):
    config.heads.action_target = tools.bind(
        config.head_network,
        num_layers=params.get('action_layers', 4),
        stop_gradient=True,
        mean_activation=ACTIVATIONS[
            params.get('action_mean_activation', 'none')],
        dist=params.get('action_head_dist', 'tanh_normal_tanh'),
        std=params.get('action_head_std', 'learned'),
        min_std=params.get('action_head_min_std', 1e-4),
        init_std=params.get('action_head_init_std', 5.0))
  if params.get('cpc_head', False):
    config.heads.cpc = config.head_network.copy(
        dist=params.get('cpc_head_dist', 'normal'),
        std=params.get('cpc_head_std', 'learned'),
        num_layers=params.get('cpc_head_layers', 3))
  # Image decoder head.
  image_head = params.get('image_head', 'conv')
  if image_head == 'conv':
    config.heads.image = tools.bind(
        networks.conv.decoder, std=params.get('image_head_std', 1.0))
  else:
    raise NotImplementedError(image_head)
  # Latent dynamics model (RSSM cell).
  hidden_size = params.get('model_size', 400)
  state_size = params.get('state_size', 60)
  model = params.get('model', 'rssm')
  if model == 'rssm':
    config.cell = tools.bind(
        models.RSSM, state_size, hidden_size, hidden_size,
        params.get('future_rnn', True),
        params.get('mean_only', False),
        params.get('min_stddev', 1e-1),
        config.activation,
        params.get('model_layers', 1))
  else:
    raise NotImplementedError(model)
  return config
def cheetah_flip_backward(config, params):
  """DM Control cheetah flip_backward task.

  Fixes a copy-paste bug: the task was registered under the name
  'cheetah_flip_forward', colliding with the actual forward-flip task.
  """
  action_repeat = params.get('action_repeat', 2)
  state_components = ['position', 'velocity']
  env_ctor = tools.bind(
      _dm_control_env, 'cheetah', 'flip_backward', config, params,
      action_repeat)
  return Task('cheetah_flip_backward', env_ctor, state_components)
def cartpole_swingup(config, params):
  """DM Control cartpole swingup task."""
  repeat = params.get('action_repeat', 2)
  ctor = tools.bind(
      _dm_control_env, 'cartpole', 'swingup', config, params, repeat)
  return Task('cartpole_swingup', ctor, ['position', 'velocity'])
'dmlab_keys': 'rooms_keys_doors_puzzle', 'dmlab_lasertag_1': 'lasertag_one_opponent_small', 'dmlab_lasertag_3': 'lasertag_three_opponents_small', 'dmlab_recognize': 'psychlab_visual_search', 'dmlab_watermaze': 'rooms_watermaze', } for name, level in DMLAB_TASKS.items(): def task_fn(name, level, config, params): action_repeat = params.get('action_repeat', 4) env_ctor = tools.bind(_dm_lab_env, level, config, params, action_repeat) return Task(name, env_ctor, []) locals()[name] = tools.bind(task_fn, name, level) ATARI_TASKS = [ 'Alien', 'Amidar', 'Assault', 'Asterix', 'Asteroids', 'Atlantis', 'BankHeist', 'BattleZone', 'BeamRider', 'Berzerk', 'Bowling', 'Boxing', 'Breakout', 'Centipede', 'ChopperCommand', 'CrazyClimber', 'Defender', 'DemonAttack', 'DoubleDunk', 'Enduro', 'FishingDerby', 'Freeway', 'Frostbite', 'Gopher', 'Gravitar', 'Hero', 'IceHockey', 'Jamesbond', 'Kangaroo', 'Krull', 'KungFuMaster', 'MontezumaRevenge', 'MsPacman', 'NameThisGame', 'Phoenix', 'Pitfall', 'Pong', 'PrivateEye', 'Qbert', 'Riverraid', 'RoadRunner', 'Robotank', 'Seaquest', 'Skiing', 'Solaris', 'SpaceInvaders', 'StarGunner', 'Surround', 'Tennis', 'TimePilot', 'Tutankham', 'UpNDown', 'Venture', 'VideoPinball', 'WizardOfWor', 'YarsRevenge', 'Zaxxon' ] ATARI_TASKS = {'atari_{}'.format(game.lower()): game for game in ATARI_TASKS}
def walker_stand(config, params):
  """DM Control walker stand task."""
  repeat = params.get('action_repeat', 2)
  ctor = tools.bind(
      _dm_control_env, 'walker', 'stand', config, params, repeat)
  return Task('walker_stand', ctor, ['height', 'orientations', 'velocity'])
def finger_spin(config, params):
  """DM Control finger spin task."""
  repeat = params.get('action_repeat', 2)
  ctor = tools.bind(
      _dm_control_env, 'finger', 'spin', config, params, repeat)
  return Task('finger_spin', ctor, ['position', 'velocity', 'touch'])
def fish_upright(config, params):
  """DM Control fish upright task (no state diagnostics)."""
  repeat = params.get('action_repeat', 2)
  ctor = tools.bind(
      _dm_control_env, 'fish', 'upright', config, params, repeat)
  return Task('fish_upright', ctor, [])
def cup_catch(config, params):
  """DM Control ball_in_cup catch task."""
  repeat = params.get('action_repeat', 2)
  ctor = tools.bind(
      _dm_control_env, 'ball_in_cup', 'catch', config, params, repeat)
  return Task('cup_catch', ctor, ['position', 'velocity'])
def pointmass_easy(config, params):
  """DM Control point_mass easy task (no state diagnostics)."""
  repeat = params.get('action_repeat', 2)
  ctor = tools.bind(
      _dm_control_env, 'point_mass', 'easy', config, params, repeat)
  return Task('pointmass_easy', ctor, [])