def get_variant_spec_base(universe, domain, task, policy, algorithm):
    algorithm_params = deep_update(
        ALGORITHM_PARAMS_BASE,
        ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {}))
    algorithm_params = deep_update(
        algorithm_params,
        ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {}))
    variant_spec = {
        'domain': domain,
        'task': task,
        'universe': universe,
        'git_sha': get_git_rev(),

        'env_params': ENV_PARAMS.get(domain, {}).get(task, {}),
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})),
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
            }
        },
        'algorithm_params': algorithm_params,
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                'max_size': int(1e6),  # was the float 1e6; pool sizes are ints in every other variant
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'batch_size': 256,
            }
        },
        'run_params': {
            'seed': tune.sample_from(
                lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': NUM_EPOCHS_PER_DOMAIN.get(
                domain, DEFAULT_NUM_EPOCHS) // NUM_CHECKPOINTS,
            'checkpoint_replay_pool': False,
        },
    }

    return variant_spec
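# Every variant in this file composes its parameter dictionaries with
# `deep_update`. The project's actual helper lives in its utility module;
# the sketch below only illustrates the recursive-merge semantics these
# snippets appear to assume (an assumption, not the real implementation):
import copy


def deep_update_sketch(base, *updates):
    """Recursively merge each of `updates` into a copy of `base`.

    Nested dicts are merged key by key; any non-dict value from a later
    argument overwrites the earlier one. A single-argument call simply
    returns a deep copy, which is how some variants below use it.
    """
    result = copy.deepcopy(base)
    for update in updates:
        for key, value in update.items():
            if isinstance(value, dict) and isinstance(result.get(key), dict):
                result[key] = deep_update_sketch(result[key], value)
            else:
                result[key] = copy.deepcopy(value)
    return result


# Example: domain-specific settings override only the keys they name.
assert deep_update_sketch(
    {'kwargs': {'lr': 3e-4, 'n_epochs': 200}},
    {'kwargs': {'n_epochs': 500}},
)['kwargs'] == {'lr': 3e-4, 'n_epochs': 500}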
def get_variant_spec_base(universe, domain, task, policy, algorithm,
                          sampler, replay_pool):
    algorithm_params = deep_update(
        ALGORITHM_PARAMS_BASE,
        ALGORITHM_PARAMS_PER_DOMAIN.get(
            domain, DEFAULT_ALGORITHM_DOMAIN_PARAMS))
    algorithm_params = deep_update(
        algorithm_params,
        ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {}))
    variant_spec = {
        'git_sha': get_git_rev(),

        'environment_params': {
            'training': {
                'domain': domain,
                'task': task,
                'universe': universe,
                'kwargs': (
                    ENVIRONMENT_PARAMS.get(domain, {}).get(task, {})),
            },
            'evaluation': tune.sample_from(lambda spec: (
                spec.get('config', spec)
                ['environment_params']['training'])),
        },
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})),
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
            }
        },
        'algorithm_params': algorithm_params,
        'replay_pool_params': deep_update(
            REPLAY_POOL_PARAMS_BASE[replay_pool]),
        'sampler_params': deep_update(
            SAMPLER_PARAMS_BASE[sampler],
            SAMPLER_PARAMS_PER_DOMAIN.get(
                domain, DEFAULT_SAMPLER_DOMAIN_PARAMS)),
        'run_params': {
            'seed': tune.sample_from(
                lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': NUM_EPOCHS_PER_DOMAIN.get(
                domain, DEFAULT_NUM_EPOCHS) // NUM_CHECKPOINTS,
            'checkpoint_replay_pool': False,
        },
    }

    return variant_spec
def get_variant_spec(args):
    algorithm = args.algorithm
    layer_size = 128

    variant_spec = {
        'seed': 1,
        'layer_size': layer_size,
        'policy_params': {
            'type': 'GaussianPolicy',
            'kwargs': {
                'hidden_layer_sizes': (layer_size, layer_size),
                'squash': True,
            },
        },
        'algorithm_params': deep_update(
            ALGORITHM_PARAMS_BASE,
            ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {})
        ),
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (layer_size, layer_size),
            }
        },
        'run_params': {},
    }

    return variant_spec
def get_variant_spec_base(universe, domain, task, policy, algorithm):
    algorithm_params = deep_update(
        ALGORITHM_PARAMS_BASE,
        get_algorithm_params(universe, domain, task),
        ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {}),
    )
    variant_spec = {
        'git_sha': get_git_rev(__file__),

        'environment_params': {
            'training': {
                'domain': domain,
                'task': task,
                'universe': universe,
                'kwargs': get_environment_params(universe, domain, task),
            },
            'evaluation': tune.sample_from(lambda spec: (
                spec.get('config', spec)
                ['environment_params']['training'])),
        },
        'policy_params': get_policy_params(universe, domain, task),
        'exploration_policy_params': {
            'type': 'UniformPolicy',
            'kwargs': {
                'observation_keys': tune.sample_from(lambda spec: (
                    spec.get('config', spec)
                    ['policy_params']['kwargs'].get('observation_keys')))
            },
        },
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
                'observation_keys': None,
                'observation_preprocessors_params': {}
            }
        },
        'algorithm_params': algorithm_params,
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                'max_size': int(1e6)
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': get_max_path_length(universe, domain, task),
                'min_pool_size': get_max_path_length(universe, domain, task),
                'batch_size': 256,
            }
        },
        'run_params': {
            'seed': tune.sample_from(
                lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': tune.sample_from(get_checkpoint_frequency),
            'checkpoint_replay_pool': False,
        },
    }

    return variant_spec
def get_variant_spec_base(universe, domain, task, policy, algorithm,
                          env_params):
    algorithm_params = deep_update(
        env_params,
        ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {})
    )
    algorithm_params = deep_update(
        algorithm_params,
        ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {})
    )
    variant_spec = {
        # 'git_sha': get_git_rev(),

        'environment_params': {
            'training': {
                'domain': domain,
                'task': task,
                'universe': universe,
                'kwargs': (
                    ENVIRONMENT_PARAMS.get(domain, {}).get(task, {})),
            },
            'evaluation': lambda spec: (
                spec['environment_params']['training']),
        },
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})
        ),
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
            }
        },
        'algorithm_params': algorithm_params,
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                'max_size': lambda spec: (
                    {
                        'SimpleReplayPool': int(1e6),
                        'TrajectoryReplayPool': int(1e4),
                    }.get(spec['replay_pool_params']['type'], int(1e6))
                ),
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'batch_size': 256,
            }
        },
        'run_params': {
            'seed': 88,
            'checkpoint_at_end': True,
            'checkpoint_frequency': NUM_EPOCHS_PER_DOMAIN.get(
                domain, DEFAULT_NUM_EPOCHS) // NUM_CHECKPOINTS,
            'checkpoint_replay_pool': False,
            'info': ''
        },
    }

    return variant_spec
def get_variant_spec_base(universe, domain, task, task_eval,
                          policy, algorithm, from_vision):
    algorithm_params = ALGORITHM_PARAMS_BASE
    algorithm_params = deep_update(
        algorithm_params,
        ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {}))

    variant_spec = {
        'git_sha': get_git_rev(),

        'environment_params': {
            'training': {
                'domain': domain,
                'task': task,
                'universe': universe,
                'kwargs': get_environment_params(
                    universe, domain, task, from_vision),
            },
            'evaluation': {
                'domain': domain,
                'task': task_eval,
                'universe': universe,
                'kwargs': (
                    tune.sample_from(lambda spec: (
                        spec.get('config', spec)
                        ['environment_params']['training'].get('kwargs')))
                    if task == task_eval
                    else get_environment_params(
                        universe, domain, task_eval, from_vision)),
            },
        },
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})),
        'exploration_policy_params': {
            'type': 'UniformPolicy',
            'kwargs': {
                'observation_keys': tune.sample_from(lambda spec: (
                    spec.get('config', spec)
                    ['policy_params']['kwargs'].get('observation_keys')))
            },
        },
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
                'observation_keys': tune.sample_from(lambda spec: (
                    spec.get('config', spec)
                    ['policy_params']['kwargs'].get('observation_keys'))),
                'observation_preprocessors_params': {}
            }
        },
        'distance_fn_params': get_distance_fn_params(universe, domain, task),
        'algorithm_params': algorithm_params,
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                # 'max_size': int(5e5),
                'max_size': tune.grid_search([int(5e4)]),
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': get_max_path_length(universe, domain, task),
                'min_pool_size': 50,
                'batch_size': 256,  # tune.grid_search([128, 256]),
                'store_last_n_paths': 20,
            }
        },
        'run_params': {
            'seed': tune.sample_from(
                lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': False,
            'checkpoint_frequency': tune.sample_from(get_checkpoint_frequency),
            'checkpoint_replay_pool': False,
        },
    }

    # Filter out parts of the state relating to the object when training
    # from pixels.
    env_kwargs = variant_spec['environment_params']['training']['kwargs']
    if from_vision and 'device_path' not in env_kwargs:
        env_obs_keys = env_kwargs.get('observation_keys', tuple())

        non_image_obs_keys = tuple(
            key for key in env_obs_keys if key != 'pixels')
        variant_spec['replay_pool_params']['kwargs'][
            'obs_save_keys'] = non_image_obs_keys

        non_object_obs_keys = tuple(
            key for key in env_obs_keys if 'object' not in key)
        # Point the policy, exploration policy, Q-function, and distance
        # function at the same object-free observation keys.
        for params in (variant_spec['policy_params'],
                       variant_spec['exploration_policy_params'],
                       variant_spec['Q_params'],
                       variant_spec['distance_fn_params']):
            params['kwargs']['observation_keys'] = non_object_obs_keys

    return variant_spec
def get_variant_spec_base(universe, domain, task, policy, algorithm):
    algorithm_params = deep_update(
        ALGORITHM_PARAMS_BASE,
        ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {})
    )
    algorithm_params = deep_update(
        algorithm_params,
        ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {})
    )
    variant_spec = {
        'git_sha': get_git_rev(__file__),

        'environment_params': {
            'training': {
                'domain': domain,
                'task': task,
                'universe': universe,
                'kwargs': (
                    ENVIRONMENT_PARAMS.get(domain, {}).get(task, {})),
            },
            'evaluation': tune.sample_from(lambda spec: (
                spec.get('config', spec)
                ['environment_params']
                ['training']
            )),
        },
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})
        ),
        'exploration_policy_params': {
            'type': 'UniformPolicy',
            'kwargs': {
                'observation_keys': tune.sample_from(lambda spec: (
                    spec.get('config', spec)
                    ['policy_params']
                    ['kwargs']
                    .get('observation_keys')
                ))
            },
        },
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
                'observation_keys': None,
                'observation_preprocessors_params': {}
            }
        },
        'algorithm_params': algorithm_params,
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                'max_size': tune.sample_from(lambda spec: (
                    {
                        'SimpleReplayPool': int(1e6),
                        'TrajectoryReplayPool': int(1e4),
                    }.get(
                        spec.get('config', spec)
                        ['replay_pool_params']
                        ['type'],
                        int(1e6))
                )),
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'batch_size': 256,
            }
        },
        'run_params': {
            'seed': tune.sample_from(
                lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': NUM_EPOCHS_PER_DOMAIN.get(
                domain, DEFAULT_NUM_EPOCHS) // NUM_CHECKPOINTS,
            'checkpoint_replay_pool': False,
        },
    }

    return variant_spec
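# The `tune.sample_from(lambda spec: ...)` entries above are resolved lazily
# by Ray Tune: each lambda receives the (partially resolved) trial spec, so
# one field can be derived from another; the evaluation environment mirrors
# the training one, and the replay pool size depends on the pool type. The
# `spec.get('config', spec)` idiom exists because Ray nests the resolved
# values under a 'config' key. Below is a rough pure-Python sketch of that
# deferred-resolution idea, not Ray Tune's actual machinery:

def resolve_spec_sketch(node, root):
    """Recursively replace callables with their value given the full spec."""
    if callable(node):
        return node({'config': root})
    if isinstance(node, dict):
        return {k: resolve_spec_sketch(v, root) for k, v in node.items()}
    return node


# The pool-size lookup above, resolved against a toy spec:
toy_spec = {
    'replay_pool_params': {'type': 'TrajectoryReplayPool'},
    'max_size': lambda spec: {
        'SimpleReplayPool': int(1e6),
        'TrajectoryReplayPool': int(1e4),
    }.get(spec.get('config', spec)['replay_pool_params']['type'], int(1e6)),
}
assert resolve_spec_sketch(toy_spec, toy_spec)['max_size'] == int(1e4)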
def get_variant_spec_base(universe, domain, task, policy, algorithm):
    algorithm_params = ALGORITHM_PARAMS_BASE
    algorithm_params = deep_update(
        algorithm_params,
        ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {}),
        ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {})
    )
    variant_spec = {
        'git_sha': get_git_rev(__file__),

        'environment_params': {
            'training': {
                'domain': domain,
                'task': task,
                'universe': universe,
                'kwargs': (
                    ENVIRONMENT_PARAMS.get(domain, {}).get(task, {})),
            },
            'evaluation': tune.sample_from(lambda spec: (
                spec.get('config', spec)
                ['environment_params']
                ['training']
            )),
        },
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})
        ),
        'exploration_policy_params': {
            'type': 'UniformPolicy',
            'kwargs': {
                'observation_keys': tune.sample_from(lambda spec: (
                    spec.get('config', spec)
                    ['policy_params']
                    ['kwargs']
                    .get('observation_keys')
                ))
            },
        },
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
                'observation_keys': None,
                'observation_preprocessors_params': {
                    'observations': None,
                }
            }
        },
        'algorithm_params': algorithm_params,
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                'max_size': tune.sample_from(lambda spec: (
                    {
                        'SimpleReplayPool': int(5e5),
                        'TrajectoryReplayPool': int(1e4),
                    }.get(
                        spec.get('config', spec)
                        ['replay_pool_params']
                        ['type'],
                        int(1e6))
                )),
            }
        },
        'sampler_params': deep_update({
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size': 1000,
                'batch_size': 256,
            }
        }, SAMPLER_PARAMS_PER_DOMAIN.get(domain, {})),
        'run_params': {
            'seed': tune.sample_from(
                lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': tune.sample_from(lambda spec: (
                25000
                // (spec.get('config', spec)
                    ['algorithm_params']
                    ['kwargs']
                    ['epoch_length'])
            )),
        },
    }

    if task == 'InfoScrewV2-v0':
        variant_spec['replay_pool_params']['kwargs']['include_images'] = True
    if (task == 'ImageScrewV2-v0'
            and ENVIRONMENT_PARAMS['DClaw3']['ImageScrewV2-v0'][
                'state_reward']):
        variant_spec['replay_pool_params']['kwargs'][
            'super_observation_space_shape'] = (9 + 9 + 2 + 1 + 2,)
    if domain == 'HardwareDClaw3':
        # The original used `==` here, a no-op comparison; the intent is
        # clearly to switch the sampler type, so assign instead of compare.
        variant_spec['sampler_params']['type'] = 'RemoteSampler'
        variant_spec['algorithm_params']['kwargs'][
            'max_train_repeat_per_timestep'] = 1

    return variant_spec
def get_variant_spec_base(universe, domain, task, task_evaluation,
                          policy, algorithm):
    # algorithm_params = deep_update(
    #     ALGORITHM_PARAMS_BASE,
    #     ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {})
    # )
    # algorithm_params = deep_update(
    #     algorithm_params,
    #     ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {})
    # )
    algorithm_params = ALGORITHM_PARAMS_BASE
    algorithm_params = deep_update(
        algorithm_params,
        ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {}))

    variant_spec = {
        'domain': domain,
        'task': task,
        'task_evaluation': task_evaluation,
        'universe': universe,
        'git_sha': get_git_rev(),

        'env_params': ENV_PARAMS.get(domain, {}).get(task, {}),
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})),
        'exploration_policy_params': {
            'type': 'UniformPolicy',
            'kwargs': {
                'observation_keys': tune.sample_from(lambda spec: (
                    spec.get('config', spec)
                    ['policy_params']['kwargs'].get('observation_keys')))
            },
        },
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
            }
        },
        'algorithm_params': algorithm_params,
        'replay_pool_params': {
            # 'type': 'SimpleReplayPool',
            # 'type': 'RelabelReplayPool',
            'type': 'HindsightExperienceReplayPool',
            'kwargs': {
                'max_size': 200000,
                # implement this
                'update_batch_fn': tune.function(REPLACE_FLAT_OBSERVATION),
                # 'reward_fn': tune.function(SACClassifier._reward_relabeler),
                'reward_fn': None,
                'terminal_fn': None,
                'her_strategy': {
                    'resampling_probability': 0.,  # tune.grid_search([.5, 0.8]),
                    'type': 'future',
                }
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'batch_size': 256,
                'store_last_n_paths': 20,
            }
        },
        'run_params': {
            'seed': tune.sample_from(
                lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': DEFAULT_NUM_EPOCHS // NUM_CHECKPOINTS,
            'checkpoint_replay_pool': False,
        },
    }

    return variant_spec
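# The variant above swaps in a HindsightExperienceReplayPool with the
# 'future' strategy of Hindsight Experience Replay (Andrychowicz et al.,
# 2017): with some probability, a sampled transition's goal is replaced by
# a state actually reached later in the same trajectory and its reward is
# recomputed (note that 'resampling_probability': 0. above disables the
# relabeling entirely). A minimal sketch of that relabeling step, with
# illustrative names rather than the pool's actual implementation:
import numpy as np


def relabel_future_sketch(trajectory, t, resampling_probability, reward_fn):
    """Return the transition at index `t`, possibly with a relabeled goal.

    `trajectory` is a list of (obs, action, goal, next_obs) tuples.
    """
    obs, action, goal, next_obs = trajectory[t]
    if np.random.rand() < resampling_probability:
        # 'future' strategy: pick a state achieved at or after step `t`
        # and pretend it was the goal all along.
        future = np.random.randint(t, len(trajectory))
        goal = trajectory[future][3]
    reward = reward_fn(next_obs, goal)
    return obs, action, goal, next_obs, reward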
def get_variant_spec_base(universe, domain, task, policy, algorithm,
                          env_params):
    algorithm_params = deep_update(
        env_params,
        ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {})
    )
    algorithm_params = deep_update(
        algorithm_params,
        ALGORITHM_PARAMS_ADDITIONAL.get(algorithm, {})
    )
    variant_spec = {
        'git_sha': get_git_rev(),

        'environment_params': {
            'training': {
                'domain': domain,
                'task': task,
                'universe': universe,
                'kwargs': (
                    ENVIRONMENT_PARAMS.get(domain, {}).get(task, {})),
            },
            'evaluation': tune.sample_from(lambda spec: (
                spec.get('config', spec)
                ['environment_params']
                ['training']
            )),
        },
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {}),
            {'log_dir': env_params['log_dir']},
        ),
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
            }
        },
        'algorithm_params': algorithm_params,
        'replay_pool_params': REPLAY_POOL_PARAMS_PER_ALGO.get(
            algorithm, REPLAY_POOL_PARAMS_PER_ALGO['default']),
        'sampler_params': {
            'type': SAMPLER_TYPES_PER_ALGO.get(
                algorithm, SAMPLER_TYPES_PER_ALGO['default']),
            'kwargs': {
                'max_path_length': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'batch_size': 256,
                'preprocess_type': 'default',  # alternative: 'pointgoal0'
            },
        },
        'run_params': {
            'seed': tune.sample_from(
                lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': NUM_EPOCHS_PER_DOMAIN.get(  # @anyboby uncomment
                domain, DEFAULT_NUM_EPOCHS) // NUM_CHECKPOINTS,
            # 'checkpoint_frequency': 1,
            'checkpoint_replay_pool': False,
        },
    }

    return variant_spec
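# Hypothetical usage of the variant directly above. The argument values and
# the 'log_dir' entry are illustrative only; the module-level constants
# (ALGORITHM_PARAMS_PER_DOMAIN, POLICY_PARAMS_BASE, M, NUM_CHECKPOINTS, ...)
# and the `import numpy as np` / `from ray import tune` imports that all of
# these snippets rely on are assumed to exist in the surrounding module:
variant_spec = get_variant_spec_base(
    universe='gym',
    domain='HalfCheetah',
    task='v3',
    policy='gaussian',
    algorithm='SAC',
    env_params={'log_dir': '/tmp/example_run'})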