def create_tasks(number_of_runs, log_dir, env_id):
    """Set up the hyper-parameter search tasks for experiments_a."""
    ts = list()
    alg = 'iwpg'
    defaults = utils.get_defaults_kwargs(alg=alg, env_id=env_id)
    task_number = 0
    gen = hyper_parameter_generator(number_of_runs=number_of_runs)
    hms_time = time.strftime("%Y-%m-%d__%H-%M-%S")
    try:
        while True:
            generated_params = next(gen)
            task_number += 1
            kwargs = defaults.copy()
            experiment_path = os.path.join('experiments_a', env_id)
            logger_kwargs = setup_logger_kwargs(base_dir=log_dir,
                                                exp_name=experiment_path,
                                                seed=task_number,
                                                hms_time=hms_time,
                                                use_tensor_board=False,
                                                verbose=False)
            kwargs.update(logger_kwargs=logger_kwargs,
                          seed=task_number,
                          env_id=env_id,
                          alg=alg,
                          **generated_params)
            target_fn = run_training
            t = Task(id=task_number, target_function=target_fn, kwargs=kwargs)
            ts.append(t)
    except StopIteration:
        print(f'Created {task_number} tasks.')
    return ts
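# Note: the create_tasks variants consume hyper_parameter_generator until it
# raises StopIteration, which is what terminates their `while True` loops.
# The actual generator is defined elsewhere in the repository; the sketch
# below only illustrates the assumed contract (one kwargs dict per run,
# exhausted after number_of_runs draws). The parameter names and sampling
# ranges here are placeholders, not the real search space.
import random

def hyper_parameter_generator(number_of_runs):
    """Illustrative sketch only; yields one hyper-parameter dict per run."""
    for _ in range(number_of_runs):
        yield {
            'pi_lr': 10 ** random.uniform(-4, -3),           # hypothetical parameter
            'target_kl': random.choice([0.01, 0.02, 0.05]),  # hypothetical parameter
        }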
def create_tasks(number_of_runs, log_dir, env_id):
    """Set up the hyper-parameter search tasks for experiments_c."""
    ts = list()
    alg = 'iwpg'
    task_number = 0
    gen = hyper_parameter_generator(number_of_runs)
    try:
        while True:
            generated_params = next(gen)
            task_number += 1
            kwargs = utils.get_defaults_kwargs(alg=alg, env_id=env_id)
            experiment_path = os.path.join('experiments_c', env_id)
            logger_kwargs = setup_logger_kwargs(base_dir=log_dir,
                                                exp_name=experiment_path,
                                                seed=task_number,
                                                use_tensor_board=False,
                                                verbose=False)
            kwargs.update(logger_kwargs=logger_kwargs,
                          seed=task_number,
                          env_id=env_id,
                          alg=alg,
                          **generated_params)
            # deactivate reward scaling for manipulation and bullet tasks
            env_type, _ = utils.get_env_type(env_id=env_id)
            if env_type == 'gym_manipulator_envs' or env_type == 'bullet':
                kwargs['use_reward_scaling'] = False
            target_fn = run_iwpg_training
            t = Task(id=task_number, target_function=target_fn, kwargs=kwargs)
            ts.append(t)
    except StopIteration:
        print(f'Created {task_number} tasks.')
    return ts
def __init__(self,
             alg: str,
             env_id: str,
             log_dir: str,
             seed: int,
             unparsed_args: list = ()
             ) -> None:
    """ Class Constructor """
    self.alg = alg
    self.env_id = env_id
    self.log_dir = log_dir
    self.seed = seed
    self.multi_thread = False
    self.num_runs = 1
    self.training = False
    self.compiled = False
    self.trained = False

    self.default_kwargs = utils.get_defaults_kwargs(alg=alg, env_id=env_id)
    self.kwargs = self.default_kwargs.copy()
    # update algorithm kwargs with unparsed arguments from command line
    keys = [k[2:] for k in unparsed_args[0::2]]  # remove -- from argument names
    values = [eval(v) for v in unparsed_args[1::2]]
    unparsed_dict = {k: v for k, v in zip(keys, values)}
    self.kwargs.update(**unparsed_dict)

    self.logger_kwargs = None  # defined by compile (a specific seed might be passed)
    self.exp_name = os.path.join(self.env_id, self.alg)

    # assigned by class methods
    self.model = None
    self.env = None
    self.scheduler = None
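# The unparsed_args handling above turns leftover command-line tokens of the
# form ['--key', 'value', ...] into keyword arguments that override the
# algorithm defaults. A minimal, standalone illustration of that parsing step
# (the argument names below are made up for demonstration):
unparsed_args_example = ['--gamma', '0.995', '--epochs', '100']
keys = [k[2:] for k in unparsed_args_example[0::2]]      # strip leading '--'
values = [eval(v) for v in unparsed_args_example[1::2]]  # evaluate literals
print(dict(zip(keys, values)))  # {'gamma': 0.995, 'epochs': 100}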
def learn(env_id, **kwargs) -> tuple:
    defaults = utils.get_defaults_kwargs(alg='npg', env_id=env_id)
    defaults.update(**kwargs)  # user-passed kwargs override the algorithm defaults
    alg = NaturalPolicyGradientAlgorithm(env_id=env_id, **defaults)
    ac, env = alg.learn()
    return ac, env
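# Hypothetical usage of the npg learn() entry point: train on one of the
# bullet environments used elsewhere in this file and get back the trained
# actor-critic and the environment. The keyword arguments shown are assumed
# to be part of the default kwargs, not confirmed here.
ac, env = learn('HopperBulletEnv-v0', epochs=5, seed=0)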
def create_tasks(number_of_runs, log_dir, env_id):
    """Set up the hyper-parameter search tasks for experiments_d."""
    ts = list()
    task_number = 1000  # === seed number
    alg = 'iwpg'
    if env_id == 'all':
        env_ids = [
            'HalfCheetahBulletEnv-v0',
            'AntBulletEnv-v0',
            'HopperBulletEnv-v0',
            'Walker2DBulletEnv-v0',
            'HumanoidBulletEnv-v0',
            'ReacherBulletEnv-v0',
            'PusherBulletEnv-v0',
            'KukaBulletEnv-v0',
        ]
    else:
        env_ids = [env_id, ]
    for env_id in env_ids:
        defaults = utils.get_defaults_kwargs(alg=alg, env_id=env_id)
        gen = hyper_parameter_generator(number_of_runs)
        try:
            while True:
                generated_params = next(gen)
                task_number += 1
                kwargs = defaults.copy()
                experiment_path = os.path.join('experiments_d', env_id)
                logger_kwargs = setup_logger_kwargs(base_dir=log_dir,
                                                    exp_name=experiment_path,
                                                    seed=task_number,
                                                    use_tensor_board=False,
                                                    verbose=False)
                kwargs.update(logger_kwargs=logger_kwargs,
                              seed=task_number,
                              env_id=env_id,
                              alg=alg,
                              **generated_params)
                # deactivate reward scaling for manipulation and bullet tasks
                env_type, _ = utils.get_env_type(env_id=env_id)
                if env_type == 'gym_manipulator_envs' or env_type == 'bullet':
                    kwargs['use_reward_scaling'] = False
                target_fn = run_training
                t = Task(id=task_number, target_function=target_fn, kwargs=kwargs)
                ts.append(t)
        except StopIteration:
            pass
    print(f'Created {len(ts)} tasks.')
    return ts
def check_alg(alg_name, env_id):
    """Run one epoch update with algorithm."""
    print(f'Run {alg_name}.')
    defaults = U.get_defaults_kwargs(alg=alg_name, env_id=env_id)
    defaults['epochs'] = 1
    defaults['num_mini_batches'] = 4
    defaults['steps_per_epoch'] = 1000
    defaults['verbose'] = False
    learn_fn = U.get_learn_function(alg_name)
    defaults['logger_kwargs'] = setup_logger_kwargs(exp_name='unittest',
                                                    seed=None,
                                                    base_dir='/var/tmp/',
                                                    datestamp=True,
                                                    use_tensor_board=True,
                                                    verbose=False)
    return learn_fn(env_id, **defaults)
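# Example smoke test using check_alg: run a single one-epoch update for one
# algorithm/environment pair. The pair shown is illustrative and assumes both
# are registered with get_learn_function and get_defaults_kwargs.
if __name__ == '__main__':
    check_alg(alg_name='iwpg', env_id='HopperBulletEnv-v0')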