def main(args, unknown_args): args, config = parse_args_uargs(args, unknown_args) set_global_seed(args.seed) prepare_cudnn(args.deterministic, args.benchmark) if args.logdir is not None: os.makedirs(args.logdir, exist_ok=True) dump_environment(config, args.logdir, args.configs) if args.expdir is not None: module = import_module(expdir=args.expdir) # noqa: F841 if args.logdir is not None: dump_code(args.expdir, args.logdir) env = ENVIRONMENTS.get_from_params(**config["environment"]) algorithm_name = config["algorithm"].pop("algorithm") if algorithm_name in OFFPOLICY_ALGORITHMS_NAMES: ALGORITHMS = OFFPOLICY_ALGORITHMS trainer_fn = OffpolicyTrainer sync_epoch = False elif algorithm_name in ONPOLICY_ALGORITHMS_NAMES: ALGORITHMS = ONPOLICY_ALGORITHMS trainer_fn = OnpolicyTrainer sync_epoch = True else: # @TODO: add registry for algorithms, trainers, samplers raise NotImplementedError() db_server = DATABASES.get_from_params( **config.get("db", {}), sync_epoch=sync_epoch ) algorithm_fn = ALGORITHMS.get(algorithm_name) algorithm = algorithm_fn.prepare_for_trainer(env_spec=env, config=config) if args.resume is not None: checkpoint = utils.load_checkpoint(filepath=args.resume) checkpoint = utils.any2device(checkpoint, utils.get_device()) algorithm.unpack_checkpoint( checkpoint=checkpoint, with_optimizer=False ) monitoring_params = config.get("monitoring_params", None) trainer = trainer_fn( algorithm=algorithm, env_spec=env, db_server=db_server, logdir=args.logdir, monitoring_params=monitoring_params, **config["trainer"], ) trainer.run()
def main(args, unknown_args): args, config = parse_args_uargs(args, unknown_args) set_global_seed(args.seed) if args.logdir is not None: os.makedirs(args.logdir, exist_ok=True) dump_config(config, args.logdir, args.configs) if args.expdir is not None: module = import_module(expdir=args.expdir) # noqa: F841 env = ENVIRONMENTS.get_from_params(**config["environment"]) algorithm_name = config["algorithm"].pop("algorithm") if algorithm_name in OFFPOLICY_ALGORITHMS_NAMES: ALGORITHMS = OFFPOLICY_ALGORITHMS trainer_fn = OffpolicyTrainer sync_epoch = False weights_sync_mode = "critic" if env.discrete_actions else "actor" elif algorithm_name in ONPOLICY_ALGORITHMS_NAMES: ALGORITHMS = ONPOLICY_ALGORITHMS trainer_fn = OnpolicyTrainer sync_epoch = True weights_sync_mode = "actor" else: # @TODO: add registry for algorithms, trainers, samplers raise NotImplementedError() db_server = DATABASES.get_from_params( **config.get("db", {}), sync_epoch=sync_epoch ) algorithm_fn = ALGORITHMS.get(algorithm_name) algorithm = algorithm_fn.prepare_for_trainer(env_spec=env, config=config) if args.resume is not None: algorithm.load_checkpoint(filepath=args.resume) trainer = trainer_fn( algorithm=algorithm, env_spec=env, db_server=db_server, logdir=args.logdir, weights_sync_mode=weights_sync_mode, **config["trainer"], ) trainer.run()
def main(args, unknown_args): args, config = parse_args_uargs(args, unknown_args) set_global_seed(args.seed) if args.logdir is not None: os.makedirs(args.logdir, exist_ok=True) dump_config(args.configs, args.logdir) if args.expdir is not None: module = import_module(expdir=args.expdir) # noqa: F841 algorithm_name = config["algorithm"].pop("algorithm") if algorithm_name in OFFPOLICY_ALGORITHMS_NAMES: ALGORITHMS = OFFPOLICY_ALGORITHMS trainer_fn = OffpolicyTrainer sync_epoch = False else: ALGORITHMS = ONPOLICY_ALGORITHMS trainer_fn = OnpolicyTrainer sync_epoch = True db_server = DATABASES.get_from_params(**config.get("db", {}), sync_epoch=sync_epoch) env = ENVIRONMENTS.get_from_params(**config["environment"]) algorithm_fn = ALGORITHMS.get(algorithm_name) algorithm = algorithm_fn.prepare_for_trainer(env_spec=env, config=config) if args.resume is not None: algorithm.load_checkpoint(filepath=args.resume) trainer = trainer_fn( algorithm=algorithm, env_spec=env, db_server=db_server, **config["trainer"], logdir=args.logdir, ) def on_exit(): for p in trainer.get_processes(): p.terminate() atexit.register(on_exit) trainer.run()