# NOTE(review): this chunk was whitespace-collapsed onto one line; reconstructed
# into conventional formatting below. The chunk opens with the tail of an
# initialize()-style method whose `def` head lies outside this view — it is
# preserved here as a fragment comment rather than guessed at:
#
#     ... T=args.T)
#     return utils_elf.GCWrapper(GC, inputs, replies, name2idx, params)
#
# TODO(review): restore the enclosing method once the full file is available.

nIter = 5000
elapsed_wait_only = 0

# NOTE(review): mid-script imports kept where the original placed them.
import pickle
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    loader = Loader()
    args = args_loader(parser, [loader])

    def actor(sel, sel_gpu, reply):
        # The original "docstring" here is disabled debug code (pdb trace +
        # pickle dump of the selected batch); reproduced verbatim.
        '''
        import pdb
        pdb.set_trace()
        pickle.dump(utils_elf.to_numpy(sel), open("tmp%d.bin" % k, "wb"), protocol=2)
        '''
        # Zero out the action field of the first reply batch in place.
        reply[0]["a"][:] = 0

    GC = loader.initialize()
    GC.reg_callback("actor", actor)

    before = datetime.now()
    GC.Start()
# NOTE(review): whitespace-collapsed chunk, reconstructed below. It opens with
# the tail of an initialize()-style method whose `def` head is outside this
# view; kept as a fragment comment rather than guessed at:
#
#     params["hist_len"] = args.hist_len
#     params["T"] = args.T
#     return utils_elf.GCWrapper(GC, inputs, replies, name2idx, params)
#
# TODO(review): restore the enclosing method once the full file is available.

# Fixed command line used when this variant runs standalone.
cmd_line = "--num_games 16 --batchsize 4 --hist_len 1 --frame_skip 4 --actor_only"

nIter = 5000
elapsed_wait_only = 0

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    loader = Loader()
    args = args_loader(parser, [loader], cmd_line=cmd_line.split(" "))
    GC = loader.initialize()

    def actor(sel, sel_gpu, reply):
        # Disabled debug dump of the selected batch, kept verbatim:
        # pickle.dump(to_numpy(sel), open("tmp%d.bin" % k, "wb"), protocol=2)
        reply[0]["a"][:] = 0

    GC.reg_callback("actor", actor)
    reward_dist = Counter()

    before = datetime.now()
    GC.Start()
    # NOTE(review): original places this import after GC.Start(); kept as-is.
    import tqdm
# NOTE(review): whitespace-collapsed chunk, reconstructed below. This is the
# interior of a training-setup script; the surrounding context (definitions of
# method_class, sampler, trainer, game, runner, model_loader, parser,
# use_multi_process, ...) lies outside this view.

method = method_class()
args_providers = [sampler, trainer, game, runner, model_loader, method]

# Evaluation wiring is toggled purely via environment variables.
# NOTE(review): os.environ.get returns a *string* when the variable is set, so
# any non-empty value (including "0") is truthy here — presumably intentional;
# confirm against how these variables are documented.
eval_only = os.environ.get("eval_only", False)
has_eval_process = os.environ.get("eval_process", False)

if has_eval_process or eval_only:
    eval_process = EvaluationProcess()
    evaluator = Eval()
    args_providers.append(eval_process)
    args_providers.append(evaluator)
else:
    eval_process = None

all_args = args_loader(parser, args_providers)

GC = game.initialize()
GC.setup_gpu(0)

all_args.method_class = method_class

# Build the model and register it twice on the interface: once for training
# ("model") and once as a CUDA copy for acting ("actor").
model = model_loader.load_model(GC.params)
mi = ModelInterface()
mi.add_model("model", model, optim_params={"lr": 0.001})
mi.add_model("actor", model, copy=True, cuda=True)
method.set_model_interface(mi)

trainer.setup(sampler=sampler, mi=mi, rl_method=method)

# NOTE(review): the chunk ends here — any else-branch or continuation of this
# conditional lies outside the visible source.
if use_multi_process:
    GC.reg_callback("actor", trainer.actor)