def run(args):
    device = torch.device("cpu")
    env = gym.make('SpaceInvaders-v0')
    state_size = env.observation_space.shape
    action_size = env.action_space.n
    model = ActorCritic([1, 4, 84, 84], action_size).to(device)
    opt = SharedRMSprop(model.parameters(), lr=args.lr, alpha=args.alpha,
                        eps=1e-8, weight_decay=args.weight_decay,
                        momentum=args.momentum, centered=False)
    opt_lock = mp.Lock()
    scheduler = LRScheduler(args)

    if args.load_fp:
        checkpoint = torch.load(args.load_fp)
        model.load_state_dict(checkpoint['model_state_dict'])
        opt.load_state_dict(checkpoint['optimizer_state_dict'])

    if args.train:
        start = time.time()
        model.share_memory()
        model.train()

        step_counter, max_reward, ma_reward, ma_loss = [
            mp.Value('d', 0.0) for _ in range(4)
        ]

        processes = []
        if args.num_procs == -1:
            args.num_procs = mp.cpu_count()
        for rank in range(args.num_procs):
            p = mp.Process(target=train,
                           args=(rank, args, device, model, opt, opt_lock,
                                 scheduler, step_counter, max_reward,
                                 ma_reward, ma_loss))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()

        if args.verbose > 0:
            print(f"Seconds taken: {time.time() - start}")
        if args.save_fp:
            torch.save(
                {
                    'model_state_dict': model.state_dict(),
                    # 'optimizer_state_dict': opt.state_dict(),
                }, args.save_fp)

    if args.test:
        model.eval()
        test(args, device, model)
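# The SharedRMSprop used above (and by the later snippets) is never defined
# in this file. Below is a minimal sketch, assuming the common A3C pattern
# from ikostrikov-style pytorch-a3c repos: pre-allocate the per-parameter
# state that torch.optim.RMSprop would otherwise create lazily on the first
# step(), then move that state into shared memory before the workers fork.
# The exact state keys follow older torch.optim versions and are an
# assumption, not the canonical implementation.
import torch
import torch.optim as optim


class SharedRMSprop(optim.RMSprop):
    """RMSprop whose per-parameter state lives in shared memory, so
    updates from any worker process are visible to every other one."""

    def __init__(self, params, lr=1e-3, alpha=0.99, eps=1e-8,
                 weight_decay=0, momentum=0, centered=False):
        super().__init__(params, lr=lr, alpha=alpha, eps=eps,
                         weight_decay=weight_decay, momentum=momentum,
                         centered=centered)
        # Eagerly create the state tensors step() would allocate lazily.
        for group in self.param_groups:
            for p in group['params']:
                state = self.state[p]
                state['step'] = torch.zeros(1)
                state['square_avg'] = torch.zeros_like(p.data)
                if group['momentum'] > 0:
                    state['momentum_buffer'] = torch.zeros_like(p.data)
                if group['centered']:
                    state['grad_avg'] = torch.zeros_like(p.data)

    def share_memory(self):
        # Must run in the parent process, before mp.Process(...).start().
        for group in self.param_groups:
            for p in group['params']:
                for buf in self.state[p].values():
                    if torch.is_tensor(buf):
                        buf.share_memory_()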
def main(scripts, args):
    scripts = " ".join(sys.argv[0:])
    args = parser.parse_args()
    args.scripts = scripts

    torch.manual_seed(args.seed)
    if args.gpu_ids == -1:
        args.gpu_ids = [-1]
    else:
        torch.cuda.manual_seed(args.seed)
        mp.set_start_method('spawn')

    if args.deploy:
        (raw, gt_lbl, raw_valid, gt_lbl_valid, raw_test, gt_lbl_test,
         raw_test_upsize, gt_lbl_test_upsize) = setup_data(args)
    else:
        raw, gt_lbl, raw_valid, gt_lbl_valid, raw_test, gt_lbl_test = setup_data(args)

    env_conf = setup_env_conf(args)

    shared_model = get_model(args, args.model, env_conf["observation_shape"],
                             args.features, atrous_rates=args.atr_rate,
                             num_actions=2, split=args.data_channel,
                             multi=args.multi)

    manager = mp.Manager()
    shared_dict = manager.dict()
    if args.wctrl == "s2m":
        shared_dict["spl_w"] = args.spl_w
        shared_dict["mer_w"] = args.mer_w

    if args.load:
        saved_state = torch.load(args.load,
                                 map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)
    shared_model.share_memory()

    if args.shared_optimizer:
        if args.optimizer == 'RMSprop':
            optimizer = SharedRMSprop(shared_model.parameters(), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = SharedAdam(shared_model.parameters(), lr=args.lr,
                                   amsgrad=args.amsgrad)
        optimizer.share_memory()
    else:
        optimizer = None

    processes = []
    if not args.no_test:
        if raw_test is not None:
            if args.deploy:
                # shared_dict is passed as its own argument here, matching
                # the other two branches (the original had it inside the
                # test-data tuple, which leaves test_func an argument short).
                p = mp.Process(target=test_func,
                               args=(args, shared_model, env_conf,
                                     [raw_valid, gt_lbl_valid],
                                     (raw_test, gt_lbl_test, raw_test_upsize,
                                      gt_lbl_test_upsize), shared_dict))
            else:
                p = mp.Process(target=test_func,
                               args=(args, shared_model, env_conf,
                                     [raw_valid, gt_lbl_valid],
                                     (raw_test, gt_lbl_test), shared_dict))
        else:
            p = mp.Process(target=test_func,
                           args=(args, shared_model, env_conf,
                                 [raw_valid, gt_lbl_valid], None, shared_dict))
        p.start()
        processes.append(p)
        time.sleep(0.1)

    for rank in range(0, args.workers):
        p = mp.Process(target=train_func,
                       args=(rank, args, shared_model, optimizer, env_conf,
                             [raw, gt_lbl], shared_dict))
        p.start()
        processes.append(p)
        time.sleep(0.1)

    for p in processes:
        time.sleep(0.1)
        p.join()
env_conf["num_action"], args.hidden_feat) else: shared_model = A3Clstm_continuous(env_conf["observation_shape"], env_conf["num_action"], args.hidden_feat) if args.load: saved_state = torch.load('{0}{1}.dat'.format(args.load_model_dir, args.env), map_location=lambda storage, loc: storage) shared_model.load_state_dict(saved_state) shared_model.share_memory() if args.shared_optimizer: if args.optimizer == 'RMSprop': optimizer = SharedRMSprop(shared_model.parameters(), lr=args.lr) if args.optimizer == 'Adam': optimizer = SharedAdam(shared_model.parameters(), lr=args.lr, amsgrad=args.amsgrad) optimizer.share_memory() else: optimizer = None processes = [] if "EM_env" in args.env: p = mp.Process(target=test, args=(args, shared_model, env_conf, [raw, lbl, prob, gt_lbl], True)) else: p = mp.Process(target=test, args=(args, shared_model, env_conf))
                                  map_location=lambda storage, loc: storage)
    for k, v in model_state.items():
        if 'pose_actor' in k:
            model_state[k] = pose_saved_state[k]
        if 'pose_BiRNN' in k:
            key = k.replace('pose_BiRNN', 'global_net.pose_BiRNN')
            model_state[k] = pose_saved_state[key]
    shared_model.load_state_dict(model_state)

    params = shared_model.parameters()
    shared_model.share_memory()

    if args.shared_optimizer:
        print('share memory')
        if args.optimizer == 'RMSprop':
            optimizer = SharedRMSprop(params, lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = SharedAdam(params, lr=args.lr, amsgrad=args.amsgrad)
        optimizer.share_memory()
    else:
        optimizer = None

    current_time = datetime.now().strftime('%b%d_%H-%M')
    args.log_dir = os.path.join(args.log_dir, args.env, current_time)

    env.close()

    processes = []
    manager = mp.Manager()
    train_modes = manager.list()
    n_iters = manager.list()
        if i in args.env:
            env_conf = setup_json[i]
    # env = atari_env(args.env, env_conf, args)
    env = OC_env(args.env)
    shared_model = OCPGModel(env.observation_space.shape[0],
                             env.action_space, args.options, args.width)
    if args.load:
        saved_state = torch.load('{0}{1}.dat'.format(args.load_model_dir, args.env),
                                 map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)
    shared_model.share_memory()

    if args.shared_optimizer:
        if args.optimizer == 'RMSprop':
            optimizer = SharedRMSprop(shared_model.parameters(), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = SharedAdam(shared_model.parameters(), lr=args.lr,
                                   amsgrad=args.amsgrad)
        if args.load:
            saved_state = torch.load('{0}{1}1.torch'.format(
                args.load_model_dir, args.env),
                map_location=lambda storage, loc: storage)
            print("load state dict")
            optimizer.load_state_dict(saved_state)
            print("loaded optimizer")
        optimizer.share_memory()
    else:
        optimizer = None
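    # The optimizer-loading branch above implies a matching save step that
    # this snippet does not show. A minimal hypothetical sketch
    # (args.save_model_dir is an assumed flag, not from the original code)
    # that writes the two files the loader expects:
    # torch.save(shared_model.state_dict(),
    #            '{0}{1}.dat'.format(args.save_model_dir, args.env))
    # torch.save(optimizer.state_dict(),
    #            '{0}{1}1.torch'.format(args.save_model_dir, args.env))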
    ]
    if args.load:
        saved_state = torch.load('{0}{1}_early.dat'.format(
            args.load_model_dir, args.env),
            map_location=lambda storage, loc: storage)
        shared_models[0].load_state_dict(saved_state)
        saved_state = torch.load('{0}{1}_late.dat'.format(
            args.load_model_dir, args.env),
            map_location=lambda storage, loc: storage)
        shared_models[1].load_state_dict(saved_state)
    shared_models[0].share_memory()
    shared_models[1].share_memory()

    if args.optimizer == 'RMSprop':
        optimizers = [
            SharedRMSprop(shared_models[0].parameters(), lr=args.lr),
            SharedRMSprop(shared_models[1].parameters(), lr=args.lr)
        ]
    if args.optimizer == 'Adam':
        optimizers = [
            SharedAdam(shared_models[0].parameters(), lr=args.lr,
                       amsgrad=args.amsgrad),
            SharedAdam(shared_models[1].parameters(), lr=args.lr,
                       amsgrad=args.amsgrad)
        ]
    optimizers[0].share_memory()
    optimizers[1].share_memory()

    processes = []
    def run(self):
        torch.manual_seed(args.seed)
        if args.gpu_ids == -1:
            args.gpu_ids = [-1]
        else:
            torch.cuda.manual_seed(args.seed)
            mp.set_start_method('spawn')

        # env = make_env(env_type=args.env_type, env_name=args.env_name, args=args)
        shared_model = UNREAL(in_channels=3, action_size=6,
                              enable_pixel_control=True)
        if args.load:
            saved_state = torch.load(
                '{0}{1}.dat'.format(args.load_model_dir, args.env),
                map_location=lambda storage, loc: storage)
            shared_model.load_state_dict(saved_state)
        shared_model.share_memory()

        lr = log_uniform(1e-4, 5e-3, 0.5)
        if args.shared_optimizer:
            if args.optimizer == 'RMSprop':
                optimizer = SharedRMSprop(shared_model.parameters(), lr=lr,
                                          eps=0.1)
            if args.optimizer == 'Adam':
                optimizer = SharedAdam(shared_model.parameters(), lr=lr,
                                       amsgrad=args.amsgrad)
            optimizer.share_memory()
        else:
            optimizer = None

        # p = mp.Process(target=train, args=(args, shared_model, env_conf))
        # p.start()
        # processes.append(p)
        # time.sleep(0.1)

        self.stop_requested = False
        self.terminate_reqested = False
        for rank in range(0, args.workers):
            trainer = Trainer(rank, args, shared_model=shared_model,
                              optimizer=optimizer, lr=lr)
            self.trainers.append(trainer)
            # time.sleep(0.1)

        # set the start time of the run (wall_t)
        self.start_time = time.time() - 0

        processes = []
        for rank in range(0, args.workers):
            if rank == 0:
                p = mp.Process(target=self.train_function,
                               args=(rank, True, True))
            else:
                p = mp.Process(target=self.train_function, args=(rank, True))
            p.start()
            processes.append(p)

        # register the termination signal handler
        signal.signal(signal.SIGINT, self.signal_handler)
        print('Press Ctrl+C to stop')

        for rank in range(0, args.workers):
            time.sleep(0.01)
            processes[rank].join()
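# The log_uniform helper called above is not defined in the snippet. A
# minimal sketch consistent with its call site, assuming the
# miyosuda/unreal-style helper that interpolates between lo and hi on a
# log scale (rate=0.5 yields the geometric mean of the two bounds; pass a
# random rate in [0, 1] to sample a learning rate log-uniformly):
import math


def log_uniform(lo, hi, rate):
    # Interpolate in log space so values spread evenly across magnitudes.
    return math.exp(math.log(lo) * (1 - rate) + math.log(hi) * rate)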
def start():
    args = parser.parse_args()
    args.shared_optimizer = True
    if args.gpu_ids == -1:
        torch.manual_seed(args.seed)
        args.gpu_ids = [-1]
        device_share = torch.device('cpu')
        mp.set_start_method('spawn')
    else:
        torch.cuda.manual_seed(args.seed)
        mp.set_start_method('spawn', force=True)
        if len(args.gpu_ids) > 1:
            device_share = torch.device('cpu')
        else:
            device_share = torch.device('cuda:' + str(args.gpu_ids[-1]))

    env = create_env(args.env, args)
    shared_model = build_model(env.observation_space, env.action_space,
                               args, device_share).to(device_share)
    shared_model.share_memory()
    env.close()
    del env

    if args.load_coordinator_dir is not None:
        saved_state = torch.load(args.load_coordinator_dir,
                                 map_location=lambda storage, loc: storage)
        if args.load_coordinator_dir[-3:] == 'pth':
            shared_model.load_state_dict(saved_state['model'], strict=False)
        else:
            shared_model.load_state_dict(saved_state)

    params = shared_model.parameters()
    if args.shared_optimizer:
        print('share memory')
        if args.optimizer == 'RMSprop':
            optimizer = SharedRMSprop(params, lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = SharedAdam(params, lr=args.lr, amsgrad=args.amsgrad)
        optimizer.share_memory()
    else:
        optimizer = None

    current_time = datetime.now().strftime('%b%d_%H-%M')
    args.log_dir = os.path.join(args.log_dir, args.env, current_time)

    processes = []
    manager = mp.Manager()
    train_modes = manager.list()
    n_iters = manager.list()

    p = mp.Process(target=test, args=(args, shared_model, optimizer,
                                      train_modes, n_iters))
    p.start()
    processes.append(p)
    time.sleep(args.sleep_time)

    for rank in range(0, args.workers):
        p = mp.Process(target=train, args=(rank, args, shared_model,
                                           optimizer, train_modes, n_iters))
        p.start()
        processes.append(p)
        time.sleep(args.sleep_time)

    for p in processes:
        time.sleep(args.sleep_time)
        p.join()
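# Every launcher in this file hands shared_model and a shared optimizer to
# train/train_func workers whose bodies are not shown. Below is a minimal
# sketch of the worker side of this Hogwild-style loop; make_env,
# build_model, and rollout_and_compute_loss are hypothetical placeholders,
# not functions from the original code.
import torch


def ensure_shared_grads(local_model, shared_model):
    # Alias the shared parameters' .grad to this worker's local gradient
    # tensors; after the first step the aliases persist in this process,
    # so later iterations return early.
    for param, shared_param in zip(local_model.parameters(),
                                   shared_model.parameters()):
        if shared_param.grad is not None:
            return
        shared_param._grad = param.grad


def train(rank, args, shared_model, optimizer):
    torch.manual_seed(args.seed + rank)   # decorrelate the workers
    env = make_env(args)                  # hypothetical env factory
    local_model = build_model(args)       # hypothetical model factory
    while True:
        # Pull the latest shared weights into the local copy.
        local_model.load_state_dict(shared_model.state_dict())
        # Roll out and build the actor-critic loss (elided).
        loss = rollout_and_compute_loss(local_model, env, args)
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(local_model.parameters(), 40.0)
        ensure_shared_grads(local_model, shared_model)
        optimizer.step()                  # steps the shared parameters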
        self.tau = 1.0


if __name__ == "__main__":
    params = Params()
    mp.set_start_method('spawn')
    count = mp.Value('i', 0)  # update count
    lock = mp.Lock()

    # shared_model = A3C()
    shared_model = A3C_LSTM()
    shared_model = shared_model.share_memory()

    # shared_optimizer = SharedAdam(shared_model.parameters(), lr=params.lr,
    #                               amsgrad=params.amsgrad,
    #                               weight_decay=params.weight_decay)
    shared_optimizer = SharedRMSprop(shared_model.parameters(), lr=params.lr)
    shared_optimizer.share_memory()

    # run_loop(0, params, shared_model, shared_optimizer, count, lock)  # for debugging
    # test(0, params, shared_model, count, lock)

    processes = []
    # have to add test module
    p = mp.Process(target=test, args=(
        0, params, shared_model, count, lock,
    ))