def __init__(self, env, worker_id, gamma, env_render, logger, verbose):
    self.env = env
    self.worker_id = worker_id

    # discount rate
    self.gamma = gamma

    self.trajectory = []

    # learning rate
    self.learning_rate = LEARNING_RATE

    self.env_render = env_render
    self.logger = logger
    self.verbose = verbose

    # policy network plus a separate target network that provides stable Q-learning targets
    self.policy_model = rl_utils.get_rl_model(self.env).to(device)
    self.target_model = rl_utils.get_rl_model(self.env).to(device)
    self.target_model.load_state_dict(self.policy_model.state_dict())
    self.target_model.eval()

    # only the policy network is optimized; the target network is updated by copying weights
    self.optimizer = rl_utils.get_optimizer(
        parameters=self.policy_model.parameters(),
        learning_rate=self.learning_rate
    )

    self.memory = ReplayMemory(10000)
    self.steps_done = 0

    self.model = self.policy_model
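The constructor instantiates ReplayMemory(10000), but the buffer itself is not part of this excerpt. Below is a minimal sketch of such a buffer, assuming the usual fixed-capacity push/sample interface from the standard DQN recipe; the Transition field names are illustrative and may differ from the project's actual definition.

import random
from collections import deque, namedtuple

# One stored interaction; field names are an assumption for this sketch.
Transition = namedtuple("Transition", ("state", "action", "next_state", "reward"))

class ReplayMemory:
    """Fixed-size FIFO buffer of past transitions for off-policy DQN updates."""

    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)   # oldest transitions are dropped first

    def push(self, *args):
        self.buffer.append(Transition(*args))  # store one transition

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)  # uniform random minibatch

    def __len__(self):
        return len(self.buffer)

Sampling minibatches uniformly from a large buffer breaks the correlation between consecutive environment steps, which is why the worker keeps this memory alongside the policy/target pair.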
def __init__(self, env, worker_id, gamma, env_render, logger, verbose):
    self.env = env
    self.worker_id = worker_id

    # discount rate
    self.gamma = gamma

    self.trajectory = []

    # learning rate
    self.learning_rate = LEARNING_RATE

    self.env_render = env_render
    self.logger = logger
    self.verbose = verbose

    self.model = rl_utils.get_rl_model(self.env, self.worker_id)

    self.optimizer = rl_utils.get_optimizer(
        parameters=self.model.parameters(),
        learning_rate=self.learning_rate
    )
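This second worker keeps a trajectory list and a single on-policy model instead of a replay memory, which is the usual shape of a policy-gradient worker: it appends one transition per step and, at episode end, converts the collected rewards into discounted returns with gamma. The worker's actual update code is not shown here; the helper below is a hypothetical illustration of that return computation only.

# Hypothetical helper: turn the rewards gathered in self.trajectory into
# discounted returns G_t = r_t + gamma * G_{t+1}. Not part of the project code.
def discounted_returns(rewards, gamma):
    returns = []
    g = 0.0
    for r in reversed(rewards):      # accumulate from the last step backwards
        g = r + gamma * g
        returns.insert(0, g)
    return returns

# e.g. with gamma = 0.99:
# discounted_returns([1.0, 1.0, 1.0], 0.99) -> [2.9701, 1.99, 1.0]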
os.environ["CUDA_VISIBLE_DEVICES"] = CUDA_VISIBLE_DEVICES_NUMBER_LIST if __name__ == "__main__": torch.manual_seed(SEED) if torch.cuda.is_available(): torch.cuda.manual_seed(SEED) torch.backends.cudnn.benchmark = False torch.backends.cudnn.deterministic = True utils.make_output_folders() utils.ask_file_removal() env = rl_utils.get_environment() rl_model = rl_utils.get_rl_model(env, -1) utils.print_configuration(env, rl_model) try: chief = Process(target=utils.run_chief, args=()) chief.start() time.sleep(1.5) workers = [] for worker_id in range(NUM_WORKERS): worker = Process(target=utils.run_worker, args=(worker_id, )) workers.append(worker) worker.start()