def __init__(self, algorithm, obs_dim, action_dim):
    self._action_dim = action_dim
    self._obs_dim = obs_dim
    self._update_target_steps = 1000
    self._global_step = 0

    super(ElevatorAgent, self).__init__(algorithm)

    use_cuda = self.gpu_id >= 0
    if self.gpu_id >= 0:
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'
    else:
        cpu_num = os.environ.get('CPU_NUM')
        assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
            Please set environment variable: `export CPU_NUM=1`.'

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 1
    exec_strategy.num_iteration_per_drop_scope = 10
    build_strategy = fluid.BuildStrategy()
    build_strategy.remove_unnecessary_lock = False

    self.learn_pe = fluid.ParallelExecutor(
        use_cuda=use_cuda,
        main_program=self._learn_program,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy,
    )

def __init__(self, algorithm, obs_dim, action_dim):
    self._action_dim = action_dim
    self._obs_dim = obs_dim
    self._update_target_steps = 1000
    self._global_step = 0

    self.exploration_ratio = 0.9
    self.exploration_decre = 1e-7
    self.exploration_min = 0.1

    super(ElevatorAgent, self).__init__(algorithm)

    use_cuda = machine_info.is_gpu_available()
    if self.gpu_id >= 0:
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'
    else:
        os.environ['CPU_NUM'] = str(1)

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 1
    exec_strategy.num_iteration_per_drop_scope = 10
    build_strategy = fluid.BuildStrategy()
    build_strategy.remove_unnecessary_lock = False

    self.learn_pe = fluid.ParallelExecutor(
        use_cuda=use_cuda,
        main_program=self.learn_program,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy,
    )

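# A minimal sketch of the process-level setup the assertions above expect.
# This bootstrap snippet is not part of the original agents; it simply pins the
# process to one GPU via CUDA_VISIBLE_DEVICES, or falls back to single-CPU
# training, mirroring the `export` commands named in the assertion messages.
import os

if os.environ.get('CUDA_VISIBLE_DEVICES') is None:
    # No GPU pinned: satisfy the single-CPU requirement (`export CPU_NUM=1`).
    os.environ['CPU_NUM'] = '1'
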
def __init__(self,
             algorithm,
             agent_index=None,
             obs_dim_n=None,
             act_dim_n=None,
             batch_size=None,
             speedup=False):
    assert isinstance(agent_index, int)
    assert isinstance(obs_dim_n, list)
    assert isinstance(act_dim_n, list)
    assert isinstance(batch_size, int)
    assert isinstance(speedup, bool)
    self.agent_index = agent_index
    self.obs_dim_n = obs_dim_n
    self.act_dim_n = act_dim_n
    self.batch_size = batch_size
    self.speedup = speedup
    self.n = len(act_dim_n)

    self.memory_size = int(1e6)
    self.min_memory_size = batch_size * 25  # batch_size * args.max_episode_len
    self.rpm = ReplayMemory(
        max_size=self.memory_size,
        obs_dim=self.obs_dim_n[agent_index],
        act_dim=self.act_dim_n[agent_index])
    self.global_train_step = 0

    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

    super(MAAgent, self).__init__(algorithm)

    # Attention: In the beginning, sync target model totally.
    self.alg.sync_target(decay=0)

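# A minimal sketch of what sync_target(decay) conventionally does (soft/Polyak
# update): with decay=0 the target parameters are fully overwritten by the
# current model, which is why it is called once at the start. The helper below
# is illustrative only, not the actual PARL implementation.
def soft_sync(param, target_param, decay):
    # decay=0 -> full copy; decay close to 1 -> slowly moving target
    return decay * target_param + (1.0 - decay) * param
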
def __init__(self, config):
    self.config = config

    self.sample_data_queue = queue.Queue(
        maxsize=config['sample_queue_max_size'])

    #=========== Create Agent ==========
    env = gym.make(config['env_name'])
    env = wrap_deepmind(env, dim=config['env_dim'], obs_format='NCHW')
    obs_shape = env.observation_space.shape
    act_dim = env.action_space.n

    model = AtariModel(act_dim)
    algorithm = parl.algorithms.IMPALA(
        model,
        sample_batch_steps=self.config['sample_batch_steps'],
        gamma=self.config['gamma'],
        vf_loss_coeff=self.config['vf_loss_coeff'],
        clip_rho_threshold=self.config['clip_rho_threshold'],
        clip_pg_rho_threshold=self.config['clip_pg_rho_threshold'])
    self.agent = AtariAgent(algorithm, obs_shape, act_dim,
                            self.learn_data_provider)

    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

    self.cache_params = self.agent.get_weights()
    self.params_lock = threading.Lock()
    self.params_updated = False
    self.cache_params_sent_cnt = 0
    self.total_params_sync = 0

    #========== Learner ==========
    self.lr, self.entropy_coeff = None, None
    self.lr_scheduler = PiecewiseScheduler(config['lr_scheduler'])
    self.entropy_coeff_scheduler = PiecewiseScheduler(
        config['entropy_coeff_scheduler'])

    self.total_loss_stat = WindowStat(100)
    self.pi_loss_stat = WindowStat(100)
    self.vf_loss_stat = WindowStat(100)
    self.entropy_stat = WindowStat(100)
    self.kl_stat = WindowStat(100)
    self.learn_time_stat = TimeStat(100)
    self.start_time = None

    self.learn_thread = threading.Thread(target=self.run_learn)
    self.learn_thread.setDaemon(True)
    self.learn_thread.start()

    #========== Remote Actor ===========
    self.remote_count = 0
    self.batch_buffer = []
    self.remote_metrics_queue = queue.Queue()
    self.sample_total_steps = 0

    self.create_actors()

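# A minimal sketch of the learn_data_provider generator handed to AtariAgent
# above: it is assumed to block on sample_data_queue and yield training data to
# the learn thread. The real batching/unpacking logic is not shown in this
# excerpt, so the body below is illustrative only.
def learn_data_provider(self):
    while True:
        batch = self.sample_data_queue.get()
        yield batch
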
def setUp(self):
    self.model = TestModel()
    self.target_model = TestModel()
    self.target_model2 = TestModel()
    self.target_model3 = TestModel()

    gpu_count = get_gpu_count()
    device = torch.device('cuda' if gpu_count else 'cpu')

def setUp(self):
    gpu_count = get_gpu_count()
    if gpu_count > 0:
        place = fluid.CUDAPlace(0)
        self.gpu_id = 0
    else:
        place = fluid.CPUPlace()
        self.gpu_id = -1
    self.executor = fluid.Executor(place)

def setUp(self):
    self.model = TestModel()
    self.target_model = deepcopy(self.model)
    self.target_model2 = deepcopy(self.model)

    gpu_count = get_gpu_count()
    if gpu_count > 0:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    self.executor = fluid.Executor(place)

def __init__(self, args):
    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'
    else:
        cpu_num = os.environ.get('CPU_NUM')
        assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
            Please set environment variable: `export CPU_NUM=1`.'

    model = OpenSimModel(OBS_DIM, VEL_DIM, ACT_DIM)
    algorithm = parl.algorithms.DDPG(
        model,
        gamma=GAMMA,
        tau=TAU,
        actor_lr=ACTOR_LR,
        critic_lr=CRITIC_LR)
    self.agent = OpenSimAgent(algorithm, OBS_DIM, ACT_DIM)

    self.evaluate_result = []

    self.lock = threading.Lock()
    self.model_lock = threading.Lock()
    self.model_queue = queue.Queue()

    self.best_shaping_reward = 0
    self.best_env_reward = 0

    if args.offline_evaluate:
        self.offline_evaluate()
    else:
        t = threading.Thread(target=self.online_evaluate)
        t.start()

    with self.lock:
        while True:
            model_path = self.model_queue.get()
            if not args.offline_evaluate:
                # online evaluate
                while not self.model_queue.empty():
                    model_path = self.model_queue.get()
            try:
                self.agent.restore(model_path)
                break
            except Exception as e:
                logger.warn("Agent restore Exception: {} ".format(e))
        self.cur_model = model_path

    self.create_actors()

def __init__(self, config):
    self.config = config

    # The environment is created here only to obtain the dimensions of the game's action space.
    env = retro_util.RetroEnv(
        game=config['env_name'],
        use_restricted_actions=retro.Actions.DISCRETE,
        resize_shape=config['obs_shape'],
        render_preprocess=False)
    obs_dim = env.observation_space.shape
    action_dim = env.action_space.n
    self.config['action_dim'] = action_dim

    # This model is the one actually used for learning.
    model = Model(action_dim)
    algorithm = parl.algorithms.A3C(
        model, vf_loss_coeff=config['vf_loss_coeff'])
    self.agent = Agent(algorithm, config, obs_dim)

    # Only a single GPU is supported.
    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

    # Load a pretrained model if requested.
    if self.config['restore_model']:
        logger.info("Loading pretrained model...")
        self.agent.restore(self.config['model_path'])

    # Statistics for the training logs.
    self.total_loss_stat = WindowStat(100)
    self.pi_loss_stat = WindowStat(100)
    self.vf_loss_stat = WindowStat(100)
    self.entropy_stat = WindowStat(100)
    self.lr = None
    self.entropy_coeff = None
    self.best_loss = None

    self.learn_time_stat = TimeStat(100)
    self.start_time = None

    # ========== Remote Actor ===========
    self.remote_count = 0
    self.sample_data_queue = queue.Queue()
    self.remote_metrics_queue = queue.Queue()
    self.sample_total_steps = 0
    self.params_queues = []

    self.create_actors()

def __init__(self, args):
    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'
    else:
        cpu_num = os.environ.get('CPU_NUM')
        assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
            Please set environment variable: `export CPU_NUM=1`.'

    model = OpenSimModel(OBS_DIM, VEL_DIM, ACT_DIM)
    algorithm = parl.algorithms.DDPG(
        model,
        gamma=GAMMA,
        tau=TAU,
        actor_lr=ACTOR_LR,
        critic_lr=CRITIC_LR)
    self.agent = OpenSimAgent(algorithm, OBS_DIM, ACT_DIM)

    self.rpm = ReplayMemory(args.rpm_size, OBS_DIM, ACT_DIM)
    if args.restore_rpm_path is not None:
        self.rpm.load(args.restore_rpm_path)

    if args.restore_model_path is not None:
        self.restore(args.restore_model_path)

    # add lock between training and predicting
    self.model_lock = threading.Lock()

    # add lock when appending data to rpm or writing scalars to summary
    self.memory_lock = threading.Lock()

    self.ready_actor_queue = queue.Queue()

    self.total_steps = 0
    self.noiselevel = 0.5

    self.critic_loss_stat = WindowStat(500)
    self.env_reward_stat = WindowStat(500)
    self.shaping_reward_stat = WindowStat(500)
    self.max_env_reward = 0

    # thread to keep training
    learn_thread = threading.Thread(target=self.keep_training)
    learn_thread.setDaemon(True)
    learn_thread.start()

    self.create_actors()

def __init__(self, config, cuda):
    self.cuda = cuda
    self.config = config

    env = gym.make(config['env_name'])
    env = wrap_deepmind(env, dim=config['env_dim'], obs_format='NCHW')
    obs_shape = env.observation_space.shape
    act_dim = env.action_space.n
    self.config['obs_shape'] = obs_shape
    self.config['act_dim'] = act_dim

    model = ActorCritic(act_dim)
    if self.cuda:
        model = model.cuda()

    algorithm = A2C(model, config)
    self.agent = Agent(algorithm, config)

    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'
    else:
        os.environ['CPU_NUM'] = str(1)

    #========== Learner ==========
    self.total_loss_stat = WindowStat(100)
    self.pi_loss_stat = WindowStat(100)
    self.vf_loss_stat = WindowStat(100)
    self.entropy_stat = WindowStat(100)
    self.lr = None
    self.entropy_coeff = None

    self.learn_time_stat = TimeStat(100)
    self.start_time = None

    #========== Remote Actor ===========
    self.remote_count = 0
    self.sample_total_steps = 0
    self.sample_data_queue = queue.Queue()
    self.remote_metrics_queue = queue.Queue()
    self.params_queues = []

    self.create_actors()

def __init__(self, config):
    self.config = config
    self.sample_data_queue = queue.Queue()
    self.batch_buffer = defaultdict(list)

    #=========== Create Agent ==========
    env = gym.make(config['env_name'])
    env = wrap_deepmind(env, dim=config['env_dim'], obs_format='NCHW')
    obs_shape = env.observation_space.shape
    act_dim = env.action_space.n
    self.config['obs_shape'] = obs_shape
    self.config['act_dim'] = act_dim

    model = AtariModel(act_dim)
    algorithm = parl.algorithms.A3C(
        model, vf_loss_coeff=config['vf_loss_coeff'])
    self.agent = AtariAgent(
        algorithm,
        obs_shape=self.config['obs_shape'],
        predict_thread_num=self.config['predict_thread_num'],
        learn_data_provider=self.learn_data_provider)

    if machine_info.is_gpu_available():
        assert get_gpu_count() == 1, 'Only support training in single GPU,\
            Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'
    else:
        cpu_num = os.environ.get('CPU_NUM')
        assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
            Please set environment variable: `export CPU_NUM=1`.'

    #========== Learner ==========
    self.lr, self.entropy_coeff = None, None
    self.lr_scheduler = PiecewiseScheduler(config['lr_scheduler'])
    self.entropy_coeff_scheduler = PiecewiseScheduler(
        config['entropy_coeff_scheduler'])

    self.total_loss_stat = WindowStat(100)
    self.pi_loss_stat = WindowStat(100)
    self.vf_loss_stat = WindowStat(100)
    self.entropy_stat = WindowStat(100)
    self.learn_time_stat = TimeStat(100)
    self.start_time = None

    # learn thread
    self.learn_thread = threading.Thread(target=self.run_learn)
    self.learn_thread.setDaemon(True)
    self.learn_thread.start()

    self.predict_input_queue = queue.Queue()

    # predict threads
    self.predict_threads = []
    for i in six.moves.range(self.config['predict_thread_num']):
        predict_thread = threading.Thread(
            target=self.run_predict, args=(i, ))
        predict_thread.setDaemon(True)
        predict_thread.start()
        self.predict_threads.append(predict_thread)

    #========== Remote Simulator ===========
    self.remote_count = 0
    self.remote_metrics_queue = queue.Queue()
    self.sample_total_steps = 0

    self.create_actors()

from dqn import DQN  # slight changes from parl.algorithms.DQN
from atari_agent import AtariAgent
from atari_model import AtariModel
from replay_memory import ReplayMemory, Experience
from utils import get_player
from parl.utils import get_gpu_count

MEMORY_SIZE = int(1e6)
MEMORY_WARMUP_SIZE = MEMORY_SIZE // 20
IMAGE_SIZE = (84, 84)
CONTEXT_LEN = 4
FRAME_SKIP = 4
UPDATE_FREQ = 4
GAMMA = 0.99
LEARNING_RATE = 3e-4

gpu_num = get_gpu_count()


def run_train_step(agent, rpm):
    for step in range(args.train_total_steps):
        # use the first 80% data to train
        batch_all_obs, batch_action, batch_reward, batch_isOver = rpm.sample_batch(
            args.batch_size * gpu_num)
        batch_obs = batch_all_obs[:, :CONTEXT_LEN, :, :]
        batch_next_obs = batch_all_obs[:, 1:, :, :]
        cost = agent.learn(batch_obs, batch_action, batch_reward,
                           batch_next_obs, batch_isOver)

        if step % 100 == 0:
            # use the last 20% data to evaluate
            batch_all_obs, batch_action, batch_reward, batch_isOver = rpm.sample_test_batch(