Example #1
    def __init__(self, algorithm, obs_dim, action_dim):
        self._action_dim = action_dim
        self._obs_dim = obs_dim
        self._update_target_steps = 1000
        self._global_step = 0
        super(ElevatorAgent, self).__init__(algorithm)

        use_cuda = self.gpu_id >= 0
        if self.gpu_id >= 0:
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'

        else:
            cpu_num = os.environ.get('CPU_NUM')
            assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
                    Please set environment variable:  `export CPU_NUM=1`.'

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = 1
        exec_strategy.num_iteration_per_drop_scope = 10
        build_strategy = fluid.BuildStrategy()
        build_strategy.remove_unnecessary_lock = False

        self.learn_pe = fluid.ParallelExecutor(
            use_cuda=use_cuda,
            main_program=self._learn_program,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy,
        )
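
The `_update_target_steps` / `_global_step` pair initialised above is the usual DQN bookkeeping for periodically syncing the target network. A minimal sketch of how a learn step might use these counters (the class, the method name, and the `alg.sync_target()` call are illustrative assumptions, not taken from this listing):

class TargetSyncSketch:
    """Illustrative only: periodic hard sync of a target network."""

    def __init__(self, alg, update_target_steps=1000):
        self.alg = alg
        self._update_target_steps = update_target_steps
        self._global_step = 0

    def learn_step(self):
        # Hard-sync the target network every `_update_target_steps` updates.
        if self._global_step % self._update_target_steps == 0:
            self.alg.sync_target()
        self._global_step += 1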
Example #2
    def __init__(self, algorithm, obs_dim, action_dim):
        self._action_dim = action_dim
        self._obs_dim = obs_dim
        self._update_target_steps = 1000

        self._global_step = 0
        self.exploration_ratio = 0.9
        self.exploration_decre = 1e-7
        self.exploration_min = 0.1
        super(ElevatorAgent, self).__init__(algorithm)

        use_cuda = machine_info.is_gpu_available()
        if self.gpu_id >= 0:
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'

        else:
            os.environ['CPU_NUM'] = str(1)

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = 1
        exec_strategy.num_iteration_per_drop_scope = 10
        build_strategy = fluid.BuildStrategy()
        build_strategy.remove_unnecessary_lock = False

        self.learn_pe = fluid.ParallelExecutor(
            use_cuda=use_cuda,
            main_program=self.learn_program,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy,
        )
Example #3
    def __init__(self,
                 algorithm,
                 agent_index=None,
                 obs_dim_n=None,
                 act_dim_n=None,
                 batch_size=None,
                 speedup=False):
        assert isinstance(agent_index, int)
        assert isinstance(obs_dim_n, list)
        assert isinstance(act_dim_n, list)
        assert isinstance(batch_size, int)
        assert isinstance(speedup, bool)
        self.agent_index = agent_index
        self.obs_dim_n = obs_dim_n
        self.act_dim_n = act_dim_n
        self.batch_size = batch_size
        self.speedup = speedup
        self.n = len(act_dim_n)

        self.memory_size = int(1e6)
        self.min_memory_size = batch_size * 25  # batch_size * args.max_episode_len
        self.rpm = ReplayMemory(
            max_size=self.memory_size,
            obs_dim=self.obs_dim_n[agent_index],
            act_dim=self.act_dim_n[agent_index])
        self.global_train_step = 0

        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

        super(MAAgent, self).__init__(algorithm)

        # Attention: In the beginning, sync target model totally.
        self.alg.sync_target(decay=0)
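
The `sync_target(decay=0)` call above follows the conventional Polyak soft-update rule, target = decay * target + (1 - decay) * source, so decay=0 amounts to a hard copy; that is why the target model is fully synced once at construction. A small self-contained sketch of that rule (plain Python lists for illustration, not PARL's own implementation):

def soft_update(target_weights, source_weights, decay):
    # Polyak averaging; decay=0 degenerates to a hard copy of the source.
    return [decay * t + (1.0 - decay) * s
            for t, s in zip(target_weights, source_weights)]

# With decay=0 the target takes the source values verbatim (full sync).
assert soft_update([1.0, 2.0], [5.0, 6.0], decay=0) == [5.0, 6.0]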
Example #4
    def __init__(self, config):
        self.config = config
        self.sample_data_queue = queue.Queue(
            maxsize=config['sample_queue_max_size'])

        #=========== Create Agent ==========
        env = gym.make(config['env_name'])
        env = wrap_deepmind(env, dim=config['env_dim'], obs_format='NCHW')
        obs_shape = env.observation_space.shape

        act_dim = env.action_space.n

        model = AtariModel(act_dim)
        algorithm = parl.algorithms.IMPALA(
            model,
            sample_batch_steps=self.config['sample_batch_steps'],
            gamma=self.config['gamma'],
            vf_loss_coeff=self.config['vf_loss_coeff'],
            clip_rho_threshold=self.config['clip_rho_threshold'],
            clip_pg_rho_threshold=self.config['clip_pg_rho_threshold'])
        self.agent = AtariAgent(algorithm, obs_shape, act_dim,
                                self.learn_data_provider)

        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

        self.cache_params = self.agent.get_weights()
        self.params_lock = threading.Lock()
        self.params_updated = False
        self.cache_params_sent_cnt = 0
        self.total_params_sync = 0

        #========== Learner ==========
        self.lr, self.entropy_coeff = None, None
        self.lr_scheduler = PiecewiseScheduler(config['lr_scheduler'])
        self.entropy_coeff_scheduler = PiecewiseScheduler(
            config['entropy_coeff_scheduler'])

        self.total_loss_stat = WindowStat(100)
        self.pi_loss_stat = WindowStat(100)
        self.vf_loss_stat = WindowStat(100)
        self.entropy_stat = WindowStat(100)
        self.kl_stat = WindowStat(100)
        self.learn_time_stat = TimeStat(100)
        self.start_time = None

        self.learn_thread = threading.Thread(target=self.run_learn)
        self.learn_thread.setDaemon(True)
        self.learn_thread.start()

        #========== Remote Actor ===========
        self.remote_count = 0

        self.batch_buffer = []
        self.remote_metrics_queue = queue.Queue()
        self.sample_total_steps = 0

        self.create_actors()
Example #5
    def setUp(self):
        self.model = TestModel()
        self.target_model = TestModel()
        self.target_model2 = TestModel()
        self.target_model3 = TestModel()

        gpu_count = get_gpu_count()
        device = torch.device('cuda' if gpu_count else 'cpu')
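
Every example in this listing relies on a `get_gpu_count()` helper (PARL exposes one in `parl.utils`). A minimal sketch of such a helper, assuming it counts the devices listed in `CUDA_VISIBLE_DEVICES` and falls back to `nvidia-smi -L` when the variable is unset (an illustration, not PARL's actual implementation):

import os
import subprocess

def get_gpu_count():
    """Count visible GPUs: prefer CUDA_VISIBLE_DEVICES, else ask nvidia-smi."""
    visible = os.environ.get('CUDA_VISIBLE_DEVICES')
    if visible is not None:
        # An empty string means all GPUs are explicitly hidden.
        return len([d for d in visible.split(',') if d.strip() != ''])
    try:
        out = subprocess.check_output(['nvidia-smi', '-L'])
        return len(out.decode().strip().splitlines())
    except (OSError, subprocess.CalledProcessError):
        return 0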
Example #6
    def setUp(self):
        gpu_count = get_gpu_count()
        if gpu_count > 0:
            place = fluid.CUDAPlace(0)
            self.gpu_id = 0
        else:
            place = fluid.CPUPlace()
            self.gpu_id = -1
        self.executor = fluid.Executor(place)
Example #7
    def setUp(self):
        self.model = TestModel()
        self.target_model = deepcopy(self.model)
        self.target_model2 = deepcopy(self.model)

        gpu_count = get_gpu_count()
        if gpu_count > 0:
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()
        self.executor = fluid.Executor(place)
Example #8
    def __init__(self, args):
        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

        else:
            cpu_num = os.environ.get('CPU_NUM')
            assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
                    Please set environment variable:  `export CPU_NUM=1`.'

        model = OpenSimModel(OBS_DIM, VEL_DIM, ACT_DIM)
        algorithm = parl.algorithms.DDPG(
            model,
            gamma=GAMMA,
            tau=TAU,
            actor_lr=ACTOR_LR,
            critic_lr=CRITIC_LR)
        self.agent = OpenSimAgent(algorithm, OBS_DIM, ACT_DIM)

        self.evaluate_result = []

        self.lock = threading.Lock()
        self.model_lock = threading.Lock()
        self.model_queue = queue.Queue()

        self.best_shaping_reward = 0
        self.best_env_reward = 0

        if args.offline_evaluate:
            self.offline_evaluate()
        else:
            t = threading.Thread(target=self.online_evaluate)
            t.start()

        with self.lock:
            while True:
                model_path = self.model_queue.get()
                if not args.offline_evaluate:
                    # online evaluate
                    while not self.model_queue.empty():
                        model_path = self.model_queue.get()
                try:
                    self.agent.restore(model_path)
                    break
                except Exception as e:
                    logger.warn("Agent restore Exception: {} ".format(e))

            self.cur_model = model_path

        self.create_actors()
Example #9
    def __init__(self, config):
        self.config = config

        # The environment is created here only to obtain the action-space dimension
        env = retro_util.RetroEnv(game=config['env_name'],
                                  use_restricted_actions=retro.Actions.DISCRETE,
                                  resize_shape=config['obs_shape'],
                                  render_preprocess=False)
        obs_dim = env.observation_space.shape
        action_dim = env.action_space.n
        self.config['action_dim'] = action_dim

        # The model created here is the one actually used for learning
        model = Model(action_dim)
        algorithm = parl.algorithms.A3C(model, vf_loss_coeff=config['vf_loss_coeff'])
        self.agent = Agent(algorithm, config, obs_dim)

        # Only a single GPU is supported
        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

        # Load the pre-trained model
        if self.config['restore_model']:
            logger.info("Loading the pre-trained model...")
            self.agent.restore(self.config['model_path'])

        # Track training statistics
        self.total_loss_stat = WindowStat(100)
        self.pi_loss_stat = WindowStat(100)
        self.vf_loss_stat = WindowStat(100)
        self.entropy_stat = WindowStat(100)
        self.lr = None
        self.entropy_coeff = None

        self.best_loss = None

        self.learn_time_stat = TimeStat(100)
        self.start_time = None

        # ========== Remote Actor ===========
        self.remote_count = 0
        self.sample_data_queue = queue.Queue()

        self.remote_metrics_queue = queue.Queue()
        self.sample_total_steps = 0

        self.params_queues = []
        self.create_actors()
Example #10
    def __init__(self, args):
        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_TO_USE]` .'

        else:
            cpu_num = os.environ.get('CPU_NUM')
            assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
                    Please set environment variable:  `export CPU_NUM=1`.'

        model = OpenSimModel(OBS_DIM, VEL_DIM, ACT_DIM)
        algorithm = parl.algorithms.DDPG(
            model,
            gamma=GAMMA,
            tau=TAU,
            actor_lr=ACTOR_LR,
            critic_lr=CRITIC_LR)
        self.agent = OpenSimAgent(algorithm, OBS_DIM, ACT_DIM)

        self.rpm = ReplayMemory(args.rpm_size, OBS_DIM, ACT_DIM)

        if args.restore_rpm_path is not None:
            self.rpm.load(args.restore_rpm_path)
        if args.restore_model_path is not None:
            self.restore(args.restore_model_path)

        # add lock between training and predicting
        self.model_lock = threading.Lock()

        # add lock when appending data to rpm or writing scalars to summary
        self.memory_lock = threading.Lock()

        self.ready_actor_queue = queue.Queue()

        self.total_steps = 0
        self.noiselevel = 0.5

        self.critic_loss_stat = WindowStat(500)
        self.env_reward_stat = WindowStat(500)
        self.shaping_reward_stat = WindowStat(500)
        self.max_env_reward = 0

        # thread to keep training
        learn_thread = threading.Thread(target=self.keep_training)
        learn_thread.setDaemon(True)
        learn_thread.start()

        self.create_actors()
Example #11
    def __init__(self, config, cuda):
        self.cuda = cuda

        self.config = config
        env = gym.make(config['env_name'])
        env = wrap_deepmind(env, dim=config['env_dim'], obs_format='NCHW')
        obs_shape = env.observation_space.shape
        act_dim = env.action_space.n
        self.config['obs_shape'] = obs_shape
        self.config['act_dim'] = act_dim

        model = ActorCritic(act_dim)
        if self.cuda:
            model = model.cuda()

        algorithm = A2C(model, config)
        self.agent = Agent(algorithm, config)

        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'

        else:
            os.environ['CPU_NUM'] = str(1)

        #========== Learner ==========
        self.total_loss_stat = WindowStat(100)
        self.pi_loss_stat = WindowStat(100)
        self.vf_loss_stat = WindowStat(100)
        self.entropy_stat = WindowStat(100)
        self.lr = None
        self.entropy_coeff = None

        self.learn_time_stat = TimeStat(100)
        self.start_time = None

        #========== Remote Actor ===========
        self.remote_count = 0
        self.sample_total_steps = 0
        self.sample_data_queue = queue.Queue()
        self.remote_metrics_queue = queue.Queue()
        self.params_queues = []

        self.create_actors()
Example #12
    def __init__(self, config):
        self.config = config

        self.sample_data_queue = queue.Queue()
        self.batch_buffer = defaultdict(list)

        #=========== Create Agent ==========
        env = gym.make(config['env_name'])
        env = wrap_deepmind(env, dim=config['env_dim'], obs_format='NCHW')
        obs_shape = env.observation_space.shape
        act_dim = env.action_space.n

        self.config['obs_shape'] = obs_shape
        self.config['act_dim'] = act_dim

        model = AtariModel(act_dim)
        algorithm = parl.algorithms.A3C(model,
                                        vf_loss_coeff=config['vf_loss_coeff'])
        self.agent = AtariAgent(
            algorithm,
            obs_shape=self.config['obs_shape'],
            predict_thread_num=self.config['predict_thread_num'],
            learn_data_provider=self.learn_data_provider)

        if machine_info.is_gpu_available():
            assert get_gpu_count() == 1, 'Only support training in single GPU,\
                    Please set environment variable: `export CUDA_VISIBLE_DEVICES=[GPU_ID_YOU_WANT_TO_USE]` .'

        else:
            cpu_num = os.environ.get('CPU_NUM')
            assert cpu_num is not None and cpu_num == '1', 'Only support training in single CPU,\
                    Please set environment variable:  `export CPU_NUM=1`.'

        #========== Learner ==========
        self.lr, self.entropy_coeff = None, None
        self.lr_scheduler = PiecewiseScheduler(config['lr_scheduler'])
        self.entropy_coeff_scheduler = PiecewiseScheduler(
            config['entropy_coeff_scheduler'])

        self.total_loss_stat = WindowStat(100)
        self.pi_loss_stat = WindowStat(100)
        self.vf_loss_stat = WindowStat(100)
        self.entropy_stat = WindowStat(100)

        self.learn_time_stat = TimeStat(100)
        self.start_time = None

        # learn thread
        self.learn_thread = threading.Thread(target=self.run_learn)
        self.learn_thread.setDaemon(True)
        self.learn_thread.start()

        self.predict_input_queue = queue.Queue()

        # predict thread
        self.predict_threads = []
        for i in six.moves.range(self.config['predict_thread_num']):
            predict_thread = threading.Thread(target=self.run_predict,
                                              args=(i, ))
            predict_thread.setDaemon(True)
            predict_thread.start()
            self.predict_threads.append(predict_thread)

        #========== Remote Simulator ===========
        self.remote_count = 0

        self.remote_metrics_queue = queue.Queue()
        self.sample_total_steps = 0

        self.create_actors()
Example #13
from dqn import DQN  # slight changes from parl.algorithms.DQN
from atari_agent import AtariAgent
from atari_model import AtariModel
from replay_memory import ReplayMemory, Experience
from utils import get_player
from parl.utils import get_gpu_count  # used below

MEMORY_SIZE = int(1e6)
MEMORY_WARMUP_SIZE = MEMORY_SIZE // 20
IMAGE_SIZE = (84, 84)
CONTEXT_LEN = 4
FRAME_SKIP = 4
UPDATE_FREQ = 4
GAMMA = 0.99
LEARNING_RATE = 3e-4

gpu_num = get_gpu_count()


def run_train_step(agent, rpm):
    for step in range(args.train_total_steps):
        # use the first 80% data to train
        batch_all_obs, batch_action, batch_reward, batch_isOver = rpm.sample_batch(
            args.batch_size * gpu_num)
        batch_obs = batch_all_obs[:, :CONTEXT_LEN, :, :]
        batch_next_obs = batch_all_obs[:, 1:, :, :]
        cost = agent.learn(batch_obs, batch_action, batch_reward,
                           batch_next_obs, batch_isOver)

        if step % 100 == 0:
            # use the last 20% data to evaluate
            batch_all_obs, batch_action, batch_reward, batch_isOver = rpm.sample_test_batch(