Example #1
    def _save_model_checkpoint(self, checkpoint_root, checkpoint_name):
        """Save every model in the pool under checkpoint_root and write the
        file list (plus a .ready sentinel) into the checkpoint directory."""
        checkpoint_dir = os.path.join(checkpoint_root, checkpoint_name)
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        logger.log(now() + 'Pulling updatetime')
        updatetime_dict = self._model_pool_apis.pull_all_attr('updatetime')
        logger.log(
            now() +
            'Done pulling updatetime, no.={}'.format(len(updatetime_dict)))

        filenames = []
        for model_key, updatetime in updatetime_dict.items():
            filename = "%s_%s.model" % (model_key, updatetime)
            filepath = os.path.join(checkpoint_root, filename)
            filenames.append(filename + '\n')
            if not os.path.isfile(filepath):
                logger.log(now() + 'Pulling model {}'.format(model_key))
                model = self._model_pool_apis.pull_model(model_key)
                logger.log(now() + 'Done pulling model {}'.format(model_key))
                assert model_key == model.key
                with open(filepath, 'wb') as f:
                    pickle.dump(model, f)
                    if self._save_learner_meta:
                        learner_meta = self._model_pool_apis.pull_learner_meta(
                            model_key)
                        pickle.dump(learner_meta, f)
                    logger.log(now() + 'Saved model to {}'.format(f.name))
        filelistpath = os.path.join(checkpoint_dir, 'filename.list')
        with open(filelistpath, 'w') as f:
            f.writelines(filenames)
        with open(os.path.join(checkpoint_dir, '.ready'), 'w') as f:
            f.write('ready')
            f.flush()
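Reading a checkpoint back is not shown above; the sketch below (the helper name load_model_checkpoint and the expect_learner_meta flag are assumptions, not part of the snippet) mirrors the dump order: check the .ready sentinel, read filename.list from the checkpoint directory, then unpickle each model, and optionally its learner meta, from checkpoint_root.

import os
import pickle

def load_model_checkpoint(checkpoint_root, checkpoint_name,
                          expect_learner_meta=False):
    """Load models written by _save_model_checkpoint (a sketch)."""
    checkpoint_dir = os.path.join(checkpoint_root, checkpoint_name)
    # the .ready sentinel marks a fully written checkpoint
    assert os.path.isfile(os.path.join(checkpoint_dir, '.ready'))
    models = {}
    with open(os.path.join(checkpoint_dir, 'filename.list')) as f:
        for filename in f.read().splitlines():
            with open(os.path.join(checkpoint_root, filename), 'rb') as mf:
                model = pickle.load(mf)  # dumped first by the writer
                # learner meta, when saved, was dumped into the same file
                meta = pickle.load(mf) if expect_learner_meta else None
            models[model.key] = (model, meta)
    return models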
Example #2
 def get_pseudo_id(self, learner_id):
   if learner_id not in self.id_map:
     self.cur_pseudo_id = (self.cur_pseudo_id + 1) % self.pseudo_learner_num
     logger.log("learner {} begins training with pseude learner_id"
                " {}".format(learner_id, self.cur_pseudo_id))
     self.id_map[learner_id] = self.cur_pseudo_id
   return self.id_map[learner_id]
Example #3
 def _push_data(self, data_queue):
     """ push trajectory for the learning agent (id 0). Invoked in a thread """
     logger.log('entering _push_data',
                'steps: {}'.format(self._steps),
                level=logger.DEBUG + 5)
     me_id = self._learning_agent_id  # short name
     last_obs, actions, reward, info, done, other_vars = data_queue.get()
     value, state, neglogpac = other_vars
     while True:
         data_model_id = self.task.model_key1
         mb_obs, mb_rewards, mb_actions, mb_values, mb_dones, mb_neglogpacs = (
             [], [], [], [], [], [])
         mb_states = []
         for _ in range(self._unroll_length):
             mb_obs.append(
                 transform_tuple(last_obs[me_id], lambda x: x.copy()))
             mb_actions.append(actions[me_id])
             mb_rewards.append(reward)
             mb_dones.append(done)
             mb_values.append(value)
             mb_neglogpacs.append(neglogpac)
             mb_states.append(state)
             (last_obs, actions, reward, info,
              done, other_vars) = data_queue.get()
             value, state, neglogpac = other_vars
         if isinstance(last_obs[me_id], (tuple, list)):
             mb_obs = tuple(
                 np.asarray(obs, dtype=obs[me_id].dtype)
                 for obs in zip(*mb_obs))
         else:
             mb_obs = np.asarray(mb_obs, dtype=last_obs[me_id].dtype)
         mb_rewards = np.asarray(mb_rewards, dtype=np.float32)
         if isinstance(actions[me_id], (list, tuple)):
             # actions can be a list (e.g., from a transformer network)
             mb_actions = tuple(
                 np.squeeze(np.asarray(a, dtype=np.float32))
                 for a in zip(*mb_actions))
         else:
             mb_actions = np.asarray(mb_actions)
         mb_values = np.asarray(mb_values, dtype=np.float32)
         mb_neglogpacs = np.asarray(mb_neglogpacs, dtype=np.float32)
         mb_dones = np.asarray(mb_dones, dtype=bool)  # np.bool is removed in modern NumPy
         mb_states = np.asarray(mb_states)
         mb_returns = np.zeros_like(mb_rewards)
         mb_advs = np.zeros_like(mb_rewards)
         last_gae_lam = 0
         for t in reversed(range(self._unroll_length)):
             next_values = (value if t == self._unroll_length -
                            1 else mb_values[t + 1])
             delta = (mb_rewards[t] + self._gamma * next_values *
                      (1 - mb_dones[t]) - mb_values[t])
             mb_advs[t] = last_gae_lam = (delta + self._gamma * self._lam *
                                          (1 - mb_dones[t]) * last_gae_lam)
         mb_returns = mb_advs + mb_values
         # All done, send them to remote
         self._remote.push_data(
             (data_model_id, mb_obs, mb_returns, mb_dones, mb_actions,
              mb_values, mb_neglogpacs, mb_states))
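The backward loop at the end is standard Generalized Advantage Estimation (GAE): delta_t = r_t + gamma * V_{t+1} * (1 - done_t) - V_t and A_t = delta_t + gamma * lam * (1 - done_t) * A_{t+1}. A self-contained sketch of the same recursion (the function name and signature are illustrative, not from the snippet):

import numpy as np

def gae(rewards, values, dones, bootstrap_value, gamma=0.99, lam=0.95):
    """Compute GAE advantages and returns over one unroll (a sketch)."""
    T = len(rewards)
    advs = np.zeros(T, dtype=np.float32)
    last_gae_lam = 0.0
    for t in reversed(range(T)):
        next_value = bootstrap_value if t == T - 1 else values[t + 1]
        delta = rewards[t] + gamma * next_value * (1 - dones[t]) - values[t]
        last_gae_lam = delta + gamma * lam * (1 - dones[t]) * last_gae_lam
        advs[t] = last_gae_lam
    return advs, advs + values  # advantages, returns

Note that, like the snippet, this masks with dones[t] at step t, so a step flagged done contributes no bootstrapped value.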
Example #4
 def ready_for_val(self):
     if not self._val_rm.ready_for_sample():
         logger.log(
             'val data queue not full ({}/{} unrolls, wait...)'.format(
                 len(self._val_rm), self._val_rm._minimal_unroll))
         return False
     else:
         return True
Example #5
 def ready_for_train(self):
     if not self._replay_mem.ready_for_sample():
         logger.log(
             'train data queue not full ({}/{} unrolls, wait...)'.format(
                 len(self._replay_mem), self._replay_mem._minimal_unroll))
         return False
     else:
         return True
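Examples #4 and #5 are the same gate applied to two different replay memories; a shared helper along these lines (a sketch, not in the source) would remove the duplication:

 def _ready_for_sample(self, rm, name):
     """Common gate behind ready_for_val/ready_for_train (a sketch)."""
     if not rm.ready_for_sample():
         logger.log('{} data queue not full ({}/{} unrolls, wait...)'.format(
             name, len(rm), rm._minimal_unroll))
         return False
     return True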
Example #6
 def log_outcome(self, info):
   if 'outcome' not in info:
     me_outcome = -95678
     logger.log("info['outcome'] not available",
                'return an arbitrary value', me_outcome, level=logger.WARN)
   else:
     me_outcome = info['outcome'][self._learning_agent_id]
   return me_outcome
Example #7
 def _on_request_eval_actor_task(self, actor_id):
     logger.log("get_eval_actor_task: actor_id:{}".format(str(actor_id)),
                level=logger.DEBUG)
     if len(self.game_mgr.players) > 1:
         rp, cp = self.game_mgr.get_eval_match()
         return ActorTask(rp, cp, None)
     else:
         return LeagueMgrErroMsg("Actor task not ready.")
Example #8
 def _on_notify_learner_task_end(self, learner_id):
     pseudo_id = self.get_pseudo_id(learner_id)
     self.cur_pseudo_id = (self.cur_pseudo_id + 1) % self.pseudo_learner_num
     self.id_map[learner_id] = self.cur_pseudo_id
     logger.log("learner {} switches from pseudo learner_id {} to "
                "pseudo learner_id {}".format(learner_id, pseudo_id,
                                              self.id_map[learner_id]))
     return super(PARLeagueMgr, self)._on_notify_learner_task_end(pseudo_id)
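Together with get_pseudo_id in Example #2, this rotates a counter modulo pseudo_learner_num so that any number of learner ids shares a fixed pool of pseudo ids. A toy standalone illustration (the initial counter value is an assumption):

pseudo_learner_num = 3
cur_pseudo_id = -1  # assumed start, so the first learner gets pseudo id 0
id_map = {}

def get_pseudo_id(learner_id):
    global cur_pseudo_id
    if learner_id not in id_map:
        cur_pseudo_id = (cur_pseudo_id + 1) % pseudo_learner_num
        id_map[learner_id] = cur_pseudo_id
    return id_map[learner_id]

print([get_pseudo_id(i) for i in 'abcd'])  # -> [0, 1, 2, 0]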
Example #9
 def _request_task(self):
     """Request the task for this actor."""
     logger.log('entering _request_task',
                'steps: {}'.format(self._steps),
                level=logger.DEBUG + 5)
     task = self._league_mgr_apis.request_actor_task(
         self._actor_id, self._learner_id)
     logger.log('leaving _request_task', level=logger.DEBUG + 5)
     return task
Example #10
 def extract(self):
     try:
         for frame in self._extract():
             yield frame
     except Exception as e:
         logger.log("Extract replay[%s] player[%d] failed: %s" %
                    (self._replay_filepath, self._player_id, e),
                    level=logger.WARN)
         raise  # re-raise, preserving the original traceback
Example #11
 def run(self):
     self.replay_task = self._data_pool_apis.request_replay_task()
     while self.replay_task != "":
         game_version = self.replay_task.game_version or self._game_version
         self._adapt_system(game_version)
         if game_version != self._game_version:
             # need re-init replay converter
             self._game_version = game_version
             self.converter_config['game_version'] = game_version
             self._replay_converter = self.replay_converter_type(
                 **self.converter_config)
         game_core_config = ({} if 'game_core_config'
                             not in self.converter_config else
                             self.converter_config['game_core_config'])
         extractor = ReplayExtractor(
             replay_dir=self._replay_dir,
             replay_filename=self.replay_task.replay_name,
             player_id=self.replay_task.player_id,
             replay_converter=self._replay_converter,
             step_mul=self._step_mul,
             version=game_version,
             game_core_config=game_core_config,
             da_rate=self._da_rate,
             unk_mmr_dft_to=self._unk_mmr_dft_to)
         self._steps = 0
         first_frame = True
         if self._use_policy:
             self.agent.reset()
             self._update_agent_model()
         for frame in extractor.extract():
             if self._post_process_data:
                 obs, act = self._post_process_data(*frame[0])
             else:
                 obs, act = frame[0]
             if self._use_policy:
                 data = (obs, act, self.agent.state,
                        np.array(first_frame, bool))
                 self.agent.update_state(obs)
                 first_frame = False
             else:
                 data = (obs, act)
             data = self.ds.flatten(self.ds.structure(data))
            if self._data_queue.full():
                logger.log("Actor's queue is full.", level=logger.WARN)
            # put() below blocks until the queue has room
            self._data_queue.put((TensorZipper.compress(data), frame[1]))
             logger.log('successfully put one tuple.', level=logger.DEBUG)
             self._steps += 1
             if self._steps % self._log_interval == 0:
                 logger.log(
                     "%d frames of replay task [%s] sent to learner." %
                     (self._steps, self.replay_task))
             if self._use_policy and self._steps % self._update_model_freq == 0:
                 self._update_agent_model()
         logger.log("Replay task [%s] done. %d frames sent to learner." %
                    (self.replay_task, self._steps))
         self.replay_task = self._data_pool_apis.request_replay_task()
     logger.log("All tasks done.")
Example #12
 def _update_agent_model(self):
     if self.infserver_addr is not None:
         return
     logger.log('entering _update_agent_model',
                'steps: {}'.format(self._steps),
                level=logger.DEBUG + 5)
     if self._should_update_model(self.model, self.model_key):
         model = self._model_pool_apis.pull_model(self.model_key)
         self.agent.load_model(model.model)
         self.model = model
Example #13
 def _update_distill_agent_model(self):
   if self.distill_infserver_addr is not None:
     return
    logger.log('entering _update_distill_agent_model', f'steps: {self._steps}',
              level=logger.DEBUG + 5)
   model_key = self.task.hyperparam.distill_model_key
   if self._should_update_model(self.distill_model, model_key):
     model3 = self._model_pool_apis.pull_model(model_key)
     self.distill_agent.load_model(model3.model)
     self.distill_model = model3
   logger.log('leaving _update_distill_agent_model', level=logger.DEBUG + 5)
Example #14
 def _finish_task(self, task, outcome, info=None):
     """Do stuff (e.g., send match result) when task finishes."""
     info = info or {}
     logger.log('entering _finish_task',
                'steps: {}'.format(self._steps),
                level=logger.DEBUG + 5)
     match_result = MatchResult(task.model_key1, task.model_key2, outcome,
                                info)
     self._league_mgr_apis.notify_actor_task_end(self._actor_id,
                                                 match_result)
     logger.log('leaving _finish_task', level=logger.DEBUG + 5)
Example #15
    def _on_request_actor_task(self, actor_id, learner_id):
        actor_task = super(LeagueMgr,
                           self)._on_request_actor_task(actor_id, learner_id)
        if not isinstance(actor_task, LeagueMgrErroMsg):
            assert isinstance(actor_task, ActorTask)
            self.game_mgr.start_match(actor_task.model_key1,
                                      actor_task.model_key2, actor_id)

        logger.log("_on_request_actor_task: %s" % str(actor_task),
                   level=logger.DEBUG)
        return actor_task
Example #16
 def _restore_checkpoint(self, checkpoint_dir):
   super(LeagueMgr, self)._restore_checkpoint(checkpoint_dir)
   logger.log('{}loading league-mgr from {}'.format(now(), checkpoint_dir))
   # 3. game manager (saved third in _save_checkpoint, restored first)
   self.game_mgr.load(checkpoint_dir)
   # 2. hyperparam manager
   self._hyper_mgr.load(checkpoint_dir)
   # 1. learner task table
   filepath = os.path.join(checkpoint_dir, 'learner_task_table')
   with open(filepath, 'rb') as f:
     self._learner_task_table = pickle.load(f)
   logger.log('{}done loading league-mgr'.format(now()))
Example #17
 def _print_infos(self):
     # drop fields that cannot be averaged (None values and zstat strings)
     keep_numeric = lambda d: {k: v for k, v in d.items()
                               if v is not None and not isinstance(v, str)}
     stat = Counter({})
     for info in self._infos:
         stat += Counter(keep_numeric(info))
     num = float(len(self._infos))
     for k, v in stat.items():
         stat[k] = float(int(v / num * 100)) / 100.0  # keep two decimal places
     logger.log(stat)
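For concreteness, here is the same Counter-based averaging on toy info dicts (values invented for illustration); string and None fields are filtered out before summing:

from collections import Counter

infos = [{'win': 1, 'apm': 120.0, 'zstat': 'rushed'},
         {'win': 0, 'apm': 100.0, 'zstat': None}]
keep_numeric = lambda d: {k: v for k, v in d.items()
                          if v is not None and not isinstance(v, str)}
stat = Counter({})
for info in infos:
    stat += Counter(keep_numeric(info))
for k, v in stat.items():
    stat[k] = float(int(v / len(infos) * 100)) / 100.0
print(stat)  # Counter({'apm': 110.0, 'win': 0.5})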
Example #18
 def _update_hyperparam(self, task):
   logger.log('entering _update_hyperparam', f'steps: {self._steps}',
              level=logger.DEBUG + 5)
   if self._enable_push:
     if hasattr(task.hyperparam, 'gamma'):
       self._gamma = task.hyperparam.gamma
     if hasattr(task.hyperparam, 'lam'):
       self._lam = task.hyperparam.lam
     if hasattr(task.hyperparam, 'reward_weights'):
       self._reward_weights = np.array(task.hyperparam.reward_weights,
                                       dtype=np.float32)
   logger.log('leaving _update_hyperparam', level=logger.DEBUG + 5)
Example #19
 def log_kvs(self, reward_sum, info):
   time_end = time.time()
   logger.logkvs({
     'producing_fps': self._steps / (time_end - self.time_beg),
     'reward_sum': reward_sum,
     'episode_steps': self._steps,
   })
   if self.should_log_info:  # log additional info fields
     if isinstance(info, dict):
       logger.logkvs(info)
     else:
       logger.log(info)
   logger.dumpkvs()
Example #20
  def _save_checkpoint(self, checkpoint_root, checkpoint_name):
    checkpoint_dir = os.path.join(checkpoint_root, checkpoint_name)
    logger.log('{}saving league-mgr to {}'.format(now(), checkpoint_dir))

    super(LeagueMgr, self)._save_checkpoint(checkpoint_root, checkpoint_name)
    # 1. learner task table
    filepath = os.path.join(checkpoint_dir, 'learner_task_table')
    with open(filepath, 'wb') as f:
      pickle.dump(self._learner_task_table, f)
    # 2. hyperparam manager
    self._hyper_mgr.save(checkpoint_dir)
    # 3. game manager
    self.game_mgr.save(checkpoint_dir)
    logger.log('{}done saving league-mgr'.format(now()))
Example #21
 def _on_request_train_actor_task(self, actor_id, learner_id):
     logger.log(
         "_on_request_train_actor_task: actor_id:{}, learner_id:{}".format(
             actor_id, learner_id),
         level=logger.DEBUG)
     if (learner_id in self._learner_task_table and
             self._learner_task_table[learner_id].model_key is not None):
         hyperparam = self._learner_task_table[learner_id].hyperparam
         model_key = self._learner_task_table[learner_id].model_key
         oppo_model_key = self.game_mgr.get_opponent(model_key, hyperparam)
         if isinstance(oppo_model_key, LeagueMgrErroMsg):
             logger.log(f'get_opponent not ready: {oppo_model_key}',
                        level=logger.WARN)
             return oppo_model_key
         return ActorTask(model_key, oppo_model_key, hyperparam)
     else:
         if learner_id not in self._learner_task_table:
             logger.log(
                 "learner_id({}) hasn't request_learner_task.".format(
                     learner_id),
                 level=logger.WARN)
         elif self._learner_task_table[learner_id].model_key is None:
             logger.log(
                 "learner_id({}) hasn't notify_learner_task_begin.".format(
                     learner_id),
                 level=logger.WARN)
         return LeagueMgrErroMsg("Actor task not ready.")
Example #22
 def _push_data(self, data_queue):
     """ push trajectory for the learning agent (id 0). Invoked in a thread """
     while data_queue.empty():
         time.sleep(5)
     logger.log('entering _push_data',
                'steps: {}'.format(self._steps),
                level=logger.DEBUG + 5)
     while True:
         task = self.replay_task
         frames = []
         weights = []
         for _ in range(self._unroll_length):
             frame, weight = data_queue.get()
             frames.append(frame)
             weights.append(weight)
         self._data_pool_apis.push_data((task, frames, weights))
Example #23
 def pull_keys(self):
     self._req_lock.acquire()
     while True:
         try:
             self._req_socket.send_string("keys")
             keys = self._req_socket.recv_pyobj()
             if not isinstance(keys, ModelPoolErroMsg):
                 break
             else:
                 logger.log(keys.msg)  # keys is a ModelPoolErroMsg
             time.sleep(2)
         except Exception as e:
             print("ModelPoolAPIs crushed on pull_keys,"
                   " the exception:\n{}".format(e))
     self._req_lock.release()
     return keys
Example #24
 def pull_all_attr(self, attr):
     self._req_lock.acquire()
     while True:
         try:
             self._req_socket.send_string('all_attr', zmq.SNDMORE)
             self._req_socket.send_string(attr)
             attrs = self._req_socket.recv_pyobj()
             if not isinstance(attrs, ModelPoolErroMsg):
                 break
             else:
                 logger.log(attrs.msg)  # attrs is a ModelPoolErroMsg
             time.sleep(2)
         except Exception as e:
             print("ModelPoolAPIs crushed on pull_all_attr,"
                   " the exception:\n{}".format(e))
     self._req_lock.release()
     return attrs
Example #25
 def pull_model(self, key):
     self._req_lock.acquire()
     while True:
         try:
             self._req_socket.send_string("model", zmq.SNDMORE)
             self._req_socket.send_string(key)
             model = self._req_socket.recv_pyobj()
             if not isinstance(model, ModelPoolErroMsg):
                 break
             else:
                 logger.log(model.msg)  # model is a ModelPoolErroMsg
             time.sleep(2)
         except Exception as e:
             print("ModelPoolAPIs crushed on pull_model {},"
                   " the exception:\n{}".format(key, e))
     self._req_lock.release()
     return model
Example #26
 def pull_attr(self, attr, key):
     self._req_lock.acquire()
     while True:
         try:
             self._req_socket.send_string(attr, zmq.SNDMORE)
             self._req_socket.send_string(key)
             ret_attr = self._req_socket.recv_pyobj()
             if not isinstance(ret_attr, ModelPoolErroMsg):
                 break
             else:
                 logger.log(ret_attr.msg)  # ret_attr is a ModelPoolErroMsg
             time.sleep(2)
         except Exception as e:
             print("ModelPoolAPIs crushed on pull_attr {} of model {},"
                   " the exception:\n{}".format(attr, key, e))
     self._req_lock.release()
     return ret_attr
Example #27
 def _message_worker(self):
     train_idx = 0
     num_train = len(self._train_replays)
     while True:
         msg = self._rep_socket.recv_string()
         if msg == 'replay_task':
             if len(self._replay_tasks) > 0:
                 self._rep_socket.send_pyobj(self._replay_tasks.pop(0))
             elif self._repeat_training_task:
                 if train_idx == 0:
                     logger.log(
                         'Training task empty, repeat training task.')
                 self._rep_socket.send_pyobj(self._train_replays[train_idx])
                train_idx = (train_idx + 1) % num_train
             else:
                 self._rep_socket.send_pyobj("")
         else:
             raise RuntimeError("message not recognized")
Example #28
 def pull_learner_meta(self, key):
     self._req_lock.acquire()
     while True:
         try:
             self._req_socket.send_string('learner_meta', zmq.SNDMORE)
             self._req_socket.send_string(key)
             learner_meta = self._req_socket.recv_pyobj()
             if not isinstance(learner_meta, ModelPoolErroMsg):
                 break
             else:
                 # learner_meta is a ModelPoolErroMsg
                 logger.log(learner_meta.msg)
             time.sleep(2)
         except Exception as e:
             print("ModelPoolAPIs crushed on pull_learner_meta {},"
                   " the exception:\n{}".format(key, e))
     self._req_lock.release()
     return learner_meta
Example #29
 def request(self, req):
   self._req_lock.acquire()
   try:
     while True:
       try:
         for msg in req[0:-1]:
           self._req_socket.send_string(msg, zmq.SNDMORE)
         self._req_socket.send_string(req[-1])
         ret = self._req_socket.recv_pyobj()
         if not isinstance(ret, ModelPoolErroMsg):
           break
         else:
           logger.log(ret.msg)  # ret is a ModelPoolErroMsg
         time.sleep(2)
       except BaseException as e:
         logger.error("ModelPoolAPIs may have crashed on request {},"
                      " the exception:\n{}".format(req, e))
         raise  # re-raise; the finally below still releases the lock
   finally:
     self._req_lock.release()
   return ret
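Note that this request helper subsumes the single-purpose pull_* methods above: every message part but the last is sent with zmq.SNDMORE, one pyobj reply is awaited, and the retry-on-ModelPoolErroMsg loop is the same. Assuming the same server-side protocol, the earlier calls reduce to, e.g.:

 model = self.request(['model', key])              # cf. pull_model (Example #25)
 attrs = self.request(['all_attr', 'updatetime'])  # cf. pull_all_attr (Example #24)
 keys = self.request(['keys'])                     # cf. pull_keys (Example #23)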
Example #30
  def _update_agents_model(self, task):
    """Update the model (i.e., Neural Net parameters) for each agent.

    The learning agent uses model1, all the other opponent(s) use model2 """
    logger.log('entering _update_agents_model', 'steps: {}'.format(self._steps),
               level=logger.DEBUG + 5)
    if (self.self_infserver_addr is None
        and self._should_update_model(self.self_model, task.model_key1)):
      model1 = self._model_pool_apis.pull_model(task.model_key1)
      me_id = self._learning_agent_id  # short name
      self.agents[me_id].load_model(model1.model)
      self.self_model = model1
    if self._should_update_model(self.oppo_model, task.model_key2):
      model2 = self._model_pool_apis.pull_model(task.model_key2)
      oppo_id = self._oppo_agent_id  # short name
      for agt in self.agents[oppo_id:]:
        agt.load_model(model2.model)
      self.oppo_model = model2
    logger.log('leaving _update_agents_model', level=logger.DEBUG + 5)