示例#1
0
  def test_actor_task(self):
    """Run one actor-task round trip against live league manager / model pool.

    The addresses are hard-coded: a league manager at localhost:11007 and a
    model pool at localhost:11001:11006 must be reachable for this test.
    """
    actor_id = str(uuid.uuid1())
    learner_id = str(uuid.uuid1())
    league_client = LeagueMgrAPIs(league_mgr_addr="localhost:11007")
    # A learner task is requested and started before the actor asks for work.
    learner_task = league_client.request_learner_task(learner_id=learner_id)
    league_client.notify_learner_task_begin(learner_id=learner_id,
                                            learner_task=learner_task)

    model_client = ModelPoolAPIs(model_pool_addrs=["localhost:11001:11006"])
    hyperparam = MutableHyperparam()
    model_client.push_model(None, hyperparam, str(uuid.uuid1()))

    task = league_client.request_actor_task(actor_id=actor_id,
                                            learner_id=learner_id)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(task, ActorTask)
    league_client.notify_actor_task_begin(actor_id=actor_id)
    league_client.notify_actor_task_end(
      actor_id=actor_id,
      match_result=MatchResult(task.model_key1, task.model_key2, 1))
示例#2
0
    def test_pull_hyperparam(self):
        """Check that pull_attr('hyperparam', key) yields the last pushed value.

        Requires model pool servers on the two hard-coded local addresses.
        """
        pool = ModelPoolAPIs(model_pool_addrs=[
            "localhost:11001:11006",
            "localhost:11002:11007",
        ])
        first_key = str(uuid.uuid1())
        second_key = str(uuid.uuid1())

        # second_key is pushed twice; the later push is the one expected back.
        pushes = (
            ("any_hyperparam_object", first_key),
            ("any_hyperparam_object", second_key),
            ("updated_hyperparam_object", second_key),
        )
        for hyperparam, key in pushes:
            pool.push_model(None, hyperparam, key)

        expectations = (
            (first_key, "any_hyperparam_object"),
            (second_key, "updated_hyperparam_object"),
        )
        for key, expected in expectations:
            pulled = pool.pull_attr('hyperparam', key)
            self.assertEqual(pulled, expected)
示例#3
0
    def test_pull_model(self):
        """Check that pull_model(key).model is the most recently pushed model.

        Requires model pool servers on the two hard-coded local addresses.
        """
        pool = ModelPoolAPIs(model_pool_addrs=[
            "localhost:11001:11006",
            "localhost:11002:11007",
        ])
        first_key = str(uuid.uuid1())
        second_key = str(uuid.uuid1())

        # second_key is pushed twice; the later push is the one expected back.
        pushes = (
            ("any_model_object", first_key),
            ("any_model_object", second_key),
            ("updated_model_object", second_key),
        )
        for model_obj, key in pushes:
            pool.push_model(model_obj, None, key)

        expectations = (
            (first_key, "any_model_object"),
            (second_key, "updated_model_object"),
        )
        for key, expected in expectations:
            pulled = pool.pull_model(key)
            self.assertEqual(pulled.model, expected)
示例#4
0
  def __init__(self, league_mgr_addr, model_pool_addrs, learner_ports,
               learner_id=''):
    """Bind the learner sockets, start the message thread, create API clients.

    Args:
      league_mgr_addr: forwarded to LeagueMgrAPIs.
      model_pool_addrs: forwarded to ModelPoolAPIs.
      learner_ports: indexable; item 0 is the port bound by the REP socket,
        item 1 the port bound by the PULL socket.
      learner_id: id of this learner; when empty/falsy a fresh uuid1 string
        is generated instead.
    """
    self._learner_id = learner_id if learner_id else str(uuid.uuid1())

    context = zmq.Context()
    self._zmq_context = context

    rep_port, pull_port = learner_ports[0], learner_ports[1]
    self._rep_socket = context.socket(zmq.REP)
    self._rep_socket.bind("tcp://*:%s" % rep_port)
    self._pull_socket = context.socket(zmq.PULL)
    # Receive high-water mark of 1 on the PULL socket.
    self._pull_socket.setsockopt(zmq.RCVHWM, 1)
    self._pull_socket.bind("tcp://*:%s" % pull_port)

    # NOTE(review): the worker thread starts before the attributes assigned
    # further down exist; confirm _message_worker does not read them early.
    worker = Thread(target=self._message_worker)
    worker.daemon = True
    self._message_thread = worker
    worker.start()

    self._league_mgr_apis = LeagueMgrAPIs(league_mgr_addr)
    self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)

    self.task = None
    self.model_key = None
    self.last_model_key = None
    self._lrn_period_count = 0  # number of learning periods so far
    self._pull_lock = Lock()
示例#5
0
    def test_pull_keys(self):
        """Check that pull_keys() lists each pushed key once.

        key1 is pushed twice on purpose; the pool is expected to report two
        distinct keys. Requires model pool servers on the two hard-coded
        local addresses.
        NOTE(review): the length assertion presumably assumes the pool holds
        no keys from other tests -- confirm the servers start empty.
        """
        client = ModelPoolAPIs(model_pool_addrs=[
            "localhost:11001:11006", "localhost:11002:11007"
        ])
        key1 = str(uuid.uuid1())
        key2 = str(uuid.uuid1())
        client.push_model(None, None, key1)
        client.push_model(None, None, key2)
        client.push_model(None, None, key1)

        saved_keys = client.pull_keys()
        self.assertEqual(len(saved_keys), 2)
        # assertIn prints the container on failure, unlike assertTrue(x in y).
        self.assertIn(key1, saved_keys)
        self.assertIn(key2, saved_keys)
def main(_):
    """Replace the model object of entries that already exist in the pool.

    FLAGS.model_key and FLAGS.model_path are walked in lockstep. For every
    key already present in the model pool, the object pickled at the matching
    path is loaded and pushed back under the same key, reusing the pulled
    entry's hyperparam and create/freeze/update times. Keys that are not in
    the pool are skipped.
    """
    pool = ModelPoolAPIs(FLAGS.model_pool_addrs.split(','))
    existing_keys = pool.pull_keys()
    for key, model_path in zip(FLAGS.model_key, FLAGS.model_path):
        if key not in existing_keys:
            continue
        stored = pool.pull_model(key)
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only point model_path at trusted files.
        with open(model_path, 'rb') as f:
            loaded = pickle.load(f)
        # A pickled Model wrapper is unwrapped to its inner model object.
        payload = loaded.model if isinstance(loaded, Model) else loaded
        pool.push_model(payload, stored.hyperparam, stored.key,
                        stored.createtime, stored.freezetime,
                        stored.updatetime)
示例#7
0
    def __init__(self,
                 league_mgr_addr,
                 model_pool_addrs,
                 learner_addr=None,
                 verbose=0,
                 log_interval_steps=51):
        """Create the API clients and configure logging for this actor.

        Args:
            league_mgr_addr: forwarded to LeagueMgrAPIs.
            model_pool_addrs: forwarded to ModelPoolAPIs.
            learner_addr: when truthy, a LearnerAPIs client is created and
                asked for the learner id; otherwise the learner id stays None
                and no learner client attribute is set.
            verbose: level handed to logger.set_level.
            log_interval_steps: stored as the logging interval in steps.
        """
        ip, hostname = get_ip_hostname()
        uid_prefix = str(uuid.uuid1())[:8]
        # Actor id has the form "<hostname>@<ip>:<first 8 chars of a uuid1>".
        self._actor_id = hostname + '@' + ip + ':' + uid_prefix

        self._learner_id = None
        self._league_mgr_apis = LeagueMgrAPIs(league_mgr_addr)
        self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)
        if learner_addr:
            learner_apis = LearnerAPIs(learner_addr)
            self._learner_apis = learner_apis
            self._learner_id = learner_apis.request_learner_id()

        self._log_interval_steps = log_interval_steps
        logger.configure(dir=None, format_strs=['stdout'])
        logger.set_level(verbose)

        self.task = None
        self._steps = 0
示例#8
0
def test1():
    """Smoke-test checkpoint save/restore against a local ModelPool server.

    A ModelPool server is started in a child process on ports 11001:11006,
    a model is pushed and checkpointed to './test', the pool entry is then
    overwritten, and the checkpoint is restored. The server process is always
    stopped, even when one of the steps raises.
    """
    from tleague.model_pools.model_pool import ModelPool
    from tleague.model_pools.model_pool_apis import ModelPoolAPIs
    from multiprocessing import Process

    server_process = Process(
        target=lambda: ModelPool(ports="11001:11006").run())
    server_process.start()
    try:
        model_pool_apis = ModelPoolAPIs(["localhost:11001:11006"])
        model_pool_apis.push_model('model1', None, 'model1')

        saver = ChkptsFromModelPool(model_pool_apis)
        saver._save_model_checkpoint('./', 'test')
        model_pool_apis.push_model('Modified_model1', None, 'model1')

        saver._restore_model_checkpoint('./test')
        # Pull once more after restoring; the result itself is not inspected.
        model_pool_apis.pull_model('model1')
    finally:
        # Without this, a failure above would leave the server process
        # running and holding its ports.
        server_process.terminate()
        server_process.join()
示例#9
0
 def __init__(self,
              league_mgr_addr,
              model_pool_addrs,
              port,
              ds,
              batch_size,
              ob_space,
              ac_space,
              policy,
              outputs=['a'],
              policy_config={},
              gpu_id=0,
              compress=True,
              batch_worker_num=4,
              update_model_seconds=60,
              learner_id=None,
              log_seconds=60,
              model_key="",
              task_attr='model_key',
              **kwargs):
     """Build the inference graph, its data server and load the first model.

     Args:
         league_mgr_addr: forwarded to LeagueMgrAPIs; only used when
             model_key is empty.
         model_pool_addrs: forwarded to ModelPoolAPIs.
         port, ds, batch_worker_num, compress: forwarded to InferDataServer.
         batch_size: inference batch size; also written into
             policy_config['batch_size'].
         ob_space, ac_space: passed to policy.net_config_cls; ac_space also
             defines the action template structure.
         policy: object exposing net_config_cls and net_build_fun.
         outputs: names of the outputs to fetch. The list is copied, so
             neither the caller's list nor the default is modified.
         policy_config: keyword arguments for policy.net_config_cls (None is
             treated as empty). It is copied before 'batch_size' is set.
         gpu_id: a value >= 0 selects that GPU; a negative value means CPU.
         update_model_seconds, log_seconds, learner_id: stored as given.
         model_key: when non-empty, the server serves this fixed model and no
             league manager client is created.
         task_attr: dotted attribute path, stored split on '.'.
         **kwargs: accepted and ignored.
     """
     self._update_model_seconds = update_model_seconds
     self._log_seconds = log_seconds
     self._learner_id = learner_id
     self._task_attr = task_attr.split('.')
     if model_key:
         # If model_key is given, this indicates the infserver works
         # for a fixed model inference
         self._league_mgr_apis = None
         self.is_rl = False
         self.model_key = model_key
     else:
         # If model_key is absent, this indicates an infserver
         # that performs varying policy inference, and model_key will be
         # assigned by querying league_mgr
         self._league_mgr_apis = LeagueMgrAPIs(league_mgr_addr)
         self.is_rl = True
         self.model_key = None
     self.model = None
     self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)
     assert hasattr(policy, 'net_config_cls')
     assert hasattr(policy, 'net_build_fun')
     # Work on private copies: both objects are mutated below (or by
     # setup_fetches) and the signature defaults are shared between calls.
     outputs = list(outputs)
     policy_config = {} if policy_config is None else dict(policy_config)
     # bookkeeping
     self.ob_space = ob_space
     # Fixed: this used to overwrite self.ob_space with ac_space.
     self.ac_space = ac_space
     self.batch_size = batch_size
     self._ac_structure = tp_utils.template_structure_from_gym_space(
         ac_space)
     self.outputs = outputs
     # build the net
     policy_config['batch_size'] = batch_size
     use_gpu = (gpu_id >= 0)
     self.data_server = InferDataServer(
         port=port,
         batch_size=batch_size,
         ds=ds,
         batch_worker_num=batch_worker_num,
         use_gpu=use_gpu,
         compress=compress,
     )
     config = tf.ConfigProto(allow_soft_placement=True)
     if use_gpu:
         config.gpu_options.visible_device_list = str(gpu_id)
         config.gpu_options.allow_growth = True
         if 'use_xla' in policy_config and policy_config['use_xla']:
             config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
     self._sess = tf.Session(config=config)
     self.nc = policy.net_config_cls(ob_space, ac_space, **policy_config)
     self.net_out = policy.net_build_fun(self.data_server._batch_input,
                                         self.nc,
                                         scope='Inf_server')
     # saving/loading ops: one placeholder and one assign op per variable
     self.params = self.net_out.vars.all_vars
     self.params_ph = [
         tf.placeholder(p.dtype, shape=p.get_shape()) for p in self.params
     ]
     self.params_assign_ops = [
         p.assign(np_p) for p, np_p in zip(self.params, self.params_ph)
     ]
     # initialize the net params
     tf.global_variables_initializer().run(session=self._sess)
     self.setup_fetches(outputs)
     self.id_and_fetches = [self.data_server._batch_data_id, self.fetches]
     self._update_model()
示例#10
0
class InfServer(object):
    """Batched policy inference server.

    Inputs are pre-fetched by an InferDataServer; run() repeatedly evaluates
    the network on the current batch, sends the per-sample outputs back and
    periodically refreshes the network parameters from the model pool.
    """

    def __init__(self,
                 league_mgr_addr,
                 model_pool_addrs,
                 port,
                 ds,
                 batch_size,
                 ob_space,
                 ac_space,
                 policy,
                 outputs=['a'],
                 policy_config={},
                 gpu_id=0,
                 compress=True,
                 batch_worker_num=4,
                 update_model_seconds=60,
                 learner_id=None,
                 log_seconds=60,
                 model_key="",
                 task_attr='model_key',
                 **kwargs):
        """Build the inference graph, its data server and load the first model.

        Args:
            league_mgr_addr: forwarded to LeagueMgrAPIs; only used when
                model_key is empty.
            model_pool_addrs: forwarded to ModelPoolAPIs.
            port, ds, batch_worker_num, compress: forwarded to
                InferDataServer.
            batch_size: inference batch size; also written into
                policy_config['batch_size'].
            ob_space, ac_space: passed to policy.net_config_cls; ac_space
                also defines the action template structure.
            policy: object exposing net_config_cls and net_build_fun.
            outputs: names of the outputs to fetch. The list is copied, so
                neither the caller's list nor the default is modified.
            policy_config: keyword arguments for policy.net_config_cls (None
                is treated as empty). It is copied before 'batch_size' is set.
            gpu_id: a value >= 0 selects that GPU; negative means CPU.
            update_model_seconds: minimum seconds between model refreshes in
                run().
            learner_id: id used when querying the league manager for the
                learner's task.
            log_seconds: minimum seconds between throughput log lines.
            model_key: when non-empty, the server serves this fixed model and
                no league manager client is created.
            task_attr: dotted attribute path resolved on the queried task to
                obtain the model key.
            **kwargs: accepted and ignored.
        """
        self._update_model_seconds = update_model_seconds
        self._log_seconds = log_seconds
        self._learner_id = learner_id
        self._task_attr = task_attr.split('.')
        if model_key:
            # If model_key is given, this indicates the infserver works
            # for a fixed model inference
            self._league_mgr_apis = None
            self.is_rl = False
            self.model_key = model_key
        else:
            # If model_key is absent, this indicates an infserver
            # that performs varying policy inference, and model_key will be
            # assigned by querying league_mgr
            self._league_mgr_apis = LeagueMgrAPIs(league_mgr_addr)
            self.is_rl = True
            self.model_key = None
        self.model = None
        self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)
        assert hasattr(policy, 'net_config_cls')
        assert hasattr(policy, 'net_build_fun')
        # Work on private copies: policy_config is mutated below and
        # setup_fetches may append to outputs; the signature defaults are
        # shared between calls.
        outputs = list(outputs)
        policy_config = {} if policy_config is None else dict(policy_config)
        # bookkeeping
        self.ob_space = ob_space
        # Fixed: this used to overwrite self.ob_space with ac_space.
        self.ac_space = ac_space
        self.batch_size = batch_size
        self._ac_structure = tp_utils.template_structure_from_gym_space(
            ac_space)
        self.outputs = outputs
        # build the net
        policy_config['batch_size'] = batch_size
        use_gpu = (gpu_id >= 0)
        self.data_server = InferDataServer(
            port=port,
            batch_size=batch_size,
            ds=ds,
            batch_worker_num=batch_worker_num,
            use_gpu=use_gpu,
            compress=compress,
        )
        config = tf.ConfigProto(allow_soft_placement=True)
        if use_gpu:
            config.gpu_options.visible_device_list = str(gpu_id)
            config.gpu_options.allow_growth = True
            if 'use_xla' in policy_config and policy_config['use_xla']:
                config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        self._sess = tf.Session(config=config)
        self.nc = policy.net_config_cls(ob_space, ac_space, **policy_config)
        self.net_out = policy.net_build_fun(self.data_server._batch_input,
                                            self.nc,
                                            scope='Inf_server')
        # saving/loading ops: one placeholder and one assign op per variable
        self.params = self.net_out.vars.all_vars
        self.params_ph = [
            tf.placeholder(p.dtype, shape=p.get_shape()) for p in self.params
        ]
        self.params_assign_ops = [
            p.assign(np_p) for p, np_p in zip(self.params, self.params_ph)
        ]
        # initialize the net params
        tf.global_variables_initializer().run(session=self._sess)
        self.setup_fetches(outputs)
        self.id_and_fetches = [self.data_server._batch_data_id, self.fetches]
        self._update_model()

    def load_model(self, loaded_params):
        """Assign loaded_params to the network variables, in order.

        Only the first len(loaded_params) assign ops are run, so a shorter
        list updates just the leading variables.
        """
        self._sess.run(
            self.params_assign_ops[:len(loaded_params)],
            feed_dict={p: v
                       for p, v in zip(self.params_ph, loaded_params)})

    def setup_fetches(self, outputs):
        """Build self.all_outputs and self.fetches (one dict per batch item).

        Args:
            outputs: list of output names to fetch. 'state' is appended in
                place when the net uses an LSTM and it is not yet listed.
        """
        def split_batch(template, tf_structure):
            # Split every tensor of the structure along the batch axis and
            # re-pack the pieces into one structure per batch item.
            split_flatten = zip(*[
                tf.split(t, self.batch_size)
                for t in nest.flatten_up_to(template, tf_structure)
            ])
            return [
                nest.pack_sequence_as(template, flatten)
                for flatten in split_flatten
            ]

        def split_or_empty(tensor):
            # Missing heads are represented by one empty list per batch item.
            if tensor is not None:
                return tf.split(tensor, self.batch_size)
            return [[]] * self.batch_size

        if self.nc.use_self_fed_heads:
            a = nest.map_structure_up_to(self._ac_structure,
                                         lambda head: head.sam,
                                         self.net_out.self_fed_heads)
            neglogp = nest.map_structure_up_to(self._ac_structure,
                                               lambda head: head.neglogp,
                                               self.net_out.self_fed_heads)
            flatparam = nest.map_structure_up_to(self._ac_structure,
                                                 lambda head: head.flatparam,
                                                 self.net_out.self_fed_heads)
            self.all_outputs = {
                'a': split_batch(self._ac_structure, a),
                'neglogp': split_batch(self._ac_structure, neglogp),
                'flatparam': split_batch(self._ac_structure, flatparam),
                'v': split_or_empty(self.net_out.value_head),
                'state': split_or_empty(self.net_out.S),
            }
        else:
            flatparam = nest.map_structure_up_to(self._ac_structure,
                                                 lambda head: head.flatparam,
                                                 self.net_out.outer_fed_heads)
            self.all_outputs = {
                'flatparam': split_batch(self._ac_structure, flatparam),
                'state': split_or_empty(self.net_out.S),
            }
        if self.nc.use_lstm and 'state' not in outputs:
            outputs.append('state')
        # fetches[i] maps each requested output name to batch item i's value.
        self.fetches = [
            dict(zip(outputs, pred))
            for pred in zip(*[self.all_outputs[o] for o in outputs])
        ]

    def _update_model(self):
        """Refresh the model key (RL mode) and reload parameters if needed."""
        if self.is_rl:
            self._query_task()
        if self._should_update_model(self.model, self.model_key):
            self.model = self._model_pool_apis.pull_model(self.model_key)
            self.load_model(self.model.model)

    def _query_task(self):
        """Poll the league manager for the learner's task and set model_key.

        Blocks, retrying every 5 seconds, until a task is available. The
        model key is obtained by following self._task_attr on the task.

        Returns:
            The task returned by the league manager.
        """
        assert self.is_rl, '_query_task can be use in RL!'
        task = self._league_mgr_apis.query_learner_task(self._learner_id)
        while task is None:
            print('Learner has not request task! wait...')
            time.sleep(5)
            task = self._league_mgr_apis.query_learner_task(self._learner_id)
        self.last_model_key = self.model_key
        self.model_key = task
        for attr in self._task_attr:
            self.model_key = getattr(self.model_key, attr)
        return task

    def _should_update_model(self, model, model_key):
        """Return True when the held model is missing, for another key or stale.

        A frozen model with the right key is never refreshed; otherwise the
        pool's 'updatetime' for model_key is compared with model.updatetime.
        """
        if model is None or model_key != model.key:
            return True
        elif model.is_freezed():
            return False
        else:
            return self._model_pool_apis.pull_attr(
                'updatetime', model_key) > model.updatetime

    def run(self):
        """Serve inference forever once the data server reports ready."""
        while not self.data_server.ready:
            time.sleep(10)
            print('Waiting at least {} actors to '
                  'connect ...'.format(self.batch_size),
                  flush=True)
        last_update_time = time.time()
        last_log_time = last_update_time
        batch_num = 0
        last_log_batch_num = 0
        pid = os.getpid()
        while True:
            # input is pre-fetched in self.data_server
            data_ids, outputs = self._sess.run(self.id_and_fetches, {})
            self.data_server.response(data_ids, outputs)
            batch_num += 1
            t0 = time.time()
            if t0 > last_update_time + self._update_model_seconds:
                self._update_model()
                last_update_time = t0
            # Re-read the clock: the model update above may have taken time.
            t0 = time.time()
            if t0 > last_log_time + self._log_seconds:
                cost = t0 - last_log_time
                sam_num = self.batch_size * (batch_num - last_log_batch_num)
                print(
                    'Process {} predicts {} samples costs {} seconds, fps {}'.
                    format(pid, sam_num, cost, sam_num / cost),
                    flush=True)
                last_log_batch_num = batch_num
                last_log_time = t0
示例#11
0
    def __init__(self,
                 ports,
                 gpu_id,
                 replay_filelist,
                 batch_size,
                 min_train_sample_num,
                 min_val_sample_num,
                 rm_size,
                 learning_rate,
                 print_interval,
                 checkpoint_interval,
                 num_val_batches,
                 replay_converter_type,
                 policy,
                 policy_config,
                 converter_config=None,
                 policy_config_type=None,
                 model_pool_addrs=None,
                 rollout_length=1,
                 checkpoints_dir=None,
                 restore_checkpoint_path=None,
                 train_generator_worker_num=4,
                 val_generator_worker_num=2,
                 pull_worker_num=2,
                 num_sgd_updates=int(1e30),
                 repeat_training_task=False,
                 unroll_length=32,
                 pub_interval=50,
                 max_clip_grad_norm=1,
                 after_loading_init_scope=None,
                 use_mixed_precision=False,
                 use_sparse_as_dense=False,
                 enable_validation=True,
                 post_process_data=None):
        """Build the imitation-learning graph, data server and train/val steps.

        Args:
            ports: exactly two ports, forwarded to ImDataServer.
            gpu_id: a value >= 0 selects that GPU; negative means CPU.
            replay_filelist: passed to _get_local_replays, which yields the
                train and validation replay lists.
            batch_size: training batch size; overwrites
                policy_config['batch_size'].
            min_train_sample_num, min_val_sample_num, rm_size,
            train_generator_worker_num, val_generator_worker_num,
            pull_worker_num, repeat_training_task, unroll_length:
                forwarded to ImDataServer.
            learning_rate: learning rate of the Adam optimizer.
            print_interval, checkpoint_interval, num_val_batches,
            num_sgd_updates: stored for later use by the instance.
            replay_converter_type: called with **converter_config; its
                .space.spaces provides (ob_space, ac_space).
            policy: callable invoked as policy(inputs=, nc=, scope=).
            policy_config: dict of net-config keyword arguments. It is
                modified in place ('batch_size', and 'rollout_len' when
                'use_lstm' is set).
            converter_config: keyword arguments for replay_converter_type;
                None means no arguments.
            policy_config_type: called as
                policy_config_type(ob_space, ac_space, **policy_config); the
                None default is not callable, so a value must be supplied.
            model_pool_addrs: required when policy_config['use_lstm'] is set.
            rollout_length: forwarded to ImDataServer and, with an LSTM,
                written to policy_config['rollout_len'].
            checkpoints_dir: kept only on rank 0; other ranks store None.
            restore_checkpoint_path: when given, the model checkpoint at this
                path is restored.
            pub_interval: stored as self.pub_interval.
            max_clip_grad_norm: when > 0, gradients of the model's LSTM
                variables are clipped to this global norm.
            after_loading_init_scope: when given, trainable variables under
                this scope are re-initialized after checkpoint loading.
            use_mixed_precision: try to enable the mixed-precision graph
                rewrite on the optimizer.
            use_sparse_as_dense: forwarded to hvd.DistributedOptimizer.
            enable_validation: stored as self._enable_validation.
            post_process_data: optional callable mapping
                (ob_space, ac_space) to a new (ob_space, ac_space).
        """
        assert len(ports) == 2
        self.use_hvd = has_hvd and hvd.size() > 1
        self.rank = 0 if not self.use_hvd else hvd.rank()
        self.model_key = 'IL-model'
        self.pub_interval = pub_interval
        self.rnn = (False if 'use_lstm' not in policy_config else
                    policy_config['use_lstm'])
        self.hs_len = None
        # overwrite it using the batch_size for training
        policy_config['batch_size'] = batch_size
        if self.rnn:
            assert model_pool_addrs is not None
            self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)
            self._model_pool_apis.check_server_set_up()
            policy_config['rollout_len'] = rollout_length
            # infer hidden state length (size)
            if 'hs_len' in policy_config:
                self.hs_len = policy_config['hs_len']
            elif 'nlstm' in policy_config:
                self.hs_len = 2 * policy_config['nlstm']
            else:
                self.hs_len = 128

        # Only the rank-0 worker of an LSTM model pushes to the model pool.
        self.should_push_model = (self.rnn and self.rank == 0)
        use_gpu = (gpu_id >= 0)
        converter_config = {} if converter_config is None else converter_config
        train_replay_filelist, val_replay_filelist = _get_local_replays(
            replay_filelist)
        replay_converter = replay_converter_type(**converter_config)
        ob_space, ac_space = replay_converter.space.spaces
        if post_process_data is not None:
            ob_space, ac_space = post_process_data(ob_space, ac_space)
        self.data_pool = ImDataServer(
            ports=ports,
            train_replay_filelist=train_replay_filelist,
            val_replay_filelist=val_replay_filelist,
            batch_size=batch_size,
            min_train_sample_num=min_train_sample_num,
            min_val_sample_num=min_val_sample_num,
            ob_space=ob_space,
            ac_space=ac_space,
            train_generator_worker_num=train_generator_worker_num,
            val_generator_worker_num=val_generator_worker_num,
            pull_worker_num=pull_worker_num,
            rm_size=rm_size,
            repeat_training_task=repeat_training_task,
            unroll_length=unroll_length,
            rollout_length=rollout_length,
            lstm=self.rnn,
            hs_len=self.hs_len,
            use_gpu=use_gpu)
        self._enable_validation = enable_validation

        config = tf.ConfigProto(allow_soft_placement=True)
        if use_gpu:
            config.gpu_options.visible_device_list = str(gpu_id)
            config.gpu_options.allow_growth = True
        self._sess = tf.Session(config=config)

        net_config = policy_config_type(ob_space, ac_space, **policy_config)
        # Separate config for validation: the training one may get its
        # endpoints disabled below.
        net_config_val = deepcopy(net_config)
        # Create the shared variable scope once so that the training and
        # validation nets are built under the same 'model' scope.
        with tf.variable_scope('model', reuse=tf.AUTO_REUSE) as model_scope:
            pass

        def create_policy(inputs, nc):
            return policy(inputs=inputs, nc=nc, scope=model_scope)

        if hasattr(net_config, 'endpoints_verbosity'):
            # intentionally disables endpoints during training
            net_config.endpoints_verbosity = 0
        device = '/gpu:0' if use_gpu else '/cpu:0'
        with tf.device(device):
            if 'use_xla' in policy_config and policy_config['use_xla']:
                try:
                    # Use tensorflow's accelerated linear algebra compile method
                    with tf.xla.experimental.jit_scope(True):
                        model = create_policy(self.data_pool.train_batch_input,
                                              net_config)
                except Exception:
                    # Best-effort fallback to a non-XLA build; a bare except
                    # would also swallow KeyboardInterrupt/SystemExit.
                    logger.log(
                        "WARNING: using tf.xla requires tf version>=1.15.")
                    model = create_policy(self.data_pool.train_batch_input,
                                          net_config)
            else:
                model = create_policy(self.data_pool.train_batch_input,
                                      net_config)

        model_val = create_policy(self.data_pool.val_batch_input,
                                  net_config_val)
        params = tf.trainable_variables(scope='model')
        param_norm = tf.global_norm(params)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           epsilon=1e-5)
        if use_mixed_precision:
            try:
                optimizer = tf.compat.v1.train.experimental.enable_mixed_precision_graph_rewrite(
                    optimizer)
            except Exception:
                # Best-effort: keep the plain optimizer when unavailable.
                logger.warn(
                    "using tf mixed_precision requires tf version>=1.15.")
        if self.use_hvd:
            optimizer = hvd.DistributedOptimizer(
                optimizer, sparse_as_dense=use_sparse_as_dense)
            barrier_op = hvd.allreduce(tf.Variable(0.))
            self.barrier = lambda: self._sess.run(barrier_op)
        train_loss = tf.reduce_mean(model.loss.total_il_loss *
                                    self.data_pool.train_batch_weight)
        val_loss = tf.reduce_mean(model_val.loss.total_il_loss *
                                  self.data_pool.val_batch_weight)
        if hasattr(net_config, 'weight_decay') and not net_config.weight_decay:
            # None or 0.0
            total_loss = train_loss
        else:
            total_loss = train_loss + model.loss.total_reg_loss
        grads_and_vars = optimizer.compute_gradients(total_loss, params)
        clip_vars = model.vars.lstm_vars
        # Keep every gradient paired with its own variable. Zipping the
        # clipped gradients with clip_vars directly would mis-pair them
        # whenever clip_vars is ordered differently from params or holds a
        # variable that received no gradient.
        clip_grads_and_vars = [(grad, var) for grad, var in grads_and_vars
                               if var in clip_vars]
        nonclip_grads_and_vars = [(grad, var) for grad, var in grads_and_vars
                                  if var not in clip_vars]
        clip_grads = [grad for grad, _ in clip_grads_and_vars]
        if max_clip_grad_norm > 0:
            clip_grads, clip_grad_norm = tf.clip_by_global_norm(
                clip_grads, max_clip_grad_norm)
        else:
            clip_grad_norm = tf.global_norm(clip_grads)
        clip_grads_and_vars = list(
            zip(clip_grads, [var for _, var in clip_grads_and_vars]))
        grads_and_vars = clip_grads_and_vars + nonclip_grads_and_vars
        grad_norm = tf.global_norm(list(zip(*grads_and_vars))[0])

        train_op = optimizer.apply_gradients(grads_and_vars)
        tf.global_variables_initializer().run(session=self._sess)

        # Placeholders/assign ops used to load model and optimizer values.
        self.new_params = [
            tf.placeholder(p.dtype, shape=p.get_shape()) for p in params
        ]
        self.param_assign_ops = [
            p.assign(new_p) for p, new_p in zip(params, self.new_params)
        ]
        opt_params = optimizer.variables()
        self.new_opt_params = [
            tf.placeholder(p.dtype, shape=p.get_shape()) for p in opt_params
        ]
        self.opt_param_assign_ops = [
            p.assign(new_p)
            for p, new_p in zip(opt_params, self.new_opt_params)
        ]

        def read_params():
            return self._sess.run(params)

        def read_opt_params():
            return self._sess.run(opt_params)

        def load_model(np_new_params):
            self._sess.run(
                self.param_assign_ops,
                feed_dict={
                    p: np_p
                    for p, np_p in zip(self.new_params, np_new_params)
                })

        def restore_optimizer(np_new_opt_params):
            self._sess.run(
                self.opt_param_assign_ops,
                feed_dict={
                    p: np_p
                    for p, np_p in zip(self.new_opt_params, np_new_opt_params)
                })

        # The two step closures below reference tensors that are defined
        # further down; they are only called after __init__ has finished.
        def _train_step():
            # Drop the last fetched value (train_op's result).
            return self._sess.run([
                train_loss_aggregated, *train_other_losses_aggregated,
                grad_norm, clip_grad_norm, param_norm, train_op
            ], {})[:-1]

        def _val_step():
            return self._sess.run([
                val_loss_aggregated, *val_other_losses_aggregated,
                *endpoints_aggregated
            ], {})

        self._saver = ChkptsFromSelf(read_params, load_model, self.model_key)

        if restore_checkpoint_path is not None:
            self._saver._restore_model_checkpoint(restore_checkpoint_path)

        if after_loading_init_scope is not None:
            var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                         scope=after_loading_init_scope)
            logger.log('perform after loading init for vars')
            for v in var_list:
                logger.log(v)
            tf.variables_initializer(var_list).run(session=self._sess)

        if self.use_hvd:
            hvd.broadcast_global_variables(0).run(session=self._sess)

        # Identity without horovod, allreduce across workers with it.
        _allreduce = lambda x: x if not self.use_hvd else hvd.allreduce(x)
        train_loss_aggregated = _allreduce(train_loss)
        train_other_loss_names = model.loss.loss_endpoints.keys()
        train_other_losses_aggregated = [
            _allreduce(tf.reduce_mean(l * self.data_pool.train_batch_weight))
            for l in model.loss.loss_endpoints.values()
        ]
        val_loss_aggregated = _allreduce(val_loss)
        val_other_loss_names = model_val.loss.loss_endpoints.keys()
        val_other_losses_aggregated = [
            _allreduce(tf.reduce_mean(l * self.data_pool.val_batch_weight))
            for l in model_val.loss.loss_endpoints.values()
        ]
        endpoints_names = model_val.endpoints.keys()
        endpoints_aggregated = [
            _allreduce(tf.reduce_mean(l))
            for l in model_val.endpoints.values()
        ]
        # No graph ops may be added after this point.
        self._sess.graph.finalize()
        self._total_samples = lambda: [
            self.data_pool._num_train_samples, self.data_pool._num_val_samples
        ]
        # Names line up with the values returned by _train_step / _val_step.
        self._train_log_names = (['loss'] + list(train_other_loss_names) +
                                 ['grad_norm', 'clip_grad_norm', 'param_norm'])
        self._val_log_names = (['loss'] + list(val_other_loss_names) +
                               list(endpoints_names))
        self._batch_size = batch_size
        self._train_step = _train_step
        self._val_step = _val_step
        self._print_interval = print_interval
        self._checkpoint_interval = checkpoint_interval
        self._num_val_batches = num_val_batches
        self._checkpoints_dir = checkpoints_dir if self.rank == 0 else None
        self._num_sgd_updates = num_sgd_updates
        self.load_model = load_model
        self.restore_optimizer = restore_optimizer
        self.read_params = read_params
        self.read_opt_params = read_opt_params

        format_strs = ['log', 'tensorboard', 'csv']
        logger.configure(dir='training_log/rank{}'.format(self.rank),
                         format_strs=['stdout'] + format_strs)
        # Capture a second logger, configured for validation output.
        with logger.scoped_configure(dir='validation_log/rank{}'.format(
                self.rank),
                                     format_strs=['stderr'] + format_strs):
            self.val_logger = logger.Logger.CURRENT
示例#12
0
 def __init__(self,
              learner_addr,
              replay_dir,
              replay_converter_type,
              policy=None,
              policy_config=None,
              model_pool_addrs=None,
              n_v=1,
              log_interval=50,
              step_mul=8,
              SC2_bin_root='/root/',
              game_version='3.16.1',
              unroll_length=32,
              update_model_freq=32,
              converter_config=None,
              agent_cls=None,
              infserver_addr=None,
              compress=True,
              da_rate=-1.,
              unk_mmr_dft_to=4000):
     """Set up the learner client, push thread, replay converter and agent.

     Args:
         learner_addr: forwarded to ImLearnerAPIs.
         replay_dir, step_mul, SC2_bin_root, log_interval,
         update_model_freq, da_rate, unk_mmr_dft_to: stored as given.
         replay_converter_type: called with **converter_config; its .space
             unpacks into (ob_space, ac_space).
         policy: when not None an agent is created; with infserver_addr the
             agent is a PGAgentGPU, otherwise agent_cls (PPOAgent when
             agent_cls is falsy).
         policy_config: dict of policy settings (None means empty); it is
             modified in place when a policy is used.
         model_pool_addrs: forwarded to ModelPoolAPIs when no infserver is
             used.
         n_v: forwarded to the locally created agent.
         game_version: stored and written to
             converter_config['game_version'].
         unroll_length: stored and used as the data queue's max size.
         converter_config: dict of converter keyword arguments (None means
             empty); it is modified in place.
         compress: forwarded to PGAgentGPU.
     """
     self._data_pool_apis = ImLearnerAPIs(learner_addr)

     # Plain settings copied from the arguments.
     self._replay_dir = replay_dir
     self._SC2_bin_root = SC2_bin_root
     self._game_version = game_version
     self._step_mul = step_mul
     self._log_interval = log_interval
     self._unroll_length = unroll_length

     # Background daemon thread that runs _push_data on the data queue.
     data_queue = Queue(unroll_length)
     self._data_queue = data_queue
     push_thread = Thread(target=self._push_data, args=(data_queue, ))
     push_thread.daemon = True
     self._push_thread = push_thread
     push_thread.start()

     if converter_config is None:
         converter_config = {}
     self.converter_config = converter_config
     converter_config['game_version'] = game_version
     self.replay_converter_type = replay_converter_type
     self._replay_converter = replay_converter_type(**converter_config)

     self._use_policy = policy is not None
     self._update_model_freq = update_model_freq
     self.model_key = 'IL-model'
     self._da_rate = da_rate
     self._unk_mmr_dft_to = unk_mmr_dft_to
     self._system = platform.system()

     ob_space, ac_space = self._replay_converter.space
     if self._use_policy:
         self.model = None
         if policy_config is None:
             policy_config = {}
         if not agent_cls:
             agent_cls = PPOAgent
         # Single-sample, single-step inference without any loss.
         forced_settings = (
             ('batch_size', 1),
             ('rollout_len', 1),
             ('use_loss_type', 'none'),
         )
         for name, value in forced_settings:
             policy_config[name] = value
         self.infserver_addr = infserver_addr
         if infserver_addr is not None:
             # Remote inference through an inference server.
             nc = policy.net_config_cls(ob_space, ac_space, **policy_config)
             ds = InfData(ob_space, ac_space,
                          policy_config['use_self_fed_heads'], nc.use_lstm,
                          nc.hs_len)
             self.agent = PGAgentGPU(infserver_addr, ds, nc.hs_len,
                                     compress)
         else:
             # Local agent; a model pool client is created alongside it.
             self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)
             self.agent = agent_cls(policy,
                                    ob_space,
                                    ac_space,
                                    n_v=n_v,
                                    scope_name='self',
                                    policy_config=policy_config)
     # hs_len does not matter
     self.ds = ILData(ob_space, ac_space, self._use_policy, 1)
示例#13
0
class ReplayActor(object):
    """Extract frames from replays and stream them to an imitation learner.

    Replay tasks are requested from the learner reachable at `learner_addr`.
    Each task is decoded frame by frame with a `ReplayExtractor`; the frames
    are handed through a bounded queue to a background daemon thread that
    pushes them to the learner in batches of `unroll_length`.

    When `policy` is given, every frame is additionally tagged with the
    agent's current state and a first-frame flag, and (unless an inference
    server is used) the agent's model is refreshed from the model pool.
    """

    def __init__(self,
                 learner_addr,
                 replay_dir,
                 replay_converter_type,
                 policy=None,
                 policy_config=None,
                 model_pool_addrs=None,
                 n_v=1,
                 log_interval=50,
                 step_mul=8,
                 SC2_bin_root='/root/',
                 game_version='3.16.1',
                 unroll_length=32,
                 update_model_freq=32,
                 converter_config=None,
                 agent_cls=None,
                 infserver_addr=None,
                 compress=True,
                 da_rate=-1.,
                 unk_mmr_dft_to=4000):
        """Set up the learner connection, converter, agent and push thread.

        Args:
          learner_addr: address handed to `ImLearnerAPIs`.
          replay_dir: forwarded to `ReplayExtractor` as `replay_dir`.
          replay_converter_type: callable instantiated with
            `**converter_config` to build the replay converter.
          policy: when not None, an agent is created and every frame carries
            the agent state plus a first-frame flag.
          policy_config: optional dict of policy settings. A copy is used;
            `batch_size` and `rollout_len` are forced to 1 and
            `use_loss_type` to 'none'. When `infserver_addr` is set it must
            contain the key 'use_self_fed_heads'.
          model_pool_addrs: handed to `ModelPoolAPIs` (only used when
            `policy` is given and `infserver_addr` is None).
          n_v: forwarded to `agent_cls`.
          log_interval: a progress line is logged every `log_interval` frames.
          step_mul: forwarded to `ReplayExtractor`.
          SC2_bin_root: joined with the game version to form `SC2PATH` on
            Linux (see `_adapt_system`).
          game_version: version used when a replay task carries none.
          unroll_length: capacity of the frame queue and number of frames
            sent per push.
          update_model_freq: with a policy, the model is checked for updates
            every `update_model_freq` frames.
          converter_config: optional dict of converter keyword arguments. A
            copy is used and its 'game_version' entry is overwritten.
          agent_cls: agent class for local inference; defaults to `PPOAgent`.
          infserver_addr: when not None a `PGAgentGPU` bound to this address
            is used and local model updates are skipped.
          compress: forwarded to `PGAgentGPU`.
          da_rate: forwarded to `ReplayExtractor`.
          unk_mmr_dft_to: forwarded to `ReplayExtractor`.
        """
        self._data_pool_apis = ImLearnerAPIs(learner_addr)
        self._SC2_bin_root = SC2_bin_root
        self._log_interval = log_interval
        self._replay_dir = replay_dir
        self._step_mul = step_mul
        self._game_version = game_version
        self._unroll_length = unroll_length
        self._data_queue = Queue(unroll_length)
        # The pusher idles until run() has put the first frame, so it is safe
        # to start it before the remaining attributes are assigned.
        self._push_thread = Thread(target=self._push_data,
                                   args=(self._data_queue, ))
        self._push_thread.daemon = True
        self._push_thread.start()
        # Work on a copy so the caller's dict is not modified.
        self.converter_config = ({} if converter_config is None else
                                 dict(converter_config))
        self.converter_config['game_version'] = game_version
        self.replay_converter_type = replay_converter_type
        self._replay_converter = replay_converter_type(**self.converter_config)
        self._use_policy = policy is not None
        self._update_model_freq = update_model_freq
        self.model_key = 'IL-model'
        self._da_rate = da_rate
        self._unk_mmr_dft_to = unk_mmr_dft_to
        self._system = platform.system()
        ob_space, ac_space = self._replay_converter.space
        if self._use_policy:
            self.model = None
            # Copy before overriding keys so the caller's dict is untouched.
            policy_config = {} if policy_config is None else dict(policy_config)
            agent_cls = agent_cls or PPOAgent
            policy_config['batch_size'] = 1
            policy_config['rollout_len'] = 1
            policy_config['use_loss_type'] = 'none'
            self.infserver_addr = infserver_addr
            if infserver_addr is None:
                # Local inference: models are pulled from the model pool.
                self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)
                self.agent = agent_cls(policy,
                                       ob_space,
                                       ac_space,
                                       n_v=n_v,
                                       scope_name='self',
                                       policy_config=policy_config)
            else:
                # Remote inference through the inference server.
                nc = policy.net_config_cls(ob_space, ac_space, **policy_config)
                ds = InfData(ob_space, ac_space,
                             policy_config['use_self_fed_heads'], nc.use_lstm,
                             nc.hs_len)
                self.agent = PGAgentGPU(infserver_addr, ds, nc.hs_len,
                                        compress)
        self.ds = ILData(ob_space, ac_space, self._use_policy,
                         1)  # hs_len does not matter

    def run(self):
        """Process replay tasks until the learner returns an empty string."""
        self.replay_task = self._data_pool_apis.request_replay_task()
        while self.replay_task != "":
            game_version = self.replay_task.game_version or self._game_version
            self._adapt_system(game_version)
            if game_version != self._game_version:
                # need re-init replay converter
                self._game_version = game_version
                self.converter_config['game_version'] = game_version
                self._replay_converter = self.replay_converter_type(
                    **self.converter_config)
            game_core_config = self.converter_config.get(
                'game_core_config', {})
            extractor = ReplayExtractor(
                replay_dir=self._replay_dir,
                replay_filename=self.replay_task.replay_name,
                player_id=self.replay_task.player_id,
                replay_converter=self._replay_converter,
                step_mul=self._step_mul,
                version=game_version,
                game_core_config=game_core_config,
                da_rate=self._da_rate,
                unk_mmr_dft_to=self._unk_mmr_dft_to)
            self._steps = 0
            first_frame = True
            if self._use_policy:
                self.agent.reset()
                self._update_agent_model()
            for frame in extractor.extract():
                # frame[0] is the sample; frame[1] is queued next to it and
                # later pushed in the `weights` list.
                data = frame[0]
                if self._use_policy:
                    data = self._attach_agent_state(data, first_frame)
                    first_frame = False
                data = self.ds.flatten(self.ds.structure(data))
                if self._data_queue.full():
                    logger.log("Actor's queue is full.", level=logger.WARN)
                self._data_queue.put((TensorZipper.compress(data), frame[1]))
                logger.log('successfully put one tuple.', level=logger.DEBUG)
                self._steps += 1
                if self._steps % self._log_interval == 0:
                    logger.log(
                        "%d frames of replay task [%s] sent to learner." %
                        (self._steps, self.replay_task))
                if self._use_policy and self._steps % self._update_model_freq == 0:
                    self._update_agent_model()
            logger.log("Replay task [%s] done. %d frames sent to learner." %
                       (self.replay_task, self._steps))
            self.replay_task = self._data_pool_apis.request_replay_task()
        logger.log("All tasks done.")

    def _attach_agent_state(self, sample, first_frame):
        """Return `sample` extended with the agent state and first-frame flag.

        The agent state is read *before* the agent is advanced with the
        frame's first element, so the returned tuple carries the state the
        agent had when it saw this frame.

        Args:
          sample: the per-frame tuple yielded by the extractor (`frame[0]`).
          first_frame: True for the first frame of a replay task.

        Returns:
          `(*sample, agent_state, flag)` where `flag` is a 0-d boolean array.
        """
        # np.bool_ instead of the `np.bool` alias: the alias was deprecated in
        # NumPy 1.20 and removed in 1.24.
        data = (*sample, self.agent.state, np.array(first_frame, np.bool_))
        self.agent.update_state(sample[0])
        return data

    def _adapt_system(self, game_version):
        """Point `SC2PATH` at the binary directory for `game_version`.

        Only acts on Linux. For version '4.7.1' the variable is overwritten
        only if it is already present in the environment; for every other
        version it is always set.
        """
        # TODO(pengsun): any stuff for Darwin, Window?
        if self._system == 'Linux':
            # set the SC2PATH for sc2 binary. See deepmind/pysc2 doc.
            if game_version != '4.7.1' or 'SC2PATH' in os.environ:
                os.environ['SC2PATH'] = os.path.join(self._SC2_bin_root,
                                                     game_version)
        return

    def _update_agent_model(self):
        """Load the newest 'IL-model' into the agent if it has changed.

        Does nothing when an inference server address is configured.
        """
        if self.infserver_addr is not None:
            return
        logger.log('entering _update_agents_model',
                   'steps: {}'.format(self._steps),
                   level=logger.DEBUG + 5)
        if self._should_update_model(self.model, self.model_key):
            model = self._model_pool_apis.pull_model(self.model_key)
            self.agent.load_model(model.model)
            self.model = model

    def _should_update_model(self, model, model_key):
        """Return True if no model is held yet or the pool has a newer one.

        "Newer" means the pool's 'updatetime' attribute for `model_key` is
        greater than `model.updatetime`.
        """
        if model is None:
            return True
        latest = self._model_pool_apis.pull_attr('updatetime', model_key)
        return latest > model.updatetime

    def _push_data(self, data_queue):
        """Forward queued frames to the learner; runs in the push thread.

        Waits (polling every 5 seconds) until the queue holds a first item,
        then loops forever: collects `unroll_length` (frame, weight) pairs and
        pushes them together with the replay task that was current when the
        batch was started.
        """
        while data_queue.empty():
            time.sleep(5)
        logger.log('entering _push_data_to_learner',
                   'steps: {}'.format(self._steps),
                   level=logger.DEBUG + 5)
        while True:
            # NOTE(review): the task is sampled once per batch, so a batch that
            # spans two replay tasks is labelled with the earlier one.
            task = self.replay_task
            frames = []
            weights = []
            for _ in range(self._unroll_length):
                # Blocks until a frame is available.
                frame, weight = data_queue.get()
                frames.append(frame)
                weights.append(weight)
            self._data_pool_apis.push_data((task, frames, weights))
示例#14
0
 def test_push_model(self):
     """Smoke test: push an entry with no model and no hyperparam.

     Passes as long as `push_model` returns without raising; nothing is
     read back from the pool.
     """
     pool_addrs = ["localhost:11001:11006", "localhost:11002:11007"]
     client = ModelPoolAPIs(model_pool_addrs=pool_addrs)
     # A fresh uuid keeps the key distinct from those used by other tests.
     client.push_model(None, None, str(uuid.uuid1()))
示例#15
0
  def test_checkpoint(self):
    """Restore a second league manager from a checkpoint and compare models.

    Two models are pushed to the first model pool, one of them is added to
    the league and the other is updated. A new `LeagueMgr` is then started
    from the most recent directory under ./checkpoints, attached to a second
    model pool, and both keys must be present there with the same model,
    key and createtime as in the first pool.

    The addresses used imply services already listening on localhost ports
    11007, 11001/11006 and 11011/11016.
    """
    league_client = LeagueMgrAPIs(league_mgr_addr="localhost:11007")
    model_client1 = ModelPoolAPIs(model_pool_addrs=["localhost:11001:11006"])
    hyperparam = MutableHyperparam()
    model_key1 = str(uuid.uuid1())
    model_key2 = str(uuid.uuid1())
    model_client1.push_model("model_data1", hyperparam, model_key1)
    model_client1.push_model("model_data2", hyperparam, model_key2)
    time.sleep(4)
    league_client.request_add_model(
      Model("model_data1", hyperparam, model_key1))
    model_client1.push_model("model_data3", hyperparam, model_key2)
    # presumably long enough for a checkpoint to be written -- TODO confirm
    time.sleep(3)
    checkpoint_root = "./checkpoints"
    checkpoints = [filename for filename in os.listdir(checkpoint_root)
                   if filename.startswith("checkpoint")]
    self.assertTrue(len(checkpoints) > 0)

    # os.listdir returns entries in arbitrary order, so "the last element" is
    # not reliably the newest checkpoint; select by modification time instead.
    newest = max(
        checkpoints,
        key=lambda name: os.path.getmtime(os.path.join(checkpoint_root, name)))
    checkpoint_dir = os.path.join(checkpoint_root, newest)
    league_process = Process(
        target=lambda: LeagueMgr(
          port="11008",
          model_pool_addrs=["localhost:11011:11016"],
          mutable_hyperparam_type='MutableHyperparam',
          restore_checkpoint_dir=checkpoint_dir).run())
    league_process.start()
    try:
      model_client2 = ModelPoolAPIs(model_pool_addrs=["localhost:11011:11016"])
      time.sleep(2)
      keys = model_client2.pull_keys()
      self.assertTrue(model_key1 in keys)
      self.assertTrue(model_key2 in keys)
      for key in (model_key1, model_key2):
        original = model_client1.pull_model(key)
        restored = model_client2.pull_model(key)
        self.assertEqual(original.model, restored.model)
        self.assertEqual(original.key, restored.key)
        self.assertEqual(original.createtime, restored.createtime)
    finally:
      # Always stop the child process, even when an assertion above fails.
      league_process.terminate()
示例#16
0
class BaseLearner(object):
  """Base learner class.

  Defines the basic workflow for a learner: request a task from the league
  manager, initialise it, train, then report the task as finished, forever.
  Subclasses provide `_train` and `_init_task`.
  """

  def __init__(self, league_mgr_addr, model_pool_addrs, learner_ports,
               learner_id=''):
    """Bind the learner sockets and connect to league manager and model pool.

    Args:
      league_mgr_addr: address handed to `LeagueMgrAPIs`.
      model_pool_addrs: addresses handed to `ModelPoolAPIs`.
      learner_ports: sequence of at least two ports; the first is bound by
        the REP socket that answers 'learner_id' queries, the second by the
        PULL socket that `_pull_data` reads from.
      learner_id: identifier of this learner; a fresh uuid1 string is used
        when empty.
    """
    self._learner_id = learner_id or str(uuid.uuid1())

    self._zmq_context = zmq.Context()
    self._rep_socket = self._zmq_context.socket(zmq.REP)
    self._rep_socket.bind("tcp://*:%s" % learner_ports[0])
    self._pull_socket = self._zmq_context.socket(zmq.PULL)
    # Receive high-water mark of 1 on the PULL socket.
    self._pull_socket.setsockopt(zmq.RCVHWM, 1)
    self._pull_socket.bind("tcp://*:%s" % learner_ports[1])
    # The worker only touches _rep_socket and _learner_id, both set above.
    self._message_thread = Thread(target=self._message_worker)
    self._message_thread.daemon = True
    self._message_thread.start()
    self._league_mgr_apis = LeagueMgrAPIs(league_mgr_addr)
    self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)

    self.task = None
    self.model_key = None
    self.last_model_key = None
    self._lrn_period_count = 0  # learning period count
    self._pull_lock = Lock()

  def run(self):
    """Run learning periods forever: request, init, train, finish."""
    while True:
      self.task = self._request_task()
      self._init_task()
      self._train()
      self._finish_task()
      self._lrn_period_count += 1

  # NOTE: the class derives from plain `object` (no ABCMeta), so
  # @abstractmethod is not enforced at instantiation time here.
  @abstractmethod
  def _train(self, **kwargs):
    """Train on the current task; to be implemented by subclasses."""
    pass

  @abstractmethod
  def _init_task(self):
    """Prepare for the current task; to be implemented by subclasses."""
    pass

  def _request_task(self):
    """Request a learner task and track its model key.

    Returns:
      The task returned by the league manager.
    """
    task = self._league_mgr_apis.request_learner_task(self._learner_id)
    self.last_model_key = self.model_key
    self.model_key = task.model_key
    # lazy freeze the model of last lp, then actors will stop the last lp.
    if self.last_model_key and self.model_key != self.last_model_key:
      self._model_pool_apis.freeze_model(self.last_model_key)
    return task

  def _query_task(self):
    """Query the learner task; update the model keys if one is returned.

    Returns:
      The queried task, or None when the league manager returns None.
    """
    task = self._league_mgr_apis.query_learner_task(self._learner_id)
    if task is not None:
      self.last_model_key = self.model_key
      self.model_key = task.model_key
    return task

  def _finish_task(self):
    """Finish the current task by notifying the league manager."""
    self._notify_task_end()

  def _pull_data(self):
    """Receive one message from the PULL socket and unpickle it.

    The receive is serialised with `_pull_lock`. The lock is held through a
    `with` block so that it is released even if `recv` raises.

    Returns:
      The unpickled Python object.
    """
    with self._pull_lock:
      data = self._pull_socket.recv(copy=False)
    # NOTE(review): this unpickles bytes received over the network; pickle is
    # unsafe on untrusted input, so the PULL port must only be reachable by
    # trusted peers.
    return pickle.loads(data)

  def _message_worker(self):
    """Answer 'learner_id' requests on the REP socket; runs in a thread.

    Raises:
      RuntimeError: on any other message, which ends this worker thread.
    """
    while True:
      msg = self._rep_socket.recv_string()
      if msg == 'learner_id':
        self._rep_socket.send_pyobj(self._learner_id)
      else:
        raise RuntimeError("message not recognized")

  def _notify_task_begin(self, task):
    """Tell the league manager that this learner has started `task`."""
    self._league_mgr_apis.notify_learner_task_begin(self._learner_id, task)

  def _notify_task_end(self):
    """Tell the league manager that this learner's task has ended."""
    self._league_mgr_apis.notify_learner_task_end(self._learner_id)