Example #1
def train():
    assert tf.test.is_gpu_available(), "Training requires GPUs!"
    dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    num_gpu = get_num_gpu()
    global PREDICTOR_THREAD
    if num_gpu > 0:
        if num_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str,
                                                          predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    config = TrainConfig(
        model=Model(),
        dataflow=master.get_training_dataflow(),
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003),
                                                        (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            PeriodicTrigger(Evaluator(EVAL_EPISODE, ['state'], ['policy'],
                                      get_player),
                            every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if num_gpu == 1 else AsyncMultiGPUTrainer(
        train_tower)
    launch_train_with_config(config, trainer)
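The slicing arithmetic above recurs in almost every example below: when more than one GPU is visible, the last half of the devices is reserved for inference and the remainder for training, with GPU 0 reused for both roles when only one device exists. A small standalone illustration (plain Python, no tensorpack required):

# How the train/inference tower split behaves for small GPU counts.
for num_gpu in (1, 2, 3, 4):
    gpus = list(range(num_gpu))
    predict_tower = gpus[-num_gpu // 2:] if num_gpu > 1 else [0]
    train_tower = gpus[:-num_gpu // 2] or [0]
    print(num_gpu, train_tower, predict_tower)
# 1 -> train [0]     predict [0]
# 2 -> train [0]     predict [1]
# 3 -> train [0]     predict [1, 2]
# 4 -> train [0, 1]  predict [2, 3]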
Example #2
    def _before_train(self) -> None:
        self.queue = mp.Queue()
        self.event = mp.Event()
        self.process = mp.Process(target=self._worker,
                                  args=(self.devices, self.queue, self.event))
        ensure_proc_terminate(self.process)
        start_proc_mask_signal(self.process)
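Both helpers used here come from tensorpack's concurrency utilities: ensure_proc_terminate registers the child processes to be terminated when the parent exits, and start_proc_mask_signal starts them with SIGINT masked so that Ctrl-C is handled by the parent only. A minimal sketch of the same pattern with a dummy worker (the import path assumes a recent tensorpack release):

import multiprocessing as mp

from tensorpack.utils.concurrency import (ensure_proc_terminate,
                                          start_proc_mask_signal)


def _worker(q):
    # dummy workload standing in for a simulator process
    for i in range(5):
        q.put(i)


if __name__ == '__main__':
    queue = mp.Queue()
    procs = [mp.Process(target=_worker, args=(queue,)) for _ in range(4)]
    ensure_proc_terminate(procs)    # terminate children when the parent exits
    start_proc_mask_signal(procs)   # start with SIGINT blocked in the children
    for _ in range(4 * 5):
        print(queue.get())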
Example #3
def eval_model_multiprocess(model_path, romfile):
    M = Model()
    cfg = PredictConfig(
            model=M,
            input_data_mapping=[0],
            session_init=SaverRestore(model_path),
            output_var_names=['fct/output:0'])

    class Worker(ParallelPredictWorker):
        def __init__(self, idx, gpuid, config, outqueue):
            super(Worker, self).__init__(idx, gpuid, config)
            self.outq = outqueue

        def run(self):
            player = AtariPlayer(AtariDriver(romfile, viz=0),
                    action_repeat=ACTION_REPEAT)
            global NUM_ACTIONS
            NUM_ACTIONS = player.driver.get_num_actions()

            self._init_runtime()

            tot_reward = 0
            que = deque(maxlen=30)
            while True:
                s = player.current_state()
                outputs = self.func([[s]])
                action_value = outputs[0][0]
                act = action_value.argmax()
                #print action_value, act
                # epsilon-greedy exploration: act randomly 1% of the time
                if random.random() < 0.01:
                    act = random.choice(range(player.driver.get_num_actions()))
                # if the last 30 actions were all identical, the agent is
                # probably stuck; force a different action to break the loop
                if len(que) == que.maxlen \
                        and que.count(que[0]) == que.maxlen:
                    act = 1
                que.append(act)
                #print(act)
                reward, isOver = player.action(act)
                tot_reward += reward
                if isOver:
                    self.outq.put(tot_reward)
                    tot_reward = 0

    NR_PROC = min(multiprocessing.cpu_count() // 2, 10)
    procs = []
    q = multiprocessing.Queue()
    for k in range(NR_PROC):
        procs.append(Worker(k, -1, cfg, q))
    ensure_proc_terminate(procs)
    for k in procs:
        k.start()
    stat = StatCounter()
    try:
        EVAL_EPISODE = 50
        for _ in tqdm(range(EVAL_EPISODE)):
            r = q.get()
            stat.feed(r)
    finally:
        logger.info("Average Score: {}. Max Score: {}".format(
            stat.average, stat.max))
Example #4
def get_config():
    nr_gpu = get_nr_gpu()
    global PREDICTOR_THREAD
    if nr_gpu > 0:
        if nr_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(nr_gpu))[-nr_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str,
                                                          predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    PIPE_DIR = os.environ.get('TENSORPACK_PIPEDIR', '.').rstrip('/')
    namec2s = 'ipc://{}/sim-c2s-{}'.format(PIPE_DIR, name_base)
    names2c = 'ipc://{}/sim-s2c-{}'.format(PIPE_DIR, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    M = Model()
    master = MySimulatorMaster(namec2s, names2c, M, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    return TrainConfig(model=M,
                       dataflow=dataflow,
                       callbacks=[
                           ModelSaver(max_to_keep=2),
                           ScheduledHyperParamSetter('learning_rate',
                                                     [(20, 0.0003),
                                                      (120, 0.0001)]),
                           ScheduledHyperParamSetter('entropy_beta',
                                                     [(80, 0.005)]),
                           HumanHyperParamSetter('learning_rate'),
                           HumanHyperParamSetter('entropy_beta'),
                           master,
                           StartProcOrThread(master),
                           PeriodicTrigger(Evaluator(EVAL_EPISODE, ['state'],
                                                     ['policy'], get_player),
                                           every_k_epochs=1),
                       ],
                       session_creator=sesscreate.NewSessionCreator(
                           config=get_default_sess_config(0.5)),
                       steps_per_epoch=STEPS_PER_EPOCH,
                       max_epoch=1000,
                       tower=train_tower)
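This variant builds the ipc pipe names under TENSORPACK_PIPEDIR, falling back to the current working directory. If that directory is not suitable for ipc sockets, point the variable at a writable location before building the config; a minimal sketch (the /tmp path is an assumption, any writable directory works):

import os

# must be set before get_config() constructs the ipc endpoints
os.environ.setdefault('TENSORPACK_PIPEDIR', '/tmp')
config = get_config()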
Example #5
def train():
    dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    num_gpu = get_num_gpu()
    global PREDICTOR_THREAD
    if num_gpu > 0:
        if num_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str,
                                                          predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = AutoResumeTrainConfig(
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            master,
            StartProcOrThread(master),
            PeriodicTrigger(Evaluator(EVAL_EPISODE, ['state'], ['policy'],
                                      get_player),
                            every_k_epochs=1),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    trainer = MyTrainer() if config.nr_tower == 1 else AsyncMultiGPUTrainer(
        train_tower)
    launch_train_with_config2(config, trainer)
Example #6
def train():
    dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    nr_gpu = get_nr_gpu()
    global PREDICTOR_THREAD
    if nr_gpu > 0:
        if nr_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(nr_gpu))[-nr_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = TrainConfig(
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            master,
            StartProcOrThread(master),
            PeriodicTrigger(Evaluator(
                EVAL_EPISODE, ['state'], ['policy'], get_player),
                every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if config.nr_tower == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
Example #7
def train():
    dirname = os.path.join('train_log', 'a3c_small')
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    nr_gpu = get_nr_gpu()
    global PREDICTOR_THREAD
    if nr_gpu > 0:
        if nr_gpu > 1:
            # use all gpus for inference
            predict_tower = list(range(nr_gpu))
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = AutoResumeTrainConfig(
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            # ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            # ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            StartProcOrThread(master),
            HumanHyperParamSetter('learning_rate'),
            Evaluator(
                100, ['role_id', 'policy_state_in', 'last_cards_in', 'minor_type_in'],
                ['passive_decision_prob', 'passive_bomb_prob', 'passive_response_prob',
                 'active_decision_prob', 'active_response_prob', 'active_seq_prob', 'minor_response_prob'], get_player),
        ],
        # session_init=ModelLoader('policy_network_2', 'SL_policy_network', 'value_network', 'SL_value_network'),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if config.nr_tower == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
Example #8
def train(args):
    assert tf.test.is_gpu_available(), "Training requires GPUs!"
    dirname = os.path.join(settings.path_prefix,
                           "train_from_scratch/{}".format(args.env))
    logger.set_logger_dir(dirname)
    logger.info("Logger/Model Path: {}".format(dirname))

    # assign GPUs for training & inference
    num_gpu = args.num_gpu
    global PREDICTOR_THREAD
    if num_gpu > 1:
        # use half gpus for inference
        predict_tower = list(range(num_gpu))[-num_gpu // 2:]
    else:
        predict_tower = [0]
    PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
    train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
    logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
        ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]

    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower, args)
    config = TrainConfig(
        model=Model(),
        dataflow=master.get_training_dataflow(),
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003),
                                                        (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            PeriodicTrigger(Evaluator(EVAL_EPISODE, ['state'], ['policy'],
                                      get_player),
                            every_k_steps=2000),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
    trainer = SimpleTrainer()
    launch_train_with_config(config, trainer)
Example #9
def get_config():
    logger.set_logger_dir(LOG_DIR)
    M = Model()

    name_base = str(uuid.uuid1())[:6]
    PIPE_DIR = os.environ.get('TENSORPACK_PIPEDIR', '.').rstrip('/')
    namec2s = 'ipc://{}/sim-c2s-{}'.format(PIPE_DIR, name_base)
    names2c = 'ipc://{}/sim-s2c-{}'.format(PIPE_DIR, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, M)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    lr = tf.Variable(0.001, trainable=False, name='learning_rate')
    tf.scalar_summary('learning_rate', lr)

    return TrainConfig(
        dataset=dataflow,
        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
        callbacks=Callbacks([
            StatPrinter(),
            PeriodicCallback(ModelSaver(), 5),
            ScheduledHyperParamSetter('learning_rate', [(80, 0.0003),
                                                        (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            ScheduledHyperParamSetter('explore_factor', [(80, 2), (100, 3),
                                                         (120, 4), (140, 5)]),
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            HumanHyperParamSetter('explore_factor'),
            master,
            PeriodicCallback(
                Evaluator(EVAL_EPISODE, ['state'], ['logits'],
                          policy_dist=POLICY_DIST), 5),
        ]),
        extra_threads_procs=[master],
        session_config=get_default_sess_config(0.5),
        model=M,
        step_per_epoch=STEP_PER_EPOCH,
        max_epoch=1000,
    )
Example #10
    def __init__(self, simulators, pipe_sim2mgr, pipe_mgr2sim):
        self.sim2mgr = pipe_sim2mgr
        self.mgr2sim = pipe_mgr2sim

        self.context = zmq.Context()

        self.sim2mgr_socket = self.context.socket(zmq.PULL)
        self.sim2mgr_socket.bind(self.sim2mgr)
        self.sim2mgr_socket.set_hwm(2)

        self.mgr2sim_socket = self.context.socket(zmq.ROUTER)
        self.mgr2sim_socket.bind(self.mgr2sim)
        self.mgr2sim_socket.set_hwm(2)

        self.simulators = simulators
        for sim in self.simulators:
            ensure_proc_terminate(sim)

        self.queue = queue.Queue(maxsize=100)
        self.current_sim = None
        self.locked_sim = None
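The constructor above wires the simulator processes to the master over ZeroMQ ipc sockets: a PULL socket collects experience from the simulators and a ROUTER socket addresses replies to individual clients, both with a small high-water mark so that slow consumers exert back-pressure. A stripped-down, hypothetical sketch of just the collecting side, using plain pyzmq and multiprocessing:

import multiprocessing as mp

import zmq


def simulator(endpoint, ident):
    # each simulator PUSHes its observations to the master
    ctx = zmq.Context()
    sock = ctx.socket(zmq.PUSH)
    sock.connect(endpoint)
    sock.send_pyobj((ident, 'observation'))


if __name__ == '__main__':
    endpoint = 'ipc:///tmp/sim-c2s-demo'
    ctx = zmq.Context()
    pull = ctx.socket(zmq.PULL)
    pull.set_hwm(2)    # small buffer, mirroring the constructor above
    pull.bind(endpoint)
    procs = [mp.Process(target=simulator, args=(endpoint, k)) for k in range(3)]
    for p in procs:
        p.start()
    for _ in procs:
        print(pull.recv_pyobj())
    for p in procs:
        p.join()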
Example #11
def bench_proc():
    Q = mp.Queue()

    def work():
        player = gym.make('PongDeterministic-v3')
        naction = player.action_space.n
        np.random.seed(os.getpid())
        player.reset()

        while True:
            act = np.random.choice(naction)
            ob, r, isOver, info = player.step(act)
            Q.put([ob, r])
            if isOver:
                player.reset()
    nr_proc = 8
    procs = [mp.Process(target=work) for _ in range(nr_proc)]
    ensure_proc_terminate(procs)
    for p in procs:
        p.start()

    for t in tqdm.trange(100000):
        Q.get()
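For comparison, a thread-based variant of the same benchmark (hypothetical, not part of the original code) can serve as a baseline when measuring how much queue throughput the separate processes buy:

import queue
import threading

import gym
import numpy as np
import tqdm


def bench_thread(nr_thread=8):
    Q = queue.Queue()

    def work():
        player = gym.make('PongDeterministic-v3')
        naction = player.action_space.n
        player.reset()
        while True:
            act = np.random.choice(naction)
            ob, r, isOver, info = player.step(act)
            Q.put([ob, r])
            if isOver:
                player.reset()

    for _ in range(nr_thread):
        threading.Thread(target=work, daemon=True).start()

    for _ in tqdm.trange(100000):
        Q.get()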
Example #12
def hash_dp(dp):
    return sum([k.sum() for k in dp])


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--task',
                        default='basic',
                        choices=['basic', 'tworecv', 'send'])
    parser.add_argument('-n', '--num', type=int, default=10)
    args = parser.parse_args()

    if args.task == 'basic':
        DATA = random_array(args.num)
        p = mp.Process(target=send, args=(DATA, ))
        ensure_proc_terminate(p)
        start_proc_mask_signal(p)

        sess = tf.Session()
        recv = ZMQPullSocket(ENDPOINT, [tf.float32, tf.uint8]).pull()
        print(recv)

        for truth in DATA:
            arr = sess.run(recv)
            assert (arr[0] == truth[0]).all()
            assert (arr[1] == truth[1]).all()
    elif args.task == 'send':
        DATA = random_array(args.num)
        send(DATA)
    elif args.task == 'tworecv':
        DATA = random_array(args.num)
Example #13
        self.context.destroy(linger=0)


if __name__ == '__main__':
    import random
    import gym

    class NaiveSimulator(SimulatorProcess):
        def _build_player(self):
            return gym.make('Breakout-v0')

    class NaiveActioner(SimulatorMaster):
        def _get_action(self, state):
            time.sleep(1)
            return random.randint(1, 3)

        def _on_episode_over(self, client):
            # print("Over: ", client.memory)
            client.memory = []
            client.state = 0

    name = 'ipc://@whatever'
    procs = [NaiveSimulator(k, name) for k in range(10)]
    [k.start() for k in procs]

    th = NaiveActioner(name)
    ensure_proc_terminate(procs)
    th.start()

    time.sleep(100)
Example #14
def train_duel_value(args):
    logger.info("Test")
    assert tf.test.is_gpu_available(), "Training requires GPUs!"
    if args.logit_render_model_checkpoint == "pretrained":
        args.logit_render_model_checkpoint = settings.pretraind_model_path[
            args.env]
        render = "pretrained"
    else:
        args.logit_render_model_checkpoint = os.path.join(
            settings.supervised_model_checkpoint[args.env], 'checkpoint')
        render = "surpervised"
    dirname = os.path.join(
        settings.path_prefix,
        "reward_shaping_model/env-{}-shaping-{}-logit-render-{}")
    dirname = dirname.format(args.env, args.shaping, render)
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    num_gpu = args.num_gpu
    global PREDICTOR_THREAD
    if num_gpu > 1:
        # use half gpus for inference
        predict_tower = list(range(num_gpu))[-num_gpu // 2:]
    else:
        predict_tower = [0]
    PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
    train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
    logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
        ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]

    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = DuelValueSimulatorMaster(namec2s, names2c, predict_tower, args)
    config = TrainConfig(
        model=DuelValueModel(),
        dataflow=master.get_training_dataflow(),
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003),
                                                        (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            PeriodicTrigger(
                Evaluator(EVAL_EPISODE, ['state'], ['policy'], get_player),
                #EVAL_EPISODE, ['state'], ['reward_logits'], get_player),
                every_k_steps=2000),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=SmartInit(args.logit_render_model_checkpoint),
        max_epoch=1000,
    )
    trainer = SimpleTrainer()  # if num_gpu == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
Example #15
def get_config():
    M = Model()

    name_base = str(uuid.uuid1())[:6]
    PIPE_DIR = os.environ.get('TENSORPACK_PIPEDIR',
                              '/tmp/.ipcpipe').rstrip('/')
    if not os.path.exists(PIPE_DIR):
        os.makedirs(PIPE_DIR)
    else:
        os.system('rm -f {}/sim-*'.format(PIPE_DIR))
    namec2s = 'ipc://{}/sim-c2s-{}'.format(PIPE_DIR, name_base)
    names2c = 'ipc://{}/sim-s2c-{}'.format(PIPE_DIR, name_base)
    # AgentTorcs * SIMULATOR_PROC, AgentReplay * SIMULATOR_PROC
    procs = [
        MySimulatorWorker(k, namec2s, names2c)
        for k in range(SIMULATOR_PROC * 2)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, M)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    class CBSyncWeight(Callback):
        def _after_run(self, ctx, _):
            if self.local_step > 1 and self.local_step % SIMULATOR_PROC == 0:
                # print("before step ",self.local_step)
                return [M._td_sync_op]

        def _before_run(self, ctx):

            if self.local_step % 10 == 0:
                return [M._sync_op, M._td_sync_op]
            if self.local_step % SIMULATOR_PROC == 0 and 0:
                return [M._td_sync_op]

    import functools
    return TrainConfig(
        model=M,
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            HyperParamSetterWithFunc(
                'learning_rate/actor',
                functools.partial(M._calc_learning_rate, 'actor')),
            HyperParamSetterWithFunc(
                'learning_rate/critic',
                functools.partial(M._calc_learning_rate, 'critic')),

            # ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            # HumanHyperParamSetter('learning_rate'),
            # HumanHyperParamSetter('entropy_beta'),
            # ScheduledHyperParamSetter('actor/sigma_beta_accel', [(1, 0.2), (2, 0.01), (3, 1e-3), (4, 1e-4)]),
            # ScheduledHyperParamSetter('actor/sigma_beta_steering', [(1, 0.1), (2, 0.01), (3, 1e-3), (4, 1e-4)]),
            master,
            StartProcOrThread(master),
            CBSyncWeight(),
            # CBTDSyncWeight()
            # PeriodicTrigger(Evaluator(
            #     EVAL_EPISODE, ['state'], ['policy'], get_player),
            #     every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
Example #16
def train():
    dirname = os.path.join('train_log', 'A3C-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    num_gpu = get_num_gpu()
    global PREDICTOR_THREAD
    if num_gpu > 0:
        if num_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    # config = TrainConfig(
    #     model=Model(),
    #     dataflow=dataflow,
    #     callbacks=[
    #         ModelSaver(),
    #         ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
    #         ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
    #         HumanHyperParamSetter('learning_rate'),
    #         HumanHyperParamSetter('entropy_beta'),
    #         master,
    #         StartProcOrThread(master),
    #         PeriodicTrigger(Evaluator(
    #             EVAL_EPISODE, ['state'], ['policy'], get_player),
    #             every_k_epochs=3),
    #         PeriodicTrigger(LogVisualizeEpisode(
    #             ['state'], ['policy'], get_player),
    #             every_k_epochs=1),
    #     ],
    #     session_creator=sesscreate.NewSessionCreator(
    #         config=get_default_sess_config(0.5)),
    #     steps_per_epoch=STEPS_PER_EPOCH,
    #     session_init=get_model_loader(args.load) if args.load else None,
    #     max_epoch=1000,
    # )
    # config = get_config()
    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['policy']),
        player=get_player(train=True),
        state_shape=IMAGE_SHAPE3,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        history_len=FRAME_HISTORY
    )
    config = TrainConfig(
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            master,
            StartProcOrThread(master),
            PeriodicTrigger(Evaluator(
                EVAL_EPISODE, ['state'], ['policy'], get_player),
                every_k_epochs=3),
            expreplay,
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                [(0, 1), (10, 0.9), (50, 0.1), (320, 0.01)],   # 1->0.1 in the first million steps
                interp='linear'),
            PeriodicTrigger(LogVisualizeEpisode(
                ['state'], ['policy'], get_player),
                every_k_epochs=1),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if config.nr_tower == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
Example #17
                i = -1
                j = 0
                while j < len(mem):
                    if mem[j].first_st:
                        i += 1
                    target = [0 for _ in range(7)]
                    k = mem[j]
                    target[k.mode] = k.action
                    # self.queue.put(
                    #     [role_id, k.prob_state, k.all_state, k.last_cards_onehot, *target, k.minor_type, k.mode, k.prob,
                    #      dr[i]])
                    j += 1

                client.memory[role_id - 1] = []

        def _on_episode_over(self, client):
            # print("Over: ", client.memory)
            client.memory = []
            client.state = 0

    name = 'ipc://c2s'
    name2 = 'ipc://s2c'
    procs = [NaiveSimulator(k, name, name2) for k in range(20)]
    [k.start() for k in procs]

    th = NaiveActioner(name, name2)
    ensure_proc_terminate(procs)
    th.start()

    time.sleep(100)
Example #18
def train():
    dirname = os.path.join('train_log', 'A3C-LSTM')
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    nr_gpu = get_nr_gpu()
    global PREDICTOR_THREAD
    if nr_gpu > 0:
        if nr_gpu > 1:
            # use all gpus for inference
            predict_tower = list(range(nr_gpu))
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str,
                                                          predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    if os.name == 'nt':
        namec2s = 'tcp://127.0.0.1:8000'
        names2c = 'tcp://127.0.0.1:9000'
    else:
        prefix = '@' if sys.platform.startswith('linux') else ''
        namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
        names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)

    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = AutoResumeTrainConfig(
        always_resume=True,
        # starting_epoch=0,
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            MaxSaver('true_reward_2'),
            HumanHyperParamSetter('learning_rate'),
            # ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            # ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            StartProcOrThread(master),
            Evaluator(100, [
                'role_id', 'policy_state_in', 'last_cards_in', 'lstm_state_in'
            ], ['active_prob', 'passive_prob', 'new_lstm_state'], get_player),
            # SendStat(
            #     'export http_proxy=socks5://127.0.0.1:1080 https_proxy=socks5://127.0.0.1:1080 && /home/neil/anaconda3/bin/curl --header "Access-Token: o.CUdAMXqiVz9qXTxLYIXc0XkcAfZMpNGM" -d type=note -d title="doudizhu" '
            #     '-d body="lord win rate: {lord_win_rate}\n policy loss: {policy_loss_2}\n value loss: {value_loss_2}\n entropy loss: {entropy_loss_2}\n'
            #     'true reward: {true_reward_2}\n predict reward: {predict_reward_2}\n advantage: {rms_advantage_2}\n" '
            #     '--request POST https://api.pushbullet.com/v2/pushes',
            #     ['lord_win_rate', 'policy_loss_2', 'value_loss_2', 'entropy_loss_2',
            #      'true_reward_2', 'predict_reward_2', 'rms_advantage_2']
            #     ),
        ],
        # session_init=SaverRestore('./train_log/a3c_action_1d/max-true_reward_2'),
        # session_init=ModelLoader('policy_network_2', 'SL_policy_network', 'value_network', 'SL_value_network'),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if config.nr_tower == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)