def train():
    """Set up logging, GPU assignment and simulator processes, then launch
    Batch-A3C training for ENV_NAME.

    Mutates the global PREDICTOR_THREAD and reads the module-level ``args``
    for an optional checkpoint to load.
    """
    # Explicit check instead of `assert`: assertions are stripped under
    # `python -O`, which would silently skip this GPU requirement.
    if not tf.test.is_gpu_available():
        raise RuntimeError("Training requires GPUs!")
    dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    num_gpu = get_num_gpu()
    global PREDICTOR_THREAD
    if num_gpu > 0:
        if num_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        # NOTE(review): normally unreachable because of the GPU check above;
        # kept in case get_num_gpu() and tf.test.is_gpu_available() disagree.
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes; '@' selects Linux abstract sockets for ipc
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    config = TrainConfig(
        model=Model(),
        dataflow=master.get_training_dataflow(),
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            PeriodicTrigger(Evaluator(EVAL_EPISODE, ['state'], ['policy'], get_player),
                            every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if num_gpu == 1 else AsyncMultiGPUTrainer(
        train_tower)
    launch_train_with_config(config, trainer)
def _before_train(self) -> None:
    """Spawn the background worker process.

    Creates the queue and event used to talk to the worker, starts
    ``self._worker`` in its own process, and registers the process so it
    is terminated with the parent.
    """
    self.event = mp.Event()
    self.queue = mp.Queue()
    worker_args = (self.devices, self.queue, self.event)
    self.process = mp.Process(target=self._worker, args=worker_args)
    ensure_proc_terminate(self.process)
    start_proc_mask_signal(self.process)
def eval_model_multiprocess(model_path, romfile):
    """Evaluate a saved model on an Atari rom using multiple worker
    processes; logs the average and maximum episode score when done."""
    M = Model()
    cfg = PredictConfig(
        model=M,
        input_data_mapping=[0],
        session_init=SaverRestore(model_path),
        output_var_names=['fct/output:0'])

    class Worker(ParallelPredictWorker):
        # Each worker plays episodes forever, pushing each finished
        # episode's total reward onto the shared output queue.
        def __init__(self, idx, gpuid, config, outqueue):
            super(Worker, self).__init__(idx, gpuid, config)
            self.outq = outqueue

        def run(self):
            player = AtariPlayer(AtariDriver(romfile, viz=0),
                                 action_repeat=ACTION_REPEAT)
            global NUM_ACTIONS
            NUM_ACTIONS = player.driver.get_num_actions()
            self._init_runtime()

            tot_reward = 0
            que = deque(maxlen=30)  # recent actions, used to detect a stuck agent
            while True:
                s = player.current_state()
                outputs = self.func([[s]])
                action_value = outputs[0][0]
                act = action_value.argmax()
                # print action_value, act
                # epsilon-greedy (eps=0.01) to break deterministic loops
                if random.random() < 0.01:
                    act = random.choice(range(player.driver.get_num_actions()))
                # same action 30 times in a row -> force action 1
                # (presumably FIRE/serve for this rom -- TODO confirm)
                if len(que) == que.maxlen \
                        and que.count(que[0]) == que.maxlen:
                    act = 1
                que.append(act)
                # print(act)
                reward, isOver = player.action(act)
                tot_reward += reward
                if isOver:
                    self.outq.put(tot_reward)
                    tot_reward = 0

    NR_PROC = min(multiprocessing.cpu_count() // 2, 10)
    procs = []
    q = multiprocessing.Queue()
    for k in range(NR_PROC):
        procs.append(Worker(k, -1, cfg, q))  # gpuid=-1: run on CPU
    ensure_proc_terminate(procs)
    for k in procs:
        k.start()
    stat = StatCounter()
    try:
        EVAL_EPISODE = 50
        for _ in tqdm(range(EVAL_EPISODE)):
            r = q.get()
            stat.feed(r)
    finally:
        # Workers loop forever; report what was collected even on interrupt.
        logger.info("Average Score: {}. Max Score: {}".format(
            stat.average, stat.max))
def eval_model_multiprocess(model_path, romfile):
    """Evaluate a saved model on an Atari rom using multiple worker
    processes; logs the average and maximum episode score when done.

    NOTE(review): this appears to be an exact duplicate of an identical
    function elsewhere in this file (likely from another file/version).
    """
    M = Model()
    cfg = PredictConfig(
        model=M,
        input_data_mapping=[0],
        session_init=SaverRestore(model_path),
        output_var_names=['fct/output:0'])

    class Worker(ParallelPredictWorker):
        # Each worker plays episodes forever, pushing each finished
        # episode's total reward onto the shared output queue.
        def __init__(self, idx, gpuid, config, outqueue):
            super(Worker, self).__init__(idx, gpuid, config)
            self.outq = outqueue

        def run(self):
            player = AtariPlayer(AtariDriver(romfile, viz=0),
                                 action_repeat=ACTION_REPEAT)
            global NUM_ACTIONS
            NUM_ACTIONS = player.driver.get_num_actions()
            self._init_runtime()

            tot_reward = 0
            que = deque(maxlen=30)  # recent actions, used to detect a stuck agent
            while True:
                s = player.current_state()
                outputs = self.func([[s]])
                action_value = outputs[0][0]
                act = action_value.argmax()
                # print action_value, act
                # epsilon-greedy (eps=0.01) to break deterministic loops
                if random.random() < 0.01:
                    act = random.choice(range(player.driver.get_num_actions()))
                # same action 30 times in a row -> force action 1
                if len(que) == que.maxlen \
                        and que.count(que[0]) == que.maxlen:
                    act = 1
                que.append(act)
                # print(act)
                reward, isOver = player.action(act)
                tot_reward += reward
                if isOver:
                    self.outq.put(tot_reward)
                    tot_reward = 0

    NR_PROC = min(multiprocessing.cpu_count() // 2, 10)
    procs = []
    q = multiprocessing.Queue()
    for k in range(NR_PROC):
        procs.append(Worker(k, -1, cfg, q))  # gpuid=-1: run on CPU
    ensure_proc_terminate(procs)
    for k in procs:
        k.start()
    stat = StatCounter()
    try:
        EVAL_EPISODE = 50
        for _ in tqdm(range(EVAL_EPISODE)):
            r = q.get()
            stat.feed(r)
    finally:
        logger.info("Average Score: {}. Max Score: {}".format(
            stat.average, stat.max))
def get_config():
    """Build the Batch-A3C TrainConfig: assigns train/inference GPUs,
    spawns simulator worker processes and wires up callbacks.

    Mutates the global PREDICTOR_THREAD.
    """
    nr_gpu = get_nr_gpu()
    global PREDICTOR_THREAD
    if nr_gpu > 0:
        if nr_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(nr_gpu))[-nr_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes; ZMQ ipc pipes live under TENSORPACK_PIPEDIR
    name_base = str(uuid.uuid1())[:6]
    PIPE_DIR = os.environ.get('TENSORPACK_PIPEDIR', '.').rstrip('/')
    namec2s = 'ipc://{}/sim-c2s-{}'.format(PIPE_DIR, name_base)
    names2c = 'ipc://{}/sim-s2c-{}'.format(PIPE_DIR, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    M = Model()
    master = MySimulatorMaster(namec2s, names2c, M, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    return TrainConfig(
        model=M,
        dataflow=dataflow,
        callbacks=[
            ModelSaver(max_to_keep=2),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            master,
            StartProcOrThread(master),
            PeriodicTrigger(Evaluator(EVAL_EPISODE, ['state'], ['policy'], get_player),
                            every_k_epochs=1),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
        tower=train_tower)
def train():
    """Launch Batch-A3C training for ENV_NAME with an auto-resuming config.

    Mutates the global PREDICTOR_THREAD; reads the module-level ``args``
    for an optional checkpoint to load.
    """
    dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    num_gpu = get_num_gpu()
    global PREDICTOR_THREAD
    if num_gpu > 0:
        if num_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = AutoResumeTrainConfig(
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            master,
            StartProcOrThread(master),
            PeriodicTrigger(Evaluator(EVAL_EPISODE, ['state'], ['policy'], get_player),
                            every_k_epochs=1),
        ],
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    # NOTE(review): MyTrainer / launch_train_with_config2 are project-local
    # variants whose definitions are not visible in this chunk.
    trainer = MyTrainer() if config.nr_tower == 1 else AsyncMultiGPUTrainer(
        train_tower)
    launch_train_with_config2(config, trainer)
def train():
    """Launch Batch-A3C training for ENV_NAME.

    Mutates the global PREDICTOR_THREAD; reads the module-level ``args``
    for an optional checkpoint to load.
    """
    dirname = os.path.join('train_log', 'train-atari-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    nr_gpu = get_nr_gpu()
    global PREDICTOR_THREAD
    if nr_gpu > 0:
        if nr_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(nr_gpu))[-nr_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = TrainConfig(
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            master,
            StartProcOrThread(master),
            PeriodicTrigger(Evaluator(
                EVAL_EPISODE, ['state'], ['policy'], get_player),
                every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if config.nr_tower == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
def train():
    """Launch Batch-A3C training for the small card-game model.

    Mutates the global PREDICTOR_THREAD. Unlike the half/half variants in
    this file, this one uses ALL gpus for inference.
    """
    dirname = os.path.join('train_log', 'a3c_small')
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    nr_gpu = get_nr_gpu()
    global PREDICTOR_THREAD
    if nr_gpu > 0:
        if nr_gpu > 1:
            # use all gpus for inference
            predict_tower = list(range(nr_gpu))
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = AutoResumeTrainConfig(
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            # ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            # ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            StartProcOrThread(master),
            HumanHyperParamSetter('learning_rate'),
            # Evaluates the card-game policy heads every epoch.
            Evaluator(
                100,
                ['role_id', 'policy_state_in', 'last_cards_in', 'minor_type_in'],
                ['passive_decision_prob', 'passive_bomb_prob', 'passive_response_prob',
                 'active_decision_prob', 'active_response_prob', 'active_seq_prob',
                 'minor_response_prob'],
                get_player),
        ],
        # session_init=ModelLoader('policy_network_2', 'SL_policy_network', 'value_network', 'SL_value_network'),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if config.nr_tower == 1 else \
        AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
def train(args):
    """Train from scratch on ``args.env`` using a single-GPU trainer.

    Mutates the global PREDICTOR_THREAD; inference predictors run on the
    GPUs chosen into ``predict_tower``.
    """
    assert tf.test.is_gpu_available(), "Training requires GPUs!"
    dirname = os.path.join(settings.path_prefix,
                           "train_from_scratch/{}".format(args.env))
    logger.set_logger_dir(dirname)
    logger.info("Logger/Model Path: {}".format(dirname))

    # assign GPUs for training & inference (count comes from CLI args here)
    num_gpu = args.num_gpu
    global PREDICTOR_THREAD
    if num_gpu > 1:
        # use half gpus for inference
        predict_tower = list(range(num_gpu))[-num_gpu // 2:]
    else:
        predict_tower = [0]
    PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
    train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
    logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
        ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower, args)
    config = TrainConfig(
        model=Model(),
        dataflow=master.get_training_dataflow(),
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            PeriodicTrigger(Evaluator(EVAL_EPISODE, ['state'], ['policy'], get_player),
                            every_k_steps=2000),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
    # NOTE(review): trains on a single tower even when num_gpu > 1;
    # train_tower is only used for the log message above.
    trainer = SimpleTrainer()
    launch_train_with_config(config, trainer)
def get_config():
    """Build a TrainConfig with the legacy tensorpack / pre-1.0 TF API.

    NOTE(review): `tf.scalar_summary`, `dataset=`, `Callbacks([...])` and
    `step_per_epoch` are old interfaces -- this block targets an older
    tensorpack/TensorFlow than the rest of the file.
    """
    logger.set_logger_dir(LOG_DIR)
    M = Model()

    # simulator pipes under TENSORPACK_PIPEDIR
    name_base = str(uuid.uuid1())[:6]
    PIPE_DIR = os.environ.get('TENSORPACK_PIPEDIR', '.').rstrip('/')
    namec2s = 'ipc://{}/sim-c2s-{}'.format(PIPE_DIR, name_base)
    names2c = 'ipc://{}/sim-s2c-{}'.format(PIPE_DIR, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, M)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    lr = tf.Variable(0.001, trainable=False, name='learning_rate')
    tf.scalar_summary('learning_rate', lr)

    return TrainConfig(
        dataset=dataflow,
        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
        callbacks=Callbacks([
            StatPrinter(),
            PeriodicCallback(ModelSaver(), 5),
            ScheduledHyperParamSetter('learning_rate', [(80, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            ScheduledHyperParamSetter('explore_factor', [(80, 2), (100, 3), (120, 4), (140, 5)]),
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            HumanHyperParamSetter('explore_factor'),
            master,
            PeriodicCallback(
                Evaluator(EVAL_EPISODE, ['state'], ['logits'],
                          policy_dist=POLICY_DIST), 5),
        ]),
        extra_threads_procs=[master],
        session_config=get_default_sess_config(0.5),
        model=M,
        step_per_epoch=STEP_PER_EPOCH,
        max_epoch=1000,
    )
def __init__(self, simulators, pipe_sim2mgr, pipe_mgr2sim):
    """Bind the manager's ZMQ endpoints and register simulator processes.

    A PULL socket receives messages from simulators; a ROUTER socket
    addresses replies back to individual simulators. Both are bound with
    a high-water mark of 2.
    """
    self.sim2mgr = pipe_sim2mgr
    self.mgr2sim = pipe_mgr2sim

    ctx = zmq.Context()
    self.context = ctx

    pull_sock = ctx.socket(zmq.PULL)
    pull_sock.bind(self.sim2mgr)
    pull_sock.set_hwm(2)
    self.sim2mgr_socket = pull_sock

    router_sock = ctx.socket(zmq.ROUTER)
    router_sock.bind(self.mgr2sim)
    router_sock.set_hwm(2)
    self.mgr2sim_socket = router_sock

    # Make sure every simulator process dies together with this manager.
    self.simulators = simulators
    for proc in self.simulators:
        ensure_proc_terminate(proc)

    self.queue = queue.Queue(maxsize=100)  # buffered datapoints for training
    self.current_sim = None
    self.locked_sim = None
def bench_proc():
    """Benchmark environment throughput with 8 producer processes.

    Each worker plays random actions in its own Pong environment and
    pushes [observation, reward] pairs onto a shared queue; the parent
    drains 100000 items while tqdm reports the rate.
    """
    result_q = mp.Queue()

    def producer():
        env = gym.make('PongDeterministic-v3')
        n_actions = env.action_space.n
        np.random.seed(os.getpid())  # decorrelate the workers' action streams
        env.reset()
        while True:
            action = np.random.choice(n_actions)
            obs, reward, done, _info = env.step(action)
            result_q.put([obs, reward])
            if done:
                env.reset()

    workers = [mp.Process(target=producer) for _ in range(8)]
    ensure_proc_terminate(workers)
    for w in workers:
        w.start()
    for _ in tqdm.trange(100000):
        result_q.get()
def hash_dp(dp):
    """Checksum a datapoint: the sum of ``.sum()`` over its components."""
    return sum(component.sum() for component in dp)


if __name__ == '__main__':
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--task', default='basic',
                           choices=['basic', 'tworecv', 'send'])
    argparser.add_argument('-n', '--num', type=int, default=10)
    args = argparser.parse_args()

    if args.task == 'basic':
        DATA = random_array(args.num)
        # Producer process sends DATA; this process pulls it back through
        # the ZMQ op and verifies each array round-trips exactly.
        sender = mp.Process(target=send, args=(DATA, ))
        ensure_proc_terminate(sender)
        start_proc_mask_signal(sender)

        sess = tf.Session()
        recv = ZMQPullSocket(ENDPOINT, [tf.float32, tf.uint8]).pull()
        print(recv)
        for expected in DATA:
            received = sess.run(recv)
            assert (received[0] == expected[0]).all()
            assert (received[1] == expected[1]).all()
    elif args.task == 'send':
        DATA = random_array(args.num)
        send(DATA)
    elif args.task == 'tworecv':
        # NOTE(review): this branch appears truncated in this chunk.
        DATA = random_array(args.num)
# NOTE(review): the next statement is the tail of a method whose definition
# is outside this chunk; it tears down the ZMQ context immediately,
# discarding unsent messages (linger=0).
self.context.destroy(linger=0)


if __name__ == '__main__':
    import random
    import gym

    class NaiveSimulator(SimulatorProcess):
        # Minimal simulator: just builds a gym Breakout environment.
        def _build_player(self):
            return gym.make('Breakout-v0')

    class NaiveActioner(SimulatorMaster):
        def _get_action(self, state):
            # Deliberately slow random "policy" -- only exercises the pipeline.
            time.sleep(1)
            return random.randint(1, 3)

        def _on_episode_over(self, client):
            # print("Over: ", client.memory)
            client.memory = []
            client.state = 0

    # Smoke test: 10 simulators against one actioner for 100 seconds.
    name = 'ipc://@whatever'
    procs = [NaiveSimulator(k, name) for k in range(10)]
    [k.start() for k in procs]

    th = NaiveActioner(name)
    ensure_proc_terminate(procs)
    th.start()

    time.sleep(100)
def train_duel_value(args):
    """Train the duel-value reward-shaping model.

    Chooses the checkpoint used for logit rendering ("pretrained" vs the
    supervised one), assigns GPUs, spawns simulator processes and launches
    single-tower training of a DuelValueModel. Mutates the global
    PREDICTOR_THREAD and ``args.logit_render_model_checkpoint``.
    """
    logger.info("Test")
    assert tf.test.is_gpu_available(), "Training requires GPUs!"

    if args.logit_render_model_checkpoint == "pretrained":
        args.logit_render_model_checkpoint = settings.pretraind_model_path[
            args.env]
        render = "pretrained"
    else:
        args.logit_render_model_checkpoint = os.path.join(
            settings.supervised_model_checkpoint[args.env], 'checkpoint')
        # NOTE(review): "surpervised" looks like a typo for "supervised",
        # but it is baked into the log-dir name; renaming would orphan
        # existing training directories.
        render = "surpervised"
    dirname = os.path.join(
        settings.path_prefix,
        "reward_shaping_model/env-{}-shaping-{}-logit-render-{}")
    dirname = dirname.format(args.env, args.shaping, render)
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference (count from CLI args)
    num_gpu = args.num_gpu
    global PREDICTOR_THREAD
    if num_gpu > 1:
        # use half gpus for inference
        predict_tower = list(range(num_gpu))[-num_gpu // 2:]
    else:
        predict_tower = [0]
    PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
    train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
    logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
        ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = DuelValueSimulatorMaster(namec2s, names2c, predict_tower, args)
    config = TrainConfig(
        model=DuelValueModel(),
        dataflow=master.get_training_dataflow(),
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            PeriodicTrigger(
                Evaluator(EVAL_EPISODE, ['state'], ['policy'], get_player),
                # EVAL_EPISODE, ['state'], ['reward_logits'], get_player),
                every_k_steps=2000),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=SmartInit(args.logit_render_model_checkpoint),
        max_epoch=1000,
    )
    trainer = SimpleTrainer()  # if num_gpu == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
def get_config():
    """Build the TrainConfig for the TORCS actor-critic setup: simulator
    pipes, twice-SIMULATOR_PROC worker processes, and per-network
    (actor/critic) learning-rate callbacks."""
    M = Model()
    name_base = str(uuid.uuid1())[:6]
    PIPE_DIR = os.environ.get('TENSORPACK_PIPEDIR', '/tmp/.ipcpipe').rstrip('/')
    if not os.path.exists(PIPE_DIR):
        os.makedirs(PIPE_DIR)
    else:
        # clear stale pipes left over from previous runs
        os.system('rm -f {}/sim-*'.format(PIPE_DIR))
    namec2s = 'ipc://{}/sim-c2s-{}'.format(PIPE_DIR, name_base)
    names2c = 'ipc://{}/sim-s2c-{}'.format(PIPE_DIR, name_base)
    # AgentTorcs * SIMULATOR_PROC, AgentReplay * SIMULATOR_PROC
    procs = [
        MySimulatorWorker(k, namec2s, names2c)
        for k in range(SIMULATOR_PROC * 2)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, M)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    class CBSyncWeight(Callback):
        # Periodically schedules the model's sync ops while training.
        def _after_run(self, ctx, _):
            if self.local_step > 1 and self.local_step % SIMULATOR_PROC == 0:
                # print("before step ",self.local_step)
                return [M._td_sync_op]

        def _before_run(self, ctx):
            if self.local_step % 10 == 0:
                return [M._sync_op, M._td_sync_op]
            # NOTE(review): `and 0` makes this branch unreachable --
            # presumably disabled on purpose.
            if self.local_step % SIMULATOR_PROC == 0 and 0:
                return [M._td_sync_op]

    import functools
    return TrainConfig(
        model=M,
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            HyperParamSetterWithFunc(
                'learning_rate/actor',
                functools.partial(M._calc_learning_rate, 'actor')),
            HyperParamSetterWithFunc(
                'learning_rate/critic',
                functools.partial(M._calc_learning_rate, 'critic')),
            # ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            # HumanHyperParamSetter('learning_rate'),
            # HumanHyperParamSetter('entropy_beta'),
            # ScheduledHyperParamSetter('actor/sigma_beta_accel', [(1, 0.2), (2, 0.01), (3, 1e-3), (4, 1e-4)]),
            # ScheduledHyperParamSetter('actor/sigma_beta_steering', [(1, 0.1), (2, 0.01), (3, 1e-3), (4, 1e-4)]),
            master,
            StartProcOrThread(master),
            CBSyncWeight(),
            # CBTDSyncWeight()
            # PeriodicTrigger(Evaluator(
            #     EVAL_EPISODE, ['state'], ['policy'], get_player),
            #     every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
def get_config():
    """Build the TrainConfig for the TORCS actor-critic setup.

    NOTE(review): appears to be a duplicate (unformatted variant) of an
    identical function elsewhere in this file.
    """
    M = Model()
    name_base = str(uuid.uuid1())[:6]
    PIPE_DIR = os.environ.get('TENSORPACK_PIPEDIR', '/tmp/.ipcpipe').rstrip('/')
    if not os.path.exists(PIPE_DIR):
        os.makedirs(PIPE_DIR)
    else:
        # clear stale pipes left over from previous runs
        os.system('rm -f {}/sim-*'.format(PIPE_DIR))
    namec2s = 'ipc://{}/sim-c2s-{}'.format(PIPE_DIR, name_base)
    names2c = 'ipc://{}/sim-s2c-{}'.format(PIPE_DIR, name_base)
    # AgentTorcs * SIMULATOR_PROC, AgentReplay * SIMULATOR_PROC
    procs = [MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC*2)]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, M)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    class CBSyncWeight(Callback):
        # Periodically schedules the model's sync ops while training.
        def _after_run(self, ctx, _):
            if self.local_step > 1 and self.local_step % SIMULATOR_PROC == 0:
                # print("before step ",self.local_step)
                return [M._td_sync_op]

        def _before_run(self, ctx):
            if self.local_step % 10 == 0:
                return [M._sync_op, M._td_sync_op]
            # NOTE(review): `and 0` makes this branch unreachable --
            # presumably disabled on purpose.
            if self.local_step % SIMULATOR_PROC == 0 and 0:
                return [M._td_sync_op]

    import functools
    return TrainConfig(
        model=M,
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            HyperParamSetterWithFunc(
                'learning_rate/actor',
                functools.partial(M._calc_learning_rate, 'actor')),
            HyperParamSetterWithFunc(
                'learning_rate/critic',
                functools.partial(M._calc_learning_rate, 'critic')),
            # ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            # HumanHyperParamSetter('learning_rate'),
            # HumanHyperParamSetter('entropy_beta'),
            # ScheduledHyperParamSetter('actor/sigma_beta_accel', [(1, 0.2), (2, 0.01), (3, 1e-3), (4, 1e-4)]),
            # ScheduledHyperParamSetter('actor/sigma_beta_steering', [(1, 0.1), (2, 0.01), (3, 1e-3), (4, 1e-4)]),
            master,
            StartProcOrThread(master),
            CBSyncWeight(),
            # CBTDSyncWeight()
            # PeriodicTrigger(Evaluator(
            #     EVAL_EPISODE, ['state'], ['policy'], get_player),
            #     every_k_epochs=3),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
def train():
    """Launch A3C training for ENV_NAME, combining the batch-A3C dataflow
    with an ExpReplay callback and periodic evaluation/visualization.

    Mutates the global PREDICTOR_THREAD; reads the module-level ``args``.
    """
    dirname = os.path.join('train_log', 'A3C-{}'.format(ENV_NAME))
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    num_gpu = get_num_gpu()
    global PREDICTOR_THREAD
    if num_gpu > 0:
        if num_gpu > 1:
            # use half gpus for inference
            predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes
    name_base = str(uuid.uuid1())[:6]
    prefix = '@' if sys.platform.startswith('linux') else ''
    namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
    names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    # Earlier config kept for reference:
    # config = TrainConfig(
    #     model=Model(),
    #     dataflow=dataflow,
    #     callbacks=[
    #         ModelSaver(),
    #         ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
    #         ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
    #         HumanHyperParamSetter('learning_rate'),
    #         HumanHyperParamSetter('entropy_beta'),
    #         master,
    #         StartProcOrThread(master),
    #         PeriodicTrigger(Evaluator(
    #             EVAL_EPISODE, ['state'], ['policy'], get_player),
    #             every_k_epochs=3),
    #         PeriodicTrigger(LogVisualizeEpisode(
    #             ['state'], ['policy'], get_player),
    #             every_k_epochs=1),
    #     ],
    #     session_creator=sesscreate.NewSessionCreator(
    #         config=get_default_sess_config(0.5)),
    #     steps_per_epoch=STEPS_PER_EPOCH,
    #     session_init=get_model_loader(args.load) if args.load else None,
    #     max_epoch=1000,
    # )
    # config = get_config()

    expreplay = ExpReplay(
        predictor_io_names=(['state'], ['policy']),
        player=get_player(train=True),
        state_shape=IMAGE_SHAPE3,
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        init_exploration=1.0,
        update_frequency=UPDATE_FREQ,
        history_len=FRAME_HISTORY
    )
    config = TrainConfig(
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            HumanHyperParamSetter('learning_rate'),
            HumanHyperParamSetter('entropy_beta'),
            master,
            StartProcOrThread(master),
            PeriodicTrigger(Evaluator(
                EVAL_EPISODE, ['state'], ['policy'], get_player),
                every_k_epochs=3),
            expreplay,
            ScheduledHyperParamSetter(
                ObjAttrParam(expreplay, 'exploration'),
                [(0, 1), (10, 0.9), (50, 0.1), (320, 0.01)],  # 1->0.1 in the first million steps
                interp='linear'),
            PeriodicTrigger(LogVisualizeEpisode(
                ['state'], ['policy'], get_player),
                every_k_epochs=1),
        ],
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        session_init=get_model_loader(args.load) if args.load else None,
        max_epoch=1000,
    )
    trainer = SimpleTrainer() if config.nr_tower == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)
# NOTE(review): the statements below are the tail of a method whose start is
# outside this chunk; `mem`, `role_id`, `dr` and `client` are its
# locals/arguments. It walks the episode memory, tracking which deal each
# entry belongs to, then clears that player's memory.
i = -1
j = 0
while j < len(mem):
    if mem[j].first_st:
        # `i` indexes the episode/deal this memory entry belongs to
        i += 1
    target = [0 for _ in range(7)]  # one-hot-ish action target over 7 modes
    k = mem[j]
    target[k.mode] = k.action
    # self.queue.put(
    #     [role_id, k.prob_state, k.all_state, k.last_cards_onehot, *target,
    #      k.minor_type, k.mode, k.prob, dr[i]])
    j += 1
client.memory[role_id - 1] = []


def _on_episode_over(self, client):
    # Reset the client's per-episode state.
    # print("Over: ", client.memory)
    client.memory = []
    client.state = 0


# Smoke test: 20 simulators against one actioner for 100 seconds.
name = 'ipc://c2s'
name2 = 'ipc://s2c'
procs = [NaiveSimulator(k, name, name2) for k in range(20)]
[k.start() for k in procs]

th = NaiveActioner(name, name2)
ensure_proc_terminate(procs)
th.start()

time.sleep(100)
def train():
    """Launch A3C-LSTM training for the card game with an auto-resuming
    config, GPU assignment and simulator processes.

    Mutates the global PREDICTOR_THREAD. Unlike the half/half variants in
    this file, this one uses ALL gpus for inference.
    """
    dirname = os.path.join('train_log', 'A3C-LSTM')
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
    nr_gpu = get_nr_gpu()
    global PREDICTOR_THREAD
    if nr_gpu > 0:
        if nr_gpu > 1:
            # use all gpus for inference
            predict_tower = list(range(nr_gpu))
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
        train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
        logger.warn(
            "Without GPU this model will never learn! CPU is only useful for debug."
        )
        PREDICTOR_THREAD = 1
        predict_tower, train_tower = [0], [0]

    # setup simulator processes; Windows has no ipc transport, use tcp
    name_base = str(uuid.uuid1())[:6]
    if os.name == 'nt':
        namec2s = 'tcp://127.0.0.1:8000'
        names2c = 'tcp://127.0.0.1:9000'
    else:
        prefix = '@' if sys.platform.startswith('linux') else ''
        namec2s = 'ipc://{}sim-c2s-{}'.format(prefix, name_base)
        names2c = 'ipc://{}sim-s2c-{}'.format(prefix, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c) for k in range(SIMULATOR_PROC)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    master = MySimulatorMaster(namec2s, names2c, predict_tower)
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
    config = AutoResumeTrainConfig(
        always_resume=True,
        # starting_epoch=0,
        model=Model(),
        dataflow=dataflow,
        callbacks=[
            ModelSaver(),
            MaxSaver('true_reward_2'),
            HumanHyperParamSetter('learning_rate'),
            # ScheduledHyperParamSetter('learning_rate', [(20, 0.0003), (120, 0.0001)]),
            # ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
            master,
            StartProcOrThread(master),
            Evaluator(100, [
                'role_id', 'policy_state_in', 'last_cards_in', 'lstm_state_in'
            ], ['active_prob', 'passive_prob', 'new_lstm_state'], get_player),
            # NOTE(review): the commented-out SendStat below contained a
            # hard-coded Pushbullet access token; it is redacted here and
            # should be revoked and moved to configuration if revived.
            # SendStat(
            #     'export http_proxy=socks5://127.0.0.1:1080 https_proxy=socks5://127.0.0.1:1080 && /home/neil/anaconda3/bin/curl --header "Access-Token: <REDACTED>" -d type=note -d title="doudizhu" '
            #     '-d body="lord win rate: {lord_win_rate}\n policy loss: {policy_loss_2}\n value loss: {value_loss_2}\n entropy loss: {entropy_loss_2}\n'
            #     'true reward: {true_reward_2}\n predict reward: {predict_reward_2}\n advantage: {rms_advantage_2}\n" '
            #     '--request POST https://api.pushbullet.com/v2/pushes',
            #     ['lord_win_rate', 'policy_loss_2', 'value_loss_2', 'entropy_loss_2',
            #      'true_reward_2', 'predict_reward_2', 'rms_advantage_2']
            # ),
        ],
        # session_init=SaverRestore('./train_log/a3c_action_1d/max-true_reward_2'),
        # session_init=ModelLoader('policy_network_2', 'SL_policy_network', 'value_network', 'SL_value_network'),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
    trainer = SimpleTrainer(
    ) if config.nr_tower == 1 else AsyncMultiGPUTrainer(train_tower)
    launch_train_with_config(config, trainer)