Example #1
def __init__(self, weight_path):
    super().__init__()
    agent_names = ['agent%d' % i for i in range(1, 4)]
    model = Model(agent_names, (1000, 21, 256 + 256 * 2 + 120), 'Double', (1000, 21), 0.99)
    self.predictors = {n: Predictor(OfflinePredictor(PredictConfig(
        model=model,
        session_init=SaverRestore(weight_path),
        input_names=[n + '/state', n + '_comb_mask', n + '/fine_mask'],
        output_names=[n + '/Qvalue'])), num_actions=(1000, 21)) for n in self.get_all_agent_names()}
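For reference, tensorpack's OfflinePredictor is itself callable: it takes arrays positionally in the order of input_names and returns arrays in the order of output_names. A minimal usage sketch with placeholder shapes (predictor_fn stands for one of the wrapped OfflinePredictor instances built above; the real shapes come from the project's Model):

import numpy as np

state = np.zeros((1, 21, 256 + 256 * 2 + 120), dtype=np.float32)  # placeholder batch of 1
comb_mask = np.array([True])                                      # placeholder stage flag
fine_mask = np.ones((1, 1000), dtype=np.bool_)                    # placeholder action mask
qvalues, = predictor_fn(state, comb_mask, fine_mask)              # one output per output_names entry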
Example #2
def get_config():
    agent_names = ['agent%d' % i for i in range(1, 4)]
    model = Model(agent_names, STATE_SHAPE, METHOD, NUM_ACTIONS, GAMMA)
    exps = [
        ExpReplay(
            # model=model,
            agent_name=name,
            player=Env(agent_names),
            state_shape=STATE_SHAPE,
            num_actions=[MAX_NUM_COMBS, MAX_NUM_GROUPS],
            batch_size=BATCH_SIZE,
            memory_size=MEMORY_SIZE,
            init_memory_size=INIT_MEMORY_SIZE,
            init_exploration=1.,
            update_frequency=UPDATE_FREQ) for name in agent_names
    ]

    df = MyDataFLow(exps)

    bl_evaluators = [
        BLEvaluator(EVAL_EPISODE, agent_names[0], 2, lambda: CEnv()),
        BLEvaluator(EVAL_EPISODE, agent_names[1], 3, lambda: CEnv()),
        BLEvaluator(EVAL_EPISODE, agent_names[2], 1, lambda: CEnv())
    ]

    return AutoResumeTrainConfig(
        # always_resume=False,
        data=QueueInput(df),
        model=model,
        callbacks=[
            ModelSaver(),
            PeriodicTrigger(RunOp(model.update_target_param, verbose=True),
                            every_k_steps=STEPS_PER_EPOCH //
                            10),  # update target network every 10k steps
            *exps,
            # ScheduledHyperParamSetter('learning_rate',
            #                           [(60, 5e-5), (100, 2e-5)]),
            *[
                ScheduledHyperParamSetter(
                    ObjAttrParam(exp, 'exploration'),
                    [(0, 1), (30, 0.5), (100, 0.3),
                     (320, 0.1)],  # 1->0.1 in the first million steps
                    interp='linear') for exp in exps
            ],
            *bl_evaluators,
            Evaluator(EVAL_EPISODE, agent_names, lambda: Env(agent_names)),
            HumanHyperParamSetter('learning_rate'),
        ],
        # session_init=ChainInit([SaverRestore('../Hierarchical_Q/train_log/DQN-9-3-LASTCARDS/model-240000', 'agent1'),
        #                        SaverRestore('./train_log/DQN-60-MA/model-355000')]),
        # starting_epoch=0,
        # session_init=SaverRestore('train_log/DQN-54-AUG-STATE/model-75000'),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
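This get_config relies on module-level constants defined elsewhere in the project. A sketch of plausible values, partly inferred from Example #1 (state shape (1000, 21, 256 + 256 * 2 + 120), method 'Double', num_actions (1000, 21), gamma 0.99) and from the "every 10k steps" comment; the remaining numbers are placeholders, not the project's actual settings:

STATE_SHAPE = (1000, 21, 256 + 256 * 2 + 120)
METHOD = 'Double'
MAX_NUM_COMBS, MAX_NUM_GROUPS = 1000, 21
NUM_ACTIONS = (MAX_NUM_COMBS, MAX_NUM_GROUPS)
GAMMA = 0.99
BATCH_SIZE = 64               # placeholder
MEMORY_SIZE = 10 ** 6         # placeholder
INIT_MEMORY_SIZE = 10 ** 4    # placeholder
UPDATE_FREQ = 4               # placeholder
EVAL_EPISODE = 100            # placeholder
STEPS_PER_EPOCH = 100000      # makes STEPS_PER_EPOCH // 10 the 10k in the comment above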
Example #3
    def get_config():

        model = Model(agent_names, STATE_SHAPE, METHOD, NUM_ACTIONS, GAMMA)
        exps = [
            ExpReplay(agent_name=name,
                      state_shape=STATE_SHAPE,
                      num_actions=[MAX_NUM_COMBS, MAX_NUM_GROUPS],
                      batch_size=BATCH_SIZE,
                      memory_size=MEMORY_SIZE,
                      init_memory_size=INIT_MEMORY_SIZE,
                      init_exploration=1.,
                      update_frequency=UPDATE_FREQ,
                      pipe_exp2sim=name_exp2sim + str(i),
                      pipe_sim2exp=name_sim2exp + str(i))
            for i, name in enumerate(agent_names)
        ]

        df = MyDataFLow(exps)

        return AutoResumeTrainConfig(
            # always_resume=False,
            data=QueueInput(df),
            model=model,
            callbacks=[
                ModelSaver(),
                PeriodicTrigger(RunOp(model.update_target_param, verbose=True),
                                every_k_steps=STEPS_PER_EPOCH //
                                10),  # update target network every 10k steps
                # the following order is important
                coordinator,
                manager,
                *exps,
                # ScheduledHyperParamSetter('learning_rate',
                #                           [(60, 5e-5), (100, 2e-5)]),
                *[
                    ScheduledHyperParamSetter(
                        ObjAttrParam(sim, 'exploration'),
                        [(0, 1), (30, 0.5), (100, 0.3),
                         (320, 0.1)],  # 1->0.1 in the first million steps
                        interp='linear') for sim in sims
                ],
                # Evaluator(EVAL_EPISODE, agent_names, lambda: Env(agent_names)),
                HumanHyperParamSetter('learning_rate'),
            ],
            session_init=ChainInit([
                SaverRestore(
                    '../TensorPack/MA_Hierarchical_Q/train_log/DQN-60-MA/model-355000'
                )
            ]),
            # starting_epoch=0,
            # session_init=SaverRestore('train_log/DQN-54-AUG-STATE/model-75000'),
            steps_per_epoch=STEPS_PER_EPOCH,
            max_epoch=1000,
        )
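Unlike Example #2, this variant wires each ExpReplay to an external simulator through named pipes and expects coordinator, manager, sims, name_exp2sim and name_sim2exp to be defined in the enclosing scope. A purely illustrative sketch of the pipe-name prefixes (hypothetical values; the code above only requires that str(i) can be appended per agent):

name_exp2sim = 'exp2sim'  # hypothetical prefix; per-agent pipes become 'exp2sim0', 'exp2sim1', ...
name_sim2exp = 'sim2exp'  # hypothetical prefix for the reverse direction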
Example #4
def __init__(self, role_id, weight_path):
    def role2agent(role):
        if role == 2:
            return 'agent1'
        elif role == 1:
            return 'agent3'
        else:
            return 'agent2'

    super().__init__(role_id)
    agent_names = ['agent%d' % i for i in range(1, 4)]
    model = Model(agent_names, (1000, 21, 256 + 256 * 2 + 120), 'Double', (1000, 21), 0.99)
    self.predictor = Predictor(OfflinePredictor(PredictConfig(
        model=model,
        session_init=SaverRestore(weight_path),
        input_names=[role2agent(role_id) + '/state', role2agent(role_id) + '_comb_mask', role2agent(role_id) + '/fine_mask'],
        output_names=[role2agent(role_id) + '/Qvalue'])), num_actions=(1000, 21))
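The inner role2agent is a fixed mapping; assuming role ids are always 1, 2 or 3, an equivalent table-driven sketch would be:

ROLE_TO_AGENT = {2: 'agent1', 1: 'agent3', 3: 'agent2'}
agent = ROLE_TO_AGENT[role_id]  # same result as role2agent(role_id) above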
Example #5
    parser.add_argument('--task', help='task to perform',
                        choices=['play', 'eval', 'train'], default='train')
    parser.add_argument('--algo', help='algorithm',
                        choices=['DQN', 'Double', 'Dueling'], default='Double')
    args = parser.parse_args()

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    METHOD = args.algo
    # set num_actions
    NUM_ACTIONS = max(MAX_NUM_GROUPS, MAX_NUM_COMBS)

    nr_gpu = get_nr_gpu()
    train_tower = list(range(nr_gpu))
    if args.task != 'train':
        assert args.load is not None
        pred = OfflinePredictor(PredictConfig(
            model=Model(),
            session_init=get_model_loader(args.load),
            input_names=['state', 'comb_mask'],
            output_names=['Qvalue']))
    else:
        logger.set_logger_dir(
            os.path.join('train_log', 'DQN-60-MA-SELF_PLAY'))
        config = get_config()
        if args.load:
            config.session_init = get_model_loader(args.load)
        trainer = SimpleTrainer() if nr_gpu == 1 else AsyncMultiGPUTrainer(train_tower)
        launch_train_with_config(config, trainer)
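The branch above reads args.gpu and args.load, so the truncated parser setup presumably defines those flags as well. A minimal sketch of the missing lines, modeled on tensorpack's stock DQN example and therefore an assumption:

    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use')
    parser.add_argument('--load', help='load model')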

Example #6
    parser.add_argument('--task',
                        help='task to perform',
                        choices=['play', 'eval', 'train'],
                        default='train')
    parser.add_argument('--algo',
                        help='algorithm',
                        choices=['DQN', 'Double', 'Dueling'],
                        default='Double')
    args = parser.parse_args()

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    METHOD = args.algo
    # set num_actions
    NUM_ACTIONS = max(MAX_NUM_GROUPS, MAX_NUM_COMBS)

    nr_gpu = get_nr_gpu()
    train_tower = list(range(nr_gpu))
    if args.task != 'train':
        assert args.load is not None
        pred = OfflinePredictor(
            PredictConfig(model=Model(),
                          session_init=get_model_loader(args.load),
                          input_names=['state', 'comb_mask'],
                          output_names=['Qvalue']))
    else:
        logger.set_logger_dir(os.path.join('train_log', 'DQN-60-MA-SELF_PLAY'))
        config = get_config()
        if args.load:
            config.session_init = get_model_loader(args.load)
        trainer = SimpleTrainer() if nr_gpu == 1 else AsyncMultiGPUTrainer(
            train_tower)
        launch_train_with_config(config, trainer)
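Examples #5 and #6 are the same entry point formatted differently: both pick SimpleTrainer when a single GPU is visible and AsyncMultiGPUTrainer(train_tower) otherwise, and launch_train_with_config(config, trainer) then starts tensorpack's training loop with the config produced by get_config.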