Пример #1
0
    def read_process(json_str, p_request_dict, p_result_dict, lock):
        """Publish one state report as a request and wait for its result.

        The JSON text is decoded only to obtain the battle id and tick. The
        undecoded string is stored in ``p_request_dict`` under the battle id,
        then ``p_result_dict`` is polled until an entry for the same battle id
        shows up; that entry is removed and returned.

        Args:
            json_str: JSON text of the state report.
            p_request_dict: Mapping of battle id -> pending request JSON.
            p_result_dict: Mapping of battle id -> computed result.
            lock: Context-manager lock guarding writes to both mappings.

        Returns:
            The value stored for this battle id in ``p_result_dict``, or the
            string ``'{}'`` if an exception is raised while waiting.
        """
        # Imported locally so the handler below always refers to the module.
        # The previous code rebound the name ``traceback`` to a traceback
        # object and then called ``print_exc`` on it, which raised
        # AttributeError inside the handler.
        import traceback

        begin_time = time.time()
        raw_state_info = StateInfo.decode(JSON.loads(json_str))
        p_battle_id = raw_state_info.battleid

        with lock:
            p_request_dict[p_battle_id] = json_str

        try:
            # NOTE(review): this is a busy-wait with no sleep/backoff; it will
            # spin a core until the result arrives. Left as-is to keep latency
            # unchanged -- confirm whether that is intended.
            while True:
                if p_battle_id in p_result_dict:
                    with lock:
                        result = p_result_dict.pop(p_battle_id)
                    end_time = time.time()
                    print('read_process', p_battle_id, raw_state_info.tick,
                          (end_time - begin_time) * 1000, '取得结果', result)
                    return result
        except queue.Empty:
            print("LineTrainerManager Exception empty")
            return '{}'
        except Exception:
            print("LineTrainerManager Exception")
            traceback.print_exc()
            return '{}'
Пример #2
0
def start_consumer(battle_id_num, request_queues, result_queues):
    """Serve requests from the given queues forever, replying with random data.

    Each pass takes at most one pending request from every queue in
    ``request_queues``, decodes it, and puts a ``3 x 3700`` random array on
    the result queue with the same index. The time spent producing replies in
    each pass is recorded, and the mean is printed every 1000 passes.

    Args:
        battle_id_num: Unused by this function.
        request_queues: Sequence of queues holding request JSON strings.
        result_queues: Sequence of queues, index-aligned with
            ``request_queues``, that receive the replies.

    This function never returns.
    """
    consumer_times = []
    while True:
        # Collect at most one pending request per queue, remembering which
        # queue it came from so the reply goes to the matching result queue.
        pending = []
        for index, request_queue in enumerate(request_queues):
            if not request_queue.empty():
                pending.append((index, request_queue.get()))

        begin_time = time.time()
        for index, json_str in pending:
            # The decoded state is not used, but decoding is kept so that a
            # malformed request still fails here exactly as before.
            StateInfo.decode(JSON.loads(json_str))
            result_queues[index].put(np.random.rand(3, 3700))
        end_time = time.time()

        consumer_times.append((end_time - begin_time) * 1000)
        if len(consumer_times) >= 1000:
            # True division: the previous floor division truncated the mean
            # to a whole number of milliseconds (sub-ms averages printed 0.0).
            # NOTE(review): passes that handled no request are included in
            # the average, as before.
            print("model get_action average calculate time(ms)",
                  sum(consumer_times) / len(consumer_times))
            consumer_times = []
Пример #3
0
 def parse_state_log(json_str):
     """Turn one state-log line into a decoded ``StateInfo``.

     The first 23 characters of the line are discarded before the remainder
     is parsed as JSON and handed to ``StateInfo.decode``.
     """
     # Presumably the 23 characters are a timestamp written in front of the
     # opening '{' -- TODO confirm against the log writer.
     prefix_length = 23
     payload = JSON.loads(json_str[prefix_length:])
     return StateInfo.decode(payload)
Пример #4
0
    def do_GET(self):
        """Handle one state report from the client and reply with commands.

        Reads ``Content-Length`` bytes from the request body, decodes them as
        text, and parses the JSON to find the battle id. A ``LineTrainerPPO``
        is created and cached in ``self.line_trainers`` the first time a
        battle id is seen. The raw request text is then passed to that
        trainer's ``train_line_model`` and the reply is written back UTF-8
        encoded after ``self._set_headers()``.
        """
        content_length = int(self.headers['Content-Length'])
        # The body arrives as bytes; decode it to text for the JSON parser.
        get_data = self.rfile.read(content_length).decode()

        # Parse the request sent by the client.
        raw_state_info = StateInfo.decode(JSON.loads(get_data))
        battle_id = raw_state_info.battleid

        if battle_id not in self.line_trainers:
            # First frame of this battle: build a PPO trainer with one
            # replay cache per model.
            ob = np.zeros(183, dtype=float).tolist()
            model1_cache = PPO_CACHE2(ob,
                                      1,
                                      horizon=self.model_1.optim_batchsize)
            model2_cache = PPO_CACHE2(ob,
                                      1,
                                      horizon=self.model_2.optim_batchsize)
            self.line_trainers[battle_id] = LineTrainerPPO(
                self.save_dir,
                '27',
                self.model_1,
                self.model1_save_header,
                model1_cache,
                '28',
                self.model_2,
                self.model2_save_header,
                model2_cache,
                real_hero=None,
                policy_ratio=-1,
                policy_continue_acts=3)

        # Hand the frame to the lane trainer.
        rsp = self.line_trainers[battle_id].train_line_model(get_data)
        print(rsp)

        # The trainer may hand back a plain object instead of JSON text (for
        # example on the tick == -1 restart frame). Serialise it here so the
        # encode below cannot fail with AttributeError.
        if not isinstance(rsp, str):
            rsp = JSON.dumps(rsp)
        rsp_bytes = rsp.encode(encoding="utf-8")

        # Send the commands back to the client.
        self._set_headers()
        self.wfile.write(rsp_bytes)
0
    def read_process(self, json_str):
        """Route one state report to the trainer registered for its battle.

        The JSON text is decoded to find the battle id, then the matching
        entry of ``self.battle_trainers`` is asked to build the response from
        the original text.

        Returns:
            Whatever the trainer's ``build_response`` returns, or the string
            ``'{}'`` if looking up the trainer or building the response raises.
        """
        # Timestamp taken on entry; nothing in this method reads it yet.
        started_at = time.time()
        battle_key = StateInfo.decode(JSON.loads(json_str)).battleid

        try:
            trainer = self.battle_trainers[battle_key]
            return trainer.build_response(json_str)
        except queue.Empty:
            print("LineTrainerManager Exception empty")
            return '{}'
        except Exception:
            print("LineTrainerManager Exception")
            traceback.print_exc(file=sys.stdout)
            return '{}'
Пример #6
0
    def train_line_model(self, raw_state_str):
        """Process one raw state frame and build the JSON reply for the game.

        The frame is logged with ``save_raw_log``, decoded, merged with the
        previous cached frame and appended to ``self.state_cache``. Actions
        are then requested for each model-controlled hero, a delayed frame is
        handed to ``remember_replay``, and the resulting command list is
        serialised.

        Args:
            raw_state_str: JSON text of the state frame sent by the client.

        Returns:
            str: JSON text with the keys ``"ID"``, ``"tick"`` and (except for
            the tick == -1 frame) ``"cmd"``. Every path returns a string; the
            tick == -1 path previously returned a dict.
        """
        self.save_raw_log(raw_state_str)
        prev_state_info = self.state_cache[-1] if len(self.state_cache) > 0 else None

        # Parse the request sent by the client.
        raw_state_info = StateInfo.decode(JSON.loads(raw_state_str))

        def restart_response():
            # JSON reply asking the game to restart the current battle.
            restart_cmds = [StateUtil.build_action_command('27', 'RESTART', None)]
            return JSON.dumps({"ID": raw_state_info.battleid,
                               "tick": raw_state_info.tick,
                               "cmd": restart_cmds})

        # On restart the client sends a message such as
        # {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}
        if raw_state_info.tick == -1:
            return JSON.dumps({"ID": raw_state_info.battleid, "tick": -1})

        # Merge with the previous frame to obtain the complete state.
        # Occasionally the first tick seen is 66 and the second one is 528.
        if raw_state_info.tick <= StateUtil.TICK_PER_STATE and (
                prev_state_info is None or prev_state_info.tick > raw_state_info.tick):
            print("clear")
            prev_state_info = None
            self.state_cache = []
            self.hero_strategy = {}
            self.model1_just_dead = 0
            self.model2_just_dead = 0
        elif prev_state_info is not None and prev_state_info.tick >= raw_state_info.tick:
            # NOTE(review): the cache is cleared but prev_state_info is still
            # used for the merge below -- confirm this is intended.
            print("clear %s %s" % (prev_state_info.tick, raw_state_info.tick))
            self.state_cache = []
        elif prev_state_info is None and raw_state_info.tick > StateUtil.TICK_PER_STATE:
            # Not a starting frame: ask for a restart right away.
            # Occasionally the first frame has no tick (i.e. -1); in that case
            # the only option is to restart this battle.
            print(self.battle_id, '不是开始帧的话直接返回重启游戏', raw_state_info.tick)
            return restart_response()
        state_info = StateUtil.update_state_log(prev_state_info, raw_state_info)

        # Sanity check: the first model's hero must be present with an hp value.
        hero = state_info.get_hero(self.model1_hero)
        if hero is None or hero.hp is None:
            print(self.battle_id, self.model1_hero, state_info.tick, '读取信息为空,异常')
            print(self.battle_id, '不是开始帧的话直接返回重启游戏', raw_state_info.tick)
            return restart_response()

        # Keep the merged frame.
        self.state_cache.append(state_info)

        # Get the models' choices first; the chosen action is also recorded
        # on the current frame.
        action_strs = []
        restart = False
        if self.model1_hero is not None and self.real_hero != self.model1_hero:
            actions_model1, restart = self.build_response(self.state_cache, -1, self.model1_hero)
            action_strs.extend(actions_model1)
        if self.model2_hero is not None and not restart and self.real_hero != self.model2_hero:
            actions_model2, restart = self.build_response(self.state_cache, -1, self.model2_hero)
            action_strs.extend(actions_model2)

        # Compute rewards. With a real player, look a few frames further back
        # because that player's actions have to be inferred.
        reward_state_idx = -2 if self.real_hero is None else -4
        new = 0
        if len(self.state_cache) + reward_state_idx > 0:
            new, loss_team = self.if_restart(self.state_cache, reward_state_idx)
            if self.model1_hero is not None:
                self.remember_replay(self.state_cache, reward_state_idx, self.model1_cache, self.model_process,
                                     self.model1_hero, self.model2_hero, new, loss_team)
            if self.model2_hero is not None:
                self.remember_replay(self.state_cache, reward_state_idx, self.model2_cache, self.model_process,
                                     self.model2_hero, self.model1_hero, new, loss_team)

        # To keep the number of restarts down, after training finishes only
        # the previous model's action sequence is cleared.
        if restart:
            self.model1_cache.clear_cache()
            self.model2_cache.clear_cache()
            # Return an empty command list for this frame. This used to be a
            # dict ({}), unlike every other value placed in "cmd".
            action_strs = []

        # If the restart condition is met, restart the game
        # (restart when the first tower in the lane is destroyed).
        if new == 1:
            action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]

        # Return the result to the game side.
        rsp_obj = {"ID": state_info.battleid, "tick": state_info.tick, "cmd": action_strs}
        return JSON.dumps(rsp_obj)
Пример #7
0
    def build_response(self, raw_state_str):
        """Process one raw state frame of a team battle and build the JSON reply.

        The frame is logged with ``save_raw_log``, decoded and merged with the
        previous cached frame. Depending on the battle phase the reply
        contains gold/level setup commands, skill-upgrade and equipment
        commands, move commands for heroes outside the battle range, model
        actions for heroes inside it, or a restart command. The merged frame
        is appended to ``self.state_cache`` unless the method returns early
        with a restart reply.

        Args:
            raw_state_str: JSON text of the state frame sent by the client.

        Returns:
            str: JSON text with the keys ``"ID"``, ``"tick"`` and (except for
            the tick == -1 frame) ``"cmd"``. Every path returns a string; the
            tick == -1 path previously returned a dict.
        """
        self.save_raw_log(raw_state_str)
        prev_state_info = self.state_cache[-1] if len(
            self.state_cache) > 0 else None
        response_strs = []

        # Parse the request sent by the client.
        raw_state_info = StateInfo.decode(JSON.loads(raw_state_str))

        def restart_response():
            # JSON reply asking the game to restart the current battle.
            restart_cmds = [
                StateUtil.build_action_command('27', 'RESTART', None)
            ]
            return JSON.dumps({
                "ID": raw_state_info.battleid,
                "tick": raw_state_info.tick,
                "cmd": restart_cmds
            })

        # On restart the client sends a message such as
        # {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}
        if raw_state_info.tick == -1:
            return JSON.dumps({"ID": raw_state_info.battleid, "tick": -1})

        if raw_state_info.tick <= StateUtil.TICK_PER_STATE and (
                prev_state_info is None
                or prev_state_info.tick > raw_state_info.tick):
            # A new battle begins: drop all per-battle state.
            print("clear")
            prev_state_info = None
            self.state_cache = []
            self.battle_started = -1
            self.battle_heroes_cache = []
            self.dead_heroes = []
            self.dead_heroes_cache = []
            self.data_inputs = []
            self.rebooting = False
        elif prev_state_info is None and raw_state_info.tick > StateUtil.TICK_PER_STATE:
            # Not a starting frame: ask for a restart right away.
            # Occasionally the first frame has no tick (i.e. -1); in that case
            # the only option is to restart this battle.
            print("battle_id", self.battle_id, "tick", raw_state_info.tick,
                  '不是开始帧的话直接返回重启游戏', raw_state_info.tick)
            return restart_response()

        state_info = StateUtil.update_state_log(prev_state_info,
                                                raw_state_info)
        hero = state_info.get_hero("27")

        if hero is None or hero.hp is None:
            # Rare case: if the hero cannot be found, restart directly.
            print("battle_id", self.battle_id, "tick", state_info.tick,
                  '不是开始帧的话直接返回重启游戏', raw_state_info.tick)
            return restart_response()

        # Preparation before the battle.
        if len(self.state_cache) == 0:
            # On the first frame, grant gold and levels.
            for hero in self.heros:
                add_gold_cmd = CmdAction(hero, CmdActionEnum.ADDGOLD, None,
                                         None, None, None, None, None, None)
                add_gold_cmd.gold = 3000
                response_strs.append(StateUtil.build_command(add_gold_cmd))

                add_lv_cmd = CmdAction(hero, CmdActionEnum.ADDLV, None, None,
                                       None, None, None, None, None)
                add_lv_cmd.lv = 9
                response_strs.append(StateUtil.build_command(add_lv_cmd))
        elif len(self.state_cache) > 1:
            # Upgrade skills and buy equipment; this may take several frames.
            # NOTE(review): the original comment says "from the second frame",
            # but with `> 1` this first runs once two frames are cached --
            # confirm which is intended.
            for hero in self.heros:
                upgrade_cmd = self.upgrade_skills(state_info, hero)
                if upgrade_cmd is not None:
                    response_strs.append(upgrade_cmd)

                buy_cmd = self.buy_equip(state_info, hero)
                if buy_cmd is not None:
                    response_strs.append(buy_cmd)

        # Record heroes that died between the previous frame and this one.
        if prev_state_info is not None:
            for hero in self.heros:
                dead = StateUtil.if_hero_dead(prev_state_info, state_info,
                                              hero)
                if dead == 1 and hero not in self.dead_heroes:
                    print("battle_id", self.battle_id, "tick", state_info.tick,
                          "英雄死亡", hero, "tick", state_info.tick)
                    self.dead_heroes.append(hero)

        # First all heroes are required to stand inside the battle circle;
        # then model computation starts and every action is decided by the
        # model. Invalid actions must be filtered out and moves that would
        # leave the circle blocked.
        # TODO: once the battle has started, pull a hero back if a skill
        # movement occasionally takes it out of the circle.

        # Dead heroes are excluded here; they do not rejoin the battle.
        # The battle range shrinks over time.
        battle_range = self.cal_battle_range(
            len(self.state_cache) - self.battle_started)
        heroes_in_range, heroes_out_range = TeamBattleTrainer.all_in_battle_range(
            state_info, self.heros, self.dead_heroes, battle_range)

        # Surviving heroes.
        battle_heros = list(heroes_in_range)
        battle_heros.extend(heroes_out_range)

        # Cache participation and deaths for later training.
        self.battle_heroes_cache.append(battle_heros)
        self.dead_heroes_cache.append(list(self.dead_heroes))

        if len(heroes_out_range) > 0:
            # The battle has not started yet: some heroes are still outside.
            if self.battle_started > -1:
                print('battle_id', self.battle_id, "战斗已经开始,但是为什么还有英雄在团战圈外",
                      ','.join(heroes_out_range), "battle_range", battle_range)

            # Move towards the two battle starting points; once the battle
            # has started, move towards the battle centre instead.
            for hero in heroes_out_range:
                start_point_x = randint(0, 8000)
                if self.battle_started == -1:
                    start_point_z = TeamBattleTrainer.BATTLE_CIRCLE_RADIUS_BATTLE_START * 1000
                else:
                    start_point_z = 0
                start_point_z += randint(-4000, 4000)
                if TeamBattleUtil.get_hero_team(hero) == 0:
                    start_point_z *= -1
                start_point_z += TeamBattleTrainer.BATTLE_POINT_Z
                tgt_pos = PosStateInfo(start_point_x, 0, start_point_z)
                move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None,
                                        tgt_pos, None, None, None, None)
                response_strs.append(StateUtil.build_command(move_action))
        elif not self.rebooting:
            # The battle is under way.
            if self.battle_started == -1:
                self.battle_started = len(self.state_cache)

            # Special cases, e.g. Dracula's ultimate sets hp to 1: adjust the
            # frame state accordingly.
            state_info, _ = TeamBattlePolicy.modify_status_4_draculas_invincible(
                state_info, self.state_cache)

            # Get the actions team by team.
            team_a, team_b = TeamBattleUtil.get_teams(heroes_in_range)
            team_actions_a, input_list_a, model_upgrade_a = self.get_model_actions_team(
                state_info, team_a, heroes_in_range)
            team_actions_b, input_list_b, model_upgrade_b = self.get_model_actions_team(
                state_info, team_b, heroes_in_range)

            # If a model was upgraded after the battle started, restart it.
            if (model_upgrade_a or model_upgrade_b
                ) and self.battle_started < len(self.state_cache) + 1:
                print("battle_id", self.battle_id, "因为模型升级,重启战斗",
                      self.battle_started, len(self.state_cache))
                return restart_response()

            data_input_map = {}
            for action_cmd, data_input in zip(team_actions_a + team_actions_b,
                                              input_list_a + input_list_b):
                response_strs.append(StateUtil.build_command(action_cmd))
                state_info.add_action(action_cmd)
                data_input_map[action_cmd.hero_name] = data_input

            # Cache all model inputs for later training.
            self.data_inputs.append(data_input_map)

        # Add the frame to the cache.
        self.state_cache.append(state_info)

        # Feed the model actions into the training cache and compute rewards.
        # Note: rewards depend on later states, so this runs with a delay.
        last_x_index = 2
        if self.battle_started > -1 and len(self.data_inputs) >= last_x_index:
            if self.rebooting:
                # Testing showed that after a restart command the next frame
                # may still belong to the old battle; training is skipped
                # then and the restart command is sent again.
                print("battle_id", self.battle_id, "tick", state_info.tick,
                      "warn", "要求重启战斗,但是还在收到后续帧状态, 继续重启")
                response_strs = [
                    StateUtil.build_action_command('27', 'RESTART', None)
                ]
            else:
                state_index = len(self.state_cache) - last_x_index
                win, win_team, left_heroes = self.remember_replay_heroes(
                    -last_x_index, state_index, battle_range)

                # End-of-battle condition reported by remember_replay_heroes.
                if win == 1:
                    # Restart the game.
                    print('battle_id', self.battle_id, "重启游戏", "剩余人员",
                          ','.join(left_heroes))
                    response_strs = [
                        StateUtil.build_action_command('27', 'RESTART', None)
                    ]
                    self.rebooting = True

        # TODO: record model outputs for later training.

        # Return the result to the game side.
        rsp_obj = {
            "ID": state_info.battleid,
            "tick": state_info.tick,
            "cmd": response_strs
        }
        rsp_str = JSON.dumps(rsp_obj)
        print('battle_id', self.battle_id, 'response', rsp_str)
        return rsp_str
Пример #8
0
    def train_line_model(self, raw_state_str):
        """Process one raw state frame, train the models, and build the reply.

        The frame is logged, decoded, merged with the previous cached frame
        and appended to ``self.state_cache``. Actions are requested from
        ``self.model1`` (and ``self.model2`` when ``self.model2_heros`` is
        set). A delayed frame is then given rewards and handed to the models'
        ``remember`` / ``replay``, and the resulting command list is
        serialised.

        Args:
            raw_state_str: JSON text of the state frame sent by the client.

        Returns:
            str: ``''`` for a frame whose tick is -1, otherwise JSON text with
            the keys ``"ID"``, ``"tick"`` and ``"cmd"``.
        """
        def remember_and_train(model, save_header, prev_state, rewarded_state, next_state):
            # Store one transition in `model`; when it was stored and the
            # model reports it is ready, run a replay step of batch size 64
            # and save a checkpoint whenever the memory size (read before the
            # replay) is a positive multiple of 1000.
            if not model.remember(prev_state, rewarded_state, next_state):
                return
            memory_len = model.get_memory_size()
            if model.if_replay(64):
                model.replay(64)
                if memory_len > 0 and memory_len % 1000 == 0:
                    model.save(save_header + str(model.get_memory_size()) + '/model')

        self.save_raw_log(raw_state_str)
        prev_state_info = self.state_cache[-1] if len(self.state_cache) > 0 else None

        # Parse the request sent by the client.
        raw_state_info = StateInfo.decode(JSON.loads(raw_state_str))

        # On restart the client sends a message such as
        # {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}
        if raw_state_info.tick == -1:
            return ''

        # Merge with the previous frame to obtain the complete state.
        if raw_state_info.tick <= StateUtil.TICK_PER_STATE:
            print("clear")
            self.state_cache = []
            prev_state_info = None
        elif prev_state_info is not None and prev_state_info.tick >= raw_state_info.tick:
            # NOTE(review): the cache is cleared but prev_state_info is still
            # used for the merge below -- confirm this is intended.
            print("clear %s %s" % (prev_state_info.tick, raw_state_info.tick))
            self.state_cache = []
        state_info = StateUtil.update_state_log(prev_state_info, raw_state_info)

        # Get the models' choices first; the chosen action is also recorded
        # on the current frame.
        action_strs = self.build_response(state_info, prev_state_info, self.model1, self.model1_heros)
        if self.model2_heros is not None:
            action_strs.extend(
                self.build_response(state_info, prev_state_info, self.model2, self.model2_heros))

        # Cache the frame.
        self.state_cache.append(state_info)
        self.save_state_log(state_info)

        # Update player actions and rewards; these lag behind by
        # LineModel.REWARD_DELAY_STATE_NUM frames.
        reward_state_idx = len(self.state_cache) - LineModel.REWARD_DELAY_STATE_NUM
        state_with_reward = None
        if reward_state_idx > 1:
            self.guess_hero_actions(reward_state_idx, self.real_heros)
            prev_4_m = self.state_cache[reward_state_idx - 1]
            state_with_reward = LineModel_DQN.update_state_rewards(self.state_cache, reward_state_idx)

        if state_with_reward is not None:
            # Write the intermediate result to file, then learn from it.
            next_state_4_m = self.state_cache[reward_state_idx + 1]
            self.save_reward_log(state_with_reward)
            remember_and_train(self.model1, self.model1_save_header,
                               prev_4_m, state_with_reward, next_state_4_m)

            if self.model2 is not None:
                # TODO: filter the transition and feed it to the matching model.
                remember_and_train(self.model2, self.model2_save_header,
                                   prev_4_m, state_with_reward, next_state_4_m)

        # If the restart condition is met, restart the game
        # (restart when the first tower in the lane is destroyed).
        if StateUtil.if_first_tower_destroyed_in_middle_line(state_info):
            print('重新开始游戏')
            action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]

        # Return the result to the game side.
        rsp_obj = {"ID": state_info.battleid, "tick": state_info.tick, "cmd": action_strs}
        return JSON.dumps(rsp_obj)