def read_process(json_str, p_request_dict, p_result_dict, lock):
    begin_time = time.time()
    obj = JSON.loads(json_str)
    raw_state_info = StateInfo.decode(obj)
    p_battle_id = raw_state_info.battleid
    # if raw_state_info.tick == -1:
    #     print('read_process: need to handle ', p_battle_id, raw_state_info.tick, 'raw log', json_str)
    # else:
    #     print('read_process: need to handle ', p_battle_id, raw_state_info.tick)

    # Hand the raw request over to the consumer, keyed by battle id.
    with lock:
        # print('read_process', p_battle_id, 'send a request', raw_state_info.tick)
        p_request_dict[p_battle_id] = json_str

    try:
        # Busy-wait until the consumer publishes a result for this battle.
        while True:
            if p_battle_id in p_result_dict:
                with lock:
                    # print('read_process', p_battle_id, 'get a result', raw_state_info.tick)
                    result = p_result_dict[p_battle_id]
                    del p_result_dict[p_battle_id]
                end_time = time.time()
                print('read_process', p_battle_id, raw_state_info.tick,
                      (end_time - begin_time) * 1000, 'got result', result)
                return result
    except queue.Empty:
        print("LineTrainerManager Exception empty")
        return '{}'
    except Exception:
        print("LineTrainerManager Exception")
        traceback.print_exc()
        return '{}'
def start_consumer(battle_id_num, request_queues, result_queues):
    consumer_times = []
    while True:
        indexs = []
        requests = []
        # Drain at most one pending request from each battle's queue.
        for index, request_queue in enumerate(request_queues):
            if not request_queue.empty():
                request = request_queue.get()
                requests.append(request)
                indexs.append(index)

        begin_time = time.time()
        for index, json_str in zip(indexs, requests):
            obj = JSON.loads(json_str)
            raw_state_info = StateInfo.decode(obj)
            # Placeholder inference: answer with a random action matrix.
            rand = np.random.rand(3, 3700)
            result_queues[index].put(rand)
        end_time = time.time()

        delta_milliseconds = (end_time - begin_time) * 1000
        consumer_times.append(delta_milliseconds)
        if len(consumer_times) >= 1000:
            print("model get_action average calculate time(ms)",
                  sum(consumer_times) / float(len(consumer_times)))
            consumer_times = []
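# NOTE: the two functions above assume multiprocessing plumbing that is not shown here:
# read_process polls shared dictionaries guarded by a lock, while start_consumer expects one
# request queue and one result queue per battle. The sketch below is a minimal, self-contained
# illustration of the queue-based wiring; echo_consumer is a simplified stand-in (the real
# consumer decodes StateInfo and runs the model instead of echoing the request back).
import multiprocessing as mp

def echo_consumer(request_queues, result_queues):
    # Simplified stand-in for start_consumer: echo each request back as its result.
    while True:
        for index, request_queue in enumerate(request_queues):
            if not request_queue.empty():
                result_queues[index].put(request_queue.get())

if __name__ == '__main__':
    battle_num = 4
    request_queues = [mp.Queue() for _ in range(battle_num)]
    result_queues = [mp.Queue() for _ in range(battle_num)]

    consumer = mp.Process(target=echo_consumer, args=(request_queues, result_queues), daemon=True)
    consumer.start()

    # One request/result round trip for battle 0.
    request_queues[0].put('{"tick": 66}')
    print(result_queues[0].get())  # -> '{"tick": 66}'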
def parse_state_log(json_str):
    # The game log prepends a timestamp before the JSON body; strip it.
    # TODO: the 23-character offset assumes the current (Python 3) log format; cutting
    # everything before the first '{' would be more robust.
    json_str = json_str[23:]
    state_json = JSON.loads(json_str)
    state_info = StateInfo.decode(state_json)
    return state_info
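# If the timestamp prefix length ever varies, a more defensive way to strip it is to cut at the
# first '{' instead of assuming 23 characters. Minimal sketch: strip_log_prefix is a hypothetical
# helper and the timestamp format in the sample line is made up; the JSON payload mirrors the
# restart packet mentioned elsewhere in this code.
import json

def strip_log_prefix(line):
    """Return the JSON payload of a log line that may carry a leading timestamp."""
    start = line.find('{')
    if start < 0:
        raise ValueError('no JSON object found in log line: %r' % line)
    return line[start:]

sample = '2018-05-04 12:00:01,123 {"wldstatic": {"ID": 9051}, "wldruntime": {"State": 0}}'
payload = json.loads(strip_log_prefix(sample))
print(payload["wldstatic"]["ID"])  # 9051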
def do_GET(self):
    content_length = int(self.headers['Content-Length'])  # size of the request body
    get_data = self.rfile.read(content_length)  # the body itself
    # Decode bytes for Python 3.
    get_data = get_data.decode()

    # Parse the request sent by the client.
    obj = JSON.loads(get_data)
    raw_state_info = StateInfo.decode(obj)

    if raw_state_info.battleid not in self.line_trainers:
        # DQN
        # self.line_trainers[raw_state_info.battleid] = LineTrainer(self.save_dir, ['27'], self.model1,
        #                                                           self.model1_save_header,
        #                                                           ['28'], self.model2,
        #                                                           self.model2_save_header)
        # PPO
        ob = np.zeros(183, dtype=float).tolist()
        model1_cache = PPO_CACHE2(ob, 1, horizon=self.model_1.optim_batchsize)
        model2_cache = PPO_CACHE2(ob, 1, horizon=self.model_2.optim_batchsize)
        self.line_trainers[raw_state_info.battleid] = LineTrainerPPO(
            self.save_dir,
            '27', self.model_1, self.model1_save_header, model1_cache,
            '28', self.model_2, self.model2_save_header, model2_cache,
            real_hero=None, policy_ratio=-1, policy_continue_acts=3)

    # Hand the frame over to the line trainer for training.
    rsp_str = self.line_trainers[raw_state_info.battleid].train_line_model(get_data)
    print(rsp_str)
    rsp_str = rsp_str.encode(encoding="utf-8")

    # Return the corresponding commands to the client.
    self._set_headers()
    self.wfile.write(rsp_str)
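# do_GET above belongs to a BaseHTTPRequestHandler subclass that also carries the trainer state
# (line_trainers, models, save paths). The self-contained sketch below only shows how such a
# handler is wired to http.server.HTTPServer; EchoHandler and port 8080 are illustrative, and the
# body is echoed back instead of being routed to a LineTrainerPPO.
from http.server import BaseHTTPRequestHandler, HTTPServer

class EchoHandler(BaseHTTPRequestHandler):
    # Simplified stand-in for the trainer handler: echo the request body back as JSON.
    def do_GET(self):
        content_length = int(self.headers.get('Content-Length') or 0)
        body = self.rfile.read(content_length)
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.end_headers()
        self.wfile.write(body)

if __name__ == '__main__':
    HTTPServer(('0.0.0.0', 8080), EchoHandler).serve_forever()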
def read_process(self, json_str):
    begin_time = time.time()
    obj = JSON.loads(json_str)
    raw_state_info = StateInfo.decode(obj)
    p_battle_id = raw_state_info.battleid
    try:
        response = self.battle_trainers[p_battle_id].build_response(json_str)
        return response
    except queue.Empty:
        print("LineTrainerManager Exception empty")
        return '{}'
    except Exception:
        print("LineTrainerManager Exception")
        traceback.print_exc(file=sys.stdout)
        return '{}'
def train_line_model(self, raw_state_str):
    self.save_raw_log(raw_state_str)
    prev_state_info = self.state_cache[-1] if len(self.state_cache) > 0 else None

    # Parse the request sent by the client.
    obj = JSON.loads(raw_state_str)
    raw_state_info = StateInfo.decode(obj)

    # On a restart the client sends {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}.
    if raw_state_info.tick == -1:
        return {"ID": raw_state_info.battleid, "tick": -1}

    if raw_state_info.tick >= 193512:
        debug_i = 1  # debugging hook

    # Merge the previous frame into the current one to obtain complete information.
    # Occasionally the starting tick is 66 and the second tick is 528, so detect a fresh battle.
    if raw_state_info.tick <= StateUtil.TICK_PER_STATE and (prev_state_info is None or prev_state_info.tick > raw_state_info.tick):
        print("clear")
        prev_state_info = None
        self.state_cache = []
        self.hero_strategy = {}
        self.model1_just_dead = 0
        self.model2_just_dead = 0
    elif prev_state_info is not None and prev_state_info.tick >= raw_state_info.tick:
        print("clear %s %s" % (prev_state_info.tick, raw_state_info.tick))
        self.state_cache = []
    elif prev_state_info is None and raw_state_info.tick > StateUtil.TICK_PER_STATE:
        # If this is not the opening frame, ask the game to restart.
        # Occasionally the first frame carries no tick (-1); in that case the battle also has to be restarted.
        print(self.battle_id, 'not the opening frame, restart the game', raw_state_info.tick)
        action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
        rsp_obj = {"ID": raw_state_info.battleid, "tick": raw_state_info.tick, "cmd": action_strs}
        rsp_str = JSON.dumps(rsp_obj)
        return rsp_str

    state_info = StateUtil.update_state_log(prev_state_info, raw_state_info)

    # Sanity check: the hero we control must be present and readable.
    hero = state_info.get_hero(self.model1_hero)
    if hero is None or hero.hp is None:
        print(self.battle_id, self.model1_hero, state_info.tick, 'hero information missing, abnormal frame')
        print(self.battle_id, 'not the opening frame, restart the game', raw_state_info.tick)
        action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
        rsp_obj = {"ID": raw_state_info.battleid, "tick": raw_state_info.tick, "cmd": action_strs}
        rsp_str = JSON.dumps(rsp_obj)
        return rsp_str

    # Persist the merged frame.
    self.state_cache.append(state_info)
    # self.save_state_log(state_info)

    # Ask the models for their actions; the chosen actions are also recorded on the current frame.
    action_strs = []
    restart = False
    if self.model1_hero is not None and self.real_hero != self.model1_hero:
        actions_model1, restart = self.build_response(self.state_cache, -1, self.model1_hero)
        action_strs.extend(actions_model1)
    if self.model2_hero is not None and not restart and self.real_hero != self.model2_hero:
        actions_model2, restart = self.build_response(self.state_cache, -1, self.model2_hero)
        action_strs.extend(actions_model2)

    # Compute rewards. With a real player we look back a few extra frames to infer their actions.
    reward_state_idx = -2 if self.real_hero is None else -4
    new = 0
    if len(self.state_cache) + reward_state_idx > 0:
        new, loss_team = self.if_restart(self.state_cache, reward_state_idx)
        if self.model1_hero is not None:
            self.remember_replay(self.state_cache, reward_state_idx, self.model1_cache, self.model_process,
                                 self.model1_hero, self.model2_hero, new, loss_team)
        if self.model2_hero is not None:
            self.remember_replay(self.state_cache, reward_state_idx, self.model2_cache, self.model_process,
                                 self.model2_hero, self.model1_hero, new, loss_team)

    # To keep restarts to a minimum, only the action caches of the previous models are cleared after training.
    if restart:
        self.model1_cache.clear_cache()
        self.model2_cache.clear_cache()
        # Return an empty action list for the current frame.
        action_strs = []

    # Restart the game when the restart condition is met
    # (the first tower on the lane has been destroyed).
    if new == 1:
        action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]

    # Return the result to the game client.
    rsp_obj = {"ID": state_info.battleid, "tick": state_info.tick, "cmd": action_strs}
    rsp_str = JSON.dumps(rsp_obj)
    return rsp_str
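# The restart reply built above (and again in the handlers below) always has the same shape:
# a RESTART command for hero '27' wrapped in an {"ID", "tick", "cmd"} envelope. A small helper
# along these lines could remove the duplication. build_restart_response is a hypothetical name;
# it reuses StateUtil.build_action_command and JSON.dumps exactly as the surrounding code does.
def build_restart_response(battle_id, tick):
    """Build the JSON reply that asks the game client to restart the battle."""
    action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
    rsp_obj = {"ID": battle_id, "tick": tick, "cmd": action_strs}
    return JSON.dumps(rsp_obj)

# Usage inside train_line_model / build_response:
#     return build_restart_response(raw_state_info.battleid, raw_state_info.tick)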
def build_response(self, raw_state_str):
    self.save_raw_log(raw_state_str)
    prev_state_info = self.state_cache[-1] if len(self.state_cache) > 0 else None
    response_strs = []

    # Parse the request sent by the client.
    obj = JSON.loads(raw_state_str)
    raw_state_info = StateInfo.decode(obj)

    # On a restart the client sends {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}.
    if raw_state_info.tick == -1:
        return {"ID": raw_state_info.battleid, "tick": -1}

    if raw_state_info.tick <= StateUtil.TICK_PER_STATE and (
            prev_state_info is None or prev_state_info.tick > raw_state_info.tick):
        print("clear")
        prev_state_info = None
        self.state_cache = []
        self.battle_started = -1
        self.battle_heroes_cache = []
        self.dead_heroes = []
        self.dead_heroes_cache = []
        self.data_inputs = []
        self.rebooting = False
    elif prev_state_info is None and raw_state_info.tick > StateUtil.TICK_PER_STATE:
        # If this is not the opening frame, ask the game to restart.
        # Occasionally the first frame carries no tick (-1); in that case the battle also has to be restarted.
        print("battle_id", self.battle_id, "tick", raw_state_info.tick, 'not the opening frame, restart the game')
        action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
        rsp_obj = {"ID": raw_state_info.battleid, "tick": raw_state_info.tick, "cmd": action_strs}
        rsp_str = JSON.dumps(rsp_obj)
        return rsp_str

    state_info = StateUtil.update_state_log(prev_state_info, raw_state_info)

    hero = state_info.get_hero("27")
    if hero is None or hero.hp is None:
        # Edge case: if the hero cannot be found, restart the battle.
        print("battle_id", self.battle_id, "tick", state_info.tick, 'hero missing, restart the game')
        action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
        rsp_obj = {"ID": raw_state_info.battleid, "tick": raw_state_info.tick, "cmd": action_strs}
        rsp_str = JSON.dumps(rsp_obj)
        return rsp_str

    # Pre-battle preparation.
    if len(self.state_cache) == 0:
        # On the first frame, grant gold and levels.
        for hero in self.heros:
            add_gold_cmd = CmdAction(hero, CmdActionEnum.ADDGOLD, None, None, None, None, None, None, None)
            add_gold_cmd.gold = 3000
            add_gold_str = StateUtil.build_command(add_gold_cmd)
            response_strs.append(add_gold_str)

            add_lv_cmd = CmdAction(hero, CmdActionEnum.ADDLV, None, None, None, None, None, None, None)
            add_lv_cmd.lv = 9
            add_lv_str = StateUtil.build_command(add_lv_cmd)
            response_strs.append(add_lv_str)
    elif len(self.state_cache) > 1:
        # From the second frame on, upgrade skills and buy equipment; this can span several frames.
        for hero in self.heros:
            upgrade_cmd = self.upgrade_skills(state_info, hero)
            if upgrade_cmd is not None:
                response_strs.append(upgrade_cmd)
            buy_cmd = self.buy_equip(state_info, hero)
            if buy_cmd is not None:
                response_strs.append(buy_cmd)

    for hero in self.heros:
        # Check whether the hero died on this frame.
        if prev_state_info is not None:
            dead = StateUtil.if_hero_dead(prev_state_info, state_info, hero)
            if dead == 1 and hero not in self.dead_heroes:
                print("battle_id", self.battle_id, "tick", state_info.tick, "hero died", hero)
                self.dead_heroes.append(hero)

    # First require all heroes to stand inside the team-battle circle; afterwards the model decides
    # every action. Invalid actions, and moves that would leave the circle, must be filtered out.
    # TODO after the battle starts, pull back heroes whose skill moves occasionally take them outside the circle.
    # Dead heroes are excluded; they do not need to rejoin the battle.
    # The battle circle shrinks over time.
    battle_range = self.cal_battle_range(len(self.state_cache) - self.battle_started)
    heroes_in_range, heroes_out_range = TeamBattleTrainer.all_in_battle_range(
        state_info, self.heros, self.dead_heroes, battle_range)

    # Heroes that are still alive.
    battle_heros = list(heroes_in_range)
    battle_heros.extend(heroes_out_range)

    # Cache participation and deaths for later training.
    self.battle_heroes_cache.append(battle_heros)
    self.dead_heroes_cache.append(list(self.dead_heroes))

    if state_info.tick >= 142560:
        debuginfo = True  # debugging hook

    # The team battle has not started yet: some heroes are still outside the circle.
    if len(heroes_out_range) > 0:
        if self.battle_started > -1:
            print('battle_id', self.battle_id, "battle already started, but heroes are still outside the circle",
                  ','.join(heroes_out_range), "battle_range", battle_range)

        # Move heroes towards the two starting points; once the battle has started, move them to the battle centre.
        for hero in heroes_out_range:
            start_point_x = randint(0, 8000)
            start_point_z = TeamBattleTrainer.BATTLE_CIRCLE_RADIUS_BATTLE_START * 1000 if self.battle_started == -1 else 0
            start_point_z += randint(-4000, 4000)
            if TeamBattleUtil.get_hero_team(hero) == 0:
                start_point_z *= -1
            start_point_z += TeamBattleTrainer.BATTLE_POINT_Z
            tgt_pos = PosStateInfo(start_point_x, 0, start_point_z)
            move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None, tgt_pos, None, None, None, None)
            mov_cmd_str = StateUtil.build_command(move_action)
            response_strs.append(mov_cmd_str)

    # The team battle is under way.
    elif not self.rebooting:
        if self.battle_started == -1:
            self.battle_started = len(self.state_cache)

        # Special case: Dracula's ultimate drops his hp to 1; patch the frame state.
        state_info, _ = TeamBattlePolicy.modify_status_4_draculas_invincible(state_info, self.state_cache)

        # action_cmds, input_list, model_upgrade = self.get_model_actions(state_info, heroes_in_range)

        # Get actions per team.
        team_a, team_b = TeamBattleUtil.get_teams(heroes_in_range)
        team_actions_a, input_list_a, model_upgrade_a = self.get_model_actions_team(state_info, team_a, heroes_in_range)
        team_actions_b, input_list_b, model_upgrade_b = self.get_model_actions_team(state_info, team_b, heroes_in_range)

        # If a model was upgraded after the battle started, restart the battle.
        if (model_upgrade_a or model_upgrade_b) and self.battle_started < len(self.state_cache) + 1:
            print("battle_id", self.battle_id, "restarting the battle because the model was upgraded",
                  self.battle_started, len(self.state_cache))
            action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
            rsp_obj = {"ID": raw_state_info.battleid, "tick": raw_state_info.tick, "cmd": action_strs}
            rsp_str = JSON.dumps(rsp_obj)
            return rsp_str

        data_input_map = {}
        for action_cmd, data_input in zip(team_actions_a + team_actions_b, input_list_a + input_list_b):
            action_str = StateUtil.build_command(action_cmd)
            response_strs.append(action_str)
            state_info.add_action(action_cmd)
            data_input_map[action_cmd.hero_name] = data_input

        # Cache all model inputs for later training.
        self.data_inputs.append(data_input_map)

    # Append the frame to the cache.
    self.state_cache.append(state_info)

    # Add the model actions to the training cache and compute the rewards.
    # Note: rewards depend on subsequent states, so this computation lags behind.
    last_x_index = 2
    if self.battle_started > -1 and len(self.data_inputs) >= last_x_index:
        if self.rebooting:
            # Tests show the next frame may arrive before the restart takes effect; skip training and keep restarting.
            print("battle_id", self.battle_id, "tick", state_info.tick, "warn",
                  "restart requested but frames keep arriving, restarting again")
            # Restart the game.
            response_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
        else:
            state_index = len(self.state_cache) - last_x_index
            win, win_team, left_heroes = self.remember_replay_heroes(-last_x_index, state_index, battle_range)

            # End-of-battle condition: fight until only one side is left.
            # all_in_team = TeamBattleUtil.all_in_one_team(heroes_in_range)
            # if self.battle_started:
            #     if len(self.dead_heroes) >= 9 or (len(self.dead_heroes) >= 5 and all_in_team > -1):
            if win == 1:
                # Restart the game.
                print('battle_id', self.battle_id, "restart the game", "remaining heroes", ','.join(left_heroes))
                response_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
                self.rebooting = True

    # Earlier per-hero implementation, kept for reference:
    # battle_heros = self.search_team_battle(state_info)
    # if len(battle_heros) > 0:
    #     print("team battle heros", ';'.join(battle_heros))
    #
    # heros_need_model = []
    # for hero in self.heros:
    #     # Check whether the hero died on this frame.
    #     if prev_state_info is not None:
    #         dead = StateUtil.if_hero_dead(prev_state_info, state_info, hero)
    #         if dead == 1 and hero not in self.dead_heroes:
    #             self.dead_heroes.append(hero)
    #
    #     # Heroes that have died (and respawned) should not rejoin the team battle.
    #     if hero in self.dead_heroes:
    #         continue
    #
    #     # near_enemy_heroes = StateUtil.get_nearby_enemy_heros(state_info, hero, TeamBattleTrainer.MODEL_RANGE)
    #     if hero not in battle_heros:
    #         # Move near the battle point, with some randomness.
    #         rdm_delta_x = randint(0, 1000)
    #         rdm_delta_z = randint(0, 1000)
    #         tgt_pos = PosStateInfo(TeamBattleTrainer.BATTLE_POINT_X + rdm_delta_x, 0, TeamBattleTrainer.BATTLE_POINT_Z + rdm_delta_z)
    #         move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None, tgt_pos, None, None, None, None)
    #         mov_cmd_str = StateUtil.build_command(move_action)
    #         response_strs.append(mov_cmd_str)
    #     else:
    #         # Let the model decide.
    #         heros_need_model.append(hero)
    #
    # if len(heros_need_model) > 0:
    #     action_cmds = self.get_model_actions(state_info, heros_need_model)
    #     for action_cmd in action_cmds:
    #         action_str = StateUtil.build_command(action_cmd)
    #         response_strs.append(action_str)
    #         state_info.add_action(action_cmd)
    #         # TODO record model outputs for later training

    # Return the result to the game client.
    rsp_obj = {"ID": state_info.battleid, "tick": state_info.tick, "cmd": response_strs}
    rsp_str = JSON.dumps(rsp_obj)
    print('battle_id', self.battle_id, 'response', rsp_str)
    return rsp_str
def train_line_model(self, raw_state_str):
    self.save_raw_log(raw_state_str)
    prev_state_info = self.state_cache[-1] if len(self.state_cache) > 0 else None

    # Parse the request sent by the client.
    obj = JSON.loads(raw_state_str)
    raw_state_info = StateInfo.decode(obj)

    # On a restart the client sends {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}.
    if raw_state_info.tick == -1:
        return ''

    if raw_state_info.tick == 285516:
        debug_i = 1  # debugging hook

    # Merge the previous frame into the current one to obtain complete information.
    if raw_state_info.tick <= StateUtil.TICK_PER_STATE:
        print("clear")
        self.state_cache = []
        prev_state_info = None
    elif prev_state_info is not None and prev_state_info.tick >= raw_state_info.tick:
        print("clear %s %s" % (prev_state_info.tick, raw_state_info.tick))
        self.state_cache = []
    state_info = StateUtil.update_state_log(prev_state_info, raw_state_info)

    # Ask the models for their actions; the chosen actions are also recorded on the current frame.
    action_strs = self.build_response(state_info, prev_state_info, self.model1, self.model1_heros)
    if self.model2_heros is not None:
        actions_model2 = self.build_response(state_info, prev_state_info, self.model2, self.model2_heros)
        action_strs.extend(actions_model2)

    # Cache and persist the frame.
    self.state_cache.append(state_info)
    self.save_state_log(state_info)

    # Update the player actions and rewards; this lags behind by a few frames.
    reward_state_idx = len(self.state_cache) - LineModel.REWARD_DELAY_STATE_NUM
    # print('reward_state_idx: ' + str(reward_state_idx))
    state_with_reward = None
    if reward_state_idx > 1:
        if self.state_cache[reward_state_idx].tick >= 686004:
            debug = 1  # debugging hook
        self.guess_hero_actions(reward_state_idx, self.real_heros)
        prev_4_m = self.state_cache[reward_state_idx - 1]
        state_with_reward = LineModel_DQN.update_state_rewards(self.state_cache, reward_state_idx)

    if state_with_reward is not None:
        # Persist the intermediate result.
        next_state_4_m = self.state_cache[reward_state_idx + 1]
        self.save_reward_log(state_with_reward)

        added = self.model1.remember(prev_4_m, state_with_reward, next_state_4_m)
        if added:
            # Train model 1.
            model1_memory_len = self.model1.get_memory_size()
            if self.model1.if_replay(64):
                self.model1.replay(64)
            if model1_memory_len > 0 and model1_memory_len % 1000 == 0:
                self.model1.save(self.model1_save_header + str(self.model1.get_memory_size()) + '/model')

        if self.model2 is not None:
            # TODO filter the replay into the corresponding model.
            added = self.model2.remember(prev_4_m, state_with_reward, next_state_4_m)
            if added:
                # Train model 2.
                model2_memory_len = self.model2.get_memory_size()
                if self.model2.if_replay(64):
                    self.model2.replay(64)
                if model2_memory_len > 0 and model2_memory_len % 1000 == 0:
                    self.model2.save(self.model2_save_header + str(self.model2.get_memory_size()) + '/model')

    # Restart the game when the restart condition is met
    # (the first tower on the middle lane has been destroyed).
    if StateUtil.if_first_tower_destroyed_in_middle_line(state_info):
        print('restart the game')
        action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]

    # Return the result to the game client.
    rsp_obj = {"ID": state_info.battleid, "tick": state_info.tick, "cmd": action_strs}
    rsp_str = JSON.dumps(rsp_obj)
    return rsp_str