Пример #1
0
 def get_attack_unit_action(state_info, hero_name, unit_name, skill_id):
     """Build the command for attacking, or casting a skill on, a nearby enemy unit.

     The unit is looked up by name among the units returned by
     ``StateUtil.get_nearby_enemy_units``; its position in that list is
     folded into the action index as ``unit_idx + 10 * skill_id + 10``.

     Args:
         state_info: current frame state.
         hero_name: name of the acting hero.
         unit_name: ``unit_name`` of the targeted enemy unit.
         skill_id: 0 for a basic attack, otherwise the skill to cast.

     Returns:
         A ``CmdAction``: ATTACK when ``skill_id`` is 0, CAST otherwise.

     Raises:
         ValueError: if ``unit_name`` is not among the nearby enemy units
             (raised by ``list.index``).
     """
     creeps = StateUtil.get_nearby_enemy_units(state_info, hero_name)
     unit_idx = [c.unit_name for c in creeps].index(unit_name)
     action_idx = unit_idx + 10 * skill_id + 10
     # The original test was ``skill_id >= 1``, which sent a basic attack for
     # every real skill and a CAST for skill 0. The branches are now aligned
     # with get_attack_hero_action: skill 0 is the basic attack.
     if skill_id == 0:
         action = CmdAction(hero_name, CmdActionEnum.ATTACK, 0, unit_name,
                            None, None, None, action_idx, None)
     else:
         tgtpos = creeps[unit_idx].pos
         hero = state_info.get_hero(hero_name)
         fwd = tgtpos.fwd(hero.pos)
         action = CmdAction(hero_name, CmdActionEnum.CAST, skill_id,
                            unit_name, tgtpos, fwd, None, action_idx, None)
     return action
Пример #2
0
 def get_attack_hero_action(state_info, hero_name, rival_hero_name,
                            skill_id):
     """Build the command for attacking, or casting a skill on, the rival hero.

     The action index is ``10 * skill_id + 9``. Skill 0 produces a basic
     ATTACK command; any other skill id produces a CAST aimed at the rival
     hero's current position.

     Args:
         state_info: current frame state, queried through ``get_hero``.
         hero_name: name of the acting hero.
         rival_hero_name: name of the targeted hero.
         skill_id: 0 for a basic attack, otherwise the skill to cast.

     Returns:
         The resulting ``CmdAction``.
     """
     action_idx = 10 * skill_id + 9

     if skill_id != 0:
         rival_pos = state_info.get_hero(rival_hero_name).pos
         caster = state_info.get_hero(hero_name)
         direction = rival_pos.fwd(caster.pos)
         return CmdAction(hero_name, CmdActionEnum.CAST, skill_id,
                          rival_hero_name, rival_pos, direction, None,
                          action_idx, None)

     return CmdAction(hero_name, CmdActionEnum.ATTACK, 0, rival_hero_name,
                      None, None, None, action_idx, None)
Пример #3
0
 def get_self_cast_action(state_info, hero_name, rival_hero_name, skill_id):
     """Build a CAST command whose target position is the caster's own position.

     The action index is ``10 * skill_id + 8``. The target id placed in the
     command is still ``rival_hero_name``; only the position and direction
     are derived from the casting hero.

     Returns:
         The resulting ``CmdAction``.
     """
     action_idx = 10 * skill_id + 8
     caster = state_info.get_hero(hero_name)
     own_pos = caster.pos
     # Direction is computed from the hero's position to itself, as before.
     direction = own_pos.fwd(caster.pos)
     return CmdAction(hero_name, CmdActionEnum.CAST, skill_id,
                      rival_hero_name, own_pos, direction, None, action_idx,
                      None)
Пример #4
0
 def get_action(selected,
                state_info,
                hero,
                hero_name,
                rival_hero,
                revert=False):
     """Translate a selected action index into a ``CmdAction``.

     Index layout, as handled below:
         * ``< 8``   - move along the direction given by ``StateUtil.mov``.
         * ``8``     - basic attack on the nearest enemy tower.
         * ``9``     - basic attack on the rival hero.
         * ``10-17`` - basic attack on nearby enemy unit ``selected - 10``.
         * ``18-47`` - cast a skill; the target is resolved by
           ``LineModel.choose_skill_target``.
         * ``48``    - hold position.
         * otherwise - retreat to ``StateUtil.get_retreat_pos``.

     Args:
         selected: the chosen action index.
         state_info: current frame state.
         hero: state object of the acting hero (its ``pos`` is read).
         hero_name: name of the acting hero.
         rival_hero: name of the opposing hero.
         revert: forwarded to ``StateUtil.mov`` for move actions.

     Returns:
         The resulting ``CmdAction``.
     """
     # Move.
     if selected < 8:
         direction = StateUtil.mov(selected, revert)
         destination = PosStateInfo(hero.pos.x + direction.x * 15,
                                    hero.pos.y + direction.y * 15,
                                    hero.pos.z + direction.z * 15)
         return CmdAction(hero_name, CmdActionEnum.MOVE, None, None,
                          destination, None, None, selected, None)

     # Basic attack on the enemy tower.
     if selected == 8:
         tower = StateUtil.get_nearest_enemy_tower(
             state_info, hero_name, StateUtil.ATTACK_UNIT_RADIUS)
         return CmdAction(hero_name, CmdActionEnum.ATTACK, 0,
                          tower.unit_name, None, None, None, selected, None)

     # Basic attack on the enemy hero.
     if selected == 9:
         return CmdAction(hero_name, CmdActionEnum.ATTACK, 0, rival_hero,
                          None, None, None, selected, None)

     # Basic attack on one of the nearby enemy units.
     if selected < 18:
         nearby_units = StateUtil.get_nearby_enemy_units(
             state_info, hero_name)
         target_name = nearby_units[selected - 10].unit_name
         return CmdAction(hero_name, CmdActionEnum.ATTACK, 0, target_name,
                          None, None, None, selected, None)

     # Skill cast: every skill owns a run of ten consecutive indices.
     if selected < 48:
         skillid = int((selected - 18) / 10 + 1)
         slot = selected - 18 - (skillid - 1) * 10
         target_name, target_pos = LineModel.choose_skill_target(
             slot, state_info, skillid, hero_name, hero.pos, rival_hero)
         direction = None if target_pos is None else target_pos.fwd(hero.pos)
         return CmdAction(hero_name, CmdActionEnum.CAST, skillid,
                          target_name, target_pos, direction, None, selected,
                          None)

     # Hold.
     if selected == 48:
         return CmdAction(hero_name, CmdActionEnum.HOLD, None, None,
                          hero.pos, None, None, 48, None)

     # Retreat.
     retreat_pos = StateUtil.get_retreat_pos(state_info, hero, line_index=1)
     return CmdAction(hero_name, CmdActionEnum.RETREAT, None, None,
                      retreat_pos, None, None, selected, None)
Пример #5
0
 def upgrade_skills(self, state_info, hero_name):
     """Build the skill-upgrade command for a hero, if any skill can be upgraded.

     Skill 3 is preferred whenever it is among the upgradable skills;
     otherwise the first upgradable skill is chosen.

     Returns:
         The result of ``StateUtil.build_command`` for the UPDATE command,
         or ``None`` when no skill can be upgraded.
     """
     hero = state_info.get_hero(hero_name)
     upgradable = StateUtil.get_skills_can_upgrade(hero)
     if not len(upgradable) > 0:
         return None

     chosen = upgradable[0] if 3 not in upgradable else 3
     command = CmdAction(hero.hero_name, CmdActionEnum.UPDATE, chosen, None,
                         None, None, None, None, None)
     return StateUtil.build_command(command)
Пример #6
0
    def get_or_insert_reward(self, hero_name):
        """Return the reward of the hero's recorded action, creating one if absent.

        The first entry of ``self.actions`` whose ``hero_name`` matches is
        used. When there is none, an EMPTY action with reward 0 is registered
        through ``self.add_action`` and its reward is returned.
        """
        matches = (act for act in self.actions if act.hero_name == hero_name)
        for recorded in matches:
            return recorded.reward

        # Nothing recorded for this hero yet: add an empty placeholder action.
        placeholder = CmdAction(hero_name, CmdActionEnum.EMPTY, None, None,
                                None, None, None, None, 0)
        self.add_action(placeholder)
        return placeholder.reward
Пример #7
0
 def policy_move_retreat(hero_info):
     """Build a MOVE command that steps the hero in its team's retreat direction.

     Team 0 uses move index 6 and every other team uses move index 0; the
     destination is the hero's position shifted by 15 times the direction
     returned by ``StateUtil.mov``.

     Returns:
         The MOVE ``CmdAction``, carrying the move index as its action index.
     """
     mov_idx = 6 if hero_info.team == 0 else 0
     direction = StateUtil.mov(mov_idx)
     destination = PosStateInfo(hero_info.pos.x + direction.x * 15,
                                hero_info.pos.y + direction.y * 15,
                                hero_info.pos.z + direction.z * 15)
     return CmdAction(hero_info.hero_name, CmdActionEnum.MOVE, None, None,
                      destination, None, None, mov_idx, None)
Пример #8
0
 def get_attack_tower_action(hero_name, hero_info, tower_unit):
     """Build the command for attacking a tower, approaching it first if needed.

     The tower detection range used by the model is fairly large, so the
     tower may be out of attack reach. When the hero is farther away than
     ``StateUtil.ATTACK_UNIT_RADIUS`` a MOVE command towards the tower is
     returned instead of an ATTACK.

     Returns:
         A MOVE ``CmdAction`` (indexed by the closest discrete direction) or
         an ATTACK ``CmdAction`` with action index 11.
     """
     distance = StateUtil.cal_distance(hero_info.pos, tower_unit.pos)
     if distance > StateUtil.ATTACK_UNIT_RADIUS:
         # Move using a target position rather than a raw direction: the
         # direction coordinate system of the move command is unusual.
         raw_direction = tower_unit.pos.fwd(hero_info.pos)
         fwd, output_index = Replayer.get_closest_fwd(raw_direction)
         destination = PosStateInfo(hero_info.pos.x + fwd.x * 15,
                                    hero_info.pos.y + fwd.y * 15,
                                    hero_info.pos.z + fwd.z * 15)
         print("朝塔移动,", hero_name, "hero_pos", hero_info.pos.to_string(),
               "tower_pos", tower_unit.pos.to_string(), "fwd",
               fwd.to_string(), "output_index", output_index)
         return CmdAction(hero_name, CmdActionEnum.MOVE, None, None,
                          destination, None, None, output_index, None)

     # NOTE(review): the attack index is hard-coded to 11 - confirm that this
     # matches the action table used by the consumer of this command.
     action_idx = 11
     return CmdAction(hero_name, CmdActionEnum.ATTACK, 0,
                      tower_unit.unit_name, None, None, None, action_idx,
                      None)
Пример #9
0
    def decode(obj):
        """Build a ``StateInfo`` from one decoded state message.

        Args:
            obj: mapping holding ``wldstatic`` / ``wldruntime`` plus
                per-entity entries under digit-string keys, and optionally
                ``attackinfos``, ``hitinfos``, ``dmginfos`` and ``actions``.

        Returns:
            A ``StateInfo``; its tick is -1 when the message carries none.
        """
        battleid = obj['wldstatic']['ID']
        runtime = obj['wldruntime']
        tick = runtime['tick'] if 'tick' in runtime else -1

        # Creep-line information in the first frame is ignored.

        # Heroes seem to occupy ids 27-36: decode consecutive ids starting at
        # 27 until one cannot be decoded.
        heros = []
        next_id = 27
        hero_info = StateInfo.decode_hero(obj, next_id)
        while hero_info is not None:
            heros.append(hero_info)
            next_id += 1
            hero_info = StateInfo.decode_hero(obj, next_id)
        last_hero_id = next_id - 1

        # Every other digit-keyed entry is a unit. Keys are compared as ints,
        # but the original string key is what gets passed to the decoder.
        units = [
            UnitStateInfo.decode(obj[key], key)
            for key in obj.keys()
            if key.isdigit() and not 27 <= int(key) <= last_hero_id
        ]

        attack_infos = ([AttackStateInfo.decode(item)
                         for item in obj['attackinfos']]
                        if 'attackinfos' in obj else [])
        hit_infos = ([HitStateInfo.decode(item) for item in obj['hitinfos']]
                     if 'hitinfos' in obj else [])
        dmg_infos = ([DmgStateInfo.decode(item) for item in obj['dmginfos']]
                     if 'dmginfos' in obj else [])
        actions = ([CmdAction.decode(item) for item in obj['actions']]
                   if 'actions' in obj else [])

        return StateInfo(battleid, tick, heros, units, attack_infos, hit_infos,
                         dmg_infos, actions, None)
Пример #10
0
    def buy_equip(state_info, hero_name):
        """Return a BUY command for the next item of the hero's equipment plan.

        The plan is looked up in ``EquipUtil.equip_plans`` by the hero's
        ``cfg_id``. Only the first planned item the hero does not own is
        considered: if it is affordable a BUY ``CmdAction`` is returned,
        otherwise nothing is bought (later items are never tried).

        Returns:
            A BUY ``CmdAction``, or ``None`` when there is no plan, nothing
            left to buy, or not enough gold for the next item.
        """
        hero = state_info.get_hero(hero_name)
        if hero.cfg_id not in EquipUtil.equip_plans:
            return None

        plan = EquipUtil.equip_plans[hero.cfg_id]
        owned_equips = [int(item.id) for item in hero.equips]

        # Walk the plan in order, skipping what the hero already owns.
        for equip_id in plan:
            if equip_id in owned_equips:
                continue

            equip_info = EquipUtil.get_equip_info(equip_id)
            if equip_info.buy_price <= hero.gold:
                print(state_info.battleid, hero_name, '购买道具', equip_id,
                      '当前拥有', ','.join(str(e) for e in owned_equips),
                      '金币', hero.gold, '价格', equip_info.buy_price,
                      '名称', equip_info.name)
                return CmdAction(hero_name, CmdActionEnum.BUY, None, None,
                                 None, None, equip_id, None, None)

            # Not enough gold: wait instead of buying a later item.
            return None

        return None
Пример #11
0
    def build_response(self, state_cache, state_index, hero_name):
        """Decide the commands to issue for ``hero_name`` on one frame.

        For a model-controlled hero the frame at ``state_index`` is analysed.
        For the real player (``self.real_hero``) the frame three steps back
        is analysed, so that the three following frames can be used to guess
        what the player did.

        The method, in order: optionally buys equipment, upgrades a skill,
        continues an ongoing town-portal or retreat, reacts to a recent kill
        by retreating, and finally either follows the creep line / tower or
        asks the laning model (optionally overridden by the fixed policy)
        for an action.

        Args:
            state_cache: sequence of cached frame states.
            state_index: index of the current frame in ``state_cache``.
            hero_name: name of the hero to decide for.

        Returns:
            ``(action_strs, restart)``: the list of commands built with
            ``StateUtil.build_command`` and a flag that is True only when
            the chosen action is RESTART.
        """
        action_strs = []
        restart = False

        if self.real_hero != hero_name:
            # Model-controlled hero: analyse the current frame.
            state_info = state_cache[state_index]
            # The original condition was ``len(state_cache) >= 2 is not None``,
            # an accidental chained comparison; only the length test matters.
            prev_hero = (state_cache[state_index - 1].get_hero(hero_name)
                         if len(state_cache) >= 2 else None)
        elif len(state_cache) > 3:
            # Real player: some history is needed, so analyse the frame three
            # steps back and keep the three frames that follow it.
            state_info = state_cache[state_index - 3]
            next1_state_info = state_cache[state_index - 2]
            next2_state_info = state_cache[state_index - 1]
            next3_state_info = state_cache[state_index]
            # prev_hero used to be left unbound on this path, which made the
            # checks below raise UnboundLocalError. No previous-frame hero is
            # tracked for the real player.
            prev_hero = None
        else:
            return action_strs, False

        # Decide whether to buy equipment.
        buy_action = EquipUtil.buy_equip(state_info, hero_name)
        if buy_action is not None:
            buy_str = StateUtil.build_command(buy_action)
            action_strs.append(buy_str)

        # Upgrade a skill if possible, preferring skill 3.
        hero = state_info.get_hero(hero_name)
        skills = StateUtil.get_skills_can_upgrade(hero)
        if len(skills) > 0:
            skillid = 3 if 3 in skills else skills[0]
            update_cmd = CmdAction(hero.hero_name, CmdActionEnum.UPDATE, skillid, None, None, None, None, None, None)
            update_str = StateUtil.build_command(update_cmd)
            action_strs.append(update_str)

        # Town-portal logic: while the portal is in progress and the hero has
        # not lost hp, either keep waiting or cast it again.
        if prev_hero is not None:
            if hero.hero_name in self.hero_strategy and self.hero_strategy[hero.hero_name] == ActionEnum.town_ing \
                    and prev_hero.hp <= hero.hp \
                    and not StateUtil.if_hero_at_basement(hero):
                if not hero.skills[6].canuse:
                    print(self.battle_id, hero.hero_name, '回城中,继续回城')
                    return action_strs, False
                else:
                    print(self.battle_id, hero.hero_name, '回城失败')
                    town_action = CmdAction(hero.hero_name, CmdActionEnum.CAST, 6, hero.hero_name, None, None, None,
                                            None, None)
                    action_str = StateUtil.build_command(town_action)
                    action_strs.append(action_str)
                    return action_strs, False
                # An unreachable ``hero.hp <= 0`` check used to follow here;
                # the same check is performed further down.

        # Retreat logic: once the hero has (almost) stopped moving, start the
        # town portal.
        # TODO: movement skills could also be used while retreating.
        if prev_hero is not None and hero.hero_name in self.hero_strategy and self.hero_strategy[hero.hero_name] == ActionEnum.retreat_to_town:
            if StateUtil.cal_distance2(prev_hero.pos, hero.pos) < 100:
                print(self.battle_id, hero_name, '开始回城')
                self.hero_strategy[hero.hero_name] = ActionEnum.town_ing
                town_action = CmdAction(hero.hero_name, CmdActionEnum.CAST, 6, hero.hero_name, None, None, None,
                                        None, None)
                action_str = StateUtil.build_command(town_action)
                action_strs.append(action_str)
            else:
                print(self.battle_id, hero_name, '还在撤退中', StateUtil.cal_distance2(prev_hero.pos, hero.pos))
            return action_strs, False

        # A second copy of the town-portal check used to sit here. Its
        # condition was identical to the one above, which always returns when
        # it holds, so the copy could never fire and has been removed.

        # A dead hero has no strategy.
        if hero.hp <= 0:
            self.hero_strategy[hero.hero_name] = None
            return action_strs, False

        # Inspect the surroundings.
        near_enemy_heroes = StateUtil.get_nearby_enemy_heros(state_info, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
        near_enemy_units = StateUtil.get_nearby_enemy_units(state_info, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
        nearest_enemy_tower = StateUtil.get_nearest_enemy_tower(state_info, hero.hero_name,
                                                                StateUtil.LINE_MODEL_RADIUS + 3)
        nearest_friend_units = StateUtil.get_nearby_friend_units(state_info, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
        line_index = 1
        near_enemy_units_in_line = StateUtil.get_units_in_line(near_enemy_units, line_index)
        nearest_enemy_tower_in_line = StateUtil.get_units_in_line([nearest_enemy_tower], line_index)

        # After killing the rival hero, go back to town to heal: with no
        # enemies around, retreat first, start the town portal at the retreat
        # point, and follow the creeps out again once healed.
        # While standing in the base the strategy is reset to laning.
        if len(near_enemy_units_in_line) == 0 and len(near_enemy_heroes) == 0:
            if (hero_name == self.model1_hero and self.model2_just_dead == 1 and not StateUtil.if_hero_at_basement(hero)) \
                    or (hero_name == self.model2_hero and self.model1_just_dead == 1 and not StateUtil.if_hero_at_basement(hero)):
                if hero.hp / float(hero.maxhp) > 0.8:
                    # Healthy enough: just clear the "rival just died" flag.
                    if hero_name == self.model1_hero:
                        self.model2_just_dead = 0
                    else:
                        self.model1_just_dead = 0
                else:
                    print(self.battle_id, hero_name, '选择撤退')
                    self.hero_strategy[hero_name] = ActionEnum.retreat_to_town
                    retreat_pos = StateUtil.get_retreat_pos(state_info, hero, line_index=1)
                    action = CmdAction(hero_name, CmdActionEnum.MOVE, None, None, retreat_pos, None, None, -1, None)
                    action_str = StateUtil.build_command(action)
                    action_strs.append(action_str)
                    if hero_name == self.model1_hero:
                        self.model2_just_dead = 0
                    else:
                        self.model1_just_dead = 0
                    return action_strs, False

            if StateUtil.if_hero_at_basement(hero):
                if hero_name == self.model1_hero:
                    self.model2_just_dead = 0
                else:
                    self.model1_just_dead = 0
                if hero.hp < hero.maxhp:
                    # Still healing in the base: drop the strategy and wait.
                    if hero_name in self.hero_strategy:
                        del self.hero_strategy[hero_name]
                    return action_strs, False

        # Decide the action from the strategy.
        # When laning, move towards the front of the creep line. If there is
        # danger nearby (enemy units) the laning model takes over. Nearby
        # creeps or towers only count when they belong to the given line.
        if (len(near_enemy_units_in_line) == 0 and len(nearest_enemy_tower_in_line) == 0 and (
                len(near_enemy_heroes) == 0 or
                StateUtil.if_in_line(hero, line_index, 4000) == -1)
            ) or\
            (len(nearest_friend_units) == 0 and len(near_enemy_units_in_line) == 0 and
            len(near_enemy_heroes) == 0 and len(nearest_enemy_tower_in_line) == 1):

            # Follow the creep line or the tower, preferring the tower.
            self.hero_strategy[hero.hero_name] = ActionEnum.line_1
            front_soldier = StateUtil.get_frontest_soldier_in_line(state_info, line_index, hero.team)
            first_tower = StateUtil.get_first_tower(state_info, hero)

            if front_soldier is None or (hero.team == 0 and first_tower.pos.x > front_soldier.pos.x) or (hero.team == 1 and first_tower.pos.x < front_soldier.pos.x):
                # Follow the tower when it is ahead of the creeps.
                follow_tower_pos = StateUtil.get_tower_behind(first_tower, hero, line_index=1)
                move_action = CmdAction(hero.hero_name, CmdActionEnum.MOVE, None, None, follow_tower_pos, None, None,
                                        None, None)
                action_str = StateUtil.build_command(move_action)
                action_strs.append(action_str)
            else:
                # Move to the front-most creep of the line.
                move_action = CmdAction(hero.hero_name, CmdActionEnum.MOVE, None, None, front_soldier.pos, None, None,
                                        None, None)
                action_str = StateUtil.build_command(move_action)
                action_strs.append(action_str)
        else:
            if self.real_hero != hero_name:
                # Let the laning model decide.
                self.hero_strategy[hero.hero_name] = ActionEnum.line_model

                # Laning currently involves exactly two heroes.
                rival_hero = '28' if hero.hero_name == '27' else '27'
                action, explorer_ratio, action_ratios = self.get_action(state_info, hero_name, rival_hero)

                # Optionally use the fixed policy instead. Once the policy is
                # chosen it is kept for several consecutive actions (e.g. to
                # keep attacking a low-hp rival); a None result means the
                # policy streak is interrupted.
                if self.policy_ratio > 0 and (
                        0 < self.cur_policy_act_idx_map[hero_name] < self.policy_continue_acts
                        or random.uniform(0, 1) <= self.policy_ratio
                ):
                    policy_action = LineTrainerPolicy.choose_action(state_info, action_ratios, hero_name, rival_hero,
                                            near_enemy_units, nearest_friend_units)
                    if policy_action is not None:
                        policy_action.vpred = action.vpred
                        action = policy_action
                        self.cur_policy_act_idx_map[hero_name] += 1
                        print("英雄 " + hero_name + " 使用策略,策略行为计数 idx " + str(self.cur_policy_act_idx_map[hero_name]))
                        if self.cur_policy_act_idx_map[hero_name] >= self.policy_continue_acts:
                            self.cur_policy_act_idx_map[hero_name] = 0
                    else:
                        # Policy interrupted: reset the streak counter.
                        if self.cur_policy_act_idx_map[hero_name] > 0:
                            print("英雄 " + hero_name + " 策略中断,清零")
                            self.cur_policy_act_idx_map[hero_name] = 0

                action_str = StateUtil.build_command(action)
                action_strs.append(action_str)

                # A town-portal cast changes the hero's strategy, which
                # decides whether later frames wait for the portal to finish.
                if action.action == CmdActionEnum.CAST and int(action.skillid) == 6:
                    print("英雄%s释放了回城" % hero_name)
                    self.hero_strategy[hero.hero_name] = ActionEnum.town_ing

                # A retreat is marked specially; it affects later behaviour.
                if action.action == CmdActionEnum.RETREAT:
                    print("英雄%s释放了撤退,撤退点为%s" % (hero_name, action.tgtpos.to_string()))
                    self.hero_strategy[hero.hero_name] = ActionEnum.retreat
                    self.retreat_pos = action.tgtpos

                # RESTART means the training batch is finished: the caller
                # should clear unused training data and restart the game.
                if action.action == CmdActionEnum.RESTART:
                    restart = True
                else:
                    # Record the action on the frame state.
                    state_info.add_action(action)
            else:
                # The model is still needed to compute a vpred.
                rival_hero = '28' if hero.hero_name == '27' else '27'
                action, explorer_ratio, action_ratios = self.get_action(state_info, hero_name, rival_hero)

                # Guess what the player did from the following frames.
                guess_action = Replayer.guess_player_action(state_info, next1_state_info, next2_state_info,
                                                            next3_state_info, hero_name, rival_hero)
                guess_action.vpred = action.vpred
                action_str = StateUtil.build_command(guess_action)
                action_str['tick'] = state_info.tick
                print('猜测玩家行为为:' + JSON.dumps(action_str))

                # Record the guessed action on the frame state.
                state_info.add_action(guess_action)

        return action_strs, restart
Пример #12
0
    def get_action_cmd(action_list,
                       unaval_list,
                       recommmend_list,
                       state_info,
                       hero_name,
                       friends,
                       opponents,
                       revert=False):
        """Pick the best available action and turn it into a ``CmdAction``.

        Actions are tried in decreasing order of their value in
        ``action_list``. Index layout, as handled below:
            * ``0-7``   - move.
            * ``8-12``  - basic attack on hero ``selected - 8``.
            * ``13-27`` - skill cast, five target slots per skill.

        An availability of ``-1`` in ``unaval_list`` rules an action out; an
        availability of ``0`` turns an attack or cast into a MOVE towards
        the target instead.

        Note:
            ``action_list`` is modified in place: entries that are not
            recommended, not available, or not handled are set to -1.

        Args:
            action_list: per-action values; the highest is preferred.
            unaval_list: per-action availability flags.
            recommmend_list: when non-empty, only these indices may be chosen.
            state_info: current frame state.
            hero_name: name of the acting hero.
            friends: passed through to ``TeamBattleUtil.get_target_hero``.
            opponents: passed through to ``TeamBattleUtil.get_target_hero``.
            revert: forwarded to ``StateUtil.mov`` for move actions.

        Returns:
            ``(action, max_q, selected)``. When nothing usable is left the
            action is HOLD (index 48) and ``selected`` is -1.
        """
        hero = state_info.get_hero(hero_name)

        # If there are recommended actions, choose only among them.
        if len(recommmend_list) > 0:
            for i in range(len(action_list)):
                if i not in recommmend_list:
                    action_list[i] = -1
            print("battle_id", state_info.battleid, "tick", state_info.tick,
                  "hero", hero_name, "根据推荐,只从以下行为中挑选",
                  ",".join("%f" % float(act) for act in action_list),
                  ",".join("%f" % float(act) for act in recommmend_list))

        while True:
            max_q = max(action_list)
            if max_q <= -1:
                action = CmdAction(hero_name, CmdActionEnum.HOLD, None, None,
                                   hero.pos, None, None, 48, None)
                return action, max_q, -1

            selected = action_list.index(max_q)
            avail_type = unaval_list[selected]
            # Rule out unavailable actions. Indices >= 28 have no handler
            # below; they used to make this loop spin forever on the same
            # maximum, so they are ruled out as well.
            # TODO: for avail_type == 0, consider not approaching the target
            # while the skill is unusable.
            if avail_type == -1 or selected >= 28:
                action_list[selected] = -1
                continue

            if selected < 8:  # move
                fwd = StateUtil.mov(selected, revert)
                # The destination comes from our own movement formula; it may
                # be blocked by obstacles, and the real reachable distance
                # may be longer than computed.
                tgtpos = TeamBattleUtil.set_move_target(hero, fwd)
                action = CmdAction(hero.hero_name, CmdActionEnum.MOVE, None,
                                   None, tgtpos, None, None, selected, None)
                return action, max_q, selected

            if selected < 13:  # basic attack on a hero
                target_index = selected - 8
                target_hero = TeamBattleUtil.get_target_hero(
                    hero.hero_name, friends, opponents, target_index)
                target_hero_info = state_info.get_hero(target_hero)
                if avail_type == 0:
                    action = CmdAction(hero.hero_name, CmdActionEnum.MOVE,
                                       None, None, target_hero_info.pos, None,
                                       None, selected, None)
                else:
                    action = CmdAction(hero.hero_name, CmdActionEnum.ATTACK, 0,
                                       target_hero, None, None, None, selected,
                                       None)
                return action, max_q, selected

            # Remaining indices (13-27): skill cast.
            skillid = int((selected - 13) / 5 + 1)
            tgt_index = selected - 13 - (skillid - 1) * 5
            skill_info = SkillUtil.get_skill_info(hero.cfg_id, skillid)
            is_buff = skill_info.cast_target == SkillTargetEnum.buff
            is_self = skill_info.cast_target == SkillTargetEnum.self
            tgt_hero = TeamBattleUtil.get_target_hero(
                hero.hero_name, friends, opponents, tgt_index, is_buff,
                is_self)
            tgt_pos = state_info.get_hero(tgt_hero).pos
            fwd = tgt_pos.fwd(hero.pos)
            if avail_type == 0:
                action = CmdAction(hero.hero_name, CmdActionEnum.MOVE,
                                   None, None, tgt_pos, None, None,
                                   selected, None)
            else:
                action = CmdAction(hero.hero_name, CmdActionEnum.CAST,
                                   skillid, tgt_hero, tgt_pos, fwd, None,
                                   selected, None)
            return action, max_q, selected
Пример #13
0
    def build_response(self, raw_state_str):
        self.save_raw_log(raw_state_str)
        prev_state_info = self.state_cache[-1] if len(
            self.state_cache) > 0 else None
        response_strs = []

        # 解析客户端发送的请求
        obj = JSON.loads(raw_state_str)
        raw_state_info = StateInfo.decode(obj)

        # 重开时候会有以下报文  {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}
        if raw_state_info.tick == -1:
            return {"ID": raw_state_info.battleid, "tick": -1}

        if raw_state_info.tick <= StateUtil.TICK_PER_STATE and (
                prev_state_info is None
                or prev_state_info.tick > raw_state_info.tick):
            print("clear")
            prev_state_info = None
            self.state_cache = []
            self.battle_started = -1
            self.battle_heroes_cache = []
            self.dead_heroes = []
            self.dead_heroes_cache = []
            self.data_inputs = []
            self.rebooting = False
        elif prev_state_info is None and raw_state_info.tick > StateUtil.TICK_PER_STATE:
            # 不是开始帧的话直接返回重启游戏
            # 还有偶然情况下首帧没有tick(即-1)的情况,这种情况下只能重启本场战斗
            print("battle_id", self.battle_id, "tick", raw_state_info.tick,
                  '不是开始帧的话直接返回重启游戏', raw_state_info.tick)
            action_strs = [
                StateUtil.build_action_command('27', 'RESTART', None)
            ]
            rsp_obj = {
                "ID": raw_state_info.battleid,
                "tick": raw_state_info.tick,
                "cmd": action_strs
            }
            rsp_str = JSON.dumps(rsp_obj)
            return rsp_str

        state_info = StateUtil.update_state_log(prev_state_info,
                                                raw_state_info)
        hero = state_info.get_hero("27")

        if hero is None or hero.hp is None:
            # 偶然情况处理,如果找不到英雄,直接重开
            print("battle_id", self.battle_id, "tick", state_info.tick,
                  '不是开始帧的话直接返回重启游戏', raw_state_info.tick)
            action_strs = [
                StateUtil.build_action_command('27', 'RESTART', None)
            ]
            rsp_obj = {
                "ID": raw_state_info.battleid,
                "tick": raw_state_info.tick,
                "cmd": action_strs
            }
            rsp_str = JSON.dumps(rsp_obj)
            return rsp_str

        # 战斗前准备工作
        if len(self.state_cache) == 0:
            # 第一帧的时候,添加金钱和等级
            for hero in self.heros:
                add_gold_cmd = CmdAction(hero, CmdActionEnum.ADDGOLD, None,
                                         None, None, None, None, None, None)
                add_gold_cmd.gold = 3000
                add_gold_str = StateUtil.build_command(add_gold_cmd)
                response_strs.append(add_gold_str)

                add_lv_cmd = CmdAction(hero, CmdActionEnum.ADDLV, None, None,
                                       None, None, None, None, None)
                add_lv_cmd.lv = 9
                add_lv_str = StateUtil.build_command(add_lv_cmd)
                response_strs.append(add_lv_str)
        elif len(self.state_cache) > 1:
            # 第二帧时候开始,升级技能,购买装备,这个操作可能会持续好几帧
            for hero in self.heros:
                upgrade_cmd = self.upgrade_skills(state_info, hero)
                if upgrade_cmd is not None:
                    response_strs.append(upgrade_cmd)

                buy_cmd = self.buy_equip(state_info, hero)
                if buy_cmd is not None:
                    response_strs.append(buy_cmd)

        for hero in self.heros:
            # 判断是否英雄死亡
            if prev_state_info is not None:
                dead = StateUtil.if_hero_dead(prev_state_info, state_info,
                                              hero)
                if dead == 1 and hero not in self.dead_heroes:
                    print("battle_id", self.battle_id, "tick", state_info.tick,
                          "英雄死亡", hero, "tick", state_info.tick)
                    self.dead_heroes.append(hero)

        # 首先要求所有英雄站到团战圈内,然后开始模型计算,这时候所有的行动都有模型来决定
        # 需要过滤掉无效的行动,同时屏蔽会离开战斗圈的移动
        #TODO 开始团战后,如果有偶尔的技能移动会离开圈,则拉回来

        # 这里会排除掉死亡的英雄,他们不需要再加入团战
        # 团战范围在收缩
        battle_range = self.cal_battle_range(
            len(self.state_cache) - self.battle_started)
        heroes_in_range, heroes_out_range = TeamBattleTrainer.all_in_battle_range(
            state_info, self.heros, self.dead_heroes, battle_range)

        # 存活英雄
        battle_heros = list(heroes_in_range)
        battle_heros.extend(heroes_out_range)

        # 缓存参战情况和死亡情况,用于后续训练
        self.battle_heroes_cache.append(battle_heros)
        self.dead_heroes_cache.append(list(self.dead_heroes))

        if state_info.tick >= 142560:
            debuginfo = True

        # 团战还没有开始,有英雄还在圈外
        if len(heroes_out_range) > 0:
            if self.battle_started > -1:
                print('battle_id', self.battle_id, "战斗已经开始,但是为什么还有英雄在团战圈外",
                      ','.join(heroes_out_range), "battle_range", battle_range)

            # 移动到两个开始战斗地点附近
            # 如果是团战开始之后,移动到团战中心点
            for hero in heroes_out_range:
                start_point_x = randint(0, 8000)
                start_point_z = TeamBattleTrainer.BATTLE_CIRCLE_RADIUS_BATTLE_START * 1000 if self.battle_started == -1 else 0
                start_point_z += randint(-4000, 4000)
                if TeamBattleUtil.get_hero_team(hero) == 0:
                    start_point_z *= -1
                start_point_z += TeamBattleTrainer.BATTLE_POINT_Z
                tgt_pos = PosStateInfo(start_point_x, 0, start_point_z)
                move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None,
                                        tgt_pos, None, None, None, None)
                mov_cmd_str = StateUtil.build_command(move_action)
                response_strs.append(mov_cmd_str)
        # 团战已经开始
        elif not self.rebooting:
            if self.battle_started == -1:
                self.battle_started = len(self.state_cache)

            # 对特殊情况。比如德古拉使用大招hp会变1,修改帧状态
            state_info, _ = TeamBattlePolicy.modify_status_4_draculas_invincible(
                state_info, self.state_cache)

            # action_cmds, input_list, model_upgrade = self.get_model_actions(state_info, heroes_in_range)
            # 跟队伍,每个队伍得到行为
            team_a, team_b = TeamBattleUtil.get_teams(heroes_in_range)
            team_actions_a, input_list_a, model_upgrade_a = self.get_model_actions_team(
                state_info, team_a, heroes_in_range)
            team_actions_b, input_list_b, model_upgrade_b = self.get_model_actions_team(
                state_info, team_b, heroes_in_range)

            # 如果模型已经开战,重启战斗
            if (model_upgrade_a or model_upgrade_b
                ) and self.battle_started < len(self.state_cache) + 1:
                print("battle_id", self.battle_id, "因为模型升级,重启战斗",
                      self.battle_started, len(self.state_cache))
                action_strs = [
                    StateUtil.build_action_command('27', 'RESTART', None)
                ]
                rsp_obj = {
                    "ID": raw_state_info.battleid,
                    "tick": raw_state_info.tick,
                    "cmd": action_strs
                }
                rsp_str = JSON.dumps(rsp_obj)
                return rsp_str
            data_input_map = {}
            for action_cmd, data_input in zip(team_actions_a + team_actions_b,
                                              input_list_a + input_list_b):
                action_str = StateUtil.build_command(action_cmd)
                response_strs.append(action_str)
                state_info.add_action(action_cmd)
                data_input_map[action_cmd.hero_name] = data_input

            # 缓存所有的模型输入,用于后续训练
            self.data_inputs.append(data_input_map)

        # 添加记录到缓存中
        self.state_cache.append(state_info)

        # 将模型行为加入训练缓存,同时计算奖励值
        # 注意:因为奖励值需要看后续状态,所以这个计算会有延迟
        last_x_index = 2
        if self.battle_started > -1 and len(self.data_inputs) >= last_x_index:
            if self.rebooting:
                # 测试发现重启指令发出之后,可能下一帧还没开始重启战斗,这种情况下抛弃训练
                print("battle_id", self.battle_id, "tick", state_info.tick,
                      "warn", "要求重启战斗,但是还在收到后续帧状态, 继续重启")

                # 重启游戏
                response_strs = [
                    StateUtil.build_action_command('27', 'RESTART', None)
                ]
            else:
                state_index = len(self.state_cache) - last_x_index
                win, win_team, left_heroes = self.remember_replay_heroes(
                    -last_x_index, state_index, battle_range)

                # 团战结束条件
                # 首先战至最后一人
                # all_in_team = TeamBattleUtil.all_in_one_team(heroes_in_range)
                # if self.battle_started:
                #     if len(self.dead_heroes) >= 9 or (len(self.dead_heroes) >= 5 and all_in_team > -1):
                if win == 1:
                    # 重启游戏
                    print('battle_id', self.battle_id, "重启游戏", "剩余人员",
                          ','.join(left_heroes))
                    response_strs = [
                        StateUtil.build_action_command('27', 'RESTART', None)
                    ]
                    self.rebooting = True
        # battle_heros = self.search_team_battle(state_info)
        # if len(battle_heros) > 0:
        #     print("team battle heros", ';'.join(battle_heros))
        #
        # heros_need_model = []
        # for hero in self.heros:
        #     # 判断是否英雄死亡
        #     if prev_state_info is not None:
        #         dead = StateUtil.if_hero_dead(prev_state_info, state_info, hero)
        #         if dead == 1 and hero not in self.dead_heroes:
        #             self.dead_heroes.append(hero)
        #
        #     # 复活的英雄不要再去参团
        #     if hero in self.dead_heroes:
        #         continue
        #
        #     # near_enemy_heroes = StateUtil.get_nearby_enemy_heros(state_info, hero, TeamBattleTrainer.MODEL_RANGE)
        #     if hero not in battle_heros:
        #         # 移动到团战点附近,添加部分随机
        #         rdm_delta_x = randint(0, 1000)
        #         rdm_delta_z = randint(0, 1000)
        #         tgt_pos = PosStateInfo(TeamBattleTrainer.BATTLE_POINT_X + rdm_delta_x, 0, TeamBattleTrainer.BATTLE_POINT_Z + rdm_delta_z)
        #         move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None, tgt_pos, None, None, None, None)
        #         mov_cmd_str = StateUtil.build_command(move_action)
        #         response_strs.append(mov_cmd_str)
        #     else:
        #         # 启动模型决策
        #         heros_need_model.append(hero)
        #
        # if len(heros_need_model) > 0:
        #     action_cmds = self.get_model_actions(state_info, heros_need_model)
        #     for action_cmd in action_cmds:
        #         action_str = StateUtil.build_command(action_cmd)
        #         response_strs.append(action_str)
        #         state_info.add_action(action_cmd)

        #TODO 记录模型输出,用于后续训练

        # 返回结果给游戏端
        rsp_obj = {
            "ID": state_info.battleid,
            "tick": state_info.tick,
            "cmd": response_strs
        }
        rsp_str = JSON.dumps(rsp_obj)
        print('battle_id', self.battle_id, 'response', rsp_str)
        return rsp_str
Пример #14
0
    def build_response(self, state_info, prev_state_info, line_model, hero_names=None):
        """Build the command strings to send for the laning heroes this frame.

        For each requested hero a skill upgrade is queued when one is
        available. The hero then either waits (still recalling, dead, healing
        at the base, still retreating), follows the front-most friendly
        soldier of the lane, or performs the action chosen by ``line_model``.

        Args:
            state_info: Current frame state. Actions chosen by the model are
                recorded on it through ``state_info.add_action``.
            prev_state_info: Previous frame state, or ``None`` when there is
                no previous frame.
            line_model: Object whose ``get_action(prev_state_info,
                state_info, hero_name, rival_hero_name)`` returns the action
                to perform.
            hero_names: Names of the heroes to control. Defaults to every
                hero listed in ``state_info.heros``.

        Returns:
            list: The command strings built by ``StateUtil`` for this frame.
        """
        action_strs = []

        if hero_names is None:
            hero_names = [h.hero_name for h in state_info.heros]

        for hero_name in hero_names:
            hero = state_info.get_hero(hero_name)
            prev_hero = prev_state_info.get_hero(hero.hero_name) if prev_state_info is not None else None

            # A hero may have no recorded strategy yet; .get() avoids the
            # KeyError that plain indexing raised in the recall check below.
            strategy = self.hero_strategy.get(hero.hero_name)

            # If any skill can be upgraded do so, preferring skill 3.
            skills = StateUtil.get_skills_can_upgrade(hero)
            if len(skills) > 0:
                skillid = 3 if 3 in skills else skills[0]
                update_cmd = CmdAction(hero.hero_name, CmdActionEnum.UPDATE, skillid, None, None, None, None, None, None)
                action_strs.append(StateUtil.build_command(update_cmd))

            # Survey the surroundings.
            near_enemy_heroes = StateUtil.get_nearby_enemy_heros(state_info, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
            near_enemy_units = StateUtil.get_nearby_enemy_units(state_info, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
            nearest_enemy_tower = StateUtil.get_nearest_enemy_tower(state_info, hero.hero_name, StateUtil.LINE_MODEL_RADIUS + 3)

            # Recall handling: while the hero is recalling, has lost no HP
            # since the previous frame and is not at the base yet, keep
            # waiting and send nothing, unless skill 6 is usable again, which
            # is reported as a failed recall.
            if prev_hero is not None:
                if strategy == ActionEnum.town_ing and prev_hero.hp <= hero.hp \
                        and not StateUtil.if_hero_at_basement(hero):
                    if not hero.skills[6].canuse:
                        print('回城中,继续回城')
                        continue
                    else:
                        print('回城失败')

            # Dead heroes get no command and lose their strategy.
            if hero.hp <= 0:
                self.hero_strategy[hero.hero_name] = None
                continue

            # NOTE: a strategy-level "recall when HP is low and no enemy is
            # around" rule used to live here; it is disabled, so recalls only
            # happen when the line model casts skill 6.

            # While standing at the base, wait until HP is full.
            if StateUtil.if_hero_at_basement(hero):
                if hero.hp < hero.maxhp:
                    continue

            # Retreat handling.
            # TODO: movement skills could be used to retreat as well.
            if strategy == ActionEnum.retreat:
                dist = StateUtil.cal_distance(hero.pos, self.retreat_pos)
                if dist <= 2:
                    print('到达撤退点附近')
                    self.hero_strategy[hero.hero_name] = None
                elif prev_hero is not None and prev_hero.pos.to_string() == hero.pos.to_string():
                    # Position unchanged since the previous frame: give up.
                    print('英雄卡住了,取消撤退')
                    self.hero_strategy[hero.hero_name] = None
                else:
                    print('仍然在撤退 ' + str(dist))
                    continue

            # Decide the action for this frame.
            # With no enemy unit or tower of the lane nearby (and either no
            # enemy hero nearby or the hero being off the lane) the hero just
            # follows the lane; otherwise the line model decides.
            line_index = 1
            near_enemy_units_in_line = StateUtil.get_units_in_line(near_enemy_units, line_index)
            nearest_enemy_tower_in_line = StateUtil.get_units_in_line([nearest_enemy_tower], line_index)
            if len(near_enemy_units_in_line) == 0 and len(nearest_enemy_tower_in_line) == 0 and (len(near_enemy_heroes) == 0 or
                    StateUtil.if_in_line(hero, line_index, 4000) == -1):
                self.hero_strategy[hero.hero_name] = ActionEnum.line_1
                # Follow the lane: walk to the front-most friendly soldier,
                # or hold position when there is none.
                front_soldier = StateUtil.get_frontest_soldier_in_line(state_info, line_index, hero.team)
                if front_soldier is None:
                    action_str = StateUtil.build_action_command(hero.hero_name, 'HOLD', {})
                else:
                    move_action = CmdAction(hero.hero_name, CmdActionEnum.MOVE, None, None, front_soldier.pos, None, None, None, None)
                    action_str = StateUtil.build_command(move_action)
                action_strs.append(action_str)
            else:
                # Let the line model decide.
                self.hero_strategy[hero.hero_name] = ActionEnum.line_model

                # Laning currently only involves the two heroes '27' and '28'.
                rival_hero = '28' if hero.hero_name == '27' else '27'
                action = line_model.get_action(prev_state_info, state_info, hero.hero_name, rival_hero)
                action_strs.append(StateUtil.build_command(action))

                # Casting skill 6 is the recall: remember it, because the
                # following frames have to wait for the recall to finish.
                if action.action == CmdActionEnum.CAST and int(action.skillid) == 6:
                    print("英雄%s释放了回城" % hero_name)
                    self.hero_strategy[hero.hero_name] = ActionEnum.town_ing

                # A retreat is remembered too; it affects the next frames.
                if action.action == CmdActionEnum.RETREAT:
                    print("英雄%s释放了撤退,撤退点为%s" % (hero_name, action.tgtpos.to_string()))
                    self.hero_strategy[hero.hero_name] = ActionEnum.retreat
                    self.retreat_pos = action.tgtpos

                # Store the chosen action in the frame state.
                state_info.add_action(action)
        return action_strs
Пример #15
0
    def guess_player_action(prev_state_info, state_info, next_state_info,
                            next_next_state_info, hero_name, rival_hero_name):
        """Infer the action a hero effectively performed in one frame.

        The frame is combined with the following frames because hit
        information arrives late. Only meaningful for the one-versus-one
        lane model. Priority is skill > basic attack > movement.

        The returned ``CmdAction`` carries an output index built as follows:
        basic attacks use 8 (tower), 9 (rival hero) or ``10 + i`` (i-th
        nearby enemy unit); casts use the same slots plus ``10 * skillid``
        with 8 meaning the hero itself; 48 is hold, 49 is the recall, and
        moves take the index returned by ``Replayer.get_closest_fwd``.

        Args:
            prev_state_info: State of the previous frame; positions and
                nearby units are read from it.
            state_info: State of the frame being analysed.
            next_state_info: State of the following frame (late hits).
            next_next_state_info: State two frames later (late skill hits).
            hero_name: Name of the hero whose action is guessed.
            rival_hero_name: Name of the opposing hero.

        Returns:
            CmdAction: The guessed action.
        """

        def last_index_named(units, unit_name):
            # Index of the last unit called unit_name, or None if absent.
            found = None
            for idx, unit in enumerate(units):
                if unit.unit_name == unit_name:
                    found = idx
            return found

        def collect_hits(frames, skill):
            # Hits of this hero with this skill over the given frames. A new
            # list is built so the lists owned by the frames stay untouched.
            hits = []
            for frame in frames:
                hits.extend(frame.get_hero_hit_with_skill(hero_name, skill))
            return hits

        def weakest_hit_target(hits):
            # Hit object with the lowest hp, taken as the intended target.
            # Targets that state_info cannot resolve are ignored.
            targets = [state_info.get_obj(hit.tgt) for hit in hits]
            targets = [t for t in targets if t is not None]
            return min(targets, key=lambda t: t.hp) if targets else None

        def hold_action():
            return CmdAction(hero_name, CmdActionEnum.HOLD, None, None,
                             prev_hero.pos, None, None, 48, None)

        prev_hero = prev_state_info.get_hero(hero_name)
        prev_viral_hero = prev_state_info.get_hero(rival_hero_name)
        current_hero = state_info.get_hero(hero_name)

        hero_attack_info = state_info.get_hero_attack_info(hero_name)
        if hero_attack_info is not None:
            skill = hero_attack_info.skill

            # The tens digit of the skill code is the skill id.
            skillid = int(hero_attack_info.skill % 100 / 10)
            tgtid = int(hero_attack_info.defer) if (
                hero_attack_info.defer is not None
                and hero_attack_info.defer != 'None') else 0
            tgtpos = hero_attack_info.tgtpos

            # Recall.
            if hero_attack_info.skill == 10000:
                return CmdAction(hero_name, CmdActionEnum.CAST, 6, None,
                                 None, None, None, 49, None)

            output_idx = None
            if skillid == 0:
                # Basic attack; never targets the hero itself.
                if StateUtil.if_unit_tower(tgtid):
                    output_idx = 8
                # tgtid is an int while hero names are strings, so compare
                # their string forms (a direct == could never match).
                elif str(tgtid) == str(prev_viral_hero.hero_name):
                    output_idx = 9
                elif tgtid != 0:
                    # Attack on an enemy unit.
                    creeps = StateUtil.get_nearby_enemy_units(
                        prev_state_info, hero_name)
                    idx = last_index_named(creeps, str(tgtid))
                    if idx is not None:
                        output_idx = idx + 10
                else:
                    # The attack info has no target: look at the hits. Hit
                    # and damage info lag behind, especially at long range.
                    hit_infos = collect_hits((state_info, next_state_info),
                                             skill)
                    if hit_infos:
                        # A hit on the rival hero has the highest priority.
                        if rival_hero_name in [hit.tgt for hit in hit_infos]:
                            tgtid = rival_hero_name
                            output_idx = 9
                        else:
                            tgt_unit = weakest_hit_target(hit_infos)
                            if tgt_unit is None:
                                pass
                            elif StateUtil.if_unit_tower(tgt_unit.unit_name):
                                tgtid = tgt_unit.unit_name
                                output_idx = 8
                            else:
                                # Find the unit among the nearby enemy units
                                # with a widened radius: within the frame
                                # interval the hero may have walked up to
                                # the unit before attacking it.
                                creeps = StateUtil.get_nearby_enemy_units(
                                    prev_state_info,
                                    hero_name,
                                    max_distance=StateUtil.ATTACK_HERO_RADIUS +
                                    2)
                                # Match on the resolved unit's name; tgtid
                                # is still 0 here and matched nothing.
                                idx = last_index_named(creeps,
                                                       tgt_unit.unit_name)
                                if idx is not None:
                                    tgtid = tgt_unit.unit_name
                                    output_idx = idx + 10
                if output_idx is not None:
                    return CmdAction(hero_name, CmdActionEnum.ATTACK, 0,
                                     tgtid, tgtpos, None, None, output_idx,
                                     None)
                # Unresolved attacks fall through to the move/hold check.
            else:
                # Skill cast. Enemy towers are not considered as targets.
                # TODO: direction/area skills that hit nothing (target 0)
                # still carry a position that could be learned from.
                if tgtid == int(hero_name):
                    # Cast on the hero itself.
                    tgtpos = prev_hero.pos
                    output_idx = 8 + skillid * 10
                elif tgtid == int(rival_hero_name):
                    # Cast on the rival hero.
                    tgtpos = prev_viral_hero.pos
                    output_idx = 9 + skillid * 10
                elif tgtid != 0 and not StateUtil.if_unit_tower(tgtid):
                    # Cast on an enemy unit.
                    creeps = StateUtil.get_nearby_enemy_units(
                        prev_state_info, hero_name)
                    idx = last_index_named(creeps, str(tgtid))
                    if idx is not None:
                        output_idx = idx + skillid * 10 + 10
                elif tgtid == 0:
                    # No target in the attack info: look at the hits; ranged
                    # skills may take a while to land, so check three frames.
                    hit_infos = collect_hits(
                        (state_info, next_state_info, next_next_state_info),
                        skill)
                    if hit_infos:
                        # A hit on the rival hero has the highest priority.
                        if rival_hero_name in [hit.tgt for hit in hit_infos]:
                            tgtid = rival_hero_name
                            output_idx = 9 + skillid * 10
                        else:
                            tgt_unit = weakest_hit_target(hit_infos)
                            if tgt_unit is not None:
                                # May still be missed when the hero only
                                # got close to the unit within this frame.
                                creeps = StateUtil.get_nearby_enemy_units(
                                    prev_state_info, hero_name)
                                idx = last_index_named(creeps,
                                                       tgt_unit.unit_name)
                                if idx is not None:
                                    tgtid = tgt_unit.unit_name
                                    output_idx = idx + 10 + skillid * 10

                if output_idx is not None:
                    return CmdAction(hero_name, CmdActionEnum.CAST, skillid,
                                     tgtid, tgtpos, None, None, output_idx,
                                     None)

                # Still nothing was hit. With only a cast position known,
                # pick the most plausible target around the hero.
                if tgtid == 0 and tgtpos is not None:
                    search_radius = 1
                    # First choice: a rival hero in range (the first one).
                    nearby_rival_heros = StateUtil.get_nearby_enemy_heros(
                        prev_state_info, hero_name, search_radius)
                    if len(nearby_rival_heros) > 0:
                        tgtid = nearby_rival_heros[0].hero_name
                        output_idx = 9 + skillid * 10
                    else:
                        skill_info = SkillUtil.get_skill_info(
                            prev_hero.cfg_id, skillid)
                        if skill_info is not None:
                            if skill_info.cast_target != SkillTargetEnum.rival:
                                # Second choice: the hero itself.
                                tgtid = hero_name
                                output_idx = 8 + skillid * 10
                            else:
                                # Last choice: the weakest nearby unit.
                                nearby_soldiers = StateUtil.get_nearby_enemy_units(
                                    prev_state_info, hero_name,
                                    search_radius)
                                if len(nearby_soldiers) > 0:
                                    target_unit = min(nearby_soldiers,
                                                      key=lambda u: u.hp)
                                    idx = last_index_named(
                                        nearby_soldiers,
                                        target_unit.unit_name)
                                    tgtid = nearby_soldiers[idx].unit_name
                                    output_idx = idx + 10 + skillid * 10
                    if output_idx is not None:
                        return CmdAction(hero_name, CmdActionEnum.CAST,
                                         skillid, tgtid, tgtpos, None, None,
                                         output_idx, None)
                # The skill really hit nothing (or targeted a tower).
                return hold_action()

        # No resolved attack or cast: the hero moved or held position.
        if current_hero.pos.x != prev_hero.pos.x or current_hero.pos.z != prev_hero.pos.z or current_hero.pos.y != prev_hero.pos.y:
            fwd = current_hero.pos.fwd(prev_hero.pos)
            [fwd, output_index] = Replayer.get_closest_fwd(fwd)
            return CmdAction(hero_name, CmdActionEnum.MOVE, None, None, None,
                             fwd, None, output_index, None)
        return hold_action()
Пример #16
0
def start_model_process(battle_id_num, init_signal, train_queue, action_queue,
                        results, save_batch, save_dir, lock):
    """Run the loop that trains the two line models and answers action requests.

    The models are built once, ``init_signal`` is set, and the function then
    loops forever: it collects training data per battle, trains both models
    once every battle has delivered data for each of them, and computes
    actions for queued state inputs. It never returns.

    Args:
        battle_id_num: Number of battles. Training starts once each model
            holds data from at least this many battle ids, and a restart
            notice is then queued for battle ids ``1..battle_id_num``.
        init_signal: Event-like object; ``set()`` is called after the models
            have been built.
        train_queue: Queue of ``(battle_id, model_name, o4r, batch_size)``.
        action_queue: Queue of ``(battle_id, model_name, state_inputs)``.
        results: Mapping keyed by ``(battle_id, model_name)`` that receives
            either ``(actions_list, explor_value, vpreds)`` or a pending
            restart notice ``(restart_cmd, None, None)``.
        save_batch: Forwarded to ``if_save_model``.
        save_dir: Forwarded to ``HttpUtil.build_models_ppo``.
        lock: Context manager held while reading ``train_queue`` and while
            touching ``results`` and the pending restart notices.
    """
    # Imported locally so the error handler below always sees the module;
    # the original bound a traceback *object* to this name and then called
    # print_exc() on it, which raised AttributeError and ended the loop.
    import traceback

    model_1, model1_save_header, model_2, model2_save_header = HttpUtil.build_models_ppo(
        save_dir,
        # model1_path=None,
        # model2_path=None,
        model1_path=
        '/Users/sky4star/Github/zy2go/data/20171218/model_2017-12-14192241.120603/line_model_1_v460/model',
        model2_path=
        '/Users/sky4star/Github/zy2go/data/20171218/model_2017-12-14192241.120603/line_model_2_v460/model',
        schedule_timesteps=1000000,
        model1_initial_p=0.5,
        model1_final_p=0.1,
        model1_gamma=0.93,
        model2_initial_p=0.5,
        model2_final_p=0.1,
        model2_gamma=0.93)
    init_signal.set()
    print('模型进程启动')

    time_cache = []
    num_cache = []

    o4r_list_model1 = {}
    o4r_list_model2 = {}
    done_signals = {}
    while True:
        try:
            # Take one training request, if any. Training only starts once
            # data from every battle has been collected (see below).
            with lock:
                if not train_queue.empty():
                    (battle_id, train_model_name, o4r,
                     batch_size) = train_queue.get()
                    print('model_process', battle_id, train_model_name,
                          'receive train signal, batch size', batch_size)
                    if train_model_name == ModelProcess.NAME_MODEL_1:
                        o4r_list_model1[battle_id] = o4r
                        print(
                            'model_process model1 train collection', ';'.join(
                                (str(k) for k in o4r_list_model1.keys())))
                    elif train_model_name == ModelProcess.NAME_MODEL_2:
                        o4r_list_model2[battle_id] = o4r
                        print(
                            'model_process model2 train collection', ';'.join(
                                (str(k) for k in o4r_list_model2.keys())))

            trained = False
            if len(o4r_list_model1) >= battle_id_num and len(
                    o4r_list_model2) >= battle_id_num:
                # batch_size is the one carried by the latest train request.
                print('model_process1', train_model_name, 'begin to train')
                begin_time = time.time()
                model_1.replay(o4r_list_model1.values(), batch_size)
                o4r_list_model1.clear()

                # if_save_model decides whether the model is saved now.
                if_save_model(model_1, model1_save_header, save_batch)

                print('model_process2', train_model_name, 'begin to train')
                model_2.replay(o4r_list_model2.values(), batch_size)
                o4r_list_model2.clear()
                end_time = time.time()
                delta_millionseconds = (end_time - begin_time) * 1000

                print('model train time', delta_millionseconds)

                if_save_model(model_2, model2_save_header, save_batch)

                trained = True

            if trained:
                with lock:
                    print('model process, add trained events')
                    restartCmd = CmdAction(ModelProcess.NAME_MODEL_1,
                                           CmdActionEnum.RESTART, 0, None,
                                           None, None, None, None, None)
                    # Queue a "training finished" notice for every client.
                    for battle_id in range(1, battle_id_num + 1):
                        done_signals[(battle_id,
                                      ModelProcess.NAME_MODEL_1)] = (
                                          restartCmd, None, None)

            # Take one action request, if any. get() is only reached when
            # the queue reports an item, so this does not block while the
            # queue is empty; the timeout is a safeguard and a resulting
            # queue.Empty is handled below.
            # NOTE(review): with both queues empty the loop spins without
            # sleeping -- confirm whether that is intended.
            state_inputs = []
            if not action_queue.empty():
                # One request at a time is enough for the current load;
                # state_inputs is a list and may hold several inputs (MCTS).
                (battle_id, act_model_name,
                 state_inputs) = action_queue.get(timeout=1)

                with lock:
                    # A pending restart notice replaces the answer to this
                    # request; this should only happen right after training.
                    request_key = (battle_id, act_model_name)
                    if request_key in done_signals:
                        results[request_key] = done_signals.pop(request_key)
                        continue

            if len(state_inputs) == 0:
                continue

            begin_time = time.time()
            if act_model_name == ModelProcess.NAME_MODEL_1:
                actions_list, explor_value, vpreds = model_1.get_actions(
                    state_inputs)
            elif act_model_name == ModelProcess.NAME_MODEL_2:
                actions_list, explor_value, vpreds = model_2.get_actions(
                    state_inputs)
            else:
                # Never answer with results left over from another request.
                print('model_process', battle_id, 'unknown model name',
                      act_model_name)
                continue
            end_time = time.time()
            delta_millionseconds = (end_time - begin_time) * 1000
            time_cache.append(delta_millionseconds)
            num_cache.append(len(state_inputs))
            if len(time_cache) >= 1000:
                # True division: floor division truncated the average.
                print("model get_action average calculate time(ms)",
                      sum(time_cache) / float(len(time_cache)),
                      sum(num_cache) / float(len(num_cache)))
                time_cache = []
                num_cache = []

            with lock:
                results[(battle_id, act_model_name)] = (actions_list,
                                                        explor_value, vpreds)
        except queue.Empty:
            continue
        except Exception:
            # Report the error and keep serving.
            traceback.print_exc()