def get_attack_unit_action(state_info, hero_name, unit_name, skill_id):
    """Build the command for attacking, or casting a skill on, an enemy unit.

    The unit is looked up by name among the enemy units near the hero; its
    position in that list is folded into the model output index
    (``unit_idx + 10 * skill_id + 10``).

    Args:
        state_info: Current frame state.
        hero_name: Name of the acting hero.
        unit_name: Name of the targeted enemy unit.
        skill_id: 0 for a basic attack, otherwise the id of the skill to cast.

    Returns:
        A ``CmdAction`` of type ATTACK (skill 0) or CAST (any other skill).

    Raises:
        ValueError: If ``unit_name`` is not among the nearby enemy units.
    """
    creeps = StateUtil.get_nearby_enemy_units(state_info, hero_name)
    unit_idx = [c.unit_name for c in creeps].index(unit_name)
    action_idx = unit_idx + 10 * skill_id + 10

    # Skill 0 is the basic attack, exactly as in get_attack_hero_action.
    # The previous test (``skill_id >= 1``) was inverted: it sent real skills
    # as basic attacks and the basic attack as a CAST of skill 0.
    if skill_id == 0:
        return CmdAction(hero_name, CmdActionEnum.ATTACK, 0, unit_name,
                         None, None, None, action_idx, None)

    tgtpos = creeps[unit_idx].pos
    hero = state_info.get_hero(hero_name)
    fwd = tgtpos.fwd(hero.pos)
    return CmdAction(hero_name, CmdActionEnum.CAST, skill_id, unit_name,
                     tgtpos, fwd, None, action_idx, None)
def get_attack_hero_action(state_info, hero_name, rival_hero_name, skill_id):
    """Build the command for attacking, or casting a skill on, the rival hero.

    Skill 0 produces an ATTACK command; any other skill produces a CAST
    aimed at the rival hero's position. The model output index is
    ``10 * skill_id + 9``.
    """
    output_index = 10 * skill_id + 9

    if skill_id != 0:
        target_pos = state_info.get_hero(rival_hero_name).pos
        caster = state_info.get_hero(hero_name)
        direction = target_pos.fwd(caster.pos)
        return CmdAction(hero_name, CmdActionEnum.CAST, skill_id,
                         rival_hero_name, target_pos, direction, None,
                         output_index, None)

    return CmdAction(hero_name, CmdActionEnum.ATTACK, 0, rival_hero_name,
                     None, None, None, output_index, None)
def get_self_cast_action(state_info, hero_name, rival_hero_name, skill_id):
    """Build a CAST command whose target position is the caster's own position.

    The command still carries ``rival_hero_name`` as its target id, and the
    model output index is ``10 * skill_id + 8``.
    """
    output_index = 10 * skill_id + 8
    caster = state_info.get_hero(hero_name)
    own_pos = caster.pos
    # Direction from the caster's position to itself, as computed by ``fwd``.
    direction = own_pos.fwd(caster.pos)
    return CmdAction(hero_name, CmdActionEnum.CAST, skill_id, rival_hero_name,
                     own_pos, direction, None, output_index, None)
def get_action(selected, state_info, hero, hero_name, rival_hero, revert=False):
    """Translate a model output index into a concrete ``CmdAction``.

    Index layout:
        0-7    move in one of eight directions
        8      basic attack on the nearest enemy tower
        9      basic attack on the rival hero
        10-17  basic attack on nearby enemy unit ``selected - 10``
        18-47  cast skill 1-3, ten target slots per skill
        48     hold position
        other  retreat
    """
    # Movement: step 15 units along the chosen direction.
    if selected < 8:
        direction = StateUtil.mov(selected, revert)
        destination = PosStateInfo(hero.pos.x + direction.x * 15,
                                   hero.pos.y + direction.y * 15,
                                   hero.pos.z + direction.z * 15)
        return CmdAction(hero_name, CmdActionEnum.MOVE, None, None,
                         destination, None, None, selected, None)

    # Basic attack on tower, rival hero or creep.
    if selected < 18:
        if selected == 8:
            tower = StateUtil.get_nearest_enemy_tower(
                state_info, hero_name, StateUtil.ATTACK_UNIT_RADIUS)
            target_id = tower.unit_name
        elif selected == 9:
            target_id = rival_hero
        else:
            creeps = StateUtil.get_nearby_enemy_units(state_info, hero_name)
            target_id = creeps[selected - 10].unit_name
        return CmdAction(hero_name, CmdActionEnum.ATTACK, 0, target_id,
                         None, None, None, selected, None)

    # Skill cast: ten consecutive indices per skill.
    if selected < 48:
        skillid = int((selected - 18) / 10 + 1)
        target_slot = selected - 18 - (skillid - 1) * 10
        target_id, target_pos = LineModel.choose_skill_target(
            target_slot, state_info, skillid, hero_name, hero.pos, rival_hero)
        direction = None if target_pos is None else target_pos.fwd(hero.pos)
        return CmdAction(hero_name, CmdActionEnum.CAST, skillid, target_id,
                         target_pos, direction, None, selected, None)

    # Hold position.
    if selected == 48:
        return CmdAction(hero_name, CmdActionEnum.HOLD, None, None, hero.pos,
                         None, None, 48, None)

    # Anything else is a retreat.
    retreat_pos = StateUtil.get_retreat_pos(state_info, hero, line_index=1)
    return CmdAction(hero_name, CmdActionEnum.RETREAT, None, None,
                     retreat_pos, None, None, selected, None)
def upgrade_skills(self, state_info, hero_name):
    """Return a built skill-upgrade command for the hero, or None.

    When several skills can be upgraded, skill 3 is preferred; otherwise the
    first upgradable skill is chosen. Nothing is returned when no skill can
    be upgraded.
    """
    hero = state_info.get_hero(hero_name)
    upgradable = StateUtil.get_skills_can_upgrade(hero)
    if not len(upgradable) > 0:
        return None

    if 3 in upgradable:
        chosen_skill = 3
    else:
        chosen_skill = upgradable[0]

    upgrade_action = CmdAction(hero.hero_name, CmdActionEnum.UPDATE,
                               chosen_skill, None, None, None, None, None,
                               None)
    return StateUtil.build_command(upgrade_action)
def get_or_insert_reward(self, hero_name):
    """Return the reward of the hero's recorded action in this frame.

    If the hero has no action yet, an EMPTY action with reward 0 is added
    through ``self.add_action`` and its reward is returned.
    """
    recorded = next(
        (act for act in self.actions if act.hero_name == hero_name), None)
    if recorded is not None:
        return recorded.reward

    # No action recorded for this hero yet: insert an empty placeholder.
    placeholder = CmdAction(hero_name, CmdActionEnum.EMPTY, None, None, None,
                            None, None, None, 0)
    self.add_action(placeholder)
    return placeholder.reward
def policy_move_retreat(hero_info):
    """Build a MOVE command that steps the hero back in a fixed direction.

    Team 0 uses movement index 6 and every other team uses index 0; the
    destination lies 15 units along that direction from the hero.
    """
    mov_idx = 6 if hero_info.team == 0 else 0
    step = StateUtil.mov(mov_idx)
    destination = PosStateInfo(hero_info.pos.x + step.x * 15,
                               hero_info.pos.y + step.y * 15,
                               hero_info.pos.z + step.z * 15)
    return CmdAction(hero_info.hero_name, CmdActionEnum.MOVE, None, None,
                     destination, None, None, mov_idx, None)
def get_attack_tower_action(hero_name, hero_info, tower_unit):
    """Attack the tower if it is in range, otherwise walk towards it.

    The tower-detection radius used by the model is fairly large, so the
    tower may not be attackable yet; in that case the hero first approaches.
    The MOVE command uses a target position rather than a direction, because
    the direction coordinate system of the move command is awkward.
    """
    in_reach = not (StateUtil.cal_distance(hero_info.pos, tower_unit.pos)
                    > StateUtil.ATTACK_UNIT_RADIUS)
    if in_reach:
        return CmdAction(hero_name, CmdActionEnum.ATTACK, 0,
                         tower_unit.unit_name, None, None, None, 11, None)

    # Snap the exact direction to the closest supported move direction.
    exact_direction = tower_unit.pos.fwd(hero_info.pos)
    direction, output_index = Replayer.get_closest_fwd(exact_direction)
    destination = PosStateInfo(hero_info.pos.x + direction.x * 15,
                               hero_info.pos.y + direction.y * 15,
                               hero_info.pos.z + direction.z * 15)
    print("朝塔移动,", hero_name, "hero_pos", hero_info.pos.to_string(),
          "tower_pos", tower_unit.pos.to_string(), "fwd",
          direction.to_string(), "output_index", output_index)
    return CmdAction(hero_name, CmdActionEnum.MOVE, None, None, destination,
                     None, None, output_index, None)
def decode(obj):
    """Decode one raw frame dictionary into a ``StateInfo``.

    Heroes are read from consecutive ids starting at 27 until an id yields no
    hero. Every other all-digit key is decoded as a unit. The optional
    ``attackinfos``, ``hitinfos``, ``dmginfos`` and ``actions`` lists are
    decoded when present. The tick defaults to -1 when the frame has none.
    Lane information in the first frame is ignored.
    """
    battleid = obj['wldstatic']['ID']
    tick = obj['wldruntime'].get('tick', -1)

    # Heroes appear to occupy ids 27-36; stop at the first missing id.
    heros = []
    next_id = 27
    hero_info = StateInfo.decode_hero(obj, next_id)
    while hero_info is not None:
        heros.append(hero_info)
        next_id += 1
        hero_info = StateInfo.decode_hero(obj, next_id)
    last_hero_id = next_id - 1

    # Every other numeric key is a non-hero unit. Keys are strings, so they
    # are converted to int before being compared with the hero id range.
    units = [
        UnitStateInfo.decode(obj[key], key)
        for key in obj
        if key.isdigit() and not (27 <= int(key) <= last_hero_id)
    ]

    attack_infos = [AttackStateInfo.decode(item)
                    for item in obj.get('attackinfos', ())]
    hit_infos = [HitStateInfo.decode(item)
                 for item in obj.get('hitinfos', ())]
    dmg_infos = [DmgStateInfo.decode(item)
                 for item in obj.get('dmginfos', ())]
    actions = [CmdAction.decode(item) for item in obj.get('actions', ())]

    return StateInfo(battleid, tick, heros, units, attack_infos, hit_infos,
                     dmg_infos, actions, None)
def buy_equip(state_info, hero_name):
    """Return a BUY command for the next item of the hero's plan, or None.

    The plan is walked in order and the first item the hero does not own is
    the only candidate: if it is affordable it is bought, otherwise nothing
    is bought (later, cheaper items are deliberately not considered).
    Heroes without an equipment plan never buy anything.
    """
    hero = state_info.get_hero(hero_name)
    if hero.cfg_id not in EquipUtil.equip_plans:
        return None

    plan = EquipUtil.equip_plans[hero.cfg_id]
    owned_equips = [int(item.id) for item in hero.equips]

    for equip_id in plan:
        if equip_id in owned_equips:
            continue
        equip_info = EquipUtil.get_equip_info(equip_id)
        if equip_info.buy_price <= hero.gold:
            print(state_info.battleid, hero_name, '购买道具', equip_id,
                  '当前拥有', ','.join(str(e) for e in owned_equips),
                  '金币', hero.gold, '价格', equip_info.buy_price,
                  '名称', equip_info.name)
            return CmdAction(hero_name, CmdActionEnum.BUY, None, None, None,
                             None, equip_id, None, None)
        # Not enough gold: wait instead of buying the next item in the plan.
        return None

    return None
def build_response(self, state_cache, state_index, hero_name):
    """Decide which commands to send for one hero on the current frame.

    A model-controlled hero is analysed on ``state_cache[state_index]``. The
    human-controlled hero (``self.real_hero``) is analysed three frames in
    the past, so the three following frames can be used to guess what the
    player actually did.

    Args:
        state_cache: Sequence of cached frame states.
        state_index: Index of the current frame inside ``state_cache``.
        hero_name: Name of the hero to decide for.

    Returns:
        A ``(action_strs, restart)`` tuple: the list of built commands and a
        flag that is True only when the chosen model action is RESTART.
    """
    action_strs = []
    restart = False

    # ``prev_hero`` is the hero in the frame before the analysed one. It is
    # bound up front because the real-player branch never assigns it, and the
    # checks below would otherwise fail with an unbound local.
    prev_hero = None
    if self.real_hero != hero_name:
        # Model hero: analyse the current frame.
        state_info = state_cache[state_index]
        if len(state_cache) >= 2:
            prev_hero = state_cache[state_index - 1].get_hero(hero_name)
    elif len(state_cache) > 3:
        # Real player: history is needed, so analyse the frame 3 steps back.
        state_info = state_cache[state_index - 3]
        next1_state_info = state_cache[state_index - 2]
        next2_state_info = state_cache[state_index - 1]
        next3_state_info = state_cache[state_index]
    else:
        return action_strs, False

    # Decide whether to buy equipment.
    buy_action = EquipUtil.buy_equip(state_info, hero_name)
    if buy_action is not None:
        action_strs.append(StateUtil.build_command(buy_action))

    # Upgrade a skill when possible, preferring skill 3.
    hero = state_info.get_hero(hero_name)
    skills = StateUtil.get_skills_can_upgrade(hero)
    if len(skills) > 0:
        skillid = 3 if 3 in skills else skills[0]
        update_cmd = CmdAction(hero.hero_name, CmdActionEnum.UPDATE, skillid,
                               None, None, None, None, None, None)
        action_strs.append(StateUtil.build_command(update_cmd))

    strategy = self.hero_strategy.get(hero.hero_name)

    # Town-portal logic: while the hero is recalling and was not interrupted
    # (no hp loss, not yet at base) keep recalling and send nothing else.
    if prev_hero is not None:
        if strategy == ActionEnum.town_ing \
                and prev_hero.hp <= hero.hp \
                and not StateUtil.if_hero_at_basement(hero):
            if not hero.skills[6].canuse:
                print(self.battle_id, hero.hero_name, '回城中,继续回城')
                return action_strs, False
            # The recall skill is usable again, so the recall failed: retry.
            print(self.battle_id, hero.hero_name, '回城失败')
            town_action = CmdAction(hero.hero_name, CmdActionEnum.CAST, 6,
                                    hero.hero_name, None, None, None, None,
                                    None)
            action_strs.append(StateUtil.build_command(town_action))
            return action_strs, False
        if hero.hp <= 0:
            self.hero_strategy[hero.hero_name] = None
            return action_strs, False

    # NOTE: a disabled "walk to the HP-restore point" strategy used to sit
    # here as commented-out code; it was never executed.

    # Retreat logic: once the hero has (almost) stopped moving it is taken to
    # have reached the retreat point, and the recall is started.
    # TODO: movement skills could even be used to retreat.
    if prev_hero is not None and strategy == ActionEnum.retreat_to_town:
        moved = StateUtil.cal_distance2(prev_hero.pos, hero.pos)
        if moved < 100:
            print(self.battle_id, hero_name, '开始回城')
            self.hero_strategy[hero.hero_name] = ActionEnum.town_ing
            town_action = CmdAction(hero.hero_name, CmdActionEnum.CAST, 6,
                                    hero.hero_name, None, None, None, None,
                                    None)
            action_strs.append(StateUtil.build_command(town_action))
        else:
            print(self.battle_id, hero_name, '还在撤退中',
                  StateUtil.cal_distance2(prev_hero.pos, hero.pos))
        return action_strs, False

    # (A second copy of the town-portal check used to follow here. It could
    # never trigger, because the check above returns in every case where its
    # condition holds, so it has been removed.)

    # Inspect the surroundings.
    near_enemy_heroes = StateUtil.get_nearby_enemy_heros(
        state_info, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
    near_enemy_units = StateUtil.get_nearby_enemy_units(
        state_info, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
    nearest_enemy_tower = StateUtil.get_nearest_enemy_tower(
        state_info, hero.hero_name, StateUtil.LINE_MODEL_RADIUS + 3)
    nearest_friend_units = StateUtil.get_nearby_friend_units(
        state_info, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
    line_index = 1
    near_enemy_units_in_line = StateUtil.get_units_in_line(
        near_enemy_units, line_index)
    nearest_enemy_tower_in_line = StateUtil.get_units_in_line(
        [nearest_enemy_tower], line_index)

    # After killing the rival hero, go back to heal: with no enemies around,
    # retreat first, recall at the retreat point, then follow the creeps out
    # again once healed.
    if len(near_enemy_units_in_line) == 0 and len(near_enemy_heroes) == 0:
        rival_just_died = (
            (hero_name == self.model1_hero and self.model2_just_dead == 1
             and not StateUtil.if_hero_at_basement(hero))
            or (hero_name == self.model2_hero and self.model1_just_dead == 1
                and not StateUtil.if_hero_at_basement(hero)))
        if rival_just_died:
            if hero.hp / float(hero.maxhp) > 0.8:
                # Healthy enough: just clear the flag and keep laning.
                if hero_name == self.model1_hero:
                    self.model2_just_dead = 0
                else:
                    self.model1_just_dead = 0
            else:
                print(self.battle_id, hero_name, '选择撤退')
                self.hero_strategy[hero_name] = ActionEnum.retreat_to_town
                retreat_pos = StateUtil.get_retreat_pos(
                    state_info, hero, line_index=1)
                action = CmdAction(hero_name, CmdActionEnum.MOVE, None, None,
                                   retreat_pos, None, None, -1, None)
                action_strs.append(StateUtil.build_command(action))
                if hero_name == self.model1_hero:
                    self.model2_just_dead = 0
                else:
                    self.model1_just_dead = 0
                return action_strs, False

        # At the fountain: reset the flags and wait until fully healed.
        if StateUtil.if_hero_at_basement(hero):
            if hero_name == self.model1_hero:
                self.model2_just_dead = 0
            else:
                self.model1_just_dead = 0
            if hero.hp < hero.maxhp:
                if hero_name in self.hero_strategy:
                    del self.hero_strategy[hero_name]
                return action_strs, False

    # Strategy layer. While laning, walk towards the front-most friendly
    # creep (or tower). If enemies on the assigned lane are nearby, hand the
    # decision to the laning model instead.
    nothing_to_fight = (
        len(near_enemy_units_in_line) == 0
        and len(nearest_enemy_tower_in_line) == 0
        and (len(near_enemy_heroes) == 0
             or StateUtil.if_in_line(hero, line_index, 4000) == -1))
    alone_at_enemy_tower = (
        len(nearest_friend_units) == 0
        and len(near_enemy_units_in_line) == 0
        and len(near_enemy_heroes) == 0
        and len(nearest_enemy_tower_in_line) == 1)

    if nothing_to_fight or alone_at_enemy_tower:
        # Follow the creep wave or the tower, preferring the tower.
        self.hero_strategy[hero.hero_name] = ActionEnum.line_1
        front_soldier = StateUtil.get_frontest_soldier_in_line(
            state_info, line_index, hero.team)
        first_tower = StateUtil.get_first_tower(state_info, hero)
        tower_is_ahead = (
            front_soldier is None
            or (hero.team == 0
                and first_tower.pos.x > front_soldier.pos.x)
            or (hero.team == 1
                and first_tower.pos.x < front_soldier.pos.x))
        if tower_is_ahead:
            # The tower is in front of the wave: stay behind the tower.
            follow_tower_pos = StateUtil.get_tower_behind(
                first_tower, hero, line_index=1)
            move_action = CmdAction(hero.hero_name, CmdActionEnum.MOVE, None,
                                    None, follow_tower_pos, None, None, None,
                                    None)
        else:
            # Walk to the front-most friendly creep.
            move_action = CmdAction(hero.hero_name, CmdActionEnum.MOVE, None,
                                    None, front_soldier.pos, None, None, None,
                                    None)
        action_strs.append(StateUtil.build_command(move_action))
    elif self.real_hero != hero_name:
        # Let the laning model decide. Laning only involves two heroes.
        self.hero_strategy[hero.hero_name] = ActionEnum.line_model
        rival_hero = '28' if hero.hero_name == '27' else '27'
        action, _, action_ratios = self.get_action(
            state_info, hero_name, rival_hero)

        # Optionally override the model with the scripted policy. Once the
        # policy is chosen it is kept for several consecutive actions (e.g.
        # to keep attacking a low-hp rival); a None result interrupts it.
        policy_idx = self.cur_policy_act_idx_map[hero_name]
        if self.policy_ratio > 0 and (
                0 < policy_idx < self.policy_continue_acts
                or random.uniform(0, 1) <= self.policy_ratio):
            policy_action = LineTrainerPolicy.choose_action(
                state_info, action_ratios, hero_name, rival_hero,
                near_enemy_units, nearest_friend_units)
            if policy_action is not None:
                policy_action.vpred = action.vpred
                action = policy_action
                self.cur_policy_act_idx_map[hero_name] += 1
                print("英雄 " + hero_name + " 使用策略,策略行为计数 idx "
                      + str(self.cur_policy_act_idx_map[hero_name]))
                if self.cur_policy_act_idx_map[hero_name] \
                        >= self.policy_continue_acts:
                    self.cur_policy_act_idx_map[hero_name] = 0
            else:
                # Policy interrupted: reset the counter.
                if self.cur_policy_act_idx_map[hero_name] > 0:
                    print("英雄 " + hero_name + " 策略中断,清零")
                    self.cur_policy_act_idx_map[hero_name] = 0

        action_strs.append(StateUtil.build_command(action))

        # A recall cast changes the hero state; later frames wait for it.
        if action.action == CmdActionEnum.CAST and int(action.skillid) == 6:
            print("英雄%s释放了回城" % hero_name)
            self.hero_strategy[hero.hero_name] = ActionEnum.town_ing

        # A retreat is flagged too, since it affects the following frames.
        if action.action == CmdActionEnum.RETREAT:
            print("英雄%s释放了撤退,撤退点为%s"
                  % (hero_name, action.tgtpos.to_string()))
            self.hero_strategy[hero.hero_name] = ActionEnum.retreat
            self.retreat_pos = action.tgtpos

        # When batch training has finished the game must be restarted;
        # otherwise store the action in the frame.
        if action.action == CmdActionEnum.RESTART:
            restart = True
        else:
            state_info.add_action(action)
    else:
        # Real player: the model is still needed to compute a vpred.
        rival_hero = '28' if hero.hero_name == '27' else '27'
        action, _, action_ratios = self.get_action(
            state_info, hero_name, rival_hero)

        # Guess what the player did from the following frames.
        guess_action = Replayer.guess_player_action(
            state_info, next1_state_info, next2_state_info, next3_state_info,
            hero_name, rival_hero)
        guess_action.vpred = action.vpred
        action_str = StateUtil.build_command(guess_action)
        action_str['tick'] = state_info.tick
        print('猜测玩家行为为:' + JSON.dumps(action_str))

        # Store the guessed action in the frame; nothing is sent for it.
        state_info.add_action(guess_action)

    return action_strs, restart
def get_action_cmd(action_list, unaval_list, recommmend_list, state_info,
                   hero_name, friends, opponents, revert=False):
    """Pick the best available action and turn it into a ``CmdAction``.

    Actions are tried in descending order of their score. Index layout:
        0-7    move in one of eight directions
        8-12   basic attack on hero slot ``selected - 8``
        13-27  cast skill 1-3, five target slots per skill

    For attack and skill actions an availability of 0 turns the action into
    a MOVE towards the target instead.

    Args:
        action_list: Mutable list of scores, one per action index. It is
            modified in place: rejected entries are overwritten with -1.
        unaval_list: Availability per action index; -1 means unavailable.
        recommmend_list: If non-empty, only these indices may be chosen.
        state_info: Current frame state.
        hero_name: Name of the acting hero.
        friends: Passed through to ``TeamBattleUtil.get_target_hero``.
        opponents: Passed through to ``TeamBattleUtil.get_target_hero``.
        revert: Passed through to ``StateUtil.mov``.

    Returns:
        ``(action, max_q, selected)``; when nothing is available a HOLD
        action is returned with ``selected == -1``.
    """
    hero = state_info.get_hero(hero_name)

    # With recommendations present, only choose among them.
    if len(recommmend_list) > 0:
        for i in range(len(action_list)):
            if i not in recommmend_list:
                action_list[i] = -1
        print("battle_id", state_info.battleid, "tick", state_info.tick,
              "hero", hero_name, "根据推荐,只从以下行为中挑选",
              ",".join(str("%f" % float(act)) for act in action_list),
              ",".join(str("%f" % float(act)) for act in recommmend_list))

    while True:
        max_q = max(action_list)
        if max_q <= -1:
            action = CmdAction(hero_name, CmdActionEnum.HOLD, None, None,
                               hero.pos, None, None, 48, None)
            return action, max_q, -1

        selected = action_list.index(max_q)

        # Indices beyond the known layout cannot be translated. They are
        # discarded like unavailable actions; otherwise the same index would
        # be picked again on every iteration and the loop would never end.
        if selected >= 28:
            action_list[selected] = -1
            continue

        avail_type = unaval_list[selected]
        if avail_type == -1:
            # Unavailable action.
            # TODO: for avail_type == 0, consider not approaching the target
            # when the skill cannot be used.
            action_list[selected] = -1
            continue

        if selected < 8:
            # Move. The destination comes from our own movement formula; it
            # may be blocked by obstacles, and the real reachable distance
            # may be longer than the computed one.
            fwd = StateUtil.mov(selected, revert)
            tgtpos = TeamBattleUtil.set_move_target(hero, fwd)
            action = CmdAction(hero.hero_name, CmdActionEnum.MOVE, None, None,
                               tgtpos, None, None, selected, None)
            return action, max_q, selected

        if selected < 13:
            # Basic attack on an enemy hero.
            target_index = selected - 8
            target_hero = TeamBattleUtil.get_target_hero(
                hero.hero_name, friends, opponents, target_index)
            target_hero_info = state_info.get_hero(target_hero)
            if avail_type == 0:
                action = CmdAction(hero.hero_name, CmdActionEnum.MOVE, None,
                                   None, target_hero_info.pos, None, None,
                                   selected, None)
            else:
                action = CmdAction(hero.hero_name, CmdActionEnum.ATTACK, 0,
                                   target_hero, None, None, None, selected,
                                   None)
            return action, max_q, selected

        # Skill cast (13 <= selected < 28): five target slots per skill.
        skillid = (selected - 13) // 5 + 1
        tgt_index = selected - 13 - (skillid - 1) * 5
        skill_info = SkillUtil.get_skill_info(hero.cfg_id, skillid)
        is_buff = skill_info.cast_target == SkillTargetEnum.buff
        is_self = skill_info.cast_target == SkillTargetEnum.self
        tgt_hero = TeamBattleUtil.get_target_hero(
            hero.hero_name, friends, opponents, tgt_index, is_buff, is_self)
        tgt_pos = state_info.get_hero(tgt_hero).pos
        fwd = tgt_pos.fwd(hero.pos)
        if avail_type == 0:
            action = CmdAction(hero.hero_name, CmdActionEnum.MOVE, None, None,
                               tgt_pos, None, None, selected, None)
        else:
            action = CmdAction(hero.hero_name, CmdActionEnum.CAST, skillid,
                               tgt_hero, tgt_pos, fwd, None, selected, None)
        return action, max_q, selected
def build_response(self, raw_state_str):
    """Handle one raw frame from the game client and build the reply.

    The frame is logged, decoded and merged with the previous cached state.
    On the first frame every hero receives gold and levels; later frames
    upgrade skills and buy equipment. Heroes outside the battle circle are
    sent towards it; once all living heroes are inside, both teams' models
    choose the actions. Model inputs are cached for training, and a RESTART
    command is sent when the battle is won or the model was upgraded.

    Returns the JSON-encoded response string.
    NOTE(review): the ``tick == -1`` early exit returns a plain dict, unlike
    every other exit, which returns a JSON string - confirm callers cope.
    """
    self.save_raw_log(raw_state_str)
    prev_state_info = self.state_cache[-1] if len(
        self.state_cache) > 0 else None
    response_strs = []

    # Parse the request sent by the client.
    obj = JSON.loads(raw_state_str)
    raw_state_info = StateInfo.decode(obj)

    # On a restart the client sends a message such as
    # {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}
    if raw_state_info.tick == -1:
        return {"ID": raw_state_info.battleid, "tick": -1}

    if raw_state_info.tick <= StateUtil.TICK_PER_STATE and (
            prev_state_info is None or prev_state_info.tick > raw_state_info.tick):
        # A new battle has begun: drop everything cached for the old one.
        print("clear")
        prev_state_info = None
        self.state_cache = []
        self.battle_started = -1
        self.battle_heroes_cache = []
        self.dead_heroes = []
        self.dead_heroes_cache = []
        self.data_inputs = []
        self.rebooting = False
    elif prev_state_info is None and raw_state_info.tick > StateUtil.TICK_PER_STATE:
        # Not a starting frame: reply with a game restart right away.
        # Occasionally the first frame has no tick (i.e. -1); in that case
        # the only option is to restart this battle.
        print("battle_id", self.battle_id, "tick", raw_state_info.tick, '不是开始帧的话直接返回重启游戏', raw_state_info.tick)
        action_strs = [
            StateUtil.build_action_command('27', 'RESTART', None)
        ]
        rsp_obj = {
            "ID": raw_state_info.battleid,
            "tick": raw_state_info.tick,
            "cmd": action_strs
        }
        rsp_str = JSON.dumps(rsp_obj)
        return rsp_str

    state_info = StateUtil.update_state_log(prev_state_info, raw_state_info)
    hero = state_info.get_hero("27")

    if hero is None or hero.hp is None:
        # Rare case: if the hero cannot be found, restart directly.
        print("battle_id", self.battle_id, "tick", state_info.tick, '不是开始帧的话直接返回重启游戏', raw_state_info.tick)
        action_strs = [
            StateUtil.build_action_command('27', 'RESTART', None)
        ]
        rsp_obj = {
            "ID": raw_state_info.battleid,
            "tick": raw_state_info.tick,
            "cmd": action_strs
        }
        rsp_str = JSON.dumps(rsp_obj)
        return rsp_str

    # Pre-battle preparation.
    if len(self.state_cache) == 0:
        # On the first frame, add gold and levels.
        for hero in self.heros:
            add_gold_cmd = CmdAction(hero, CmdActionEnum.ADDGOLD, None, None,
                                     None, None, None, None, None)
            add_gold_cmd.gold = 3000
            add_gold_str = StateUtil.build_command(add_gold_cmd)
            response_strs.append(add_gold_str)

            add_lv_cmd = CmdAction(hero, CmdActionEnum.ADDLV, None, None,
                                   None, None, None, None, None)
            add_lv_cmd.lv = 9
            add_lv_str = StateUtil.build_command(add_lv_cmd)
            response_strs.append(add_lv_str)
    elif len(self.state_cache) > 1:
        # From the second frame on, upgrade skills and buy equipment; this
        # may go on for several frames.
        # NOTE(review): ``> 1`` skips the frame where the cache holds exactly
        # one state - confirm that is intended.
        for hero in self.heros:
            upgrade_cmd = self.upgrade_skills(state_info, hero)
            if upgrade_cmd is not None:
                response_strs.append(upgrade_cmd)

            buy_cmd = self.buy_equip(state_info, hero)
            if buy_cmd is not None:
                response_strs.append(buy_cmd)

    for hero in self.heros:
        # Check whether the hero has died.
        if prev_state_info is not None:
            dead = StateUtil.if_hero_dead(prev_state_info, state_info, hero)
            if dead == 1 and hero not in self.dead_heroes:
                print("battle_id", self.battle_id, "tick", state_info.tick, "英雄死亡", hero, "tick", state_info.tick)
                self.dead_heroes.append(hero)

    # First every hero must stand inside the battle circle; then the model
    # takes over and decides every action.
    # Invalid actions must be filtered out, and moves that would leave the
    # battle circle must be blocked.
    # TODO: after the battle starts, pull a hero back if an occasional skill
    # movement takes it out of the circle.
    # Dead heroes are excluded here; they do not rejoin the team battle.
    # The battle range shrinks over time.
    battle_range = self.cal_battle_range(
        len(self.state_cache) - self.battle_started)
    heroes_in_range, heroes_out_range = TeamBattleTrainer.all_in_battle_range(
        state_info, self.heros, self.dead_heroes, battle_range)

    # Surviving heroes.
    battle_heros = list(heroes_in_range)
    battle_heros.extend(heroes_out_range)

    # Cache participation and deaths for later training.
    self.battle_heroes_cache.append(battle_heros)
    self.dead_heroes_cache.append(list(self.dead_heroes))

    # Debugging hook; ``debuginfo`` is not read anywhere in this function.
    if state_info.tick >= 142560:
        debuginfo = True

    # The team battle has not started yet: some heroes are outside the circle.
    if len(heroes_out_range) > 0:
        if self.battle_started > -1:
            print('battle_id', self.battle_id, "战斗已经开始,但是为什么还有英雄在团战圈外", ','.join(heroes_out_range), "battle_range", battle_range)

        # Move towards one of the two battle starting points.
        # After the battle has started, move to the battle centre instead.
        for hero in heroes_out_range:
            start_point_x = randint(0, 8000)
            start_point_z = TeamBattleTrainer.BATTLE_CIRCLE_RADIUS_BATTLE_START * 1000 if self.battle_started == -1 else 0
            start_point_z += randint(-4000, 4000)
            if TeamBattleUtil.get_hero_team(hero) == 0:
                start_point_z *= -1
            start_point_z += TeamBattleTrainer.BATTLE_POINT_Z
            tgt_pos = PosStateInfo(start_point_x, 0, start_point_z)
            move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None,
                                    tgt_pos, None, None, None, None)
            mov_cmd_str = StateUtil.build_command(move_action)
            response_strs.append(mov_cmd_str)
    # The team battle has started.
    elif not self.rebooting:
        if self.battle_started == -1:
            self.battle_started = len(self.state_cache)

        # Special cases, e.g. Dracula's ultimate sets hp to 1: patch the
        # frame state accordingly.
        state_info, _ = TeamBattlePolicy.modify_status_4_draculas_invincible(
            state_info, self.state_cache)

        # Per team, obtain each team's actions.
        team_a, team_b = TeamBattleUtil.get_teams(heroes_in_range)
        team_actions_a, input_list_a, model_upgrade_a = self.get_model_actions_team(
            state_info, team_a, heroes_in_range)
        team_actions_b, input_list_b, model_upgrade_b = self.get_model_actions_team(
            state_info, team_b, heroes_in_range)

        # If a model was upgraded while the battle is running, restart it.
        # NOTE(review): the ``battle_started < len(state_cache) + 1`` part
        # looks like it is always true here - confirm.
        if (model_upgrade_a or model_upgrade_b
            ) and self.battle_started < len(self.state_cache) + 1:
            print("battle_id", self.battle_id, "因为模型升级,重启战斗", self.battle_started, len(self.state_cache))
            action_strs = [
                StateUtil.build_action_command('27', 'RESTART', None)
            ]
            rsp_obj = {
                "ID": raw_state_info.battleid,
                "tick": raw_state_info.tick,
                "cmd": action_strs
            }
            rsp_str = JSON.dumps(rsp_obj)
            return rsp_str

        data_input_map = {}
        for action_cmd, data_input in zip(team_actions_a + team_actions_b,
                                          input_list_a + input_list_b):
            action_str = StateUtil.build_command(action_cmd)
            response_strs.append(action_str)
            state_info.add_action(action_cmd)
            data_input_map[action_cmd.hero_name] = data_input

        # Cache all model inputs for later training.
        self.data_inputs.append(data_input_map)

    # Add this frame to the cache.
    self.state_cache.append(state_info)

    # Feed the model actions into the training cache and compute rewards.
    # Note: rewards depend on later states, so this runs with a delay.
    last_x_index = 2
    if self.battle_started > -1 and len(self.data_inputs) >= last_x_index:
        if self.rebooting:
            # Testing showed that after a restart command the next frame may
            # arrive before the restart happens; such frames are not trained
            # on and the restart is requested again.
            print("battle_id", self.battle_id, "tick", state_info.tick, "warn", "要求重启战斗,但是还在收到后续帧状态, 继续重启")
            # Restart the game.
            response_strs = [
                StateUtil.build_action_command('27', 'RESTART', None)
            ]
        else:
            state_index = len(self.state_cache) - last_x_index
            win, win_team, left_heroes = self.remember_replay_heroes(
                -last_x_index, state_index, battle_range)

            # End of the team battle: fight until one side is left.
            if win == 1:
                # Restart the game.
                print('battle_id', self.battle_id, "重启游戏", "剩余人员", ','.join(left_heroes))
                response_strs = [
                    StateUtil.build_action_command('27', 'RESTART', None)
                ]
                self.rebooting = True

    # Legacy approach, previously kept here as commented-out code: find the
    # heroes in the team battle via self.search_team_battle, move every other
    # living hero near the battle point with a random offset, and run the
    # model only for the heroes already in the battle.
    # TODO: record the model output for later training.

    # Return the result to the game client.
    rsp_obj = {
        "ID": state_info.battleid,
        "tick": state_info.tick,
        "cmd": response_strs
    }
    rsp_str = JSON.dumps(rsp_obj)
    print('battle_id', self.battle_id, 'response', rsp_str)
    return rsp_str
def build_response(self, state_info, prev_state_info, line_model, hero_names=None):
    """Build the commands for every requested hero on the current frame.

    For each hero: upgrade a skill if possible, keep waiting while a recall
    or a retreat is in progress, follow the front-most friendly creep when no
    lane enemy is nearby, and otherwise ask ``line_model`` for an action.

    Args:
        state_info: Current frame state; model actions are recorded on it.
        prev_state_info: Previous frame state, or None on the first frame.
        line_model: Model exposing ``get_action(prev_state, state, hero,
            rival_hero)``.
        hero_names: Heroes to decide for; defaults to every hero in
            ``state_info``.

    Returns:
        The list of built commands for all processed heroes.
    """
    action_strs = []
    if hero_names is None:
        hero_names = [hero.hero_name for hero in state_info.heros]

    for hero_name in hero_names:
        hero = state_info.get_hero(hero_name)
        prev_hero = (prev_state_info.get_hero(hero.hero_name)
                     if prev_state_info is not None else None)

        # Upgrade a skill when possible, preferring skill 3.
        skills = StateUtil.get_skills_can_upgrade(hero)
        if len(skills) > 0:
            skillid = 3 if 3 in skills else skills[0]
            update_cmd = CmdAction(hero.hero_name, CmdActionEnum.UPDATE,
                                   skillid, None, None, None, None, None,
                                   None)
            action_strs.append(StateUtil.build_command(update_cmd))

        # Inspect the surroundings.
        near_enemy_heroes = StateUtil.get_nearby_enemy_heros(
            state_info, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
        near_enemy_units = StateUtil.get_nearby_enemy_units(
            state_info, hero.hero_name, StateUtil.LINE_MODEL_RADIUS)
        nearest_enemy_tower = StateUtil.get_nearest_enemy_tower(
            state_info, hero.hero_name, StateUtil.LINE_MODEL_RADIUS + 3)

        # Town-portal logic: while recalling without interruption, keep
        # recalling and send nothing. ``.get`` is used so a hero that has no
        # strategy entry yet does not raise KeyError.
        if prev_hero is not None:
            if self.hero_strategy.get(hero.hero_name) == ActionEnum.town_ing \
                    and prev_hero.hp <= hero.hp \
                    and not StateUtil.if_hero_at_basement(hero):
                if not hero.skills[6].canuse:
                    print('回城中,继续回城')
                    continue
                print('回城失败')
            if hero.hp <= 0:
                self.hero_strategy[hero.hero_name] = None
                continue

        # NOTE: a disabled "recall when low on hp and nobody is around" rule
        # used to sit here as commented-out code; it was never executed.

        # At the fountain: wait until fully healed.
        if StateUtil.if_hero_at_basement(hero):
            if hero.hp < hero.maxhp:
                continue

        # Retreat logic.
        # TODO: movement skills could even be used to retreat.
        if self.hero_strategy.get(hero.hero_name) == ActionEnum.retreat:
            dist = StateUtil.cal_distance(hero.pos, self.retreat_pos)
            if dist <= 2:
                print('到达撤退点附近')
                self.hero_strategy[hero.hero_name] = None
            elif prev_hero is not None \
                    and prev_hero.pos.to_string() == hero.pos.to_string():
                # The hero did not move since the last frame: give up.
                print('英雄卡住了,取消撤退')
                self.hero_strategy[hero.hero_name] = None
            else:
                print('仍然在撤退 ' + str(dist))
                continue

        # Strategy layer. While laning, walk towards the front-most friendly
        # creep. If enemies on the assigned lane are nearby, hand the
        # decision to the laning model instead.
        line_index = 1
        near_enemy_units_in_line = StateUtil.get_units_in_line(
            near_enemy_units, line_index)
        nearest_enemy_tower_in_line = StateUtil.get_units_in_line(
            [nearest_enemy_tower], line_index)

        if len(near_enemy_units_in_line) == 0 \
                and len(nearest_enemy_tower_in_line) == 0 \
                and (len(near_enemy_heroes) == 0
                     or StateUtil.if_in_line(hero, line_index, 4000) == -1):
            # No lane enemy nearby: follow the creep wave.
            self.hero_strategy[hero.hero_name] = ActionEnum.line_1
            front_soldier = StateUtil.get_frontest_soldier_in_line(
                state_info, line_index, hero.team)
            if front_soldier is None:
                action_str = StateUtil.build_action_command(
                    hero.hero_name, 'HOLD', {})
            else:
                # Walk to the front-most friendly creep.
                move_action = CmdAction(hero.hero_name, CmdActionEnum.MOVE,
                                        None, None, front_soldier.pos, None,
                                        None, None, None)
                action_str = StateUtil.build_command(move_action)
            action_strs.append(action_str)
        else:
            # Let the laning model decide. Laning only involves two heroes.
            self.hero_strategy[hero.hero_name] = ActionEnum.line_model
            rival_hero = '28' if hero.hero_name == '27' else '27'
            action = line_model.get_action(
                prev_state_info, state_info, hero.hero_name, rival_hero)
            action_strs.append(StateUtil.build_command(action))

            # A recall cast changes the hero state; later frames wait for it.
            if action.action == CmdActionEnum.CAST \
                    and int(action.skillid) == 6:
                print("英雄%s释放了回城" % hero_name)
                self.hero_strategy[hero.hero_name] = ActionEnum.town_ing

            # A retreat is flagged too, since it affects following frames.
            if action.action == CmdActionEnum.RETREAT:
                print("英雄%s释放了撤退,撤退点为%s"
                      % (hero_name, action.tgtpos.to_string()))
                self.hero_strategy[hero.hero_name] = ActionEnum.retreat
                self.retreat_pos = action.tgtpos

            # Store the action in the frame.
            state_info.add_action(action)

    return action_strs
def guess_player_action(prev_state_info, state_info, next_state_info,
                        next_next_state_info, hero_name, rival_hero_name):
    """Infer the action ``hero_name`` performed in ``state_info`` from replay frames.

    The frame is combined with the frames that follow it, because hit records
    can lag behind the attack record. Only meaningful for the one-versus-one
    lane model. Priority is skill > basic attack > movement.

    The returned ``CmdAction`` carries an output index built as follows:

    * ``8`` / ``9`` / ``10 + i``: basic attack on the tower / the rival hero /
      the ``i``-th nearby enemy creep.
    * ``skillid * 10 + 8`` / ``+ 9`` / ``+ 10 + i``: skill cast on self / the
      rival hero / the ``i``-th nearby enemy creep.
    * ``48``: hold. ``49``: attack record with skill code ``10000`` (recall),
      emitted as a cast of skill ``6``.
    * movement: the index returned by ``Replayer.get_closest_fwd``.

    Args:
        prev_state_info: Frame before ``state_info``; source of positions and
            of the nearby-unit ordering used for creep indices.
        state_info: Frame whose action is being inferred.
        next_state_info: Following frame, scanned for delayed hit records.
        next_next_state_info: Frame after that, scanned for skills only.
        hero_name: Name of the acting hero (a numeric string).
        rival_hero_name: Name of the opposing hero (a numeric string).

    Returns:
        The inferred ``CmdAction`` (ATTACK, CAST, MOVE or HOLD).

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-collapsed source. Two pairings should be confirmed against the
    original file: the "cast on creeps" ``else`` is paired with the
    ``cast_target`` check, and the final HOLD is the fallback for every frame
    that has an attack record but no resolvable target.
    """
    prev_hero = prev_state_info.get_hero(hero_name)
    prev_viral_hero = prev_state_info.get_hero(rival_hero_name)
    current_hero = state_info.get_hero(hero_name)
    hero_attack_info = state_info.get_hero_attack_info(hero_name)

    def hold_action():
        return CmdAction(hero_name, CmdActionEnum.HOLD, None, None,
                         prev_hero.pos, None, None, 48, None)

    if hero_attack_info is None:
        # No attack or skill was recorded: the hero either moved or held.
        if (current_hero.pos.x != prev_hero.pos.x
                or current_hero.pos.z != prev_hero.pos.z
                or current_hero.pos.y != prev_hero.pos.y):
            fwd = current_hero.pos.fwd(prev_hero.pos)
            fwd, output_index = Replayer.get_closest_fwd(fwd)
            return CmdAction(hero_name, CmdActionEnum.MOVE, None, None, None,
                             fwd, None, output_index, None)
        return hold_action()

    skill = hero_attack_info.skill
    # The tens digit of the skill code selects the skill id.
    skillid = int(skill % 100 / 10)
    defer = hero_attack_info.defer
    tgtid = int(defer) if (defer is not None and defer != 'None') else 0
    tgtpos = hero_attack_info.tgtpos

    # Recall to base.
    if skill == 10000:
        return CmdAction(hero_name, CmdActionEnum.CAST, 6, None, None, None,
                         None, 49, None)

    def hits_in(*frames):
        # Collect into a fresh list so that a list owned by a frame is never
        # extended in place, in case the accessor returns its internal list.
        hits = []
        for frame in frames:
            hits.extend(frame.get_hero_hit_with_skill(hero_name, skill))
        return hits

    output_idx = None
    if skillid == 0:
        # Basic attack; it never targets the hero itself.
        if StateUtil.if_unit_tower(tgtid):
            output_idx = 8
        elif tgtid == int(rival_hero_name):
            # tgtid is an int while hero names are strings, so compare the
            # same way the skill branch below does.
            output_idx = 9
        elif tgtid != 0:
            # Basic attack on an enemy creep.
            creeps = StateUtil.get_nearby_enemy_units(prev_state_info,
                                                      hero_name)
            for i, creep in enumerate(creeps):
                if creep.unit_name == str(tgtid):
                    output_idx = i + 10
        else:
            # The attack record has no target; recover it from hit records.
            # Hit and damage records are delayed, especially at long range.
            hit_infos = hits_in(state_info, next_state_info)
            if hit_infos:
                # A hit on the rival hero has the highest priority.
                if rival_hero_name in [hit.tgt for hit in hit_infos]:
                    output_idx = 9
                else:
                    # Otherwise treat the hit unit with the least hp as target.
                    hit_units = [state_info.get_obj(hit.tgt)
                                 for hit in hit_infos]
                    tgt_unit = min(hit_units, key=lambda x: x.hp)
                    if StateUtil.if_unit_tower(tgt_unit.unit_name):
                        output_idx = 8
                    else:
                        # Look the creep up among the units near the hero. It
                        # can be missing when the hero closed in and attacked
                        # within the frame interval, so widen the search.
                        creeps = StateUtil.get_nearby_enemy_units(
                            prev_state_info, hero_name,
                            max_distance=StateUtil.ATTACK_HERO_RADIUS + 2)
                        for i, creep in enumerate(creeps):
                            # Match on the resolved unit; tgtid is 0 here.
                            if creep.unit_name == tgt_unit.unit_name:
                                output_idx = i + 10
        if output_idx is not None:
            return CmdAction(hero_name, CmdActionEnum.ATTACK, 0, tgtid,
                             tgtpos, None, None, output_idx, None)
    else:
        # Skill cast. Enemy towers are not considered as skill targets.
        # TODO: direction/area skills that hit nothing (target 0) are only
        # approximated by the position-based fallback further down.
        if tgtid == int(hero_name):
            # Cast on self.
            tgtpos = prev_hero.pos
            output_idx = 8 + skillid * 10
        elif tgtid == int(rival_hero_name):
            # Cast on the rival hero.
            tgtpos = prev_viral_hero.pos
            output_idx = 9 + skillid * 10
        elif tgtid != 0 and not StateUtil.if_unit_tower(tgtid):
            # Cast on a creep.
            creeps = StateUtil.get_nearby_enemy_units(prev_state_info,
                                                      hero_name)
            for i, creep in enumerate(creeps):
                if creep.unit_name == str(tgtid):
                    output_idx = i + skillid * 10 + 10
        elif tgtid == 0:
            # No target in the attack record; ranged skills can take a long
            # time to land, so scan two following frames as well.
            hit_infos = hits_in(state_info, next_state_info,
                                next_next_state_info)
            if hit_infos:
                # A hit on the rival hero has the highest priority.
                if rival_hero_name in [hit.tgt for hit in hit_infos]:
                    tgtid = rival_hero_name
                    output_idx = 9 + skillid * 10
                else:
                    # Otherwise treat the hit unit with the least hp as target
                    # and look it up among the creeps near the hero.
                    hit_units = [state_info.get_obj(hit.tgt)
                                 for hit in hit_infos]
                    tgt_unit = min(hit_units, key=lambda x: x.hp)
                    creeps = StateUtil.get_nearby_enemy_units(
                        prev_state_info, hero_name)
                    for i, creep in enumerate(creeps):
                        if creep.unit_name == tgt_unit.unit_name:
                            tgtid = creep.unit_name
                            output_idx = i + 10 + skillid * 10

        if output_idx is not None:
            return CmdAction(hero_name, CmdActionEnum.CAST, skillid, tgtid,
                             tgtpos, None, None, output_idx, None)

        # Still no hit: the attack record only has a position, so pick a
        # plausible target close to the hero.
        if tgtid == 0 and tgtpos is not None:
            search_radius = 1
            # Prefer a rival hero in range; the first one is the main target.
            nearby_rival_heros = StateUtil.get_nearby_enemy_heros(
                prev_state_info, hero_name, search_radius)
            if len(nearby_rival_heros) > 0:
                tgtid = nearby_rival_heros[0].hero_name
                output_idx = 9 + skillid * 10
            else:
                skill_info = SkillUtil.get_skill_info(prev_hero.cfg_id,
                                                      skillid)
                if skill_info is not None:
                    if skill_info.cast_target != SkillTargetEnum.rival:
                        # The skill can be cast on the hero itself.
                        tgtid = hero_name
                        output_idx = 8 + skillid * 10
                    else:
                        # NOTE(review): pairing of this else reconstructed.
                        # Last resort: the weakest creep in range.
                        nearby_soldiers = StateUtil.get_nearby_enemy_units(
                            prev_state_info, hero_name, search_radius)
                        if len(nearby_soldiers) > 0:
                            target_unit = min(nearby_soldiers,
                                              key=lambda u: u.hp)
                            for i, soldier in enumerate(nearby_soldiers):
                                if soldier.unit_name == target_unit.unit_name:
                                    tgtid = soldier.unit_name
                                    output_idx = i + 10 + skillid * 10

            if output_idx is not None:
                return CmdAction(hero_name, CmdActionEnum.CAST, skillid,
                                 tgtid, tgtpos, None, None, output_idx, None)

    # An attack or skill was recorded but no target could be resolved.
    return hold_action()
def start_model_process(battle_id_num, init_signal, train_queue, action_queue,
                        results, save_batch, save_dir, lock):
    """Build both lane models and serve training and action requests forever.

    Each loop iteration:

    1. Takes at most one item from ``train_queue`` and stores its data per
       battle id for the named model.
    2. Once both models hold data from at least ``battle_id_num`` battles,
       trains both via ``replay``, hands each to ``if_save_model``, and queues
       a RESTART notification for battle ids ``1..battle_id_num``.
    3. Takes at most one item from ``action_queue``. If a RESTART notification
       is pending for its key, that is delivered instead; otherwise the
       matching model's ``get_actions`` result is written to ``results``.

    Args:
        battle_id_num: Number of battles whose data must be collected for
            both models before training starts.
        init_signal: Object whose ``set()`` is called once models are built.
        train_queue: Queue of ``(battle_id, model_name, o4r, batch_size)``.
        action_queue: Queue of ``(battle_id, model_name, state_inputs)``.
        results: Mapping written under the key ``(battle_id, model_name)``.
        save_batch: Forwarded to ``if_save_model``.
        save_dir: Forwarded to ``HttpUtil.build_models_ppo``.
        lock: Context manager guarding ``train_queue`` reads and writes to
            ``results`` and the pending notifications.

    This function does not return. Unexpected exceptions are logged with a
    stack trace and the loop continues.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source. Training is assumed to run outside the first ``with lock`` block,
    because the notification step right after it takes the lock again.
    """
    # Local import: the handler at the bottom needs the module itself.
    import traceback

    # Checkpoint paths are hard-coded to a developer machine.
    model_1, model1_save_header, model_2, model2_save_header = HttpUtil.build_models_ppo(
        save_dir,
        model1_path='/Users/sky4star/Github/zy2go/data/20171218/model_2017-12-14192241.120603/line_model_1_v460/model',
        model2_path='/Users/sky4star/Github/zy2go/data/20171218/model_2017-12-14192241.120603/line_model_2_v460/model',
        schedule_timesteps=1000000,
        model1_initial_p=0.5,
        model1_final_p=0.1,
        model1_gamma=0.93,
        model2_initial_p=0.5,
        model2_final_p=0.1,
        model2_gamma=0.93)
    init_signal.set()
    print('模型进程启动')

    time_cache = []
    num_cache = []
    o4r_list_model1 = {}
    o4r_list_model2 = {}
    done_signals = {}
    while True:
        try:
            # Pull one request from the training queue. Training only starts
            # once data from every battle has been collected.
            with lock:
                if not train_queue.empty():
                    (battle_id, train_model_name, o4r,
                     batch_size) = train_queue.get()
                    print('model_process', battle_id, train_model_name,
                          'receive train signal, batch size', batch_size)
                    if train_model_name == ModelProcess.NAME_MODEL_1:
                        o4r_list_model1[battle_id] = o4r
                        print('model_process model1 train collection',
                              ';'.join(str(k) for k in o4r_list_model1))
                    elif train_model_name == ModelProcess.NAME_MODEL_2:
                        o4r_list_model2[battle_id] = o4r
                        print('model_process model2 train collection',
                              ';'.join(str(k) for k in o4r_list_model2))

            trained = False
            if (len(o4r_list_model1) >= battle_id_num
                    and len(o4r_list_model2) >= battle_id_num):
                print('model_process1', train_model_name, 'begin to train')
                begin_time = time.time()
                model_1.replay(o4r_list_model1.values(), batch_size)
                o4r_list_model1.clear()
                # The save helper decides on its own when to checkpoint.
                if_save_model(model_1, model1_save_header, save_batch)

                print('model_process2', train_model_name, 'begin to train')
                model_2.replay(o4r_list_model2.values(), batch_size)
                o4r_list_model2.clear()
                elapsed_ms = (time.time() - begin_time) * 1000
                print('model train time', elapsed_ms)
                if_save_model(model_2, model2_save_header, save_batch)
                trained = True

            if trained:
                with lock:
                    print('model process, add trained events')
                    restart_cmd = CmdAction(ModelProcess.NAME_MODEL_1,
                                            CmdActionEnum.RESTART, 0, None,
                                            None, None, None, None, None)
                    # Queue a "training finished" notification per client.
                    for battle_id in range(1, battle_id_num + 1):
                        done_signals[(battle_id, ModelProcess.NAME_MODEL_1)] = (
                            restart_cmd, None, None)

            # Pull one request from the action queue. The get() timeout keeps
            # a lone train signal from stalling the loop.
            state_inputs = []
            if not action_queue.empty():
                # Under current concurrency there is rarely more than one
                # waiting request, so no batching. state_inputs is a list and
                # may hold several inputs (e.g. under MCTS).
                (battle_id, act_model_name,
                 state_inputs) = action_queue.get(timeout=1)
                with lock:
                    # A pending notification replaces the answer to this
                    # request; this should only happen right after training.
                    key = (battle_id, act_model_name)
                    if key in done_signals:
                        results[key] = done_signals[key]
                        del done_signals[key]
                        continue

            if len(state_inputs) == 0:
                continue

            begin_time = time.time()
            if act_model_name == ModelProcess.NAME_MODEL_1:
                actions_list, explor_value, vpreds = model_1.get_actions(
                    state_inputs)
            elif act_model_name == ModelProcess.NAME_MODEL_2:
                actions_list, explor_value, vpreds = model_2.get_actions(
                    state_inputs)
            elapsed_ms = (time.time() - begin_time) * 1000
            time_cache.append(elapsed_ms)
            num_cache.append(len(state_inputs))
            if len(time_cache) >= 1000:
                # True division: floor division would truncate the average.
                print("model get_action average calculate time(ms)",
                      sum(time_cache) / float(len(time_cache)),
                      sum(num_cache) / float(len(num_cache)))
                time_cache = []
                num_cache = []

            with lock:
                results[(battle_id, act_model_name)] = (actions_list,
                                                        explor_value, vpreds)
        except queue.Empty:
            continue
        except Exception:
            # Log and keep serving. The previous handler rebound the name
            # `traceback` to a traceback object, which has no print_exc(),
            # so the handler itself raised.
            traceback.print_exc()