def read_process(json_str, p_request_dict, p_result_dict, lock):
    begin_time = time.time()
    obj = JSON.loads(json_str)
    raw_state_info = StateInfo.decode(obj)
    p_battle_id = raw_state_info.battleid
    # if raw_state_info.tick == -1:
    #     print('read_process: need to handle ', p_battle_id, raw_state_info.tick, 'raw log', json_str)
    # else:
    #     print('read_process: need to handle ', p_battle_id, raw_state_info.tick)

    # Hand the raw request over to the consumer, keyed by battle id.
    with lock:
        # print('read_process', p_battle_id, 'send a request', raw_state_info.tick)
        p_request_dict[p_battle_id] = json_str

    try:
        # Busy-wait until the consumer publishes a result for this battle.
        while True:
            if p_battle_id in p_result_dict:
                with lock:
                    # print('read_process', p_battle_id, 'get a result', raw_state_info.tick)
                    result = p_result_dict[p_battle_id]
                    del p_result_dict[p_battle_id]
                end_time = time.time()
                print('read_process', p_battle_id, raw_state_info.tick,
                      (end_time - begin_time) * 1000, 'got result', result)
                return result
    except queue.Empty:
        print("LineTrainerManager Exception empty")
        return '{}'
    except Exception:
        print("LineTrainerManager Exception")
        traceback.print_exc()
        return '{}'
def start_consumer(battle_id_num, request_queues, result_queues):
    consumer_times = []
    while True:
        indexs = []
        requests = []
        # Drain at most one pending request from each battle's queue.
        for index, request_queue in enumerate(request_queues):
            if not request_queue.empty():
                request = request_queue.get()
                requests.append(request)
                indexs.append(index)

        begin_time = time.time()
        for index, json_str in zip(indexs, requests):
            obj = JSON.loads(json_str)
            raw_state_info = StateInfo.decode(obj)
            # Placeholder inference: answer with a random action matrix.
            rand = np.random.rand(3, 3700)
            result_queues[index].put(rand)
        end_time = time.time()

        delta_milliseconds = (end_time - begin_time) * 1000
        consumer_times.append(delta_milliseconds)
        if len(consumer_times) >= 1000:
            print("model get_action average calculate time(ms)",
                  sum(consumer_times) / float(len(consumer_times)))
            consumer_times = []
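# NOTE: the two functions above assume multiprocessing plumbing that is not shown here:
# read_process polls shared dictionaries guarded by a lock, while start_consumer expects one
# request queue and one result queue per battle. The sketch below is a minimal, self-contained
# illustration of the queue-based wiring; echo_consumer is a simplified stand-in (the real
# consumer decodes StateInfo and runs the model instead of echoing the request back).
import multiprocessing as mp

def echo_consumer(request_queues, result_queues):
    # Simplified stand-in for start_consumer: echo each request back as its result.
    while True:
        for index, request_queue in enumerate(request_queues):
            if not request_queue.empty():
                result_queues[index].put(request_queue.get())

if __name__ == '__main__':
    battle_num = 4
    request_queues = [mp.Queue() for _ in range(battle_num)]
    result_queues = [mp.Queue() for _ in range(battle_num)]

    consumer = mp.Process(target=echo_consumer, args=(request_queues, result_queues), daemon=True)
    consumer.start()

    # One request/result round trip for battle 0.
    request_queues[0].put('{"tick": 66}')
    print(result_queues[0].get())  # -> '{"tick": 66}'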
def parse_state_log(json_str):
    # The game log prepends a timestamp before the JSON body; strip it.
    # TODO: the 23-character offset assumes the current (Python 3) log format; cutting
    # everything before the first '{' would be more robust.
    json_str = json_str[23:]
    state_json = JSON.loads(json_str)
    state_info = StateInfo.decode(state_json)
    return state_info
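# If the timestamp prefix length ever varies, a more defensive way to strip it is to cut at the
# first '{' instead of assuming 23 characters. Minimal sketch: strip_log_prefix is a hypothetical
# helper and the timestamp format in the sample line is made up; the JSON payload mirrors the
# restart packet mentioned elsewhere in this code.
import json

def strip_log_prefix(line):
    """Return the JSON payload of a log line that may carry a leading timestamp."""
    start = line.find('{')
    if start < 0:
        raise ValueError('no JSON object found in log line: %r' % line)
    return line[start:]

sample = '2018-05-04 12:00:01,123 {"wldstatic": {"ID": 9051}, "wldruntime": {"State": 0}}'
payload = json.loads(strip_log_prefix(sample))
print(payload["wldstatic"]["ID"])  # 9051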
def do_GET(self):
    content_length = int(self.headers['Content-Length'])  # size of the request body
    get_data = self.rfile.read(content_length)  # the body itself
    # Decode bytes for Python 3.
    get_data = get_data.decode()

    # Parse the request sent by the client.
    obj = JSON.loads(get_data)
    raw_state_info = StateInfo.decode(obj)

    if raw_state_info.battleid not in self.line_trainers:
        # DQN
        # self.line_trainers[raw_state_info.battleid] = LineTrainer(self.save_dir, ['27'], self.model1,
        #                                                           self.model1_save_header,
        #                                                           ['28'], self.model2,
        #                                                           self.model2_save_header)
        # PPO
        ob = np.zeros(183, dtype=float).tolist()
        model1_cache = PPO_CACHE2(ob, 1, horizon=self.model_1.optim_batchsize)
        model2_cache = PPO_CACHE2(ob, 1, horizon=self.model_2.optim_batchsize)
        self.line_trainers[raw_state_info.battleid] = LineTrainerPPO(
            self.save_dir,
            '27', self.model_1, self.model1_save_header, model1_cache,
            '28', self.model_2, self.model2_save_header, model2_cache,
            real_hero=None, policy_ratio=-1, policy_continue_acts=3)

    # Hand the frame over to the line trainer for training.
    rsp_str = self.line_trainers[raw_state_info.battleid].train_line_model(get_data)
    print(rsp_str)
    rsp_str = rsp_str.encode(encoding="utf-8")

    # Return the corresponding commands to the client.
    self._set_headers()
    self.wfile.write(rsp_str)
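# do_GET above belongs to a BaseHTTPRequestHandler subclass that also carries the trainer state
# (line_trainers, models, save paths). The self-contained sketch below only shows how such a
# handler is wired to http.server.HTTPServer; EchoHandler and port 8080 are illustrative, and the
# body is echoed back instead of being routed to a LineTrainerPPO.
from http.server import BaseHTTPRequestHandler, HTTPServer

class EchoHandler(BaseHTTPRequestHandler):
    # Simplified stand-in for the trainer handler: echo the request body back as JSON.
    def do_GET(self):
        content_length = int(self.headers.get('Content-Length') or 0)
        body = self.rfile.read(content_length)
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.end_headers()
        self.wfile.write(body)

if __name__ == '__main__':
    HTTPServer(('0.0.0.0', 8080), EchoHandler).serve_forever()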
def read_process(self, json_str):
    begin_time = time.time()
    obj = JSON.loads(json_str)
    raw_state_info = StateInfo.decode(obj)
    p_battle_id = raw_state_info.battleid
    try:
        response = self.battle_trainers[p_battle_id].build_response(json_str)
        return response
    except queue.Empty:
        print("LineTrainerManager Exception empty")
        return '{}'
    except Exception:
        print("LineTrainerManager Exception")
        traceback.print_exc(file=sys.stdout)
        return '{}'
def train_line_model(self, raw_state_str):
    self.save_raw_log(raw_state_str)
    prev_state_info = self.state_cache[-1] if len(self.state_cache) > 0 else None

    # Parse the request sent by the client.
    obj = JSON.loads(raw_state_str)
    raw_state_info = StateInfo.decode(obj)

    # On a restart the client sends {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}.
    if raw_state_info.tick == -1:
        return {"ID": raw_state_info.battleid, "tick": -1}

    if raw_state_info.tick >= 193512:
        debug_i = 1  # debugging hook

    # Merge the previous frame into the current one to obtain complete information.
    # Occasionally the starting tick is 66 and the second tick is 528, so detect a fresh battle.
    if raw_state_info.tick <= StateUtil.TICK_PER_STATE and (prev_state_info is None or prev_state_info.tick > raw_state_info.tick):
        print("clear")
        prev_state_info = None
        self.state_cache = []
        self.hero_strategy = {}
        self.model1_just_dead = 0
        self.model2_just_dead = 0
    elif prev_state_info is not None and prev_state_info.tick >= raw_state_info.tick:
        print("clear %s %s" % (prev_state_info.tick, raw_state_info.tick))
        self.state_cache = []
    elif prev_state_info is None and raw_state_info.tick > StateUtil.TICK_PER_STATE:
        # If this is not the opening frame, ask the game to restart.
        # Occasionally the first frame carries no tick (-1); in that case the battle also has to be restarted.
        print(self.battle_id, 'not the opening frame, restart the game', raw_state_info.tick)
        action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
        rsp_obj = {"ID": raw_state_info.battleid, "tick": raw_state_info.tick, "cmd": action_strs}
        rsp_str = JSON.dumps(rsp_obj)
        return rsp_str

    state_info = StateUtil.update_state_log(prev_state_info, raw_state_info)

    # Sanity check: the hero we control must be present and readable.
    hero = state_info.get_hero(self.model1_hero)
    if hero is None or hero.hp is None:
        print(self.battle_id, self.model1_hero, state_info.tick, 'hero information missing, abnormal frame')
        print(self.battle_id, 'not the opening frame, restart the game', raw_state_info.tick)
        action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
        rsp_obj = {"ID": raw_state_info.battleid, "tick": raw_state_info.tick, "cmd": action_strs}
        rsp_str = JSON.dumps(rsp_obj)
        return rsp_str

    # Persist the merged frame.
    self.state_cache.append(state_info)
    # self.save_state_log(state_info)

    # Ask the models for their actions; the chosen actions are also recorded on the current frame.
    action_strs = []
    restart = False
    if self.model1_hero is not None and self.real_hero != self.model1_hero:
        actions_model1, restart = self.build_response(self.state_cache, -1, self.model1_hero)
        action_strs.extend(actions_model1)
    if self.model2_hero is not None and not restart and self.real_hero != self.model2_hero:
        actions_model2, restart = self.build_response(self.state_cache, -1, self.model2_hero)
        action_strs.extend(actions_model2)

    # Compute rewards. With a real player we look back a few extra frames to infer their actions.
    reward_state_idx = -2 if self.real_hero is None else -4
    new = 0
    if len(self.state_cache) + reward_state_idx > 0:
        new, loss_team = self.if_restart(self.state_cache, reward_state_idx)
        if self.model1_hero is not None:
            self.remember_replay(self.state_cache, reward_state_idx, self.model1_cache, self.model_process,
                                 self.model1_hero, self.model2_hero, new, loss_team)
        if self.model2_hero is not None:
            self.remember_replay(self.state_cache, reward_state_idx, self.model2_cache, self.model_process,
                                 self.model2_hero, self.model1_hero, new, loss_team)

    # To keep restarts to a minimum, only the action caches of the previous models are cleared after training.
    if restart:
        self.model1_cache.clear_cache()
        self.model2_cache.clear_cache()
        # Return an empty action list for the current frame.
        action_strs = []

    # Restart the game when the restart condition is met
    # (the first tower on the lane has been destroyed).
    if new == 1:
        action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]

    # Return the result to the game client.
    rsp_obj = {"ID": state_info.battleid, "tick": state_info.tick, "cmd": action_strs}
    rsp_str = JSON.dumps(rsp_obj)
    return rsp_str
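# The restart reply built above (and again in the handlers below) always has the same shape:
# a RESTART command for hero '27' wrapped in an {"ID", "tick", "cmd"} envelope. A small helper
# along these lines could remove the duplication. build_restart_response is a hypothetical name;
# it reuses StateUtil.build_action_command and JSON.dumps exactly as the surrounding code does.
def build_restart_response(battle_id, tick):
    """Build the JSON reply that asks the game client to restart the battle."""
    action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
    rsp_obj = {"ID": battle_id, "tick": tick, "cmd": action_strs}
    return JSON.dumps(rsp_obj)

# Usage inside train_line_model / build_response:
#     return build_restart_response(raw_state_info.battleid, raw_state_info.tick)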
def build_response(self, raw_state_str):
    self.save_raw_log(raw_state_str)
    prev_state_info = self.state_cache[-1] if len(self.state_cache) > 0 else None
    response_strs = []

    # Parse the request sent by the client.
    obj = JSON.loads(raw_state_str)
    raw_state_info = StateInfo.decode(obj)

    # On a restart the client sends {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}.
    if raw_state_info.tick == -1:
        return {"ID": raw_state_info.battleid, "tick": -1}

    if raw_state_info.tick <= StateUtil.TICK_PER_STATE and (
            prev_state_info is None or prev_state_info.tick > raw_state_info.tick):
        print("clear")
        prev_state_info = None
        self.state_cache = []
        self.battle_started = -1
        self.battle_heroes_cache = []
        self.dead_heroes = []
        self.dead_heroes_cache = []
        self.data_inputs = []
        self.rebooting = False
    elif prev_state_info is None and raw_state_info.tick > StateUtil.TICK_PER_STATE:
        # If this is not the opening frame, ask the game to restart.
        # Occasionally the first frame carries no tick (-1); in that case the battle also has to be restarted.
        print("battle_id", self.battle_id, "tick", raw_state_info.tick, 'not the opening frame, restart the game')
        action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
        rsp_obj = {"ID": raw_state_info.battleid, "tick": raw_state_info.tick, "cmd": action_strs}
        rsp_str = JSON.dumps(rsp_obj)
        return rsp_str

    state_info = StateUtil.update_state_log(prev_state_info, raw_state_info)

    hero = state_info.get_hero("27")
    if hero is None or hero.hp is None:
        # Edge case: if the hero cannot be found, restart the battle.
        print("battle_id", self.battle_id, "tick", state_info.tick, 'hero missing, restart the game')
        action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
        rsp_obj = {"ID": raw_state_info.battleid, "tick": raw_state_info.tick, "cmd": action_strs}
        rsp_str = JSON.dumps(rsp_obj)
        return rsp_str

    # Pre-battle preparation.
    if len(self.state_cache) == 0:
        # On the first frame, grant gold and levels.
        for hero in self.heros:
            add_gold_cmd = CmdAction(hero, CmdActionEnum.ADDGOLD, None, None, None, None, None, None, None)
            add_gold_cmd.gold = 3000
            add_gold_str = StateUtil.build_command(add_gold_cmd)
            response_strs.append(add_gold_str)

            add_lv_cmd = CmdAction(hero, CmdActionEnum.ADDLV, None, None, None, None, None, None, None)
            add_lv_cmd.lv = 9
            add_lv_str = StateUtil.build_command(add_lv_cmd)
            response_strs.append(add_lv_str)
    elif len(self.state_cache) > 1:
        # From the second frame on, upgrade skills and buy equipment; this can span several frames.
        for hero in self.heros:
            upgrade_cmd = self.upgrade_skills(state_info, hero)
            if upgrade_cmd is not None:
                response_strs.append(upgrade_cmd)
            buy_cmd = self.buy_equip(state_info, hero)
            if buy_cmd is not None:
                response_strs.append(buy_cmd)

    for hero in self.heros:
        # Check whether the hero died on this frame.
        if prev_state_info is not None:
            dead = StateUtil.if_hero_dead(prev_state_info, state_info, hero)
            if dead == 1 and hero not in self.dead_heroes:
                print("battle_id", self.battle_id, "tick", state_info.tick, "hero died", hero)
                self.dead_heroes.append(hero)

    # First require all heroes to stand inside the team-battle circle; afterwards the model decides
    # every action. Invalid actions, and moves that would leave the circle, must be filtered out.
    # TODO after the battle starts, pull back heroes whose skill moves occasionally take them outside the circle.
    # Dead heroes are excluded; they do not need to rejoin the battle.
    # The battle circle shrinks over time.
    battle_range = self.cal_battle_range(len(self.state_cache) - self.battle_started)
    heroes_in_range, heroes_out_range = TeamBattleTrainer.all_in_battle_range(
        state_info, self.heros, self.dead_heroes, battle_range)

    # Heroes that are still alive.
    battle_heros = list(heroes_in_range)
    battle_heros.extend(heroes_out_range)

    # Cache participation and deaths for later training.
    self.battle_heroes_cache.append(battle_heros)
    self.dead_heroes_cache.append(list(self.dead_heroes))

    if state_info.tick >= 142560:
        debuginfo = True  # debugging hook

    # The team battle has not started yet: some heroes are still outside the circle.
    if len(heroes_out_range) > 0:
        if self.battle_started > -1:
            print('battle_id', self.battle_id, "battle already started, but heroes are still outside the circle",
                  ','.join(heroes_out_range), "battle_range", battle_range)

        # Move heroes towards the two starting points; once the battle has started, move them to the battle centre.
        for hero in heroes_out_range:
            start_point_x = randint(0, 8000)
            start_point_z = TeamBattleTrainer.BATTLE_CIRCLE_RADIUS_BATTLE_START * 1000 if self.battle_started == -1 else 0
            start_point_z += randint(-4000, 4000)
            if TeamBattleUtil.get_hero_team(hero) == 0:
                start_point_z *= -1
            start_point_z += TeamBattleTrainer.BATTLE_POINT_Z
            tgt_pos = PosStateInfo(start_point_x, 0, start_point_z)
            move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None, tgt_pos, None, None, None, None)
            mov_cmd_str = StateUtil.build_command(move_action)
            response_strs.append(mov_cmd_str)

    # The team battle is under way.
    elif not self.rebooting:
        if self.battle_started == -1:
            self.battle_started = len(self.state_cache)

        # Special case: Dracula's ultimate drops his hp to 1; patch the frame state.
        state_info, _ = TeamBattlePolicy.modify_status_4_draculas_invincible(state_info, self.state_cache)

        # action_cmds, input_list, model_upgrade = self.get_model_actions(state_info, heroes_in_range)

        # Get actions per team.
        team_a, team_b = TeamBattleUtil.get_teams(heroes_in_range)
        team_actions_a, input_list_a, model_upgrade_a = self.get_model_actions_team(state_info, team_a, heroes_in_range)
        team_actions_b, input_list_b, model_upgrade_b = self.get_model_actions_team(state_info, team_b, heroes_in_range)

        # If a model was upgraded after the battle started, restart the battle.
        if (model_upgrade_a or model_upgrade_b) and self.battle_started < len(self.state_cache) + 1:
            print("battle_id", self.battle_id, "restarting the battle because the model was upgraded",
                  self.battle_started, len(self.state_cache))
            action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
            rsp_obj = {"ID": raw_state_info.battleid, "tick": raw_state_info.tick, "cmd": action_strs}
            rsp_str = JSON.dumps(rsp_obj)
            return rsp_str

        data_input_map = {}
        for action_cmd, data_input in zip(team_actions_a + team_actions_b, input_list_a + input_list_b):
            action_str = StateUtil.build_command(action_cmd)
            response_strs.append(action_str)
            state_info.add_action(action_cmd)
            data_input_map[action_cmd.hero_name] = data_input

        # Cache all model inputs for later training.
        self.data_inputs.append(data_input_map)

    # Append the frame to the cache.
    self.state_cache.append(state_info)

    # Add the model actions to the training cache and compute the rewards.
    # Note: rewards depend on subsequent states, so this computation lags behind.
    last_x_index = 2
    if self.battle_started > -1 and len(self.data_inputs) >= last_x_index:
        if self.rebooting:
            # Tests show the next frame may arrive before the restart takes effect; skip training and keep restarting.
            print("battle_id", self.battle_id, "tick", state_info.tick, "warn",
                  "restart requested but frames keep arriving, restarting again")
            # Restart the game.
            response_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
        else:
            state_index = len(self.state_cache) - last_x_index
            win, win_team, left_heroes = self.remember_replay_heroes(-last_x_index, state_index, battle_range)

            # End-of-battle condition: fight until only one side is left.
            # all_in_team = TeamBattleUtil.all_in_one_team(heroes_in_range)
            # if self.battle_started:
            #     if len(self.dead_heroes) >= 9 or (len(self.dead_heroes) >= 5 and all_in_team > -1):
            if win == 1:
                # Restart the game.
                print('battle_id', self.battle_id, "restart the game", "remaining heroes", ','.join(left_heroes))
                response_strs = [StateUtil.build_action_command('27', 'RESTART', None)]
                self.rebooting = True

    # Earlier per-hero implementation, kept for reference:
    # battle_heros = self.search_team_battle(state_info)
    # if len(battle_heros) > 0:
    #     print("team battle heros", ';'.join(battle_heros))
    #
    # heros_need_model = []
    # for hero in self.heros:
    #     # Check whether the hero died on this frame.
    #     if prev_state_info is not None:
    #         dead = StateUtil.if_hero_dead(prev_state_info, state_info, hero)
    #         if dead == 1 and hero not in self.dead_heroes:
    #             self.dead_heroes.append(hero)
    #
    #     # Heroes that have died (and respawned) should not rejoin the team battle.
    #     if hero in self.dead_heroes:
    #         continue
    #
    #     # near_enemy_heroes = StateUtil.get_nearby_enemy_heros(state_info, hero, TeamBattleTrainer.MODEL_RANGE)
    #     if hero not in battle_heros:
    #         # Move near the battle point, with some randomness.
    #         rdm_delta_x = randint(0, 1000)
    #         rdm_delta_z = randint(0, 1000)
    #         tgt_pos = PosStateInfo(TeamBattleTrainer.BATTLE_POINT_X + rdm_delta_x, 0, TeamBattleTrainer.BATTLE_POINT_Z + rdm_delta_z)
    #         move_action = CmdAction(hero, CmdActionEnum.MOVE, None, None, tgt_pos, None, None, None, None)
    #         mov_cmd_str = StateUtil.build_command(move_action)
    #         response_strs.append(mov_cmd_str)
    #     else:
    #         # Let the model decide.
    #         heros_need_model.append(hero)
    #
    # if len(heros_need_model) > 0:
    #     action_cmds = self.get_model_actions(state_info, heros_need_model)
    #     for action_cmd in action_cmds:
    #         action_str = StateUtil.build_command(action_cmd)
    #         response_strs.append(action_str)
    #         state_info.add_action(action_cmd)
    #         # TODO record model outputs for later training

    # Return the result to the game client.
    rsp_obj = {"ID": state_info.battleid, "tick": state_info.tick, "cmd": response_strs}
    rsp_str = JSON.dumps(rsp_obj)
    print('battle_id', self.battle_id, 'response', rsp_str)
    return rsp_str
def train_line_model(self, raw_state_str):
    self.save_raw_log(raw_state_str)
    prev_state_info = self.state_cache[-1] if len(self.state_cache) > 0 else None

    # Parse the request sent by the client.
    obj = JSON.loads(raw_state_str)
    raw_state_info = StateInfo.decode(obj)

    # On a restart the client sends {"wldstatic":{"ID":9051},"wldruntime":{"State":0}}.
    if raw_state_info.tick == -1:
        return ''

    if raw_state_info.tick == 285516:
        debug_i = 1  # debugging hook

    # Merge the previous frame into the current one to obtain complete information.
    if raw_state_info.tick <= StateUtil.TICK_PER_STATE:
        print("clear")
        self.state_cache = []
        prev_state_info = None
    elif prev_state_info is not None and prev_state_info.tick >= raw_state_info.tick:
        print("clear %s %s" % (prev_state_info.tick, raw_state_info.tick))
        self.state_cache = []
    state_info = StateUtil.update_state_log(prev_state_info, raw_state_info)

    # Ask the models for their actions; the chosen actions are also recorded on the current frame.
    action_strs = self.build_response(state_info, prev_state_info, self.model1, self.model1_heros)
    if self.model2_heros is not None:
        actions_model2 = self.build_response(state_info, prev_state_info, self.model2, self.model2_heros)
        action_strs.extend(actions_model2)

    # Cache and persist the frame.
    self.state_cache.append(state_info)
    self.save_state_log(state_info)

    # Update the player actions and rewards; this lags behind by a few frames.
    reward_state_idx = len(self.state_cache) - LineModel.REWARD_DELAY_STATE_NUM
    # print('reward_state_idx: ' + str(reward_state_idx))
    state_with_reward = None
    if reward_state_idx > 1:
        if self.state_cache[reward_state_idx].tick >= 686004:
            debug = 1  # debugging hook
        self.guess_hero_actions(reward_state_idx, self.real_heros)
        prev_4_m = self.state_cache[reward_state_idx - 1]
        state_with_reward = LineModel_DQN.update_state_rewards(self.state_cache, reward_state_idx)

    if state_with_reward is not None:
        # Persist the intermediate result.
        next_state_4_m = self.state_cache[reward_state_idx + 1]
        self.save_reward_log(state_with_reward)

        added = self.model1.remember(prev_4_m, state_with_reward, next_state_4_m)
        if added:
            # Train model 1.
            model1_memory_len = self.model1.get_memory_size()
            if self.model1.if_replay(64):
                self.model1.replay(64)
            if model1_memory_len > 0 and model1_memory_len % 1000 == 0:
                self.model1.save(self.model1_save_header + str(self.model1.get_memory_size()) + '/model')

        if self.model2 is not None:
            # TODO filter the replay into the corresponding model.
            added = self.model2.remember(prev_4_m, state_with_reward, next_state_4_m)
            if added:
                # Train model 2.
                model2_memory_len = self.model2.get_memory_size()
                if self.model2.if_replay(64):
                    self.model2.replay(64)
                if model2_memory_len > 0 and model2_memory_len % 1000 == 0:
                    self.model2.save(self.model2_save_header + str(self.model2.get_memory_size()) + '/model')

    # Restart the game when the restart condition is met
    # (the first tower on the middle lane has been destroyed).
    if StateUtil.if_first_tower_destroyed_in_middle_line(state_info):
        print('restart the game')
        action_strs = [StateUtil.build_action_command('27', 'RESTART', None)]

    # Return the result to the game client.
    rsp_obj = {"ID": state_info.battleid, "tick": state_info.tick, "cmd": action_strs}
    rsp_str = JSON.dumps(rsp_obj)
    return rsp_str