def __init__(self, env_id, server_port, agents, config, replay): """对战环境初始化""" # env_id为仿真环境编号; server_port为基准端口号, 由此生成映射到容器内部各端口的宿主机端口号 # agents为描述对战双方智能体的字典; config为仿真环境的基本配置信息(想定、镜像名等) super().__init__() random.seed(os.getpid() + env_id) self.env_id = env_id self.server_port = server_port + env_id self.config = config self.volume_list = self.config['volume_list'] self.max_game_len = self.config['max_game_len'] scene_name = self.config['scene_name'] prefix = self.config['prefix'] image_name = self.config['image_name'] # 构建管理本地容器化仿真环境的实例对象 self.env_manager = EnvManager(self.env_id, server_port, scene_name, prefix, image_name=image_name) self.env_client = None self.agents_conf = agents self.agents = self._init_agents() # 记录出错信息 logger_name = "exceptions" self.logger = logging.getLogger(logger_name) self.logger.setLevel(level=logging.DEBUG) log_path = "./logs.txt" handler = logging.FileHandler(log_path) handler.setLevel(logging.DEBUG) self.logger.addHandler(handler) # 回放记录与保存路径 self.save_replay = replay['save_replay'] self.replay_dir = replay['replay_dir']
def __init__(self, env_id, server_port, agents, config): """对战环境初始化""" # env_id为仿真环境编号; server_port为基准端口号, 由此生成映射到容器内部各端口的宿主机端口号 # agents为描述对战双方智能体的字典; config为仿真环境的基本配置信息(想定、镜像名等) super().__init__() random.seed(os.getpid() + env_id) self.env_id = env_id self.server_port = server_port + env_id self.config = config self.volume_list = self.config['volume_list'] self.max_game_len = self.config['max_game_len'] scene_name = self.config['scene_name'] prefix = self.config['prefix'] image_name = self.config['image_name'] # 构建管理本地容器化仿真环境的实例对象 self.env_manager = EnvManager(self.env_id, server_port, scene_name, prefix, image_name=image_name) self.env_client = None self.agents_conf = agents self.agents = self._init_agents()
def __init__(self, config): self.env_id = config['server_host'] + 1 self.server_port = config['server_port'] + self.env_id self.config = config self.volume_list = self.config['volume_list'] self.max_game_len = self.config['max_game_len'] self.max_game_time = self.config.get('max_game_time', 7200) self.sim_speed = self.config.get('sim_speed', 20) random.seed(os.getpid() + self.env_id) scen_name = self.config['scen_name'] prefix = self.config['prefix'] image_name = self.config['image_name'] self.env_manager = EnvManager(self.env_id, self.server_port, scen_name, prefix, image_name=image_name) self.env_client = EnvClient('127.0.0.1', self.env_manager.get_server_port()) self.last_time = 0
class EnvRunner(object): def __init__(self, env_id, server_port, agents, config, replay): """对战环境初始化""" # env_id为仿真环境编号; server_port为基准端口号, 由此生成映射到容器内部各端口的宿主机端口号 # agents为描述对战双方智能体的字典; config为仿真环境的基本配置信息(想定、镜像名等) super().__init__() random.seed(os.getpid() + env_id) self.env_id = env_id self.server_port = server_port + env_id self.config = config self.volume_list = self.config['volume_list'] self.max_game_len = self.config['max_game_len'] scene_name = self.config['scene_name'] prefix = self.config['prefix'] image_name = self.config['image_name'] # 构建管理本地容器化仿真环境的实例对象 self.env_manager = EnvManager(self.env_id, server_port, scene_name, prefix, image_name=image_name) self.env_client = None self.agents_conf = agents self.agents = self._init_agents() # 记录出错信息 logger_name = "exceptions" self.logger = logging.getLogger(logger_name) self.logger.setLevel(level=logging.DEBUG) log_path = "./logs.txt" handler = logging.FileHandler(log_path) handler.setLevel(logging.DEBUG) self.logger.addHandler(handler) # 回放记录与保存路径 self.save_replay = replay['save_replay'] self.replay_dir = replay['replay_dir'] def __del__(self): self.env_manager.stop_docker() # 停止并删除旧环境 pass def _start_env(self): """启动仿真环境""" # 查找是否存在同名的旧容器, 有的话先删除再启动新环境 docker_name = 'env_{}'.format(self.env_id) docker_search = 'docker ps -a --filter name=^/{}$'.format(docker_name) # 捕获终端输出结果 p = subprocess.Popen(docker_search, stdout=subprocess.PIPE, shell=True) out, err = p.communicate() # decode()将bytes对象转成str对象, strip()删除头尾字符空白符 # split()默认以分隔符, 包括空格, 换行符\n, 制表符\t来对字符串进行分割 out_str = out.decode() str_split = out_str.strip().split() if docker_name in str_split: print('存在同名旧容器,先删除之\n', out_str) self.env_manager.stop_docker() self.env_manager.start_docker(self.volume_list) # 启动新环境 time.sleep(10) def _init_agents(self): """根据配置信息构建红蓝双方智能体""" agents = [] for name, agent_conf in self.agents_conf.items(): cls = agent_conf['class'] agent = cls(name, agent_conf) agents.append(agent) return agents def _reset(self): """智能体重置""" for agent in self.agents: agent.reset() def _run_env(self): pass def _get_observation(self): """态势获取""" return self.env_client.get_observation() def _run_actions(self, actions): """客户端向服务端发送指令""" self.env_client.take_action(actions) @func_timeout.func_set_timeout(0.3) def _agent_step(self, agent, sim_time, obs): actions = agent.step(sim_time, obs) return actions def _run_agents(self, observation): """调用智能体的step方法生成指令, 然后发送指令""" sim_time = observation['sim_time'] obs_red = observation['red'] obs_blue = observation['blue'] actions_red = [] actions_blue = [] for agent in self.agents: side = agent.side if side == 'red': # 获取决策指令 try: actions_red = self._agent_step(agent, sim_time, obs_red) except func_timeout.exceptions.FunctionTimedOut: print("red agent time out at this step") self.logger.debug( "在{}秒时红方智能体决策超时\n".format(sim_time)) # 记录超时信息 # 指令检查 assert actions_red is not None for action in actions_red: if self._action_validate(action, obs_red, side, sim_time): self.logger.debug("在{}秒时删除红方无效指令{}\n".format( sim_time, action)) # 记录无效指令 idx = actions_red.index(action) del actions_red[idx] # 删除掉该无效指令 else: # 获取决策指令 try: actions_blue = self._agent_step(agent, sim_time, obs_blue) except func_timeout.exceptions.FunctionTimedOut: print("blue agent time out at this step") self.logger.debug( "在{}秒时蓝方智能体决策超时\n".format(sim_time)) # 记录超时信息 # 指令检查 assert actions_blue is not None for action in actions_blue: if self._action_validate(action, obs_blue, side, sim_time): self.logger.debug("在{}秒时删除蓝方无效指令{}\n".format( sim_time, action)) # 记录无效指令 idx = actions_blue.index(action) del actions_blue[idx] # 删除掉该无效指令 self._run_actions(actions_red + actions_blue) @staticmethod def _get_done(observation): """推演是否结束""" sim_time = observation['sim_time'] obs_blue = observation['blue'] obs_red = observation['red'] cmd_posts = [u for u in obs_blue['units'] if u['LX'] == 41] blue_score = ( len([u for u in obs_blue['units'] if u['LX'] != 41]) + obs_blue['airports'][0]['NM']) / 27 * 10 + len(cmd_posts) * 30 red_score = (len( [u for u in obs_red['units'] if u['LX'] != 41 and u['LX'] != 14] ) + obs_red['airports'][0]['NM']) / 41 * 10 + (2 - len(cmd_posts)) * 30 done = [False, 0, 0] # [对战是否结束, 红方得分, 蓝方得分] # 若蓝方态势平台信息里没有指挥所, 说明两个指挥所已经被打掉 if len(cmd_posts) == 0 or sim_time >= 9000: done[0] = True if blue_score < red_score: # red win done[1:] = [1, -1] elif blue_score > red_score: # blue win done[1:] = [-1, 1] return done def _action_validate(self, action, obs_own, side, sim_time): """指令有效性基本检查""" # 检查项: 执行主体, 目标, 护航对象, 机场相关指令是否有效, # 同时检查速度设置是否越界 # 检查项: 地防/护卫舰的初始部署位置只能在己方半场, 且只允许在开局2分钟内进行调整; results = [] maintype = action['maintype'] if maintype in ['Ship_Move_Deploy', 'Ground_Move_Deploy']: pos2d = (action['point_x'], action['point_y']) results.append(self._validate_deploy(side, pos2d, sim_time)) if 'self_id' in action: speed = int(action['speed']) if 'speed' in action else None self_id = int(action['self_id']) results.append( self._validate_self_id(maintype, self_id, speed, obs_own)) if 'target_id' in action: target_id = int(action['target_id']) results.append(self._validate_target_id(target_id, obs_own)) if 'cov_id' in action: cov_id = int(action['cov_id']) results.append(self._validate_cov_id(cov_id, obs_own)) if 'airport_id' in action: airport_id = int(action['airport_id']) speed = int(action['speed']) if 'speed' in action else None results.append( self._validate_airport(airport_id, speed, action, obs_own)) # 返回指令检查结果 return True if any(results) else False # 返回True说明发现错误 @staticmethod def _validate_deploy(side, pos2d, sim_time): if sim_time > 120 or (side == 'red' and pos2d[0] < 0) or (side == 'blue' and pos2d[0] > 0): print("护卫舰初始部署指令无效") return True # 返回True, 说明指令检查发现错误(下同) return False def _validate_self_id(self, maintype, self_id, speed, obs_own): """判断执行主体是否有效""" # 空中拦截指令(make_airattack)执行主体只能是单平台(需要选手考虑目标分配问题) # 返航指令(make_returntobase)执行主体可以是单平台也可以是编队(考虑编队内单机油量不足提前返航) # 其他指令执行主体原则上必须是编队, 为防止出错目前服务端也支持给单平台下指令. unit = [u for u in obs_own['units'] if u['ID'] == self_id] team = [u for u in obs_own['teams'] if u['TMID'] == self_id] if maintype == 'airattack': if len(unit) == 0: print("无效平台编号%s" % self_id) return True else: if len(unit) == 0 and len(team) == 0: print("无效执行主体编号%s" % self_id) return True else: obj = unit[0] if len(unit) > 0 else team[0] if obj['LX'] not in type4cmd[maintype]: print("类型为%s的平台或者编队%s无法执行%s指令" % (obj['LX'], self_id, maintype)) return True # 检查速度设置是否越界 if speed is not None and self._validate_speed( obj['LX'], speed): return True return False @staticmethod def _validate_target_id(target_id, obs_own): """判断目标编号是否合法""" # 所有指令的目标编号, 必须是敌方单平台号 unit = [u for u in obs_own['qb'] if u['ID'] == target_id] if len(unit) == 0: print("无效目标平台编号%s" % target_id) return True return False @staticmethod def _validate_cov_id(cov_id, obs_own): """护航对象是否有效""" # 护航对象编号, 必须为己方编队号 team = [u for u in obs_own['teams'] if u['TMID'] == cov_id] if len(team) == 0: print("无效护航对象编号%s" % cov_id) return True else: type4cov = [12, 13, 15] if int(team[0]['LX']) not in type4cov: print("非法护航目标类型%s" % team[0]['LX']) return True return False def _validate_airport(self, airport_id, speed, action, obs_own): """判断机场相关指令的有效性""" airports = obs_own['airports'] airport = [u for u in airports if u['ID'] == airport_id] # 根据机场情况判断指令合法性 maintype = action['maintype'] if len(airport) == 0: print("无效机场编号%s" % airport_id) return True elif maintype == 'returntobase': return False else: obj = airport[0] if obj['DA'] > 0: print("机场%s修复中无法执行起飞指令" % obj['ID']) return True if maintype == 'takeoffprotect': fly_type = 11 # 起飞护航指令默认起飞歼击机 elif maintype in ['takeoffareahunt', 'takeofftargethunt']: fly_type = 15 # 起飞突击类指令默认起飞轰炸机 else: fly_type = action['fly_type'] fly_num = action['fly_num'] if int(fly_type) not in type4cmd[maintype]: print("机场无法起降类型为%s的单位" % fly_type) return True type_map = {11: 'AIR', 12: 'AWCS', 13: 'JAM', 14: 'UAV', 15: 'BOM'} attr = type_map[fly_type] if fly_num > obj[attr]: print('指令>>>', action) print("起飞数量%d大于机场可起飞数量%d" % (fly_num, obj[attr])) return True # 检查速度设置是否越界 if speed is not None and self._validate_speed(fly_type, speed): return True return False @staticmethod def _validate_speed(unit_type, speed): """判断速度设置是否越界(单位: m/s),范围适当放宽""" speed_range = { 11: [100, 300], # 歼击机速度约为900-1000km/h 12: [100, 250], # 预警机速度约为600-800km/h 13: [100, 250], # 预警机速度约为600-800km/h 14: [50, 100], # 无人机速度约为180-350km/h 15: [100, 250], # 轰炸机速度约为600-800km/h 21: [0, 20], # 舰船速度约为0-30节(白皮书书写有误), 等价于0-54km/h 31: [0, 30] # 地防速度约为0-90km/h(白皮书书写有误) } sp_limit = speed_range[unit_type] if speed < sp_limit[0] or speed > sp_limit[1]: print("类型为%d的单位速度%d越界" % (unit_type, speed)) return True return False
class Env(object): def __init__(self, config): self.env_id = config['server_host'] + 1 self.server_port = config['server_port'] + self.env_id self.config = config self.volume_list = self.config['volume_list'] self.max_game_len = self.config['max_game_len'] self.max_game_time = self.config.get('max_game_time', 7200) self.sim_speed = self.config.get('sim_speed', 20) random.seed(os.getpid() + self.env_id) scen_name = self.config['scen_name'] prefix = self.config['prefix'] image_name = self.config['image_name'] self.env_manager = EnvManager(self.env_id, self.server_port, scen_name, prefix, image_name=image_name) self.env_client = EnvClient('127.0.0.1', self.env_manager.get_server_port()) self.last_time = 0 def __del__(self): self.env_manager.stop_docker() def reset(self): while True: self.env_manager.stop_docker() # time.sleep(random.randint(0, 20)) self.env_manager.start_docker(self.volume_list) # time.sleep(10) success = self.env_client.connect_server() if success == True: self.env_client.take_action( [EnvCmd.make_simulation("SPEED", "", self.sim_speed)]) self.last_time = 0 break self._run_env() ob = self.env_client.get_observation() return ob def step(self, commands): for name, command in commands.items(): self.env_client.take_action(command) self._run_env() ob = self.env_client.get_observation() done = self._get_done(ob) return ob, done def _get_done(self, observation): curr_time = observation['sim_time'] return curr_time >= self.max_game_time def _run_env(self, request_period=5): new_time = self.env_client.get_time() self.env_client.take_action([EnvCmd.make_simulation("RESUME", "", "")]) while new_time - self.last_time < request_period: time.sleep(0.01) new_time = self.env_client.get_time() self.last_time = new_time self.env_client.take_action( [EnvCmd.make_simulation("PAUSE", "", new_time)])
class EnvRunner(object): def __init__(self, env_id, server_port, agents, config): """对战环境初始化""" # env_id为仿真环境编号; server_port为基准端口号, 由此生成映射到容器内部各端口的宿主机端口号 # agents为描述对战双方智能体的字典; config为仿真环境的基本配置信息(想定、镜像名等) super().__init__() random.seed(os.getpid() + env_id) self.env_id = env_id self.server_port = server_port + env_id self.config = config self.volume_list = self.config['volume_list'] self.max_game_len = self.config['max_game_len'] scene_name = self.config['scene_name'] prefix = self.config['prefix'] image_name = self.config['image_name'] # 构建管理本地容器化仿真环境的实例对象 self.env_manager = EnvManager(self.env_id, server_port, scene_name, prefix, image_name=image_name) self.env_client = None self.agents_conf = agents self.agents = self._init_agents() def __del__(self): # self.env_manager.stop_docker() # 停止并删除旧环境 pass def _start_env(self): """启动仿真环境""" # 查找是否存在同名的旧容器, 有的话先删除再启动新环境 docker_name = 'env_{}'.format(self.env_id) docker_search = 'docker ps -a --filter name=^/{}$'.format(docker_name) # 捕获终端输出结果 p = subprocess.Popen(docker_search, stdout=subprocess.PIPE, shell=True) out, err = p.communicate() # decode()将bytes对象转成str对象, strip()删除头尾字符空白符 # split()默认以分隔符, 包括空格, 换行符\n, 制表符\t来对字符串进行分割 out_str = out.decode() str_split = out_str.strip().split() if docker_name in str_split: print('存在同名旧容器,先删除之\n', out_str) self.env_manager.stop_docker() self.env_manager.start_docker(self.volume_list) # 启动新环境 time.sleep(10) def _init_agents(self): """根据配置信息构建红蓝双方智能体""" agents = [] for name, agent_conf in self.agents_conf.items(): cls = agent_conf['class'] agent = cls(name, agent_conf) agents.append(agent) return agents def _reset(self): """智能体重置""" for agent in self.agents: agent.reset() def _run_env(self): pass def _get_observation(self): """态势获取""" return self.env_client.get_observation() def _run_actions(self, actions): """客户端向服务端发送指令""" self.env_client.take_action(actions) def _run_agents(self, observation): """调用智能体的step方法生成指令, 然后发送指令""" for agent in self.agents: side = agent.side sim_time = observation['sim_time'] obs_red = observation['red'] obs_blue = observation['blue'] if side == 'red': actions = agent.step(sim_time, obs_red) self._action_validate(actions, obs_red, side, sim_time) self._run_actions(actions) else: actions = agent.step(sim_time, obs_blue) self._action_validate(actions, obs_blue, side, sim_time) self._run_actions(actions) @staticmethod def _get_done(observation): """推演是否结束""" sim_time = observation['sim_time'] obs_blue = observation['blue'] cmd_posts = [u for u in obs_blue['units'] if u['LX'] == 41] done = [False, 0, 0] # [对战是否结束, 红方得分, 蓝方得分] # 若蓝方态势平台信息里没有指挥所, 说明两个指挥所已经被打掉 if len(cmd_posts) == 0 or sim_time >= 9000: done[0] = True if len(cmd_posts) == 0: # red win done[1:] = [1, -1] elif len(cmd_posts) == 1: # tie done[1:] = [0, 0] elif len(cmd_posts) == 2: # blue win done[1:] = [-1, 1] return done def _action_validate(self, actions, obs_own, side, sim_time): """指令有效性基本检查""" # 检查项: 执行主体, 目标, 护航对象, 机场相关指令是否有效 # 检查项: 地防/护卫舰的初始部署位置只能在己方半场, 且只允许在开局2分钟内进行调整 for action in actions: maintype = action['maintype'] if maintype in ['Ship_Move_Deploy', 'Ground_Move_Deploy']: pos2d = (action['point_x'], action['point_y']) self._validate_deploy(side, pos2d, sim_time) if 'self_id' in action: self_id = int(action['self_id']) self._validate_self_id(maintype, self_id, obs_own) if 'target_id' in action: target_id = int(action['target_id']) self._validate_target_id(target_id, obs_own) if 'cov_id' in action: cov_id = int(action['cov_id']) self._validate_cov_id(cov_id, obs_own) if 'airport_id' in action: airport_id = int(action['airport_id']) self._validate_airport(airport_id, action, obs_own) @staticmethod def _validate_deploy(side, pos2d, sim_time): assert sim_time <= 120 if side == 'red': assert pos2d[0] >= 0 else: assert pos2d[0] <= 0 @staticmethod def _validate_self_id(maintype, self_id, obs_own): """判断执行主体是否有效""" # 空中拦截指令(make_airattack)执行主体只能是单平台(需要选手考虑目标分配问题) # 返航指令(make_returntobase)执行主体可以是单平台也可以是编队(考虑编队内单机油量不足提前返航) # 其他指令执行主体原则上必须是编队, 为防止出错目前服务端也支持给单平台下指令. unit = [u for u in obs_own['units'] if u['ID'] == self_id] team = [u for u in obs_own['teams'] if u['TMID'] == self_id] if maintype == 'airattack': if len(unit) == 0: raise Exception("无效平台编号%s" % self_id) else: if len(unit) == 0 and len(team) == 0: raise Exception("无效执行主体编号%s" % self_id) else: obj = unit[0] if len(unit) > 0 else team[0] if obj['LX'] not in type4cmd[maintype]: raise Exception("类型为%s的编队%s无法执行%s指令" % (obj[0]['LX'], self_id, maintype)) @staticmethod def _validate_target_id(target_id, obs_own): """判断目标编号是否合法""" # 所有指令的目标编号, 必须是敌方单平台号 unit = [u for u in obs_own['qb'] if u['ID'] == target_id] if len(unit) == 0: raise Exception("无效目标平台编号%s" % target_id) @staticmethod def _validate_cov_id(cov_id, obs_own): """护航对象是否有效""" # 护航对象编号, 必须为己方编队号 team = [u for u in obs_own['teams'] if u['TMID'] == cov_id] if len(team) == 0: raise Exception("无效护航对象编号%s" % cov_id) else: type4cov = [12, 13, 15] if int(team[0]['LX']) not in type4cov: raise Exception("非法护航目标类型%s" % team[0]['LX']) @staticmethod def _validate_airport(airport_id, action, obs_own): """判断机场相关指令的有效性""" airports = obs_own['airports'] airport = [u for u in airports if u['ID'] == airport_id] # 根据机场情况判断指令合法性 maintype = action['maintype'] if len(airport) == 0: raise Exception("无效机场编号%s" % airport_id) elif maintype == 'returntobase': pass else: obj = airport[0] if not obj['WH']: raise Exception("机场%s修复中无法执行起飞指令" % obj['ID']) if maintype == 'takeoffprotect': fly_type = 11 # 起飞护航指令默认起飞歼击机 elif maintype in ['takeoffareahunt', 'takeofftargethunt']: fly_type = 15 # 起飞突击类指令默认起飞轰炸机 else: fly_type = action['fly_type'] fly_num = action['fly_num'] if int(fly_type) not in type4cmd[maintype]: raise Exception("机场无法起降类型为%s的单位" % fly_type) type_map = {11: 'AIR', 12: 'AWCS', 13: 'JAM', 14: 'UAV', 15: 'BOM'} attr = type_map[fly_type] if fly_num > obj[attr]: print('指令>>>', action) raise Exception("起飞数量%d大于机场可起飞数量%d" % (fly_num, obj[attr]))