def main():
    global target_x, target_y, key_flag

    take_off()
    print("move to {:},{:}".format(target_x, target_y))
    move(target_x, target_y)

    print("Ready to be controlled from keyboard")
    rospy.Subscriber("key_value", String, key_callback)

    print("Get ready in:")
    util.count_down(5)

    while out_flag == 0:
        if key_flag > 0 and transition_flag == 1:
            if key_flag == 1:
                print("move to {:},{:}".format(target_x, target_y))
                move(target_x, target_y)
            if key_flag == 2:
                print("Yaw rotation")
                rotate_right_yaw()
            print(target_x, target_y, out_flag, key_flag)
            key_flag = 0  # reset so the next key press is handled exactly once

    drone_land()
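# main() subscribes to the "key_value" topic with key_callback, which is not shown
# in this excerpt. A hypothetical sketch of what such a callback could look like,
# assuming the message carries a single key character and that key_flag 1/2 mean
# "move" / "rotate" as in the loop above (key names are illustrative assumptions):
def key_callback(msg):
    global key_flag, out_flag
    key = msg.data
    if key == 'm':    # request a move to the current target
        key_flag = 1
    elif key == 'r':  # request a yaw rotation
        key_flag = 2
    elif key == 'q':  # leave the control loop and land
        out_flag = 1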
def take_off():
    # Take off to a height of ~1.5 m
    start_pos = get_telemetry()
    print(start_pos)
    print("Height: ~{:}m".format(VIEW_HEIGHT))
    navigate(z=VIEW_HEIGHT, speed=SPEED, frame_id='body', auto_arm=True)
    # util.wait_till_arrive(get_telemetry, 0, 0, start_pos.z + VIEW_HEIGHT, 'map', 1)
    util.count_down(4)
def drone_land():
    print("Landing")
    res = land()
    if res.success:
        print("Drone has been landed")
    util.count_down(5)  # to ensure that it has landed already
    # there is a problem with landing: it reports that it has already landed
    # when it has not, and it is not disarming (arming(False))
def rotate_right_yaw():
    print("Rotate to: {:} Degree -> {:} Rad".format(
        target_yaw * 90, target_yaw * (math.pi / 2.0)))
    navigate(x=target_x, y=target_y, z=VIEW_HEIGHT,
             yaw=target_yaw * (math.pi / 2.0),
             speed=SPEED, frame_id='aruco_map')
    util.count_down(4)
def move(target_x, target_y):
    # current_pos = get_telemetry(frame_id='aruco_map')
    navigate(x=target_x, y=target_y, z=VIEW_HEIGHT, yaw=float('nan'),
             speed=SPEED, frame_id='aruco_map')
    # util.wait_till_arrive(get_telemetry, target_x, target_y, current_pos.z)
    util.count_down(4)
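# Several routines above have a commented-out util.wait_till_arrive(...) call in
# place of the fixed util.count_down(...) delay. A hypothetical sketch of such a
# poll-until-arrived helper, assuming get_telemetry(frame_id=...) returns an
# object with x, y, z fields; the default frame and tolerance are assumptions:
import math
import rospy

def wait_till_arrive(get_telemetry, x, y, z, frame_id='aruco_map', tolerance=0.2):
    """Block until the drone is within `tolerance` metres of (x, y, z)."""
    while not rospy.is_shutdown():
        telem = get_telemetry(frame_id=frame_id)
        dist = math.sqrt((telem.x - x) ** 2 + (telem.y - y) ** 2 + (telem.z - z) ** 2)
        if dist < tolerance:
            return
        rospy.sleep(0.2)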
def drone_land(land, arming):
    print("Landing")
    res = land()  # Start landing of the drone
    # util.wait_till_arrive(telemetry, 0, 0, 0, 'body')
    # If the landing is a success
    if res.success:
        print("Drone has been landed")
    util.count_down(5)  # to ensure that it has landed already
    # there is a problem with landing: it reports that it has already landed when it has not
    arming(False)  # Stop the motors of the drone
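# Every routine above blocks on util.count_down(...) between commands. That helper
# is not shown in this excerpt; a minimal sketch, assuming it only needs to print a
# one-line-per-second countdown (hypothetical implementation):
import time

def count_down(seconds):
    """Print a simple countdown, one line per second."""
    for remaining in range(seconds, 0, -1):
        print(remaining)
        time.sleep(1)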
def release():
    # --------------- Preparation
    prompt = f"如需直接使用默认版本号:{now_version} 请直接按回车\n或手动输入版本号后按回车:"
    version = input(prompt) or now_version

    version_reg = r"\d+\.\d+\.\d+"
    if re.match(version_reg, version) is None:
        logger.info(f"版本号格式有误,正确的格式类似:1.0.0 ,而不是 {version}")
        pause_and_exit(-1)

    # Maximize the console window
    change_console_window_mode_async(disable_min_console=True)

    version = "v" + version
    run_start_time = datetime.now()

    show_head_line(f"开始发布版本 {version}", color("bold_yellow"))
    set_title_cmd = f"title 发布 {version}"
    os.system(set_title_cmd)

    # Declare the directories that will be used below
    dir_src = os.path.realpath(".")
    dir_all_release = os.path.realpath(os.path.join("releases"))
    release_dir_name = f"DNF蚊子腿小助手_{version}_by风之凌殇"
    release_7z_name = f"{release_dir_name}.7z"
    dir_github_action_artifact = "_github_action_artifact"

    # --------------- Build
    # Invoke the build script
    os.chdir(dir_src)
    build()

    # --------------- Clean up some historical data
    make_sure_dir_exists(dir_all_release)
    os.chdir(dir_all_release)
    clear_github_artifact(dir_all_release, dir_github_action_artifact)

    # --------------- Package
    os.chdir(dir_src)
    package(dir_src, dir_all_release, release_dir_name, release_7z_name, dir_github_action_artifact)

    # --------------- Build incremental patches
    create_patch_for_latest_n_version = 3

    # --------------- Build the incremental package
    os.chdir(dir_all_release)
    show_head_line(f"开始构建增量包,最多包含过去{create_patch_for_latest_n_version}个版本到最新版本的补丁", color("bold_yellow"))
    create_patch(dir_src, dir_all_release, create_patch_for_latest_n_version, dir_github_action_artifact)

    # --------------- Get the patch path (kept as a separate call to simplify debugging)
    os.chdir(dir_all_release)
    patch_file_name = create_patch(
        dir_src,
        dir_all_release,
        create_patch_for_latest_n_version,
        dir_github_action_artifact,
        get_final_patch_path_only=True,
    )

    # --------------- Tag the new version
    show_head_line("提交版本和版本变更说明,并同步到docs目录,用于生成github pages", color("bold_yellow"))
    os.chdir(dir_src)
    commit_new_version()

    # --------------- Upload to Lanzouyun
    show_head_line("开始上传到蓝奏云", color("bold_yellow"))
    os.chdir(dir_src)
    with open("upload_cookie.json") as fp:
        cookie = json.load(fp)
    os.chdir(dir_all_release)
    uploader = Uploader()
    uploader.login(cookie)
    if uploader.login_ok:
        logger.info("蓝奏云登录成功,开始上传压缩包")

        def path_in_src(filepath_relative_to_src: str) -> str:
            return os.path.realpath(os.path.join(dir_src, filepath_relative_to_src))

        realpath = os.path.realpath

        upload_info_list = [
            (
                uploader.folder_djc_helper,
                [
                    (realpath(release_7z_name), uploader.history_version_prefix),
                    (path_in_src("utils/auto_updater.exe"), ""),
                    (path_in_src("使用教程/使用文档.docx"), ""),
                    (path_in_src("使用教程/视频教程.txt"), ""),
                    (path_in_src("付费指引/付费指引.docx"), ""),
                    (path_in_src("utils/不要下载增量更新文件_这个是给自动更新工具使用的.txt"), ""),
                    (realpath(patch_file_name), uploader.history_patches_prefix),
                ],
            ),
            (
                uploader.folder_dnf_calc,
                [
                    (realpath(release_7z_name), uploader.history_version_prefix),
                ],
            ),
        ]

        logger.info(color("bold_green") + "具体上传列表如下:")
        for upload_folder, upload_list in upload_info_list:
            logger.info(color("bold_cyan") + f"\t{upload_folder.name}:")
            for local_filepath, _history_file_prefix in upload_list:
                logger.info(f"\t\t{local_filepath}")
            logger.info("\n")

        for upload_folder, upload_list in upload_info_list:
            # Iterate in reverse so that, within the same cloud-drive folder, the files listed
            # first are uploaded last and therefore show up at the top of the folder listing
            for local_filepath, history_file_prefix in reversed(upload_list):
                total_try_count = 1
                for try_index in range_from_one(total_try_count):
                    upload_ok = uploader.upload_to_lanzouyun(
                        local_filepath, upload_folder, history_file_prefix=history_file_prefix
                    )
                    if upload_ok:
                        break

                    logger.warning(f"第{try_index}/{total_try_count}次尝试上传{local_filepath}失败,等待一会后重试")
                    if try_index < total_try_count:
                        count_down("上传到网盘", 5 * try_index)
    else:
        logger.error("蓝奏云登录失败")

    # --------------- Push the version to github
    # After packaging is done, add a git tag
    os.chdir(dir_src)
    show_head_line("开始推送到github", color("bold_yellow"))
    push_github(version)

    # --------------- Finish
    logger.info("+" * 40)
    logger.info(color("bold_yellow") + f"{version} 发布完成,共用时{datetime.now() - run_start_time},请检查上传至蓝奏云流程是否OK")
    logger.info("+" * 40)
    os.system("PAUSE")
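# The retry loop above counts attempts with range_from_one, which is not defined in
# this excerpt; a plausible sketch, assuming it simply yields 1..n inclusive
# (hypothetical helper, not necessarily the project's actual implementation):
def range_from_one(n: int) -> range:
    """1-based counterpart of range(n): yields 1, 2, ..., n."""
    return range(1, n + 1)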
def train():
    counter = 1
    net = RNNResnet().cuda()
    train_parameters = [
        param for param in net.parameters() if param.requires_grad
    ]
    optimizer = optim.SGD(train_parameters, lr=0.001, momentum=0.9)

    count_down(1)  # count down before starting

    buffer = []  # replay buffer
    max_buff_num = 500  # maximum capacity of the replay buffer
    act_batch_num = 20  # number of online steps executed before each training pass
    goodEpisode = 0  # number of successfully finished episodes
    badEpisode = 0  # number of failed episodes
    learn_batch_num = 24  # batch size of each training pass
    num_step_to_check_trapped = 6  # every 6 steps, check whether the wheels are spinning in place (trapped)
    speeds = []  # car speeds, length num_step_to_check_trapped
    wheel_speeds = []  # wheel speeds, length num_step_to_check_trapped
    gamma = 0.97  # reward discount
    n = 16  # length of the sequence over which the discounted reward is computed
    least_buffer_for_learning = learn_batch_num + n  # the buffer must hold at least this many records before training
    len_rnn_seq = 3  # length of the sequence fed into the RNN

    previous_dataes = []  # data list used for the previous frame's prediction, length len_rnn_seq
    previous_xes = []  # screenshot (x) list used for the previous frame's prediction, length len_rnn_seq
    previous_infoes = []  # info (auxiliary data) list used for the previous frame's prediction, length len_rnn_seq
    dataes = []  # data list used for the current frame's prediction, length len_rnn_seq
    xes = []  # screenshot (x) list used for the current frame's prediction, length len_rnn_seq
    infoes = []  # info (auxiliary data) list used for the current frame's prediction, length len_rnn_seq

    num_step_from_last_down = 0  # number of steps since the last episode ended (not counter)
    max_num_step_a_episode = 1500

    while True:
        print("counter " + str(counter))
        for i in range(act_batch_num):  # run act_batch_num steps per counter
            if len(dataes) > 0:
                if len(previous_dataes) >= len_rnn_seq:
                    previous_dataes.pop(0)  # drop the oldest entry (first element)
                previous_dataes.append(data)  # newest entry goes last
                if len(previous_xes) >= len_rnn_seq:
                    previous_xes.pop(0)
                previous_xes.append(x)
                if len(previous_infoes) >= len_rnn_seq:
                    previous_infoes.pop(0)
                previous_infoes.append(info)

                with torch.no_grad():  # prediction only, no training
                    actions, state = net.forward(
                        torch.cat(previous_xes, dim=0),
                        torch.cat(previous_infoes, dim=0))
                rand_num = random.random()
                if rand_num > 0.9:  # explore randomly with 10% probability
                    actions = random_action()
                    control_by_vec(list(actions[0]), duration=0.2)  # execute the action
                else:
                    control_by_vec(list(actions[0].cpu().data.numpy()), duration=0.2)  # execute the action
                time.sleep(0.1)  # give the game time to apply the action

                x = getScreenPreprocess()  # screenshot after the action
                if len(xes) >= len_rnn_seq:
                    xes.pop(0)
                xes.append(x)
                data, info = retriveInputAgainstException()  # game data after the action
                if len(dataes) >= len_rnn_seq:
                    dataes.pop(0)
                dataes.append(data)
                if len(infoes) >= len_rnn_seq:
                    infoes.pop(0)
                infoes.append(info)
            else:
                # First prediction: there is no data to feed the network yet, so act randomly
                actions = random_action()
                control_by_vec(actions[0], duration=0.2, with_shift=False)
                time.sleep(0.1)
                state = 0
                x = getScreenPreprocess()  # screenshot after the action
                data, info = retriveInputAgainstException()  # game data after the action
                xes.append(x)
                dataes.append(data)
                infoes.append(info)

            reward = getReward(data, previous_dataes)  # reward of this action
            down = False
            speeds.append(data.car.Speed)  # car speed
            wheel_speeds.append(data.car.WheelSpeed)  # wheel speed
            if len(speeds) > num_step_to_check_trapped:  # check whether the car is stuck
                max_speed = max(speeds)  # maximum speed
                mean_wheel_speed = sum(wheel_speeds) / len(wheel_speeds)  # average wheel speed
                if max_speed < 1 and mean_wheel_speed > 6:  # wheels spinning in place (crashed into a car / wall, etc.)
                    reward = -5  # negative reward for spinning in place
                    down = True  # episode failed
                speeds.clear()  # reset the list
                wheel_speeds.clear()  # reset the list

            # If the car is too far from the start-to-end line, it has left the route
            if getDisctanceFromPointToLine(data.startPosition, data.endPosition, data.car.Position) > 35:
                reward = -5  # negative reward for leaving the route
                down = True  # episode failed
            if distance(data.car.Position, data.endPosition) > 220:
                reward = -10  # negative reward for moving away from the goal
                down = True  # episode failed
            if distance(data.car.Position, data.endPosition) < 20:
                reward = 25  # high reward for reaching the goal
                down = True  # episode succeeded
            reward = reward / 10

            if (len(dataes) >= len_rnn_seq) \
                    and (len(previous_dataes) >= len_rnn_seq) \
                    and (len(previous_xes) >= len_rnn_seq) \
                    and (len(xes) >= len_rnn_seq) \
                    and (len(infoes) >= len_rnn_seq) \
                    and (len(previous_infoes) >= len_rnn_seq):
                # The sequences are long enough to be used for training
                record = Record()
                # Tensors must be converted to numpy before being stored; otherwise the old
                # computation graphs are never freed and memory eventually overflows
                record.p_xes = [x.cpu().data.numpy() for x in previous_xes]
                record.p_infoes = [
                    info.cpu().data.numpy() for info in previous_infoes
                ]
                record.xes = [x.cpu().data.numpy() for x in xes]
                record.state = state.cpu().data.numpy()
                record.reward = reward
                record.infoes = [info.cpu().data.numpy() for info in infoes]
                record.down = down
                record.actions = actions.cpu().data.numpy() if isinstance(
                    actions, torch.Tensor) else actions
                logger.info("r: {}; actions: {}; down: {}".format(
                    reward, record.actions, down))
                if len(buffer) >= max_buff_num:
                    buffer.pop(0)  # drop the oldest record
                buffer.append(record)  # append the newest record

            disToEnd = distance(data.endPosition, data.car.Position)  # distance to the goal
            print("reward at {} is {}, distance2end {}".format(
                counter, reward, disToEnd))

            if num_step_from_last_down >= max_num_step_a_episode:
                down = True  # too many steps without reaching the goal counts as a failure
            if down:  # episode ended (success or failure)
                backToStartPoint()  # go back to the start point
                print("back to start at step {}".format(counter))
                logger.info("back to start at step {}".format(counter))
                time.sleep(1)
            if down and reward >= 10:  # episode succeeded
                goodEpisode += 1
                print("awesome, agent finished the game")
                logger.info("awesome, agent finished the game")
                break
            if down and reward < 0:  # episode failed
                badEpisode += 1
                print("too bad, agent crashed the game")
                logger.info("too bad, agent crashed the game")
                break
            if down:
                num_step_from_last_down = 0
            if not down:
                num_step_from_last_down += 1

        # loss and learn
        if len(buffer) > least_buffer_for_learning:  # the buffer is long enough to learn from
            print("step {} learn".format(counter))
            down_index = -1  # index of an episode-ending frame in the buffer, -1 if there is none
            for i, record in enumerate(buffer):
                if record.down:
                    down_index = i
                    break
            if down_index != -1 and down_index < learn_batch_num:
                # If an episode-ending frame lies inside this training window,
                # the frames after it are excluded from training
                learn_batch_num = down_index

            rewards = []
            for i, record in enumerate(buffer[:learn_batch_num]):
                # Train on the first learn_batch_num records of the buffer
                reward = record.reward
                for j, next_record in enumerate(buffer[i + 1:i + n]):
                    # Accumulate the discounted target reward
                    if not next_record.down:
                        reward += gamma**(j + 1) * next_record.reward
                    else:
                        break
                rewards.append(reward)

            _p_xes = torch.cat(
                [
                    torch.tensor(np.array(x), dtype=dtype, device=device)
                    for record in buffer[:learn_batch_num] for x in record.p_xes
                ],
                dim=0
            )  # shape (learn_batch_num, len_rnn_seq) => (learn_batch_num * len_rnn_seq,)
            _p_infoes = torch.cat(
                [
                    torch.tensor(np.array(info), dtype=dtype, device=device)
                    for record in buffer[:learn_batch_num] for info in record.p_infoes
                ],
                dim=0
            )  # shape (learn_batch_num, len_rnn_seq) => (learn_batch_num * len_rnn_seq,)
            _xes = torch.cat(
                [
                    torch.tensor(np.array(x), dtype=dtype, device=device)
                    for record in buffer[:learn_batch_num + n] for x in record.xes
                ],
                dim=0
            )  # shape (learn_batch_num + n, len_rnn_seq) => ((learn_batch_num + n) * len_rnn_seq,)
            _infoes = torch.cat(
                [
                    torch.tensor(np.array(info), dtype=dtype, device=device)
                    for record in buffer[:learn_batch_num + n] for info in record.infoes
                ],
                dim=0
            )  # shape (learn_batch_num + n, len_rnn_seq) => ((learn_batch_num + n) * len_rnn_seq,)

            p_policise, p_states = net.forward(_p_xes, _p_infoes)
            _, states = net.forward(_xes, _infoes)

            _states = []
            for i, record in enumerate(buffer[:learn_batch_num]):
                state = states[i]
                for j, next_record in enumerate(buffer[i + 1:i + n]):
                    # Accumulate the discounted target state (value function) over the next n steps
                    if not next_record.down:
                        state += gamma**(j + 1) * states[i + j + 1]
                    else:
                        break
                _states.append(state)

            rewards = torch.tensor([[reward] for reward in rewards],
                                   dtype=dtype, device=device)
            states = torch.stack(_states)
            yes = rewards + states
            state_loss = torch.mean(torch.pow(p_states - yes, 2))

            advantages = -p_states
            max_policy, _ = torch.max(p_policise, 1)
            min_policy, _ = torch.min(p_policise, 1)
            smaller_than_0_index = (max_policy < 0)  # indices where max_policy < 0
            smaller_than_0_index = smaller_than_0_index.float()
            # If max_policy < 0, then max_policy -= 1 * min_policy
            max_policy = max_policy - smaller_than_0_index * min_policy
            policise_loss = torch.mean(torch.log(max_policy) * advantages)
            print("policy loss {}, state loss {}".format(
                policise_loss.cpu().data.numpy(), state_loss.cpu().data.numpy()))

            loss = state_loss + policise_loss  # total loss
            optimizer.zero_grad()  # reset gradients
            loss.backward()  # compute gradients
            torch.nn.utils.clip_grad_norm_(train_parameters, 0.5)  # clip gradients
            optimizer.step()  # gradient descent step

        # record
        counter += 1
        if counter % 100 == 0 and counter != 0:
            # Save the state_dict
            torch.save(
                net.state_dict(),
                "./saved_model/AC_resnet_{}_state_dict.pt".format(counter))
            print("model saved in {} iterations".format(counter))
            logger.info("model saved in {} iterations".format(counter))
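# train() stores each transition in a Record object, which is not defined in this
# excerpt; a minimal sketch of such a container, assuming it only needs plain
# attribute slots for the fields assigned above (hypothetical):
class Record:
    """Plain container for one replay-buffer entry."""

    def __init__(self):
        self.p_xes = None      # screenshots used for the previous prediction (numpy arrays)
        self.p_infoes = None   # auxiliary info used for the previous prediction
        self.xes = None        # screenshots after the action
        self.infoes = None     # auxiliary info after the action
        self.state = None      # predicted value of the previous state
        self.actions = None    # action vector that was executed
        self.reward = 0.0      # reward obtained for the action
        self.down = False      # whether the episode ended at this step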
if __name__ == '__main__':
    print("Starting the task after:")
    util.count_down(3)
    print("--------------Start--------------")
    main()
    print("--------------Done--------------")