def main():
    hillshade_dir = os.path.join(work_dir, 'hillshade_sub_images')
    dem_slope_8bit_dir = os.path.join(work_dir, 'dem_slope_8bit_sub_images')
    dem_relative_8bit_dir = os.path.join(work_dir, 'dem_relative_8bit_sub_images')
    other_dirs = [dem_slope_8bit_dir, dem_relative_8bit_dir]
    other_dirs_tifs = [io_function.get_file_list_by_ext('.tif', o_dir, bsub_folder=True)
                       for o_dir in other_dirs]

    json_list = io_function.get_file_list_by_ext('.json', hillshade_dir, bsub_folder=True)
    json_base_list = [os.path.basename(item) for item in json_list]

    for json_path, base_name in zip(json_list, json_base_list):
        date_str, poly_num = get_date_str_poly_num(base_name)
        for tif_list in other_dirs_tifs:
            for tif in tif_list:
                name_noext = io_function.get_name_no_ext(tif)
                if date_str in name_noext and poly_num in name_noext:
                    # modify and save the json file
                    dst_path = os.path.join(os.path.dirname(tif), name_noext + '.json')
                    # io_function.copy_file_to_dst(json_path, dst_path)
                    data_dict = io_function.read_dict_from_txt_json(json_path)
                    data_dict['imagePath'] = os.path.basename(tif)
                    data_dict['imageData'] = None
                    io_function.save_dict_to_txt_json(dst_path, data_dict)
                    print('saving %s' % dst_path)
                    break

def boxes_minXYmaxXY_to_imageXY(idx, total, json_file, ref_image_src):
    # return: class_id_list, name_list, confidence_list, box_poly_list
    # ref_image_src is an open rasterio image object
    objects = io_function.read_dict_from_txt_json(json_file)
    if objects is None or len(objects) < 1:
        return [], [], [], []
    class_id_list = []
    name_list = []
    confidence_list = []
    box_poly_list = []
    transform = ref_image_src.transform
    for object in objects:
        [xmin, ymin, xmax, ymax] = object['bbox']
        class_id_list.append(object['class_id'])
        name_list.append(object['name'])
        confidence_list.append(object['confidence'])
        x0_geo, y0_geo = pixel_xy_to_geo_xy(xmin, ymin, transform)
        x1_geo, y1_geo = pixel_xy_to_geo_xy(xmax, ymax, transform)
        # minX, minY, maxX, maxY, that is: bounds.
        # Because the Y direction in geo coordinates is opposite to that in
        # pixel coordinates, y0_geo > y1_geo.
        box_poly_list.append([x0_geo, y1_geo, x1_geo, y0_geo])
    return class_id_list, name_list, confidence_list, box_poly_list

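# A minimal sketch of the pixel-to-geo conversion assumed above (the repo's
# pixel_xy_to_geo_xy is not shown in this section; the name and signature here
# mirror its usage). In rasterio, applying the dataset's affine transform to a
# (col, row) pixel position yields its geo (x, y), which is why the max pixel
# row maps to the min geo Y in the bounds built above.
def pixel_xy_to_geo_xy_sketch(x_pixel, y_pixel, transform):
    # transform is an affine.Affine from rasterio; multiplication maps
    # pixel (col, row) to geo (x, y)
    x_geo, y_geo = transform * (x_pixel, y_pixel)
    return x_geo, y_geo
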
def plot_miou_loss_main(txt_path, save_file_pre=None, train_count=None, val_count=None, batch_size=None):
    '''
    plot the miou or loss curve
    :param txt_path: path to the txt (json) file recording miou or loss
    :param save_file_pre: prefix for the output file name
    :param train_count: the number of training samples
    :param val_count: the number of validation samples
    :param batch_size: the training batch size
    :return: the path of the saved figure
    '''
    if os.path.isfile(txt_path) is False:
        return False
    if save_file_pre is None:
        file_name = os.path.splitext(os.path.basename(txt_path))[0]
    else:
        file_name = save_file_pre
    save_dir = os.path.dirname(txt_path)
    dict_data = io_function.read_dict_from_txt_json(txt_path)
    # print(dict_data)
    # for key in dict_data.keys():
    #     print(key)
    save_path = os.path.join(save_dir, file_name + '.jpg')
    if 'miou' in file_name:
        plot_miou_step_time(dict_data, save_path, train_count, val_count, batch_size)
    elif 'loss' in file_name:
        plot_loss_learnRate_step_time(dict_data, save_path, train_count, val_count, batch_size)
    else:
        raise ValueError('Cannot recognize the file name as miou or loss: %s' % os.path.basename(txt_path))
    return save_path

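# Hypothetical usage (file names are illustrative): the dispatch above keys on
# whether 'miou' or 'loss' appears in the file name.
# plot_miou_loss_main('exp8/eval/training_miou.txt', train_count=1000, val_count=200, batch_size=16)
# plot_miou_loss_main('exp8/eval/training_loss.txt', train_count=1000, val_count=200, batch_size=16)
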
def get_time_other_info_from_tune(work_dir, train_output):
    res_json = os.path.join(work_dir, 'result.json')
    if os.path.isfile(res_json) and os.stat(res_json).st_size > 0:
        tune_res_dict = io_function.read_dict_from_txt_json(res_json)
        train_output['time_total_h'].append(tune_res_dict['time_total_s'] / 3600.0)
    else:
        train_output['time_total_h'].append(0)

def merge_patch_json_files_to_one(res_json_files, save_path):
    # merge the detection results (json) of individual patches into one file
    all_objects = []
    for idx, f_json in enumerate(res_json_files):
        objects = io_function.read_dict_from_txt_json(f_json)
        if len(objects) < 1:
            continue
        all_objects.extend(objects)
    json_data = json.dumps(all_objects, indent=2)
    with open(save_path, "w") as f_obj:
        f_obj.write(json_data)

def update_subset_info(txt_path, key_list=None, info_list=None):
    # maintain the info of a subset for processing, as a dict:
    # 'id': subset id
    # 'shp': the shapefile containing all grids in this subset
    # 'pre_status': the status of downloading and registration of ArcticDEM, one of: 'notYet', 'working', 'done'
    # 'proc_status': the status of processing ArcticDEM, one of: 'notYet', 'working', 'done'
    info_dict = {}
    if os.path.isfile(txt_path):
        info_dict = io_function.read_dict_from_txt_json(txt_path)
    if isinstance(key_list, str):
        key_list = [key_list]
    if isinstance(info_list, str):
        info_list = [info_list]
    for key, info in zip(key_list, info_list):
        info_dict[key] = info
    io_function.save_dict_to_txt_json(txt_path, info_dict)

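# Hypothetical usage of the status protocol described above: mark a subset as
# 'working' before processing and 'done' afterwards (the file name is illustrative).
# update_subset_info('subset_grids_1_info.txt', key_list=['proc_status'], info_list=['working'])
# ... process the subset ...
# update_subset_info('subset_grids_1_info.txt', key_list=['proc_status'], info_list=['done'])
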
def get_miou_of_overall_and_class_1_step(work_dir, para_file, train_output):
    exp_name = parameters.get_string_parameters(os.path.join(work_dir, para_file), 'expr_name')
    miou_path = os.path.join(work_dir, exp_name, 'eval', 'miou.txt')
    if os.path.isfile(miou_path) is False:
        print("warning, no miou.txt in %s" % work_dir)
        train_output['class_1'].append(0)
        train_output['overall'].append(0)
        train_output['step'].append(0)
        return False
    iou_dict = io_function.read_dict_from_txt_json(miou_path)
    train_output['class_1'].append(iou_dict['class_1'][-1])
    train_output['overall'].append(iou_dict['overall'][-1])
    train_output['step'].append(iou_dict['step'][-1])
    return True

def get_overall_miou(miou_path):
    import basic_src.io_function as io_function
    # e.g. exp8/eval/miou.txt
    iou_dict = io_function.read_dict_from_txt_json(miou_path)
    return iou_dict['overall'][-1]

def yolo_results_to_shapefile(curr_dir, img_idx, area_save_dir, nms_overlap_thr, test_id):
    img_save_dir = os.path.join(area_save_dir, 'I%d' % img_idx)
    res_yolo_json = img_save_dir + '_result.json'
    res_json_files = []
    if os.path.isfile(res_yolo_json):
        print('found %s in %s, will get shapefile from it' % (res_yolo_json, area_save_dir))
    else:
        if os.path.isdir(img_save_dir):
            res_json_files = io_function.get_file_list_by_ext('.json', img_save_dir, bsub_folder=False)
            if len(res_json_files) < 1:
                print('Warning, no YOLO results in %s, skip' % img_save_dir)
                return None
            print('found %d json files for patches in %s, will get shapefile from them'
                  % (len(res_json_files), img_save_dir))
        else:
            print('Warning, folder: %s does not exist, skip' % img_save_dir)
            return None

    out_name = os.path.basename(area_save_dir) + '_' + test_id
    # to shapefile
    out_shp = 'I%d' % img_idx + '_' + out_name + '.shp'
    out_shp_path = os.path.join(img_save_dir, out_shp)
    if os.path.isfile(out_shp_path):
        print('%s already exists' % out_shp_path)
    else:
        class_id_list = []
        name_list = []
        box_bounds_list = []
        confidence_list = []
        source_image_list = []
        if len(res_json_files) < 1:
            # use the result in *_result.json
            yolo_res_dict_list = io_function.read_dict_from_txt_json(res_yolo_json)
            total_frame = len(yolo_res_dict_list)
            image1 = yolo_res_dict_list[0]['filename']
            for idx, res_dict in enumerate(yolo_res_dict_list):
                id_list, na_list, con_list, box_list, image1 = \
                    boxes_yoloXY_to_imageXY(idx, total_frame, res_dict, ref_image=None)
                class_id_list.extend(id_list)
                name_list.extend(na_list)
                confidence_list.extend(con_list)
                box_bounds_list.extend(box_list)
                source_image_list.extend([os.path.basename(image1)] * len(box_list))
        else:
            # use the results in I0/*.json
            image1 = io_function.read_list_from_txt(os.path.join(area_save_dir, '%d.txt' % img_idx))[0]
            total_frame = len(res_json_files)  # the patch count
            # only open the image once
            with rasterio.open(image1) as src:
                for idx, f_json in enumerate(res_json_files):
                    id_list, na_list, con_list, box_list = \
                        boxes_minXYmaxXY_to_imageXY(idx, total_frame, f_json, src)
                    class_id_list.extend(id_list)
                    name_list.extend(na_list)
                    confidence_list.extend(con_list)
                    box_bounds_list.extend(box_list)
            source_image_list.extend([os.path.basename(image1)] * len(box_bounds_list))

        if len(box_bounds_list) < 1:
            print('Warning, no predicted boxes in %s' % img_save_dir)
            return None

        # apply non_max_suppression
        # print('box_bounds_list', box_bounds_list)
        # print('confidence_list', confidence_list)
        pick_index = non_max_suppression(np.array(box_bounds_list), probs=np.array(confidence_list),
                                         overlapThresh=nms_overlap_thr, b_geo=True)
        # print('pick_index', pick_index)
        class_id_list = [class_id_list[idx] for idx in pick_index]
        name_list = [name_list[idx] for idx in pick_index]
        confidence_list = [confidence_list[idx] for idx in pick_index]
        box_bounds_list = [box_bounds_list[idx] for idx in pick_index]
        source_image_list = [source_image_list[idx] for idx in pick_index]

        # to polygons
        box_poly_list = [vector_gpd.convert_image_bound_to_shapely_polygon(item)
                         for item in box_bounds_list]

        # save to shapefile
        detect_boxes_dict = {'class_id': class_id_list, 'name': name_list,
                             'source_img': source_image_list, 'confidence': confidence_list,
                             "Polygon": box_poly_list}
        save_pd = pd.DataFrame(detect_boxes_dict)
        ref_prj = map_projection.get_raster_or_vector_srs_info_proj4(image1)
        vector_gpd.save_polygons_to_files(save_pd, 'Polygon', ref_prj, out_shp_path)

    return out_shp_path

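# A minimal sketch of the greedy IoU-based suppression that non_max_suppression
# above is assumed to perform (the repo's implementation, including its b_geo
# handling, is not shown here; names below are illustrative, not its API).
import numpy as np

def nms_sketch(boxes, probs, overlap_thresh):
    # boxes: N x 4 array of [minX, minY, maxX, maxY]; probs: N confidences
    order = np.argsort(probs)[::-1]  # highest confidence first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of box i with each remaining box
        xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
        yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
        xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
        yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_rest = (boxes[order[1:], 2] - boxes[order[1:], 0]) * \
                    (boxes[order[1:], 3] - boxes[order[1:], 1])
        iou = inter / (area_i + area_rest - inter)
        # keep only boxes that do not overlap box i too much
        order = order[1:][iou <= overlap_thresh]
    return keep
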
    # (tail of a plotting function; the preceding ax[0].plot(...) call is truncated above)
                 linewidth=1)
    ax[0].plot(train_dict['step'], train_dict['class_1'], linestyle='-', color='black',
               label="Rock glaciers", linewidth=0.8)
    ax[0].plot(train_dict['step'], train_dict['overall'], 'k-.', label="Overall", linewidth=0.8)
    ax[0].grid(axis='both', ls='--', alpha=0.5, lw=0.4, color='grey')
    ax[0].set_ylabel('Training IoU')

    ax2 = ax[0].twiny()  # have another x-axis for time
    min_t, max_t = wall_time_to_relative_time(train_dict['wall_time'])
    ax2.set_xlim([min_t, max_t])
    ax2.set_xlabel("Training time (hours)", fontsize=10)

    plt.savefig('/Users/huyan/Data/WKL/Plots/IoU_new.png', dpi=300)
    # plt.show()


file_path = '/Users/huyan/Data/WKL/automapping/WestKunlun_Sentinel2_2018_westKunlun_beta_exp14_Area30k/'
train_txt_path = file_path + 'westKunlun_beta_exp14_training_miou.txt'
val_txt_path = file_path + 'westKunlun_beta_exp14_val_miou.txt'
train_dict_data = io_function.read_dict_from_txt_json(train_txt_path)
val_dict_data = io_function.read_dict_from_txt_json(val_txt_path)
plot_miou_step_time(train_dict_data, val_dict_data)

def get_overall_miou(miou_path):
    # e.g. exp8/eval/miou.txt
    iou_dict = io_function.read_dict_from_txt_json(miou_path)
    return iou_dict['overall'][-1]

def get_subset_info(txt_path):
    return io_function.read_dict_from_txt_json(txt_path)

def produce_dem_products(tasks, b_remove_job_folder=True, no_slurm=False):
    # this function runs on a processing node, such as CURC
    global no_subset_to_proc
    subset_txt_list = get_subset_info_txt_list('proc_status', ['notYet', 'working'])
    if len(subset_txt_list) < 1:
        print(datetime.now(), 'checking times: %d: No subset (%s) for processing, wait 300 seconds'
              % (no_subset_to_proc, msg_file_pre + '*.txt'))
        time.sleep(300)
        no_subset_to_proc += 1
        if no_subset_to_proc > 60:
            # after waiting 60 times (10 hours), return False to exit the while loop
            return False
        return True
    no_subset_to_proc = 0  # reset the count after finding a job to process

    # job names are from parallel_processing_curc.py
    task_job_name = {'dem_diff': 'demD', 'dem_headwall_grid': 'gHW',
                     'hillshade_headwall_line': 'dLi', 'segment': 'seg'}
    task_depend = {'dem_diff': [], 'dem_headwall_grid': [],
                   'hillshade_headwall_line': ['dem_headwall_grid'], 'segment': ['dem_diff']}

    subset_txt_list = sorted(subset_txt_list)
    for sub_txt in subset_txt_list:
        update_subset_info(sub_txt, key_list=['proc_status'], info_list=['working'])
        subset_info = io_function.read_dict_from_txt_json(sub_txt)
        ext_shp = subset_info['shp']

        # submit tasks without dependencies
        tasks_no_depend = [item for item in tasks if len(task_depend[item]) < 1]
        for task in tasks_no_depend:
            if no_slurm:
                res = os.system('./run_local.sh %s %s' % (ext_shp, task))
            else:
                res = os.system('./run.sh %s %s' % (ext_shp, task))
            if res != 0:
                sys.exit(1)
        time.sleep(5)  # wait

        # submit tasks with dependencies
        tasks_with_depend = [item for item in tasks if len(task_depend[item]) > 0]
        while len(tasks_with_depend) > 0:
            # iterate over a copy, since tasks are removed from the list inside the loop
            for task in list(tasks_with_depend):
                depend_tasks = task_depend[task]
                job_count_list = [slurm_utility.get_submit_job_count(curc_username,
                                                                     job_name_substr=task_job_name[item])
                                  for item in depend_tasks]
                if sum(job_count_list) > 0:
                    print(machine_name, datetime.now(),
                          'task: %s needs results of task: %s whose jobs are not completed, need to wait'
                          % (task, str(depend_tasks)))
                else:
                    if no_slurm:
                        res = os.system('./run_local.sh %s %s' % (ext_shp, task))
                    else:
                        res = os.system('./run.sh %s %s' % (ext_shp, task))
                    if res != 0:
                        sys.exit(1)
                    tasks_with_depend.remove(task)  # once submitted, remove it
            time.sleep(60)

        # wait until all jobs have finished
        while True:
            if slurm_utility.get_submit_job_count(curc_username, job_name_substr=None) > 0:
                print(machine_name, datetime.now(),
                      'wait 300 seconds until all submitted jobs are completed')
                time.sleep(300)
                continue
            if b_remove_job_folder:
                # remove temporary folders
                if 'dem_diff' in tasks:
                    os.system('rm -r dem_diff_*')
                if 'segment' in tasks:
                    os.system('rm -r seg_dem_diff_*')
                if 'dem_headwall_grid' in tasks:
                    os.system('rm -r extract_headwall_grid_*')
                if 'hillshade_headwall_line' in tasks:
                    os.system('rm -r hillshade_newest_headwall_line_*')
            break

        # once all grids have been submitted, mark the subset as done;
        # we don't check the results of each grid here
        update_subset_info(sub_txt, key_list=['proc_status'], info_list=['done'])

    return True

def remove_no_need_dem_files(b_remove=True):
    # if os.path.isfile(grid_complete_list_txt):
    #     completed_id_list = [int(item) for item in io_function.read_list_from_txt(grid_complete_list_txt)]
    # else:
    #     print(datetime.now(), 'no complete grids')
    #     return True
    #
    # if os.path.isfile(grid_excluded_list_txt):
    #     exclude_id_list = [int(item) for item in io_function.read_list_from_txt(grid_excluded_list_txt)]
    #     completed_id_list.extend(exclude_id_list)

    completed_id_list = get_complete_ignore_grid_ids()
    if len(completed_id_list) < 1:
        print(datetime.now(), 'no complete grids')
        return True

    completed_id_set = set(completed_id_list)
    # check four folders: arcticDEM_tile_tarball_dir, arcticDEM_tile_reg_tif_dir, tarball_dir, arcticDEM_reg_tif_dir
    strip_dem_cover_grids = io_function.read_dict_from_txt_json(strip_dem_cover_grids_txt)
    strip_no_need_list = [strip for strip in strip_dem_cover_grids.keys()
                          if set(strip_dem_cover_grids[strip]).issubset(completed_id_set)]

    tile_dem_cover_grids = io_function.read_dict_from_txt_json(tile_dem_cover_grids_txt)
    tile_no_need_list = [tile for tile in tile_dem_cover_grids.keys()
                         if set(tile_dem_cover_grids[tile]).issubset(completed_id_set)]

    if b_remove is False:
        save_list_no_need_dem_files('no_need_ArcticDEM_strip_names.txt', strip_no_need_list)
        save_list_no_need_dem_files('no_need_ArcticDEM_mosaic_names.txt', tile_no_need_list)
    else:
        # remove
        basic.outputlogMessage('there are %d no longer needed strip DEMs; '
                               'downloaded files will be or have been removed' % len(strip_no_need_list))
        for strip in strip_no_need_list:
            file_list = io_function.get_file_list_by_pattern(tarball_dir, strip + '*')
            file_list_2 = io_function.get_file_list_by_pattern(arcticDEM_reg_tif_dir, strip + '*')
            file_list.extend(file_list_2)
            if len(file_list) > 0:
                for path in file_list:
                    basic.outputlogMessage('removing %s' % path)
                    io_function.delete_file_or_dir(path)

        basic.outputlogMessage('there are %d no longer needed tile DEMs; '
                               'downloaded files will be or have been removed' % len(tile_no_need_list))
        for tile in tile_no_need_list:
            file_list = io_function.get_file_list_by_pattern(arcticDEM_tile_tarball_dir, tile + '*')
            file_list_2 = io_function.get_file_list_by_pattern(arcticDEM_tile_reg_tif_dir, tile + '*')
            file_list.extend(file_list_2)
            # remove slope files derived from ArcticDEM (mosaic)
            file_list_3 = io_function.get_file_list_by_pattern(arcticDEM_tile_slope_dir, tile + '*')
            file_list.extend(file_list_3)
            if len(file_list) > 0:
                for path in file_list:
                    basic.outputlogMessage('removing %s' % path)
                    io_function.delete_file_or_dir(path)

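# Illustrative example of the coverage test above (ids are made up): a DEM file
# is removable only when every grid it covers is in the completed set.
# set([101, 102]).issubset({100, 101, 102, 103})  # -> True,  safe to remove
# set([101, 199]).issubset({100, 101, 102, 103})  # -> False, still needed
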
def merge_subImages_from_gan(multi_gan_source_regions, multi_gan_regions, gan_working_dir,
                             gan_dir_pre_name, save_image_dir, save_label_dir):
    '''
    merge the translated sub-images from GAN into the original sub-images
    :param multi_gan_regions:
    :param gan_working_dir:
    :param gan_dir_pre_name:
    :return:
    '''
    current_dir = os.getcwd()
    sub_img_label_txt_noGAN, sub_img_label_txt, area_ini_sub_images_labels_dict = \
        original_sub_images_labels_list_before_gan()

    # # get original sub-images and labels
    # org_sub_images = []
    # org_sub_labels = []
    # with open(sub_img_label_txt_noGAN) as txt_obj:
    #     line_list = [name.strip() for name in txt_obj.readlines()]
    #     for line in line_list:
    #         sub_image, sub_label = line.split(':')
    #         org_sub_images.append(os.path.join(current_dir, sub_image))
    #         org_sub_labels.append(os.path.join(current_dir, sub_label))

    # merge new sub-images, and copy sub-labels if necessary
    new_sub_images = []
    new_sub_labels = []
    area_ini_sub_images_labels = io_function.read_dict_from_txt_json(area_ini_sub_images_labels_dict)
    # copy the original sub-images and labels from before GAN
    for key in area_ini_sub_images_labels.keys():
        for line in area_ini_sub_images_labels[key]:
            sub_image, sub_label = line.split(':')
            new_sub_images.append(sub_image)
            new_sub_labels.append(sub_label)

    for area_idx, (area_ini, area_src_ini) in enumerate(zip(multi_gan_regions, multi_gan_source_regions)):
        area_name = parameters.get_string_parameters(area_ini, 'area_name')
        area_remark = parameters.get_string_parameters(area_ini, 'area_remark')
        area_time = parameters.get_string_parameters(area_ini, 'area_time')
        gan_project_save_dir = get_gan_project_save_dir(gan_working_dir, gan_dir_pre_name, area_name,
                                                        area_remark, area_time, area_src_ini)

        org_sub_images = []
        org_sub_labels = []
        for line in area_ini_sub_images_labels[os.path.basename(area_src_ini)]:
            sub_image, sub_label = line.split(':')
            org_sub_images.append(os.path.join(current_dir, sub_image))
            org_sub_labels.append(os.path.join(current_dir, sub_label))

        # the new images keep the same order as the original images
        for idx, (org_img, org_label) in enumerate(zip(org_sub_images, org_sub_labels)):
            new_img = os.path.join(gan_project_save_dir, 'subImages_translate', 'I%d.tif' % idx)
            if os.path.isfile(new_img) is False:
                basic.outputlogMessage('warning, the %d th image does not exist, '
                                       'may exceed gen_max_dataset_size, skip the following images' % idx)
                break
            # check height, width, band count, and datatype
            height, width, count, dtype = raster_io.get_height_width_bandnum_dtype(new_img)
            o_height, o_width, o_count, o_dtype = raster_io.get_height_width_bandnum_dtype(org_img)
            if height != o_height or width != o_width or count != o_count or dtype != o_dtype:
                raise ValueError('inconsistency between the new GAN image and the original image: %s vs %s'
                                 % (str([height, width, count, dtype]),
                                    str([o_height, o_width, o_count, o_dtype])))

            # copy the sub-image and sub-label
            new_file_name_no_ext = io_function.get_name_no_ext(org_img) + '_' + \
                                   os.path.basename(gan_project_save_dir)
            save_img_path = os.path.join(save_image_dir, new_file_name_no_ext + '_gan.tif')
            save_label_path = os.path.join(save_label_dir, new_file_name_no_ext + '_label.tif')
            io_function.copy_file_to_dst(new_img, save_img_path, overwrite=False)
            io_function.copy_file_to_dst(org_label, save_label_path, overwrite=False)

            new_sub_images.append(save_img_path)
            new_sub_labels.append(save_label_path)

    # save a new images_labels_list.txt, overwriting the original one
    with open(sub_img_label_txt, 'w') as f_obj:
        lines = [img + ':' + label + '\n' for img, label in zip(new_sub_images, new_sub_labels)]
        f_obj.writelines(lines)

    return True

def image_translate_train_generate_one_domain(gan_working_dir, gan_para_file, area_src_ini, area_gan_ini,
                                              gpu_ids, domainB_imgList):
    current_dir = os.getcwd()

    # get the original sub-images
    _, _, area_ini_sub_images_labels_dict = original_sub_images_labels_list_before_gan()
    sub_img_label_txt = os.path.join(current_dir, area_ini_sub_images_labels_dict)
    if os.path.isfile(sub_img_label_txt) is False:
        raise IOError('%s not in the current folder, please get subImages first' % sub_img_label_txt)

    # prepare the image list of domain A
    # what if the size of some images does not fit the CUT input?
    domain_A_images = []
    # domain_A_labels = []
    # with open(sub_img_label_txt) as txt_obj:
    #     line_list = [name.strip() for name in txt_obj.readlines()]
    #     for line in line_list:
    #         sub_image, sub_label = line.split(':')
    #         domain_A_images.append(os.path.join(current_dir, sub_image))
    #         # domain_A_labels.append(os.path.join(current_dir, sub_label))
    area_ini_sub_images_labels = io_function.read_dict_from_txt_json('area_ini_sub_images_labels.txt')
    for line in area_ini_sub_images_labels[os.path.basename(area_src_ini)]:
        sub_image, sub_label = line.split(':')
        domain_A_images.append(os.path.join(current_dir, sub_image))
        # domain_A_labels.append(os.path.join(current_dir, sub_label))

    os.chdir(gan_working_dir)
    io_function.save_list_to_txt('image_A_list.txt', domain_A_images)
    # save the target images, which will be considered as the target domain
    # what if there are too many images in domain B?
    io_function.save_list_to_txt('image_B_list.txt', domainB_imgList)

    gan_python = parameters.get_file_path_parameters(gan_para_file, 'python')
    cut_dir = parameters.get_directory(gan_para_file, 'gan_script_dir')
    train_script = os.path.join(cut_dir, 'train.py')
    generate_script = os.path.join(cut_dir, 'generate_image.py')

    # training of CUT
    if train_CUT_gan(gan_python, train_script, gan_para_file, gpu_ids) is False:
        os.chdir(current_dir)
        return False

    # generate images using CUT: convert images in domain A to domain B
    save_tran_img_folder = 'subImages_translate'
    if generate_image_CUT(gan_python, generate_script, gan_para_file, gpu_ids,
                          domain_A_images, save_tran_img_folder) is False:
        os.chdir(current_dir)
        return False

    # change the working directory back
    os.chdir(current_dir)