def update_complete_grid_list(grid_ids, task_list):
    """Update the grid-completion record (grid_complete_list_txt).

    For every grid id not already recorded as complete, check whether the
    result files of ALL requested tasks exist; if so, mark the grid complete.
    The updated id list is written back to the module-level
    ``grid_complete_list_txt`` file.

    Args:
        grid_ids: iterable of integer grid ids to check.
        task_list: list of task names; supported checks: 'dem_diff',
            'hillshade_headwall_line', 'dem_headwall_grid', 'segment'.

    Raises:
        ValueError: if task_list is empty.
    """
    # based on some criteria, to check if results exist, then update grid_complete_list_txt
    completed_id_list = []
    if os.path.isfile(grid_complete_list_txt):
        # ids are stored as text, one per line
        completed_id_list = [int(item) for item in io_function.read_list_from_txt(grid_complete_list_txt)]
    n_task = len(task_list)
    if n_task < 1:
        raise ValueError('No task in %s' % str(task_list))
    for g_id in grid_ids:
        if g_id in completed_id_list:
            continue
        # check if it has been completed based on multiple criteria:
        # a grid is complete only when every requested task has its result
        complete_count = 0
        if 'dem_diff' in task_list and b_exist_gid_dem_diff(g_id):
            complete_count += 1
        if 'hillshade_headwall_line' in task_list and b_exist_dem_hillshade_newest_HWLine_grid(g_id):
            complete_count += 1
        if 'dem_headwall_grid' in task_list and b_exist_grid_headwall_shp(g_id):
            complete_count += 1
        if 'segment' in task_list and b_exist_grid_dem_subsidence(g_id):
            complete_count += 1
        # we may check more task results: segment, dem_headwall
        if complete_count == n_task:
            completed_id_list.append(g_id)
    # save the txt (ids back to strings, one per line)
    completed_id_list = [str(item) for item in completed_id_list]
    io_function.save_list_to_txt(grid_complete_list_txt, completed_id_list)
def generate_image_CUT(python_path, generate_script, gan_para_file, gpu_ids, image_list, save_folder):
    """Run a GAN image-generation script (CUT/cycleGAN style) on a list of images.

    Builds a command line from tile/overlay parameters in ``gan_para_file`` and
    executes it with os.system. A marker file ``generate.txt_done`` in the
    current working directory makes the call idempotent.

    Args:
        python_path: python interpreter used to run the script.
        generate_script: path of the GAN "generate" script.
        gan_para_file: ini file holding generate_tile_* / generate_overlay_* values.
        gpu_ids: list of GPU ids, joined with commas for --gpu_ids.
        image_list: input image paths; written to a txt the script reads.
        save_folder: results directory passed to the script.

    Returns:
        True on completion (now or previously). Calls sys.exit(1) if the
        generation command returns non-zero.
    """
    # marker file written at the end of this function -> already done previously
    if os.path.isfile('generate.txt_done'):
        basic.outputlogMessage('generate of new images using GAN in %s has completed previoulsy, please remove them if necessary' % os.getcwd())
        return True
    time0 = time.time()
    generate_tile_width = parameters.get_digit_parameters(gan_para_file, 'generate_tile_width', 'int')
    generate_tile_height = parameters.get_digit_parameters(gan_para_file, 'generate_tile_height', 'int')
    generate_overlay_x = parameters.get_digit_parameters(gan_para_file, 'generate_overlay_x', 'int')
    generate_overlay_y = parameters.get_digit_parameters(gan_para_file, 'generate_overlay_y', 'int')
    # the experiment "--name" passed to the script is the current folder name
    folder = os.path.basename(os.getcwd())
    img_list_txt = 'image_to_generate_list.txt'
    io_function.save_list_to_txt(img_list_txt, image_list)
    command_string = python_path + ' ' + generate_script \
                     + ' --dataset_mode ' + 'satelliteimage' \
                     + ' --model ' + 'generate' \
                     + ' --image_A_dir_txt ' + img_list_txt \
                     + ' --tile_width ' + str(generate_tile_width) \
                     + ' --tile_height ' + str(generate_tile_height) \
                     + ' --overlay_x ' + str(generate_overlay_x) \
                     + ' --overlay_y ' + str(generate_overlay_y) \
                     + ' --name ' + folder \
                     + ' --results_dir ' + save_folder \
                     + ' --gpu_ids ' + ','.join([str(item) for item in gpu_ids])
    train_max_dataset_size = parameters.get_digit_parameters_None_if_absence(gan_para_file, 'gen_max_dataset_size', 'int')
    if train_max_dataset_size is not None:
        command_string += ' --max_dataset_size ' + str(train_max_dataset_size)
    # if it's cycleGAN, need to assign A generator
    gan_model = parameters.get_string_parameters(gan_para_file, 'gan_model')
    if gan_model == 'cycle_gan':
        command_string += ' --model_suffix _A '  # from A to B
    # status, result = basic.exec_command_string(command_string)  # this will wait command finished
    # os.system(command_string + "&")  # don't know when it finished
    res = os.system(command_string)  # this work
    # print('command_string deeplab_inf_script: res', res)
    if res != 0:
        sys.exit(1)
    duration = time.time() - time0
    os.system('echo "$(date): time cost of generate images using a GAN : %.2f seconds">>"time_cost.txt"' % (duration))
    # write a file to indicate that the process has completed.
    os.system('echo done > generate.txt_done')
    return True
def split_an_image(para_file, image_path, save_dir, patch_w, patch_h, overlay_x, overlay_y):
    """Split one image into overlapping patches and record them in a list txt.

    The patch file extension is read from 'split_image_format' in para_file and
    mapped to a GDAL output driver (default PNG). Returns the path of the txt
    listing the produced patches, or None when no patch was produced.
    """
    split_format = parameters.get_string_parameters(para_file, 'split_image_format')
    # map the requested extension to a GDAL output format; PNG is the default
    out_format = {'.tif': 'GTIFF', '.jpg': 'JPEG'}.get(split_format, 'PNG')
    if not os.path.isdir(save_dir):
        io_function.mkdir(save_dir)
    split_image.split_image(image_path, save_dir, patch_w, patch_h, overlay_x, overlay_y, out_format, pre_name=None, process_num=8)
    # collect the patches that were just written
    patch_list = io_function.get_file_list_by_ext(split_format, save_dir, bsub_folder=False)
    if not patch_list:
        print('Wanring, no images in %s' % save_dir)
        return None
    list_txt_path = save_dir + '_list.txt'
    io_function.save_list_to_txt(list_txt_path, patch_list)
    return list_txt_path
def main():
    """Ortho-rectify every .ntf scene under <dir>/DATA using its matching strip DEM.

    NOTE(review): ``dir`` here looks like a module-level variable (shadowing the
    builtin) that holds the data root — confirm it is defined at file level.
    """
    ntf_list = io_function.get_file_list_by_ext('.ntf', os.path.join(dir, 'DATA'), bsub_folder=True)
    io_function.save_list_to_txt('ntf_list.txt', ntf_list)
    dem_list = io_function.get_file_list_by_ext('.tif', os.path.join(dir, 'PRODUCTS'), bsub_folder=True)
    # keep only strip DEMs (files named *_dem.tif under a 'strips' path)
    dem_list = [item for item in dem_list if item.endswith('_dem.tif') and 'strips' in item]
    io_function.save_list_to_txt('dem_list.txt', dem_list)
    for idx, ntf in enumerate(ntf_list):
        print(' (%d/%d) working on ' % (idx + 1, len(ntf_list)), ntf)
        name = os.path.basename(ntf)
        # the scene id is the third underscore-separated token of the file name
        scene_id = name.split('_')[2]
        print('scene_id:', scene_id)
        # find the DEM whose name contains this scene id
        dem_path = None
        for dem_tif in dem_list:
            if scene_id in os.path.basename(dem_tif):
                dem_path = dem_tif
                break
        if dem_path is None:
            raise ValueError('Cannot find the corresponding DEM')
        output = os.path.splitext(name)[0] + '_ortho_sub.tif'
        ortho_rectified_gdalwarp(ntf, output, dem_path)
        # break
    pass
def main(options, args):
    """Extract headwalls from slope rasters given a txt list, a folder, or one tif.

    Failed inputs are recorded in 'extract_headwall_failed_tifs.txt'.
    """
    # resolve the input argument into a list of slope rasters
    in_path = args[0]
    if in_path.endswith('.txt'):
        slope_tifs = io_function.read_list_from_txt(in_path)
    elif os.path.isdir(in_path):
        slope_tifs = io_function.get_file_list_by_ext('.tif', in_path, bsub_folder=True)
    else:
        slope_tifs = [in_path]

    process_num = options.process_num
    working_dir = './'
    save_dir = dem_headwall_shp_dir
    if not os.path.isdir(working_dir):
        io_function.mkdir(working_dir)
    if not os.path.isdir(save_dir):
        io_function.mkdir(save_dir)

    # extraction thresholds from the command-line options
    min_slope = options.min_slope
    min_size = options.min_area
    max_size = options.max_area
    max_axis_width = options.max_axis_width
    max_box_WH = options.max_box_WH

    failed_tifs = []
    total = len(slope_tifs)
    for seq, slope in enumerate(slope_tifs):
        ok = extract_headwall_from_slope(seq, total, slope, working_dir, save_dir,
                                         min_slope, min_size, max_size, max_axis_width, max_box_WH, process_num)
        if ok is False:
            failed_tifs.append(slope)
    io_function.save_list_to_txt('extract_headwall_failed_tifs.txt', failed_tifs)
def mosaic_dem_list_gdal_merge(key, dem_list, save_tif_dir, save_source):
    """Create a mosaic named <key>.tif from dem_list (later images overwrite earlier ones).

    Skips work when the mosaic already exists somewhere under save_tif_dir; a
    single non-VRT input is simply copied. Optionally records the source list
    in <key>_src.txt. Exits the process if mosaicking fails.
    """
    save_mosaic = os.path.join(save_tif_dir, key + '.tif')
    # already produced (possibly in a subfolder)? then skip
    if io_function.is_file_exist_subfolder(save_tif_dir, key + '.tif') is not False:
        basic.outputlogMessage('warning, mosaic file: %s exist, skip' % save_mosaic)
        return save_mosaic
    # record which files went into this mosaic
    if save_source:
        src_txt_path = os.path.join(save_tif_dir, key + '_src.txt')
        io_function.save_list_to_txt(src_txt_path, dem_list)
    # a single non-VRT input needs no mosaicking: copy it directly
    if len(dem_list) == 1 and raster_io.get_driver_format(dem_list[0]) != 'VRT':
        io_function.copy_file_to_dst(dem_list[0], save_mosaic)
        return save_mosaic
    nodata = raster_io.get_nodata(dem_list[0])
    # create mosaic, can handle only input one file, but is slow
    result = RSImageProcess.mosaics_images(dem_list, save_mosaic, nodata=nodata,
                                           compress='lzw', tiled='yes', bigtiff='if_safer')
    if result is False:
        sys.exit(1)  # return False
    return save_mosaic
def merge_grid_ids_txt(task, fail_id_txt_list):
    """Merge grid ids from several txt files into one de-duplicated txt.

    Returns the path of the merged file: '<task>_fail_grid_ids.txt'.
    """
    unique_ids = set()
    for txt in fail_id_txt_list:
        unique_ids.update(io_function.read_list_from_txt(txt))
    save_path = '%s_fail_grid_ids.txt' % task
    io_function.save_list_to_txt(save_path, list(unique_ids))
    return save_path
def save_grid_ids_need_to_process(grid_ids, ignore_ids=None, save_path='grid_ids_to_process.txt'):
    '''save a list to txt, contain grid ids need to process, return the number of grids to process'''
    # fall back to the globally recorded complete/ignore ids when none are given
    skip_ids = get_complete_ignore_grid_ids() if ignore_ids is None else ignore_ids
    ids_to_process = [str(gid) for gid in grid_ids if gid not in skip_ids]
    io_function.save_list_to_txt(save_path, ids_to_process)
    return len(ids_to_process)
def main():
    """Plot a histogram of the acquisition months of ArcticDEM strip DEMs.

    Reads *_dem_reg.tif file names from arcticDEM_reg_tif_dir, parses the
    yyyymmdd date from each name, saves the unique dates to
    'dates_unique.txt' and the month histogram to
    'ArcticDEM_strip_date_hist.jpg'.
    """
    file_list = io_function.get_file_list_by_pattern(arcticDEM_reg_tif_dir, '*_dem_reg.tif')
    print('Get %d dem_reg.tif from %s' % (len(file_list), arcticDEM_reg_tif_dir))
    # parse the 8-digit acquisition date embedded in each file name
    year_dates = [timeTools.get_yeardate_yyyymmdd(os.path.basename(item), pattern='[0-9]{8}_') for item in file_list]
    month_list = [item.month for item in year_dates]  # month values 1..12
    value_list = month_list
    # save unique date to txt file
    dates_unique = set(year_dates)
    dates_unique = sorted(dates_unique)
    dates_unique_str = [timeTools.date2str(item, '%Y-%m-%d') for item in dates_unique]
    io_function.save_list_to_txt('dates_unique.txt', dates_unique_str)
    # plot a histogram
    # bin_count = 12
    # NOTE(review): bin edges run 0..11 while month values run 1..12, so
    # December (12) falls outside the last edge — confirm whether
    # np.arange(0.5, 13.5, 1) was intended.
    bins = np.arange(0, 12, 1)
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
    n, bins, patches = ax.hist(value_list, bins=bins, alpha=0.75, ec="black", linewidth='1.5', color='grey', hatch='', rwidth=1)  # density = True,  # label=labels,
    # ax.legend(prop={'size': 12})
    plt.xticks(bins)
    ax.tick_params(axis='both', which='both', direction='out', length=7, labelsize=20)  # ,width=50 #,
    # if xlabelrotation is not None:
    #     ax.tick_params(axis='x', labelrotation=90)
    # if ylim is not None:
    #     ax.set_ylim(ylim)
    plt.gcf().subplots_adjust(bottom=0.15)
    # plt.grid(True)
    plt.savefig('ArcticDEM_strip_date_hist.jpg')
def main(options, args):
    """Check every tif in a folder for validity, resuming from a previous run.

    Good/invalid base names are written to '<folder>_good_list.txt' and
    '<folder>_invalid_list.txt'. Files already in the good list are skipped.
    Runs serially or with a multiprocessing Pool depending on options.process_num.
    """
    # process_num = multiprocessing.cpu_count()
    process_num = options.process_num
    data_folder = arcticDEM_reg_tif_dir
    if len(args) > 0:
        data_folder = args[0]
    tifs = io_function.get_file_list_by_pattern(data_folder, '*.tif')  # _dem_reg check all tifs
    save_invalid_txt_path = os.path.basename(data_folder) + '_invalid_list.txt'
    save_good_txt_path = os.path.basename(data_folder) + '_good_list.txt'
    tif_count = len(tifs)
    basic.outputlogMessage('get %d tif files in %s' % (tif_count, data_folder))
    # resume: load base names already verified good in an earlier run
    good_tifs = []
    if os.path.isfile(save_good_txt_path):
        good_tifs.extend(io_function.read_list_from_txt(save_good_txt_path))
    invalid_tif = []
    # remove good one for the list
    if len(good_tifs) > 0:
        tifs = [item for item in tifs if os.path.basename(item) not in good_tifs]
    if process_num == 1:
        for idx, tif in enumerate(tifs):
            if check_one_tif(idx, tif_count, tif, good_tifs):
                good_tifs.append(os.path.basename(tif))
            else:
                invalid_tif.append(os.path.basename(tif))
    else:
        theadPool = Pool(process_num)  # multi processes
        parameters_list = [(idx, tif_count, tif, good_tifs) for idx, tif in enumerate(tifs)]
        results = theadPool.starmap(check_one_tif, parameters_list)  # need python3
        # collect results in the same order as the inputs
        for tif, res in zip(tifs, results):
            if res:
                good_tifs.append(os.path.basename(tif))
            else:
                invalid_tif.append(os.path.basename(tif))
        theadPool.close()
    io_function.save_list_to_txt(save_invalid_txt_path, invalid_tif)
    io_function.save_list_to_txt(save_good_txt_path, good_tifs)
def save_selected_girds_and_ids(selected_gird_id_list, select_grid_polys, proj, save_path):
    """Write the selected grid polygons to a shapefile and their ids to a txt.

    The id txt is saved next to save_path with a '_grid_ids.txt' suffix.
    """
    # numpy.uint16 ids can wrap to negative values in shapefile fields,
    # so force them to plain Python ints first
    selected_gird_id_list = [int(gid) for gid in selected_gird_id_list]
    save_pd = pd.DataFrame({'grid_id': selected_gird_id_list, 'Polygon': select_grid_polys})
    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', proj, save_path)
    basic.outputlogMessage('saved %d grids to %s' % (len(select_grid_polys), save_path))
    # save the ids (as strings, one per line) alongside the shapefile
    save_id_txt = os.path.splitext(save_path)[0] + '_grid_ids.txt'
    io_function.save_list_to_txt(save_id_txt, [str(gid) for gid in selected_gird_id_list])
def main(options, args):
    """Check shapefiles in a folder for completeness, resuming from a previous run.

    Good/incomplete base names are written to '<folder>_good_list.txt' and
    '<folder>_incomplete_list.txt'. Serial or Pool-based depending on
    options.process_num.
    """
    # process_num = multiprocessing.cpu_count()
    process_num = options.process_num
    if len(args) > 0:
        data_folder = args[0]
    else:
        data_folder = grid_dem_headwall_shp_dir
    basic.outputlogMessage('check shapefiles in %s, with %d processes' % (data_folder, process_num))
    vector_files = io_function.get_file_list_by_ext('.shp', data_folder, bsub_folder=True)
    save_invalid_txt_path = os.path.basename(data_folder) + '_incomplete_list.txt'
    save_good_txt_path = os.path.basename(data_folder) + '_good_list.txt'
    file_count = len(vector_files)
    basic.outputlogMessage('The number of vector files: %d' % file_count)
    # resume: load base names already verified good in an earlier run
    good_files = []
    if os.path.isfile(save_good_txt_path):
        good_files.extend(io_function.read_list_from_txt(save_good_txt_path))
    incomplete_files = []
    # remove good one for the list
    if len(good_files) > 0:
        vector_files = [item for item in vector_files if os.path.basename(item) not in good_files]
    if process_num == 1:
        # tifs = io_function.get_file_list_by_ext('.tif',arcticDEM_reg_tif_dir, bsub_folder=False)
        for idx, tif in enumerate(vector_files):
            if check_one_vector_file(idx, file_count, tif, good_files):
                good_files.append(os.path.basename(tif))
            else:
                incomplete_files.append(os.path.basename(tif))
    else:
        theadPool = Pool(process_num)  # multi processes
        parameters_list = [(idx, file_count, tif, good_files) for idx, tif in enumerate(vector_files)]
        results = theadPool.starmap(check_one_vector_file, parameters_list)  # need python3
        # collect results in the same order as the inputs
        for tif, res in zip(vector_files, results):
            if res:
                good_files.append(os.path.basename(tif))
            else:
                incomplete_files.append(os.path.basename(tif))
        theadPool.close()
    io_function.save_list_to_txt(save_invalid_txt_path, incomplete_files)
    io_function.save_list_to_txt(save_good_txt_path, good_files)
def main():
    """Convert every DEM-diff grid tif to 8 bit, recording failures to a txt."""
    basic.setlogfile('log_convert_dem_diff_to8bit.txt')
    if not os.path.isdir(grid_dem_diffs_8bit_dir):
        io_function.mkdir(grid_dem_diffs_8bit_dir)
    dem_diff_list = io_function.get_file_list_by_pattern(grid_dem_diffs_dir, '*DEM_diff_grid*.tif')
    total = len(dem_diff_list)
    failed_tifs = []
    for num, tif in enumerate(dem_diff_list, start=1):
        print('%d/%d convert %s to 8 bit' % (num, total, tif))
        tif_8bit = io_function.get_name_by_adding_tail(tif, '8bit')
        output = os.path.join(grid_dem_diffs_8bit_dir, os.path.basename(tif_8bit))
        if dem_tif_to_8bit(tif, output) is False:
            failed_tifs.append(tif)
    # only write the failure list when something actually failed
    if failed_tifs:
        io_function.save_list_to_txt('failed_dem_diff_to8bit.txt', failed_tifs)
def submit_hillshade_newest_headwall_line_grid_job(ids_list, idx, grid_base_name, max_job_count): wait_if_reach_max_jobs(max_job_count, 'dLi') # draw Line on hillshade job_name = 'dLi%d' % idx check_length_jobname(job_name) work_dir = working_dir_string(idx, 'hillshade_newest_headwall_line_', root=root_dir) if os.path.isdir(work_dir) is False: io_function.mkdir(work_dir) os.chdir(work_dir) ids_list = [str(item) for item in ids_list] io_function.save_list_to_txt(grid_base_name + '.txt', ids_list) # prepare job sh_list = [ 'hillshade_headwall_line_grid.sh', 'job_hillshade_headwall_line_grid.sh' ] copy_curc_job_files(jobsh_dir, work_dir, sh_list) slurm_utility.modify_slurm_job_sh( 'job_hillshade_headwall_line_grid.sh', 'job-name', job_name) else: os.chdir(work_dir) submit_job_names = slurm_utility.get_submited_job_names(curc_username) if job_name in submit_job_names: print( 'The folder: %s already exist and the job has been submitted, skip submitting a new job' % work_dir) return # job is completed if os.path.isfile('done.txt'): print('The job in the folder: %s is Done' % work_dir) return # submit the job # sometime, when submit a job, end with: singularity: command not found,and exist, wired, then try run submit a job in scomplie note submit_job_curc_or_run_script_local('job_hillshade_headwall_line_grid.sh', 'hillshade_headwall_line_grid.sh') os.chdir(curr_dir_before_start)
def make_note_all_task_done(extent_shp, reomte_node):
    """Mark all tasks of an extent as done and sync the 'done' marker to a remote node.

    Ensures the grid-id log files exist locally (copying/creating them when
    missing) and then copies the '*_done' marker to the remote grid_ids_txt
    directory via scp.
    """
    if os.path.isdir(grid_ids_txt_dir) is False:
        io_function.mkdir(grid_ids_txt_dir)
    shp_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(extent_shp)
    # shp_grid_id_txt should be in the current folder
    if os.path.isfile(log_grid_ids_txt) is False:
        io_function.copy_file_to_dst(shp_grid_id_txt, log_grid_ids_txt)
    # the done marker is just a one-line txt containing 'Done'
    if os.path.isfile(log_grid_ids_txt_done) is False:
        io_function.save_list_to_txt(log_grid_ids_txt_done, ['Done'])
    # copy the curc (remote path is hard-coded for the CURC cluster scratch space)
    r_grid_ids_txt_dir = '/scratch/summit/lihu9680/ArcticDEM_tmp_dir/grid_ids_txt'
    scp_communicate.copy_file_folder_to_remote_machine(reomte_node, r_grid_ids_txt_dir, log_grid_ids_txt_done)
def organize_files(sub_img_dirs, save_dir):
    """Collect sub-image pngs into a dataset layout (images/, imageBound/, objectPolygons/).

    Each png is renamed to 'imgNNNNNN_<basename>' and copied together with its
    .aux.xml (if present), its '_bound.geojson' and the 'id_<n>.geojson' object
    file. All new image names are recorded in <save_dir>/imageList.txt.

    Args:
        sub_img_dirs: directories to scan for '*.png'.
        save_dir: output root; created when missing.
    """
    # bug fix: the original tested os.path.isfile(save_dir) so an existing
    # directory was never detected and mkdir was attempted again; use isdir.
    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)
    # get all png files
    png_list = []
    for img_dir in sub_img_dirs:
        pngs = io_function.get_file_list_by_pattern(img_dir, '*.png')
        png_list.extend(pngs)
    image_name_list = []
    images_dir = os.path.join(save_dir, 'images')
    imageBound_dir = os.path.join(save_dir, 'imageBound')
    objectPolygons_dir = os.path.join(save_dir, 'objectPolygons')
    io_function.mkdir(images_dir)
    io_function.mkdir(imageBound_dir)
    io_function.mkdir(objectPolygons_dir)
    for idx, png in enumerate(png_list):
        basename = io_function.get_name_no_ext(png)
        # new name: sequential 6-digit id + original base name
        new_name = 'img' + str(idx + 1).zfill(6) + '_' + basename
        image_name_list.append(new_name)
        io_function.copy_file_to_dst(png, os.path.join(images_dir, new_name + '.png'))
        # copy the GDAL aux file when present
        png_xml = png + '.aux.xml'
        if os.path.isfile(png_xml):
            io_function.copy_file_to_dst(png_xml, os.path.join(images_dir, new_name + '.png.aux.xml'))
        bound_path = png.replace('.png', '_bound.geojson')
        io_function.copy_file_to_dst(bound_path, os.path.join(imageBound_dir, new_name + '_bound.geojson'))
        # the object id is encoded as '_<digits>' in the base name
        digit_str = re.findall(r'_\d+', basename)
        id_str = digit_str[0][1:]
        object_path = os.path.join(os.path.dirname(png), 'id_%s.geojson' % id_str)
        io_function.copy_file_to_dst(object_path, os.path.join(objectPolygons_dir, new_name + '.geojson'))
    txt_path = os.path.join(save_dir, 'imageList.txt')
    io_function.save_list_to_txt(txt_path, image_name_list)
def get_augment_options():
    """Build every comma-joined combination (size 1..8) of the augmentation names.

    The full list is written to 'img_aug_str.txt' and returned.
    """
    from itertools import combinations
    aug_names = ['flip', 'blur', 'crop', 'scale', 'rotate', 'bright', 'contrast', 'noise']
    img_aug_options = []
    for count in range(1, 9):
        for combo in combinations(aug_names, count):
            # spaces are not allowed in the option string, so join with commas
            img_aug_options.append(','.join(combo))
    io_function.save_list_to_txt('img_aug_str.txt', img_aug_options)
    return img_aug_options
def save_id_grid_no_dem(grid_id):
    """Record a grid id with no DEM in grid_no_dem_txt (idempotent).

    Returns True in all cases.
    """
    # grid_dem_diff_less2dem_txt
    if not os.path.isdir(process_log_dir):
        io_function.mkdir(process_log_dir)
    # load existing ids (kept as strings, no conversion needed)
    recorded_ids = io_function.read_list_from_txt(grid_no_dem_txt) if os.path.isfile(grid_no_dem_txt) else []
    id_str = str(grid_id)
    if id_str not in recorded_ids:
        recorded_ids.append(id_str)
        io_function.save_list_to_txt(grid_no_dem_txt, recorded_ids)
        basic.outputlogMessage('Save gird id (%d) to %s' % (grid_id, grid_no_dem_txt))
    return True
def submit_extract_headwall_job(slope_tifs, idx, max_job_count):
    """Prepare and submit a slurm job extracting headwalls from a batch of slope tifs.

    Creates (or reuses) a per-batch working directory; skips submission when a
    'done.txt' marker exists or the same job name is already queued. Changes
    the process working directory and restores it at the end.
    """
    wait_if_reach_max_jobs(max_job_count, 'HW')
    job_name = 'HW%d' % idx
    check_length_jobname(job_name)
    work_dir = working_dir_string(idx, 'extract_headwall_', root=root_dir)
    if os.path.isdir(work_dir) is False:
        # fresh batch: create folder, save the tif list, stage job scripts
        io_function.mkdir(work_dir)
        os.chdir(work_dir)
        io_function.save_list_to_txt('slope_tif_list.txt', slope_tifs)
        # run segmentation
        sh_list = ['job_healwall.sh', 'extract_headwall_from_slope.sh']
        copy_curc_job_files(jobsh_dir, work_dir, sh_list)
        slurm_utility.modify_slurm_job_sh('job_healwall.sh', 'job-name', job_name)
    else:
        os.chdir(work_dir)
        # job is completed
        if os.path.isfile('done.txt'):
            print('The job in the folder: %s is Done' % work_dir)
            return
        # already queued? then don't submit a duplicate
        submit_job_names = slurm_utility.get_submited_job_names(curc_username)
        if job_name in submit_job_names:
            print('The folder: %s already exist and the job has been submitted, skip submitting a new job' % work_dir)
            return
    # submit the job
    # sometime, when submit a job, end with: singularity: command not found,and exist, wired, then try run submit a job in scomplie note
    submit_job_curc_or_run_script_local('job_healwall.sh', 'extract_headwall_from_slope.sh')
    os.chdir(curr_dir_before_start)
    return
def get_grid_ids_extent(extent_shp):
    """Return the grid ids covered by an extent shapefile.

    If extent_shp is itself an 'ArcticDEM_grid_20km' grid file, the ids are
    read directly from its 'grid_id' attribute (and also saved to a
    '*_grid_ids.txt' file); otherwise the global 20-km grid (grid_20_shp) is
    intersected with the extent via get_grid_20.
    """
    if 'ArcticDEM_grid_20km' in os.path.basename(extent_shp):
        print('input %s like a grid files, read grid polygons and ids from it directly' % extent_shp)
        grid_polys, grid_ids = vector_gpd.read_polygons_attributes_list(extent_shp, 'grid_id')
        file_name_base = os.path.splitext(os.path.basename(extent_shp))[0]
        # keep a txt record of the ids next to the shapefile
        shp_corresponding_grid_ids_txt = file_name_base + '_grid_ids.txt'
        io_function.save_list_to_txt(shp_corresponding_grid_ids_txt, [str(item) for item in grid_ids])
    else:
        # read grids and ids
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(grid_20_shp, 'id')  # in this file, it's "id", not "grid_id"
        print('time cost of read polygons and attributes', time.time() - time0)
        grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)
    return grid_ids
def save_list_no_need_dem_files(file_name, file_list):
    """Append new entries from file_list to the txt file_name, skipping duplicates.

    Returns True immediately when file_list is empty; otherwise returns the
    result of io_function.save_list_to_txt.
    """
    if not file_list:
        return True
    # start from what is already recorded (if the file exists)
    recorded = io_function.read_list_from_txt(file_name) if os.path.isfile(file_name) else []
    for entry in file_list:
        if entry not in recorded:
            recorded.append(entry)
    return io_function.save_list_to_txt(file_name, recorded)
def sync_log_files(process_node, r_log_dir, process_log_dir):
    """Two-way sync of grid-processing log files with a remote process node.

    Pushes the complete-id / dem-cover / no-dem lists to the remote machine,
    then pulls per-task id lists back (to a name-tagged local copy) and merges
    any new ids into the corresponding local file.
    """
    # copy complete id list, dem info, grid_no_dem_ids.txt to remote machine
    files_to_processNode = ['strip_dem_cover_grids.txt', 'tile_dem_cover_grids.txt', 'grid_complete_ids.txt', 'grid_no_dem_ids.txt']
    for file in files_to_processNode:
        scp_communicate.copy_file_folder_to_remote_machine(process_node, os.path.join(r_log_dir, file), os.path.join(process_log_dir, file))
    files_from_processNode = ['grid_dem_diff_less2dem_ids.txt', 'grid_no_valid_dem_ids.txt', 'grid_no_headwall_ids.txt', 'grid_no_subscidence_poly_ids.txt', 'grid_no_watermask_ids.txt']
    remote_name = process_node[1:].replace('_host', '')  # change $curc_host to curc
    for file in files_from_processNode:
        # copy the file, do not overwrite the local file: pull into a
        # remote-name-tagged sibling first
        remote_file = os.path.join(process_log_dir, io_function.get_name_by_adding_tail(file, remote_name))
        scp_communicate.copy_file_folder_from_remote_machine(process_node, os.path.join(r_log_dir, file), remote_file)
        # if they are new ids, then merged to "file"
        local_file = os.path.join(process_log_dir, file)
        remote_ids = io_function.read_list_from_txt(remote_file) if os.path.isfile(remote_file) else []  # no need, to int
        local_ids = io_function.read_list_from_txt(local_file) if os.path.isfile(local_file) else []
        new_ids = [id for id in remote_ids if id not in local_ids]
        if len(new_ids) < 1:
            continue
        else:
            local_ids.extend(new_ids)
            io_function.save_list_to_txt(local_file, local_ids)
def produce_corresponding_grid_ids_txt(extent_shp, local_grid_id_txt, log_grid_ids_txt):
    """Ensure the grid-id txt for an extent exists both locally and in the log dir.

    Order of preference: copy from the log dir when only that copy exists;
    otherwise generate it by intersecting the extent with the global 20-km
    grid, remove ids already claimed by adjacent extents, keep a 'noRMadj'
    backup of the unfiltered file, and finally mirror the result to the log
    dir. Returns True.
    """
    # if it in the logdir, not the current dir, then copy it
    if os.path.isfile(log_grid_ids_txt) and os.path.isfile(local_grid_id_txt) is False:
        io_function.copy_file_to_dst(log_grid_ids_txt, local_grid_id_txt, overwrite=False)
        return True
    # if not in the local dir, then generate it
    if os.path.isfile(local_grid_id_txt) is False:
        # read grids and ids
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(grid_20_shp, 'id')
        print('time cost of read polygons and attributes', time.time() - time0)
        # this will create local_grid_id_txt
        grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)
        # modify local_grid_id_txt by excluding grid_id already in adjacent extent
        other_grid_ids = read_grid_ids_from_other_extent()
        grid_ids = [id for id in grid_ids if id not in other_grid_ids]
        # over write local_grid_id_txt file
        grid_ids_str = [str(item) for item in grid_ids]
        # keep the pre-filter version as a '*_noRMadj' backup before overwriting
        io_function.copy_file_to_dst(local_grid_id_txt, io_function.get_name_by_adding_tail(local_grid_id_txt, 'noRMadj'))  # save a copy
        io_function.save_list_to_txt(local_grid_id_txt, grid_ids_str)
    # copy to log dir
    io_function.copy_file_to_dst(local_grid_id_txt, log_grid_ids_txt)
    return True
def mosaic_dem_list(key, dem_list, save_tif_dir, resample_method, save_source, o_format):
    """Create a mosaic named <key>.tif from dem_list using gdalwarp.

    Skips work when the mosaic already exists somewhere under save_tif_dir.
    Optionally records the source file list in <key>_src.txt.

    Args:
        key: base name (without extension) of the output mosaic.
        dem_list: input DEM paths.
        save_tif_dir: output directory.
        resample_method: resampling method passed to gdalwarp.
        save_source: when True, save the list of inputs to <key>_src.txt.
        o_format: gdalwarp output format.

    Returns:
        The mosaic path on success (or the already-existing file), False on failure.
    """
    save_mosaic = os.path.join(save_tif_dir, key + '.tif')
    # check file existence
    # if os.path.isfile(save_mosaic):
    b_save_mosaic = io_function.is_file_exist_subfolder(save_tif_dir, key + '.tif')
    if b_save_mosaic is not False:
        # bug fix: log the mosaic path, not the helper's return value
        # (consistent with the threaded variant of this function)
        basic.outputlogMessage('warning, mosaic file: %s exist, skip' % save_mosaic)
        return b_save_mosaic
        # mosaic_list.append(b_save_mosaic)
        # continue
    # save the source file for producing the mosaic
    if save_source:
        save_mosaic_source_txt = os.path.join(save_tif_dir, key + '_src.txt')
        io_function.save_list_to_txt(save_mosaic_source_txt, dem_list)
    # create mosaic, can handle only input one file
    result = RSImageProcess.mosaic_crop_images_gdalwarp(dem_list, save_mosaic,
                                                        resampling_method=resample_method,
                                                        o_format=o_format,
                                                        compress='lzw', tiled='yes', bigtiff='if_safer')
    if result is False:
        return False
    return save_mosaic
def mosaic_dem_list(key, dem_list, save_tif_dir, resample_method, save_source, o_format, thread_num=1):
    """Create a mosaic named <key>.tif from dem_list using gdalwarp (threaded variant).

    Skips work when the mosaic already exists somewhere under save_tif_dir; a
    single non-VRT input is simply copied. Optionally records the source list
    in <key>_src.txt. Exits the process if mosaicking fails.

    Args:
        key: base name (without extension) of the output mosaic.
        dem_list: input DEM paths.
        save_tif_dir: output directory.
        resample_method: resampling method passed to gdalwarp.
        save_source: when True, save the list of inputs to <key>_src.txt.
        o_format: gdalwarp output format.
        thread_num: threads passed through to gdalwarp.

    Returns:
        The mosaic path (new or already existing).
    """
    # print('\n\n os.fork \n\n', os.fork())
    # if os.fork()==0:
    #     proc_id = multiprocessing.current_process().pid
    #     basic.setlogfile('log_file_pid_%d.txt'%proc_id)
    save_mosaic = os.path.join(save_tif_dir, key + '.tif')
    # check file existence
    # if os.path.isfile(save_mosaic):
    b_save_mosaic = io_function.is_file_exist_subfolder(save_tif_dir, key + '.tif')
    if b_save_mosaic is not False:
        basic.outputlogMessage('warning, mosaic file: %s exist, skip' % save_mosaic)
        return save_mosaic
        # mosaic_list.append(b_save_mosaic)
        # continue
    # save the source file for producing the mosaic
    if save_source:
        save_mosaic_source_txt = os.path.join(save_tif_dir, key + '_src.txt')
        io_function.save_list_to_txt(save_mosaic_source_txt, dem_list)
    # if only one dem, then copy it if it's not VRT format
    if len(dem_list) == 1:
        if raster_io.get_driver_format(dem_list[0]) != 'VRT':
            io_function.copy_file_to_dst(dem_list[0], save_mosaic)
            return save_mosaic
    # create mosaic, can handle only input one file, but is slow
    result = RSImageProcess.mosaic_crop_images_gdalwarp(dem_list, save_mosaic, resampling_method=resample_method, o_format=o_format,
                                                        compress='lzw', tiled='yes', bigtiff='if_safer', thread_num=thread_num)
    if result is False:
        sys.exit(1)
        # return False
    return save_mosaic
def main():
    """Extract hillshade sub-images for every headwall shapefile.

    For each shapefile in dem_headwall_shp_dir, run the external extraction
    script into a per-shapefile sub-folder of the output dir (first CLI arg,
    or dem_hillshade_subImages_headwall by default). Failures are recorded in
    'failed_shp.txt'.
    """
    # get shapefile list
    headwall_shp_list = io_function.get_file_list_by_ext('.shp', dem_headwall_shp_dir, bsub_folder=False)
    if len(headwall_shp_list) < 1:
        raise ValueError('NO shapefile in %s' % dem_headwall_shp_dir)
    failed_shp = []
    out_dir = dem_hillshade_subImages_headwall
    if len(sys.argv) == 2:
        # change the output dir
        out_dir = sys.argv[1]
    for idx, shp in enumerate(headwall_shp_list):
        # bug fix: progress used to start at 0 ("(0/N)"); use idx + 1 like the
        # other scripts in this file
        print('(%d/%d) extract sub images for %s' % (idx + 1, len(headwall_shp_list), shp))
        if set_image_dir_patter_description(dem_hillshade_dir, shp) is False:
            continue
        save_dir = os.path.join(out_dir, os.path.splitext(os.path.basename(shp))[0])
        if os.path.isdir(save_dir):
            print('Warning, skip due to subImages for %s may exist' % shp)
            continue
        io_function.mkdir(save_dir)
        res = os.system(extract_py + ' -p para_file_subImage.ini -o %s ' % save_dir + shp)
        if res != 0:
            failed_shp.append(shp)
    if len(failed_shp) > 0:
        io_function.save_list_to_txt('failed_shp.txt', failed_shp)
def produce_products_dem_subsidence(b_remove_job_folder=True):
    """Continuously run DEM-diff segmentation jobs on a local workstation.

    Infinite polling loop: pick DEM-diff files that are old enough and not
    already assigned to another machine, write a batch list txt, launch
    ./run.sh for the 'segment' task, copy the results to CURC, then repeat.
    Exits the process when run.sh fails.

    Args:
        b_remove_job_folder: when True, delete 'seg_dem_diff_*' job folders
            after each batch completes.
    """
    # run segment jobs in local workstations.
    task = 'segment'
    max_list_count = 20
    if 'donostia' in machine_name:
        max_list_count = 8  # donostia is really slow, assigined less task to it
    job_list_pre = 'job_seg_dem_diff_list_'
    if os.path.isdir(dem_common.process_log_dir) is False:
        io_function.mkdir(dem_common.process_log_dir)
    # per-machine batch file; its presence tells other machines what we claimed
    dem_list_txt = os.path.join(dem_common.process_log_dir, job_list_pre + machine_name + '.txt')
    # when submit segment of dem_diff, no need ext_shp
    ext_shp = "monitor_fail_segment_jobs"
    while True:
        dem_diff_list = get_dem_diff_list_to_seg()
        # only handle file are old enough
        dem_diff_list = get_dem_diff_old_enough(dem_diff_list)
        dem_diff_ids = [get_grid_id_from_path(item) for item in dem_diff_list]
        print('dem_diff_ids')
        print(dem_diff_ids)
        # remove dem_diff already assigined for other machine
        if os.path.isfile(dem_list_txt):
            io_function.delete_file_or_dir(dem_list_txt)
        dem_diff_assigned = read_dem_diff_assigned_to_other_machine(job_list_pre)
        assigned_ids = [get_grid_id_from_path(item) for item in dem_diff_assigned]
        print('assigned_ids')
        print(assigned_ids)
        keep_idx = [idx for idx, id in enumerate(dem_diff_ids) if id not in assigned_ids]
        dem_diff_list = [dem_diff_list[item] for item in keep_idx]
        if len(dem_diff_list) < 1:
            print(datetime.now(), 'there is no DEM_diff for %s to seg, wait 10 minutes' % machine_name)
            time.sleep(600)  # wait 10 min
            continue
        # save some of them to txt, for "parallel_processing_curc.py"
        dem_diff_list = dem_diff_list[:max_list_count]
        save_ids = [get_grid_id_from_path(item) for item in dem_diff_list]
        print('save_ids')
        print(save_ids)
        io_function.save_list_to_txt(dem_list_txt, dem_diff_list)
        res = os.system('./run.sh %s %s' % (ext_shp, task))
        if res != 0:
            sys.exit(1)
        copy_segment_result_to_curc(save_ids)
        if b_remove_job_folder:
            os.system('rm -r seg_dem_diff_*')
        # release the claim so other machines can pick new files
        io_function.delete_file_or_dir(dem_list_txt)
def segment_a_grey_image(img_path, save_dir, process_num, org_raster=None, b_save_patch_label=False):
    """Segment a grey image patch-by-patch and merge patch labels into one raster.

    The image is processed in 1024x1024 patches by segment_a_patch (in a
    multiprocessing Pool); patch label ids are shifted so they stay unique
    across the whole image. Per-object attributes (optionally computed from
    org_raster) are merged and saved to '<name>_attributes.txt'.

    Args:
        img_path: input grey image.
        save_dir: output directory for the label raster and attribute files.
        process_num: number of worker processes.
        org_raster: optional original raster (must match img size) used for
            attribute computation.
        b_save_patch_label: when True, workers write per-patch label files and
            this function returns their paths instead of merging.

    Returns:
        Path of the merged label raster, or (when b_save_patch_label) the list
        of per-patch label file paths.
    """
    out_pre = os.path.splitext(os.path.basename(img_path))[0]
    label_path = os.path.join(save_dir, out_pre + '_label.tif')
    if os.path.isfile(label_path):
        basic.outputlogMessage('%s exist, skip segmentation' % label_path)
        return label_path
    height, width, band_num, date_type = raster_io.get_height_width_bandnum_dtype(img_path)
    print('input image: height, width, band_num, date_type', height, width, band_num, date_type)
    # if the original data is available, then calculate the attributes based on that
    if org_raster is not None:
        org_height, org_width, org_band_num, org_date_type = raster_io.get_height_width_bandnum_dtype(org_raster)
        if org_height != height or org_width != width:
            raise ValueError('%s and %s do not have the same size' % (img_path, org_raster))
    # full-size label image; int32 so label ids can exceed 65535
    save_labes = np.zeros((height, width), dtype=np.int32)
    # divide the image the many small patches, then calcuate one by one, solving memory issues.
    image_patches = split_image.sliding_window(width, height, 1024, 1024, adj_overlay_x=0, adj_overlay_y=0)
    patch_count = len(image_patches)
    # for idx, patch in enumerate(image_patches):
    #     out_patch,out_labels = segment_a_patch(idx, patch, patch_count,img_path)
    #     # copy to the entire image
    #     row_s = patch[1]
    #     row_e = patch[1] + patch[3]
    #     col_s = patch[0]
    #     col_e = patch[0] + patch[2]
    #     save_labes[row_s:row_e, col_s:col_e] = out_labels
    theadPool = Pool(process_num)
    parameters_list = [(idx, patch, patch_count, img_path, org_raster, b_save_patch_label) for idx, patch in enumerate(image_patches)]
    results = theadPool.starmap(segment_a_patch, parameters_list)
    patch_label_path_list = []
    patch_label_id_range = []
    object_attributes = {}  # object id (label) and attributes (list)
    for res in results:
        patch, out_labels, nodata, attributes = res
        if isinstance(out_labels, str) and os.path.isfile(out_labels):  # if it's a label file
            patch_label_path_list.append(out_labels)
        else:
            # copy to the entire image
            row_s = patch[1]
            row_e = patch[1] + patch[3]
            col_s = patch[0]
            col_e = patch[0] + patch[2]
            # despite the name, this is the current MAXIMUM label already used;
            # patch labels are shifted past it to stay globally unique
            current_min = np.max(save_labes)
            print('current_max', current_min)
            patch_label_id_range.append(current_min)
            save_labes[row_s:row_e, col_s:col_e] = out_labels + current_min + 1
            if attributes is not None:
                # NOTE(review): labels are shifted by current_min + 1 but the
                # attribute keys only by current_min — looks like an off-by-one;
                # confirm against how the attributes file is consumed.
                update_label_attr = {}
                for key in attributes:
                    update_label_attr[key + current_min] = attributes[key]
                # add to the attributes
                object_attributes.update(update_label_attr)
    # # apply median filter (remove some noise),
    # we should not use median filter, because it's labels, not images.
    # label_blurs = cv2.medianBlur(np.float32(save_labes), 3)  # with kernal=3, cannot accept int32
    # # print(label_blurs, label_blurs.dtype)
    # save_labes = label_blurs.astype(np.int32)
    # return a list of labels saved in current working folder.
    if b_save_patch_label:
        return patch_label_path_list
    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)
    # save attributes (if not empty)
    if object_attributes:
        attribute_path = os.path.join(save_dir, out_pre + '_attributes.txt')
        io_function.save_dict_to_txt_json(attribute_path, object_attributes)
    # save the label
    raster_io.save_numpy_array_to_rasterfile(save_labes, label_path, img_path)  # do not set nodata
    # save id ranges to txt
    label_id_range_txt = os.path.splitext(label_path)[0] + '_IDrange.txt'
    patch_label_id_range = [str(item) for item in patch_label_id_range]
    io_function.save_list_to_txt(label_id_range_txt, patch_label_id_range)
    return label_path
def download_dem_tarball(dem_index_shp, extent_polys, save_folder, pre_name, reg_tif_dir=None, poly_ids=None,
                         b_arcticDEM_tile=False):
    """Download ArcticDEM tarballs covering a list of extent polygons, in parallel subprocesses.

    For each extent polygon, the DEM file URLs intersecting it are looked up in the DEM
    index shapefile (cached to a per-polygon txt file), then each tarball is downloaded
    by a separate ``run_a_process_download`` subprocess, throttled by the module-level
    ``max_task_count``.

    :param dem_index_shp: path to the DEM index shapefile; must have a 'fileurl' attribute.
    :param extent_polys: list of extent polygons (same projection as the index shapefile).
    :param save_folder: folder to save the downloaded tarballs.
    :param pre_name: prefix for the per-polygon url-list txt files.
    :param reg_tif_dir: if set, folder of already-unpacked & registered tifs; a tarball whose
        registered tif already exists there is skipped (the tif path is returned instead).
    :param poly_ids: optional global unique ids for extent_polys; defaults to 0..len-1.
    :param b_arcticDEM_tile: True for tile (mosaic) version ('*reg_dem.tif' naming),
        False for strip version ('*dem_reg.tif' naming).
    :return: (list of tarball paths scheduled/found, list of existing registered tif paths).

    NOTE(review): relies on module-level globals not defined in this function:
    ``download_tasks``, ``max_task_count``, ``machine_name``, ``b_unpack_after_downloading``.
    """
    # read dem polygons and url
    dem_polygons, dem_urls = vector_gpd.read_polygons_attributes_list(dem_index_shp, 'fileurl',
                                                                      b_fix_invalid_polygon=False)
    basic.outputlogMessage('%d dem polygons in %s' % (len(dem_polygons), dem_index_shp))

    dem_tar_ball_list = []
    reg_tifs_list = []
    curr_dir = os.getcwd()  # NOTE(review): only referenced from a commented-out os.chdir below

    b_save_grid_id_noDEM = True
    if poly_ids is None:
        poly_ids = [idx for idx in range(len(extent_polys))]
        b_save_grid_id_noDEM = False  # if poly_ids is not the global unique id, then don't save it.

    # strips known to have no registered version; skip downloading these
    if os.path.isfile('no_registration_strips.txt'):
        no_registration_strips = io_function.read_list_from_txt('no_registration_strips.txt')
    else:
        no_registration_strips = []

    # tarballs is being downloaded (avoid scheduling the same file twice in this run)
    downloading_tarballs = []

    for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)):
        basic.outputlogMessage('get data for the %d th extent (%d/%d)' % (idx, count, len(extent_polys)))

        # per-polygon cache of the DEM urls, so re-runs skip the spatial query
        save_txt_path = pre_name + '_dem_urls_poly_%d.txt' % idx
        if os.path.isfile(save_txt_path):
            urls = io_function.read_list_from_txt(save_txt_path)
            basic.outputlogMessage('read %d dem urls from %s' % (len(urls), save_txt_path))
        else:
            # get fileurl
            dem_poly_ids = vector_gpd.get_poly_index_within_extent(dem_polygons, ext_poly)
            basic.outputlogMessage('find %d DEM within %d th extent' % (len(dem_poly_ids), (idx)))
            urls = [dem_urls[id] for id in dem_poly_ids]

            # save to txt
            io_function.save_list_to_txt(save_txt_path, urls)
            basic.outputlogMessage('save dem urls to %s' % save_txt_path)

        if len(urls) > 0:
            # total_size_GB = get_total_size(urls)
            # internet access, parallel running may cause problem. The info is not important
            # basic.outputlogMessage('the size of files will be downloaded is %.4lf GB for the %d th extent '%(total_size_GB,(idx+1)))
            # time.sleep(5)  # wait 5 seconds

            # download them using wget one by one
            for ii, url in enumerate(urls):
                tmp = urlparse(url)

                # in the Strip DEM, there are around 700 url are point to tif files, failed to download them
                # e.g. /mnt/pgc/data/elev/dem/setsm/ArcticDEM/geocell/v3.0/2m_temp/n59w137/SETSM_WV03_20150518_104001000B703200_104001000C715B00_seg8_2m_v3.0_dem.tif
                if url.startswith('/mnt') and url.endswith('.tif'):
                    basic.outputlogMessage("error: not a valid url: %s" % url)
                    continue

                filename = os.path.basename(tmp.path)
                save_dem_path = os.path.join(save_folder, filename)

                if reg_tif_dir is not None:
                    # strip ".tar.gz" to get the tarball base name
                    tar_base = os.path.basename(filename)[:-7]
                    # file_pattern = ['*dem_reg.tif', '*reg_dem.tif']
                    # Arctic strip and tile (mosaic) version use different registered-tif naming
                    if b_arcticDEM_tile:
                        reg_tifs = io_function.get_file_list_by_pattern(reg_tif_dir, tar_base + '*reg_dem.tif')
                    else:
                        reg_tifs = io_function.get_file_list_by_pattern(reg_tif_dir, tar_base + '*dem_reg.tif')
                    if len(reg_tifs) > 0:
                        basic.outputlogMessage(
                            'warning, unpack and registrated tif for %s already exists, skip downloading' % filename)
                        reg_tifs_list.append(reg_tifs[0])
                        continue

                # NOTE(review): tar_base is only assigned when reg_tif_dir is not None;
                # if reg_tif_dir is None this line raises NameError — confirm callers always pass reg_tif_dir.
                if './' + tar_base in no_registration_strips:
                    basic.outputlogMessage(
                        'warning, %s is in no_registration_strips list, skip downloading' % filename)
                    continue

                if filename in downloading_tarballs:
                    basic.outputlogMessage('warning, %s is being downloaded by other processes' % filename)
                    continue

                if os.path.isfile(save_dem_path) and os.path.getsize(save_dem_path) > 1:
                    basic.outputlogMessage('warning, %s already exists, skip downloading' % filename)
                else:
                    # download the dem
                    basic.outputlogMessage('starting downloading %d th DEM (%d in total)' % ((ii + 1), len(urls)))
                    downloading_tarballs.append(filename)
                    # os.chdir(save_folder)
                    # run_a_process_download(url)  # download
                    ##################################################
                    # download in parallel
                    basic.check_exitcode_of_process(download_tasks)  # if there is one former job failed, then quit

                    # throttle: wait while too many download tasks are alive
                    # NOTE(review): the 'continue' below is commented out, so this loop
                    # sleeps at most one 60-second interval before starting a new task,
                    # even if job_count is still >= max_task_count — confirm this is intended.
                    while True:
                        job_count = basic.alive_process_count(download_tasks)
                        if job_count >= max_task_count:
                            print(machine_name, datetime.now(),
                                  'You are running %d or more tasks in parallel, wait ' % max_task_count)
                            time.sleep(60)
                            # continue
                        break

                    # start the processing
                    sub_process = Process(target=run_a_process_download,
                                          args=(url, save_dem_path, reg_tif_dir, max_task_count,
                                                b_unpack_after_downloading))  # start a process, don't wait
                    sub_process.start()
                    download_tasks.append(sub_process)
                    basic.close_remove_completed_process(download_tasks)
                    # os.chdir(curr_dir)

                dem_tar_ball_list.append(save_dem_path)
        else:
            basic.outputlogMessage('Warning, can not find DEMs within %d th extent' % (idx))
            if b_save_grid_id_noDEM:
                save_id_grid_no_dem(idx)

    # wait until all task complete
    while True:
        job_count = basic.alive_process_count(download_tasks)
        if job_count > 0:
            print(machine_name, datetime.now(),
                  'wait until all task are completed, alive task account: %d ' % job_count)
            time.sleep(60)
        else:
            break

    return dem_tar_ball_list, reg_tifs_list
def get_grid_20(extent_shp_or_id_txt, grid_polys, ids):
    '''
    get grid polygons and ids based on input extent (polygon in shapefile) or ids (txt file)
    if "file_name_base+'_grid_ids.txt'" exists, it will read id in this file directly.
    :param extent_shp_or_id_txt: a shapefile of extent polygons, or a txt file of grid ids (one per line)
    :param grid_polys: list of all grid polygons, parallel to *ids*
    :param ids: list of grid ids, parallel to *grid_polys*
    :return: (selected grid polygons, selected grid ids)
    :raises ValueError: if a projection cannot be read, the extent shapefile is empty,
        or a selected grid id is not present in *ids*
    '''
    io_function.is_file_exist(extent_shp_or_id_txt)

    if extent_shp_or_id_txt.endswith('.txt'):
        # ids are given directly in a txt file, one id per line
        grid_ids = io_function.read_list_from_txt(extent_shp_or_id_txt)
        grid_ids = [int(item) for item in grid_ids]
    else:
        shp_corresponding_grid_ids_txt = get_corresponding_grid_ids_txt(extent_shp_or_id_txt)
        if os.path.isfile(shp_corresponding_grid_ids_txt):
            # a previous run already saved the ids for this extent; reuse them
            print('corresponding grid ids txt file for %s exists, read grid id from txt' % extent_shp_or_id_txt)
            grid_ids = [int(item) for item in io_function.read_list_from_txt(shp_corresponding_grid_ids_txt)]
            basic.outputlogMessage('read %d grids within the extents (%s)'
                                   % (len(grid_ids), os.path.basename(extent_shp_or_id_txt)))
        else:
            # extent polygons and projection (proj4)
            extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(extent_shp_or_id_txt)
            if extent_shp_prj == '':
                raise ValueError('get proj4 of %s failed' % extent_shp_or_id_txt)
            grid_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)
            if grid_shp_prj == '':
                raise ValueError('get proj4 of %s failed' % grid_20_shp)

            if extent_shp_prj != grid_shp_prj:
                # reproject the extent polygons to the grid's projection before the spatial query
                basic.outputlogMessage('%s and %s do not have the same projection, will reproject %s'
                                       % (extent_shp_or_id_txt, grid_20_shp, os.path.basename(extent_shp_or_id_txt)))
                epsg = map_projection.get_raster_or_vector_srs_info_epsg(grid_20_shp)
                # print(epsg)
                # extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp,dem_shp_prj.strip())
                extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp_or_id_txt, epsg)
            else:
                extent_polys = vector_gpd.read_polygons_gpd(extent_shp_or_id_txt)

            ext_poly_count = len(extent_polys)
            if ext_poly_count < 1:
                raise ValueError('No polygons in %s' % extent_shp_or_id_txt)

            # if there are many polygons, this will take time.
            grid_index = []
            for idx, ext_poly in enumerate(extent_polys):
                print(timeTools.get_now_time_str(), 'get grids for extent idx', idx,
                      'total polygons:', ext_poly_count)
                index = vector_gpd.get_poly_index_within_extent(grid_polys, ext_poly)
                grid_index.extend(index)
            grid_index = list(set(grid_index))  # remove duplicated ids
            basic.outputlogMessage('find %d grids within the extents (%s)'
                                   % (len(grid_index), os.path.basename(extent_shp_or_id_txt)))
            grid_ids = [ids[idx] for idx in grid_index]

            # cache the result so re-runs skip the spatial query
            grid_ids_str = [str(item) for item in grid_ids]
            io_function.save_list_to_txt(shp_corresponding_grid_ids_txt, grid_ids_str)

    # Map each grid id to its first position in *ids* once (O(n)), instead of the
    # previous per-id ids.index() scan which was O(n^2) for large grid lists.
    # setdefault keeps the FIRST occurrence, matching list.index semantics.
    id_to_pos = {}
    for pos, g_id in enumerate(ids):
        id_to_pos.setdefault(g_id, pos)
    try:
        id_index = [id_to_pos[g_id] for g_id in grid_ids]
    except KeyError as e:
        # preserve the exception type callers may expect from list.index
        raise ValueError('grid id %s is not in the input ids' % str(e)) from e

    selected_grid_polys = [grid_polys[idx] for idx in id_index]
    return selected_grid_polys, grid_ids