def predict_one_image_yolo(para_file, image_path, img_save_dir, inf_list_file, gpuid, trained_model):
    config_file = parameters.get_string_parameters(para_file, 'network_setting_ini')  # e.g., 'yolov4_obj.cfg'
    yolo_data = os.path.join('data', 'obj.data')
    inf_batch_size = parameters.get_digit_parameters(para_file, 'inf_batch_size', 'int')
    b_python_api = parameters.get_bool_parameters(para_file, 'b_inf_use_python_api')

    done_indicator = '%s_done' % inf_list_file
    if os.path.isfile(done_indicator):
        basic.outputlogMessage('warning, %s exists, skip prediction' % done_indicator)
        return

    # use a specific GPU for prediction, inferring one image at a time
    time0 = time.time()
    if gpuid is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpuid)

    predict_remoteSensing_image(para_file, image_path, img_save_dir, trained_model, config_file, yolo_data,
                                batch_size=inf_batch_size, b_python_api=b_python_api)

    duration = time.time() - time0
    os.system('echo "$(date): time cost of inference for image in %s: %.2f seconds">>"time_cost.txt"' % (inf_list_file, duration))
    # write a file to indicate that the prediction has been done
    os.system('echo %s > %s_done' % (inf_list_file, inf_list_file))
    return
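# A small sketch of the "done indicator" pattern used above, written with plain
# Python file I/O instead of shelling out via os.system (same effect, fewer
# quoting pitfalls; mark_done is a hypothetical helper, not part of the project):
def mark_done(inf_list_file):
    with open('%s_done' % inf_list_file, 'w') as f_obj:
        f_obj.write(inf_list_file + '\n')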
def open(self, imgpath):
    """
    open image file
    Args:
        imgpath: the path of the image file

    Returns: True if successful, False otherwise
    """
    try:
        self.imgpath = imgpath
        self.ds = gdal.Open(imgpath)
    except RuntimeError as e:
        basic.outputlogMessage('Unable to open: ' + self.imgpath)
        basic.outputlogMessage(str(e))
        return False
    # without gdal.UseExceptions(), gdal.Open returns None instead of raising
    if self.ds is None:
        basic.outputlogMessage('Unable to open: ' + self.imgpath)
        return False
    prj = self.ds.GetProjection()
    self.spatialrs = osr.SpatialReference(wkt=prj)
    self.geotransform = self.ds.GetGeoTransform()
    return True
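# Note on the try/except in open() above: by default, gdal.Open() returns None
# on failure instead of raising RuntimeError. The exception branch only fires
# if GDAL exceptions are enabled beforehand, e.g. (standalone snippet):
from osgeo import gdal
gdal.UseExceptions()  # make gdal.Open() raise RuntimeError on failure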
def get_file_list_by_ext(ext, folder, bsub_folder):
    """
    Args:
        ext: extension name(s) of the files to find; a string for a single extension
            or a list for multiple extensions, e.g., '.tif' or ['.tif', '.TIF']
        folder: the directory to explore
        bsub_folder: True to search sub-folders, False to search the current folder only

    Returns: a list of file paths, e.g., ['/user/data/1.tif', '/user/data/2.tif']

    Notes: on invalid input, it will exit the program
    """
    extension = []
    if isinstance(ext, str):
        extension.append(ext)
    elif isinstance(ext, list):
        extension = ext
    else:
        basic.outputlogMessage('input extension type is not correct')
        assert False

    if os.path.isdir(folder) is False:
        basic.outputlogMessage('input error, %s is not a directory' % folder)
        assert False

    if isinstance(bsub_folder, bool) is False:
        basic.outputlogMessage('input error, bsub_folder must be a bool value')
        assert False

    files = []
    sub_folders = [folder]
    while len(sub_folders) > 0:
        current_sear_dir = sub_folders[0]
        file_names = os.listdir(current_sear_dir)
        file_names = [os.path.join(current_sear_dir, item) for item in file_names]
        for str_file in file_names:
            if os.path.isdir(str_file):
                sub_folders.append(str_file)
                continue
            ext_name = os.path.splitext(str_file)[1]
            if ext_name in extension:
                files.append(str_file)
        if bsub_folder is False:
            break
        sub_folders.pop(0)
    return files
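# A runnable usage sketch for get_file_list_by_ext(), using a temporary folder
# so it does not depend on any project data:
import os, tempfile

demo_dir = tempfile.mkdtemp()
open(os.path.join(demo_dir, 'a.tif'), 'w').close()
open(os.path.join(demo_dir, 'b.txt'), 'w').close()
tifs = get_file_list_by_ext(['.tif', '.TIF'], demo_dir, bsub_folder=True)
print('found %d tif file(s)' % len(tifs))  # expected: 1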
def add_polygon_attributes(input, output, para_file, data_para_file):

    if io_function.is_file_exist(input) is False:
        return False

    # copy the input to the output path
    if io_function.copy_shape_file(input, output) is False:
        raise IOError('copy shape file %s failed' % input)

    # remove narrow parts of mapped polygons
    polygon_narrow_part_thr = parameters.get_digit_parameters_None_if_absence(para_file, 'mapped_polygon_narrow_threshold', 'float')
    # if it is not None, then try to remove narrow parts of polygons
    if polygon_narrow_part_thr is not None and polygon_narrow_part_thr > 0:
        # use the buffer operation to remove narrow parts of polygons
        basic.outputlogMessage("start removing narrow parts (thr %.2f) in polygons" % (polygon_narrow_part_thr * 2))
        if vector_gpd.remove_narrow_parts_of_polygons_shp_NOmultiPolygon(input, output, polygon_narrow_part_thr):
            message = "Finished removing narrow parts (thr %.2f) in polygons and saved to %s" % (polygon_narrow_part_thr * 2, output)
            basic.outputlogMessage(message)
    else:
        basic.outputlogMessage("warning, mapped_polygon_narrow_threshold is not in the parameter file, skip removing narrow parts")

    # calculate the area and perimeter of polygons
    if cal_add_area_length_of_polygon(output) is False:
        return False

    # calculate the polygon shape information
    b_calculate_shape_info = parameters.get_bool_parameters_None_if_absence(para_file, 'b_calculate_shape_info')
    if b_calculate_shape_info:
        # remove "_shapeInfo.shp" to force recalculating the shape information
        os.system('rm *_shapeInfo.shp')
        if calculate_gully_information(output) is False:
            return False

    # add topography of each polygon
    dem_files, slope_files, aspect_files, dem_diff_files = get_topographic_files(data_para_file)
    if calculate_polygon_topography(output, para_file, dem_files, slope_files, aspect_files=aspect_files, dem_diffs=dem_diff_files) is False:
        basic.outputlogMessage('Warning: calculating topographic information failed')
        # don't return False here; keep the other attributes

    return True
def Getband(self, bandindex):
    if not self.ds:
        basic.outputlogMessage('Please Open the file first')
        return False
    bandindex = int(bandindex)
    try:
        srcband = self.ds.GetRasterBand(bandindex)
    except RuntimeError as e:
        # for example, GetRasterBand(10) on an image with fewer bands
        basic.outputlogMessage('Band ( %i ) not found' % bandindex)
        basic.outputlogMessage(str(e))
        return False
    return srcband
def SetBandNoDataValue(self, bandindex, nodatavalue):
    if self.ds is not None:
        try:
            self.ds.GetRasterBand(bandindex).SetNoDataValue(nodatavalue)
        except RuntimeError as e:
            basic.outputlogMessage('Unable to set no-data value for band %d: ' % bandindex)
            basic.outputlogMessage(str(e))
            return False
        return True
    else:
        basic.outputlogMessage('Please Create the file first')
        return False
def ReadbandData(self, bandindex, xoff, yoff, width, height, gdalDatatype):
    if self.ds is not None:
        try:
            banddata = self.ds.GetRasterBand(bandindex).ReadRaster(
                xoff, yoff, width, height, width, height, gdalDatatype)
        except RuntimeError as e:
            basic.outputlogMessage('Unable to read band %d data: ' % bandindex)
            basic.outputlogMessage(str(e))
            return False
        return banddata
    else:
        basic.outputlogMessage('Please Open the file first')
        return False
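# Hedged, standalone example of the underlying GDAL call used by ReadbandData():
# read a 256x256 block of band 1 as raw bytes ('example.tif' is a placeholder):
from osgeo import gdal

ds = gdal.Open('example.tif')
if ds is not None:
    raw = ds.GetRasterBand(1).ReadRaster(0, 0, 256, 256, 256, 256, gdal.GDT_Byte)
    # for GDT_Byte, 'raw' is a bytes object of length 256*256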
def calculate_mean_of_images(images_list):
    if len(images_list) < 1:
        basic.outputlogMessage('No image in the list')
        return False

    # get the band count from the first image
    img_obj = RSImageclass()
    first_img = get_first_path_in_line(images_list[0])
    if first_img is False:
        return False
    img_obj.open(first_img)
    band_count = img_obj.GetBandCount()
    img_obj = None

    # accumulate a pixel-weighted sum per band, then divide by the total pixel count
    # (the original code re-multiplied the running value by total_pixel on every
    # iteration, which produced a wrong mean for more than one image)
    sum_of_images = [0.0] * band_count
    total_pixel = 0
    number = 0
    for image in images_list:
        (width, height, mean_of_band) = cal_the_mean_of_bands(image)
        number += 1
        print('%d / %d' % (number, len(images_list)))
        if width is False:
            return False
        if len(mean_of_band) != band_count:
            basic.outputlogMessage('error, band count is different')
            return False
        pixel_count = width * height
        for i in range(band_count):
            sum_of_images[i] += mean_of_band[i] * pixel_count
        total_pixel += pixel_count

    mean_of_images = [value / total_pixel for value in sum_of_images]

    for i in range(band_count):
        basic.outputlogMessage('band {}: mean {}'.format(i + 1, mean_of_images[i]))

    with open('mean_value.txt', 'w') as f_obj:
        f_obj.writelines('total image count {} \n'.format(len(images_list)))
        for i in range(len(mean_of_images)):
            f_obj.writelines('band {} \n'.format(i + 1))
        for i in range(len(mean_of_images)):
            f_obj.writelines('mean_value: {} \n'.format(mean_of_images[i]))

    return mean_of_images
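# A minimal, self-contained check of the pixel-weighted mean used in
# calculate_mean_of_images() (pure numpy, no project dependencies):
import numpy as np

def weighted_mean(band_means, pixel_counts):
    # band_means: (n_images, n_bands); pixel_counts: (n_images,)
    band_means = np.asarray(band_means, dtype=np.float64)
    pixel_counts = np.asarray(pixel_counts, dtype=np.float64)
    # weight each per-image mean by its pixel count, then normalize
    return (band_means * pixel_counts[:, None]).sum(axis=0) / pixel_counts.sum()

# two fake images: 100 and 300 pixels, 2 bands each
assert np.allclose(weighted_mean([[10, 20], [30, 40]], [100, 300]), [25.0, 35.0])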
def read_training_pixels_inside_polygons(self, img_path, shp_path):
    '''
    read pixels on an image within the extent of polygons
    :param img_path: the path of an image
    :param shp_path: the path of a shape file
    :return:
    '''
    if io_function.is_file_exist(img_path) is False or io_function.is_file_exist(shp_path) is False:
        return False

    no_data = 255  # consider changing to other values
    touch = False  # we only read the pixels inside the polygons, so set all_touched as False
    sub_images, class_labels = build_RS_data.read_pixels_inside_polygons(
        img_path, shp_path, mask_no_data=no_data, touch=touch)

    # read them one by one
    Xs, ys = [], []
    for idx, (img_data, label) in enumerate(zip(sub_images, class_labels)):
        # img_data: 3d array (nband, height, width); label: int value
        X_arr = img_data.reshape(img_data.shape[0], -1)
        # remove no-data pixels
        valid_pixels = np.any(X_arr != no_data, axis=0)
        X_arr = X_arr[:, valid_pixels]
        valid_pixel_count = int(X_arr.size / img_data.shape[0])
        if valid_pixel_count < 1:
            basic.outputlogMessage('Warning, no valid pixel in the %d th polygon due to its small size' % idx)
            continue
        y_arr = np.ones(X_arr.shape[1]) * label
        Xs.append(X_arr)
        ys.append(y_arr)

    X_pixels = np.concatenate(Xs, axis=1)
    y_pixels = np.concatenate(ys, axis=0)
    X_pixels = np.transpose(X_pixels, (1, 0))
    basic.outputlogMessage(str(X_pixels.shape))
    basic.outputlogMessage(str(y_pixels.shape))

    return X_pixels, y_pixels
def find_polygon_intersec_polygons(shp_path):
    basic.outputlogMessage('Checking overlapping polygons in %s' % shp_path)
    polygons = read_polygons_gpd(shp_path)
    count = len(polygons)
    for idx, poly in enumerate(polygons):
        # pairwise check against the remaining polygons (O(n^2))
        for kk in range(idx + 1, count):
            inter = poly.intersection(polygons[kk])
            if inter.is_empty is False:
                basic.outputlogMessage('warning, the %d th polygon intersects the %d th polygon' % (idx + 1, kk + 1))
    basic.outputlogMessage('finished checking whether polygons intersect other polygons')
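# Optional speed-up sketch for the O(n^2) loop above, using shapely's STRtree
# to skip pairs whose bounding boxes do not overlap (assumes shapely >= 2.0,
# where query() returns integer indices; demo data only):
from shapely.strtree import STRtree
from shapely.geometry import box

demo_polygons = [box(0, 0, 1, 1), box(0.5, 0.5, 1.5, 1.5), box(3, 3, 4, 4)]
tree = STRtree(demo_polygons)
for idx, poly in enumerate(demo_polygons):
    for kk in tree.query(poly):  # only candidates with overlapping extents
        if kk > idx and not demo_polygons[kk].intersection(poly).is_empty:
            print('polygon %d intersects polygon %d' % (idx + 1, kk + 1))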
def remove_narrow_parts_of_polygons_shp(input_shp, out_shp, rm_narrow_thr):
    # read polygons as shapely objects
    shapefile = gpd.read_file(input_shp)
    attribute_names = None
    new_polygon_list = []
    polygon_attributes_list = []  # 2d list

    for idx, row in shapefile.iterrows():
        if idx == 0:
            attribute_names = row.keys().to_list()[:-1]  # the last one is 'geometry'
        print('removing narrow parts of %dth polygon (total: %d)' % (idx + 1, len(shapefile.geometry.values)))
        shapely_polygon = row['geometry']
        out_polygon = remove_narrow_parts_of_a_polygon(shapely_polygon, rm_narrow_thr)
        if out_polygon.is_empty is True:
            basic.outputlogMessage('Warning, remove %dth (0 index) polygon in %s because it is empty after removing narrow parts'
                                   % (idx, os.path.basename(input_shp)))
            # don't save the empty polygon
        else:
            new_polygon_list.append(out_polygon)
            attributes = [row[key] for key in attribute_names]
            polygon_attributes_list.append(attributes)

    # copy attributes
    save_polygons_attributes = {}
    for idx, attribute in enumerate(attribute_names):
        values = [item[idx] for item in polygon_attributes_list]
        save_polygons_attributes[attribute] = values

    save_polygons_attributes["Polygons"] = new_polygon_list
    polygon_df = pd.DataFrame(save_polygons_attributes)

    basic.outputlogMessage('After removing the narrow parts, obtained %d polygons' % len(new_polygon_list))
    basic.outputlogMessage('will be saved to %s' % out_shp)
    wkt_string = map_projection.get_raster_or_vector_srs_info_wkt(input_shp)
    return save_polygons_to_files(polygon_df, 'Polygons', wkt_string, out_shp)
def WritebandData(self, bandindex, xoff, yoff, width, height, banddata, gdalDatatype):
    if self.ds is not None:
        try:
            self.ds.GetRasterBand(bandindex).WriteRaster(
                xoff, yoff, width, height, banddata, width, height, gdalDatatype)
        except RuntimeError as e:
            basic.outputlogMessage('Unable to write band %d data: ' % bandindex)
            basic.outputlogMessage(str(e))
            return False
        return True
    else:
        basic.outputlogMessage('Please Create the file first')
        return False
def co_registration_one_dem(ref_dem, dem_tif, save_dir):
    if check_coreg_results(dem_tif, save_dir):
        basic.outputlogMessage('co-registration results for %s exist, skip' % dem_tif)
        return 0

    align_outputs = check_align_folder(dem_tif)
    if len(align_outputs) >= 9:
        basic.outputlogMessage('%s has been co-registered, skip' % dem_tif)
    else:
        command_str = dem_dem_align + ' ' + ref_dem + ' ' + dem_tif
        basic.outputlogMessage(command_str)
        res = os.system(command_str)
        if res != 0:
            sys.exit(1)

    return move_align_results(ref_dem, dem_tif, save_dir)
def dem_to_slope(input, output, slope_file_bak):
    if os.path.isfile(output):
        basic.outputlogMessage('%s exists, skip' % output)
        return output
    if os.path.isfile(slope_file_bak):
        basic.outputlogMessage('%s exists, skip' % slope_file_bak)
        return slope_file_bak
    if os.path.isfile(input) is False:
        basic.outputlogMessage('Warning, %s does not exist' % input)
        return False

    # use the default settings in QGIS
    command_str = 'gdaldem slope %s %s -of GTiff -co compress=lzw -co tiled=yes -co bigtiff=if_safer -b 1 -s 1.0' % (input, output)
    basic.os_system_exit_code(command_str)
    return output
def remove_polygons_based_shapeinfo(in_shp, output_shp, area_limit, circularit_limit, holes_count):
    # remove polygons if they are large (> area_limit) and narrow (< circularit_limit);
    # if a polygon has too many holes, remove it as well.
    # removal is based on: INarea, INperimete, circularit, ratio_w_h, hole_count
    if os.path.isfile(output_shp):
        basic.outputlogMessage('%s exists, skip' % output_shp)
        return output_shp

    polygons = vector_gpd.read_polygons_gpd(in_shp)
    # add some shape info
    shape_info_list = [vector_gpd.calculate_polygon_shape_info(item) for item in polygons]
    shapeinfo_all_dict = vector_gpd.list_to_dict(shape_info_list)
    vector_gpd.add_attributes_to_shp(in_shp, shapeinfo_all_dict)

    shapefile = gpd.read_file(in_shp)
    remove_count = 0
    for idx, row in shapefile.iterrows():
        shape_info = shape_info_list[idx]
        # remove relatively large but narrow ones
        if shape_info['INarea'] > area_limit and shape_info['circularit'] < circularit_limit:
            shapefile.drop(idx, inplace=True)
            remove_count += 1
            continue
        # remove polygons with too many holes
        if shape_info['hole_count'] > holes_count:
            shapefile.drop(idx, inplace=True)
            remove_count += 1
            continue

    basic.outputlogMessage('removed %d polygons based on shape info, %d remain, saving to %s'
                           % (remove_count, len(shapefile.geometry.values), output_shp))
    if len(shapefile.geometry.values) < 1:
        basic.outputlogMessage('After removing based on shape info, no polygon to save')
        return None

    # save results
    shapefile.to_file(output_shp, driver='ESRI Shapefile')
    return output_shp
def draw_one_attribute_histogram(shp_file, field_name, attribute, output, color='grey', hatch=""):
    """
    draw the histogram of one attribute
    Args:
        shp_file: shape file path
        field_name: the name of the attribute field to read
        attribute: the attribute description (for labels)
        output: the output figure path

    Returns: None; the figure is written to `output`
    """
    values = read_attribute(shp_file, field_name)
    if field_name == 'INarea':  # m^2 to km^2
        values = [item / 1000000.0 for item in values]

    fig_obj = plt.figure()  # create a new figure
    ax = Subplot(fig_obj, 111)
    fig_obj.add_subplot(ax)

    # ec means edge color
    n, bins, patches = ax.hist(values, bins="auto", alpha=0.75, ec="black", linewidth='1.5', color=color, hatch=hatch)

    # plt.gcf().subplots_adjust(bottom=0.15)  # reserve space for labels
    # plt.xlabel(attribute, fontsize=15)
    # plt.ylabel("Number", fontsize=15)
    # plt.title('Histogram of ' + attribute)

    # hide the right and top axes
    ax.axis["right"].set_visible(False)
    ax.axis["top"].set_visible(False)

    plt.savefig(output)
    basic.outputlogMessage("Output figures to %s" % os.path.abspath(output))
    basic.outputlogMessage("ncount: " + str(n))
    basic.outputlogMessage("bins: " + str(bins))
def check_one_vector_file(idx, total, file_path, good_file_list):
    if os.path.basename(file_path) in good_file_list:
        return True
    try:
        print('checking %d/%d' % (idx, total))
        geometries = vector_gpd.read_polygons_gpd(file_path, b_fix_invalid_polygon=False)
        if len(geometries) < 1:
            basic.outputlogMessage('No geometries in %s' % file_path)
            return False
        if None in geometries:
            basic.outputlogMessage('None geometries in %s' % file_path)
            return False
    except Exception:
        basic.outputlogMessage('incomplete vector: %s' % file_path)
        return False
    return True
def check_input_image_and_label(image_path, label_path):
    """
    check the input image and label; they should have the same width, height, and projection
    :param image_path: the path of the image
    :param label_path: the path of the label
    :return: (width, height) of the image if successful; otherwise it asserts
    """
    img_obj = RSImageclass()
    if img_obj.open(image_path) is False:
        assert False
    label_obj = RSImageclass()
    if label_obj.open(label_path) is False:
        assert False

    # check width and height
    width = img_obj.GetWidth()
    height = img_obj.GetHeight()
    if width != label_obj.GetWidth() or height != label_obj.GetHeight():
        basic.outputlogMessage("Error, not the same width and height of image (%s) and label (%s)" % (image_path, label_path))
        assert False

    # check resolution
    if img_obj.GetXresolution() != label_obj.GetXresolution() or img_obj.GetYresolution() != label_obj.GetYresolution():
        basic.outputlogMessage("warning, not the same resolution of image (%s) and label (%s)" % (image_path, label_path))
        # assert False

    # check projection
    if img_obj.GetProjection() != label_obj.GetProjection():
        basic.outputlogMessage("warning, not the same projection of image (%s) and label (%s)" % (image_path, label_path))
        # assert False

    return (width, height)
def get_one_sub_image_label_parallel(idx, c_polygon, bufferSize, pre_name, pre_name_for_label, c_class_int, saved_dir,
                                     image_tile_list, img_tile_boxes, dstnodata, brectangle, b_label, polygons_all,
                                     class_labels_all):
    # output message
    basic.outputlogMessage('obtaining %dth sub-image and the corresponding label raster' % idx)

    ## get an image and the corresponding label raster (has errors)
    ## image_array, label_array = get_one_sub_image_label(idx, c_polygon, class_labels[idx], polygons_all,
    ##                                                    class_labels_all, bufferSize, img_tile_boxes, image_tile_list)

    sub_image_label_str = None

    # get the buffer area
    expansion_polygon = c_polygon.buffer(bufferSize)

    if b_label:
        tail_name = '_%d_class_%d.tif' % (idx, c_class_int)
    else:
        tail_name = '_%d.tif' % (idx)

    # get one sub-image based on the buffer area
    subimg_shortName = os.path.join('subImages', pre_name + tail_name)
    subimg_saved_path = os.path.join(saved_dir, subimg_shortName)
    if get_sub_image(idx, expansion_polygon, image_tile_list, img_tile_boxes, subimg_saved_path,
                     dstnodata, brectangle) is False:
        basic.outputlogMessage('Warning, skip the %dth polygon' % idx)
        return None

    # based on the sub-image, create the corresponding label raster
    sublabel_shortName = os.path.join('subLabels', pre_name_for_label + tail_name)
    sublabel_saved_path = os.path.join(saved_dir, sublabel_shortName)
    if b_label:
        if get_sub_label(idx, subimg_saved_path, c_polygon, c_class_int, polygons_all, class_labels_all,
                         bufferSize, brectangle, sublabel_saved_path) is False:
            basic.outputlogMessage('Warning, get the label raster for %dth polygon failed' % idx)
            return None

    sub_image_label_str = subimg_shortName + ":" + sublabel_shortName + '\n'
    return sub_image_label_str
def main(options, args):
    input_shp = args[0]
    output_raster = args[1]
    if io_function.is_file_exist(input_shp) is False:
        return False

    all_class_raster = io_function.get_name_by_adding_tail(output_raster, 'AllClass')
    num_class = parameters.get_digit_parameters(options.para_file, 'NUM_CLASSES_noBG', None, 'int')

    if convert_training_examples_from_shp_to_raster(input_shp, all_class_raster) is False:
        basic.outputlogMessage("Producing the label images from training polygons failed")
        return False
    else:
        basic.outputlogMessage("Done: producing the label images from training polygons, output: %s" % all_class_raster)

    if num_class == 1:
        # only keep the target (gully or others) label
        one_class_raster = io_function.get_name_by_adding_tail(output_raster, 'oneClass')
        if only_keep_one_class(all_class_raster, one_class_raster, class_index=1) is False:
            return False
    else:
        one_class_raster = all_class_raster

    # crop the label image to have the same 2D dimension as the training images
    baseimage = parameters.get_input_image_path()
    if RSImageProcess.subset_image_baseimage(output_raster, one_class_raster, baseimage) is False:
        basic.outputlogMessage("Error: subset_image_baseimage failed")
        return False

    return True
def main(options, args):
    polygons_shp = args[0]
    field_name = options.field_name

    # add information from a raster file
    raster_file = options.raster_file
    if raster_file is not None:
        if check_same_projection(polygons_shp, raster_file) is False:
            raise ValueError('%s and %s don\'t have the same projection' % (polygons_shp, raster_file))

        buffer_meters = options.buffer_meters
        if buffer_meters is None:
            add_raster_info_insidePolygons(polygons_shp, raster_file, field_name)
            basic.outputlogMessage('add %s information (inside polygons) to %s' % (field_name, polygons_shp))
        else:
            add_raster_info_from_bufferArea(polygons_shp, raster_file, field_name, buffer_meters)
            basic.outputlogMessage('add %s information (in surrounding buffer area) to %s' % (field_name, polygons_shp))

    # add IoU values
    validation_shp = options.val_polygon
    if validation_shp is not None:
        if check_same_projection(polygons_shp, validation_shp) is False:
            # note: the original raise was missing the % arguments
            raise ValueError('%s and %s don\'t have the same projection' % (polygons_shp, validation_shp))

        if add_IoU_values(polygons_shp, validation_shp, field_name):
            basic.outputlogMessage('add %s to %s' % (field_name, polygons_shp))

    if field_name == "adj_count":
        buffer_meters = options.buffer_meters
        add_adjacent_polygon_count(polygons_shp, buffer_meters, field_name)

    if field_name == 'circularity':
        add_polygon_circularity_info(polygons_shp)
def draw_one_list_histogram(value_list, output, bins=None, labels=None, color=None, hatch="", xlabelrotation=None, ylim=None):

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
    n, bins, patches = ax.hist(value_list, bins=bins, alpha=0.75, ec="black", linewidth='1.5',
                               color=color, hatch=hatch, label=labels, rwidth=1)  # density=True,
    # ax.legend(prop={'size': 12})
    plt.xticks(bins)
    ax.tick_params(axis='both', which='both', direction='out', length=7, labelsize=20)
    if xlabelrotation is not None:
        # use the requested rotation (the original hard-coded 90 and ignored the parameter)
        ax.tick_params(axis='x', labelrotation=xlabelrotation)
    if ylim is not None:
        ax.set_ylim(ylim)
    plt.gcf().subplots_adjust(bottom=0.15)
    plt.savefig(output)
    basic.outputlogMessage("Output figures to %s" % os.path.abspath(output))
    basic.outputlogMessage("ncount: " + str(n))
    basic.outputlogMessage("bins: " + str(bins))
    histogram2logfile(value_list, bins)
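# Standalone usage sketch for draw_one_list_histogram() (random demo values;
# 'demo_hist.png' is a placeholder output path):
import numpy as np

demo_values = np.random.normal(loc=5.0, scale=2.0, size=200).tolist()
draw_one_list_histogram(demo_values, 'demo_hist.png', bins=np.linspace(0, 10, 11), color='grey')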
def train_evaluation_deeplab_separate(WORK_DIR, deeplab_dir, expr_name, para_file, network_setting_ini, gpu_num):
    '''
    In "train_evaluation_deeplab", we run training, stop, evaluate, then train again,
    which disturbs the learning rate schedule and makes the results worse.
    So in this function, we start two processes: one for training and another for evaluation (run on CPU).
    '''
    # prepare training folder
    EXP_FOLDER = expr_name
    INIT_FOLDER = os.path.join(WORK_DIR, EXP_FOLDER, 'init_models')
    TRAIN_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'train')
    EVAL_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'eval')
    VIS_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'vis')
    EXPORT_DIR = os.path.join(WORK_DIR, EXP_FOLDER, 'export')
    io_function.mkdir(INIT_FOLDER)
    io_function.mkdir(TRAIN_LOGDIR)
    io_function.mkdir(EVAL_LOGDIR)
    io_function.mkdir(VIS_LOGDIR)
    io_function.mkdir(EXPORT_DIR)

    # prepare the tensorflow checkpoint (pretrained model) for training
    pre_trained_dir = parameters.get_directory_None_if_absence(network_setting_ini, 'pre_trained_model_folder')
    pre_trained_tar = parameters.get_string_parameters(network_setting_ini, 'TF_INIT_CKPT')
    pre_trained_path = os.path.join(pre_trained_dir, pre_trained_tar)
    if os.path.isfile(pre_trained_path) is False:
        print('pre-trained model: %s does not exist, try to download' % pre_trained_path)
        # try to download the file
        pre_trained_url = parameters.get_string_parameters_None_if_absence(network_setting_ini, 'pre_trained_model_url')
        res = os.system('wget %s ' % pre_trained_url)
        if res != 0:
            sys.exit(1)
        io_function.movefiletodir(pre_trained_tar, pre_trained_dir)

    # unpack the pre-trained model to INIT_FOLDER
    os.chdir(INIT_FOLDER)
    res = os.system('tar -xf %s' % pre_trained_path)
    if res != 0:
        raise IOError('failed to unpack %s' % pre_trained_path)
    os.chdir(WORK_DIR)

    dataset_dir = os.path.join(WORK_DIR, 'tfrecord')
    batch_size = parameters.get_digit_parameters(network_setting_ini, 'batch_size', 'int')
    # maximum iteration number
    iteration_num = parameters.get_digit_parameters(network_setting_ini, 'iteration_num', 'int')
    base_learning_rate = parameters.get_digit_parameters(network_setting_ini, 'base_learning_rate', 'float')

    train_output_stride = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'train_output_stride', 'int')
    train_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'train_atrous_rates1', 'int')
    train_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'train_atrous_rates2', 'int')
    train_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'train_atrous_rates3', 'int')

    inf_output_stride = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'inf_output_stride', 'int')
    inf_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'inf_atrous_rates1', 'int')
    inf_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'inf_atrous_rates2', 'int')
    inf_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'inf_atrous_rates3', 'int')

    # depth_multiplier default is 1.0
    depth_multiplier = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'depth_multiplier', 'float')
    decoder_output_stride = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'decoder_output_stride', 'int')
    aspp_convs_filters = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'aspp_convs_filters', 'int')

    train_script = os.path.join(deeplab_dir, 'train.py')
    train_split = os.path.splitext(parameters.get_string_parameters(para_file, 'training_sample_list_txt'))[0]
    model_variant = parameters.get_string_parameters(network_setting_ini, 'model_variant')
    checkpoint = parameters.get_string_parameters(network_setting_ini, 'tf_initial_checkpoint')
    init_checkpoint_files = io_function.get_file_list_by_pattern(INIT_FOLDER, checkpoint + '*')
    if len(init_checkpoint_files) < 1:
        raise IOError('No initial checkpoint in %s with pattern: %s' % (INIT_FOLDER, checkpoint))
    init_checkpoint = os.path.join(INIT_FOLDER, checkpoint)
    b_early_stopping = parameters.get_bool_parameters(para_file, 'b_early_stopping')
    b_initialize_last_layer = parameters.get_bool_parameters(para_file, 'b_initialize_last_layer')

    dataset = parameters.get_string_parameters(para_file, 'dataset_name')
    num_classes_noBG = parameters.get_digit_parameters_None_if_absence(para_file, 'NUM_CLASSES_noBG', 'int')
    assert num_classes_noBG != None
    if b_initialize_last_layer is True:
        if pre_trained_tar in pre_trained_tar_21_classes:
            print('warning, pretrained model %s is trained with 21 classes, set num_of_classes to 21' % pre_trained_tar)
            num_classes_noBG = 20
        if pre_trained_tar in pre_trained_tar_19_classes:
            print('warning, pretrained model %s is trained with 19 classes, set num_of_classes to 19' % pre_trained_tar)
            num_classes_noBG = 18
    num_of_classes = num_classes_noBG + 1

    image_crop_size = parameters.get_string_list_parameters(para_file, 'image_crop_size')
    # note: the original check used "and", so it almost never triggered; require two digit values
    if len(image_crop_size) != 2 or not (image_crop_size[0].isdigit() and image_crop_size[1].isdigit()):
        raise ValueError('image_crop_size should be height,width')
    crop_size_str = ','.join(image_crop_size)

    # validation interval (epoch)
    # validation_interval = parameters.get_digit_parameters_None_if_absence(para_file, 'validation_interval', 'int')
    train_count, val_count = get_train_val_sample_count(WORK_DIR, para_file)
    iter_per_epoch = math.ceil(train_count / batch_size)
    total_epoches = math.ceil(iteration_num / iter_per_epoch)
    already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
    if already_trained_iteration >= iteration_num:
        basic.outputlogMessage('Training already ran %d iterations, skip' % already_trained_iteration)
        return True

    save_interval_secs = 1200    # default interval (seconds) for saving the model
    save_summaries_secs = 600    # default interval (seconds) for saving summaries
    eval_interval_secs = save_interval_secs  # if there is no newly saved model, there is no need to run evaluation
    train_process = Process(
        target=train_deeplab,
        args=(train_script, dataset, train_split, num_of_classes, base_learning_rate, model_variant,
              init_checkpoint, TRAIN_LOGDIR, dataset_dir, gpu_num,
              train_atrous_rates1, train_atrous_rates2, train_atrous_rates3, train_output_stride,
              crop_size_str, batch_size, iteration_num,
              depth_multiplier, decoder_output_stride, aspp_convs_filters, b_initialize_last_layer))
    train_process.start()
    time.sleep(60)  # wait
    if train_process.exitcode is not None and train_process.exitcode != 0:
        sys.exit(1)

    while True:
        # only run evaluation when there is a newly trained model
        already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        basic.outputlogMessage('Already trained iteration: %d, latest evaluation at %d step'
                               % (already_trained_iteration, miou_dict['step'][-1]))
        if already_trained_iteration > miou_dict['step'][-1]:
            # run evaluation and wait until it finishes
            gpuid = ""  # set gpuid to an empty string, making evaluation run on CPU
            evl_script = os.path.join(deeplab_dir, 'eval.py')
            evl_split = os.path.splitext(parameters.get_string_parameters(para_file, 'validation_sample_list_txt'))[0]
            # max_eva_number = -1 would run as many evaluations as possible (--eval_interval_secs, default 300 seconds)
            max_eva_number = 1  # only run once here; the outer while loop controls repeated evaluation
            eval_process = Process(
                target=evaluation_deeplab,
                args=(evl_script, dataset, evl_split, num_of_classes, model_variant,
                      inf_atrous_rates1, inf_atrous_rates2, inf_atrous_rates3, inf_output_stride,
                      TRAIN_LOGDIR, EVAL_LOGDIR, dataset_dir, crop_size_str, max_eva_number,
                      depth_multiplier, decoder_output_stride, aspp_convs_filters, gpuid, eval_interval_secs))
            # create the Process inside the while loop to avoid "AssertionError: cannot start a process twice"
            eval_process.start()
            while eval_process.is_alive():
                time.sleep(5)

        # check if early stopping is needed
        if b_early_stopping:
            print(datetime.now(), 'check early stopping')
            miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
            if 'overall' in miou_dict.keys() and len(miou_dict['overall']) >= 5:
                # if the last five mIOU values did not improve by 0.5%, then stop training
                if np.all(np.diff(miou_dict['overall'][-5:]) < 0.005):
                    basic.outputlogMessage('early stopping: stop training because overall miou did not improve '
                                           'in the last five evaluations')
                    output_early_stopping_message(TRAIN_LOGDIR)
                    # train_process.kill() / terminate() do not work here: the subprocess pid differs from the
                    # ps output (https://stackoverflow.com/questions/4444141/subprocess-pid-different-from-ps-output),
                    # and terminate() does not stop descendant processes; so kill the pid recorded by train.py
                    with open('train_py_pid.txt', 'r') as f_obj:
                        lines = f_obj.readlines()
                        train_pid = int(lines[0].strip())
                        os.system('kill ' + str(train_pid))
                        basic.outputlogMessage('killed the training process with id: %d' % train_pid)
                    # this breaks the while loop, so some newly saved models may not be evaluated
                    break
        # if the evaluation step is behind the saved model iteration, run another evaluation immediately
        already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        if already_trained_iteration > miou_dict['step'][-1]:
            continue

        # if training finished, stop
        if train_process.is_alive() is False:
            break

        # # if eval_process exited, then quit training as well
        # if eval_process.is_alive() is False and train_process.is_alive():
        #     train_process.kill()
        #     break

        time.sleep(eval_interval_secs)  # wait for the next evaluation

    # save loss values to disk
    get_loss_learning_rate_list(TRAIN_LOGDIR)
    # get miou again
    miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)

    # eval_process did not exit as expected; kill it again
    # os.system('kill ' + str(eval_process.pid))

    # get iou and back it up
    iou_path = os.path.join(EVAL_LOGDIR, 'miou.txt')
    loss_path = os.path.join(TRAIN_LOGDIR, 'loss_learning_rate.txt')
    patch_info = os.path.join(WORK_DIR, 'sub_images_patches_info.txt')

    # back up miou, training loss & learning rate
    test_id = os.path.basename(WORK_DIR) + '_' + expr_name
    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    if os.path.isdir(backup_dir) is False:
        io_function.mkdir(backup_dir)

    new_iou_name = os.path.join(backup_dir, test_id + '_' + os.path.basename(iou_path))
    io_function.copy_file_to_dst(iou_path, new_iou_name, overwrite=True)
    loss_new_name = os.path.join(backup_dir, test_id + '_' + os.path.basename(loss_path))
    io_function.copy_file_to_dst(loss_path, loss_new_name, overwrite=True)
    new_patch_info = os.path.join(backup_dir, test_id + '_' + os.path.basename(patch_info))
    io_function.copy_file_to_dst(patch_info, new_patch_info, overwrite=True)

    # plot mIOU, loss, and learning rate curves, and back them up
    miou_curve_path = plot_miou_loss_curve.plot_miou_loss_main(iou_path, train_count=train_count,
                                                               val_count=val_count, batch_size=batch_size)
    loss_curve_path = plot_miou_loss_curve.plot_miou_loss_main(loss_path, train_count=train_count,
                                                               val_count=val_count, batch_size=batch_size)
    miou_curve_bakname = os.path.join(backup_dir, test_id + '_' + os.path.basename(miou_curve_path))
    io_function.copy_file_to_dst(miou_curve_path, miou_curve_bakname, overwrite=True)
    loss_curve_bakname = os.path.join(backup_dir, test_id + '_' + os.path.basename(loss_curve_path))
    io_function.copy_file_to_dst(loss_curve_path, loss_curve_bakname, overwrite=True)
def train_evaluation_deeplab(WORK_DIR, deeplab_dir, expr_name, para_file, network_setting_ini, gpu_num):

    # prepare training folder
    EXP_FOLDER = expr_name
    INIT_FOLDER = os.path.join(WORK_DIR, EXP_FOLDER, 'init_models')
    TRAIN_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'train')
    EVAL_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'eval')
    VIS_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'vis')
    EXPORT_DIR = os.path.join(WORK_DIR, EXP_FOLDER, 'export')
    io_function.mkdir(INIT_FOLDER)
    io_function.mkdir(TRAIN_LOGDIR)
    io_function.mkdir(EVAL_LOGDIR)
    io_function.mkdir(VIS_LOGDIR)
    io_function.mkdir(EXPORT_DIR)

    # prepare the tensorflow checkpoint (pretrained model) for training
    pre_trained_dir = parameters.get_directory_None_if_absence(network_setting_ini, 'pre_trained_model_folder')
    pre_trained_tar = parameters.get_string_parameters(network_setting_ini, 'TF_INIT_CKPT')
    pre_trained_path = os.path.join(pre_trained_dir, pre_trained_tar)
    if os.path.isfile(pre_trained_path) is False:
        print('pre-trained model: %s does not exist, try to download' % pre_trained_path)
        # try to download the file
        pre_trained_url = parameters.get_string_parameters_None_if_absence(network_setting_ini, 'pre_trained_model_url')
        res = os.system('wget %s ' % pre_trained_url)
        if res != 0:
            sys.exit(1)
        io_function.movefiletodir(pre_trained_tar, pre_trained_dir)

    # unpack the pre-trained model to INIT_FOLDER
    os.chdir(INIT_FOLDER)
    res = os.system('tar -xf %s' % pre_trained_path)
    if res != 0:
        raise IOError('failed to unpack %s' % pre_trained_path)
    os.chdir(WORK_DIR)

    dataset_dir = os.path.join(WORK_DIR, 'tfrecord')
    batch_size = parameters.get_digit_parameters(network_setting_ini, 'batch_size', 'int')
    # maximum iteration number
    iteration_num = parameters.get_digit_parameters(network_setting_ini, 'iteration_num', 'int')
    base_learning_rate = parameters.get_digit_parameters(network_setting_ini, 'base_learning_rate', 'float')

    train_output_stride = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'train_output_stride', 'int')
    train_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'train_atrous_rates1', 'int')
    train_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'train_atrous_rates2', 'int')
    train_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'train_atrous_rates3', 'int')

    inf_output_stride = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'inf_output_stride', 'int')
    inf_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'inf_atrous_rates1', 'int')
    inf_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'inf_atrous_rates2', 'int')
    inf_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'inf_atrous_rates3', 'int')

    # depth_multiplier default is 1.0
    depth_multiplier = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'depth_multiplier', 'float')
    decoder_output_stride = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'decoder_output_stride', 'int')
    aspp_convs_filters = parameters.get_digit_parameters_None_if_absence(network_setting_ini, 'aspp_convs_filters', 'int')

    train_script = os.path.join(deeplab_dir, 'train.py')
    train_split = os.path.splitext(parameters.get_string_parameters(para_file, 'training_sample_list_txt'))[0]
    model_variant = parameters.get_string_parameters(network_setting_ini, 'model_variant')
    checkpoint = parameters.get_string_parameters(network_setting_ini, 'tf_initial_checkpoint')
    init_checkpoint_files = io_function.get_file_list_by_pattern(INIT_FOLDER, checkpoint + '*')
    if len(init_checkpoint_files) < 1:
        raise IOError('No initial checkpoint in %s with pattern: %s' % (INIT_FOLDER, checkpoint))
    init_checkpoint = os.path.join(INIT_FOLDER, checkpoint)
    b_early_stopping = parameters.get_bool_parameters(para_file, 'b_early_stopping')
    b_initialize_last_layer = parameters.get_bool_parameters(para_file, 'b_initialize_last_layer')

    dataset = parameters.get_string_parameters(para_file, 'dataset_name')
    num_classes_noBG = parameters.get_digit_parameters_None_if_absence(para_file, 'NUM_CLASSES_noBG', 'int')
    assert num_classes_noBG != None
    if b_initialize_last_layer is True:
        if pre_trained_tar in pre_trained_tar_21_classes:
            print('warning, pretrained model %s is trained with 21 classes, set num_of_classes to 21' % pre_trained_tar)
            num_classes_noBG = 20
        if pre_trained_tar in pre_trained_tar_19_classes:
            print('warning, pretrained model %s is trained with 19 classes, set num_of_classes to 19' % pre_trained_tar)
            num_classes_noBG = 18
    num_of_classes = num_classes_noBG + 1

    image_crop_size = parameters.get_string_list_parameters(para_file, 'image_crop_size')
    # note: the original check used "and", so it almost never triggered; require two digit values
    if len(image_crop_size) != 2 or not (image_crop_size[0].isdigit() and image_crop_size[1].isdigit()):
        raise ValueError('image_crop_size should be height,width')
    crop_size_str = ','.join(image_crop_size)

    evl_script = os.path.join(deeplab_dir, 'eval.py')
    evl_split = os.path.splitext(parameters.get_string_parameters(para_file, 'validation_sample_list_txt'))[0]
    max_eva_number = 1

    # validation interval (epoch)
    validation_interval = parameters.get_digit_parameters_None_if_absence(para_file, 'validation_interval', 'int')
    train_count, val_count = get_train_val_sample_count(WORK_DIR, para_file)
    iter_per_epoch = math.ceil(train_count / batch_size)
    total_epoches = math.ceil(iteration_num / iter_per_epoch)
    already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
    if already_trained_iteration >= iteration_num:
        basic.outputlogMessage('Training already ran %d iterations, skip' % already_trained_iteration)
        return True

    if validation_interval is None:
        basic.outputlogMessage('No input validation_interval, so train to %d iterations, then evaluate at the end'
                               % iteration_num)
        # run training
        train_deeplab(train_script, dataset, train_split, num_of_classes, base_learning_rate, model_variant,
                      init_checkpoint, TRAIN_LOGDIR, dataset_dir, gpu_num,
                      train_atrous_rates1, train_atrous_rates2, train_atrous_rates3, train_output_stride,
                      crop_size_str, batch_size, iteration_num,
                      depth_multiplier, decoder_output_stride, aspp_convs_filters, b_initialize_last_layer)

        # run evaluation
        evaluation_deeplab(evl_script, dataset, evl_split, num_of_classes, model_variant,
                           inf_atrous_rates1, inf_atrous_rates2, inf_atrous_rates3, inf_output_stride,
                           TRAIN_LOGDIR, EVAL_LOGDIR, dataset_dir,
                           crop_size_str, max_eva_number, depth_multiplier, decoder_output_stride, aspp_convs_filters)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        get_loss_learning_rate_list(TRAIN_LOGDIR)
    else:
        basic.outputlogMessage('training to the maximum iteration of %d, and evaluating every %d epoch(s)'
                               % (iteration_num, validation_interval))
        for epoch in range(validation_interval, total_epoches + validation_interval, validation_interval):
            to_iter_num = min(epoch * iter_per_epoch, iteration_num)
            if to_iter_num <= already_trained_iteration:
                continue
            basic.outputlogMessage('training and evaluating to %d epochs (to iteration: %d)' % (epoch, to_iter_num))

            # run training
            train_deeplab(train_script, dataset, train_split, num_of_classes, base_learning_rate, model_variant,
                          init_checkpoint, TRAIN_LOGDIR, dataset_dir, gpu_num,
                          train_atrous_rates1, train_atrous_rates2, train_atrous_rates3, train_output_stride,
                          crop_size_str, batch_size, to_iter_num,
                          depth_multiplier, decoder_output_stride, aspp_convs_filters, b_initialize_last_layer)

            # run evaluation
            evaluation_deeplab(evl_script, dataset, evl_split, num_of_classes, model_variant,
                               inf_atrous_rates1, inf_atrous_rates2, inf_atrous_rates3, inf_output_stride,
                               TRAIN_LOGDIR, EVAL_LOGDIR, dataset_dir,
                               crop_size_str, max_eva_number, depth_multiplier, decoder_output_stride, aspp_convs_filters)

            # get miou
            miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
            # save loss values to disk
            get_loss_learning_rate_list(TRAIN_LOGDIR)

            # check if early stopping is needed
            if b_early_stopping:
                if len(miou_dict['overall']) >= 5:
                    # if the last five mIOU values did not improve by 0.5%, then stop training
                    if np.all(np.diff(miou_dict['overall'][-5:]) < 0.005):
                        basic.outputlogMessage('early stopping: stop training because overall miou did not improve '
                                               'in the last five evaluations')
                        output_early_stopping_message(TRAIN_LOGDIR)
                        break

    # plot mIOU, loss, and learning rate curves
    iou_path = os.path.join(EVAL_LOGDIR, 'miou.txt')
    loss_path = os.path.join(TRAIN_LOGDIR, 'loss_learning_rate.txt')
    miou_curve_path = plot_miou_loss_curve.plot_miou_loss_main(iou_path, train_count=train_count,
                                                               val_count=val_count, batch_size=batch_size)
    loss_curve_path = plot_miou_loss_curve.plot_miou_loss_main(loss_path, train_count=train_count,
                                                               val_count=val_count, batch_size=batch_size)

    # back up miou, training loss & learning rate
    test_id = os.path.basename(WORK_DIR) + '_' + expr_name
    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    if os.path.isdir(backup_dir) is False:
        io_function.mkdir(backup_dir)
    new_iou_name = os.path.join(backup_dir, test_id + '_' + os.path.basename(iou_path))
    io_function.copy_file_to_dst(iou_path, new_iou_name, overwrite=True)
    miou_curve_bakname = os.path.join(backup_dir, test_id + '_' + os.path.basename(miou_curve_path))
    io_function.copy_file_to_dst(miou_curve_path, miou_curve_bakname, overwrite=True)
    loss_new_name = os.path.join(backup_dir, test_id + '_' + os.path.basename(loss_path))
    io_function.copy_file_to_dst(loss_path, loss_new_name, overwrite=True)
    loss_curve_bakname = os.path.join(backup_dir, test_id + '_' + os.path.basename(loss_curve_path))
    io_function.copy_file_to_dst(loss_curve_path, loss_curve_bakname, overwrite=True)
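# The early-stopping test used in the two training functions above reduces to
# this check on the last five mIOU values (self-contained numpy sketch):
import numpy as np

def should_stop(miou_list, window=5, min_improve=0.005):
    if len(miou_list) < window:
        return False
    # stop only if none of the last (window - 1) changes reached min_improve
    return bool(np.all(np.diff(miou_list[-window:]) < min_improve))

assert should_stop([0.60, 0.601, 0.602, 0.602, 0.603]) is True   # stagnant -> stop
assert should_stop([0.60, 0.62, 0.64, 0.66, 0.68]) is False      # still improving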
import os, sys

HOME = os.path.expanduser('~')
codes_path = HOME + '/codes/PycharmProjects/DeeplabforRS'
sys.path.insert(0, codes_path)

import parameters
import basic_src.basic as basic

para_file = "para.ini"
# patchSize = 65
patchSize = 455

# while (patchSize < 460):
while (patchSize >= 65):
    basic.outputlogMessage("Test on Patch size (train): train_patch_width=%d, train_patch_height=%d" % (patchSize, patchSize))

    # change the para.ini file
    parameters.write_Parameters_file(para_file, "train_patch_width", patchSize)
    parameters.write_Parameters_file(para_file, "train_patch_height", patchSize)

    # run the model
    if os.path.isfile("./whole_process.sh") is False:
        basic.outputlogMessage("Please copy the whole_process.sh file first")
        sys.exit(1)
    os.system("./whole_process.sh")

    # decrease the patch size for the next run (the original snippet did not
    # show the update step and would loop forever; the step of 10 is assumed)
    patchSize -= 10
def main(options, args):

    polygons_shp = args[0]
    polygons_shp_backup = args[0]
    output = options.output
    if output is None:
        output = io_function.get_name_by_adding_tail(polygons_shp, 'removed')
    para_file = options.para_file
    assert io_function.is_file_exist(polygons_shp)

    # remove polygons based on area
    # area_thr = 1000  # 10 pixels
    area_thr = parameters.get_digit_parameters_None_if_absence(para_file, 'minimum_area', 'int')
    b_smaller = True
    if area_thr is not None:
        rm_area_save_shp = io_function.get_name_by_adding_tail(polygons_shp_backup, 'rmArea')
        if remove_polygons(polygons_shp, 'INarea', area_thr, b_smaller, rm_area_save_shp) is False:
            basic.outputlogMessage("error, removing polygons based on size failed")
        else:
            polygons_shp = rm_area_save_shp
    else:
        basic.outputlogMessage('warning, minimum_area is absent in the para file, skip removing polygons based on areas')

    # remove polygons based on slope information
    # slope_small_thr = 2
    slope_small_thr = parameters.get_digit_parameters_None_if_absence(para_file, 'minimum_slope', 'float')
    b_smaller = True
    if slope_small_thr is not None:
        rm_slope_save_shp1 = io_function.get_name_by_adding_tail(polygons_shp_backup, 'rmslope1')
        if remove_polygons(polygons_shp, 'slo_mean', slope_small_thr, b_smaller, rm_slope_save_shp1) is False:
            basic.outputlogMessage("error, removing polygons based on slo_mean failed")
        else:
            polygons_shp = rm_slope_save_shp1
    else:
        basic.outputlogMessage('warning, minimum_slope is absent in the para file, skip removing polygons based on minimum slope')

    # slope_large_thr = 20
    slope_large_thr = parameters.get_digit_parameters_None_if_absence(para_file, 'maximum_slope', 'float')
    b_smaller = False
    if slope_large_thr is not None:
        rm_slope_save_shp2 = io_function.get_name_by_adding_tail(polygons_shp_backup, 'rmslope2')
        if remove_polygons(polygons_shp, 'slo_mean', slope_large_thr, b_smaller, rm_slope_save_shp2) is False:
            basic.outputlogMessage("error, removing polygons based on slo_mean (2) failed")
        else:
            polygons_shp = rm_slope_save_shp2
    else:
        basic.outputlogMessage('warning, maximum_slope is absent in the para file, skip removing polygons based on maximum slope')

    # remove polygons based on dem
    # dem_small_thr = 3000
    dem_small_thr = parameters.get_digit_parameters_None_if_absence(para_file, 'minimum_elevation', 'int')
    b_smaller = True
    if dem_small_thr is not None:
        rm_dem_save_shp = io_function.get_name_by_adding_tail(polygons_shp_backup, 'rmDEM')
        if remove_polygons(polygons_shp, 'dem_mean', dem_small_thr, b_smaller, rm_dem_save_shp) is False:
            basic.outputlogMessage("error, removing polygons based on dem_mean failed")
        else:
            polygons_shp = rm_dem_save_shp
    else:
        basic.outputlogMessage('warning, minimum_elevation is absent in the para file, skip removing polygons based on minimum elevation')

    # remove polygons not in the extent
    outline_shp = parameters.get_string_parameters_None_if_absence(para_file, 'target_outline_shp')
    if outline_shp is not None:
        rm_outline_save_shp = io_function.get_name_by_adding_tail(polygons_shp_backup, 'rmOutline')
        remove_polygons_outside_extent(polygons_shp, outline_shp, rm_outline_save_shp)
        polygons_shp = rm_outline_save_shp
    else:
        basic.outputlogMessage('warning, target_outline_shp is absent in the para file, skip removing polygons based on outlines')

    # copy to the final output
    copy_shape_file(polygons_shp, output)
def build_adjacent_map_of_polygons(polygons_list, process_num=1):
    """
    build an adjacency matrix of the touching polygons
    :param polygons_list: a list containing all the shapely (not pyshp) polygons
    :return: a matrix storing the adjacency (shared points) for all polygons
    """
    # another implementation is in vector_features.py;
    # here, we run the calculation in parallel to improve the efficiency.
    # the input polygons must all be valid.
    polygon_count = len(polygons_list)
    if polygon_count < 2:
        basic.outputlogMessage('error, the count of polygons is less than 2')
        return False

    # https://shapely.readthedocs.io/en/stable/manual.html#str-packed-r-tree
    # tree = STRtree(polygons_list)
    polygon_boxes = [get_polygon_bounding_box(item) for item in polygons_list]

    # this would take a lot of memory if there are many polygons, e.g., more than 10,000
    ad_matrix = np.zeros((polygon_count, polygon_count), dtype=np.int8)

    if process_num == 1:
        for i in range(0, polygon_count):
            t0 = time.time()
            start_idx = i + 1
            # only check the polygons whose bounding boxes overlap the current one
            check_polygons = [polygons_list[j] for j in range(start_idx, polygon_count)
                              if is_two_bound_disjoint(polygon_boxes[i], polygon_boxes[j]) is False]
            adj_polygons, adj_poly_idxs = find_adjacent_polygons(polygons_list[i], check_polygons)
            # print(datetime.now(), '%d/%d' % (i, polygon_count), 'cost', time.time() - t0)
            for idx in adj_poly_idxs:
                j = start_idx + idx
                ad_matrix[i, j] = 1
                # also fill the lower part of the matrix, or later polygons cannot find previous neighbours
                ad_matrix[j, i] = 1
    elif process_num > 1:
        process_pool = Pool(process_num)
        parameters_list = [(i, polygons_list, polygon_boxes, i + 1, polygon_count) for i in range(0, polygon_count)]
        results = process_pool.starmap(find_adjacent_polygons_from_sub, parameters_list)
        print(datetime.now(), 'finish parallel running')
        for i, adj_polygons, adj_poly_idxs in results:
            for idx in adj_poly_idxs:
                j = i + 1 + idx
                ad_matrix[i, j] = 1
                # also fill the lower part of the matrix, or later polygons cannot find previous neighbours
                ad_matrix[j, i] = 1
    else:
        raise ValueError('wrong process_num: %d' % process_num)

    return ad_matrix
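# A quick sanity check for build_adjacent_map_of_polygons() with three boxes:
# the first two share an edge and the third is far away. This relies on the
# module helpers used above (get_polygon_bounding_box, find_adjacent_polygons,
# is_two_bound_disjoint), so it is a sketch under that assumption:
from shapely.geometry import box

demo_polys = [box(0, 0, 1, 1), box(1, 0, 2, 1), box(5, 5, 6, 6)]
adj = build_adjacent_map_of_polygons(demo_polys, process_num=1)
# expected: adj[0, 1] == adj[1, 0] == 1, and the third row/column is all zeros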
def get_sub_images_and_labels(t_polygons_shp, t_polygons_shp_all, bufferSize, image_tile_list,
                              saved_dir, pre_name, dstnodata, brectangle=True):
    '''
    get sub-images (and labels) from training polygons
    :param t_polygons_shp: training polygons
    :param t_polygons_shp_all: the full set of training polygons; t_polygons_shp is a subset of or equal to this one
    :param bufferSize: buffer size of a center polygon to create a sub-image
    :param image_tile_list: image tiles
    :param saved_dir: output dir
    :param dstnodata: nodata value for the output images
    :param brectangle: True: get the rectangular extent of an image
    :return:
    '''
    # read polygons
    t_shapefile = gpd.read_file(t_polygons_shp)
    class_labels = t_shapefile['class_int'].tolist()
    center_polygons = t_shapefile.geometry.values
    # check_polygons_invalidity(center_polygons, t_polygons_shp)

    # read the full set of training polygons, used to produce the label images
    t_shapefile_all = gpd.read_file(t_polygons_shp_all)
    class_labels_all = t_shapefile_all['class_int'].tolist()
    polygons_all = t_shapefile_all.geometry.values
    # check_polygons_invalidity(polygons_all, t_polygons_shp_all)

    img_tile_boxes = get_image_tile_bound_boxes(image_tile_list)
    pre_name_for_label = os.path.splitext(os.path.basename(t_polygons_shp))[0]

    list_txt_obj = open('sub_images_labels_list.txt', 'a')
    # go through each polygon
    for idx, (c_polygon, c_class_int) in enumerate(zip(center_polygons, class_labels)):
        basic.outputlogMessage('obtaining %dth sub-image and the corresponding label raster' % idx)

        ## get an image and the corresponding label raster (has errors)
        ## image_array, label_array = get_one_sub_image_label(idx, c_polygon, class_labels[idx], polygons_all,
        ##                                                    class_labels_all, bufferSize, img_tile_boxes, image_tile_list)

        # get the buffer area
        expansion_polygon = c_polygon.buffer(bufferSize)

        # get one sub-image based on the buffer area
        subimg_shortName = os.path.join('subImages', pre_name + '_%d_class_%d.tif' % (idx, c_class_int))
        subimg_saved_path = os.path.join(saved_dir, subimg_shortName)
        if get_sub_image(idx, expansion_polygon, image_tile_list, img_tile_boxes, subimg_saved_path,
                         dstnodata, brectangle) is False:
            basic.outputlogMessage('Warning, skip the %dth polygon' % idx)
            continue

        # based on the sub-image, create the corresponding label raster
        sublabel_shortName = os.path.join('subLabels', pre_name_for_label + '_%d_class_%d.tif' % (idx, c_class_int))
        sublabel_saved_path = os.path.join(saved_dir, sublabel_shortName)
        if get_sub_label(idx, subimg_saved_path, c_polygon, c_class_int, polygons_all, class_labels_all,
                         bufferSize, brectangle, sublabel_saved_path) is False:
            basic.outputlogMessage('Warning, get the label raster for %dth polygon failed' % idx)
            continue

        list_txt_obj.writelines(subimg_shortName + ":" + sublabel_shortName + '\n')

    list_txt_obj.close()

    # extract the geometry in GeoJSON format
    if t_polygons_shp_all != t_polygons_shp:
        # find the training polygons in the full set
        pass
def get_one_sub_image_label(idx, center_polygon, class_int, polygons_all, class_int_all, bufferSize,
                            img_tile_boxes, image_tile_list):
    '''
    get a sub-image and the corresponding label raster
    :param idx: the polygon index
    :param center_polygon: the polygon in the training polygons
    :param class_int: the class number of this polygon
    :param polygons_all: the full set of training polygons, for generating label images
    :param class_int_all: the class numbers for the full set of training polygons
    :param bufferSize: the buffer area to generate sub-images
    :param img_tile_boxes: the bound boxes of all the image tiles
    :param image_tile_list: the list of image paths
    :return:
    '''

    ############# This function is not working #############

    # center_polygon corresponds to one polygon in the full set of training polygons, so it is not necessary to check

    # get adjacent polygons
    adj_polygons, adj_polygons_class = get_adjacent_polygons(center_polygon, polygons_all, class_int_all, bufferSize)

    # add the center polygon to adj_polygons
    adj_polygons.extend([center_polygon])
    adj_polygons_class.extend([class_int])
    basic.outputlogMessage('get a sub-image covering %d training polygons' % len(adj_polygons))

    # find the images which the center polygon overlaps (one or two images)
    img_index = get_overlap_image_index(adj_polygons, img_tile_boxes)
    if len(img_index) < 1:
        basic.outputlogMessage('Warning, the %dth polygon and the adjacent ones do not overlap any image tile, please check '
                               '(1) that the shape file and raster have the same projection '
                               'and (2) that this polygon is in the extent of the images' % idx)

    image_list = [image_tile_list[item] for item in img_index]

    # open the raster to get the projection and resolution
    # with rasterio.open(image_list[0]) as src:
    #     resX = src.res[0]
    #     resY = src.res[1]
    #     src_profile = src.profile
    src = rasterio.open(image_list[0])
    resX = src.res[0]
    resY = src.res[1]
    src_profile = src.profile

    # rasterize the shapes
    burn_shapes = [(item_shape, item_class_int) for (item_shape, item_class_int) in
                   zip(adj_polygons, adj_polygons_class)]
    burn_boxes = get_bounds_of_polygons(adj_polygons)

    # check whether the extent is too large
    burn_boxes_width = math.ceil((burn_boxes[2] - burn_boxes[0]) / resX)
    burn_boxes_height = math.ceil((burn_boxes[3] - burn_boxes[1]) / resY)
    if burn_boxes_width * burn_boxes_height > 10000 * 10000:
        raise ValueError('error, the polygons to burn cover a very large area')

    # fill as 255 for regions outside the shapes, for test purposes
    # setting all_touched as True may be good for small shapes
    # new_transform = (burn_boxes[0], resX, 0, burn_boxes[3], 0, -resY)  # (X_min, resX, 0, Y_max, 0, -resY)
    # GDAL-style transforms have been deprecated since rasterio 1.0
    # affine.Affine() vs.
    # GDAL-style geotransforms: https://rasterio.readthedocs.io/en/stable/topics/migrating-to-v1.html
    new_transform = (resX, 0, burn_boxes[0], 0, -resY, burn_boxes[3])  # (resX, 0, X_min, 0, -resY, Y_max)
    # note: rasterize expects out_shape as (rows, cols), i.e., (height, width);
    # the original passed (width, height)
    out_label = rasterize(burn_shapes, out_shape=(burn_boxes_height, burn_boxes_width), transform=new_transform,
                          fill=0, all_touched=False, dtype=rasterio.uint8)
    print('new_transform', new_transform)
    print('out_label', out_label.shape)

    # test: save to disk
    kwargs = src.meta
    kwargs.update(
        dtype=rasterio.uint8,
        count=1,
        width=burn_boxes_width,
        height=burn_boxes_height,
        transform=new_transform)
    with rasterio.open('test_6_albers.tif', 'w', **kwargs) as dst:
        dst.write_band(1, out_label.astype(rasterio.uint8))

    # mask: get the pixels covered by polygons, set all_touched as True
    polygons_json = [mapping(item) for item in adj_polygons]
    out_image, out_transform = mask(src, polygons_json, nodata=0, all_touched=True, crop=True)

    # test: output information
    print('out_transform', out_transform)
    print('out_image', out_image.shape)

    # test: save it to disk
    out_meta = src.meta.copy()
    out_meta.update({"driver": "GTiff",
                     "height": out_image.shape[1],
                     "width": out_image.shape[2],
                     "transform": out_transform})
    # note that the saved image has a small offset compared to the original one (~0.5 pixel)
    save_path = "masked_of_polygon_%d.tif" % (idx + 1)
    with rasterio.open(save_path, "w", **out_meta) as dest:
        dest.write(out_image)

    # return image_array, label_array
    return 1, 1
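# Side note on the transform ordering discussed above: rasterio/affine order is
# (resX, 0, X_min, 0, -resY, Y_max), while the legacy GDAL geotransform is
# (X_min, resX, 0, Y_max, 0, -resY). Affine.from_gdal() converts between them
# (demo numbers only):
from affine import Affine

gdal_gt = (100.0, 2.0, 0.0, 500.0, 0.0, -2.0)  # (X_min, resX, 0, Y_max, 0, -resY)
assert Affine.from_gdal(*gdal_gt) == Affine(2.0, 0.0, 100.0, 0.0, -2.0, 500.0)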
def get_sub_image(idx, selected_polygon, image_tile_list, image_tile_bounds, save_path, dstnodata, brectangle):
    '''
    get a sub-image based on a selected polygon; it may cross two image tiles
    :param selected_polygon: the selected polygon
    :param image_tile_list: image list
    :param image_tile_bounds: the boxes of the images in the list
    :param save_path: save path
    :param brectangle: if True, crop the raster using the bounds; otherwise, use the polygon
    :return: True if successful, False otherwise
    '''

    # find the images which the center polygon overlaps (one or two images)
    img_index = get_overlap_image_index([selected_polygon], image_tile_bounds)
    if len(img_index) < 1:
        basic.outputlogMessage('Warning, the %dth polygon does not overlap any image tile, please check '  # and its buffer area
                               '(1) that the shape file and raster have the same projection '
                               'and (2) that this polygon is in the extent of the images' % idx)
        return False

    image_list = [image_tile_list[item] for item in img_index]

    # check whether it crosses two or more images
    if len(image_list) == 1:
        # the polygon only overlaps one raster
        with rasterio.open(image_list[0]) as src:
            polygon_json = mapping(selected_polygon)

            # not necessary
            # overlap_win = rasterio.features.geometry_window(src, [polygon_json], pad_x=0, pad_y=0, north_up=True,
            #                                                 rotated=False, pixel_precision=3)

            if brectangle:
                # polygon_box = selected_polygon.bounds
                polygon_json = mapping(selected_polygon.envelope)  # shapely.geometry.Polygon([polygon_box])

            # crop the image and save to disk
            out_image, out_transform = mask(src, [polygon_json], nodata=dstnodata, all_touched=True, crop=True)

            out_meta = src.meta.copy()
            out_meta.update({"driver": "GTiff",
                             "height": out_image.shape[1],
                             "width": out_image.shape[2],
                             "transform": out_transform})
            # note that the saved image has a small offset compared to the original one (~0.5 pixel)
            with rasterio.open(save_path, "w", **out_meta) as dest:
                dest.write(out_image)
    else:
        # the polygon overlaps more than one raster, so produce a mosaic
        tmp_saved_files = []
        for k_img, image_path in enumerate(image_list):
            with rasterio.open(image_path) as src:
                polygon_json = mapping(selected_polygon)
                if brectangle:
                    # polygon_box = selected_polygon.bounds
                    polygon_json = mapping(selected_polygon.envelope)  # shapely.geometry.Polygon([polygon_box])

                # crop the image and save to disk
                out_image, out_transform = mask(src, [polygon_json], nodata=dstnodata, all_touched=True, crop=True)
                tmp_saved = os.path.splitext(save_path)[0] + '_%d' % k_img + os.path.splitext(save_path)[1]

                out_meta = src.meta.copy()
                out_meta.update({"driver": "GTiff",
                                 "height": out_image.shape[1],
                                 "width": out_image.shape[2],
                                 "transform": out_transform})
                # note that the saved image has a small offset compared to the original one (~0.5 pixel)
                with rasterio.open(tmp_saved, "w", **out_meta) as dest:
                    dest.write(out_image)
                tmp_saved_files.append(tmp_saved)

        # mosaic the files in tmp_saved_files
        mosaic_args_list = ['gdal_merge.py', '-o', save_path, '-n', str(dstnodata), '-a_nodata', str(dstnodata)]
        mosaic_args_list.extend(tmp_saved_files)
        if basic.exec_command_args_list_one_file(mosaic_args_list, save_path) is False:
            raise IOError('error, obtaining a mosaic (%s) failed' % save_path)

        # remove the tmp files
        for tmp_file in tmp_saved_files:
            io_function.delete_file_or_dir(tmp_file)

    # TODO: if it would output a very large image (10000 by 10000 pixels), raise an error

    return True