def performStage2TilesSegmentation(self, tilesImgDIR, tilesMaskedDIR, tilesSegsDIR, tilesSegBordersDIR, tmpDIR,
                                   tilesBase, s1BordersImage, segStatsInfo, minPxlsVal, distThresVal, bandsVal,
                                   ncpus):
    rsgisUtils = rsgislib.RSGISPyUtils()
    imgTiles = glob.glob(os.path.join(tilesImgDIR, tilesBase + "*.kea"))
    for imgTile in imgTiles:
        baseName = os.path.splitext(os.path.basename(imgTile))[0]
        maskedFile = os.path.join(tilesMaskedDIR, baseName + '_masked.kea')
        dataType = rsgisUtils.getRSGISLibDataTypeFromImg(imgTile)
        imageutils.maskImage(imgTile, s1BordersImage, maskedFile, 'KEA', dataType, 0, 0)

    imgTiles = glob.glob(os.path.join(tilesMaskedDIR, tilesBase + "*_masked.kea"))

    def stage2threadedTiledImgSeg(imgTile):
        baseName = os.path.splitext(os.path.basename(imgTile))[0]
        clumpsFile = os.path.join(tilesSegsDIR, baseName + '_segs.kea')
        kMeansCentres, imgStretchStats = self.findSegStatsFiles(imgTile, segStatsInfo)
        segutils.runShepherdSegmentationPreCalcdStats(imgTile, clumpsFile, kMeansCentres, imgStretchStats,
                                                      outputMeanImg=None,
                                                      tmpath=os.path.join(tmpDIR, baseName + '_segstemp'),
                                                      gdalformat='KEA', noStats=False, noStretch=False,
                                                      noDelete=False, minPxls=minPxlsVal, distThres=distThresVal,
                                                      bands=bandsVal, processInMem=False)

    p = Pool(ncpus)
    p.map(stage2threadedTiledImgSeg, imgTiles)

    segTiles = glob.glob(os.path.join(tilesSegsDIR, tilesBase + "*_segs.kea"))
    for segTile in segTiles:
        baseName = os.path.splitext(os.path.basename(segTile))[0]
        borderMaskFile = os.path.join(tilesSegBordersDIR, baseName + '_segsborder.kea')
        rastergis.defineBorderClumps(segTile, 'BoundaryClumps')
        rastergis.exportCol2GDALImage(segTile, borderMaskFile, 'KEA', rsgislib.TYPE_8UINT, 'BoundaryClumps')
def calcWhiteness(image, bBand, gBand, rBand, outImage, stats=True, gdalformat='KEA'):
    """
    Helper function to calculate whiteness, note the output no data value is -999.

    Where:

    :param image: is a string specifying the input image file.
    :param bBand: is an int specifying the blue band in the input image (band indexing starts at 1)
    :param gBand: is an int specifying the green band in the input image (band indexing starts at 1)
    :param rBand: is an int specifying the red band in the input image (band indexing starts at 1)
    :param outImage: is a string specifying the output image file.
    :param stats: is a boolean specifying whether pyramids and stats should be calculated (Default: True)
    :param gdalformat: is a string specifying the output image file format (Default: KEA)

    """
    expression = '(blue+green+red)!=0?(abs(blue-((blue+green+red)/3)) + abs(green-((blue+green+red)/3)) + abs(red-((blue+green+red)/3)))/((blue+green+red)/3):-999'
    bandDefns = []
    bandDefns.append(rsgislib.imagecalc.BandDefn('blue', image, bBand))
    bandDefns.append(rsgislib.imagecalc.BandDefn('green', image, gBand))
    bandDefns.append(rsgislib.imagecalc.BandDefn('red', image, rBand))
    rsgislib.imagecalc.bandMath(outImage, expression, gdalformat, rsgislib.TYPE_32FLOAT, bandDefns)
    # Set no data value
    rsgislib.RSGISPyUtils().setImageNoDataValue(outImage, -999)
    if stats:
        rsgislib.imageutils.popImageStats(outImage, False, -999., True)
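# Example usage of calcWhiteness (a minimal sketch; the input/output file names
# below are hypothetical and not part of the original module):
#
#   calcWhiteness('sen2_toa_refl.kea', bBand=1, gBand=2, rBand=3,
#                 outImage='sen2_whiteness.kea', stats=True, gdalformat='KEA')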
def sortImgsUTM2DIRs(inputImgsDIR, fileSearchStr, outBaseDIR):
    """
    A function which sorts a series of input image files, which are projected using the UTM system,
    into individual directories per UTM zone. Please note that the input files are moved on your system!!

    Where:

    :param inputImgsDIR: directory where the input files are to be found.
    :param fileSearchStr: the wildcard search string to find files within the input directory (e.g., \*.kea).
    :param outBaseDIR: the output directory where the UTM folders will be created and the files moved to.

    """
    rsgisUtils = rsgislib.RSGISPyUtils()
    inFiles = glob.glob(os.path.join(inputImgsDIR, fileSearchStr))
    for imgFile in inFiles:
        utmZone = rsgisUtils.getUTMZone(imgFile)
        if utmZone is not None:
            outDIR = os.path.join(outBaseDIR, 'utm' + utmZone)
            if not os.path.exists(outDIR):
                os.makedirs(outDIR)
            imgFileList = rsgisUtils.getImageFiles(imgFile)
            for tmpFile in imgFileList:
                print('Moving: ' + tmpFile)
                outFile = os.path.join(outDIR, os.path.basename(tmpFile))
                shutil.move(tmpFile, outFile)
def normalise_image_band(input_img, band, output_img, gdal_format='KEA'):
    """
    Perform a simple normalisation of a single image band (val - min)/range.

    :param input_img: The input image file.
    :param band: the image band (starts at 1) to be normalised
    :param output_img: the output image file (will just be a single band).
    :param gdal_format: The output image format.

    """
    import rsgislib
    import rsgislib.imagecalc
    import rsgislib.imageutils

    rsgis_utils = rsgislib.RSGISPyUtils()
    no_data_val = rsgis_utils.getImageNoDataValue(input_img, band)
    use_no_data_val = True
    if no_data_val is None:
        use_no_data_val = False
        no_data_val = 0.0

    band_min, band_max = rsgislib.imagecalc.getImageBandMinMax(input_img, band, use_no_data_val, no_data_val)

    band_defns = [rsgislib.imagecalc.BandDefn('b1', input_img, band)]
    band_range = band_max - band_min
    exp = '(b1=={0})?0.0:(b1-{1})/{2}'.format(no_data_val, band_min, band_range)
    rsgislib.imagecalc.bandMath(output_img, exp, gdal_format, rsgislib.TYPE_32FLOAT, band_defns)
    rsgislib.imageutils.popImageStats(output_img, usenodataval=True, nodataval=0.0, calcpyramids=True)
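# Example usage of normalise_image_band (a minimal sketch; the file names are
# hypothetical): rescale band 3 of an input image to the 0-1 range.
#
#   normalise_image_band('sen2_20180629_img.kea', band=3,
#                        output_img='sen2_20180629_b3_norm.kea', gdal_format='KEA')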
def create_datejson_file(image_list, out_msk_dir, out_json_file, gdal_format='KEA'):
    """
    Assumes the date is the second component of the file name (XXX_YYYYMMDD_XXXX.xxx).
    """
    import rsgislib
    import datetime
    import json
    import os.path

    rsgis_utils = rsgislib.RSGISPyUtils()
    out_img_ext = rsgis_utils.getFileExtension(gdal_format)
    date_imgs = dict()
    for img in image_list:
        basename = rsgis_utils.get_file_basename(img)
        basename_comps = basename.split('_')
        print(basename_comps)
        if len(basename_comps) < 2:
            raise Exception("The filename must have at least two components split by '_'.")
        date_str = basename_comps[1]
        if len(date_str) != 8:
            raise Exception("The second component must have 8 characters YYYYMMDD. String provided: '{}'".format(date_str))
        date_obj = datetime.datetime.strptime(date_str, "%Y%m%d").strftime("%Y-%m-%d")
        date_imgs[date_obj] = dict()
        date_imgs[date_obj]["input"] = img
        date_imgs[date_obj]["output"] = os.path.join(out_msk_dir, "{}_chngmsk{}".format(basename, out_img_ext))

    with open(out_json_file, 'w') as out_json_file_obj:
        json.dump(date_imgs, out_json_file_obj, sort_keys=True, indent=4)
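# Example usage of create_datejson_file (a minimal sketch; the paths are
# hypothetical). The input file names must follow the XXX_YYYYMMDD_XXXX.xxx
# pattern so the date can be parsed from the second underscore-separated component.
#
#   import glob
#   imgs = glob.glob('./chng_imgs/*_chng.kea')
#   create_datejson_file(imgs, './chng_msks', './chng_msks/date_lut.json', gdal_format='KEA')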
def createPy6SSensorFuncCall(inputFile, outputFile, wvLenInNM):
    """
    Create a Py6S wavelength/spectral response function call string from a text file of
    comma-separated (wavelength, response) pairs and write it to an output text file.

    :param inputFile: input text file with one 'wavelength,response' pair per line.
    :param outputFile: output text file to which the Py6S.Wavelength() call string is written.
    :param wvLenInNM: boolean; if True the input wavelengths are in nanometres and are converted to micrometres.

    """
    rsgisUtils = rsgislib.RSGISPyUtils()
    lineList = rsgisUtils.readTextFile2List(inputFile)

    startWVLen = ''
    endWVLen = ''
    respStr = ''
    first = True
    for line in lineList:
        splitLine = line.split(',')
        if len(splitLine) == 2:
            if first:
                startWVLen = splitLine[0]
                endWVLen = splitLine[0]
                respStr = splitLine[1]
                first = False
            else:
                endWVLen = splitLine[0]
                respStr = respStr + ', ' + splitLine[1]

    if wvLenInNM:
        startWVLen = str(float(startWVLen) / 1000)
        endWVLen = str(float(endWVLen) / 1000)

    cmd = 's.wavelength = Py6S.Wavelength(' + startWVLen + ', ' + endWVLen + ', [' + respStr + '])'
    #print(cmd)
    rsgisUtils.writeList2File([cmd], outputFile)
def merge_annual_stats(input_pd_file, country_names_lut_file, out_feather=None, out_excel=None, excel_sheet=None,
                       out_csv=None):
    """
    Add country names to the annual stats dataframe using the GID/country-name LUT
    and export the result to feather, CSV and/or Excel.
    """
    rsgis_utils = rsgislib.RSGISPyUtils()
    country_names_luts = rsgis_utils.readJSON2Dict(country_names_lut_file)

    data_df = pandas.read_feather(input_pd_file)
    if data_df is not None:
        cnty_lst = list()
        for region in data_df['region']:
            cnty_lst.append(country_names_luts['gid'][region])
        data_df['name'] = cnty_lst
        data_df = data_df[['region', 'name', 'count', 'area']]
        data_df = data_df.sort_values(by=['name']).reset_index()
        data_df = data_df.drop(['index'], axis=1)
        print(data_df)
        if out_feather is not None:
            data_df.to_feather(out_feather)
        if out_csv is not None:
            data_df.to_csv(out_csv)
        if out_excel is not None:
            if excel_sheet is None:
                excel_sheet = 'gmw_stats'
            data_df.to_excel(out_excel, sheet_name=excel_sheet)
def perform_analysis(self, scn_db_obj, sen_obj, plgin_objs):
    logger.info("Processing Scene: {}".format(scn_db_obj.PID))
    if scn_db_obj.Invalid:
        return False, None, False
    success = True
    out_info = None
    outputs = False

    rsgis_utils = rsgislib.RSGISPyUtils()
    eodd_utils = EODataDownUtils()

    # Find the valid image mask
    valid_img_file = rsgis_utils.findFileNone(scn_db_obj.ARDProduct_Path, "*valid.tif")
    logger.debug("Valid Image File: {}".format(valid_img_file))

    # Find the cloud image mask
    cloud_img_file = rsgis_utils.findFileNone(scn_db_obj.ARDProduct_Path, "*clouds.tif")
    logger.debug("Cloud Mask File: {}".format(cloud_img_file))

    basename = rsgis_utils.get_file_basename(valid_img_file)
    basename = basename.replace('_valid', '')
    logger.debug("The basename for the processing is: {}".format(basename))

    base_tmp_dir = os.path.join(self.params["tmp_path"],
                                "{}_{}_clearsky".format(scn_db_obj.Product_ID, scn_db_obj.PID))
    if not os.path.exists(base_tmp_dir):
        os.mkdir(base_tmp_dir)

    process_tmp_dir = os.path.join(base_tmp_dir, "processing")
    if not os.path.exists(process_tmp_dir):
        os.mkdir(process_tmp_dir)

    clear_sky_img = os.path.join(base_tmp_dir, "{}_clearsky.kea".format(basename))
    rsgislib.imagecalibration.calcClearSkyRegions(cloud_img_file, valid_img_file, clear_sky_img, 'KEA',
                                                  tmpPath=process_tmp_dir, deleteTmpFiles=False,
                                                  initClearSkyRegionDist=5000, initClearSkyRegionMinSize=3000,
                                                  finalClearSkyRegionDist=1000, morphSize=21)

    clear_sky_tif_img = eodd_utils.translateCloudOpGTIFF(clear_sky_img, scn_db_obj.ARDProduct_Path)

    if os.path.exists(base_tmp_dir):
        shutil.rmtree(base_tmp_dir)

    outputs = True
    out_info = {"clearskyimg": clear_sky_tif_img}

    return success, out_info, outputs
def calcBrightnessScaled(image, bBand, gBand, rBand, outImage, stats=True, gdalformat='KEA', scalefac=1000):
    """
    Helper function to calculate visible brightness, note the output no data value is -999.

    Where:

    :param image: is a string specifying the input image file.
    :param bBand: is an int specifying the blue band in the input image (band indexing starts at 1)
    :param gBand: is an int specifying the green band in the input image (band indexing starts at 1)
    :param rBand: is an int specifying the red band in the input image (band indexing starts at 1)
    :param outImage: is a string specifying the output image file.
    :param stats: is a boolean specifying whether pyramids and stats should be calculated (Default: True)
    :param gdalformat: is a string specifying the output image file format (Default: KEA)
    :param scalefac: is a float used to rescale the reflectance to the range 0-1 (Default: 1000 to match rsgislib/arcsi)

    """
    rsgisUtils = rsgislib.RSGISPyUtils()
    uidStr = rsgisUtils.uidGenerator()
    tmpImg = os.path.splitext(outImage)[0] + '_tmp' + uidStr + '.' + rsgisUtils.getFileExtension(gdalformat)
    expression = '(blue+green+red)!=0?((blue/' + str(scalefac) + ')+(green/' + str(scalefac) + ')+(red/' + str(scalefac) + '))/3:-999'
    bandDefns = []
    bandDefns.append(rsgislib.imagecalc.BandDefn('blue', image, bBand))
    bandDefns.append(rsgislib.imagecalc.BandDefn('green', image, gBand))
    bandDefns.append(rsgislib.imagecalc.BandDefn('red', image, rBand))
    rsgislib.imagecalc.bandMath(tmpImg, expression, gdalformat, rsgislib.TYPE_32FLOAT, bandDefns)
    rsgislib.imageutils.normaliseImagePxlVals(inputimage=tmpImg, outputimage=outImage, gdalformat=gdalformat,
                                              datatype=rsgislib.TYPE_32FLOAT, innodataval=-999, outnodataval=-999,
                                              outmin=0, outmax=1,
                                              stretchtype=rsgislib.imageutils.STRETCH_LINEARSTDDEV, stretchparam=2)
    rsgisUtils.deleteFileWithBasename(tmpImg)
    # Set no data value
    rsgisUtils.setImageNoDataValue(outImage, -999)
    if stats:
        rsgislib.imageutils.popImageStats(outImage, False, -999., True)
def calcNDWI(image, nBand, sBand, outImage, stats=True, gdalformat='KEA'):
    """
    Helper function to calculate NDWI ((NIR-SWIR)/(NIR+SWIR)), note the output no data value is -999.

    See: Xu, H. (2006). Modification of normalised difference water index (NDWI) to enhance open water
    features in remotely sensed imagery. International Journal of Remote Sensing, 27(14), 3025-3033.
    http://doi.org/10.1080/01431160600589179

    Where:

    :param image: is a string specifying the input image file.
    :param nBand: is an int specifying the nir band in the input image (band indexing starts at 1)
    :param sBand: is an int specifying the swir band (e.g., Landsat TM Band 5) in the input image (band indexing starts at 1)
    :param outImage: is a string specifying the output image file.
    :param stats: is a boolean specifying whether pyramids and stats should be calculated (Default: True)
    :param gdalformat: is a string specifying the output image file format (Default: KEA)

    """
    expression = '(nir+swir)!=0?(nir-swir)/(nir+swir):-999'
    bandDefns = []
    bandDefns.append(rsgislib.imagecalc.BandDefn('swir', image, sBand))
    bandDefns.append(rsgislib.imagecalc.BandDefn('nir', image, nBand))
    rsgislib.imagecalc.bandMath(outImage, expression, gdalformat, rsgislib.TYPE_32FLOAT, bandDefns)
    # Set no data value
    rsgislib.RSGISPyUtils().setImageNoDataValue(outImage, -999)
    if stats:
        rsgislib.imageutils.popImageStats(outImage, False, -999., True)
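# Example usage of calcNDWI (a minimal sketch; the file name is hypothetical).
# For Landsat 5 TM the NIR band is 4 and the SWIR band is 5:
#
#   calcNDWI('ls5tm_toa_refl.kea', nBand=4, sBand=5, outImage='ls5tm_ndwi.kea')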
def createStage3ImageSubsets(self, inputImage, s2BordersImage, s3BordersClumps, subsetImgsDIR,
                             subsetImgsMaskedDIR, subImgBaseName, minSize):
    segmentation.clump(s2BordersImage, s3BordersClumps, 'KEA', True, 0)
    rastergis.populateStats(s3BordersClumps, True, True)
    rastergis.spatialExtent(s3BordersClumps, 'minXX', 'minXY', 'maxXX', 'maxXY', 'minYX', 'minYY', 'maxYX', 'maxYY')

    rsgisUtils = rsgislib.RSGISPyUtils()
    dataType = rsgisUtils.getRSGISLibDataTypeFromImg(inputImage)

    ratDS = gdal.Open(s3BordersClumps, gdal.GA_Update)
    minX = rat.readColumn(ratDS, "minXX")
    maxX = rat.readColumn(ratDS, "maxXX")
    minY = rat.readColumn(ratDS, "minYY")
    maxY = rat.readColumn(ratDS, "maxYY")
    Histogram = rat.readColumn(ratDS, "Histogram")

    for i in range(minX.shape[0]):
        if i > 0:
            subImage = os.path.join(subsetImgsDIR, subImgBaseName + str(i) + '.kea')
            #print("[" + str(minX[i]) + ", " + str(maxX[i]) + "][" + str(minY[i]) + ", " + str(maxY[i]) + "]")
            imageutils.subsetbbox(inputImage, subImage, 'KEA', dataType, minX[i], maxX[i], minY[i], maxY[i])
            if Histogram[i] > minSize:
                maskedFile = os.path.join(subsetImgsMaskedDIR, subImgBaseName + str(i) + '_masked.kea')
            else:
                maskedFile = os.path.join(subsetImgsMaskedDIR, subImgBaseName + str(i) + '_burn.kea')
            imageutils.maskImage(subImage, s2BordersImage, maskedFile, 'KEA', dataType, 0, 0)
            rastergis.populateStats(maskedFile, True, False)
    ratDS = None
def create_gadm_lut(vec_file, vec_lyr, gid_col, ctry_name_col, lut_json_file):
    """
    A function which creates a look up table (LUT) linking the GADM GID values and the country names,
    written out as a JSON file.

    :param vec_file: Input vector file
    :param vec_lyr: Input vector layer
    :param gid_col: The column name for the GID.
    :param ctry_name_col: The column name for the country names.
    :param lut_json_file: An output JSON file with the LUT linking the GID and country name.

    """
    import geopandas
    import rsgislib

    base_gpdf = geopandas.read_file(vec_file, layer=vec_lyr)
    lut = dict()
    lut['gid'] = dict()
    lut['ctry'] = dict()
    for i, row in base_gpdf.iterrows():
        lut['gid'][row[gid_col]] = row[ctry_name_col]
        lut['ctry'][row[ctry_name_col]] = row[gid_col]

    rsgis_utils = rsgislib.RSGISPyUtils()
    rsgis_utils.writeDict2JSON(lut, lut_json_file)
def calcBrightness(image, bBand, gBand, rBand, outImage, stats=True, gdalformat='KEA', scalefac=1000):
    """
    Helper function to calculate visible brightness, note the output no data value is -999.

    Where:

    :param image: is a string specifying the input image file.
    :param bBand: is an int specifying the blue band in the input image (band indexing starts at 1)
    :param gBand: is an int specifying the green band in the input image (band indexing starts at 1)
    :param rBand: is an int specifying the red band in the input image (band indexing starts at 1)
    :param outImage: is a string specifying the output image file.
    :param stats: is a boolean specifying whether pyramids and stats should be calculated (Default: True)
    :param gdalformat: is a string specifying the output image file format (Default: KEA)
    :param scalefac: is a float used to rescale the reflectance to the range 0-1 (Default: 1000 to match rsgislib/arcsi)

    """
    expression = '(blue+green+red)!=0?((blue/' + str(scalefac) + ')+(green/' + str(scalefac) + ')+(red/' + str(scalefac) + '))/3:-999'
    bandDefns = []
    bandDefns.append(rsgislib.imagecalc.BandDefn('blue', image, bBand))
    bandDefns.append(rsgislib.imagecalc.BandDefn('green', image, gBand))
    bandDefns.append(rsgislib.imagecalc.BandDefn('red', image, rBand))
    rsgislib.imagecalc.bandMath(outImage, expression, gdalformat, rsgislib.TYPE_32FLOAT, bandDefns)
    # Set no data value
    rsgislib.RSGISPyUtils().setImageNoDataValue(outImage, -999)
    if stats:
        rsgislib.imageutils.popImageStats(outImage, False, -999., True)
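# Example usage of calcBrightness (a minimal sketch; the file name is
# hypothetical). With ARCSI outputs scaled by 1000 the default scalefac applies:
#
#   calcBrightness('sen2_toa_refl.kea', bBand=1, gBand=2, rBand=3,
#                  outImage='sen2_brightness.kea', scalefac=1000)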
def performClumpingSingleThread(inputImage, clumpsImage, tmpDIR='tmp', width=2000, height=2000, gdalformat='KEA'):
    """
    Clump the input image using a tiled processing chain allowing large images to be clumped more quickly.

    :param inputImage: the input image to be clumped.
    :param clumpsImage: the output clumped image.
    :param tmpDIR: the temporary directory where intermediate files will be written (default is 'tmp').
                   If the directory does not exist it will be created and then deleted once processing is complete.
    :param width: int for width of the image tiles used for processing (Default = 2000).
    :param height: int for height of the image tiles used for processing (Default = 2000).
    :param gdalformat: string with the GDAL image format for the output image (Default = KEA).
                       NOTE. KEA is used as intermediate format internally and therefore needs to be available.

    """
    createdTmp = False
    if not os.path.exists(tmpDIR):
        os.makedirs(tmpDIR)
        createdTmp = True

    rsgisUtils = rsgislib.RSGISPyUtils()
    uidStr = rsgisUtils.uidGenerator()
    dataType = rsgisUtils.getRSGISLibDataTypeFromImg(inputImage)

    baseName = os.path.splitext(os.path.basename(inputImage))[0] + "_" + uidStr

    imgTilesDIR = os.path.join(tmpDIR, "imgtiles_" + uidStr)
    tilesClumpsDIR = os.path.join(tmpDIR, "imgclumpstiles_" + uidStr)
    tilesImgBase = os.path.join(imgTilesDIR, baseName)
    initMergedClumps = os.path.join(tmpDIR, "MergedInitClumps_" + uidStr + ".kea")
    if not os.path.exists(imgTilesDIR):
        os.makedirs(imgTilesDIR)
    if not os.path.exists(tilesClumpsDIR):
        os.makedirs(tilesClumpsDIR)

    imageutils.createTiles(inputImage, tilesImgBase, int(width), int(height), 0, False, 'KEA', dataType, 'kea')
    imageTiles = glob.glob(tilesImgBase + "*")

    for tile in imageTiles:
        tilBaseName = os.path.splitext(os.path.basename(tile))[0]
        clumpedTile = os.path.join(tilesClumpsDIR, tilBaseName + '_clumps.kea')
        segmentation.clump(tile, clumpedTile, 'KEA', True, 0, True)

    clumpTiles = glob.glob(os.path.join(tilesClumpsDIR, '*_clumps.kea'))

    print("Create Blank Image")
    imageutils.createCopyImage(inputImage, initMergedClumps, 1, 0, 'KEA', rsgislib.TYPE_32UINT)

    print("Merge Tiles into Blank Image")
    segmentation.mergeClumpImages(clumpTiles, initMergedClumps, True)

    print("Merge Tile Boundaries")
    segmentation.mergeEquivClumps(initMergedClumps, clumpsImage, gdalformat, ['PixelVal'])

    shutil.rmtree(imgTilesDIR)
    shutil.rmtree(tilesClumpsDIR)
    os.remove(initMergedClumps)

    if createdTmp:
        shutil.rmtree(tmpDIR)
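# Example usage of performClumpingSingleThread (a minimal sketch; the file names
# are hypothetical). The input image is clumped tile-by-tile and clumps crossing
# tile boundaries are then merged into a single clumps image:
#
#   performClumpingSingleThread('classification.kea', 'classification_clumps.kea',
#                               tmpDIR='./tmp_clump', width=2000, height=2000,
#                               gdalformat='KEA')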
def genSen2DownloadListGoogle(dbFile, tile, outFile, outpath, cloudCover=None, startDate=None, endDate=None,
                              limit=None, multiDwn=False, lstCmds=False):
    """
    Query the sqlite database and create a list of files to download.
    """
    # Import python sqlite3 module.
    import sqlite3

    try:
        ggSen2DBConn = sqlite3.connect(dbFile)
        ggSen2DBCursor = ggSen2DBConn.cursor()

        queryVar = [tile]
        query = 'SELECT BASE_URL FROM SEN2 WHERE MGRS_TILE = ?'

        if not cloudCover is None:
            query = query + ' AND CLOUD_COVER < ?'
            queryVar.append(cloudCover)

        if not startDate is None:
            query = query + ' AND date(SENSING_TIME) > date(?)'
            queryVar.append(startDate)

        if not endDate is None:
            query = query + ' AND date(SENSING_TIME) < date(?)'
            queryVar.append(endDate)

        if not limit is None:
            query = query + ' ORDER BY CLOUD_COVER ASC LIMIT {}'.format(limit)

        multiStr = ''
        if multiDwn:
            multiStr = '-m'

        cmdLst = []
        for row in ggSen2DBCursor.execute(query, queryVar):
            if lstCmds:
                cmdLst.append("gsutil " + multiStr + " cp -r " + row[0] + " " + outpath)
            else:
                cmdLst.append(row[0])

        rsgisUtils = rsgislib.RSGISPyUtils()
        rsgisUtils.writeList2File(cmdLst, outFile)

    except ARCSIException as e:
        print("Error: {}".format(e), file=sys.stderr)
    except Exception as e:
        print("Error: {}".format(e), file=sys.stderr)
def merge_gmw_tile_stats(tile_stats_dir, out_json_file, uid_lut_file=None, out_feather=None, out_excel=None,
                         excel_sheet=None, out_csv=None):
    """
    Merge the per-tile stats JSON files (count and area per unique ID) into a single JSON file
    and optionally export the merged stats to feather, CSV and/or Excel.
    """
    rsgis_utils = rsgislib.RSGISPyUtils()
    tile_files = glob.glob(os.path.join(tile_stats_dir, "*.json"))

    first = True
    for tile_stats_file in tqdm.tqdm(tile_files):
        tile_stats_dict = rsgis_utils.readJSON2Dict(tile_stats_file)
        if first:
            combined_stats = tile_stats_dict
            first = False
        else:
            for uid in combined_stats:
                if uid in tile_stats_dict:
                    combined_stats[uid]['count'] += tile_stats_dict[uid]['count']
                    combined_stats[uid]['area'] += tile_stats_dict[uid]['area']

    combined_stats_vld = dict()
    for uid in combined_stats:
        if combined_stats[uid]['count'] > 0:
            combined_stats_vld[uid] = combined_stats[uid]

    rsgis_utils.writeDict2JSON(combined_stats_vld, out_json_file)

    if (out_feather is not None) or (out_excel is not None) or (out_csv is not None):
        df_dict = dict()
        df_dict['uid'] = list()
        df_dict['count'] = list()
        df_dict['area'] = list()
        if uid_lut_file is not None:
            uid_lut_dict = rsgis_utils.readJSON2Dict(uid_lut_file)
            df_dict['region'] = list()

        for uid in combined_stats_vld:
            df_dict['uid'].append(uid)
            df_dict['count'].append(combined_stats_vld[uid]['count'])
            df_dict['area'].append(combined_stats_vld[uid]['area'])
            if uid_lut_file is not None:
                df_dict['region'].append(uid_lut_dict['id'][uid])

        df_stats = pandas.DataFrame.from_dict(df_dict)
        if out_feather is not None:
            df_stats.to_feather(out_feather)
        if out_csv is not None:
            df_stats.to_csv(out_csv)
        if out_excel is not None:
            if excel_sheet is None:
                excel_sheet = 'gmw_stats'
            df_stats.to_excel(out_excel, sheet_name=excel_sheet)
def calcImgsPxlMode(inputImgs, outputImg, gdalformat, no_data_val=0):
    """
    Function which calculates the mode of a group of images.

    Warning, this function can be very slow. Use rsgislib.imagecalc.imagePixelColumnSummary

    :param inputImgs: the list of images
    :param outputImg: the output image file name and path (will be same dimensions as the input)
    :param gdalformat: the GDAL image file format of the output image file.
    :param no_data_val: the no data value which will be ignored when calculating the mode (Default: 0)

    """
    import numpy
    import scipy.stats
    from rios import applier

    rsgis_utils = rsgislib.RSGISPyUtils()
    datatype = rsgis_utils.getRSGISLibDataTypeFromImg(inputImgs[0])
    numpyDT = rsgis_utils.getNumpyDataType(datatype)

    try:
        import tqdm
        progress_bar = rsgislib.TQDMProgressBar()
    except:
        from rios import cuiprogress
        progress_bar = cuiprogress.GDALProgressBar()

    infiles = applier.FilenameAssociations()
    infiles.images = inputImgs
    outfiles = applier.FilenameAssociations()
    outfiles.outimage = outputImg
    otherargs = applier.OtherInputs()
    otherargs.no_data_val = no_data_val
    otherargs.numpyDT = numpyDT
    aControls = applier.ApplierControls()
    aControls.progress = progress_bar
    aControls.drivername = gdalformat
    aControls.omitPyramids = True
    aControls.calcStats = False

    def _applyCalcMode(info, inputs, outputs, otherargs):
        """
        This is an internal rios function
        """
        image_data = numpy.concatenate(inputs.images, axis=0).astype(numpy.float32)
        image_data[image_data == otherargs.no_data_val] = numpy.nan
        mode_arr, count_arr = scipy.stats.mode(image_data, axis=0, nan_policy='omit')
        outputs.outimage = mode_arr.astype(otherargs.numpyDT)

    applier.apply(_applyCalcMode, infiles, outfiles, otherargs, controls=aControls)
def flipRefChipHDF5File(input_h5_file, output_h5_file, datatype=None):
    """
    A function which flips each sample in both the x and y axis. So the output file
    will have double the number of samples as the input file.

    :param input_h5_file: The input HDF5 file for chips extracted from images.
    :param output_h5_file: The output HDF5 file for chips extracted from images.
    :param datatype: is the data type used for the output HDF5 file (e.g., rsgislib.TYPE_32FLOAT).
                     If None (default) then the output data type will be float32.

    """
    import tqdm
    import h5py
    import numpy
    import rsgislib

    rsgis_utils = rsgislib.RSGISPyUtils()
    if datatype is None:
        datatype = rsgislib.TYPE_32FLOAT

    f = h5py.File(input_h5_file, 'r')
    n_in_feats = f['DATA/REF'].shape[0]
    chip_size = f['DATA/REF'].shape[1]
    n_bands = f['DATA/DATA'].shape[3]
    n_out_feats = n_in_feats * 2

    feat_arr = numpy.zeros([n_out_feats, chip_size, chip_size, n_bands], dtype=numpy.float32)
    feat_ref_arr = numpy.zeros([n_out_feats, chip_size, chip_size], dtype=numpy.uint16)

    i_feat = 0
    for n in tqdm.tqdm(range(n_in_feats)):
        numpy.copyto(feat_ref_arr[i_feat], numpy.flip(f['DATA/REF'][n].T, axis=0).T, casting='safe')
        numpy.copyto(feat_arr[i_feat], numpy.flip(f['DATA/DATA'][n].T, axis=1).T, casting='safe')
        i_feat += 1
        numpy.copyto(feat_ref_arr[i_feat], numpy.flip(f['DATA/REF'][n].T, axis=1).T, casting='safe')
        numpy.copyto(feat_arr[i_feat], numpy.flip(f['DATA/DATA'][n].T, axis=2).T, casting='safe')
        i_feat += 1

    f.close()

    ######################################################################
    # Create the output HDF5 file and populate with data.
    ######################################################################
    h5_dtype = rsgis_utils.getNumpyCharCodesDataType(datatype)

    fH5Out = h5py.File(output_h5_file, 'w')
    dataGrp = fH5Out.create_group("DATA")
    metaGrp = fH5Out.create_group("META-DATA")
    dataGrp.create_dataset('DATA', data=feat_arr, chunks=(250, chip_size, chip_size, n_bands),
                           compression="gzip", shuffle=True, dtype=h5_dtype)
    dataGrp.create_dataset('REF', data=feat_ref_arr, chunks=(250, chip_size, chip_size),
                           compression="gzip", shuffle=True, dtype='H')
    describDS = metaGrp.create_dataset("DESCRIPTION", (1,), dtype="S10")
    describDS[0] = 'IMAGE REF TILES'.encode()
    fH5Out.close()
def do_processing(self, **kwargs):
    rsgis_utils = rsgislib.RSGISPyUtils()
    unq_vals = rsgis_utils.readJSON2Dict(self.params['unq_vals_file'])

    lut_vals = dict()
    for val in unq_vals:
        lut_vals[val] = dict()
        lut_vals[val]['count'] = 0
        lut_vals[val]['area'] = 0.0

    calc_unq_val_pxl_areas(self.params['tile_pxa_img'], self.params['tile_roi_img'],
                           self.params['img_tile'], lut_vals)

    for val in unq_vals:
        lut_vals[val]['count'] = int(lut_vals[val]['count'])
        lut_vals[val]['area'] = float(lut_vals[val]['area'])

    rsgis_utils.writeDict2JSON(lut_vals, self.params['out_file'])
def performStage2Tiling(self, inputImage, tileShp, tilesRat, tilesBase, tilesMetaDIR, tilesImgDIR, tmpDIR, width,
                        height, validDataThreshold, bordersImage):
    tilingutils.createMinDataTiles(inputImage, tileShp, tilesRat, width, height, validDataThreshold, bordersImage,
                                   True, True, tmpDIR)
    tilingutils.createTileMaskImagesFromClumps(tilesRat, tilesBase, tilesMetaDIR, "KEA")
    rsgisUtils = rsgislib.RSGISPyUtils()
    dataType = rsgisUtils.getRSGISLibDataTypeFromImg(inputImage)
    tilingutils.createTilesFromMasks(inputImage, tilesBase, tilesMetaDIR, tilesImgDIR, dataType, 'KEA')
def get_class_training_chips_data(imgBandInfo, classVecSampleInfo, chip_h_size, tmpdir, refImg=None):
    """
    A function to extract training chips (windows/regions) for vector regions for a given input image set.

    :param imgBandInfo: A list of rsgislib.imageutils.ImageBandInfo objects to define the images and bands of interest.
    :param classVecSampleInfo: A list of rsgislib.classification.ClassVecSamplesInfoObj objects to define the training regions.
    :param chip_h_size: is half the chip size to be extracted (i.e., 10 with output image chips 21x21,
                        10 pixels either side of the one of interest).
    :param tmpdir: A directory for temporary outputs created during the processing.
    :param refImg: A reference image which defines the area of interest, pixel size etc. for the processing.
                   If None then an image will be generated using the input images but the tmpdir needs to be defined.
    :return: dictionary of ClassSimpleInfoObj objects.

    """
    import rsgislib
    import rsgislib.imageutils
    import rsgislib.vectorutils
    import os
    import os.path
    import random
    import shutil

    # Get valid mask, rasterised to this
    rsgis_utils = rsgislib.RSGISPyUtils()
    uid_str = rsgis_utils.uidGenerator()
    tmp_lcl_dir = os.path.join(tmpdir, "get_class_training_chips_data_{}".format(uid_str))
    if not os.path.exists(tmp_lcl_dir):
        os.makedirs(tmp_lcl_dir)

    rasterise_ref_img = refImg
    if refImg is None:
        rasterise_ref_img = os.path.join(tmp_lcl_dir, "ref_img_vmsk.kea")
        rsgislib.imageutils.create_valid_mask(imgBandInfo, rasterise_ref_img, 'KEA', tmp_lcl_dir)

    classInfo = dict()
    for class_sample_info in classVecSampleInfo:
        cls_basename = rsgis_utils.get_file_basename(class_sample_info.fileH5)
        out_vec_img = os.path.join(tmp_lcl_dir, "{}_img.kea".format(cls_basename))
        rsgislib.vectorutils.rasteriseVecLyr(class_sample_info.vecfile, class_sample_info.veclyr,
                                             rasterise_ref_img, out_vec_img, gdalformat="KEA",
                                             burnVal=class_sample_info.id, datatype=rsgislib.TYPE_16UINT,
                                             vecAtt=None, vecExt=False, thematic=True, nodata=0)
        rsgislib.imageutils.extractChipZoneImageBandValues2HDF(imgBandInfo, out_vec_img, class_sample_info.id,
                                                               chip_h_size, class_sample_info.fileH5)
        rand_red_val = random.randint(1, 255)
        rand_grn_val = random.randint(1, 255)
        rand_blu_val = random.randint(1, 255)
        classInfo[class_sample_info.classname] = ClassSimpleInfoObj(id=class_sample_info.id,
                                                                    fileH5=class_sample_info.fileH5,
                                                                    red=rand_red_val, green=rand_grn_val,
                                                                    blue=rand_blu_val)
    shutil.rmtree(tmp_lcl_dir)
    return classInfo
def calcWSG84PixelArea(img, out_img, scale=10000, gdalformat='KEA'):
    """
    A function which calculates the area (in metres) of the pixel projected in WGS84.

    :param img: input image, for which the per-pixel area will be calculated.
    :param out_img: output image file.
    :param scale: scale the output area to unit of interest. Scale=10000(Ha), Scale=1(sq m), Scale=1000000(sq km),
                  Scale=4046.856(Acre), Scale=2590000(sq miles), Scale=0.0929022668(sq feet)
    :param gdalformat: the GDAL image file format of the output image file (Default: KEA).

    """
    import numpy
    import rsgislib.tools
    from rios import applier

    try:
        import tqdm
        progress_bar = rsgislib.TQDMProgressBar()
    except:
        from rios import cuiprogress
        progress_bar = cuiprogress.GDALProgressBar()

    rsgis_utils = rsgislib.RSGISPyUtils()
    x_res, y_res = rsgis_utils.getImageRes(img)

    infiles = applier.FilenameAssociations()
    infiles.img = img
    outfiles = applier.FilenameAssociations()
    outfiles.outimage = out_img
    otherargs = applier.OtherInputs()
    otherargs.x_res = x_res
    otherargs.y_res = y_res
    otherargs.scale = float(scale)
    aControls = applier.ApplierControls()
    aControls.progress = progress_bar
    aControls.drivername = gdalformat
    aControls.omitPyramids = False
    aControls.calcStats = False

    def _calcPixelArea(info, inputs, outputs, otherargs):
        xBlock, yBlock = info.getBlockCoordArrays()

        x_res_arr = numpy.zeros_like(yBlock, dtype=float)
        x_res_arr[...] = otherargs.x_res
        y_res_arr = numpy.zeros_like(yBlock, dtype=float)
        y_res_arr[...] = otherargs.y_res
        x_res_arr_m, y_res_arr_m = rsgislib.tools.degrees_to_metres(yBlock, x_res_arr, y_res_arr)

        outputs.outimage = numpy.expand_dims((x_res_arr_m * y_res_arr_m) / otherargs.scale, axis=0)

    applier.apply(_calcPixelArea, infiles, outfiles, otherargs, controls=aControls)
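# Example usage of calcWSG84PixelArea (a minimal sketch; the file names are
# hypothetical): output the per-pixel area in hectares (scale=10000) for an
# image projected in WGS84 (EPSG:4326).
#
#   calcWSG84PixelArea('gmw_mangrove_msk_wgs84.kea', 'gmw_mangrove_pxl_area_ha.kea',
#                      scale=10000, gdalformat='KEA')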
def convert_between_tms_xyz(tiles_dir):
    """
    A function which converts between TMS and XYZ tile indexing. The conversion is performed in-situ
    with the files simply renamed. If a tile cache is in TMS format then this function will convert it
    to XYZ, while if it is in XYZ format then it will convert it to TMS (they are easily interchangeable).

    The TMS format uses a grid with the origin at the bottom-left, while the XYZ format uses a grid with
    the origin in the top-left. XYZ grids can be loaded into QGIS and other GIS systems as layers.
    TMS grids are outputted by gdal2tiles.py.

    :param tiles_dir: The input directory for the tile cache.

    """
    import os
    import rsgislib
    import tqdm

    c_pwd = os.getcwd()
    rsgis_utils = rsgislib.RSGISPyUtils()
    in_zoom_dirs = []
    lcl_files = os.listdir(tiles_dir)
    for lcl_file in lcl_files:
        lcl_name = lcl_file
        lcl_file = os.path.join(tiles_dir, lcl_file)
        if os.path.isdir(lcl_file) and rsgis_utils.isNumber(lcl_name):
            in_zoom_dirs.append(os.path.abspath(lcl_file))

    for zoom_dir in tqdm.tqdm(in_zoom_dirs, desc='overall'):
        zoom = int(os.path.basename(zoom_dir))
        x_dirs = os.listdir(zoom_dir)
        for x_dir in tqdm.tqdm(x_dirs, desc='zoom {}'.format(zoom), leave=False):
            if rsgis_utils.isNumber(x_dir):
                x = int(x_dir)
                x_dir = os.path.join(zoom_dir, x_dir)
                if os.path.isdir(x_dir) and rsgis_utils.isNumber(x):
                    y_files = os.listdir(x_dir)
                    os.chdir(x_dir)
                    for y_file in tqdm.tqdm(y_files, desc='y', leave=False):
                        y_cur_file = os.path.join(x_dir, y_file)
                        y_in_str = os.path.splitext(y_file)[0]
                        if rsgis_utils.isNumber(y_in_str):
                            y_in = int(y_in_str)
                            img_ext = os.path.splitext(y_file)[1]
                            n_tile_x, n_tile_y, n_zoom = flip_xyz_tms_tiles(x, y_in, zoom)
                            out_img_file = "{}{}".format(n_tile_y, img_ext)
                            os.rename(y_cur_file, out_img_file)
    os.chdir(c_pwd)
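# Example usage of convert_between_tms_xyz (a minimal sketch; the directory is
# hypothetical): flip a gdal2tiles.py TMS cache into XYZ indexing, or back again.
#
#   convert_between_tms_xyz('./web_tiles_cache')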
def gen_command_info(self, **kwargs):
    rsgis_utils = rsgislib.RSGISPyUtils()
    img_tiles = glob.glob(kwargs['img_tiles'])
    for img_tile in img_tiles:
        tile_base_name = rsgis_utils.get_file_basename(img_tile, checkvalid=False)
        tile_base_name = tile_base_name.replace(kwargs['tile_name_rm'], '')
        out_pxa_file = os.path.join(kwargs['out_pxa_path'], "{}_pxa.kea".format(tile_base_name))
        if not os.path.exists(out_pxa_file):
            c_dict = dict()
            c_dict['img_tile'] = img_tile
            c_dict['tile_pxa_img'] = out_pxa_file
            self.params.append(c_dict)
def add_unq_id_val(vec_file, vec_lyr, unq_col, unq_id_col, out_vec_file, out_vec_lyr, out_format="GPKG",
                   lut_json_file=None):
    """
    A function which adds a numeric unique ID column to a vector layer based on the unique values of an
    existing attribute column, optionally writing a LUT linking the numeric IDs and the attribute values.

    :param vec_file: Input vector file
    :param vec_lyr: Input vector layer
    :param unq_col: The column name used to define unique regions.
    :param unq_id_col: The output column name used to create a numeric unique ID
    :param out_vec_file: The output vector file with the selected features.
    :param out_vec_lyr: The output layer file with the selected features.
    :param out_format: the output vector format (Default: GPKG)
    :param lut_json_file: An output file with the LUT linking numeric ID and region unique value.
                          If None no output is produced.

    """
    import geopandas
    import numpy
    import tqdm

    base_gpdf = geopandas.read_file(vec_file, layer=vec_lyr)
    unq_vals = base_gpdf[unq_col].unique()
    base_gpdf[unq_id_col] = numpy.zeros((base_gpdf.shape[0]), dtype=int)
    unq_id_val = 1
    lut = dict()
    lut['id'] = dict()
    lut['val'] = dict()
    for unq_val in tqdm.tqdm(unq_vals):
        base_gpdf.loc[base_gpdf[unq_col] == unq_val, unq_id_col] = unq_id_val
        lut['id'][unq_id_val] = unq_val
        lut['val'][unq_val] = unq_id_val
        unq_id_val += 1

    if out_format == 'GPKG':
        base_gpdf.to_file(out_vec_file, layer=out_vec_lyr, driver=out_format)
    else:
        base_gpdf.to_file(out_vec_file, driver=out_format)

    if lut_json_file is not None:
        import rsgislib
        rsgis_utils = rsgislib.RSGISPyUtils()
        rsgis_utils.writeDict2JSON(lut, lut_json_file)
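# Example usage of add_unq_id_val (a minimal sketch; the file, layer and column
# names are hypothetical): add a numeric 'region_id' column based on the unique
# values of 'region_name' and write the ID/value LUT to a JSON file.
#
#   add_unq_id_val('regions.gpkg', 'regions', unq_col='region_name',
#                  unq_id_col='region_id', out_vec_file='regions_ids.gpkg',
#                  out_vec_lyr='regions_ids', out_format='GPKG',
#                  lut_json_file='regions_id_lut.json')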
def create_output_imgs(self, out_imgs_info):
    """
    A function to create the output image(s) if required during processing.

    :param out_imgs_info: a list of rsgislib.imageutils.OutImageInfo objects specifying the output image(s).

    """
    rsgis_utils = rsgislib.RSGISPyUtils()
    out_wkt_str = rsgis_utils.getWKTFromEPSGCode(int(self.img_epsg))
    if out_wkt_str is None:
        raise Exception("Did not have a projection string for the EPSG code: {}".format(self.img_epsg))

    if math.fabs(self.img_pxl_res[0]) != math.fabs(self.img_pxl_res[1]):
        raise Exception("Currently limited to only outputting images with square pixels.")
    out_img_res = math.fabs(self.img_pxl_res[0])

    self.out_imgs_info = out_imgs_info
    self.out_imgs_objs = dict()
    for img in out_imgs_info:
        rsgislib.imageutils.createBlankImgFromBBOX(self.bbox_intersect, out_wkt_str, img.file_name, out_img_res,
                                                   img.no_data_val, img.nbands, img.gdal_format, img.datatype,
                                                   snap2grid=False)
        self.out_imgs_objs[img.name] = dict()
        self.out_imgs_objs[img.name]['dataset'] = gdal.Open(img.file_name, gdal.GA_Update)
        if self.out_imgs_objs[img.name]['dataset'] is None:
            raise Exception("Could not open image file: {}".format(img.file_name))
        self.out_imgs_objs[img.name]['nbands'] = img.nbands
        self.out_imgs_objs[img.name]['bands'] = dict()
        for band in range(img.nbands):
            self.out_imgs_objs[img.name]['bands'][band + 1] = \
                self.out_imgs_objs[img.name]['dataset'].GetRasterBand(band + 1)
            if self.out_imgs_objs[img.name]['bands'][band + 1] is None:
                raise Exception("Could not open band {} within image {}".format(band + 1, img.file_name))
def extract_sen1_img_data(eodd_config_file, roi_vec_file, roi_vec_lyr, start_date, end_date, out_dir, tmp_dir):
    rsgis_utils = rsgislib.RSGISPyUtils()
    bbox = rsgis_utils.getVecLayerExtent(roi_vec_file, roi_vec_lyr)
    bbox_wgs84 = rsgis_utils.reprojBBOX_epsg(bbox, 27700, 4326)

    sys_main_obj = eodatadown.eodatadownsystemmain.EODataDownSystemMain()
    sys_main_obj.parse_config(eodd_config_file)

    sen_obj = sys_main_obj.get_sensor_obj("Sentinel1ASF")
    scns = sen_obj.query_scn_records_date_bbox(start_date, end_date, bbox_wgs84, start_rec=0, n_recs=0,
                                               valid=True, cloud_thres=None)
    print("N scns = {}".format(len(scns)))
    for scn in scns:
        print(scn.ARDProduct_Path)
        scn_dB_file = glob.glob(os.path.join(scn.ARDProduct_Path, "*dB_osgb.tif"))
        if len(scn_dB_file) == 1:
            scn_dB_file = scn_dB_file[0]
        else:
            raise Exception("There should only be 1 dB image for the scene.")
        basename = os.path.splitext(os.path.basename(scn_dB_file))[0]

        scn_dir = os.path.join(out_dir, "{}_{}".format(scn.Product_File_ID, scn.PID))
        if not os.path.exists(scn_dir):
            os.mkdir(scn_dir)

        scn_tmp_dir = os.path.join(tmp_dir, "{}_{}".format(scn.Product_File_ID, scn.PID))
        if not os.path.exists(scn_tmp_dir):
            os.mkdir(scn_tmp_dir)

        metadata_file = os.path.join(scn_dir, "{}.json".format(basename))
        export_metadata(scn, metadata_file)

        out_img = os.path.join(scn_dir, "{}_sub.tif".format(basename))
        subset_mask_img(scn_dB_file, roi_vec_file, roi_vec_lyr, scn_tmp_dir, out_img)
        print("")
def performDOSCalc(inputFile, outputFile, gdalformat='KEA', nonNegative=True, noDataVal=0, darkObjReflVal=0,
                   darkObjPercentile=0.01, copyBandNames=True, calcStatsPyd=True):
    """
    A command to perform a dark object subtraction (DOS) on an input image.

    :param inputFile: input image to which the DOS method is to be applied. Typically, this image will be in
                      top of atmosphere reflectance (TOA)
    :param outputFile: the output image file
    :param gdalformat: the output image file format (default = KEA)
    :param nonNegative: is a boolean specifying whether negative output pixel values will be accepted
                        (Default is True; i.e., no negative values)
    :param noDataVal: is the no data value within the input image file.
    :param darkObjReflVal: is an offset which is applied to all pixel values to make a minimum reflectance value (Default = 0)
    :param darkObjPercentile: is the percentile of the input image used to define the dark object threshold,
                              range is 0 - 1 (Default is 0.01; i.e., 1%).
    :param copyBandNames: is a boolean specifying that the band names of the input image should be copied to the
                          output image file (Default: True)
    :param calcStatsPyd: is a boolean specifying that the image stats and pyramids should be calculated on the
                         output image (Default: True)

    Example::

        import rsgislib.imagecalibration
        rsgislib.imagecalibration.performDOSCalc("LS5TM_20110701_lat52lon421_r24p204_rad_toa.kea",
                                                 "LS5TM_20110701_lat52lon421_r24p204_rad_toa_dos.kea")

    """
    import rsgislib
    import rsgislib.imagecalc
    import rsgislib.imageutils
    import collections

    rsgisUtils = rsgislib.RSGISPyUtils()
    outDataType = rsgisUtils.getRSGISLibDataTypeFromImg(inputFile)

    percentList = rsgislib.imagecalc.bandPercentile(inputFile, darkObjPercentile, noDataVal)

    offsetsList = list()
    OffVal = collections.namedtuple('DOSOffset', ['offset'])
    for val in percentList:
        offsetsList.append(OffVal(offset=val))

    applySubtractSingleOffsets(inputFile, outputFile, gdalformat, outDataType, nonNegative, True, noDataVal,
                               darkObjReflVal, offsetsList)

    if copyBandNames:
        bandNames = rsgislib.imageutils.getBandNames(inputFile)
        rsgislib.imageutils.setBandNames(outputFile, bandNames)

    if calcStatsPyd:
        rsgislib.imageutils.popImageStats(outputFile, usenodataval=True, nodataval=noDataVal, calcpyramids=True)
def split_chip_sample_ref_train_valid_test(input_sample_h5_file, train_h5_file, valid_h5_file, test_h5_file,
                                           test_sample, valid_sample, train_sample=None, rand_seed=42,
                                           datatype=None):
    """
    A function to split a chip HDF5 samples file (from rsgislib.imageutils.extractChipZoneImageBandValues2HDF)
    into three (i.e., Training, Validation and Testing).

    :param input_sample_h5_file: Input HDF file, probably from rsgislib.imageutils.extractZoneImageBandValues2HDF.
    :param train_h5_file: Output file with the training data samples (this has the number of samples left following
                          the removal of the test and valid samples if train_sample=None)
    :param valid_h5_file: Output file with the valid data samples.
    :param test_h5_file: Output file with the testing data samples.
    :param test_sample: The size of the testing sample to be taken.
    :param valid_sample: The size of the validation sample to be taken.
    :param train_sample: The size of the training sample to be taken. If None then the remaining samples are returned.
    :param rand_seed: The random seed to be used to randomly select the sub-samples.
    :param datatype: is the data type used for the output HDF5 file (e.g., rsgislib.TYPE_32FLOAT).
                     If None (default) then the output data type will be float32.

    """
    import rsgislib
    from rsgislib.imageutils import splitSampleRefChipHDF5File
    import os
    import os.path

    rsgis_utils = rsgislib.RSGISPyUtils()
    uid_str = rsgis_utils.uidGenerator()
    out_dir = os.path.split(os.path.abspath(test_h5_file))[0]

    tmp_train_valid_sample_file = os.path.join(out_dir, "train_valid_tmp_sample_{}.h5".format(uid_str))
    splitSampleRefChipHDF5File(input_sample_h5_file, test_h5_file, tmp_train_valid_sample_file, test_sample,
                               rand_seed, datatype)

    if train_sample is not None:
        tmp_train_sample_file = os.path.join(out_dir, "train_tmp_sample_{}.h5".format(uid_str))
        splitSampleRefChipHDF5File(tmp_train_valid_sample_file, valid_h5_file, tmp_train_sample_file, valid_sample,
                                   rand_seed, datatype)
        tmp_remain_sample_file = os.path.join(out_dir, "remain_tmp_sample_{}.h5".format(uid_str))
        splitSampleRefChipHDF5File(tmp_train_sample_file, train_h5_file, tmp_remain_sample_file, train_sample,
                                   rand_seed, datatype)
        os.remove(tmp_train_sample_file)
        os.remove(tmp_remain_sample_file)
    else:
        splitSampleRefChipHDF5File(tmp_train_valid_sample_file, valid_h5_file, train_h5_file, valid_sample,
                                   rand_seed, datatype)
        os.remove(tmp_train_valid_sample_file)
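# Example usage of split_chip_sample_ref_train_valid_test (a minimal sketch; the
# file names and sample sizes are hypothetical): split a chip HDF5 samples file
# into testing (500), validation (500) and training (remaining) sets.
#
#   split_chip_sample_ref_train_valid_test('cls_chips.h5', 'cls_chips_train.h5',
#                                          'cls_chips_valid.h5', 'cls_chips_test.h5',
#                                          test_sample=500, valid_sample=500,
#                                          train_sample=None, rand_seed=42)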
def gen_command_info(self, **kwargs):
    if not os.path.exists(kwargs['out_path']):
        os.mkdir(kwargs['out_path'])

    rsgis_utils = rsgislib.RSGISPyUtils()
    base_gpdf = geopandas.read_file(kwargs['roi_vec'], layer=kwargs['roi_vec_lyr'])
    unq_vals = base_gpdf[kwargs['roi_vec_col']].unique().tolist()
    base_gpdf = None
    rsgis_utils.writeDict2JSON(unq_vals, kwargs['unq_vals_file'])

    img_tiles = glob.glob(kwargs['img_tiles'])
    for img_tile in img_tiles:
        tile_base_name = rsgis_utils.get_file_basename(img_tile, checkvalid=False)
        out_file = os.path.join(kwargs['out_path'], "{}_stats.json".format(tile_base_name))
        if not os.path.exists(out_file):
            tile_generic_base_name = tile_base_name.replace(kwargs['tile_name_rm'], '')
            tile_roi_img = os.path.join(kwargs['roi_img_path'],
                                        "{}_roi_{}.kea".format(tile_generic_base_name, kwargs['roi_name']))
            if not os.path.exists(tile_roi_img):
                raise Exception("Could not find ROI image file: {}".format(tile_roi_img))
            tile_pxa_img = os.path.join(kwargs['pxa_img_path'], "{}_pxa.kea".format(tile_generic_base_name))
            if not os.path.exists(tile_pxa_img):
                raise Exception("Could not find Pixel Area image file: {}".format(tile_pxa_img))
            c_dict = dict()
            c_dict['img_tile'] = img_tile
            c_dict['unq_vals_file'] = kwargs['unq_vals_file']
            c_dict['tile_pxa_img'] = tile_pxa_img
            c_dict['tile_roi_img'] = tile_roi_img
            c_dict['out_file'] = out_file
            self.params.append(c_dict)