def cancel_orders(txt_in: str, username: str, espa_env: str):
    """
    Cancel every ESPA order listed in a JSON order file, then remove the file.

    :param txt_in: The full path and filename of the input txt file containing the ESPA orders
    :param username: ESPA user name
    :param espa_env: ESPA environment
    :return:
    """
    # Context manager ensures the order file handle is closed promptly
    # (previously the handle from open() was leaked)
    with open(txt_in, "r") as f:
        order_list = json.loads(f.read())

    t0 = get_time()

    espa_url = get_espa_env(espa_env)

    passwd = espa_login()

    for order in order_list:
        url = espa_url + api_config.api_urls["order"] + order["orderid"]
        logger.warning('Cancelling order: %s', url)
        cancellation = {'orderid': order['orderid'], 'status': 'cancelled'}
        result = requests.put(url, auth=(username, passwd), json=cancellation).json()
        logger.info('Result: %s', result)

    # NOTE(review): the order file is removed even if a cancellation request
    # failed above — confirm this best-effort behavior is intended
    logger.warning('Removing cancelled file: %s', txt_in)
    os.unlink(txt_in)

    logger.info("Processing time: {}".format(get_time() - t0))
def read_band_as_array(rast, n_bands=1):
    """Read a band of a GDAL dataset as a (possibly masked) numpy array.

    Args:
      rast <osgeo.gdal.Dataset>: open raster
      n_bands <int>: 1-based index of the band to read (default=1)

    Returns:
      (rast_arr, r_nd): the band data as a numpy array (masked where equal
      to the NoData value, when one is defined) and the NoData value itself.
      r_nd is None when the band defines no NoData value, and False when the
      value could not be queried at all.
    """
    # GDAL band indices are 1-based
    r_a = rast.GetRasterBand(n_bands)

    # False = "could not query"; None = "queried, but no NoData defined"
    r_nd = False
    try:
        r_nd = r_a.GetNoDataValue()
    except AttributeError:
        logger.warning(
            "Variable {0} does not have NoData value.".format(r_a))

    # read raster as array
    rast_arr = np.array(r_a.ReadAsArray())

    # Mask the NoData value only when one is actually defined.
    # GetNoDataValue() returns None when no NoData is set; the previous
    # `is not False` test let None through and performed a meaningless
    # elementwise `array == None` comparison.
    if r_nd is not False and r_nd is not None:
        rast_arr = np.ma.masked_where(rast_arr == r_nd, rast_arr)
        logger.info("NoData value: {0}".format(r_nd))
    else:
        # Previously the raw band object was returned on this path, which
        # broke downstream array math; always return the array.
        logger.info("NoData value could not be determined.")

    return (rast_arr, r_nd)
def unzip_files(tests: list, masts: list) -> None:
    """
    Extract files from archives in sorted order

    :param tests: List of paths to the test .tar archives
    :param masts: List of paths to the master .tar archives
    :return:
    """
    print('Warning: decompressing files. Make sure you have the necessary '
          'disk space to complete this operation...\n')
    time.sleep(5)

    # Make sure the lists are sorted
    masts.sort()
    tests.sort()

    for mast, test in zip(masts, tests):
        try:
            # Pick the tarfile mode per archive; master and test archives
            # may use different compression (previously the master's mode
            # was applied to both)
            tar_mast = tarfile.open(
                mast, 'r:gz' if mast.endswith('gz') else 'r')
            tar_test = tarfile.open(
                test, 'r:gz' if test.endswith('gz') else 'r')

            logger.info("{0} is {1} MB...\n".format(
                mast, os.path.getsize(mast) * 0.000001))
            logger.info("{0} is {1} MB...\n".format(
                test, os.path.getsize(test) * 0.000001))

            if os.path.getsize(mast) == 0:
                logger.critical(
                    "Archive {0} is of zero size!".format(mast))
                sys.exit(1)
            elif os.path.getsize(test) == 0:
                logger.critical(
                    "Archive {0} is of zero size!".format(test))
                sys.exit(1)

        except Exception as exc:
            logger.critical("Problem with archive file(s): %s and %s. %s",
                            mast, test, str(exc))
            sys.exit(1)

        try:
            tar_mast.extractall(path=os.path.dirname(mast))
            tar_test.extractall(path=os.path.dirname(test))
        except Exception as exc:
            logger.critical(
                "Problem extracting contents from archive files:"
                "%s and %s. %s", mast, test, str(exc))
        finally:
            # Close the archives; previously the handles were leaked
            tar_mast.close()
            tar_test.close()

    return None
def _api(url, endpoint, data=None, verb='POST'):
    """ Send an HTTP request to an endpoint, optional message body """
    # Build the full request URL from the base and endpoint
    url = '{}/{}'.format(url, endpoint)
    logger.info('%s %s...', verb.upper(), url)

    # Dispatch to the matching requests function (get/post/put/...)
    request_fn = getattr(requests, verb.lower())
    response = request_fn(url, data=fmt_body(data))
    logger.info(response)

    # Raise on any 4xx/5xx status before decoding the body
    response.raise_for_status()
    return response.json()
def rm_files(envi_files, ext):
    """Remove files from list, by specific extension

    Args:
      envi_files <list>: file names to be checked
      ext <str>: file extension to be removed"""
    # Partition the names into those kept and those skipped in one pass
    kept = []
    skipped = []
    for name in envi_files:
        if ext in name:
            skipped.append(name)
        else:
            kept.append(name)
    logger.info("Skipping analysis of {0} file {1}".format(ext, skipped))
    return kept
def check_images(test, mast):
    """Read in a generic (non-geographic) image, like JPEG, and do a diff

    Return diff raster if actually different

    Args:
      test <str>: path to test image
      mast <str>: path to master image
    """
    try:
        from scipy.misc import imread
    except ImportError:
        from scipy.ndimage import imread

    # read images
    try:
        test_im = imread(test)
        mast_im = imread(mast)
    except ImportError:
        # scipy's imread raises ImportError at call time when PIL is absent
        logger.warning("Likely missing Python Image Library (PIL).")
        # try Scikit Image
        from skimage.io import imread
        try:
            mast_im = imread(mast)
            test_im = imread(test)
        except (ValueError, TypeError, ImportError):
            logger.warning("Not able to open image with skimage.io. Likely missing image library.")
            return None

    # check diff
    try:
        diff_im = do_diff(test_im, mast_im)
        # Count nonzero pixels. The previous `len(np.nonzero(diff_im)) > 3`
        # measured the number of array dimensions (always 2 or 3 for an
        # image), so real differences were never reported.
        if np.count_nonzero(diff_im) > 0:
            logger.error("Values differ between {0} and {1}.".format(test, mast))
            return diff_im
        else:
            logger.info("Values equivalent between {0} and {1}.".format(test, mast))
            return None
    except ValueError:
        logger.error("Image {0} and {1} are not the same dimensions.".format(test, mast))
def place_order(espa_env: str, username: str, outdir: str = None, order_key: str = None):
    """
    Place the order with the appropriate ESPA environment

    :param order_key: Optionally specify a keyword pointing to a specific order
    :param outdir: Optionally specify full path to the output directory, otherwise os.getcwd() is used
    :param espa_env: The name of the ESPA environment
    :param username: ESPA username
    :return:
    """
    espa_url = espa_orders_api.get_espa_env(espa_env)
    orders = load_order(order_key)
    passwd = espa_orders_api.espa_login()

    total = len(orders)
    response = list()

    for idx, order in enumerate(orders, start=1):
        logger.info("Requesting order %d of %d", idx, total)
        r = requests.post(espa_url + api_config.api_urls["order"],
                          auth=(username, passwd),
                          json=order)
        try:
            result = r.json()
        except json.decoder.JSONDecodeError:  # inherits from ValueError
            # This error seems to occur when trying to decode a None-type object
            logger.error(
                "There was likely a problem connecting with the host. "
                "Check to see if ESPA is down for maintenance.")
            continue

        # Skip responses that do not carry an order id
        if 'orderid' not in result:
            logger.error('Order "%s" %d/%d failed: %s',
                         order_key, idx, total, result)
            continue
        response.append(result)

    filename = order_text(outdir)
    logger.warning('Store ordering results: %s', filename)
    with open(filename, "a") as f:
        f.write(json.dumps(response, indent=4))
def get_ext(*args):
    """Get unique extensions for all extracted files. Ignore .tar files.

    Args:
      *args <str>: string(s) of file extensions
    """
    # Collect the extension of every non-tar file across all input lists
    exts = []
    for file_list in args:
        for fname in file_list:
            if '.tar' in fname:
                continue
            exts.append(os.path.splitext(fname)[1])

    unique_exts = list(set(exts))
    logger.info("All extensions: {0}".format(exts))
    logger.info("Unique extensions: {0}".format(unique_exts))
    return unique_exts
def count_bands(r_name, raster):
    """Count number of bands inside raster

    Args:
      r_name <str>: file name of raster
      raster <osgeo.gdal.Dataset>: raster
    """
    # The gdal import previously attempted here was never used;
    # RasterCount comes straight off the already-open dataset.
    d_r = raster.RasterCount
    logger.info("Number of bands in {0}: {1}".format(r_name, d_r))
    return d_r
def count_sds(r_name, raster):
    """Count number of SDS inside raster.

    Args:
      r_name <str>: file name of raster
      raster <osgeo.gdal.Dataset>: raster
    """
    # The gdal import previously attempted here was never used;
    # GetSubDatasets comes straight off the already-open dataset.
    d_r = len(raster.GetSubDatasets())
    logger.info("Number of SDS in {0}: {1}".format(r_name, d_r))
    return d_r
def img_stats(test: str, mast: str, diff_img: np.ndarray, dir_in: str,
              fn_in: str, dir_out: str, sds_ct: int = 0) -> None:
    """
    Log stats from array

    :param test: name of test file
    :param mast: name of master file
    :param diff_img: image array
    :param dir_in: directory where test data exists
    :param fn_in: input filename (to identify csv entry)
    :param dir_out: output directory
    :param sds_ct: index of SDS (default=0)
    :return:
    """
    # Ignore zero-difference pixels so stats describe only actual diffs
    diff_img = np.ma.masked_where(diff_img == 0, diff_img)

    fn_out = dir_out + os.sep + "stats.csv"

    logger.info("Writing stats for {0} to {1}.".format(fn_in, fn_out))

    file_exists = os.path.isfile(fn_out)

    # newline="" is required by the csv module to prevent blank rows being
    # inserted between records on Windows
    with open(fn_out, "a", newline="") as f:
        writer = csv.writer(f)

        # write header if file didn't already exist
        if not file_exists:
            writer.writerow(
                ("dir", "test_file", "master_file", "mean", "min", "max",
                 "25_percentile", "75_percentile", "1_percentile",
                 "99_percentile", "std_dev", "median"))

        # percentiles/median use .compressed() to drop the masked entries
        writer.writerow(
            (dir_in, test + "_" + str(sds_ct), mast + "_" + str(sds_ct),
             np.mean(diff_img), np.amin(diff_img), np.amax(diff_img),
             np.percentile(diff_img.compressed(), 25),
             np.percentile(diff_img.compressed(), 75),
             np.percentile(diff_img.compressed(), 1),
             np.percentile(diff_img.compressed(), 99),
             np.std(diff_img), np.median(diff_img.compressed())))

    return None
def extent_diff_rows(test, mast):
    """Make sure number of rows are the same between GDAL objects.

    Args:
      test <osgeo.gdal.Dataset>: test raster
      mast <osgeo.gdal.Dataset>: master raster
    """
    # Guard clause: identical row counts means nothing more to report
    if test.RasterYSize == mast.RasterYSize:
        logger.info("Rows match.")
        return True

    logger.error("Rows do not match.")
    logger.error("Test row: {0}".format(test.RasterYSize))
    logger.error("Master row: {0}".format(mast.RasterYSize))
    return False
def call_stats(test, mast, rast_arr, fn_out, dir_out, rast_num=0):
    """Call stats function(s) if data are valid

    Args:
      test <str>: name of test file
      mast <str>: name of master file
      rast_arr <numpy.ndarray>: array of target raster
      fn_out <str>: file path of image
      dir_out <str>: path to output directory
      rast_num <int>: individual number of image (default=0)
      nodata <int>: no data value (default=-9999)
    """
    # Guard: bail out early if the diff is not an array at all
    if not isinstance(rast_arr, (np.ndarray, np.ma.core.MaskedArray)):
        logger.warning("Target raster is not a valid numpy array or numpy "
                       "masked array. Cannot run statistics!")
        return

    # Guard: an all-zero diff means the rasters are identical
    if not np.any(rast_arr != 0):
        logger.info("Binary data match.")
        return

    logger.warning("Image difference found!")
    logger.warning("Test: {0} | Master: {1}".format(test, mast))

    # find file name (for saving plot)
    fout = fn_out.split(os.sep)[-1]

    # do stats of difference
    stats.img_stats(test, mast, rast_arr, os.path.dirname(fn_out), fout,
                    dir_out, rast_num)

    # plot diff image, abs diff image, and diff histograms
    ImWrite.plot_diff_image(test, mast, rast_arr, fout,
                            "diff_" + str(rast_num), dir_out)
    ImWrite.plot_diff_image(test, mast, rast_arr, fout,
                            "abs_diff_" + str(rast_num), dir_out,
                            do_abs=True)
    ImWrite.plot_hist(test, mast, rast_arr, fout,
                      "diff_" + str(rast_num), dir_out)
def check_jpeg_files(test: list, mast: list, dir_out: str) -> None:
    """
    Check JPEG files (i.e., Gverify or preview images) for diffs in file
    size or file contents. Plot difference image if applicable

    :param test: List of paths to test jpg files
    :param mast: List of paths to master jpg files
    :param dir_out: Full path to output directory
    :return:
    """
    test, mast = Cleanup.remove_nonmatching_files(test, mast)

    logger.info("Checking JPEG preview/gverify files...")

    # Guard: nothing to compare if either side is empty
    if mast is None or test is None:
        logger.error("No JPEG files to check in test and/or mast "
                     "directories.")
        return

    for t_file, m_file in zip(test, mast):
        # Compare file sizes
        t_size = os.path.getsize(t_file)
        m_size = os.path.getsize(m_file)
        if t_size != m_size:
            logger.warning("JPEG file sizes do not match for "
                           "Master {0} and Test {1}...\n".format(m_file,
                                                                 t_file))
            logger.warning("{0} size: {1}".format(t_file, t_size))
            logger.warning("{0} size: {1}".format(m_file, m_size))
        else:
            logger.info("JPEG files {0} and {1} are the same "
                        "size".format(m_file, t_file))

        # diff images
        result = ArrayImage.check_images(t_file, m_file)
        if result:
            ImWrite.plot_diff_image(test=t_file,
                                    mast=m_file,
                                    diff_raster=result,
                                    fn_out=t_file.split(os.sep)[-1],
                                    fn_type="diff_",
                                    dir_out=dir_out)
def compare_geo_trans(test, mast):
    """Make sure geographic transforms are the same between GDAL objects.

    Args:
      test <osgeo.gdal.Dataset>: test raster
      mast <osgeo.gdal.Dataset>: master raster
    """
    tst_gt = test.GetGeoTransform()
    mst_gt = mast.GetGeoTransform()

    gt_diff = (tst_gt == mst_gt)

    if gt_diff:
        logger.info("Geo transforms match.")
        status = True
    else:
        # Fixed inverted message: this branch previously logged
        # "Geo transforms match." on the failure path
        logger.error("Geo transforms do not match.")
        logger.error("Test transform: {0}".format(tst_gt))
        logger.error("Master transform: {0}".format(mst_gt))
        status = False

    return status
def check_text_files(test, mast, ext):
    """Check master and test text-based files (headers, XML, etc.)
    line-by-line for differences. Sort all the lines to attempt to capture
    new entries.

    Args:
      test <str>: path to test text file
      mast <str>: path to master text file
      ext <str>: file extension (should be .txt, .xml or .gtf
    """
    logger.info("Checking {0} files...".format(ext))

    test, mast = Cleanup.remove_nonmatching_files(test, mast)

    # Do some checks to make sure files are worth testing
    if mast is None or test is None:
        logger.warning("No {0} files to check in test and/or mast "
                       "directories.".format(ext))
        return

    if len(mast) != len(test):
        logger.error("{0} file lengths differ. Master: {1} | Test:"
                     " {2}".format(ext, len(mast), len(test)))
        return

    for i, j in zip(test, mast):
        # Context managers guarantee the handles close even if a read
        # fails (previously closed manually)
        with open(i) as topen, open(j) as mopen:
            # Read text line-by-line from file
            file_topen = topen.readlines()
            file_mopen = mopen.readlines()

        # Check file names for name differences.
        # Print non-matching names in details.
        # get file names
        i_fn = i.split(os.sep)[-1]
        j_fn = j.split(os.sep)[-1]

        if i_fn != j_fn:
            logger.error(
                "{0} file names differ. Master: {1} | Test: {2}".format(
                    ext, j, i))
            return
        else:
            logger.info("{0} file names equivalent. Master: {1} | Test: "
                        "{2}".format(ext, j, i))

        # Check open files line-by-line (sorted) for changes.
        # Print non-matching lines in details.
        txt_diffs = set(file_topen).difference(set(file_mopen))

        if len(txt_diffs) > 0:
            for k in txt_diffs:
                logger.error("{0} changes: {1}".format(ext, k))
        else:
            logger.info("No differences between {0} and {1}.".format(i, j))
def compare_proj_ref(test, mast):
    """Make sure projections are the same between two GDAL objects.

    Args:
      test <osgeo.gdal.Dataset>: test raster
      mast <osgeo.gdal.Dataset>: master raster
    """
    tst_proj = test.GetProjectionRef()
    mst_proj = mast.GetProjectionRef()

    # Early return when the WKT strings agree
    if tst_proj == mst_proj:
        logger.info("Projections match.")
        return True

    logger.error("Projections do not match.")
    logger.error("Test transform: {0}".format(tst_proj))
    logger.error("Master transform: {0}".format(mst_proj))
    return False
def do_diff(test, mast, nodata=False):
    """Do image diff, break if the grids are not the same size.

    Args:
      test <numpy.ndarray>: array of test raster
      mast <numpy.ndarray>: array of master raster
      nodata: NoData value to mask out before differencing (default=False,
              meaning no masking)

    Returns:
      numpy array of (test - mast) as float, or False on failure.
    """
    # If a NoData value is present, or the "--include-nodata" flag was used:
    if nodata is not False:
        test = np.ma.masked_where(test == nodata, test)
        mast = np.ma.masked_where(mast == nodata, mast)
        logger.info("Making nodata value {0} from diff calc.".format(nodata))

    try:
        # np.float was removed in NumPy 1.24; the builtin float is the
        # documented replacement (float64 arrays)
        diff = test.astype(float) - mast.astype(float)
        return diff
    except (ValueError, AttributeError, TypeError) as e:
        logger.warning("Error: {0}".format(e))
        return False
def check_images(test, mast, dir_out, ext, include_nd=False):
    """Compare the test and master images, both for their raw contents and
    geographic parameters. If differences exist, produce diff plot + CSV
    stats file.

    Args:
      test <str>: path to test image
      mast <str>: path to master image
      dir_out <str>: path to output directory
      ext <str>: file extension
      include_nd <bool>: incl. nodata values in file cmp (default=False)
    """
    logger.warning("Checking {0} files...".format(ext))

    # clean up non-matching files
    test, mast = Cleanup.remove_nonmatching_files(test, mast)

    # make sure there are actually files to check
    if mast is None or test is None:
        logger.error("No {0} files to check in test and/or mast directories.".format(ext))
        return False

    # do other comparison checks, return stats + plots if diffs exist
    for i, j in zip(test, mast):
        logger.info("Checking Test {0} against Master {1}".format(i, j))

        # Open each raster
        ds_test = RasterIO.open_raster(i)
        ds_mast = RasterIO.open_raster(j)

        # Compare various raster parameters
        status = list()
        status.append(RasterCmp.compare_proj_ref(ds_test, ds_mast))
        status.append(RasterCmp.compare_geo_trans(ds_test, ds_mast))
        status.append(RasterCmp.extent_diff_cols(ds_test, ds_mast))
        status.append(RasterCmp.extent_diff_rows(ds_test, ds_mast))

        # If any above tests fail, go to next iteration
        if any(stat is False for stat in status):
            continue

        # Count number of sub-bands in the files
        d_range = Find.count(i, ds_test, j, ds_mast, ext)
        if d_range is None:
            logger.critical("Number of files different; data cannot be tested successfully.")
            continue

        # if sub-bands exist, read them one-by-one and do diffs + stats
        if d_range > 1:
            for ii in range(0, d_range):
                # Get the first band from each raster
                if ext == ".img":
                    logger.info("Reading sub-band {0} from .img {1}...".format(ii, i))
                    # read_band_as_array returns (array, nodata); the
                    # previous code did not unpack the tuple, leaving t_nd
                    # undefined on this path. GDAL band indices are 1-based,
                    # so the 0-based loop index is offset by one.
                    ds_tband, t_nd = RasterIO.read_band_as_array(ds_test, ii + 1)
                    ds_mband, m_nd = RasterIO.read_band_as_array(ds_mast, ii + 1)
                else:
                    # fixed format string: previously "{0} ... {0}" logged
                    # the index twice and dropped the filename
                    logger.info("Reading .hdf/.nc SDS {0} from file {1}...".format(ii, i))
                    sds_tband = RasterIO.open_raster(RasterIO.get_sds(ds_test)[ii][0])
                    sds_mband = RasterIO.open_raster(RasterIO.get_sds(ds_mast)[ii][0])
                    ds_tband, t_nd = RasterIO.read_band_as_array(sds_tband)
                    ds_mband, m_nd = RasterIO.read_band_as_array(sds_mband)

                # do image differencing without masking NoData
                if isinstance(t_nd, type(None)) or include_nd:
                    diff = do_diff(ds_tband, ds_mband)
                # do image differencing with NoData masked
                else:
                    diff = do_diff(ds_tband, ds_mband, nodata=int(t_nd))

                # call stats functions to write out results/plots/etc.
                call_stats(i, j, diff, i, dir_out, rast_num=ii)

        else:  # else it's a singleband raster
            logger.info("Reading {0}...".format(i))

            # read in bands as array
            ds_tband, t_nd = RasterIO.read_band_as_array(ds_test)
            ds_mband, m_nd = RasterIO.read_band_as_array(ds_mast)

            # do diff
            if isinstance(t_nd, type(None)) or include_nd:
                diff = do_diff(ds_tband, ds_mband)
            else:
                diff = do_diff(ds_tband, ds_mband, nodata=int(t_nd))

            # call stats functions to write out results/plots/etc.
            call_stats(i, j, diff, i, dir_out)
def qa_data(dir_mast: str, dir_test: str, dir_out: str,
            archive: bool = True, xml_schema: str = None,
            incl_nd: bool = False) -> None:
    """
    Function to check files and call appropriate QA module(s)

    :param dir_mast: Full path to the master directory
    :param dir_test: Full path to the test directory
    :param dir_out: Full path to the QA output directory
    :param archive: If True, will clean up existing files and extract from archives
    :param xml_schema: Full path to XML files, default is None
    :param incl_nd: If True, include NoData in comparisons
    :return:
    """
    # start timing code
    t0 = time.time()

    # create output dir if it doesn't exist
    # (previously duplicated again inside the archive branch)
    if not os.path.exists(dir_out):
        os.makedirs(dir_out)

    if archive:
        # do initial cleanup of input directories
        Cleanup.cleanup_files(dir_mast)
        Cleanup.cleanup_files(dir_test)

        # read in .tar.gz files
        test_files = Find.find_files(dir_test, ".tar*")
        mast_files = Find.find_files(dir_mast, ".tar*")

        # Extract files from archive
        Extract.unzip_files(test_files, mast_files)

    # find only the deepest dirs
    test_dirs = sorted([r for r, d, f in os.walk(dir_test) if not d])
    mast_dirs = sorted([r for r, d, f in os.walk(dir_mast) if not d])

    if len(test_dirs) != len(mast_dirs):
        logger.critical(
            "Directory structure of Master differs from Test., MASTER: %s, TEST: %s",
            mast_dirs, test_dirs)
        sys.exit(1)

    for i in range(0, len(test_dirs)):
        # Find extracted files
        all_test = sorted(Find.find_files(test_dirs[i], ".*"))
        all_mast = sorted(Find.find_files(mast_dirs[i], ".*"))

        # Find unique file extensions
        exts = Find.get_ext(all_test, all_mast)

        for ext in exts:
            logger.info("Finding {0} files...".format(ext))
            test_f = Find.find_files(test_dirs[i], ext)
            mast_f = Find.find_files(mast_dirs[i], ext)

            logger.info("Performing QA on {0} files located in {1}".format(
                ext, dir_test))
            logger.info("Test files: {0}".format(test_f))
            logger.info("Mast files: {0}".format(mast_f))

            # remove any _hdf.img files found with .img files
            if ext == ".img":
                test_f = Cleanup.rm_files(test_f, "_hdf.img")
                mast_f = Cleanup.rm_files(mast_f, "_hdf.img")

            # if a text-based file
            if (ext.lower() == ".txt" or ext.lower() == ".xml"
                    or ext.lower() == ".gtf" or ext.lower() == ".hdr"
                    or ext.lower() == ".stats"):
                MetadataQA.check_text_files(test_f, mast_f, ext)

                # if text-based file is xml
                if ext.lower() == ".xml" and xml_schema:
                    MetadataQA.check_xml_schema(test_f, xml_schema)
                    MetadataQA.check_xml_schema(mast_f, xml_schema)

            # if non-geo image
            elif ext.lower() == ".jpg":
                MetadataQA.check_jpeg_files(test_f, mast_f, dir_out)

            # if no extension
            elif len(ext) == 0:
                continue

            # else, it's probably a geo-based image
            else:
                GeoImage.check_images(test_f, mast_f, dir_out, ext,
                                      include_nd=incl_nd)

    if archive:
        # Clean up files
        Cleanup.cleanup_files(dir_mast)
        Cleanup.cleanup_files(dir_test)

    # end timing
    t1 = time.time()
    m, s = divmod(t1 - t0, 60)
    h, m = divmod(m, 60)
    logger.warning("Total runtime: {0}h, {1}m, {2}s.".format(
        h, round(m, 3), round(s, 3)))
    logger.warning("Done.")

    return None
def count(fn_test, test, fn_mast, mast, ext):
    """Count number of bands inside file to decide how to iterate through
    file.

    Args:
      fn_test <str>: file name of test raster.
      test <osgeo.gdal.Dataset>: test raster
      fn_mast <str>: file name of master raster.
      mast <osgeo.gdal.Dataset>: master raster
      ext <str>: file extension of raster

    Returns:
      The shared band/SDS count, or None when test and master disagree.
    """
    def count_bands(r_name, raster):
        """Count number of bands inside raster (dead gdal import removed)."""
        d_r = raster.RasterCount
        logger.info("Number of bands in {0}: {1}".format(r_name, d_r))
        return d_r

    def count_sds(r_name, raster):
        """Count number of SDS inside raster (dead gdal import removed)."""
        d_r = len(raster.GetSubDatasets())
        logger.info("Number of SDS in {0}: {1}".format(r_name, d_r))
        return d_r

    # count bands in each raster. if > 1, then handle differently
    if ext == ".img":
        # count_bands returns a 0 if there's <= 1 band in data
        d_range_test = count_bands(fn_test, test)
        d_range_mast = count_bands(fn_mast, mast)
    elif ext == ".hdf" or ext == ".nc":
        d_range_test = count_sds(fn_test, test)
        d_range_mast = count_sds(fn_mast, mast)
    else:
        d_range_test = 1
        d_range_mast = 1

    # log single/multiband status for both files with one loop
    for name, rng in ((fn_test, d_range_test), (fn_mast, d_range_mast)):
        if rng == 1:
            logger.info("File {0} is a singleband raster.".format(name))
        else:
            logger.info("File {0} is a multiband raster.".format(name))

    if int(d_range_test) != int(d_range_mast):
        logger.critical("Number of sub-bands inside raster do not match. "
                        "Test: {0} | Master: {1}.".format(
                            d_range_test, d_range_mast))
        return None

    return d_range_test