def compare_and_rm(t_names, m_names):
            """
            Drop test entries whose file name has no counterpart in master.

            Only the names produced by ``rm_fn`` are compared, not the raw
            entries of ``t_names`` / ``m_names``.

            :param t_names: test file names
            :param m_names: master file names
            :return: ``t_names`` itself when every test name also occurs
                among the master names; otherwise a new list holding only
                the matching entries of ``t_names`` in their original order
            """
            # get only file name (computed once and reused below)
            test_fn = list(rm_fn(t_names))

            # names that exist in test but not in master
            unmatched = set(test_fn).difference(rm_fn(m_names))

            if not unmatched:
                return t_names

            logger.warning(
                "Files to be removed: {0}".format(sorted(unmatched)))

            # True keeps the entry, False drops it. Looking names up in the
            # set keeps this linear in the number of test files.
            rm = [name not in unmatched for name in test_fn]
            kept = list(itertools.compress(t_names, rm))

            logger.debug("remove boolean: {0}".format(rm))
            logger.debug("test_fn: {0}".format(test_fn))
            logger.debug("final list: {0}".format(kept))

            return kept
Пример #2
0
    def check_text_files(test, mast, ext):
        """Check master and test text-based files (headers, XML, etc.)
        line-by-line for differences.

        Lines are compared as sets, so line order is ignored and only lines
        that occur in the test file but not in the master file are reported.

        Args:
            test <list>: paths to test text files
            mast <list>: paths to master text files
            ext <str>: file extension (should be .txt, .xml or .gtf)

        Returns:
            None. Findings are reported through the logger. Processing stops
            early when there is nothing to compare, when the two lists have
            different lengths, or when a pair of file names does not match.
        """
        logger.info("Checking {0} files...".format(ext))

        test, mast = Cleanup.remove_nonmatching_files(test, mast)

        # Do some checks to make sure files are worth testing
        if mast is None or test is None:
            logger.warning("No {0} files to check in test and/or mast "
                           "directories.".format(ext))
            return

        if len(mast) != len(test):
            logger.error("{0} file lengths differ. Master: {1} | Test:"
                         " {2}".format(ext, len(mast), len(test)))
            return

        for i, j in zip(test, mast):
            # Read text line-by-line from file. The context managers close
            # both handles even when the second open or a read fails.
            with open(i) as topen, open(j) as mopen:
                file_topen = topen.readlines()
                file_mopen = mopen.readlines()

            # Check file names for name differences.
            # Print non-matching names in details.
            i_fn = i.split(os.sep)[-1]
            j_fn = j.split(os.sep)[-1]
            if i_fn != j_fn:
                logger.error(
                    "{0} file names differ. Master: {1} | Test: {2}".format(
                        ext, j, i))
                return

            logger.info("{0} file names equivalent. Master: {1} | Test: "
                        "{2}".format(ext, j, i))

            # Lines present in test but missing from master, sorted so the
            # report order is stable from run to run.
            txt_diffs = sorted(set(file_topen).difference(file_mopen))
            if txt_diffs:
                for k in txt_diffs:
                    logger.error("{0} changes: {1}".format(ext, k))

            else:
                logger.info("No differences between {0} and {1}.".format(i, j))
Пример #3
0
def pct_diff_raster(ds_tband: np.ndarray,
                    ds_mband: np.ndarray,
                    diff_rast: np.ndarray,
                    nodata: int = -9999) -> np.ndarray:
    """
    Express a difference raster as a percentage of the combined value range
    of the test and master rasters.

    :param ds_tband: array of test raster
    :param ds_mband: array of master raster
    :param diff_rast: array of difference raster
    :param nodata: int representing no data value; pixels equal to it are
        ignored when the value range is determined
    :return: array of ``|diff_rast|`` divided by ``|max - min|`` of both
        rasters, multiplied by 100
    """
    # hide nodata pixels so they cannot influence the value range
    test_valid = np.ma.masked_where(ds_tband == nodata, ds_tband)
    mast_valid = np.ma.masked_where(ds_mband == nodata, ds_mband)

    # smallest and largest valid value across both rasters
    rmin = np.min([np.min(test_valid), np.min(mast_valid)])
    rmax = np.max([np.max(test_valid), np.max(mast_valid)])

    value_range = np.abs(float(rmax - rmin))

    # scale the absolute difference to percent of that range
    pct = (np.abs(diff_rast) / value_range) * 100

    logger.warning("Percent difference raster created.")

    return pct
def cancel_orders(txt_in: str, username: str, espa_env: str):
    """
    Cancel every ESPA order listed in an order file, then delete that file.

    The file is parsed as JSON and is expected to hold a list of objects
    that each carry an ``orderid`` key.

    :param txt_in: The full path and filename of the input txt file containing the ESPA orders
    :param username: ESPA user name
    :param espa_env: ESPA environment
    :return: None
    """

    # The context manager closes the file handle before the file is
    # unlinked at the end of this function.
    with open(txt_in, "r") as order_file:
        order_list = json.load(order_file)

    t0 = get_time()
    espa_url = get_espa_env(espa_env)
    passwd = espa_login()

    for order in order_list:
        url = espa_url + api_config.api_urls["order"] + order["orderid"]
        logger.warning('Cancelling order: %s', url)
        cancellation = {'orderid': order['orderid'], 'status': 'cancelled'}
        result = requests.put(url, auth=(username, passwd),
                              json=cancellation).json()
        logger.info('Result: %s', result)

    logger.warning('Removing cancelled file: %s', txt_in)
    os.unlink(txt_in)

    logger.info("Processing time: {}".format(get_time() - t0))
    def read_band_as_array(rast, n_bands=1):
        """Read one band of a gdal object as an array. Mask out nodata.

        Args:
           rast <osgeo.gdal.Dataset>: open raster
           n_bands <int>: value handed to rast.GetRasterBand() to select the
               band to read (default=1)

        Returns:
            tuple: (rast_arr, r_nd). rast_arr is the band data, as a masked
            array when a NoData value was read from the band and as a plain
            numpy array otherwise. r_nd is whatever GetNoDataValue()
            returned, or False when the band object has no such method.
        """
        # get nodata value
        r_a = rast.GetRasterBand(n_bands)
        r_nd = False
        try:
            r_nd = r_a.GetNoDataValue()
        except AttributeError:
            logger.warning(
                "Variable {0} does not have NoData value.".format(r_a))

        # read raster as array
        rast_arr = np.array(r_a.ReadAsArray())

        # mask nodata value, if it exists
        if r_nd is not False:
            rast_arr = np.ma.masked_where(rast_arr == r_nd, rast_arr)
            logger.info("NoData value: {0}".format(r_nd))
        else:
            # Keep the array that was just read; callers expect array data,
            # not the band object.
            logger.info("NoData value could not be determined.")

        return (rast_arr, r_nd)
    def check_images(test, mast):
        """Read in a generic (non-geographic) image, like JPEG, and do a diff
        Return diff raster if actually different

        Args:
            test <str>: path to test image
            mast <str>: path to master image

        Returns:
            The difference array produced by do_diff() when at least one
            value differs. None when the images are equivalent, cannot be
            read, or cannot be differenced.
        """
        # read images
        try:
            try:
                from scipy.misc import imread

            except ImportError:
                from scipy.ndimage import imread

            test_im = imread(test)

            mast_im = imread(mast)

        except ImportError:
            logger.warning("Likely missing Python Image Library (PIL).")

            # try Scikit Image; the import sits inside the try so that a
            # missing package is reported below instead of being raised
            try:
                from skimage.io import imread

                mast_im = imread(mast)

                test_im = imread(test)

            except (ValueError, TypeError, ImportError):
                logger.warning("Not able to open image with skimag.io. Likely missing image library.")

                return None

        # check diff
        try:
            diff_im = do_diff(test_im, mast_im)

        except ValueError:
            diff_im = False

        # do_diff() reports a failed subtraction (e.g. mismatched shapes) by
        # returning False rather than raising
        if diff_im is False:
            logger.error("Image {0} and {1} are not the same dimensions.".format(test, mast))

            return None

        # len(np.nonzero(...)) would only give the number of array
        # dimensions, so test the values themselves
        if np.any(diff_im != 0):
            logger.error("Values differ between {0} and {1}.".format(test, mast))

            return diff_im

        logger.info("Values equivalent between {0} and {1}.".format(test, mast))

        return None
def place_order(espa_env: str,
                username: str,
                outdir: str = None,
                order_key: str = None):
    """
    Place the order with the appropriate ESPA environment

    Each order returned by ``load_order(order_key)`` is POSTed to the ESPA
    order endpoint. Responses that contain an ``orderid`` are collected and
    appended, as JSON, to the file named by ``order_text(outdir)``.

    :param order_key: Optionally specify a keyword pointing to a specific order
    :param outdir: Optionally specify full path to the output directory, otherwise os.getcwd() is used
    :param espa_env: The name of the ESPA environment
    :param username: ESPA username
    :return: None
    """

    espa_url = espa_orders_api.get_espa_env(espa_env)

    orders = load_order(order_key)

    passwd = espa_orders_api.espa_login()

    order_length = len(orders)

    response = list()

    for i, order in enumerate(orders):

        logger.info("Requesting order %d of %d", i + 1, order_length)

        r = requests.post(espa_url + api_config.api_urls["order"],
                          auth=(username, passwd),
                          json=order)

        try:
            result = r.json()

        except ValueError:
            # Depending on the requests version and on whether simplejson is
            # installed, a body that is not JSON raises a different
            # JSONDecodeError class; all of them derive from ValueError.
            logger.error(
                "There was likely a problem connecting with the host.  "
                "Check to see if ESPA is down for maintenance.")
            continue

        if 'orderid' not in result:
            logger.error('Order "%s" %d/%d failed: %s', order_key, i + 1,
                         order_length, result)
            continue

        response.append(result)

    filename = order_text(outdir)
    logger.warning('Store ordering results: %s', filename)
    with open(filename, "a") as f:
        f.write(json.dumps(response, indent=4))
    def cleanup_files(indir: str):
        """
        Clean up all unpacked files, leaving alone the .tar.gz archives

        Every file below ``indir`` whose name does not end in ``.tar.gz`` or
        ``.tar`` is deleted. Afterwards every directory named ``gap_mask``
        or ``stats`` is removed together with its contents.

        :param indir: Full path to the target directory
        :return: None
        """
        print("Cleaning up files...")

        # Snapshot all file paths first so nothing is deleted while walking
        all_files = [
            os.path.join(dirpath, f)
            for dirpath, _dirnames, files in os.walk(indir)
            for f in files
        ]

        for f in all_files:
            # keep the archives
            if f.endswith((".tar.gz", ".tar")):
                continue

            try:
                os.remove(f)

            except OSError:
                # best effort: a file that cannot be removed is skipped
                continue

        logger.warning("Cleaned up all data files.")

        # Clean up gap mask and stats directories. Both lists are collected
        # before the first directory is removed.
        leftover_dirs = [
            os.path.join(dirpath, d)
            for pattern in ('gap_mask', 'stats')
            for dirpath, dirnames, _files in os.walk(indir)
            for d in fnmatch.filter(dirnames, pattern)
        ]

        for d in leftover_dirs:
            shutil.rmtree(d, ignore_errors=True)

        logger.warning("Removed all non-archive files.")

        return None
    def plot_diff_image(test,
                        mast,
                        diff_raster,
                        fn_out,
                        fn_type,
                        dir_out,
                        do_abs=False):
        """Render a difference array as a PNG image.

        Args:
            test <str>: name of test file
            mast <str>: name of mast file
            diff_raster <numpy.ndarray>: numpy array of values
            fn_out <str>: basename for file
            fn_type <str>: suffix appended to the output file name, also
                used in the log message
            dir_out <str>: directory where output data are being stored
            do_abs <bool>: plot absolute differences in gray scale instead
                of signed differences (default=False)
        """
        import matplotlib.pyplot as plt
        import numpy as np

        # hide the pixels that are identical in both images
        changed = np.ma.masked_where(diff_raster == 0, diff_raster)

        # <dir_out><sep><fn_out>_<fn_type>.png
        im_out = "".join((dir_out, os.sep, fn_out, "_", fn_type, ".png"))

        # pick what to draw and how to label it
        if do_abs:
            shown, colors, bar_label = (np.abs(changed), 'gist_gray',
                                        "Abs. Difference")
        else:
            shown, colors, bar_label = changed, 'PuOr', "Difference"

        plt.imshow(shown, cmap=colors)
        plt.colorbar(label=bar_label)

        # put both file names in the upper left corner of the axes
        file_names = "\n".join((str(mast), str(test), ""))
        plt.annotate(file_names,
                     fontsize=5,
                     xy=(0.01, 0.94),
                     xycoords='axes fraction')

        plt.title(fn_out, y=1.05)
        plt.savefig(im_out, dpi=250)
        plt.close()

        logger.warning("{0} raster written to {1}.".format(fn_type, im_out))
Пример #10
0
    def check_xml_schema(test, schema):
        """Ensure XML matches ESPA schema.

        The schema is parsed once and every given XML file is validated
        against it; the outcome for each file is logged.

        :param test: <str or list> XML metadata file to compare with schema,
            or a list/tuple of such files.
        :param schema: <str> Path to XML schema file.
        :return: None
        """
        # read schema
        xmlschema = etree.XMLSchema(etree.parse(schema))

        # Accept a single file as well as a list of files, since callers
        # may hand over the list produced by a file search.
        xml_files = test if isinstance(test, (list, tuple)) else [test]

        for xml_path in xml_files:
            # read XML
            xmlfile = etree.parse(xml_path)

            # do validation
            if xmlschema.validate(xmlfile):
                logger.warning(
                    'XML file {0} is valid with XML schema {1}.'.format(
                        xml_path, schema))

            else:
                logger.critical(
                    'XML file {0} is NOT valid with XML schema {1}.'.format(
                        xml_path, schema))
def do_diff(test, mast, nodata=False):
    """Do image diff, break if the grids are not the same size.

    Args:
        test <numpy.ndarray>: array of test raster
        mast <numpy.ndarray>: array of master raster
        nodata: value to mask out of both arrays before differencing;
            False (default) disables masking

    Returns:
        ``test - mast`` computed in floating point (a masked array when
        ``nodata`` is given), or False when the subtraction fails, e.g.
        because the arrays have incompatible shapes.
    """
    # If a NoData value is present, or the "--include-nodata" flag was used:
    if nodata is not False:
        test = np.ma.masked_where(test == nodata, test)
        mast = np.ma.masked_where(mast == nodata, mast)

        logger.info("Making nodata value {0} from diff calc.".format(nodata))

    try:
        # TODO: Figure out why some bands cannot be compared correctly.
        # The np.float alias no longer exists in current NumPy releases;
        # the builtin float selects the same float64 dtype.
        diff = test.astype(float) - mast.astype(float)

        return diff

    except (ValueError, AttributeError, TypeError) as e:
        logger.warning("Error: {0}".format(e))

        return False
def call_stats(test, mast, rast_arr, fn_out, dir_out, rast_num=0):
    """Run statistics and plotting for a difference raster, if it is valid
    and contains at least one non-zero value.

    Args:
        test <str>: name of test file
        mast <str>: name of master file
        rast_arr <numpy.ndarray>: array of target raster
        fn_out <str>: file path of image
        dir_out <str>: path to output directory
        rast_num <int>: individual number of image (default=0)
    """
    # anything that is not a numpy (masked) array cannot be analysed
    if not isinstance(rast_arr, (np.ndarray, np.ma.core.MaskedArray)):
        logger.warning("Target raster is not a valid numpy array or numpy "
                        "masked array. Cannot run statistics!")
        return

    # nothing to report when every value is zero
    if not np.any(rast_arr != 0):
        logger.info("Binary data match.")
        return

    logger.warning("Image difference found!")

    logger.warning("Test: {0} | Master: {1}".format(test, mast))

    # file name without its directory, used to name the outputs
    fout = fn_out.split(os.sep)[-1]
    diff_tag = "diff_" + str(rast_num)
    abs_diff_tag = "abs_diff_" + str(rast_num)

    # statistics of the difference
    stats.img_stats(test, mast, rast_arr, os.path.dirname(fn_out), fout,
                    dir_out, rast_num)

    # signed difference image
    ImWrite.plot_diff_image(test, mast, rast_arr, fout, diff_tag, dir_out)

    # absolute difference image
    ImWrite.plot_diff_image(test, mast, rast_arr, fout, abs_diff_tag,
                            dir_out, do_abs=True)

    # histogram of the differences
    ImWrite.plot_hist(test, mast, rast_arr, fout, diff_tag, dir_out)
Пример #13
0
    def check_jpeg_files(test: list, mast: list, dir_out: str) -> None:
        """
        Check JPEG files (i.e., Gverify or preview images) for diffs in file size or file contents.  Plot difference
        image if applicable
        :param test: List of paths to test jpg files
        :param mast: List of paths to master jpg files
        :param dir_out: Full path to output directory
        :return: None
        """
        test, mast = Cleanup.remove_nonmatching_files(test, mast)
        logger.info("Checking JPEG preview/gverify files...")

        if mast is None or test is None:
            logger.error("No JPEG files to check in test and/or mast "
                         "directories.")
            return

        for i, j in zip(test, mast):

            # Compare file sizes (each size is looked up once)
            test_size = os.path.getsize(i)
            mast_size = os.path.getsize(j)

            if test_size != mast_size:
                logger.warning("JPEG file sizes do not match for "
                               "Master {0} and Test {1}...\n".format(j, i))
                logger.warning("{0} size: {1}".format(i, test_size))
                logger.warning("{0} size: {1}".format(j, mast_size))

            else:
                logger.info("JPEG files {0} and {1} are the same "
                            "size".format(j, i))

            # diff images
            result = ArrayImage.check_images(i, j)

            # The result is compared against None because the truth value
            # of a multi-element numpy array is ambiguous and raises
            # ValueError.
            if result is not None:
                ImWrite.plot_diff_image(test=i,
                                        mast=j,
                                        diff_raster=result,
                                        fn_out=i.split(os.sep)[-1],
                                        fn_type="diff_",
                                        dir_out=dir_out)
    def check_images(test, mast, dir_out, ext, include_nd=False):
        """Compare the test and master images, both for their raw contents and
        geographic parameters. If differences exist, produce diff plot + CSV
        stats file.

        Args:
            test <list>: paths to test images
            mast <list>: paths to master images
            dir_out <str>: path to output directory
            ext <str>: file extension
            include_nd <bool>: incl. nodata values in file cmp (default=False)

        Returns:
            False when there are no files to compare, otherwise None.
        """
        def diff_bands(t_band, m_band, t_nodata):
            """Diff two bands, masking the test NoData value when known."""
            # None and False both mean that no NoData value is available;
            # int(False) would otherwise turn into a NoData value of 0.
            if t_nodata is None or t_nodata is False or include_nd:
                return do_diff(t_band, m_band)

            return do_diff(t_band, m_band, nodata=int(t_nodata))

        logger.warning("Checking {0} files...".format(ext))

        # clean up non-matching files
        test, mast = Cleanup.remove_nonmatching_files(test, mast)

        # make sure there are actually files to check
        if mast is None or test is None:
            logger.error("No {0} files to check in test and/or mast directories.".format(ext))

            return False

        # do other comparison checks, return stats + plots if diffs exist
        for i, j in zip(test, mast):

            logger.info("Checking Test {0} against Master {1}".format(i, j))

            # Open each raster
            ds_test = RasterIO.open_raster(i)

            ds_mast = RasterIO.open_raster(j)

            # Compare various raster parameters
            status = [
                RasterCmp.compare_proj_ref(ds_test, ds_mast),
                RasterCmp.compare_geo_trans(ds_test, ds_mast),
                RasterCmp.extent_diff_cols(ds_test, ds_mast),
                RasterCmp.extent_diff_rows(ds_test, ds_mast),
            ]

            # If any above tests fail, go to next iteration
            if any(stat is False for stat in status):
                continue

            # Count number of sub-bands in the files
            d_range = Find.count(i, ds_test, j, ds_mast, ext)

            if d_range is None:
                logger.critical("Number of files different; data cannot be tested successfully.")

                continue

            # if sub-bands exist, read them one-by-one and do diffs + stats
            if d_range > 1:
                for ii in range(0, d_range):
                    if ext == ".img":
                        logger.info("Reading sub-band {0} from .img {1}...".format(ii, i))

                        # GDAL band numbers start at 1, and
                        # read_band_as_array returns (array, nodata)
                        ds_tband, t_nd = RasterIO.read_band_as_array(ds_test, ii + 1)

                        ds_mband, _ = RasterIO.read_band_as_array(ds_mast, ii + 1)

                    else:
                        logger.info("Reading .hdf/.nc SDS {0} from file {1}...".format(ii, i))

                        sds_tband = RasterIO.open_raster(RasterIO.get_sds(ds_test)[ii][0])

                        sds_mband = RasterIO.open_raster(RasterIO.get_sds(ds_mast)[ii][0])

                        ds_tband, t_nd = RasterIO.read_band_as_array(sds_tband)

                        ds_mband, _ = RasterIO.read_band_as_array(sds_mband)

                    diff = diff_bands(ds_tband, ds_mband, t_nd)

                    # call stats functions to write out results/plots/etc.
                    call_stats(i, j, diff, i, dir_out, rast_num=ii)

            else:  # else it's a singleband raster
                logger.info("Reading {0}...".format(i))

                # read in bands as array
                ds_tband, t_nd = RasterIO.read_band_as_array(ds_test)

                ds_mband, _ = RasterIO.read_band_as_array(ds_mast)

                diff = diff_bands(ds_tband, ds_mband, t_nd)

                # call stats functions to write out results/plots/etc.
                call_stats(i, j, diff, i, dir_out)
Пример #15
0
def qa_data(dir_mast: str,
            dir_test: str,
            dir_out: str,
            archive: bool = True,
            xml_schema: str = None,
            incl_nd: bool = False) -> None:
    """
    Function to check files and call appropriate QA module(s)

    The deepest directories below ``dir_test`` and ``dir_mast`` are paired
    in sorted order. For every file extension found in a pair, the matching
    check is run: text comparison (plus optional XML schema validation),
    JPEG comparison, or geo-image comparison. The process exits with status
    1 when the number of deepest directories differs.

    :param dir_mast: Full path to the master directory
    :param dir_test: Full path to the test directory
    :param dir_out: Full path to the QA output directory
    :param archive: If True, will clean up existing files and extract from archives
    :param xml_schema: Full path to the XML schema used to validate .xml files, default is None
    :param incl_nd: If True, include NoData in comparisons
    :return: None
    """
    # extensions that are compared as plain text
    text_exts = {".txt", ".xml", ".gtf", ".hdr", ".stats"}

    # start timing code
    t0 = time.time()

    # create output dir if it doesn't exist
    if not os.path.exists(dir_out):
        os.makedirs(dir_out)

    if archive:
        # do initial cleanup of input directories
        Cleanup.cleanup_files(dir_mast)

        Cleanup.cleanup_files(dir_test)

        # read in .tar.gz files
        test_files = Find.find_files(dir_test, ".tar*")

        mast_files = Find.find_files(dir_mast, ".tar*")

        # Extract files from archive
        Extract.unzip_files(test_files, mast_files)

    # find only the deepest dirs
    test_dirs = sorted([r for r, d, f in os.walk(dir_test) if not d])

    mast_dirs = sorted([r for r, d, f in os.walk(dir_mast) if not d])

    if len(test_dirs) != len(mast_dirs):
        logger.critical(
            "Directory structure of Master differs from Test., MASTER: %s, TEST: %s",
            mast_dirs, test_dirs)

        sys.exit(1)

    for test_dir, mast_dir in zip(test_dirs, mast_dirs):
        # Find extracted files
        all_test = sorted(Find.find_files(test_dir, ".*"))

        all_mast = sorted(Find.find_files(mast_dir, ".*"))

        # Find unique file extensions
        exts = Find.get_ext(all_test, all_mast)

        for ext in exts:
            logger.info("Finding {0} files...".format(ext))

            test_f = Find.find_files(test_dir, ext)

            mast_f = Find.find_files(mast_dir, ext)

            logger.info("Performing QA on {0} files located in {1}".format(
                ext, dir_test))

            logger.info("Test files: {0}".format(test_f))

            logger.info("Mast files: {0}".format(mast_f))

            # remove any _hdf.img files found with .img files
            if ext == ".img":
                test_f = Cleanup.rm_files(test_f, "_hdf.img")

                mast_f = Cleanup.rm_files(mast_f, "_hdf.img")

            ext_lower = ext.lower()

            # if a text-based file
            if ext_lower in text_exts:

                MetadataQA.check_text_files(test_f, mast_f, ext)

                # if text-based file is xml
                if ext_lower == ".xml" and xml_schema:
                    # test_f / mast_f are lists of files; hand them to the
                    # schema check one path per call
                    for xml_file in test_f:
                        MetadataQA.check_xml_schema(xml_file, xml_schema)

                    for xml_file in mast_f:
                        MetadataQA.check_xml_schema(xml_file, xml_schema)

            # if non-geo image
            elif ext_lower == ".jpg":
                MetadataQA.check_jpeg_files(test_f, mast_f, dir_out)

            # if no extension
            elif len(ext) == 0:
                continue

            # else, it's probably a geo-based image
            else:
                GeoImage.check_images(test_f,
                                      mast_f,
                                      dir_out,
                                      ext,
                                      include_nd=incl_nd)

    if archive:
        # Clean up files
        Cleanup.cleanup_files(dir_mast)

        Cleanup.cleanup_files(dir_test)

    # end timing
    t1 = time.time()

    m, s = divmod(t1 - t0, 60)

    h, m = divmod(m, 60)

    logger.warning("Total runtime: {0}h, {1}m, {2}s.".format(
        h, round(m, 3), round(s, 3)))

    logger.warning("Done.")

    return None
    def plot_hist(test,
                  mast,
                  diff_raster,
                  fn_out,
                  fn_type,
                  dir_out,
                  bins=False):
        """Take difference array and plot as histogram.

        Args:
            test <str>: name of test file
            mast <str>: name of master file
            diff_raster <numpy.ndarray>: numpy array of values
            fn_out <str>: basename for file
            fn_type <str>: suffix used in the output file name
            dir_out <str>: directory where output data are being stored
            bins <int>: number of bins for histogram; when left at the
                default (False) it is derived from the array's data type

        Returns:
            None. The histogram is written to
            <dir_out>/<fn_out>_<fn_type>_hist.png.
        """
        import matplotlib.pyplot as plt
        import numpy as np

        def bin_size(rast):
            """Determine bin size based upon data type.

            Args:
                rast <numpy.ndarray>: numpy array of values
            """
            dt = rast.dtype

            # each width has to be tested on its own; a bare '64' is a
            # non-empty string and therefore always true
            if '64' in dt.name or '32' in dt.name:
                return 2000
            elif '16' in dt.name:
                return 1000
            elif '8' in dt.name:
                return 256
            else:
                return 50

        # mask pixels that did not differ
        diff_raster = np.ma.masked_where(diff_raster == 0, diff_raster)

        # make output file
        im_out = dir_out + os.sep + fn_out + "_" + fn_type + "_hist.png"

        # get array of values that are actually different
        diff_valid = diff_raster.compressed()

        # determine bin size
        if not bins:
            bins = bin_size(diff_raster)

        # do histogram
        try:
            plt.hist(diff_valid, bins)
        except AttributeError:
            logger.warning("Difference values from diff_valid variable could"
                           " not be plotted.")
            return

        # do basic stats
        diff_mean = np.mean(diff_raster)
        diff_sd = np.std(diff_raster)
        diff_abs_mean = np.mean(np.abs(diff_raster))
        diff_pix = len(diff_valid)

        # share of differing pixels among all pixels; builtin float and
        # .size stand in for np.float / np.product, which current NumPy
        # releases no longer provide
        total_pix = diff_raster.size
        diff_pct = (float(diff_pix) / total_pix) * 100.0 if total_pix else 0.0

        # annotate plot with file names
        plt.annotate(str(mast) + "\n" + str(test) + "\n",
                     fontsize=5,
                     xy=(0.01, 0.94),
                     xycoords='axes fraction')

        # annotate plot with basic stats
        plt.annotate("mean diff: " + str(round(diff_mean, 3)) + "\n" +
                     "std. dev.: " + str(round(diff_sd, 3)) + "\n" +
                     "abs. mean diff: " + str(round(diff_abs_mean, 3)) + "\n" +
                     "# diff pixels: " + str(diff_pix) + "\n" + "% diff: " +
                     str(round(diff_pct, 3)) + "\n" + "# bins: " + str(bins) +
                     "\n",
                     xy=(0.68, 0.72),
                     xycoords='axes fraction')

        # write figure out to PNG
        plt.savefig(im_out, bbox_inches="tight", dpi=350)

        plt.close()

        logger.warning("Difference histogram written to {0}.".format(im_out))