Example #1
def raster_vrt_stitch(inrasters,
                      outraster,
                      epsg,
                      clip=None,
                      clean=False,
                      warp_options: dict = {}):
    """[summary]
    https://gdal.org/python/osgeo.gdal-module.html#BuildVRT
    Keyword arguments are:
        options --- can be an array of strings, a string, or left empty and filled from other keywords.
        resolution --- 'highest', 'lowest', 'average', 'user'.
        outputBounds --- output bounds as (minX, minY, maxX, maxY) in target SRS.
        xRes, yRes --- output resolution in target SRS.
        targetAlignedPixels --- whether to force output bounds to be multiple of output resolution.
        separate --- whether each source file goes into a separate stacked band in the VRT.
        bandList --- array of band numbers (index start at 1).
        addAlpha --- whether to add an alpha mask band to the VRT when the source rasters have none.
        resampleAlg --- resampling mode.
            near: nearest neighbour resampling (default, fastest algorithm, worst interpolation quality).
            bilinear: bilinear resampling.
            cubic: cubic resampling.
            cubicspline: cubic spline resampling.
            lanczos: Lanczos windowed sinc resampling.
            average: average resampling, computes the average of all non-NODATA contributing pixels.
            mode: mode resampling, selects the value which appears most often of all the sampled points.
            max: maximum resampling, selects the maximum value from all non-NODATA contributing pixels.
            min: minimum resampling, selects the minimum value from all non-NODATA contributing pixels.
            med: median resampling, selects the median value of all non-NODATA contributing pixels.
            q1: first quartile resampling, selects the first quartile value of all non-NODATA contributing pixels.
            q3: third quartile resampling, selects the third quartile value of all non-NODATA contributing pixels.
        outputSRS --- assigned output SRS.
        allowProjectionDifference --- whether to accept input datasets that do not have the same projection. Note: they will *not* be reprojected.
        srcNodata --- source nodata value(s).
        VRTNodata --- nodata values at the VRT band level.
        hideNodata --- whether to make the VRT band not report the NoData value.
        callback --- callback method.
        callback_data --- user data for callback.
    """
    log = Logger('Raster Stitch')

    # Build a virtual dataset that points to all the rasters then mosaic them together
    # clipping out the HUC boundary and reprojecting to the output spatial reference
    path_vrt = get_unique_file_path(
        os.path.dirname(outraster),
        os.path.basename(outraster).split('.')[0] + '.vrt')

    log.info('Building temporary vrt: {}'.format(path_vrt))
    vrt_options = gdal.BuildVRTOptions()
    gdal.BuildVRT(path_vrt, inrasters, options=vrt_options)

    raster_warp(path_vrt, outraster, epsg, clip, warp_options)

    if clean:
        for rpath in inrasters:
            safe_remove_file(rpath)
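
A minimal usage sketch for the function above. The import path, file paths, EPSG code and warp option below are assumptions for illustration, not part of the original code:

# from rscommons.raster_warp import raster_vrt_stitch   # hypothetical import path

tiles = ['/data/dem/tile_a.tif', '/data/dem/tile_b.tif']    # hypothetical source rasters
raster_vrt_stitch(tiles,
                  '/data/dem/mosaic.tif',                   # stitched output raster
                  4326,                                      # target EPSG code (assumed)
                  clip='/data/huc/boundary.shp',             # optional clip polygon
                  warp_options={'resampleAlg': 'bilinear'},  # forwarded to raster_warp
                  clean=False)
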
Example #2
def process_lst(lst_xml_folder):
    """This is a slightly hack-y script to create some XMLS for the land_surface_temp script
        It's a bit of an afterthought so it just plunks down the XMLS all alone in a folder

    Args:
        lst_xml_folder ([type]): [description]
    """

    log = Logger("Generate XMLS for LST")
    hucs = [str(1700 + x) for x in range(1, 13)]

    for huc in hucs:
        hucdir = os.path.join(lst_xml_folder, huc)
        xml_file = os.path.join(hucdir, 'project.rs.xml')
        safe_makedirs(hucdir)
        if os.path.exists(xml_file):
            safe_remove_file(xml_file)

        project_name = f'Land Surface Temperature for HUC {huc}'
        project = RSProject(cfg, xml_file)
        project.create(project_name, 'LST')

        project.add_metadata({
            'ModelVersion': cfg.version,
            'HUC': huc,
            'dateCreated': datetime.datetime.now().isoformat(),
            'HUC{}'.format(len(huc)): huc
        })

        realizations = project.XMLBuilder.add_sub_element(
            project.XMLBuilder.root, 'Realizations')
        realization = project.XMLBuilder.add_sub_element(
            realizations, 'LST', None, {
                'id': 'LST1',
                'dateCreated': datetime.datetime.now().isoformat(),
                'guid': str(uuid.uuid4()),
                'productVersion': cfg.version
            })
        project.XMLBuilder.add_sub_element(realization, 'Name', project_name)

        output_node = project.XMLBuilder.add_sub_element(
            realization, 'Outputs')
        zipfile_node = project.add_dataset(output_node,
                                           f'{huc}.zip',
                                           RSLayer(f'LST Result for {huc}',
                                                   'LST_ZIP', 'ZipFile',
                                                   f'{huc}.zip'),
                                           'ZipFile',
                                           replace=True,
                                           rel_path=True)

        project.XMLBuilder.write()
    log.info('done')
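
A minimal invocation sketch, assuming the function lives in a script that is run directly; the folder path comes from the command line and cfg is the module-level configuration the function already relies on:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Generate LST project XMLs')
    parser.add_argument('lst_xml_folder', type=str, help='Folder that will receive one project.rs.xml per HUC')
    args = parser.parse_args()
    process_lst(args.lst_xml_folder)
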
Example #3
def pending_check(file_path_pending, timeout):
    """returns true if we're still pending

    Args:
        file_path_pending ([type]): path to pending file
        file_path_pending ([itn]): in seconds
    """
    pending_exists = os.path.isfile(file_path_pending)
    if not pending_exists:
        return False

    pf_stats = os.stat(file_path_pending)
    # If the pending file is older than the timeout
    # then we need to delete the file and keep going
    if time.time() - pf_stats.st_mtime > timeout:
        safe_remove_file(file_path_pending)
        return False
    else:
        return True
Example #4
    def delete_dataset(self, filepath: str, driver: ogr.Driver):
        """Delete a dataset and remove that entry from the registry
        """
        # If this dataset is known to the registry then we need to handle it
        if filepath in self._registry:
            if len(self._registry[filepath].layers) > 1:
                raise DatasetRegistryException(
                    'Cannot delete dataset when there are > 1 layers accessing it. {}'
                    .format(self._registry[filepath].layers))

            # Unload the DS
            if self._registry[filepath].ds is not None:
                self._registry[filepath].ds.Destroy()
                # Clean up the registry entry
                del self._registry[filepath]

        # Delete the Dataset
        err = driver.DeleteDataSource(filepath)

        # If this is a tempfile there's a possibility of failure.
        # In that case just remove the file normally (or try anyway)
        if err == ogr.OGRERR_FAILURE:
            safe_remove_file(filepath)
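
A usage sketch for the method above. The DatasetRegistry class name and the shapefile path are assumptions; ogr.GetDriverByName and DeleteDataSource are standard OGR calls:

from osgeo import ogr

registry = DatasetRegistry()                         # hypothetical registry instance holding open datasets
shp_driver = ogr.GetDriverByName('ESRI Shapefile')   # driver that created the dataset
registry.delete_dataset('/tmp/scratch_points.shp', shp_driver)
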
Example #5
def download_file(s3_url, download_folder, force_download=False):
    """
    Download a file given a HTTPS URL that points to a file on S3
    :param s3_url: HTTPS URL for a file on S3
    :param download_folder: Folder where the file will be downloaded.
    :param force_download: If True, remove any existing local copy and download the file again.
    :return: Local file path where the file was downloaded
    """

    log = Logger('Download')

    safe_makedirs(download_folder)

    # Retrieve the S3 bucket and path from the HTTPS URL
    result = re.match(r'https://([^.]+)[^/]+/(.*)', s3_url)

    # If the file already exists and we are forcing a download, remove it first
    file_path = os.path.join(download_folder, os.path.basename(result.group(2)))
    file_path_pending = os.path.join(download_folder, os.path.basename(result.group(2)) + '.pending')

    if os.path.isfile(file_path) and force_download:
        safe_remove_file(file_path)

    # If there is a pending path and the pending path is fairly new
    # then wait for it.
    while pending_check(file_path_pending, PENDING_TIMEOUT):
        log.debug('Waiting for .pending file. Another process is working on this.')
        time.sleep(30)
    log.info('Waiting done. Proceeding.')

    # Skip the download if the file exists
    if os.path.isfile(file_path) and os.path.getsize(file_path) > 0:
        log.info('Skipping download because file exists.')
    else:
        _file, tmpfilepath = tempfile.mkstemp(suffix=".temp", prefix="rstools_download")

        # Write our pending file. No matter what we must clean this file up!!!
        def refresh_pending(init=False):
            with open(file_path_pending, 'w') as f:
                f.write(str(datetime.datetime.now()))

        # Cleaning up the common areas is really important
        def download_cleanup():
            os.close(_file)
            safe_remove_file(tmpfilepath)
            safe_remove_file(file_path_pending)

        refresh_pending()

        pending_timer = Timer()
        log.info('Downloading {}'.format(s3_url))

        # Actual file download
        for download_retries in range(MAX_ATTEMPTS):
            if download_retries > 0:
                log.warning('Download file retry: {}'.format(download_retries))
            try:
                dl = 0
                _file, tmpfilepath = tempfile.mkstemp(suffix=".temp", prefix="rstools_download")
                with requests.get(s3_url, stream=True) as r:
                    r.raise_for_status()
                    byte_total = int(r.headers.get('content-length'))
                    progbar = ProgressBar(byte_total, 50, s3_url, byteFormat=True)

                    # Binary write to file
                    with open(tmpfilepath, 'wb') as tempf:
                        for chunk in r.iter_content(chunk_size=8192):
                            # Periodically refreshing our .pending file
                            # so other processes will be aware we are still working on it.
                            if pending_timer.ellapsed() > 10:
                                refresh_pending()
                            if chunk:  # filter out keep-alive new chunks
                                dl += len(chunk)
                                tempf.write(chunk)
                                progbar.update(dl)
                    # Make sure the temporary file was actually written
                    if (not os.path.isfile(tmpfilepath)):
                        raise Exception('Error writing to temporary file: {}'.format(tmpfilepath))

                progbar.finish()
                break
            except Exception as e:
                log.debug('Error downloading file from s3 {}: \n{}'.format(s3_url, str(e)))
                # if this is our last chance then the function must fail [0,1,2]
                if download_retries == MAX_ATTEMPTS - 1:
                    download_cleanup()  # Always clean up
                    raise e

        # Now copy the temporary file (retry 3 times)
        for copy_retries in range(MAX_ATTEMPTS):
            if copy_retries > 0:
                log.warning('Copy file retry: {}'.format(copy_retries))
            try:
                shutil.copy(tmpfilepath, file_path)
                # Make sure to clean up so the next process doesn't encounter a broken file
                if not file_compare(file_path, tmpfilepath):
                    raise Exception('Error copying temporary download to final path')
                break

            except Exception as e:
                log.debug('Error copying file from temporary location {}: \n{}'.format(tmpfilepath, str(e)))
                # if this is our last chance then the function must fail [0,1,2]
                if copy_retries == MAX_ATTEMPTS - 1:
                    download_cleanup()  # Always clean up
                    raise e

        download_cleanup()  # Always clean up

    return file_path
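
A usage sketch for download_file; the bucket URL and download folder are hypothetical:

local_path = download_file('https://my-bucket.s3.amazonaws.com/lst/1706.zip',   # hypothetical S3 HTTPS URL
                           '/tmp/rstools_downloads')
# local_path -> /tmp/rstools_downloads/1706.zip (re-using any healthy existing copy)
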
Example #6
def unzip(file_path, destination_folder, force_overwrite=False, retries=3):
    """[summary]

    Args:
        file_path: Full path to an existing zip archive
        destination_folder: Path where the zip archive will be unzipped
        force_overwrite (bool, optional): Force overwrite of a file if it's already there. Defaults to False.
        retries (int, optional): Number of retries on a single file. Defaults to 3.

    Raises:
        Exception: [description]
        Exception: [description]
        Exception: [description]
    """
    log = Logger('Unzipper')

    if not os.path.isfile(file_path):
        raise Exception('Unzip error: file not found: {}'.format(file_path))

    try:
        log.info('Attempting unzip: {} ==> {}'.format(file_path, destination_folder))
        zip_ref = zipfile.ZipFile(file_path, 'r')

        # Make sure the destination folder exists
        safe_makedirs(destination_folder)

        log.info('Extracting: {}'.format(file_path))

        # Only unzip things we haven't already unzipped
        for fitem in zip_ref.filelist:
            uz_success = False
            uz_retry = 0
            while not uz_success and uz_retry < retries:
                try:
                    outfile = os.path.join(destination_folder, fitem.filename)
                    if fitem.is_dir():
                        if not os.path.isdir(outfile):
                            zip_ref.extract(fitem, destination_folder)
                            log.debug('   (creating)  {}'.format(fitem.filename))
                        else:
                            log.debug('   (skipping)  {}'.format(fitem.filename))
                    else:
                        # Re-extract if forced, if the file is missing, or if the existing copy looks truncated
                        if force_overwrite or not os.path.isfile(outfile) or (fitem.file_size > 0 and (os.path.getsize(outfile) / fitem.file_size) < 0.99999):
                            log.debug('   (unzipping) {}'.format(fitem.filename))
                            zip_ref.extract(fitem, destination_folder)
                        else:
                            log.debug('   (skipping)  {}'.format(fitem.filename))

                    uz_success = True
                except Exception as e:
                    log.debug(e)
                    log.warning('unzipping file failed. waiting 3 seconds and retrying...')
                    time.sleep(3)
                    uz_retry += 1

            if (not uz_success):
                raise Exception('Unzipping of file {} failed after {} attempts'.format(fitem.filename, retries))

        zip_ref.close()
        log.info('Done')

    except zipfile.BadZipFile as e:
        # If the zip file is bad then we have to remove it.
        log.error('BadZipFile. Cleaning up zip file and output folder')
        safe_remove_file(file_path)
        safe_remove_dir(destination_folder)
        raise Exception('Unzip error: BadZipFile')
    except Exception as e:
        log.error('Error unzipping. Cleaning up output folder')
        safe_remove_dir(destination_folder)
        raise Exception('Unzip error: file could not be unzipped')
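
A usage sketch for unzip, continuing the hypothetical download above; both paths are assumptions:

unzip('/tmp/rstools_downloads/1706.zip',    # archive produced by download_file (hypothetical path)
      '/tmp/rstools_downloads/1706',        # destination folder
      force_overwrite=False,
      retries=3)
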