def raster_vrt_stitch(inrasters, outraster, epsg, clip=None, clean=False, warp_options: dict = None):
    """Mosaic a list of rasters into a single output raster via a temporary VRT.

    https://gdal.org/python/osgeo.gdal-module.html#BuildVRT
    Keyword arguments are:
        options --- can be an array of strings, a string, or left empty and filled from other keywords.
        resolution --- 'highest', 'lowest', 'average', 'user'.
        outputBounds --- output bounds as (minX, minY, maxX, maxY) in target SRS.
        xRes, yRes --- output resolution in target SRS.
        targetAlignedPixels --- whether to force output bounds to be a multiple of the output resolution.
        separate --- whether each source file goes into a separate stacked band in the VRT band.
        bandList --- array of band numbers (index starts at 1).
        addAlpha --- whether to add an alpha mask band to the VRT when the source rasters have none.
        resampleAlg --- resampling mode.
            near: nearest neighbour resampling (default, fastest algorithm, worst interpolation quality).
            bilinear: bilinear resampling.
            cubic: cubic resampling.
            cubicspline: cubic spline resampling.
            lanczos: Lanczos windowed sinc resampling.
            average: average resampling, computes the average of all non-NODATA contributing pixels.
            mode: mode resampling, selects the value which appears most often of all the sampled points.
            max: maximum resampling, selects the maximum value from all non-NODATA contributing pixels.
            min: minimum resampling, selects the minimum value from all non-NODATA contributing pixels.
            med: median resampling, selects the median value of all non-NODATA contributing pixels.
            q1: first quartile resampling, selects the first quartile value of all non-NODATA contributing pixels.
            q3: third quartile resampling, selects the third quartile value of all non-NODATA contributing pixels.
        outputSRS --- assigned output SRS.
        allowProjectionDifference --- whether to accept input datasets that do not share the same projection. Note: they will *not* be reprojected.
        srcNodata --- source nodata value(s).
        VRTNodata --- nodata values at the VRT band level.
        hideNodata --- whether to make the VRT band not report the NoData value.
        callback --- callback method.
        callback_data --- user data for callback.
    """
    log = Logger('Raster Stitch')

    # Avoid the mutable-default-argument pitfall
    if warp_options is None:
        warp_options = {}

    # Build a virtual dataset that points to all the rasters then mosaic them together
    # clipping out the HUC boundary and reprojecting to the output spatial reference
    path_vrt = get_unique_file_path(os.path.dirname(outraster), os.path.basename(outraster).split('.')[0] + '.vrt')

    log.info('Building temporary vrt: {}'.format(path_vrt))
    vrt_options = gdal.BuildVRTOptions()
    gdal.BuildVRT(path_vrt, inrasters, options=vrt_options)

    raster_warp(path_vrt, outraster, epsg, clip, warp_options)

    if clean:
        for rpath in inrasters:
            safe_remove_file(rpath)

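# Illustrative usage sketch for raster_vrt_stitch (not called anywhere in this module).
# The tile paths, EPSG code and clip layer below are hypothetical placeholders, not real project data.
def _example_raster_vrt_stitch():
    raster_vrt_stitch(
        ['/tmp/tiles/dem_a.tif', '/tmp/tiles/dem_b.tif'],  # hypothetical input tiles
        '/tmp/output/dem_mosaic.tif',                      # output mosaic path
        4326,                                              # target EPSG code
        clip='/tmp/huc_boundary.shp',                      # optional clip geometry passed through to raster_warp
        clean=False                                        # keep the source tiles after stitching
    )
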
def process_lst(lst_xml_folder):
    """This is a slightly hack-y script to create some XMLs for the land_surface_temp script

    It's a bit of an afterthought so it just plunks down the XMLs all alone in a folder

    Args:
        lst_xml_folder (str): folder where the per-HUC project.rs.xml files will be written
    """
    log = Logger("Generate XMLS for LST")
    hucs = [str(1700 + x) for x in range(1, 13)]

    for huc in hucs:
        hucdir = os.path.join(lst_xml_folder, huc)
        xml_file = os.path.join(hucdir, 'project.rs.xml')
        safe_makedirs(hucdir)
        if os.path.exists(xml_file):
            safe_remove_file(xml_file)

        project_name = f'Land Surface Temperature for HUC {huc}'
        project = RSProject(cfg, xml_file)
        project.create(project_name, 'LST')

        project.add_metadata({
            'ModelVersion': cfg.version,
            'HUC': huc,
            'dateCreated': datetime.datetime.now().isoformat(),
            'HUC{}'.format(len(huc)): huc
        })

        realizations = project.XMLBuilder.add_sub_element(project.XMLBuilder.root, 'Realizations')
        realization = project.XMLBuilder.add_sub_element(realizations, 'LST', None, {
            'id': 'LST1',
            'dateCreated': datetime.datetime.now().isoformat(),
            'guid': str(uuid.uuid4()),
            'productVersion': cfg.version
        })
        project.XMLBuilder.add_sub_element(realization, 'Name', project_name)

        output_node = project.XMLBuilder.add_sub_element(realization, 'Outputs')
        project.add_dataset(output_node, f'{huc}.zip',
                            RSLayer(f'LST Result for {huc}', 'LST_ZIP', 'ZipFile', f'{huc}.zip'),
                            'ZipFile', replace=True, rel_path=True)

        project.XMLBuilder.write()
    log.info('done')

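# Illustrative usage sketch for process_lst (the output folder is a hypothetical placeholder).
def _example_process_lst():
    process_lst('/tmp/lst_projects')  # writes one project.rs.xml per HUC 1701..1712 under this folder
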
def pending_check(file_path_pending, timeout):
    """Returns True if we're still pending

    Args:
        file_path_pending (str): path to the .pending file
        timeout (int): timeout in seconds
    """
    pending_exists = os.path.isfile(file_path_pending)
    if not pending_exists:
        return False

    pf_stats = os.stat(file_path_pending)

    # If the pending file is older than the timeout
    # then we need to delete the file and keep going
    if time.time() - pf_stats.st_mtime > timeout:
        safe_remove_file(file_path_pending)
        return False

    return True

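# Illustrative usage sketch for pending_check (the path and timeout are hypothetical placeholders).
def _example_pending_check():
    pending_path = '/tmp/downloads/dem.tif.pending'  # hypothetical .pending marker file
    # Poll until the other process finishes or its .pending file goes stale (here after 300 seconds)
    while pending_check(pending_path, 300):
        time.sleep(30)
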
def delete_dataset(self, filepath: str, driver: ogr.Driver):
    """Delete a dataset and remove that entry from the registry
    """
    # If this dataset is known to the registry then we need to handle it
    if filepath in self._registry:
        if len(self._registry[filepath].layers) > 1:
            raise DatasetRegistryException(
                'Cannot delete dataset when there are > 1 layers accessing it. {}'.format(self._registry[filepath].layers))

        # Unload the DS
        if self._registry[filepath].ds is not None:
            self._registry[filepath].ds.Destroy()

        # Clean up the registry entry
        del self._registry[filepath]

    # Delete the Dataset
    err = driver.DeleteDataSource(filepath)

    # If this is a tempfile there's a possibility of failure.
    # In that case just remove the file normally (or try anyway)
    if err == ogr.OGRERR_FAILURE:
        safe_remove_file(filepath)

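# Illustrative usage sketch for delete_dataset. The enclosing registry class is not shown in this
# section, so 'DatasetRegistry' and the shapefile path below are hypothetical placeholders.
def _example_delete_dataset():
    registry = DatasetRegistry()                         # hypothetical instance of the enclosing registry class
    shp_driver = ogr.GetDriverByName('ESRI Shapefile')   # driver matching the dataset being deleted
    registry.delete_dataset('/tmp/scratch/temp_layer.shp', shp_driver)
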
def download_file(s3_url, download_folder, force_download=False):
    """
    Download a file given a HTTPS URL that points to a file on S3
    :param s3_url: HTTPS URL for a file on S3
    :param download_folder: Folder where the file will be downloaded.
    :param force_download: Re-download the file even if a local copy already exists.
    :return: Local file path where the file was downloaded
    """
    log = Logger('Download')

    safe_makedirs(download_folder)

    # Retrieve the S3 bucket and path from the HTTPS URL
    result = re.match(r'https://([^.]+)[^/]+/(.*)', s3_url)

    # If the file already exists and we're forcing a download then remove the existing copy first
    file_path = os.path.join(download_folder, os.path.basename(result.group(2)))
    file_path_pending = os.path.join(download_folder, os.path.basename(result.group(2)) + '.pending')

    if os.path.isfile(file_path) and force_download:
        safe_remove_file(file_path)

    # If there is a pending path and the pending path is fairly new
    # then wait for it.
    while pending_check(file_path_pending, PENDING_TIMEOUT):
        log.debug('Waiting for .pending file. Another process is working on this.')
        time.sleep(30)
    log.info('Waiting done. Proceeding.')

    # Skip the download if the file exists
    if os.path.isfile(file_path) and os.path.getsize(file_path) > 0:
        log.info('Skipping download because file exists.')
    else:
        _file, tmpfilepath = tempfile.mkstemp(suffix=".temp", prefix="rstools_download")

        # Write our pending file. No matter what we must clean this file up!!!
        def refresh_pending():
            with open(file_path_pending, 'w') as f:
                f.write(str(datetime.datetime.now()))

        # Cleaning up the common areas is really important
        def download_cleanup():
            os.close(_file)
            safe_remove_file(tmpfilepath)
            safe_remove_file(file_path_pending)

        refresh_pending()

        pending_timer = Timer()
        log.info('Downloading {}'.format(s3_url))

        # Actual file download
        for download_retries in range(MAX_ATTEMPTS):
            if download_retries > 0:
                log.warning('Download file retry: {}'.format(download_retries))

            try:
                dl = 0
                if download_retries > 0:
                    # Each retry gets a fresh temporary file; release the previous one first
                    os.close(_file)
                    safe_remove_file(tmpfilepath)
                    _file, tmpfilepath = tempfile.mkstemp(suffix=".temp", prefix="rstools_download")

                with requests.get(s3_url, stream=True) as r:
                    r.raise_for_status()
                    byte_total = int(r.headers.get('content-length'))
                    progbar = ProgressBar(byte_total, 50, s3_url, byteFormat=True)

                    # Binary write to file
                    with open(tmpfilepath, 'wb') as tempf:
                        for chunk in r.iter_content(chunk_size=8192):
                            # Periodically refreshing our .pending file
                            # so other processes will be aware we are still working on it.
                            if pending_timer.ellapsed() > 10:
                                refresh_pending()
                            if chunk:  # filter out keep-alive new chunks
                                dl += len(chunk)
                                tempf.write(chunk)
                                progbar.update(dl)

                # The 'with' block closes the temporary file; the file itself is removed later by download_cleanup()
                if not os.path.isfile(tmpfilepath):
                    raise Exception('Error writing to temporary file: {}'.format(tmpfilepath))

                progbar.finish()
                break
            except Exception as e:
                log.debug('Error downloading file from s3 {}: \n{}'.format(s3_url, str(e)))
                # if this is our last chance then the function must fail [0,1,2]
                if download_retries == MAX_ATTEMPTS - 1:
                    download_cleanup()  # Always clean up
                    raise e

        # Now copy the temporary file (retry 3 times)
        for copy_retries in range(MAX_ATTEMPTS):
            if copy_retries > 0:
                log.warning('Copy file retry: {}'.format(copy_retries))
            try:
                shutil.copy(tmpfilepath, file_path)

                # Make sure to clean up so the next process doesn't encounter a broken file
                if not file_compare(file_path, tmpfilepath):
                    safe_remove_file(file_path)
                    raise Exception('Error copying temporary download to final path')
                break
            except Exception as e:
                log.debug('Error copying file from temporary location {}: \n{}'.format(tmpfilepath, str(e)))
                # if this is our last chance then the function must fail [0,1,2]
                if copy_retries == MAX_ATTEMPTS - 1:
                    download_cleanup()  # Always clean up
                    raise e

        download_cleanup()  # Always clean up

    return file_path

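# Illustrative usage sketch for download_file (the bucket URL and download folder are hypothetical placeholders).
def _example_download_file():
    local_path = download_file(
        'https://my-bucket.s3.amazonaws.com/rasters/dem_17060304.tif',  # hypothetical S3 HTTPS URL
        '/tmp/rstools_downloads',                                        # local cache folder
        force_download=False)                                            # reuse an existing copy if present
    print(local_path)
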
def unzip(file_path, destination_folder, force_overwrite=False, retries=3):
    """Unzip an archive into a destination folder, retrying individual files on failure.

    Args:
        file_path: Full path to an existing zip archive
        destination_folder: Path where the zip archive will be unzipped
        force_overwrite (bool, optional): Force overwrite of a file if it's already there. Defaults to False.
        retries (int, optional): Number of retries on a single file. Defaults to 3.

    Raises:
        Exception: If the zip archive does not exist
        Exception: If the archive is corrupt (BadZipFile)
        Exception: If a file could not be unzipped after the allowed retries
    """
    log = Logger('Unzipper')

    if not os.path.isfile(file_path):
        raise Exception('Unzip error: file not found: {}'.format(file_path))

    try:
        log.info('Attempting unzip: {} ==> {}'.format(file_path, destination_folder))
        zip_ref = zipfile.ZipFile(file_path, 'r')

        # only unzip files we don't already have
        safe_makedirs(destination_folder)

        log.info('Extracting: {}'.format(file_path))

        # Only unzip things we haven't already unzipped
        for fitem in zip_ref.filelist:
            uz_success = False
            uz_retry = 0
            while not uz_success and uz_retry < retries:
                try:
                    outfile = os.path.join(destination_folder, fitem.filename)
                    if fitem.is_dir():
                        if not os.path.isdir(outfile):
                            zip_ref.extract(fitem, destination_folder)
                            log.debug(' (creating) {}'.format(fitem.filename))
                        else:
                            log.debug(' (skipping) {}'.format(fitem.filename))
                    else:
                        # Extract if we're forcing an overwrite, the file is missing,
                        # or the existing file is truncated relative to the archive entry
                        if force_overwrite or not os.path.isfile(outfile) or (fitem.file_size > 0 and (os.path.getsize(outfile) / fitem.file_size) < 0.99999):
                            log.debug(' (unzipping) {}'.format(fitem.filename))
                            zip_ref.extract(fitem, destination_folder)
                        else:
                            log.debug(' (skipping) {}'.format(fitem.filename))

                    uz_success = True
                except Exception as e:
                    log.debug(e)
                    log.warning('unzipping file failed. waiting 3 seconds and retrying...')
                    time.sleep(3)
                    uz_retry += 1

            if not uz_success:
                raise Exception('Unzipping of file {} failed after {} attempts'.format(fitem.filename, retries))

        zip_ref.close()
        log.info('Done')

    except zipfile.BadZipFile as e:
        # If the zip file is bad then we have to remove it.
        log.error('BadZipFile. Cleaning up zip file and output folder')
        safe_remove_file(file_path)
        safe_remove_dir(destination_folder)
        raise Exception('Unzip error: BadZipFile') from e
    except Exception as e:
        log.error('Error unzipping. Cleaning up output folder')
        safe_remove_dir(destination_folder)
        raise Exception('Unzip error: file could not be unzipped') from e

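# Illustrative usage sketch for unzip (the archive and destination paths are hypothetical placeholders).
def _example_unzip():
    unzip(
        '/tmp/rstools_downloads/17060304.zip',  # hypothetical downloaded archive
        '/tmp/rstools_downloads/17060304',      # folder to extract into
        force_overwrite=False,                  # skip files that already look complete
        retries=3)                              # per-file retry budget
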