def preprocess_dataset(self, dataset_list):
    """Performs pre-processing on the dataset_list object.

    dataset_list: list of datasets to be opened and have their metadata read.
    """
    temp_dir = self.collection.get_temp_tile_directory()
    vrt_list = []

    for dataset_path in dataset_list:
        fname = os.path.splitext(basename(dataset_path))[0]
        dataset_dir = os.path.split(dataset_path)[0]
        mod09_fname = temp_dir + '/' + fname + '.vrt'
        rbq500_fname = temp_dir + '/' + fname + '_RBQ500.vrt'

        dataset = gdal.Open(dataset_path, gdal.GA_ReadOnly)
        subDataSets = dataset.GetSubDatasets()

        # Stack the seven MOD09 band subdatasets into a single VRT
        command_string = 'gdalbuildvrt -separate -overwrite '
        command_string += mod09_fname
        command_string += ' ' + subDataSets[1][0]    # band 1
        command_string += ' ' + subDataSets[2][0]    # band 2
        command_string += ' ' + subDataSets[3][0]    # band 3
        command_string += ' ' + subDataSets[4][0]    # band 4
        command_string += ' ' + subDataSets[5][0]    # band 5
        command_string += ' ' + subDataSets[6][0]    # band 6
        command_string += ' ' + subDataSets[7][0]    # band 7

        result = execute(command_string=command_string)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalbuildvrt on bands: ' +
                               '"%s" failed: %s'
                               % (command_string, result['stderr']))
        vrt_list.append(mod09_fname)

        # Wrap the 500m pixel-quality subdataset in its own VRT
        command_string = 'gdalbuildvrt -separate -overwrite '
        command_string += rbq500_fname
        command_string += ' ' + subDataSets[0][0]    # 500m PQA

        result = execute(command_string=command_string)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalbuildvrt on rbq: ' +
                               '"%s" failed: %s'
                               % (command_string, result['stderr']))
        vrt_list.append(rbq500_fname)

    return vrt_list
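A minimal, self-contained sketch of the subdataset-to-VRT step above, assuming a hypothetical MODIS HDF/netCDF path and a caller-supplied list of band indices; it uses plain subprocess rather than the ingester's execute() helper.

import subprocess

from osgeo import gdal


def build_band_vrt(src_path, vrt_path, band_indices):
    """Stack selected subdatasets of src_path into vrt_path with gdalbuildvrt.

    Illustrative only: src_path, vrt_path and band_indices are placeholders.
    """
    dataset = gdal.Open(src_path, gdal.GA_ReadOnly)
    subdatasets = dataset.GetSubDatasets()   # list of (name, description) pairs
    cmd = ['gdalbuildvrt', '-separate', '-overwrite', vrt_path]
    cmd.extend(subdatasets[i][0] for i in band_indices)
    # Passing an argument list (no shell) sidesteps quoting issues in
    # NETCDF:/HDF4 subdataset names.
    return subprocess.call(cmd)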
def __make_mosaic_vrt(tile_record_list, mosaic_path): """From two or more source tiles create a vrt""" LOGGER.info('Creating mosaic VRT file %s', mosaic_path) source_file_list = [tr['tile_pathname'] for tr in tile_record_list] gdalbuildvrt_cmd = ["gdalbuildvrt", "-q", "-overwrite", "%s" % mosaic_path ] gdalbuildvrt_cmd.extend(source_file_list) result = execute(gdalbuildvrt_cmd, shell=False) if result['stdout']: log_multiline(LOGGER.info, result['stdout'], 'stdout from %s' % gdalbuildvrt_cmd, '\t') if result['stderr']: log_multiline(LOGGER.debug, result['stderr'], 'stderr from %s' % gdalbuildvrt_cmd, '\t') if result['returncode'] != 0: raise DatasetError('Unable to perform gdalbuildvrt: ' + '"%s" failed: %s' % (gdalbuildvrt_cmd, result['stderr']))
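The callers in this section only rely on execute() returning a dictionary with 'stdout', 'stderr' and 'returncode' keys. A stand-in with that contract, sketched here under that assumption (the real helper is not shown in this section):

import subprocess


def run_command(command, shell=False):
    """Minimal stand-in for execute(): run a command and return the
    'stdout'/'stderr'/'returncode' dictionary the callers inspect."""
    proc = subprocess.Popen(command, shell=shell,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    return {'stdout': stdout, 'stderr': stderr, 'returncode': proc.returncode}


# Example: mosaic two hypothetical tile files into a VRT, as above.
# result = run_command(['gdalbuildvrt', '-q', '-overwrite',
#                       'mosaic.vrt', 'tile_a.tif', 'tile_b.tif'])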
def buildvrt(self, temp_dir): """Given a dataset_record and corresponding dataset, build the vrt that will be used to reproject the dataset's data to tile coordinates""" # Make the list of filenames from the dataset_path/scene01 and each # file_number's file_pattern. Also get list of nodata_value. self.source_file_list, self.nodata_list = self.list_source_files() nodata_value = self.nodata_list[0] # TODO: check that this works for PQA where nodata_value is None if nodata_value is not None: nodata_spec = ["-srcnodata", "%d" % nodata_value, "-vrtnodata", "%d" % (nodata_value)] else: nodata_spec = [] # Form the vrt_band_stack_filename. # This is done using # args = shlex.split(command_line) # where command_line is the buildvrt command create_directory(temp_dir) self.vrt_name = self.get_vrt_name(temp_dir) # Build the vrt buildvrt_cmd = ["gdalbuildvrt", "-separate", "-q"] buildvrt_cmd.extend(nodata_spec) buildvrt_cmd.extend(["-overwrite", "%s" % self.vrt_name]) buildvrt_cmd.extend(self.source_file_list) # for fle in self.source_file_list: # buildvrt_cmd.append(fle) # buildvrt_cmd = ' '.join(buildvrt_cmd) result = execute(buildvrt_cmd, shell=False) if result["returncode"] != 0: raise DatasetError( "Unable to perform gdalbuildvrt: " + '"%s" failed: %s' % (buildvrt_cmd, result["stderr"]) ) # Add the metadata and return the band_stack as a gdal datatset, storing # as an attribute of the Bandstack object self.vrt_band_stack = self.add_metadata(self.vrt_name)
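For reference, the argument list buildvrt() ends up handing to execute() looks like this for a hypothetical nodata value of -999 and two made-up source rasters (the real values come from list_source_files() and get_vrt_name()):

nodata_value = -999                       # hypothetical; PQA has None here
nodata_spec = ['-srcnodata', '%d' % nodata_value,
               '-vrtnodata', '%d' % nodata_value]
buildvrt_cmd = ['gdalbuildvrt', '-separate', '-q']
buildvrt_cmd.extend(nodata_spec)
buildvrt_cmd.extend(['-overwrite', 'scene_stack.vrt'])
buildvrt_cmd.extend(['band1.tif', 'band2.tif'])
print(' '.join(buildvrt_cmd))
# gdalbuildvrt -separate -q -srcnodata -999 -vrtnodata -999 -overwrite scene_stack.vrt band1.tif band2.tif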
def find_datasets(self, source_dir):
    """Return a list of paths to the datasets under 'source_dir'.

    Datasets should be standard ls7 SR products with modified xml.
    """
    LOGGER.info('Searching for datasets in %s', source_dir)
    if self.args.follow_symbolic_links:
        command = "find -L %s -name '*.xml' | sort" % source_dir
    else:
        command = "find %s -name '*.xml' | sort" % source_dir
    LOGGER.debug('executing "%s"', command)
    result = execute(command)
    assert not result['returncode'], \
        '"%s" failed: %s' % (command, result['stderr'])

    # 'find' already returns a full path per line, so each entry only needs
    # to be made absolute.
    dataset_list = [os.path.abspath(scenedir)
                    for scenedir in result['stdout'].split('\n')
                    if scenedir]

    #if self.args.fast_filter:  # no filters
    #    dataset_list = self.fast_filter_datasets(dataset_list)

    return dataset_list
def find_datasets(self, source_dir): """Return a list of path to the datasets under 'source_dir'. Datasets are identified as a directory containing a 'scene01' subdirectory. Datasets are filtered by path, row, and date range if fast filtering is on (command line flag).""" LOGGER.info('Searching for datasets in %s', source_dir) if self.args.follow_symbolic_links: command = "find -L %s -name 'scene01' | sort" % source_dir else: command = "find %s -name 'scene01' | sort" % source_dir LOGGER.debug('executing "%s"', command) result = execute(command) assert not result['returncode'], \ '"%s" failed: %s' % (command, result['stderr']) dataset_list = [ os.path.abspath(re.sub(r'/scene01$', '', scenedir)) for scenedir in result['stdout'].split('\n') if scenedir ] if self.args.fast_filter: dataset_list = self.fast_filter_datasets(dataset_list) return dataset_list
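An equivalent pure-Python walk, sketched as an alternative to shelling out to find; it applies the same "directory containing a scene01 subdirectory" rule but is not the code path the ingester uses.

import os


def find_scene_datasets(source_dir, follow_links=False):
    """Return sorted absolute paths of directories that contain 'scene01'."""
    datasets = []
    for root, dirs, _files in os.walk(source_dir, followlinks=follow_links):
        if 'scene01' in dirs:
            datasets.append(os.path.abspath(root))
    return sorted(datasets)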
def run(self): """Run the system test.""" if self.result: return self.result elif self.command: print 'Changing directory:' os.chdir(self.test_name) print 'Current directory is now:', os.getcwd() print '' print 'Running command:' print self.command print '' exe_result = execute(self.command) self.logfile.write(exe_result['stdout']) self.logfile.write(exe_result['stderr']) if exe_result['returncode'] != 0: self.error_message = exe_result['stderr'] return 'ERROR' os.chdir('..') return 'Command run.' else: return 'No command to run.'
def get_directory_size(directory):
    command = "du -sk %s | cut -f1" % directory
    logger.debug('executing "%s"', command)
    result = execute(command)
    assert not result['returncode'], \
        '"%s" failed: %s' % (command, result['stderr'])
    logger.debug('stdout = %s', result['stdout'])
    return int(result['stdout'])
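A pure-Python approximation of the same measurement, for comparison; du -sk reports allocated blocks, so the two can differ slightly on sparse or heavily fragmented files.

import os


def directory_size_kb(directory):
    """Sum of file sizes under 'directory', in whole kilobytes."""
    total_bytes = 0
    for root, _dirs, files in os.walk(directory):
        for name in files:
            total_bytes += os.path.getsize(os.path.join(root, name))
    return total_bytes // 1024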
def _reproject(tile_type_info, tile_footprint, band_stack, output_path): nodata_value = band_stack.nodata_list[0] # Assume resampling method is the same for all bands, this is # because resampling_method is per proessing_level # TODO assert this is the case first_file_number = band_stack.band_dict.keys()[0] reproject_cmd = _create_reproject_command( band_stack, first_file_number, nodata_value, output_path, tile_footprint, tile_type_info ) if len(reproject_cmd) == 0: return command_string = " ".join(reproject_cmd) LOGGER.info("Performing gdalwarp for tile %s", tile_footprint) retry = True while retry: LOGGER.debug("command_string = %s", command_string) start_datetime = datetime.now() result = execute(command_string) LOGGER.debug("gdalwarp time = %s", datetime.now() - start_datetime) if result["stdout"]: log_multiline(LOGGER.debug, result["stdout"], "stdout from " + command_string, "\t") if result["returncode"]: # Return code is non-zero log_multiline(LOGGER.error, result["stderr"], "stderr from " + command_string, "\t") # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs if ( result["stderr"].find("LZW") > -1 # LZW-related error and tile_type_info["file_format"] == "GTiff" # Output format is GeoTIFF and "COMPRESS=LZW" in tile_type_info["format_options"] ): # LZW compression requested uncompressed_tile_path = output_path + ".tmp" # Write uncompressed tile to a temporary path command_string = command_string.replace("COMPRESS=LZW", "COMPRESS=NONE") command_string = command_string.replace(output_path, uncompressed_tile_path) # Translate temporary uncompressed tile to final compressed tile command_string += "; gdal_translate -of GTiff" command_string += " " + " ".join(_make_format_spec(tile_type_info)) command_string += " %s %s" % (uncompressed_tile_path, output_path) LOGGER.info("Creating compressed GeoTIFF tile via temporary uncompressed GeoTIFF") else: raise DatasetError( "Unable to perform gdalwarp: " + '"%s" failed: %s' % (command_string, result["stderr"]) ) else: retry = False # No retry on success
def _nc2vrt(nc_path, vrt_path): """Create a VRT file to present a netCDF file with multiple subdatasets to GDAL as a band stack""" nc_abs_path = os.path.abspath(nc_path) vrt_abs_path = os.path.abspath(vrt_path) # Create VRT file using absolute pathnames nc2vrt_cmd = "gdalbuildvrt -separate -allow_projection_difference -overwrite %s %s" % (vrt_abs_path, nc_abs_path) LOGGER.debug("nc2vrt_cmd = %s", nc2vrt_cmd) result = execute(nc2vrt_cmd) # , shell=False) if result["returncode"] != 0: raise DatasetError("Unable to perform gdalbuildvrt: " + '"%s" failed: %s' % (nc2vrt_cmd, result["stderr"]))
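For a pair of hypothetical file names, the command string _nc2vrt() builds reduces to the following (absolute paths are substituted at run time):

import os

nc_path, vrt_path = 'MOD09_tile.nc', 'MOD09_tile.vrt'   # placeholder names
nc2vrt_cmd = ("gdalbuildvrt -separate -allow_projection_difference -overwrite %s %s"
              % (os.path.abspath(vrt_path), os.path.abspath(nc_path)))
print(nc2vrt_cmd)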
def get_dataset_size(self): """The size of the dataset in kilobytes as an integer.""" command = "du -sk %s | cut -f1" % self.get_dataset_path() LOGGER.debug('executing "%s"', command) result = execute(command) if result["returncode"] != 0: raise DatasetError("Unable to calculate directory size: " + '"%s" failed: %s' % (command, result["stderr"])) LOGGER.debug("stdout = %s", result["stdout"]) return int(result["stdout"])
def vrt2bin(input_vrt_path,
            output_dataset_path=None,
            file_format='ENVI',
            file_extension='_envi',
            format_options=None,
            layer_name_list=None,
            no_data_value=None,
            overwrite=False,
            debug=False):
    if debug:
        console_handler.setLevel(logging.DEBUG)

    logger.debug('vrt2bin(input_vrt_path=%s, output_dataset_path=%s, '
                 'file_format=%s, file_extension=%s, format_options=%s, '
                 'layer_name_list=%s, no_data_value=%s, debug=%s) called'
                 % (input_vrt_path, output_dataset_path, file_format,
                    file_extension, format_options, layer_name_list,
                    no_data_value, debug))

    assert output_dataset_path or file_extension, \
        'Output path or file extension must be provided'

    # Derive the output dataset path if it wasn't provided
    if not output_dataset_path:
        output_dataset_path = re.sub(r'\.\w+$', file_extension, input_vrt_path)

    if os.path.exists(output_dataset_path) and not overwrite:
        logger.info('Skipped existing dataset %s', output_dataset_path)
        return output_dataset_path

    command_string = 'gdal_translate'
    if not debug:
        command_string += ' -q'
    command_string += ' -of %s' % file_format
    if format_options:
        for format_option in format_options.split(','):
            command_string += ' -co %s' % format_option
    command_string += ' %s %s' % (input_vrt_path, output_dataset_path)

    logger.debug('command_string = %s', command_string)

    result = execute(command_string=command_string)

    if result['stdout']:
        log_multiline(logger.info, result['stdout'],
                      'stdout from ' + command_string, '\t')

    if result['returncode']:
        log_multiline(logger.error, result['stderr'],
                      'stderr from ' + command_string, '\t')
        raise Exception('%s failed' % command_string)

    if layer_name_list and file_format == 'ENVI':
        create_envi_hdr(envi_file=output_dataset_path,
                        noData=no_data_value,
                        band_names=layer_name_list)

    return output_dataset_path
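A worked example of the gdal_translate command vrt2bin() assembles, using a hypothetical input VRT, the default ENVI format and one made-up creation option:

import re

input_vrt_path = 'stack.vrt'                                        # placeholder
output_dataset_path = re.sub(r'\.\w+$', '_envi', input_vrt_path)    # -> 'stack_envi'
command_string = 'gdal_translate -q -of ENVI'
for format_option in 'INTERLEAVE=BSQ'.split(','):                   # hypothetical option
    command_string += ' -co %s' % format_option
command_string += ' %s %s' % (input_vrt_path, output_dataset_path)
print(command_string)
# gdal_translate -q -of ENVI -co INTERLEAVE=BSQ stack.vrt stack_envi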
def _get_directory_size(self): """Calculate the size of the dataset in kB.""" command = "du -sk %s | cut -f1" % self.get_dataset_path() LOGGER.debug('executing "%s"', command) result = execute(command) if result['returncode'] != 0: raise DatasetError('Unable to calculate directory size: ' + '"%s" failed: %s' % (command, result['stderr'])) LOGGER.debug('stdout = %s', result['stdout']) return int(result['stdout'])
def nc2vrt(self, nc_path, vrt_path): """Create a VRT file to present a netCDF file with multiple subdatasets to GDAL as a band stack""" nc_abs_path = os.path.abspath(nc_path) vrt_abs_path = os.path.abspath(vrt_path) # Create VRT file using absolute pathnames nc2vrt_cmd = "gdalbuildvrt -separate -allow_projection_difference -overwrite %s %s" % (vrt_abs_path, nc_abs_path) LOGGER.debug('nc2vrt_cmd = %s', nc2vrt_cmd) result = execute(nc2vrt_cmd) #, shell=False) if result['returncode'] != 0: raise DatasetError('Unable to perform gdalbuildvrt: ' + '"%s" failed: %s' % (nc2vrt_cmd, result['stderr']))
def _compare_data(level, tile_class_id1, tile_class_id2, path1, path2, data1, data2): """Given two arrays and the level name, check that the data arrays agree. If the level is 'PQA' and the tile is a mosaic, then only compare mosaics at pixels where the contiguity bit is set in both versions of the mosaic tile. Returns a message in string msg which, if empty indicates agreement on the tile data.""" # pylint:disable=too-many-arguments # pylint:disable=too-many-locals # pylint:disable=unused-argument different = False msg = "" if tile_class_id2 not in MOSAIC_CLASS_ID: if (data1 != data2).any(): msg += "Difference in Tile data: %s and %s\n" \ %(path1, path2) else: # mosaic tile if level == 'PQA': ind = (data1 == data2) # Check that differences are due to differing treatment # of contiguity bit. data1_diff = data1[~ind].ravel() data2_diff = data2[~ind].ravel() contiguity_diff = \ np.logical_or( np.bitwise_and(data1_diff, 1 << 8) == 0, np.bitwise_and(data2_diff, 1 << 8) == 0) if not contiguity_diff.all(): msg += "On %d pixels, mosaiced tile benchmark %s differs"\ "from Fresh Ingest %s\n"\ %(np.count_nonzero(~contiguity_diff), path1, path2) different = True else: diff_cmd = ["diff", "-I", "[Ff]ilename", "%s" %path1, "%s" %path2 ] result = execute(diff_cmd, shell=False) if result['stdout'] != '': msg += "Difference between mosaic vrt files:\n" + \ result['stdout'] different = True if result['stderr'] != '': msg += "Error in system diff command:\n" + result['stderr'] return (different, msg)
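A toy numpy illustration of the PQA rule above: a pixel-level difference between the two mosaics is tolerated only where at least one of the two values has the contiguity bit (bit 8) cleared. The values here are made up.

import numpy as np

data1 = np.array([0x0100, 0x0000, 0x0123], dtype=np.uint16)
data2 = np.array([0x0100, 0x0100, 0x0023], dtype=np.uint16)
ind = (data1 == data2)
data1_diff = data1[~ind].ravel()
data2_diff = data2[~ind].ravel()
contiguity_diff = np.logical_or(
    np.bitwise_and(data1_diff, 1 << 8) == 0,
    np.bitwise_and(data2_diff, 1 << 8) == 0)
print(contiguity_diff.all())   # True: every difference involves a cleared contiguity bit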
def check_buildvrt(self, idataset): """Test the LandsatBandstack.buildvrt() method by comparing output to a file on disk""" assert idataset in range(len(DATASETS_TO_INGEST)) print 'Testing Dataset %s' %DATASETS_TO_INGEST[idataset] dset = LandsatDataset(DATASETS_TO_INGEST[idataset]) # Create a DatasetRecord instance so that we can access its # list_tile_types() method. In doing this we need to create a # collection object and entries on the acquisition and dataset # tables of the database. self.collection.begin_transaction() acquisition = \ self.collection.create_acquisition_record(dset) dset_record = acquisition.create_dataset_record(dset) self.collection.commit_transaction() tile_type_list = dset_record.list_tile_types() #Assume dataset has tile_type = 1 only: tile_type_id = 1 dataset_bands_dict = dset_record.get_tile_bands(tile_type_id) ls_bandstack = dset.stack_bands(dataset_bands_dict) temp_dir = self.collection.get_temp_tile_directory() ls_bandstack.buildvrt(temp_dir) # Get benchmark vrt for comparision vrt_benchmark = os.path.join(self.BENCHMARK_DIR, os.path.basename(ls_bandstack.vrt_name)) diff_cmd = ["diff", "-I", "[Ff]ilename", "%s" %vrt_benchmark, "%s" %ls_bandstack.vrt_name ] result = execute(diff_cmd, shell=False) if result['stdout'] != '': self.fail("Differences between vrt files:\n" + result['stdout']) if result['stderr'] != '': self.fail("Error in system diff command:\n" + result['stderr'])
def reproject(self): """Reproject the scene dataset into tile coordinate reference system and extent. This method uses gdalwarp to do the reprojection.""" # pylint: disable=too-many-locals x_origin = self.tile_type_info['x_origin'] y_origin = self.tile_type_info['y_origin'] x_size = self.tile_type_info['x_size'] y_size = self.tile_type_info['y_size'] x_pixel_size = self.tile_type_info['x_pixel_size'] y_pixel_size = self.tile_type_info['y_pixel_size'] x0 = x_origin + self.tile_footprint[0] * x_size y0 = y_origin + self.tile_footprint[1] * y_size tile_extents = (x0, y0, x0 + x_size, y0 + y_size) # Make the tile_extents visible to tile_record self.tile_extents = tile_extents nodata_value = self.band_stack.nodata_list[0] #Assume resampling method is the same for all bands, this is #because resampling_method is per proessing_level #TODO assert this is the case first_file_number = self.band_stack.band_dict.keys()[0] resampling_method = ( self.band_stack.band_dict[first_file_number]['resampling_method']) if nodata_value is not None: #TODO: Check this works for PQA, where #band_dict[10]['resampling_method'] == None nodata_spec = [ "-srcnodata", "%d" % nodata_value, "-dstnodata", "%d" % nodata_value ] else: nodata_spec = [] format_spec = [] for format_option in self.tile_type_info['format_options'].split(','): format_spec.extend(["-co", "%s" % format_option]) reproject_cmd = [ "gdalwarp", "-q", "-t_srs", "%s" % self.tile_type_info['crs'], "-te", "%f" % tile_extents[0], "%f" % tile_extents[1], "%f" % tile_extents[2], "%f" % tile_extents[3], "-tr", "%f" % x_pixel_size, "%f" % y_pixel_size, "-tap", "-tap", "-r", "%s" % resampling_method, ] reproject_cmd.extend(nodata_spec) reproject_cmd.extend(format_spec) reproject_cmd.extend([ "-overwrite", "%s" % self.band_stack.vrt_name, "%s" % self.temp_tile_output_path ]) result = execute(reproject_cmd, shell=False) if result['returncode'] != 0: raise DatasetError('Unable to perform gdalwarp: ' + '"%s" failed: %s' % (reproject_cmd, result['stderr']))
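A worked example of the tile-extent arithmetic used above, assuming hypothetical one-degree tiles (x_origin = y_origin = 0, x_size = y_size = 1.0) and the tile footprint (141, -38); these are the (xmin, ymin, xmax, ymax) values handed to gdalwarp -te.

x_origin, y_origin = 0.0, 0.0          # hypothetical tile_type_info values
x_size, y_size = 1.0, 1.0
tile_footprint = (141, -38)
x0 = x_origin + tile_footprint[0] * x_size
y0 = y_origin + tile_footprint[1] * y_size
tile_extents = (x0, y0, x0 + x_size, y0 + y_size)
print(tile_extents)                    # (141.0, -38.0, 142.0, -37.0)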
def reproject(self): """Reproject the scene dataset into tile coordinate reference system and extent. This method uses gdalwarp to do the reprojection.""" # pylint: disable=too-many-locals x_origin = self.tile_type_info['x_origin'] y_origin = self.tile_type_info['y_origin'] x_size = self.tile_type_info['x_size'] y_size = self.tile_type_info['y_size'] x_pixel_size = self.tile_type_info['x_pixel_size'] y_pixel_size = self.tile_type_info['y_pixel_size'] x0 = x_origin + self.tile_footprint[0] * x_size y0 = y_origin + self.tile_footprint[1] * y_size tile_extents = (x0, y0, x0 + x_size, y0 + y_size) # Make the tile_extents visible to tile_record self.tile_extents = tile_extents nodata_value = self.band_stack.nodata_list[0] #Assume resampling method is the same for all bands, this is #because resampling_method is per proessing_level #TODO assert this is the case first_file_number = self.band_stack.band_dict.keys()[0] resampling_method = ( self.band_stack.band_dict[first_file_number]['resampling_method'] ) if nodata_value is not None: #TODO: Check this works for PQA, where #band_dict[10]['resampling_method'] == None nodata_spec = ["-srcnodata", "%d" % nodata_value, "-dstnodata", "%d" % nodata_value ] else: nodata_spec = [] format_spec = [] for format_option in self.tile_type_info['format_options'].split(','): format_spec.extend(["-co", "%s" % format_option]) # Work-around to allow existing code to work with netCDF subdatasets as GDAL band stacks temp_tile_output_path = self.nc_temp_tile_output_path or self.temp_tile_output_path reproject_cmd = ["gdalwarp", "-q", "-of", "%s" % self.tile_type_info['file_format'], "-t_srs", "%s" % self.tile_type_info['crs'], "-te", "%f" % tile_extents[0], "%f" % tile_extents[1], "%f" % tile_extents[2], "%f" % tile_extents[3], "-tr", "%f" % x_pixel_size, "%f" % y_pixel_size, "-tap", "-tap", "-r", "%s" % resampling_method, ] reproject_cmd.extend(nodata_spec) reproject_cmd.extend(format_spec) reproject_cmd.extend(["-overwrite", "%s" % self.band_stack.vrt_name, "%s" % temp_tile_output_path # Use locally-defined output path, not class instance value ]) command_string = ' '.join(reproject_cmd) LOGGER.info('Performing gdalwarp for tile %s', self.tile_footprint) retry=True while retry: LOGGER.debug('command_string = %s', command_string) start_datetime = datetime.now() result = execute(command_string) LOGGER.debug('gdalwarp time = %s', datetime.now() - start_datetime) if result['stdout']: log_multiline(LOGGER.debug, result['stdout'], 'stdout from ' + command_string, '\t') if result['returncode']: # Return code is non-zero log_multiline(LOGGER.error, result['stderr'], 'stderr from ' + command_string, '\t') # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs if (result['stderr'].find('LZW') > -1 # LZW-related error and self.tile_type_info['file_format'] == 'GTiff' # Output format is GeoTIFF and 'COMPRESS=LZW' in format_spec): # LZW compression requested uncompressed_tile_path = temp_tile_output_path + '.tmp' # Write uncompressed tile to a temporary path command_string = command_string.replace('COMPRESS=LZW', 'COMPRESS=NONE') command_string = command_string.replace(temp_tile_output_path, uncompressed_tile_path) # Translate temporary uncompressed tile to final compressed tile command_string += '; gdal_translate -of GTiff' command_string += ' ' + ' '.join(format_spec) command_string += ' %s %s' % ( uncompressed_tile_path, temp_tile_output_path ) LOGGER.info('Creating compressed GeoTIFF tile via temporary uncompressed GeoTIFF') else: raise DatasetError('Unable to 
perform gdalwarp: ' + '"%s" failed: %s' % (command_string, result['stderr'])) else: retry = False # No retry on success # Work-around to allow existing code to work with netCDF subdatasets as GDAL band stacks if self.nc_temp_tile_output_path: self.nc2vrt(self.nc_temp_tile_output_path, self.temp_tile_output_path)
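A stripped-down sketch of the LZW work-around string surgery above, with a made-up gdalwarp command: the failed compressed write is retried uncompressed into a temporary file, and an appended gdal_translate step then produces the final LZW-compressed tile.

temp_tile_output_path = 'tile_141_-038.tif'          # placeholder path
uncompressed_tile_path = temp_tile_output_path + '.tmp'
command_string = ('gdalwarp -q -of GTiff -co COMPRESS=LZW -overwrite '
                  'scene_stack.vrt ' + temp_tile_output_path)
command_string = command_string.replace('COMPRESS=LZW', 'COMPRESS=NONE')
command_string = command_string.replace(temp_tile_output_path,
                                        uncompressed_tile_path)
command_string += '; gdal_translate -of GTiff -co COMPRESS=LZW %s %s' % (
    uncompressed_tile_path, temp_tile_output_path)
print(command_string)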
def test_make_mosaics(self): """Make mosaic tiles from two adjoining scenes.""" # pylint: disable=too-many-locals dataset_list = \ [TestIngest.DATASETS_TO_INGEST[level][i] for i in range(6) for level in ['PQA', 'NBAR', 'ORTHO']] dataset_list.extend(TestIngest.MOSAIC_SOURCE_NBAR) dataset_list.extend(TestIngest.MOSAIC_SOURCE_PQA) dataset_list.extend(TestIngest.MOSAIC_SOURCE_ORTHO) random.shuffle(dataset_list) LOGGER.info("Ingesting following datasets:") for dset in dataset_list: LOGGER.info('%d) %s', dataset_list.index(dset), dset) for dataset_path in dataset_list: LOGGER.info('Ingesting Dataset %d:\n%s', dataset_list.index(dataset_path), dataset_path) dset = LandsatDataset(dataset_path) self.collection.begin_transaction() acquisition = \ self.collection.create_acquisition_record(dset) dset_record = acquisition.create_dataset_record(dset) # Get tile types dummy_tile_type_list = dset_record.list_tile_types() # Assume dataset has tile_type = 1 only: tile_type_id = 1 dataset_bands_dict = dset_record.get_tile_bands(tile_type_id) ls_bandstack = dset.stack_bands(dataset_bands_dict) temp_dir = os.path.join(self.ingester.datacube.tile_root, 'ingest_temp') # Form scene vrt ls_bandstack.buildvrt(temp_dir) # Reproject scene data onto selected tile coverage tile_footprint_list = dset_record.get_coverage(tile_type_id) LOGGER.info('coverage=%s', str(tile_footprint_list)) for tile_ftprint in tile_footprint_list: #Only do that footprint for which we have benchmark mosaics if tile_ftprint not in [(141, -38)]: continue tile_contents = \ self.collection.create_tile_contents(tile_type_id, tile_ftprint, ls_bandstack) LOGGER.info('Calling reproject for %s tile %s...', dset_record.mdd['processing_level'], tile_ftprint) tile_contents.reproject() LOGGER.info('...finished') if tile_contents.has_data(): LOGGER.info('tile %s has data', tile_contents.temp_tile_output_path) tile_record = dset_record.create_tile_record(tile_contents) mosaic_required = tile_record.make_mosaics() if not mosaic_required: continue # Test mosaic tiles against benchmark # At this stage, transaction for this dataset not yet # commited and so the tiles from this dataset, including # any mosaics are still in the temporary location. if self.POPULATE_EXPECTED: continue mosaic_benchmark = \ TestTileContents.swap_dir_in_path(tile_contents .mosaic_final_pathname, 'output', 'expected') mosaic_new = tile_contents.mosaic_temp_pathname LOGGER.info("Comparing test output with benchmark:\n"\ "benchmark: %s\ntest output: %s", mosaic_benchmark, mosaic_new) if dset_record.mdd['processing_level'] == 'PQA': LOGGER.info("For PQA mosaic, calling load_and_check...") ([data1, data2], dummy_nlayers) = \ TestLandsatTiler.load_and_check( mosaic_benchmark, mosaic_new, tile_contents.band_stack.band_dict, tile_contents.band_stack.band_dict) LOGGER.info('Checking arrays ...') if ~(data1 == data2).all(): self.fail("Difference in PQA mosaic " "from expected result: %s and %s" %(mosaic_benchmark, mosaic_new)) # Check that differences are due to differing treatment # of contiguity bit. else: diff_cmd = ["diff", "-I", "[Ff]ilename", "%s" %mosaic_benchmark, "%s" %mosaic_new ] result = execute(diff_cmd, shell=False) assert result['stdout'] == '', \ "Differences between vrt files" assert result['stderr'] == '', \ "Error in system diff command" else: LOGGER.info('... tile has no data') tile_contents.remove() self.collection.commit_transaction()
def test_make_mosaics(self): """Make mosaic tiles from two adjoining scenes.""" # pylint: disable=too-many-locals nbar1, nbar2 = TestIngest.MOSAIC_SOURCE_NBAR ortho1, ortho2 = TestIngest.MOSAIC_SOURCE_ORTHO pqa1, pqa2 = TestIngest.MOSAIC_SOURCE_PQA # Set the list of datset paths which should result in mosaic tiles dataset_list = [nbar1, nbar2, ortho1, ortho2, pqa1, pqa2] dataset_list = [pqa1, pqa2] for dataset_path in dataset_list: dset = LandsatDataset(dataset_path) self.collection.begin_transaction() acquisition = \ self.collection.create_acquisition_record(dset) dset_record = acquisition.create_dataset_record(dset) # Get tile types dummy_tile_type_list = dset_record.list_tile_types() # Assume dataset has tile_type = 1 only: tile_type_id = 1 dataset_bands_dict = dset_record.get_tile_bands(tile_type_id) ls_bandstack = dset.stack_bands(dataset_bands_dict) temp_dir = os.path.join(self.ingester.datacube.tile_root, 'ingest_temp') # Form scene vrt ls_bandstack.buildvrt(temp_dir) # Reproject scene data onto selected tile coverage tile_footprint_list = dset_record.get_coverage(tile_type_id) LOGGER.info('coverage=%s', str(tile_footprint_list)) for tile_ftprint in tile_footprint_list: #Only do that footprint for which we have benchmark mosaics if tile_ftprint not in [(150, -26)]: continue tile_contents = \ self.collection.create_tile_contents(tile_type_id, tile_ftprint, ls_bandstack) LOGGER.info('Calling reproject for %s tile %s...', dset_record.mdd['processing_level'], tile_ftprint) tile_contents.reproject() LOGGER.info('...finished') if tile_contents.has_data(): LOGGER.info('tile %s has data', tile_contents.temp_tile_output_path) tile_record = dset_record.create_tile_record(tile_contents) mosaic_required = tile_record.make_mosaics() if not mosaic_required: continue #Test mosaic tiles against benchmark mosaic_benchmark = TestTileContents.get_benchmark_tile( dset_record.mdd, os.path.join(TestIngest.BENCHMARK_DIR, 'mosaic_cache'), tile_ftprint) mosaic_new = TestTileContents.get_benchmark_tile( dset_record.mdd, os.path.join(os.path.dirname( tile_contents.temp_tile_output_path), 'mosaic_cache'), tile_ftprint) LOGGER.info("Calling load_and_check...") ([data1, data2], dummy_nlayers) = \ TestLandsatTiler.load_and_check( mosaic_benchmark, mosaic_new, tile_contents.band_stack.band_dict, tile_contents.band_stack.band_dict) LOGGER.info('Checking arrays ...') if dset_record.mdd['processing_level'] == 'PQA': ind = (data1 == data2) # Check that differences are due to differing treatment # of contiguity bit. data1_diff = data1[~ind] data2_diff = data2[~ind] contiguity_diff = \ np.logical_or( np.bitwise_and(data1_diff, 1 << 8) == 0, np.bitwise_and(data2_diff, 1 << 8) == 0) assert contiguity_diff.all(), \ "mosaiced tile %s differs from benchmark %s" \ %(mosaic_new, mosaic_benchmark) else: diff_cmd = ["diff", "-I", "[Ff]ilename", "%s" %mosaic_benchmark, "%s" %mosaic_new ] result = execute(diff_cmd, shell=False) assert result['stdout'] == '', \ "Differences between vrt files" assert result['stderr'] == '', \ "Error in system diff command" else: LOGGER.info('... tile has no data') tile_contents.remove() self.collection.commit_transaction()
result = [] for path, subdirs, files in os.walk(directory): for name in files: fileName, fileExtension = os.path.splitext(name) if fileExtension == '.nc': result.append(os.path.join(path, name)) return result if __name__ == '__main__': vrt_creater = VRTCreater() dataset_dir = "/g/data/u83/data/modis/datacube/" file_list = vrt_creater.get_NetCDF_list(dataset_dir) # print file_list for file in file_list: if not file.endswith("float64.nc"): continue print file fname = os.path.splitext(basename(file))[0] dataset = gdal.Open(file, gdal.GA_ReadOnly) subDataSets = dataset.GetSubDatasets() command_string = 'gdalbuildvrt -separate -overwrite ' command_string += dataset_dir + fname command_string += '_rbq1000.vrt' command_string += ' ' + subDataSets[1][0] print command_string result = execute(command_string=command_string) # dataset_size = os.path.getsize(dataset_file)
def warp(shape_dataset, master_dataset_path, output_filename, buffer_widths,
         output_format, resample_method="bilinear",
         bounds_getter=default_bounds_getter):
    """
    Use the gdalwarp executable to clip (and potentially resample) a region.

    Preconditions on this function are:

    - the directory specified for the output (``output_filename``) exists,
    - ``master_dataset_path`` exists, and
    - gdalwarp is on the path.

    :param shape_dataset:
        Object to extract the shape of the desired region from. This is done
        using ``bounds_getter`` (see :py:func:`default_bounds_getter` for
        specification of the interface).
    :type shape_dataset: str

    :param master_dataset_path:
        The path to the dataset to clip from. This should be a valid argument
        to :py:func:`gdal.Open`.
    :type master_dataset_path: str

    :param output_filename:
        The name of the output file (passed as the output argument to
        gdalwarp).
    :type output_filename: str

    :param buffer_widths:
        An object of type :py:class:`Buffers` (or one that supports the same
        interface).
    :type buffer_widths: :py:class:`Buffers`

    :param output_format:
        The desired format of the clipped dataset (passed as argument -of to
        gdalwarp).
    :type output_format: str

    :param resample_method:
        The resampling method to be used (passed as argument -r to gdalwarp).
    :type resample_method: str

    :param bounds_getter:
        Callable used to extract the bounds from ``shape_dataset``.

    :return:
        The name of the dataset written to disk.
    """
    assert not execute("which gdalwarp")["returncode"], "gdalwarp not available"

    output_dir = os.path.dirname(output_filename)
    assert os.stat(output_dir), "output_dir (%s) must exist." % output_dir
    assert os.stat(master_dataset_path), \
        "master_dataset (%s) must exist" % master_dataset_path

    shape = bounds_getter(shape_dataset)
    xres = shape.RasterXCellSize
    yres = shape.RasterYCellSize
    xmin = shape.RasterXOrigin - xres * buffer_widths.left
    xmax = shape.RasterXOrigin + xres * (shape.RasterXSize + buffer_widths.right)
    ymax = shape.RasterYOrigin - yres * buffer_widths.top
    # in the cases I've looked at, yres is negative.
    ymin = shape.RasterYOrigin + yres * (shape.RasterYSize + buffer_widths.bottom)

    command_string = (
        'gdalwarp -overwrite -of %s -t_srs "%s" -r %s -te %f %f %f %f -tr %f %f %s %s' % (
            output_format,
            shape.GetProjection(as_proj4=True),
            resample_method,
            float(xmin), float(ymin), float(xmax), float(ymax),
            float(xres), float(yres),
            master_dataset_path,
            output_filename))

    result = execute(command_string)
    if result["returncode"]:
        print "error in executing %s\n\n\tstdout: %s\n\n\tstderr: %s\n" % (
            command_string, result['stdout'], result['stderr'])
    assert not result["returncode"], \
        "error in executing %s\n\n\tstdout: %s\n\n\tstderr: %s\n" % (
            command_string, result['stdout'], result['stderr'])

    return output_filename
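A worked example of the buffered-extent arithmetic in warp(), with made-up numbers: a 100 x 80 cell raster at origin (140.0, -35.0), 0.001-degree cells (negative y resolution, as noted above) and a two-cell buffer on every side. The Buffers stand-in here only mimics the attributes the function reads.

class Buffers(object):
    """Stand-in with the left/right/top/bottom attributes warp() expects."""
    def __init__(self, left, right, top, bottom):
        self.left, self.right, self.top, self.bottom = left, right, top, bottom


xres, yres = 0.001, -0.001
x_orig, y_orig = 140.0, -35.0
xsize, ysize = 100, 80
buf = Buffers(2, 2, 2, 2)
xmin = x_orig - xres * buf.left                    # 139.998
xmax = x_orig + xres * (xsize + buf.right)         # 140.102
ymax = y_orig - yres * buf.top                     # -34.998
ymin = y_orig + yres * (ysize + buf.bottom)        # -35.082
print((xmin, ymin, xmax, ymax))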
result = [] for path, subdirs, files in os.walk(directory): for name in files: fileName, fileExtension = os.path.splitext(name) if fileExtension == '.nc': result.append(os.path.join(path, name)) return result if __name__ == '__main__': vrt_creater = VRTCreater() dataset_dir = "/g/data/u83/data/modis/datacube/" file_list = vrt_creater.get_NetCDF_list(dataset_dir) # print file_list for file in file_list: if not file.endswith("float64.nc"): continue print file fname = os.path.splitext(basename(file))[0] dataset = gdal.Open(file, gdal.GA_ReadOnly) subDataSets = dataset.GetSubDatasets() command_string = 'gdalbuildvrt -separate -overwrite ' command_string += dataset_dir + fname command_string += '_rbq500.vrt' command_string += ' ' + subDataSets[13][0] print command_string result = execute(command_string=command_string) # dataset_size = os.path.getsize(dataset_file)
def process_dataset(dataset_info): log_multiline(logger.debug, dataset_info, 'Dataset values', '\t') def find_file(dataset_dir, file_pattern): # logger.debug('find_file(%s, %s) called', dataset_dir, file_pattern) assert os.path.isdir(dataset_dir), '%s is not a valid directory' % dataset_dir filelist = [filename for filename in os.listdir(dataset_dir) if re.match(file_pattern, filename)] # logger.debug('filelist = %s', filelist) assert len(filelist) == 1, 'Unable to find unique match for file pattern %s' % file_pattern return os.path.join(dataset_dir, filelist[0]) def get_tile_index_range(dataset_filename): """Returns integer (xmin, ymin, xmax, ymax) tuple for input GDAL dataset filename""" dataset = gdal.Open(dataset_filename) assert dataset, 'Unable to open dataset %s' % dataset_filename spatial_reference = osr.SpatialReference() spatial_reference.ImportFromWkt(dataset.GetProjection()) geotransform = dataset.GetGeoTransform() logger.debug('geotransform = %s', geotransform) # latlong_spatial_reference = spatial_reference.CloneGeogCS() tile_spatial_reference = osr.SpatialReference() s = re.match('EPSG:(\d+)', tile_type_info['crs']) if s: epsg_code = int(s.group(1)) logger.debug('epsg_code = %d', epsg_code) assert tile_spatial_reference.ImportFromEPSG(epsg_code) == 0, 'Invalid EPSG code for tile projection' else: assert tile_spatial_reference.ImportFromWkt(tile_type_info['crs']), 'Invalid WKT for tile projection' logger.debug('Tile WKT = %s', tile_spatial_reference.ExportToWkt()) coord_transform_to_tile = osr.CoordinateTransformation(spatial_reference, tile_spatial_reference) # Upper Left xmin, ymax, _z = coord_transform_to_tile.TransformPoint(geotransform[0], geotransform[3], 0) # Lower Right xmax, ymin, _z = coord_transform_to_tile.TransformPoint(geotransform[0] + geotransform[1] * dataset.RasterXSize, geotransform[3] + geotransform[5] * dataset.RasterYSize, 0) logger.debug('Coordinates: xmin = %f, ymin = %f, xmax = %f, ymax = %f', xmin, ymin, xmax, ymax) return (int(floor((xmin - tile_type_info['x_origin']) / tile_type_info['x_size'])), int(floor((ymin - tile_type_info['y_origin']) / tile_type_info['y_size'])), int(ceil((xmax - tile_type_info['x_origin']) / tile_type_info['x_size'])), int(ceil((ymax - tile_type_info['y_origin']) / tile_type_info['y_size']))) def find_tiles(x_index = None, y_index = None): """Find any tile records for current dataset returns dict of tile information keyed by tile_id """ db_cursor2 = self.db_connection.cursor() sql = """-- Check for any existing tiles select tile_id, x_index, y_index, tile_type_id, tile_pathname, dataset_id, tile_class_id, tile_size from tile_footprint inner join tile using(x_index, y_index, tile_type_id) where (%(x_index)s is null or x_index = %(x_index)s) and (%(y_index)s is null or y_index = %(y_index)s) and tile_type_id = %(tile_type_id)s and dataset_id = %(fc_dataset_id)s and ctime is not null -- TODO: Remove this after reload ; """ params = {'x_index': x_index, 'y_index': y_index, 'tile_type_id': tile_type_info['tile_type_id'], 'fc_dataset_id': dataset_info['fc_dataset_id']} log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t') db_cursor2.execute(sql, params) tile_info = {} for record in db_cursor2: tile_info_dict = { 'x_index': record[1], 'y_index': record[2], 'tile_type_id': record[3], 'tile_pathname': record[4], 'dataset_id': record[5], 'tile_class_id': record[6], 'tile_size': record[7] } tile_info[record[0]] = tile_info_dict # Keyed by tile_id log_multiline(logger.debug, tile_info, 'tile_info', '\t') return 
tile_info def get_vrt_band_list(): """Returns list of band information to create tiles """ logger.debug('get_vrt_band_list() called') vrt_band_list = [] #=============================================================================== # sensor_dict = self.bands[tile_type_id][(dataset_info['satellite_tag'], dataset_info['sensor_name'])] # # log_multiline(logger.debug, sensor, 'Sensor', '\t') # for file_number in sorted(sensor_dict.keys()): # band_info = sensor_dict[file_number] # if band_info['level_name'] == 'NBAR': # dataset_dir = dataset_info['nbar_dataset_path'] # dataset_id = dataset_info['nbar_dataset_id'] # processing_level = dataset_info['nbar_level_name'] # nodata_value = dataset_info['nbar_nodata_value'] # resampling_method = dataset_info['nbar_resampling_method'] # elif band_info['level_name'] == 'ORTHO': # dataset_dir = dataset_info['l1t_dataset_path'] # dataset_id = dataset_info['l1t_dataset_id'] # processing_level = dataset_info['l1t_level_name'] # nodata_value = dataset_info['l1t_nodata_value'] # resampling_method = dataset_info['l1t_resampling_method'] # else: # continue # Ignore any pan-chromatic and derived bands # # dataset_dir = os.path.join(dataset_dir, 'scene01') # filename = find_file(dataset_dir, band_info['file_pattern']) # vrt_band_list.append({'file_number': band_info['file_number'], # 'filename': filename, # 'name': band_info['band_name'], # 'dataset_id': dataset_id, # 'band_id': band_info['band_id'], # 'processing_level': processing_level, # 'nodata_value': nodata_value, # 'resampling_method': resampling_method, # 'tile_layer': band_info['tile_layer']}) #=============================================================================== #TODO: Make this able to handle multiple derived layers for band_level in ['FC']: derived_bands = self.bands[tile_type_id][('DERIVED', band_level)] for file_number in sorted(derived_bands.keys()): band_info = derived_bands[file_number] file_pattern = band_info['file_pattern'] dataset_dir = os.path.join(dataset_info['fc_dataset_path'], 'scene01') dataset_id = dataset_info['fc_dataset_id'] filename = find_file(dataset_dir, file_pattern) processing_level = dataset_info['fc_level_name'] nodata_value = dataset_info['fc_nodata_value'] # Should be None for FC resampling_method = dataset_info['fc_resampling_method'] vrt_band_list.append({'file_number': None, 'filename': filename, 'name': band_info['band_name'], 'dataset_id': dataset_id, 'band_id': band_info['band_id'], 'processing_level': processing_level, 'nodata_value': nodata_value, 'resampling_method': resampling_method, 'tile_layer': 1}) log_multiline(logger.debug, vrt_band_list, 'vrt_band_list = %s', '\t') return vrt_band_list def get_tile_has_data(tile_index_range): tile_has_data = {} db_cursor2 = self.db_connection.cursor() sql = """-- Find all PQA tiles which exist for the dataset select x_index, y_index from dataset inner join tile using(dataset_id) where tile_type_id = %(tile_type_id)s and level_id = 3 -- PQA and tile_class_id = 1 -- Tile containing live data and acquisition_id = %(acquisition_id)s """ params = {'tile_type_id': tile_type_info['tile_type_id'], 'acquisition_id': dataset_info['acquisition_id']} log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t') db_cursor2.execute(sql, params) for x_index in range(tile_index_range[0], tile_index_range[2]): for y_index in range(tile_index_range[1], tile_index_range[3]): tile_has_data[(x_index, y_index)] = False # Set tile_has_data element to True if PQA tile exists for record in db_cursor2: 
tile_has_data[(record[0], record[1])] = True return tile_has_data # process_dataset function starts here result = False db_cursor1 = self.db_connection.cursor() logger.info('Processing dataset %s', dataset_info['fc_dataset_path']) vrt_band_stack_basename = '_'.join([dataset_info['satellite_tag'], re.sub('\W', '', dataset_info['sensor_name']), dataset_info['start_datetime'].date().strftime('%Y%m%d'), '%03d' % dataset_info['x_ref'], '%03d' % dataset_info['y_ref']] ) + '.vrt' logger.debug('vrt_band_stack_basename = %s', vrt_band_stack_basename) tile_output_root = os.path.join(self.tile_root, tile_type_info['tile_directory'], dataset_info['satellite_tag'] + '_' + re.sub('\W', '', dataset_info['sensor_name'])) logger.debug('tile_output_root = %s', tile_output_root) vrt_band_list = get_vrt_band_list() tile_index_range = get_tile_index_range(vrt_band_list[0]['filename']) # Find extents of first band dataset tile_count = abs(tile_index_range[2] - tile_index_range[0]) * (tile_index_range[3] - tile_index_range[1]) # Check whether tiles exist for every band tile_record_count = len(find_tiles()) logger.info('Found %d tile records in database for %d tiles', tile_record_count, tile_count) # Count FC only if tile_record_count == tile_count: logger.info('All tiles already exist in database - skipping tile creation for %s', dataset_info['fc_dataset_path']) return result try: #TODO: Create all new acquisition records and commit the transaction here # Use NBAR dataset name for dataset lock (could have been any other level) work_directory = os.path.join(self.temp_dir, os.path.basename(dataset_info['fc_dataset_path']) ) tile_has_data = get_tile_has_data(tile_index_range) any_tile_has_data = False for value in tile_has_data.values(): any_tile_has_data |= value if not any_tile_has_data: logger.info('No valid PQ tiles found - skipping tile creation for %s', dataset_info['fc_dataset_path']) return result #TODO: Apply lock on path/row instead of on dataset to try to force the same node to process the full depth if not self.lock_object(work_directory): logger.info('Already processing %s - skipping', dataset_info['fc_dataset_path']) return result if self.refresh and os.path.exists(work_directory): shutil.rmtree(work_directory) self.create_directory(work_directory) for processing_level in ['FC']: vrt_band_info_list = [vrt_band_info for vrt_band_info in vrt_band_list if vrt_band_info['processing_level'] == processing_level] nodata_value = vrt_band_info_list[0]['nodata_value'] # All the same for a given processing_level resampling_method = vrt_band_info_list[0]['resampling_method'] # All the same for a given processing_level vrt_band_stack_filename = os.path.join(work_directory, processing_level + '_' + vrt_band_stack_basename) if not os.path.exists(vrt_band_stack_filename) or self.check_object_locked(vrt_band_stack_filename): # Check whether this dataset is already been processed if not self.lock_object(vrt_band_stack_filename): logger.warning('Band stack %s already being processed - skipping.', vrt_band_stack_filename) continue logger.info('Creating %s band stack file %s', processing_level, vrt_band_stack_filename) command_string = 'gdalbuildvrt -separate' if not self.debug: command_string += ' -q' if nodata_value is not None: command_string += ' -srcnodata %d -vrtnodata %d' % ( nodata_value, nodata_value) command_string += ' -overwrite %s %s' % ( vrt_band_stack_filename, ' '.join([vrt_band_info['filename'] for vrt_band_info in vrt_band_info_list]) ) logger.debug('command_string = %s', command_string) result = 
execute(command_string=command_string) if result['stdout']: log_multiline(logger.info, result['stdout'], 'stdout from ' + command_string, '\t') if result['returncode']: log_multiline(logger.error, result['stderr'], 'stderr from ' + command_string, '\t') raise Exception('%s failed', command_string) band_stack_dataset = gdal.Open(vrt_band_stack_filename) assert band_stack_dataset, 'Unable to open VRT %s' % vrt_band_stack_filename band_stack_dataset.SetMetadata( {'satellite': dataset_info['satellite_tag'], 'sensor': dataset_info['sensor_name'], 'start_datetime': dataset_info['start_datetime'].isoformat(), 'end_datetime': dataset_info['end_datetime'].isoformat(), 'path': '%03d' % dataset_info['x_ref'], 'row': '%03d' % dataset_info['y_ref']} ) for band_index in range(len(vrt_band_info_list)): band = band_stack_dataset.GetRasterBand(band_index + 1) band.SetMetadata({'name': vrt_band_info_list[band_index]['name'], 'filename': vrt_band_info_list[band_index]['filename']}) # Need to set nodata values for each band - can't seem to do it in gdalbuildvrt nodata_value = vrt_band_info_list[band_index]['nodata_value'] if nodata_value is not None: band.SetNoDataValue(nodata_value) band_stack_dataset.FlushCache() self.unlock_object(vrt_band_stack_filename) else: logger.info('Band stack %s already exists', vrt_band_stack_filename) band_stack_dataset = gdal.Open(vrt_band_stack_filename) logger.info('Processing %d %s Tiles', tile_count, processing_level) for x_index in range(tile_index_range[0], tile_index_range[2]): for y_index in range(tile_index_range[1], tile_index_range[3]): tile_extents = (tile_type_info['x_origin'] + x_index * tile_type_info['x_size'], tile_type_info['y_origin'] + y_index * tile_type_info['y_size'], tile_type_info['x_origin'] + (x_index + 1) * tile_type_info['x_size'], tile_type_info['y_origin'] + (y_index + 1) * tile_type_info['y_size']) logger.debug('tile_extents = %s', tile_extents) tile_output_dir = os.path.join(tile_output_root, re.sub('\+', '', '%+04d_%+04d' % (x_index, y_index)), '%04d' % dataset_info['start_datetime'].year ) self.create_directory(os.path.join(tile_output_dir, 'mosaic_cache')) tile_output_path = os.path.join(tile_output_dir, '_'.join([dataset_info['satellite_tag'], re.sub('\W', '', dataset_info['sensor_name']), processing_level, re.sub('\+', '', '%+04d_%+04d' % (x_index, y_index)), re.sub(':', '-', dataset_info['start_datetime'].isoformat()) ]) + tile_type_info['file_extension'] ) # Check whether this tile has already been processed if not self.lock_object(tile_output_path): logger.warning('Tile %s already being processed - skipping.', tile_output_path) continue # Only generate tile file if PQA tile or tile contains data if tile_has_data.get((x_index, y_index)) is None or tile_has_data[(x_index, y_index)]: command_string = 'gdalwarp' if not self.debug: command_string += ' -q' command_string += ' -t_srs %s -te %f %f %f %f -tr %f %f -tap -tap -r %s' % ( tile_type_info['crs'], tile_extents[0], tile_extents[1], tile_extents[2], tile_extents[3], tile_type_info['x_pixel_size'], tile_type_info['y_pixel_size'], resampling_method ) if nodata_value is not None: command_string += ' -srcnodata %d -dstnodata %d' % (nodata_value, nodata_value) command_string += ' -of %s' % tile_type_info['file_format'] if tile_type_info['format_options']: for format_option in tile_type_info['format_options'].split(','): command_string += ' -co %s' % format_option command_string += ' -overwrite %s %s' % ( vrt_band_stack_filename, tile_output_path ) logger.debug('command_string = %s', 
command_string) retry=True while retry: result = execute(command_string=command_string) if result['stdout']: log_multiline(logger.info, result['stdout'], 'stdout from ' + command_string, '\t') if result['returncode']: # Return code is non-zero log_multiline(logger.error, result['stderr'], 'stderr from ' + command_string, '\t') # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs if (string.find(result['stderr'], 'LZW') > -1 # LZW-related error and tile_type_info['file_format'] == 'GTiff' # Output format is GeoTIFF and string.find(tile_type_info['format_options'], 'COMPRESS=LZW') > -1): # LZW compression requested temp_tile_path = os.path.join(os.path.dirname(vrt_band_stack_filename), os.path.basename(tile_output_path)) # Write uncompressed tile to a temporary path command_string = string.replace(command_string, 'COMPRESS=LZW', 'COMPRESS=NONE') command_string = string.replace(command_string, tile_output_path, temp_tile_path) # Translate temporary uncompressed tile to final compressed tile command_string += '; gdal_translate -of GTiff' if tile_type_info['format_options']: for format_option in tile_type_info['format_options'].split(','): command_string += ' -co %s' % format_option command_string += ' %s %s' % ( temp_tile_path, tile_output_path ) else: raise Exception('%s failed', command_string) else: retry = False # No retry on success # Set tile metadata tile_dataset = gdal.Open(tile_output_path) assert tile_dataset, 'Unable to open tile dataset %s' % tile_output_path # Check whether PQA tile contains any contiguous data if tile_has_data.get((x_index, y_index)) is None and processing_level == 'PQA': tile_has_data[(x_index, y_index)] = ((numpy.bitwise_and(tile_dataset.GetRasterBand(1).ReadAsArray(), 1 << LandsatTiler.CONTIGUITY_BIT_INDEX)) > 0).any() logger.debug('%s tile (%d, %d) has data = %s', processing_level, x_index, y_index, tile_has_data[(x_index, y_index)]) # Only bother setting metadata if tile has valid data if tile_has_data[(x_index, y_index)]: metadata = band_stack_dataset.GetMetadata() metadata['x_index'] = str(x_index) metadata['y_index'] = str(y_index) tile_dataset.SetMetadata(metadata) # Set tile band metadata for band_index in range(len(vrt_band_info_list)): scene_band = band_stack_dataset.GetRasterBand(band_index + 1) tile_band = tile_dataset.GetRasterBand(band_index + 1) tile_band.SetMetadata(scene_band.GetMetadata()) # Need to set nodata values for each band - gdalwarp doesn't copy it across nodata_value = vrt_band_info_list[band_index]['nodata_value'] if nodata_value is not None: tile_band.SetNoDataValue(nodata_value) logger.info('Processed %s Tile (%d, %d)', processing_level, x_index, y_index) else: logger.info('Skipped empty %s Tile (%d, %d)', processing_level, x_index, y_index) else: logger.info('Skipped empty %s Tile (%d, %d)', processing_level, x_index, y_index) # Change permissions on any recently created files command_string = 'chmod -R a-wxs,u+rwX,g+rsX %s; chown -R %s %s' % (tile_output_dir, TILE_OWNER, tile_output_dir) result = execute(command_string=command_string) if result['stdout']: log_multiline(logger.info, result['stdout'], 'stdout from ' + command_string, '\t') # N.B: command may return errors for files not owned by user if result['returncode']: log_multiline(logger.warning, result['stderr'], 'stderr from ' + command_string, '\t') # raise Exception('%s failed', command_string) self.unlock_object(tile_output_path) # Check whether tile contains any data if tile_has_data[(x_index, y_index)]: tile_class_id = 1 # Valid tile tile_size = 
self.getFileSizeMB(tile_output_path) else: # PQA tile contains no data # Remove empty PQA tile file tile_class_id = 2 # Dummy tile record with no file self.remove(tile_output_path) tile_size = 0 sql = """-- Insert new tile_footprint record if necessary insert into tile_footprint ( x_index, y_index, tile_type_id, x_min, y_min, x_max, y_max ) select %(x_index)s, %(y_index)s, %(tile_type_id)s, %(x_min)s, %(y_min)s, %(x_max)s, %(y_max)s where not exists (select x_index, y_index, tile_type_id from tile_footprint where x_index = %(x_index)s and y_index = %(y_index)s and tile_type_id = %(tile_type_id)s); -- Update any existing tile record update tile set tile_pathname = %(tile_pathname)s, tile_class_id = %(tile_class_id)s, tile_size = %(tile_size)s, ctime = now() where x_index = %(x_index)s and y_index = %(y_index)s and tile_type_id = %(tile_type_id)s and dataset_id = %(dataset_id)s; -- Insert new tile record if necessary insert into tile ( tile_id, x_index, y_index, tile_type_id, dataset_id, tile_pathname, tile_class_id, tile_size, ctime ) select nextval('tile_id_seq'::regclass), %(x_index)s, %(y_index)s, %(tile_type_id)s, %(dataset_id)s, %(tile_pathname)s, %(tile_class_id)s, %(tile_size)s, now() where not exists (select tile_id from tile where x_index = %(x_index)s and y_index = %(y_index)s and tile_type_id = %(tile_type_id)s and dataset_id = %(dataset_id)s ); """ params = {'x_index': x_index, 'y_index': y_index, 'tile_type_id': tile_type_info['tile_type_id'], 'x_min': tile_extents[0], 'y_min': tile_extents[1], 'x_max': tile_extents[2], 'y_max': tile_extents[3], 'dataset_id': vrt_band_info_list[0]['dataset_id'], # All the same 'tile_pathname': tile_output_path, 'tile_class_id': tile_class_id, 'tile_size': tile_size } log_multiline(logger.debug, db_cursor1.mogrify(sql, params), 'SQL', '\t') db_cursor1.execute(sql, params) self.unlock_object(work_directory) if not self.debug: shutil.rmtree(work_directory) result = True self.db_connection.commit() logger.info('Dataset tiling completed - Transaction committed') return result except Exception, e: logger.error('Tiling operation failed: %s', e.message) # Keep on processing self.db_connection.rollback() if self.debug: raise
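# Illustrative sketch: the tiling loop above treats a PQA tile as containing
# data when any pixel has the contiguity bit set in band 1. The helper below
# is a minimal standalone version of that test, assuming a hypothetical
# CONTIGUITY_BIT_INDEX of 8; it is not taken verbatim from this codebase.
import numpy

CONTIGUITY_BIT_INDEX = 8  # assumed value, for illustration only

def pqa_tile_has_data(pqa_array):
    """Return True if any pixel in the PQA array has the contiguity bit set."""
    return (numpy.bitwise_and(pqa_array, 1 << CONTIGUITY_BIT_INDEX) > 0).any()

# Example: a single pixel with the contiguity bit set makes the tile "valid".
sample = numpy.zeros((4, 4), dtype=numpy.uint16)
sample[2, 3] = 1 << CONTIGUITY_BIT_INDEX
assert pqa_tile_has_data(sample)
assert not pqa_tile_has_data(numpy.zeros((4, 4), dtype=numpy.uint16))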
def __init__(self, dataset_path): """Opens the dataset and extracts metadata. """ self._satellite_tag = "MT" self._satellite_sensor = "MODIS-Terra" self._dataset_file = os.path.abspath(dataset_path) fileName, fileExtension = os.path.splitext(self._dataset_file) if (fileName.endswith("RBQ500")): self._processor_level = "RBQ500" else: self._processor_level = "MOD09" vrt_file = open(dataset_path, 'r') vrt_string = vrt_file.read() vrt_file.close() self._dataset_path = re.search('NETCDF:(.*):', vrt_string).groups(1)[0] self._vrt_file = dataset_path self._ds = gdal.Open(self._dataset_path, gdal.GA_ReadOnly) if not self._ds: raise DatasetError("Unable to open %s" % self.get_dataset_path()) self._dataset_size = os.path.getsize(self._dataset_path) LOGGER.debug('Transform = %s', self._ds.GetGeoTransform()); LOGGER.debug('Projection = %s', self._ds.GetProjection()); LOGGER.debug('RasterXSize = %s', self._ds.RasterXSize); LOGGER.debug('RasterYSize = %s', self._ds.RasterYSize); command = "ncdump -v InputFileGlobalAttributes %s" % self._dataset_path result = execute(command) if result['returncode'] != 0: raise DatasetError('Unable to perform ncdump: ' + '"%s" failed: %s' % (command, result['stderr'])) s = re.sub(r"\s+", "", result['stdout']) LOGGER.debug('%s = %s', command, s); self._rangeendingdate = re.search('RANGEENDINGDATE\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEENDINGDATE', s).groups(1)[0] LOGGER.debug('RangeEndingDate = %s', self._rangeendingdate) self._rangeendingtime = re.search('RANGEENDINGTIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEENDINGTIME', s).groups(1)[0] LOGGER.debug('RangeEndingTime = %s', self._rangeendingtime) self._rangebeginningdate = re.search('RANGEBEGINNINGDATE\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEBEGINNINGDATE', s).groups(1)[0] LOGGER.debug('RangeBeginningDate = %s', self._rangebeginningdate) self._rangebeginningtime = re.search('RANGEBEGINNINGTIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEBEGINNINGTIME', s).groups(1)[0] LOGGER.debug('RangeBeginningTime = %s', self._rangebeginningtime) self.scene_start_datetime = self._rangebeginningdate + " " + self._rangebeginningtime self.scene_end_datetime = self._rangeendingdate + " " + self._rangeendingtime self._orbitnumber = int(re.search('ORBITNUMBER\\\\nCLASS=\\\\\"1\\\\\"\\\\nNUM_VAL=1\\\\nVALUE=(.*)\\\\nEND_OBJECT=ORBITNUMBER', s).groups(1)[0]) LOGGER.debug('OrbitNumber = %d', self._orbitnumber) self._cloud_cover_percentage = float(re.search('Cloudy:\\\\t(.*)\\\\n\\\\tMixed', s).groups(1)[0]) LOGGER.debug('CloudCover = %f', self._cloud_cover_percentage) self._completion_datetime = re.search('PRODUCTIONDATETIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)Z\\\\\"\\\\nEND_OBJECT=PRODUCTIONDATETIME', s).groups(1)[0] LOGGER.debug('ProcessedTime = %s', self._completion_datetime) self._metadata = self._ds.GetMetadata('SUBDATASETS') band1 = gdal.Open(self._metadata['SUBDATASET_1_NAME']) # Get Coordinates self._width = band1.RasterXSize self._height = band1.RasterYSize self._gt = band1.GetGeoTransform() self._minx = self._gt[0] self._miny = self._gt[3] + self._width*self._gt[4] + self._height*self._gt[5] # from self._maxx = self._gt[0] + self._width*self._gt[1] + self._height*self._gt[2] # from self._maxy = self._gt[3] LOGGER.debug('min/max x coordinates (%s, %s)',str(self._minx), str(self._maxx)) # min/max x coordinates LOGGER.debug('min/max y coordinates (%s, %s)',str(self._miny), str(self._maxy)) # min/max y coordinates LOGGER.debug('pixel size (%s, %s)', 
                     str(self._gt[1]), str(self._gt[5]))  # pixel size

        self._pixelX = self._width
        self._pixelY = self._height
        LOGGER.debug('pixels (%s, %s)', str(self._pixelX), str(self._pixelY))  # pixels

        self._gcp_count = None
        self._mtl_text = None
        self._xml_text = None

        AbstractDataset.__init__(self)
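# Illustrative sketch: the constructor above derives the dataset's bounding
# coordinates from the GDAL geotransform of the first subdataset. The helper
# below repeats that arithmetic in isolation; the example geotransform is
# made up and is not taken from a real MOD09 file.
def bounds_from_geotransform(gt, width, height):
    """Mirror the min/max x/y computation used above for a GDAL geotransform."""
    min_x = gt[0]
    max_y = gt[3]
    max_x = gt[0] + width * gt[1] + height * gt[2]
    min_y = gt[3] + width * gt[4] + height * gt[5]
    return (min_x, min_y, max_x, max_y)

# Example: a 2400 x 2400 pixel grid with a 463.3 m pixel size.
example_gt = (13343406.0, 463.3, 0.0, -3335852.0, 0.0, -463.3)
print bounds_from_geotransform(example_gt, 2400, 2400)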
def test_make_mosaics(self): """Make mosaic tiles from two adjoining scenes.""" # pylint: disable=too-many-locals dataset_list = \ [TestIngest.DATASETS_TO_INGEST[level][i] for i in range(6) for level in ['PQA', 'NBAR', 'ORTHO']] dataset_list.extend(TestIngest.MOSAIC_SOURCE_NBAR) dataset_list.extend(TestIngest.MOSAIC_SOURCE_PQA) dataset_list.extend(TestIngest.MOSAIC_SOURCE_ORTHO) random.shuffle(dataset_list) LOGGER.info("Ingesting following datasets:") for dset in dataset_list: LOGGER.info('%d) %s', dataset_list.index(dset), dset) for dataset_path in dataset_list: LOGGER.info('Ingesting Dataset %d:\n%s', dataset_list.index(dataset_path), dataset_path) dset = LandsatDataset(dataset_path) self.collection.begin_transaction() acquisition = \ self.collection.create_acquisition_record(dset) dset_record = acquisition.create_dataset_record(dset) # Get tile types dummy_tile_type_list = dset_record.list_tile_types() # Assume dataset has tile_type = 1 only: tile_type_id = 1 dataset_bands_dict = dset_record.get_tile_bands(tile_type_id) ls_bandstack = dset.stack_bands(dataset_bands_dict) temp_dir = os.path.join(self.ingester.datacube.tile_root, 'ingest_temp') # Form scene vrt ls_bandstack.buildvrt(temp_dir) # Reproject scene data onto selected tile coverage tile_footprint_list = dset_record.get_coverage(tile_type_id) LOGGER.info('coverage=%s', str(tile_footprint_list)) for tile_ftprint in tile_footprint_list: #Only do that footprint for which we have benchmark mosaics if tile_ftprint not in [(141, -38)]: continue tile_contents = \ self.collection.create_tile_contents(tile_type_id, tile_ftprint, ls_bandstack) LOGGER.info('Calling reproject for %s tile %s...', dset_record.mdd['processing_level'], tile_ftprint) tile_contents.reproject() LOGGER.info('...finished') if tile_contents.has_data(): LOGGER.info('tile %s has data', tile_contents.temp_tile_output_path) tile_record = dset_record.create_tile_record(tile_contents) mosaic_required = tile_record.make_mosaics() if not mosaic_required: continue # Test mosaic tiles against benchmark # At this stage, transaction for this dataset not yet # commited and so the tiles from this dataset, including # any mosaics are still in the temporary location. if self.POPULATE_EXPECTED: continue mosaic_benchmark = \ TestTileContents.swap_dir_in_path(tile_contents .mosaic_final_pathname, 'output', 'expected') mosaic_new = tile_contents.mosaic_temp_pathname LOGGER.info("Comparing test output with benchmark:\n"\ "benchmark: %s\ntest output: %s", mosaic_benchmark, mosaic_new) if dset_record.mdd['processing_level'] == 'PQA': LOGGER.info( "For PQA mosaic, calling load_and_check...") ([data1, data2], dummy_nlayers) = \ TestLandsatTiler.load_and_check( mosaic_benchmark, mosaic_new, tile_contents.band_stack.band_dict, tile_contents.band_stack.band_dict) LOGGER.info('Checking arrays ...') if ~(data1 == data2).all(): self.fail("Difference in PQA mosaic " "from expected result: %s and %s" % (mosaic_benchmark, mosaic_new)) # Check that differences are due to differing treatment # of contiguity bit. else: diff_cmd = [ "diff", "-I", "[Ff]ilename", "%s" % mosaic_benchmark, "%s" % mosaic_new ] result = execute(diff_cmd, shell=False) assert result['stdout'] == '', \ "Differences between vrt files" assert result['stderr'] == '', \ "Error in system diff command" else: LOGGER.info('... tile has no data') tile_contents.remove() self.collection.commit_transaction()
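# Illustrative sketch: the mosaic test above compares a freshly produced tile
# against a benchmark file. The helper below shows one way to do a pixel-level
# comparison with GDAL; it is a simplified stand-in, not the project's
# load_and_check() routine.
from osgeo import gdal
import numpy

def rasters_identical(path1, path2):
    """Return True if the two rasters have the same band count and pixel values."""
    ds1 = gdal.Open(path1, gdal.GA_ReadOnly)
    ds2 = gdal.Open(path2, gdal.GA_ReadOnly)
    if ds1 is None or ds2 is None or ds1.RasterCount != ds2.RasterCount:
        return False
    for band_number in range(1, ds1.RasterCount + 1):
        array1 = ds1.GetRasterBand(band_number).ReadAsArray()
        array2 = ds2.GetRasterBand(band_number).ReadAsArray()
        if not numpy.array_equal(array1, array2):
            return False
    return True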
class DBUpdater(DataCube): def parse_args(self): """Parse the command line arguments. Returns: argparse namespace object """ logger.debug(' Calling parse_args()') _arg_parser = argparse.ArgumentParser('dbupdater') # N.B: modtran_root is a direct overrides of config entries # and its variable name must be prefixed with "_" to allow lookup in conf file _arg_parser.add_argument('-C', '--config', dest='config_file', default=os.path.join(self.agdc_root, 'agdc_default.conf'), help='DBUpdater configuration file') _arg_parser.add_argument('-d', '--debug', dest='debug', default=False, action='store_const', const=True, help='Debug mode flag') _arg_parser.add_argument( '--source', dest='source_dir', required=True, help='Source root directory containing datasets') _arg_parser.add_argument( '--refresh', dest='refresh', default=False, action='store_const', const=True, help='Refresh mode flag to force updating of existing records') _arg_parser.add_argument( '--purge', dest='purge', default=False, action='store_const', const=True, help= 'Purge mode flag to force removal of nonexistent dataset records') _arg_parser.add_argument( '--removedblist', dest='remove_existing_dblist', default=False, action='store_const', const=True, help='Delete any pre-existing dataset list from disk') _arg_parser.add_argument( '--followsymlinks', dest='follow_symbolic_links', default=False, action='store_const', const=True, help='Follow symbolic links when finding datasets to ingest') return _arg_parser.parse_args() def __init__(self, source_datacube=None, tile_type_id=1): """Constructor Arguments: source_datacube: Optional DataCube object whose connection and data will be shared tile_type_id: Optional tile_type_id value (defaults to 1) """ if source_datacube: # Copy values from source_datacube and then override command line args self.__dict__ = copy(source_datacube.__dict__) args = self.parse_args() # Set instance attributes for every value in command line arguments file for attribute_name in args.__dict__.keys(): attribute_value = args.__dict__[attribute_name] self.__setattr__(attribute_name, attribute_value) else: DataCube.__init__(self) # Call inherited constructor self.temp_dir = os.path.join( self.temp_dir, re.sub('^/', '', os.path.abspath(self.source_dir))) self.create_directory(self.temp_dir) logger.debug('self.temp_dir = %s', self.temp_dir) if self.debug: console_handler.setLevel(logging.DEBUG) def update_records(self): def purge_scenes(db_cursor, dataset_root): logger.info('Purging all nonexistent datasets in directory "%s"', dataset_root) sql = """-- Retrieve all dataset paths select dataset_id, dataset_path from dataset where position(%(dataset_root)s in dataset_path) = 1 order by dataset_path; """ params = {'dataset_root': dataset_root} log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t') db_cursor.execute(sql, params) db_cursor2 = self.db_connection.cursor() for row in db_cursor: if not os.path.isdir(os.path.join(row[1], 'scene01')): logger.info( 'Removing dataset record for nonexistent directory "%s"', row[1]) sql = """-- Removing %(bad_dataset)s delete from tile where dataset_id = %(dataset_id)s; delete from dataset where dataset_id = %(dataset_id)s; """ params = {'dataset_id': row[0], 'bad_dataset': row[1]} log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t') try: db_cursor2.execute(sql, params) self.db_connection.commit() except Exception, e: logger.warning('Delete operation failed for "%s": %s', sql, e.message) self.db_connection.rollback() logger.info('Scene purging 
completed for %s', dataset_root)

        dataset_list_file = os.path.join(self.temp_dir, 'dataset.list')

        if self.remove_existing_dblist:
            try:
                os.remove(dataset_list_file)
            except OSError:
                pass  # List file did not exist

        db_cursor = self.db_connection.cursor()

        if self.purge:  # Remove rows for nonexistent files
            purge_scenes(db_cursor, self.source_dir)

        # Wait for locked file to become unlocked
        unlock_retries = 0
        while os.path.exists(dataset_list_file) and self.check_object_locked(
                dataset_list_file):
            unlock_retries += 1
            assert unlock_retries <= DBUpdater.MAX_RETRIES, \
                'Timed out waiting for list file %s to be unlocked' % dataset_list_file
            logger.debug('Waiting for locked list file %s to become unlocked',
                         dataset_list_file)
            time.sleep(DBUpdater.LOCK_WAIT)

        if os.path.exists(dataset_list_file):
            logger.info('Loading existing list file %s', dataset_list_file)
            shelf = shelve.open(dataset_list_file)
            dataset_list = shelf['dataset_list']
            shelf.close()
        else:
            self.lock_object(dataset_list_file)
            shelf = shelve.open(dataset_list_file)
            logger.info('Creating new list file %s', dataset_list_file)

            # Create master list of datasets
            logger.info('Searching for datasets in %s', self.source_dir)
            if self.follow_symbolic_links:
                command = "find -L %s -name 'scene01' | sort" % self.source_dir
            else:
                command = "find %s -name 'scene01' | sort" % self.source_dir
            logger.debug('executing "%s"', command)
            result = execute(command)
            assert not result['returncode'], '"%s" failed: %s' % (
                command, result['stderr'])

            dataset_list = [
                os.path.abspath(re.sub('/scene01$', '', scenedir))
                for scenedir in result['stdout'].split('\n') if scenedir
            ]

            # Save dataset list for other instances to use
            logger.debug('Saving new dataset list file %s', dataset_list_file)
            # assert not os.path.getsize(dataset_list_file), 'File %s has already been written to'
            shelf['dataset_list'] = dataset_list
            shelf.close()
            self.unlock_object(dataset_list_file)

        # log_multiline(logger.debug, dataset_list, 'dataset_list')

        for dataset_dir in dataset_list:
            if not os.path.isdir(os.path.join(dataset_dir, 'scene01')):
                logger.warning('Skipping nonexistent dataset %s', dataset_dir)
                continue
            try:
                if re.search('PQ', dataset_dir):
                    update_pqa_dataset_record.update_dataset_record(
                        dataset_dir, db_cursor, self.refresh, self.debug)
                elif re.search('FC', dataset_dir):
                    update_fc_dataset_record.update_dataset_record(
                        dataset_dir, db_cursor, self.refresh, self.debug)
                else:
                    update_dataset_record.update_dataset_record(
                        dataset_dir, db_cursor, self.refresh, self.debug)
                self.db_connection.commit()
            except Exception, e:
                logger.warning('Database operation failed for %s: %s',
                               dataset_dir, e.message)
                self.db_connection.rollback()
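# Illustrative sketch: update_records() above persists the discovered dataset
# list in a shelve file so concurrent DBUpdater instances can reuse it instead
# of re-running the filesystem search. The helper below shows the bare cache
# pattern; build_list is a hypothetical callable standing in for the
# "find ... -name 'scene01'" scan, and locking is omitted for brevity.
import os
import shelve

def load_or_build_dataset_list(list_file, build_list):
    """Return a cached dataset list, building and saving it on first use."""
    if os.path.exists(list_file):
        shelf = shelve.open(list_file)
        dataset_list = shelf['dataset_list']
        shelf.close()
        return dataset_list
    dataset_list = build_list()
    shelf = shelve.open(list_file)
    shelf['dataset_list'] = dataset_list
    shelf.close()
    return dataset_list

# Example usage with a trivial builder:
# datasets = load_or_build_dataset_list('/tmp/dataset.list',
#                                       lambda: ['/data/scene_a', '/data/scene_b'])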
def process_dataset(dataset_info): log_multiline(logger.debug, dataset_info, 'Dataset values', '\t') def find_file(dataset_dir, file_pattern): # logger.debug('find_file(%s, %s) called', dataset_dir, file_pattern) assert os.path.isdir( dataset_dir), '%s is not a valid directory' % dataset_dir filelist = [ filename for filename in os.listdir(dataset_dir) if re.match(file_pattern, filename) ] # logger.debug('filelist = %s', filelist) assert len( filelist ) == 1, 'Unable to find unique match for file pattern %s' % file_pattern return os.path.join(dataset_dir, filelist[0]) def get_tile_index_range(dataset_filename): """Returns integer (xmin, ymin, xmax, ymax) tuple for input GDAL dataset filename""" dataset = gdal.Open(dataset_filename) assert dataset, 'Unable to open dataset %s' % dataset_filename spatial_reference = osr.SpatialReference() spatial_reference.ImportFromWkt(dataset.GetProjection()) geotransform = dataset.GetGeoTransform() logger.debug('geotransform = %s', geotransform) # latlong_spatial_reference = spatial_reference.CloneGeogCS() tile_spatial_reference = osr.SpatialReference() s = re.match('EPSG:(\d+)', tile_type_info['crs']) if s: epsg_code = int(s.group(1)) logger.debug('epsg_code = %d', epsg_code) assert tile_spatial_reference.ImportFromEPSG( epsg_code ) == 0, 'Invalid EPSG code for tile projection' else: assert tile_spatial_reference.ImportFromWkt( tile_type_info['crs'] ), 'Invalid WKT for tile projection' logger.debug('Tile WKT = %s', tile_spatial_reference.ExportToWkt()) coord_transform_to_tile = osr.CoordinateTransformation( spatial_reference, tile_spatial_reference) # Upper Left xmin, ymax, _z = coord_transform_to_tile.TransformPoint( geotransform[0], geotransform[3], 0) # Lower Right xmax, ymin, _z = coord_transform_to_tile.TransformPoint( geotransform[0] + geotransform[1] * dataset.RasterXSize, geotransform[3] + geotransform[5] * dataset.RasterYSize, 0) logger.debug( 'Coordinates: xmin = %f, ymin = %f, xmax = %f, ymax = %f', xmin, ymin, xmax, ymax) return (int( floor((xmin - tile_type_info['x_origin']) / tile_type_info['x_size'])), int( floor((ymin - tile_type_info['y_origin']) / tile_type_info['y_size'])), int( ceil((xmax - tile_type_info['x_origin']) / tile_type_info['x_size'])), int( ceil((ymax - tile_type_info['y_origin']) / tile_type_info['y_size']))) def find_tiles(x_index=None, y_index=None): """Find any tile records for current dataset returns dict of tile information keyed by tile_id """ db_cursor2 = self.db_connection.cursor() sql = """-- Check for any existing tiles select tile_id, x_index, y_index, tile_type_id, tile_pathname, dataset_id, tile_class_id, tile_size from tile_footprint inner join tile using(x_index, y_index, tile_type_id) where (%(x_index)s is null or x_index = %(x_index)s) and (%(y_index)s is null or y_index = %(y_index)s) and tile_type_id = %(tile_type_id)s and dataset_id = %(fc_dataset_id)s and ctime is not null -- TODO: Remove this after reload ; """ params = { 'x_index': x_index, 'y_index': y_index, 'tile_type_id': tile_type_info['tile_type_id'], 'fc_dataset_id': dataset_info['fc_dataset_id'] } log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t') db_cursor2.execute(sql, params) tile_info = {} for record in db_cursor2: tile_info_dict = { 'x_index': record[1], 'y_index': record[2], 'tile_type_id': record[3], 'tile_pathname': record[4], 'dataset_id': record[5], 'tile_class_id': record[6], 'tile_size': record[7] } tile_info[record[0]] = tile_info_dict # Keyed by tile_id log_multiline(logger.debug, tile_info, 'tile_info', 
'\t') return tile_info def get_vrt_band_list(): """Returns list of band information to create tiles """ logger.debug('get_vrt_band_list() called') vrt_band_list = [] #=============================================================================== # sensor_dict = self.bands[tile_type_id][(dataset_info['satellite_tag'], dataset_info['sensor_name'])] # # log_multiline(logger.debug, sensor, 'Sensor', '\t') # for file_number in sorted(sensor_dict.keys()): # band_info = sensor_dict[file_number] # if band_info['level_name'] == 'NBAR': # dataset_dir = dataset_info['nbar_dataset_path'] # dataset_id = dataset_info['nbar_dataset_id'] # processing_level = dataset_info['nbar_level_name'] # nodata_value = dataset_info['nbar_nodata_value'] # resampling_method = dataset_info['nbar_resampling_method'] # elif band_info['level_name'] == 'ORTHO': # dataset_dir = dataset_info['l1t_dataset_path'] # dataset_id = dataset_info['l1t_dataset_id'] # processing_level = dataset_info['l1t_level_name'] # nodata_value = dataset_info['l1t_nodata_value'] # resampling_method = dataset_info['l1t_resampling_method'] # else: # continue # Ignore any pan-chromatic and derived bands # # dataset_dir = os.path.join(dataset_dir, 'scene01') # filename = find_file(dataset_dir, band_info['file_pattern']) # vrt_band_list.append({'file_number': band_info['file_number'], # 'filename': filename, # 'name': band_info['band_name'], # 'dataset_id': dataset_id, # 'band_id': band_info['band_id'], # 'processing_level': processing_level, # 'nodata_value': nodata_value, # 'resampling_method': resampling_method, # 'tile_layer': band_info['tile_layer']}) #=============================================================================== #TODO: Make this able to handle multiple derived layers for band_level in ['FC']: derived_bands = self.bands[tile_type_id][('DERIVED', band_level)] for file_number in sorted(derived_bands.keys()): band_info = derived_bands[file_number] file_pattern = band_info['file_pattern'] dataset_dir = os.path.join( dataset_info['fc_dataset_path'], 'scene01') dataset_id = dataset_info['fc_dataset_id'] filename = find_file(dataset_dir, file_pattern) processing_level = dataset_info['fc_level_name'] nodata_value = dataset_info[ 'fc_nodata_value'] # Should be None for FC resampling_method = dataset_info[ 'fc_resampling_method'] vrt_band_list.append({ 'file_number': None, 'filename': filename, 'name': band_info['band_name'], 'dataset_id': dataset_id, 'band_id': band_info['band_id'], 'processing_level': processing_level, 'nodata_value': nodata_value, 'resampling_method': resampling_method, 'tile_layer': 1 }) log_multiline(logger.debug, vrt_band_list, 'vrt_band_list = %s', '\t') return vrt_band_list def get_tile_has_data(tile_index_range): tile_has_data = {} db_cursor2 = self.db_connection.cursor() sql = """-- Find all PQA tiles which exist for the dataset select x_index, y_index from dataset inner join tile using(dataset_id) where tile_type_id = %(tile_type_id)s and level_id = 3 -- PQA and tile_class_id = 1 -- Tile containing live data and acquisition_id = %(acquisition_id)s """ params = { 'tile_type_id': tile_type_info['tile_type_id'], 'acquisition_id': dataset_info['acquisition_id'] } log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t') db_cursor2.execute(sql, params) for x_index in range(tile_index_range[0], tile_index_range[2]): for y_index in range(tile_index_range[1], tile_index_range[3]): tile_has_data[(x_index, y_index)] = False # Set tile_has_data element to True if PQA tile exists for record in db_cursor2: 
tile_has_data[(record[0], record[1])] = True return tile_has_data # process_dataset function starts here result = False db_cursor1 = self.db_connection.cursor() logger.info('Processing dataset %s', dataset_info['fc_dataset_path']) vrt_band_stack_basename = '_'.join([ dataset_info['satellite_tag'], re.sub('\W', '', dataset_info['sensor_name']), dataset_info['start_datetime'].date().strftime('%Y%m%d'), '%03d' % dataset_info['x_ref'], '%03d' % dataset_info['y_ref'] ]) + '.vrt' logger.debug('vrt_band_stack_basename = %s', vrt_band_stack_basename) tile_output_root = os.path.join( self.tile_root, tile_type_info['tile_directory'], dataset_info['satellite_tag'] + '_' + re.sub('\W', '', dataset_info['sensor_name'])) logger.debug('tile_output_root = %s', tile_output_root) vrt_band_list = get_vrt_band_list() tile_index_range = get_tile_index_range( vrt_band_list[0] ['filename']) # Find extents of first band dataset tile_count = abs(tile_index_range[2] - tile_index_range[0]) * ( tile_index_range[3] - tile_index_range[1]) # Check whether tiles exist for every band tile_record_count = len(find_tiles()) logger.info('Found %d tile records in database for %d tiles', tile_record_count, tile_count) # Count FC only if tile_record_count == tile_count: logger.info( 'All tiles already exist in database - skipping tile creation for %s', dataset_info['fc_dataset_path']) return result try: #TODO: Create all new acquisition records and commit the transaction here # Use NBAR dataset name for dataset lock (could have been any other level) work_directory = os.path.join( self.temp_dir, os.path.basename(dataset_info['fc_dataset_path'])) tile_has_data = get_tile_has_data(tile_index_range) any_tile_has_data = False for value in tile_has_data.values(): any_tile_has_data |= value if not any_tile_has_data: logger.info( 'No valid PQ tiles found - skipping tile creation for %s', dataset_info['fc_dataset_path']) return result #TODO: Apply lock on path/row instead of on dataset to try to force the same node to process the full depth if not self.lock_object(work_directory): logger.info('Already processing %s - skipping', dataset_info['fc_dataset_path']) return result if self.refresh and os.path.exists(work_directory): shutil.rmtree(work_directory) self.create_directory(work_directory) for processing_level in ['FC']: vrt_band_info_list = [ vrt_band_info for vrt_band_info in vrt_band_list if vrt_band_info['processing_level'] == processing_level ] nodata_value = vrt_band_info_list[0][ 'nodata_value'] # All the same for a given processing_level resampling_method = vrt_band_info_list[0][ 'resampling_method'] # All the same for a given processing_level vrt_band_stack_filename = os.path.join( work_directory, processing_level + '_' + vrt_band_stack_basename) if not os.path.exists(vrt_band_stack_filename ) or self.check_object_locked( vrt_band_stack_filename): # Check whether this dataset is already been processed if not self.lock_object(vrt_band_stack_filename): logger.warning( 'Band stack %s already being processed - skipping.', vrt_band_stack_filename) continue logger.info('Creating %s band stack file %s', processing_level, vrt_band_stack_filename) command_string = 'gdalbuildvrt -separate' if not self.debug: command_string += ' -q' if nodata_value is not None: command_string += ' -srcnodata %d -vrtnodata %d' % ( nodata_value, nodata_value) command_string += ' -overwrite %s %s' % ( vrt_band_stack_filename, ' '.join([ vrt_band_info['filename'] for vrt_band_info in vrt_band_info_list ])) logger.debug('command_string = %s', command_string) 
result = execute(command_string=command_string) if result['stdout']: log_multiline(logger.info, result['stdout'], 'stdout from ' + command_string, '\t') if result['returncode']: log_multiline(logger.error, result['stderr'], 'stderr from ' + command_string, '\t') raise Exception('%s failed', command_string) band_stack_dataset = gdal.Open(vrt_band_stack_filename) assert band_stack_dataset, 'Unable to open VRT %s' % vrt_band_stack_filename band_stack_dataset.SetMetadata({ 'satellite': dataset_info['satellite_tag'], 'sensor': dataset_info['sensor_name'], 'start_datetime': dataset_info['start_datetime'].isoformat(), 'end_datetime': dataset_info['end_datetime'].isoformat(), 'path': '%03d' % dataset_info['x_ref'], 'row': '%03d' % dataset_info['y_ref'] }) for band_index in range(len(vrt_band_info_list)): band = band_stack_dataset.GetRasterBand( band_index + 1) band.SetMetadata({ 'name': vrt_band_info_list[band_index]['name'], 'filename': vrt_band_info_list[band_index]['filename'] }) # Need to set nodata values for each band - can't seem to do it in gdalbuildvrt nodata_value = vrt_band_info_list[band_index][ 'nodata_value'] if nodata_value is not None: band.SetNoDataValue(nodata_value) band_stack_dataset.FlushCache() self.unlock_object(vrt_band_stack_filename) else: logger.info('Band stack %s already exists', vrt_band_stack_filename) band_stack_dataset = gdal.Open(vrt_band_stack_filename) logger.info('Processing %d %s Tiles', tile_count, processing_level) for x_index in range(tile_index_range[0], tile_index_range[2]): for y_index in range(tile_index_range[1], tile_index_range[3]): tile_extents = ( tile_type_info['x_origin'] + x_index * tile_type_info['x_size'], tile_type_info['y_origin'] + y_index * tile_type_info['y_size'], tile_type_info['x_origin'] + (x_index + 1) * tile_type_info['x_size'], tile_type_info['y_origin'] + (y_index + 1) * tile_type_info['y_size']) logger.debug('tile_extents = %s', tile_extents) tile_output_dir = os.path.join( tile_output_root, re.sub('\+', '', '%+04d_%+04d' % (x_index, y_index)), '%04d' % dataset_info['start_datetime'].year) self.create_directory( os.path.join(tile_output_dir, 'mosaic_cache')) tile_output_path = os.path.join( tile_output_dir, '_'.join([ dataset_info['satellite_tag'], re.sub('\W', '', dataset_info['sensor_name']), processing_level, re.sub('\+', '', '%+04d_%+04d' % (x_index, y_index)), re.sub( ':', '-', dataset_info['start_datetime']. 
isoformat()) ]) + tile_type_info['file_extension']) # Check whether this tile has already been processed if not self.lock_object(tile_output_path): logger.warning( 'Tile %s already being processed - skipping.', tile_output_path) continue # Only generate tile file if PQA tile or tile contains data if tile_has_data.get( (x_index, y_index)) is None or tile_has_data[( x_index, y_index)]: command_string = 'gdalwarp' if not self.debug: command_string += ' -q' command_string += ' -t_srs %s -te %f %f %f %f -tr %f %f -tap -tap -r %s' % ( tile_type_info['crs'], tile_extents[0], tile_extents[1], tile_extents[2], tile_extents[3], tile_type_info['x_pixel_size'], tile_type_info['y_pixel_size'], resampling_method) if nodata_value is not None: command_string += ' -srcnodata %d -dstnodata %d' % ( nodata_value, nodata_value) command_string += ' -of %s' % tile_type_info[ 'file_format'] if tile_type_info['format_options']: for format_option in tile_type_info[ 'format_options'].split(','): command_string += ' -co %s' % format_option command_string += ' -overwrite %s %s' % ( vrt_band_stack_filename, tile_output_path) logger.debug('command_string = %s', command_string) retry = True while retry: result = execute( command_string=command_string) if result['stdout']: log_multiline( logger.info, result['stdout'], 'stdout from ' + command_string, '\t') if result[ 'returncode']: # Return code is non-zero log_multiline( logger.error, result['stderr'], 'stderr from ' + command_string, '\t') # Work-around for gdalwarp error writing LZW-compressed GeoTIFFs if (string.find( result['stderr'], 'LZW') > -1 # LZW-related error and tile_type_info['file_format'] == 'GTiff' # Output format is GeoTIFF and string.find( tile_type_info[ 'format_options'], 'COMPRESS=LZW') > -1 ): # LZW compression requested temp_tile_path = os.path.join( os.path.dirname( vrt_band_stack_filename), os.path.basename( tile_output_path)) # Write uncompressed tile to a temporary path command_string = string.replace( command_string, 'COMPRESS=LZW', 'COMPRESS=NONE') command_string = string.replace( command_string, tile_output_path, temp_tile_path) # Translate temporary uncompressed tile to final compressed tile command_string += '; gdal_translate -of GTiff' if tile_type_info[ 'format_options']: for format_option in tile_type_info[ 'format_options'].split( ','): command_string += ' -co %s' % format_option command_string += ' %s %s' % ( temp_tile_path, tile_output_path) else: raise Exception( '%s failed', command_string) else: retry = False # No retry on success # Set tile metadata tile_dataset = gdal.Open(tile_output_path) assert tile_dataset, 'Unable to open tile dataset %s' % tile_output_path # Check whether PQA tile contains any contiguous data if tile_has_data.get( (x_index, y_index )) is None and processing_level == 'PQA': tile_has_data[(x_index, y_index)] = ( (numpy.bitwise_and( tile_dataset.GetRasterBand( 1).ReadAsArray(), 1 << LandsatTiler.CONTIGUITY_BIT_INDEX)) > 0).any() logger.debug( '%s tile (%d, %d) has data = %s', processing_level, x_index, y_index, tile_has_data[(x_index, y_index)]) # Only bother setting metadata if tile has valid data if tile_has_data[(x_index, y_index)]: metadata = band_stack_dataset.GetMetadata() metadata['x_index'] = str(x_index) metadata['y_index'] = str(y_index) tile_dataset.SetMetadata(metadata) # Set tile band metadata for band_index in range( len(vrt_band_info_list)): scene_band = band_stack_dataset.GetRasterBand( band_index + 1) tile_band = tile_dataset.GetRasterBand( band_index + 1) tile_band.SetMetadata( 
scene_band.GetMetadata()) # Need to set nodata values for each band - gdalwarp doesn't copy it across nodata_value = vrt_band_info_list[ band_index]['nodata_value'] if nodata_value is not None: tile_band.SetNoDataValue( nodata_value) logger.info('Processed %s Tile (%d, %d)', processing_level, x_index, y_index) else: logger.info( 'Skipped empty %s Tile (%d, %d)', processing_level, x_index, y_index) else: logger.info('Skipped empty %s Tile (%d, %d)', processing_level, x_index, y_index) # Change permissions on any recently created files command_string = 'chmod -R a-wxs,u+rwX,g+rsX %s; chown -R %s %s' % ( tile_output_dir, TILE_OWNER, tile_output_dir) result = execute(command_string=command_string) if result['stdout']: log_multiline(logger.info, result['stdout'], 'stdout from ' + command_string, '\t') # N.B: command may return errors for files not owned by user if result['returncode']: log_multiline(logger.warning, result['stderr'], 'stderr from ' + command_string, '\t') # raise Exception('%s failed', command_string) self.unlock_object(tile_output_path) # Check whether tile contains any data if tile_has_data[(x_index, y_index)]: tile_class_id = 1 # Valid tile tile_size = self.getFileSizeMB( tile_output_path) else: # PQA tile contains no data # Remove empty PQA tile file tile_class_id = 2 # Dummy tile record with no file self.remove(tile_output_path) tile_size = 0 sql = """-- Insert new tile_footprint record if necessary insert into tile_footprint ( x_index, y_index, tile_type_id, x_min, y_min, x_max, y_max ) select %(x_index)s, %(y_index)s, %(tile_type_id)s, %(x_min)s, %(y_min)s, %(x_max)s, %(y_max)s where not exists (select x_index, y_index, tile_type_id from tile_footprint where x_index = %(x_index)s and y_index = %(y_index)s and tile_type_id = %(tile_type_id)s); -- Update any existing tile record update tile set tile_pathname = %(tile_pathname)s, tile_class_id = %(tile_class_id)s, tile_size = %(tile_size)s, ctime = now() where x_index = %(x_index)s and y_index = %(y_index)s and tile_type_id = %(tile_type_id)s and dataset_id = %(dataset_id)s; -- Insert new tile record if necessary insert into tile ( tile_id, x_index, y_index, tile_type_id, dataset_id, tile_pathname, tile_class_id, tile_size, ctime ) select nextval('tile_id_seq'::regclass), %(x_index)s, %(y_index)s, %(tile_type_id)s, %(dataset_id)s, %(tile_pathname)s, %(tile_class_id)s, %(tile_size)s, now() where not exists (select tile_id from tile where x_index = %(x_index)s and y_index = %(y_index)s and tile_type_id = %(tile_type_id)s and dataset_id = %(dataset_id)s ); """ params = { 'x_index': x_index, 'y_index': y_index, 'tile_type_id': tile_type_info['tile_type_id'], 'x_min': tile_extents[0], 'y_min': tile_extents[1], 'x_max': tile_extents[2], 'y_max': tile_extents[3], 'dataset_id': vrt_band_info_list[0] ['dataset_id'], # All the same 'tile_pathname': tile_output_path, 'tile_class_id': tile_class_id, 'tile_size': tile_size } log_multiline(logger.debug, db_cursor1.mogrify(sql, params), 'SQL', '\t') db_cursor1.execute(sql, params) self.unlock_object(work_directory) if not self.debug: shutil.rmtree(work_directory) result = True self.db_connection.commit() logger.info('Dataset tiling completed - Transaction committed') return result except Exception, e: logger.error('Tiling operation failed: %s', e.message) # Keep on processing self.db_connection.rollback() if self.debug: raise
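# Illustrative sketch: get_tile_index_range() above converts dataset extents
# (already in the tile CRS) into integer tile indices using the tile grid
# origin and tile size. The helper below isolates that floor/ceil arithmetic;
# the 1-degree grid anchored at (110.0, -45.0) is an assumed example, not a
# configured tile type.
from math import floor, ceil

def tile_index_range(xmin, ymin, xmax, ymax,
                     x_origin=110.0, y_origin=-45.0, x_size=1.0, y_size=1.0):
    """Return (x_index_min, y_index_min, x_index_max, y_index_max)."""
    return (int(floor((xmin - x_origin) / x_size)),
            int(floor((ymin - y_origin) / y_size)),
            int(ceil((xmax - x_origin) / x_size)),
            int(ceil((ymax - y_origin) / y_size)))

# Example: an extent spanning longitudes 146.2-147.9 and latitudes -36.8 to
# -35.1 covers a 2 x 2 block of tiles: (36, 8, 38, 10).
print tile_index_range(146.2, -36.8, 147.9, -35.1)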
def create_tiles(self, filename=None, level_name=None, tile_type_id=None): # Set default values to instance values filename = filename or self.filename level_name = level_name or self.level_name tile_type_id = tile_type_id or self.default_tile_type_id nodata_value = None tile_type_info = self.tile_type_dict[tile_type_id] dem_band_info = self.bands[tile_type_id].get(('DERIVED', level_name)) assert dem_band_info, 'No band level information defined for level %s' % level_name def find_tiles(x_index=None, y_index=None): """Find any tile records for current dataset returns dict of tile information keyed by tile_id """ db_cursor2 = self.db_connection.cursor() sql = """-- Check for any existing tiles select tile_id, x_index, y_index, tile_type_id, tile_pathname, dataset_id, tile_class_id, tile_size from tile_footprint inner join tile using(x_index, y_index, tile_type_id) inner join dataset using(dataset_id) inner join processing_level using(level_id) where tile_type_id = %(tile_type_id)s and (%(x_index)s is null or x_index = %(x_index)s) and (%(y_index)s is null or y_index = %(y_index)s) and level_name = %(level_name)s and ctime is not null ; """ params = { 'x_index': x_index, 'y_index': y_index, 'tile_type_id': tile_type_info['tile_type_id'], 'level_name': level_name } log_multiline(logger.debug, db_cursor2.mogrify(sql, params), 'SQL', '\t') db_cursor2.execute(sql, params) tile_info = {} for record in db_cursor2: tile_info_dict = { 'x_index': record[1], 'y_index': record[2], 'tile_type_id': record[3], 'tile_pathname': record[4], 'dataset_id': record[5], 'tile_class_id': record[6], 'tile_size': record[7] } tile_info[record[0]] = tile_info_dict # Keyed by tile_id log_multiline(logger.debug, tile_info, 'tile_info', '\t') return tile_info # Function create_tiles starts here db_cursor = self.db_connection.cursor() dataset = gdal.Open(filename) assert dataset, 'Unable to open dataset %s' % filename spatial_reference = osr.SpatialReference() spatial_reference.ImportFromWkt(dataset.GetProjection()) geotransform = dataset.GetGeoTransform() logger.debug('geotransform = %s', geotransform) latlong_spatial_reference = spatial_reference.CloneGeogCS() coord_transform_to_latlong = osr.CoordinateTransformation( spatial_reference, latlong_spatial_reference) tile_spatial_reference = osr.SpatialReference() s = re.match('EPSG:(\d+)', tile_type_info['crs']) if s: epsg_code = int(s.group(1)) logger.debug('epsg_code = %d', epsg_code) assert tile_spatial_reference.ImportFromEPSG( epsg_code) == 0, 'Invalid EPSG code for tile projection' else: assert tile_spatial_reference.ImportFromWkt( tile_type_info['crs']), 'Invalid WKT for tile projection' logger.debug('Tile WKT = %s', tile_spatial_reference.ExportToWkt()) coord_transform_to_tile = osr.CoordinateTransformation( spatial_reference, tile_spatial_reference) # Need to keep tile and lat/long references separate even though they may be equivalent # Upper Left ul_x, ul_y = geotransform[0], geotransform[3] ul_lon, ul_lat, _z = coord_transform_to_latlong.TransformPoint( ul_x, ul_y, 0) tile_ul_x, tile_ul_y, _z = coord_transform_to_tile.TransformPoint( ul_x, ul_y, 0) # Upper Right ur_x, ur_y = geotransform[ 0] + geotransform[1] * dataset.RasterXSize, geotransform[3] ur_lon, ur_lat, _z = coord_transform_to_latlong.TransformPoint( ur_x, ur_y, 0) tile_ur_x, tile_ur_y, _z = coord_transform_to_tile.TransformPoint( ur_x, ur_y, 0) # Lower Right lr_x, lr_y = geotransform[ 0] + geotransform[1] * dataset.RasterXSize, geotransform[ 3] + geotransform[5] * dataset.RasterYSize lr_lon, lr_lat, 
_z = coord_transform_to_latlong.TransformPoint( lr_x, lr_y, 0) tile_lr_x, tile_lr_y, _z = coord_transform_to_tile.TransformPoint( lr_x, lr_y, 0) # Lower Left ll_x, ll_y = geotransform[ 0], geotransform[3] + geotransform[5] * dataset.RasterYSize ll_lon, ll_lat, _z = coord_transform_to_latlong.TransformPoint( ll_x, ll_y, 0) tile_ll_x, tile_ll_y, _z = coord_transform_to_tile.TransformPoint( ll_x, ll_y, 0) tile_min_x = min(tile_ul_x, tile_ll_x) tile_max_x = max(tile_ur_x, tile_lr_x) tile_min_y = min(tile_ll_y, tile_lr_y) tile_max_y = max(tile_ul_y, tile_ur_y) tile_index_range = (int( floor((tile_min_x - tile_type_info['x_origin']) / tile_type_info['x_size'])), int( floor( (tile_min_y - tile_type_info['y_origin']) / tile_type_info['y_size'])), int( ceil( (tile_max_x - tile_type_info['x_origin']) / tile_type_info['x_size'])), int( ceil( (tile_max_y - tile_type_info['y_origin']) / tile_type_info['y_size']))) sql = """-- Find dataset_id for given path select dataset_id from dataset where dataset_path like '%%' || %(basename)s """ params = {'basename': os.path.basename(filename)} log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t') db_cursor.execute(sql, params) result = db_cursor.fetchone() if result: # Record already exists dataset_id = result[0] if self.refresh: logger.info('Updating existing record for %s', filename) sql = """ update dataset set level_id = (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)), datetime_processed = %(datetime_processed)s, dataset_size = %(dataset_size)s, crs = %(crs)s, ll_x = %(ll_x)s, ll_y = %(ll_y)s, lr_x = %(lr_x)s, lr_y = %(lr_y)s, ul_x = %(ul_x)s, ul_y = %(ul_y)s, ur_x = %(ur_x)s, ur_y = %(ur_y)s, x_pixels = %(x_pixels)s, y_pixels = %(y_pixels)s where dataset_id = %(dataset_id)s; select %(dataset_id)s """ else: logger.info('Skipping existing record for %s', filename) return else: # Record doesn't already exist logger.info('Creating new record for %s', filename) dataset_id = None sql = """-- Create new dataset record insert into dataset( dataset_id, acquisition_id, dataset_path, level_id, datetime_processed, dataset_size, crs, ll_x, ll_y, lr_x, lr_y, ul_x, ul_y, ur_x, ur_y, x_pixels, y_pixels ) select nextval('dataset_id_seq') as dataset_id, null as acquisition_id, %(dataset_path)s, (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)), %(datetime_processed)s, %(dataset_size)s, %(crs)s, %(ll_x)s, %(ll_y)s, %(lr_x)s, %(lr_y)s, %(ul_x)s, %(ul_y)s, %(ur_x)s, %(ur_y)s, %(x_pixels)s, %(y_pixels)s where not exists (select dataset_id from dataset where dataset_path = %(dataset_path)s ); select dataset_id from dataset where dataset_path = %(dataset_path)s ; """ dataset_size = self.getFileSizekB( filename) # Need size in kB to match other datasets # same params for insert or update params = { 'dataset_id': dataset_id, 'dataset_path': filename, 'processing_level': level_name, 'datetime_processed': None, 'dataset_size': dataset_size, 'll_lon': ll_lon, 'll_lat': ll_lat, 'lr_lon': lr_lon, 'lr_lat': lr_lat, 'ul_lon': ul_lon, 'ul_lat': ul_lat, 'ur_lon': ur_lon, 'ur_lat': ur_lat, 'crs': dataset.GetProjection(), 'll_x': ll_x, 'll_y': ll_y, 'lr_x': lr_x, 'lr_y': lr_y, 'ul_x': ul_x, 'ul_y': ul_y, 'ur_x': ur_x, 'ur_y': ur_y, 'x_pixels': dataset.RasterXSize, 'y_pixels': dataset.RasterYSize, 'gcp_count': None, 'mtl_text': None, 'cloud_cover': None } log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t') db_cursor.execute(sql, params) result = db_cursor.fetchone() 
# Retrieve new dataset_id if required dataset_id = dataset_id or result[0] tile_output_root = os.path.join(self.tile_root, tile_type_info['tile_directory'], level_name, os.path.basename(filename)) logger.debug('tile_output_root = %s', tile_output_root) self.create_directory(tile_output_root) work_directory = os.path.join(self.temp_dir, os.path.basename(filename)) logger.debug('work_directory = %s', work_directory) self.create_directory(work_directory) for x_index in range(tile_index_range[0], tile_index_range[2]): for y_index in range(tile_index_range[1], tile_index_range[3]): tile_info = find_tiles(x_index, y_index) if tile_info: logger.info('Skipping existing tile (%d, %d)', x_index, y_index) continue tile_basename = '_'.join([ level_name, re.sub('\+', '', '%+04d_%+04d' % (x_index, y_index)) ]) + tile_type_info['file_extension'] tile_output_path = os.path.join(tile_output_root, tile_basename) # Check whether this tile has already been processed if not self.lock_object(tile_output_path): logger.warning( 'Tile %s already being processed - skipping.', tile_output_path) continue try: self.remove(tile_output_path) temp_tile_path = os.path.join(self.temp_dir, tile_basename) tile_extents = (tile_type_info['x_origin'] + x_index * tile_type_info['x_size'], tile_type_info['y_origin'] + y_index * tile_type_info['y_size'], tile_type_info['x_origin'] + (x_index + 1) * tile_type_info['x_size'], tile_type_info['y_origin'] + (y_index + 1) * tile_type_info['y_size']) logger.debug('tile_extents = %s', tile_extents) command_string = 'gdalwarp' if not self.debug: command_string += ' -q' command_string += ' -t_srs %s -te %f %f %f %f -tr %f %f -tap -tap -r %s' % ( tile_type_info['crs'], tile_extents[0], tile_extents[1], tile_extents[2], tile_extents[3], tile_type_info['x_pixel_size'], tile_type_info['y_pixel_size'], dem_band_info[10]['resampling_method']) if nodata_value is not None: command_string += ' -srcnodata %d -dstnodata %d' % ( nodata_value, nodata_value) command_string += ' -of %s' % tile_type_info['file_format'] if tile_type_info['format_options']: for format_option in tile_type_info[ 'format_options'].split(','): command_string += ' -co %s' % format_option command_string += ' -overwrite %s %s' % (filename, temp_tile_path) logger.debug('command_string = %s', command_string) result = execute(command_string=command_string) if result['stdout']: log_multiline(logger.info, result['stdout'], 'stdout from ' + command_string, '\t') if result['returncode']: log_multiline(logger.error, result['stderr'], 'stderr from ' + command_string, '\t') raise Exception('%s failed', command_string) temp_dataset = gdal.Open(temp_tile_path) gdal_driver = gdal.GetDriverByName( tile_type_info['file_format']) #output_dataset = gdal_driver.Create(output_tile_path, # nbar_dataset.RasterXSize, nbar_dataset.RasterYSize, # 1, nbar_dataset.GetRasterBand(1).DataType, # tile_type_info['format_options'].split(',')) output_dataset = gdal_driver.Create( tile_output_path, temp_dataset.RasterXSize, temp_dataset.RasterYSize, len(dem_band_info), temp_dataset.GetRasterBand(1).DataType, tile_type_info['format_options'].split(',')) assert output_dataset, 'Unable to open output dataset %s' % output_dataset output_geotransform = temp_dataset.GetGeoTransform() output_dataset.SetGeoTransform(output_geotransform) output_dataset.SetProjection(temp_dataset.GetProjection()) elevation_array = temp_dataset.GetRasterBand( 1).ReadAsArray() del temp_dataset self.remove(temp_tile_path) pixel_x_size = abs(output_geotransform[1]) pixel_y_size = 
abs(output_geotransform[5]) x_m_array, y_m_array = self.get_pixel_size_grids( output_dataset) dzdx_array = ndimage.sobel(elevation_array, axis=1) / ( 8. * abs(output_geotransform[1])) dzdx_array = numexpr.evaluate( "dzdx_array * pixel_x_size / x_m_array") del x_m_array dzdy_array = ndimage.sobel(elevation_array, axis=0) / ( 8. * abs(output_geotransform[5])) dzdy_array = numexpr.evaluate( "dzdy_array * pixel_y_size / y_m_array") del y_m_array for band_file_number in sorted(dem_band_info.keys()): output_band_number = dem_band_info[band_file_number][ 'tile_layer'] output_band = output_dataset.GetRasterBand( output_band_number) if band_file_number == 10: # Elevation output_band.WriteArray(elevation_array) del elevation_array elif band_file_number == 20: # Slope hypotenuse_array = numpy.hypot( dzdx_array, dzdy_array) slope_array = numexpr.evaluate( "arctan(hypotenuse_array) / RADIANS_PER_DEGREE" ) del hypotenuse_array output_band.WriteArray(slope_array) del slope_array elif band_file_number == 30: # Aspect # Convert angles from conventional radians to compass heading 0-360 aspect_array = numexpr.evaluate( "(450 - arctan2(dzdy_array, -dzdx_array) / RADIANS_PER_DEGREE) % 360" ) output_band.WriteArray(aspect_array) del aspect_array if nodata_value is not None: output_band.SetNoDataValue(nodata_value) output_band.FlushCache() #=========================================================== # # This is not strictly necessary - copy metadata to output dataset # output_dataset_metadata = temp_dataset.GetMetadata() # if output_dataset_metadata: # output_dataset.SetMetadata(output_dataset_metadata) # log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t') #=========================================================== output_dataset.FlushCache() del output_dataset logger.info('Finished writing dataset %s', tile_output_path) tile_size = self.getFileSizeMB(tile_output_path) sql = """-- Insert new tile_footprint record if necessary insert into tile_footprint ( x_index, y_index, tile_type_id, x_min, y_min, x_max, y_max ) select %(x_index)s, %(y_index)s, %(tile_type_id)s, %(x_min)s, %(y_min)s, %(x_max)s, %(y_max)s where not exists (select x_index, y_index, tile_type_id from tile_footprint where x_index = %(x_index)s and y_index = %(y_index)s and tile_type_id = %(tile_type_id)s); -- Update any existing tile record update tile set tile_pathname = %(tile_pathname)s, tile_class_id = %(tile_class_id)s, tile_size = %(tile_size)s, ctime = now() where x_index = %(x_index)s and y_index = %(y_index)s and tile_type_id = %(tile_type_id)s and dataset_id = %(dataset_id)s; -- Insert new tile record if necessary insert into tile ( tile_id, x_index, y_index, tile_type_id, dataset_id, tile_pathname, tile_class_id, tile_size, ctime ) select nextval('tile_id_seq'::regclass), %(x_index)s, %(y_index)s, %(tile_type_id)s, %(dataset_id)s, %(tile_pathname)s, %(tile_class_id)s, %(tile_size)s, now() where not exists (select tile_id from tile where x_index = %(x_index)s and y_index = %(y_index)s and tile_type_id = %(tile_type_id)s and dataset_id = %(dataset_id)s ); """ params = { 'x_index': x_index, 'y_index': y_index, 'tile_type_id': tile_type_info['tile_type_id'], 'x_min': tile_extents[0], 'y_min': tile_extents[1], 'x_max': tile_extents[2], 'y_max': tile_extents[3], 'dataset_id': dataset_id, 'tile_pathname': tile_output_path, 'tile_class_id': 1, 'tile_size': tile_size } log_multiline(logger.debug, db_cursor.mogrify(sql, params), 'SQL', '\t') db_cursor.execute(sql, params) self.db_connection.commit() finally: 
                    self.unlock_object(tile_output_path)

        logger.info('Finished creating all tiles')
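# Illustrative sketch: create_tiles() above derives slope and aspect layers
# from the elevation band using Sobel gradients scaled by the pixel size.
# The helper below repeats those formulas with numpy/scipy, assuming square
# pixels already in metres (so the per-pixel metre-grid correction from
# get_pixel_size_grids() is not needed).
import numpy
from scipy import ndimage

RADIANS_PER_DEGREE = numpy.pi / 180.0

def slope_and_aspect(elevation_array, pixel_size_m):
    """Return (slope_degrees, aspect_degrees) from an elevation array in metres."""
    dzdx = ndimage.sobel(elevation_array, axis=1) / (8.0 * pixel_size_m)
    dzdy = ndimage.sobel(elevation_array, axis=0) / (8.0 * pixel_size_m)
    slope = numpy.arctan(numpy.hypot(dzdx, dzdy)) / RADIANS_PER_DEGREE
    # Convert from conventional mathematical angle to compass heading 0-360
    aspect = (450.0 - numpy.arctan2(dzdy, -dzdx) / RADIANS_PER_DEGREE) % 360.0
    return slope, aspect

# Example: a plane rising 1 m per 30 m pixel towards the east (axis=1).
ramp = numpy.tile(numpy.arange(10, dtype=numpy.float64), (10, 1))
slope_deg, aspect_deg = slope_and_aspect(ramp, 30.0)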
    def reproject(self):
        """Reproject the scene dataset into the tile coordinate reference
        system and extent. This method uses gdalwarp to do the reprojection."""
        # pylint: disable=too-many-locals
        x_origin = self.tile_type_info['x_origin']
        y_origin = self.tile_type_info['y_origin']
        x_size = self.tile_type_info['x_size']
        y_size = self.tile_type_info['y_size']
        x_pixel_size = self.tile_type_info['x_pixel_size']
        y_pixel_size = self.tile_type_info['y_pixel_size']
        x0 = x_origin + self.tile_footprint[0] * x_size
        y0 = y_origin + self.tile_footprint[1] * y_size
        tile_extents = (x0, y0, x0 + x_size, y0 + y_size)
        # Make the tile_extents visible to tile_record
        self.tile_extents = tile_extents
        nodata_value = self.band_stack.nodata_list[0]
        # Assume the resampling method is the same for all bands, because
        # resampling_method is defined per processing_level
        # TODO: assert this is the case
        first_file_number = self.band_stack.band_dict.keys()[0]
        resampling_method = (
            self.band_stack.band_dict[first_file_number]['resampling_method'])
        if nodata_value is not None:
            # TODO: Check this works for PQA, where
            # band_dict[10]['resampling_method'] == None
            nodata_spec = ["-srcnodata", "%d" % nodata_value,
                           "-dstnodata", "%d" % nodata_value]
        else:
            nodata_spec = []
        format_spec = []
        for format_option in self.tile_type_info['format_options'].split(','):
            format_spec.extend(["-co", "%s" % format_option])
        reproject_cmd = ["gdalwarp", "-q",
                         "-t_srs", "%s" % self.tile_type_info['crs'],
                         "-te", "%f" % tile_extents[0], "%f" % tile_extents[1],
                         "%f" % tile_extents[2], "%f" % tile_extents[3],
                         "-tr", "%f" % x_pixel_size, "%f" % y_pixel_size,
                         "-tap", "-tap",
                         "-r", "%s" % resampling_method,
                         ]
        reproject_cmd.extend(nodata_spec)
        reproject_cmd.extend(format_spec)
        reproject_cmd.extend(["-overwrite",
                              "%s" % self.band_stack.vrt_name,
                              "%s" % self.temp_tile_output_path])

        result = execute(reproject_cmd, shell=False)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform gdalwarp: ' +
                               '"%s" failed: %s' % (reproject_cmd,
                                                    result['stderr']))
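# Illustrative sketch: reproject() above builds the gdalwarp command as an
# argument list and runs it through the project's execute() helper. For
# reference, the snippet below shows a standalone equivalent using only the
# standard library; the paths, CRS, extents and pixel size in the commented
# call are placeholders, not project defaults.
import subprocess

def warp_to_tile(src_path, dst_path, crs, extents, pixel_size, resampling):
    """Warp src_path onto a fixed tile extent; raises CalledProcessError on failure."""
    cmd = ['gdalwarp', '-q',
           '-t_srs', crs,
           '-te', '%f' % extents[0], '%f' % extents[1],
           '%f' % extents[2], '%f' % extents[3],
           '-tr', '%f' % pixel_size, '%f' % pixel_size,
           '-tap', '-r', resampling,
           '-overwrite', src_path, dst_path]
    subprocess.check_call(cmd)

# warp_to_tile('scene_stack.vrt', 'tile_146_-036.tif', 'EPSG:4326',
#              (146.0, -37.0, 147.0, -36.0), 0.00025, 'near')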
def create_tiles(self, filename=None, level_name=None, tile_type_id=None): # Set default values to instance values filename = filename or self.filename level_name = level_name or self.level_name tile_type_id = tile_type_id or self.default_tile_type_id nodata_value = None tile_type_info = self.tile_type_dict[tile_type_id] dem_band_info = self.bands[tile_type_id].get(("DERIVED", level_name)) assert dem_band_info, "No band level information defined for level %s" % level_name def find_tiles(x_index=None, y_index=None): """Find any tile records for current dataset returns dict of tile information keyed by tile_id """ db_cursor2 = self.db_connection.cursor() sql = """-- Check for any existing tiles select tile_id, x_index, y_index, tile_type_id, tile_pathname, dataset_id, tile_class_id, tile_size from tile_footprint inner join tile using(x_index, y_index, tile_type_id) inner join dataset using(dataset_id) inner join processing_level using(level_id) where tile_type_id = %(tile_type_id)s and (%(x_index)s is null or x_index = %(x_index)s) and (%(y_index)s is null or y_index = %(y_index)s) and level_name = %(level_name)s and ctime is not null ; """ params = { "x_index": x_index, "y_index": y_index, "tile_type_id": tile_type_info["tile_type_id"], "level_name": level_name, } log_multiline(logger.debug, db_cursor2.mogrify(sql, params), "SQL", "\t") db_cursor2.execute(sql, params) tile_info = {} for record in db_cursor2: tile_info_dict = { "x_index": record[1], "y_index": record[2], "tile_type_id": record[3], "tile_pathname": record[4], "dataset_id": record[5], "tile_class_id": record[6], "tile_size": record[7], } tile_info[record[0]] = tile_info_dict # Keyed by tile_id log_multiline(logger.debug, tile_info, "tile_info", "\t") return tile_info # Function create_tiles starts here db_cursor = self.db_connection.cursor() dataset = gdal.Open(filename) assert dataset, "Unable to open dataset %s" % filename spatial_reference = osr.SpatialReference() spatial_reference.ImportFromWkt(dataset.GetProjection()) geotransform = dataset.GetGeoTransform() logger.debug("geotransform = %s", geotransform) latlong_spatial_reference = spatial_reference.CloneGeogCS() coord_transform_to_latlong = osr.CoordinateTransformation(spatial_reference, latlong_spatial_reference) tile_spatial_reference = osr.SpatialReference() s = re.match("EPSG:(\d+)", tile_type_info["crs"]) if s: epsg_code = int(s.group(1)) logger.debug("epsg_code = %d", epsg_code) assert tile_spatial_reference.ImportFromEPSG(epsg_code) == 0, "Invalid EPSG code for tile projection" else: assert tile_spatial_reference.ImportFromWkt(tile_type_info["crs"]), "Invalid WKT for tile projection" logger.debug("Tile WKT = %s", tile_spatial_reference.ExportToWkt()) coord_transform_to_tile = osr.CoordinateTransformation(spatial_reference, tile_spatial_reference) # Need to keep tile and lat/long references separate even though they may be equivalent # Upper Left ul_x, ul_y = geotransform[0], geotransform[3] ul_lon, ul_lat, _z = coord_transform_to_latlong.TransformPoint(ul_x, ul_y, 0) tile_ul_x, tile_ul_y, _z = coord_transform_to_tile.TransformPoint(ul_x, ul_y, 0) # Upper Right ur_x, ur_y = geotransform[0] + geotransform[1] * dataset.RasterXSize, geotransform[3] ur_lon, ur_lat, _z = coord_transform_to_latlong.TransformPoint(ur_x, ur_y, 0) tile_ur_x, tile_ur_y, _z = coord_transform_to_tile.TransformPoint(ur_x, ur_y, 0) # Lower Right lr_x, lr_y = ( geotransform[0] + geotransform[1] * dataset.RasterXSize, geotransform[3] + geotransform[5] * dataset.RasterYSize, ) lr_lon, lr_lat, _z = 
coord_transform_to_latlong.TransformPoint(lr_x, lr_y, 0) tile_lr_x, tile_lr_y, _z = coord_transform_to_tile.TransformPoint(lr_x, lr_y, 0) # Lower Left ll_x, ll_y = geotransform[0], geotransform[3] + geotransform[5] * dataset.RasterYSize ll_lon, ll_lat, _z = coord_transform_to_latlong.TransformPoint(ll_x, ll_y, 0) tile_ll_x, tile_ll_y, _z = coord_transform_to_tile.TransformPoint(ll_x, ll_y, 0) tile_min_x = min(tile_ul_x, tile_ll_x) tile_max_x = max(tile_ur_x, tile_lr_x) tile_min_y = min(tile_ll_y, tile_lr_y) tile_max_y = max(tile_ul_y, tile_ur_y) tile_index_range = ( int(floor((tile_min_x - tile_type_info["x_origin"]) / tile_type_info["x_size"])), int(floor((tile_min_y - tile_type_info["y_origin"]) / tile_type_info["y_size"])), int(ceil((tile_max_x - tile_type_info["x_origin"]) / tile_type_info["x_size"])), int(ceil((tile_max_y - tile_type_info["y_origin"]) / tile_type_info["y_size"])), ) sql = """-- Find dataset_id for given path select dataset_id from dataset where dataset_path like '%%' || %(basename)s """ params = {"basename": os.path.basename(filename)} log_multiline(logger.debug, db_cursor.mogrify(sql, params), "SQL", "\t") db_cursor.execute(sql, params) result = db_cursor.fetchone() if result: # Record already exists dataset_id = result[0] if self.refresh: logger.info("Updating existing record for %s", filename) sql = """ update dataset set level_id = (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)), datetime_processed = %(datetime_processed)s, dataset_size = %(dataset_size)s, crs = %(crs)s, ll_x = %(ll_x)s, ll_y = %(ll_y)s, lr_x = %(lr_x)s, lr_y = %(lr_y)s, ul_x = %(ul_x)s, ul_y = %(ul_y)s, ur_x = %(ur_x)s, ur_y = %(ur_y)s, x_pixels = %(x_pixels)s, y_pixels = %(y_pixels)s where dataset_id = %(dataset_id)s; select %(dataset_id)s """ else: logger.info("Skipping existing record for %s", filename) return else: # Record doesn't already exist logger.info("Creating new record for %s", filename) dataset_id = None sql = """-- Create new dataset record insert into dataset( dataset_id, acquisition_id, dataset_path, level_id, datetime_processed, dataset_size, crs, ll_x, ll_y, lr_x, lr_y, ul_x, ul_y, ur_x, ur_y, x_pixels, y_pixels ) select nextval('dataset_id_seq') as dataset_id, null as acquisition_id, %(dataset_path)s, (select level_id from processing_level where upper(level_name) = upper(%(processing_level)s)), %(datetime_processed)s, %(dataset_size)s, %(crs)s, %(ll_x)s, %(ll_y)s, %(lr_x)s, %(lr_y)s, %(ul_x)s, %(ul_y)s, %(ur_x)s, %(ur_y)s, %(x_pixels)s, %(y_pixels)s where not exists (select dataset_id from dataset where dataset_path = %(dataset_path)s ); select dataset_id from dataset where dataset_path = %(dataset_path)s ; """ dataset_size = self.getFileSizekB(filename) # Need size in kB to match other datasets # same params for insert or update params = { "dataset_id": dataset_id, "dataset_path": filename, "processing_level": level_name, "datetime_processed": None, "dataset_size": dataset_size, "ll_lon": ll_lon, "ll_lat": ll_lat, "lr_lon": lr_lon, "lr_lat": lr_lat, "ul_lon": ul_lon, "ul_lat": ul_lat, "ur_lon": ur_lon, "ur_lat": ur_lat, "crs": dataset.GetProjection(), "ll_x": ll_x, "ll_y": ll_y, "lr_x": lr_x, "lr_y": lr_y, "ul_x": ul_x, "ul_y": ul_y, "ur_x": ur_x, "ur_y": ur_y, "x_pixels": dataset.RasterXSize, "y_pixels": dataset.RasterYSize, "gcp_count": None, "mtl_text": None, "cloud_cover": None, } log_multiline(logger.debug, db_cursor.mogrify(sql, params), "SQL", "\t") db_cursor.execute(sql, params) result = db_cursor.fetchone() # Retrieve new 
        dataset_id = dataset_id or result[0]

        tile_output_root = os.path.join(
            self.tile_root, tile_type_info["tile_directory"], level_name, os.path.basename(filename)
        )
        logger.debug("tile_output_root = %s", tile_output_root)
        self.create_directory(tile_output_root)

        work_directory = os.path.join(self.temp_dir, os.path.basename(filename))
        logger.debug("work_directory = %s", work_directory)
        self.create_directory(work_directory)

        for x_index in range(tile_index_range[0], tile_index_range[2]):
            for y_index in range(tile_index_range[1], tile_index_range[3]):
                tile_info = find_tiles(x_index, y_index)
                if tile_info:
                    logger.info("Skipping existing tile (%d, %d)", x_index, y_index)
                    continue

                tile_basename = (
                    "_".join([level_name, re.sub(r"\+", "", "%+04d_%+04d" % (x_index, y_index))])
                    + tile_type_info["file_extension"]
                )
                tile_output_path = os.path.join(tile_output_root, tile_basename)

                # Check whether this tile has already been processed
                if not self.lock_object(tile_output_path):
                    logger.warning("Tile %s already being processed - skipping.", tile_output_path)
                    continue

                try:
                    self.remove(tile_output_path)

                    temp_tile_path = os.path.join(self.temp_dir, tile_basename)

                    tile_extents = (
                        tile_type_info["x_origin"] + x_index * tile_type_info["x_size"],
                        tile_type_info["y_origin"] + y_index * tile_type_info["y_size"],
                        tile_type_info["x_origin"] + (x_index + 1) * tile_type_info["x_size"],
                        tile_type_info["y_origin"] + (y_index + 1) * tile_type_info["y_size"],
                    )
                    logger.debug("tile_extents = %s", tile_extents)

                    command_string = "gdalwarp"
                    if not self.debug:
                        command_string += " -q"
                    command_string += " -t_srs %s -te %f %f %f %f -tr %f %f -tap -r %s" % (
                        tile_type_info["crs"],
                        tile_extents[0],
                        tile_extents[1],
                        tile_extents[2],
                        tile_extents[3],
                        tile_type_info["x_pixel_size"],
                        tile_type_info["y_pixel_size"],
                        dem_band_info[10]["resampling_method"],
                    )

                    if nodata_value is not None:
                        command_string += " -srcnodata %d -dstnodata %d" % (nodata_value, nodata_value)

                    command_string += " -of %s" % tile_type_info["file_format"]

                    if tile_type_info["format_options"]:
                        for format_option in tile_type_info["format_options"].split(","):
                            command_string += " -co %s" % format_option

                    command_string += " -overwrite %s %s" % (filename, temp_tile_path)

                    logger.debug("command_string = %s", command_string)

                    result = execute(command_string=command_string)

                    if result["stdout"]:
                        log_multiline(logger.info, result["stdout"], "stdout from " + command_string, "\t")

                    if result["returncode"]:
                        log_multiline(logger.error, result["stderr"], "stderr from " + command_string, "\t")
                        raise Exception("%s failed" % command_string)

                    temp_dataset = gdal.Open(temp_tile_path)

                    gdal_driver = gdal.GetDriverByName(tile_type_info["file_format"])

                    # output_dataset = gdal_driver.Create(output_tile_path,
                    #     nbar_dataset.RasterXSize, nbar_dataset.RasterYSize,
                    #     1, nbar_dataset.GetRasterBand(1).DataType,
                    #     tile_type_info['format_options'].split(','))
                    output_dataset = gdal_driver.Create(
                        tile_output_path,
                        temp_dataset.RasterXSize,
                        temp_dataset.RasterYSize,
                        len(dem_band_info),
                        temp_dataset.GetRasterBand(1).DataType,
                        tile_type_info["format_options"].split(","),
                    )
                    assert output_dataset, "Unable to open output dataset %s" % tile_output_path

                    output_geotransform = temp_dataset.GetGeoTransform()
                    output_dataset.SetGeoTransform(output_geotransform)
                    output_dataset.SetProjection(temp_dataset.GetProjection())

                    elevation_array = temp_dataset.GetRasterBand(1).ReadAsArray()
                    del temp_dataset
                    self.remove(temp_tile_path)

                    pixel_x_size = abs(output_geotransform[1])
                    pixel_y_size = abs(output_geotransform[5])
                    x_m_array, y_m_array = self.get_pixel_size_grids(output_dataset)

                    dzdx_array = ndimage.sobel(elevation_array, axis=1) / (8.0 * abs(output_geotransform[1]))
                    dzdx_array = numexpr.evaluate("dzdx_array * pixel_x_size / x_m_array")
                    del x_m_array

                    dzdy_array = ndimage.sobel(elevation_array, axis=0) / (8.0 * abs(output_geotransform[5]))
                    dzdy_array = numexpr.evaluate("dzdy_array * pixel_y_size / y_m_array")
                    del y_m_array

                    for band_file_number in sorted(dem_band_info.keys()):
                        output_band_number = dem_band_info[band_file_number]["tile_layer"]
                        output_band = output_dataset.GetRasterBand(output_band_number)

                        if band_file_number == 10:  # Elevation
                            output_band.WriteArray(elevation_array)
                            del elevation_array

                        elif band_file_number == 20:  # Slope
                            hypotenuse_array = numpy.hypot(dzdx_array, dzdy_array)
                            slope_array = numexpr.evaluate("arctan(hypotenuse_array) / RADIANS_PER_DEGREE")
                            del hypotenuse_array
                            output_band.WriteArray(slope_array)
                            del slope_array

                        elif band_file_number == 30:  # Aspect
                            # Convert angles from conventional radians to compass heading 0-360
                            aspect_array = numexpr.evaluate(
                                "(450 - arctan2(dzdy_array, -dzdx_array) / RADIANS_PER_DEGREE) % 360"
                            )
                            output_band.WriteArray(aspect_array)
                            del aspect_array

                        if nodata_value is not None:
                            output_band.SetNoDataValue(nodata_value)
                        output_band.FlushCache()

                    # ===========================================================
                    # # This is not strictly necessary - copy metadata to output dataset
                    # output_dataset_metadata = temp_dataset.GetMetadata()
                    # if output_dataset_metadata:
                    #     output_dataset.SetMetadata(output_dataset_metadata)
                    #     log_multiline(logger.debug, output_dataset_metadata, 'output_dataset_metadata', '\t')
                    # ===========================================================

                    output_dataset.FlushCache()
                    del output_dataset
                    logger.info("Finished writing dataset %s", tile_output_path)

                    tile_size = self.getFileSizeMB(tile_output_path)

                    sql = """-- Insert new tile_footprint record if necessary
insert into tile_footprint (
    x_index,
    y_index,
    tile_type_id,
    x_min,
    y_min,
    x_max,
    y_max
    )
select
    %(x_index)s,
    %(y_index)s,
    %(tile_type_id)s,
    %(x_min)s,
    %(y_min)s,
    %(x_max)s,
    %(y_max)s
where not exists (
    select x_index, y_index, tile_type_id
    from tile_footprint
    where x_index = %(x_index)s
        and y_index = %(y_index)s
        and tile_type_id = %(tile_type_id)s);

-- Update any existing tile record
update tile
set tile_pathname = %(tile_pathname)s,
    tile_class_id = %(tile_class_id)s,
    tile_size = %(tile_size)s,
    ctime = now()
where x_index = %(x_index)s
    and y_index = %(y_index)s
    and tile_type_id = %(tile_type_id)s
    and dataset_id = %(dataset_id)s;

-- Insert new tile record if necessary
insert into tile (
    tile_id,
    x_index,
    y_index,
    tile_type_id,
    dataset_id,
    tile_pathname,
    tile_class_id,
    tile_size,
    ctime
    )
select
    nextval('tile_id_seq'::regclass),
    %(x_index)s,
    %(y_index)s,
    %(tile_type_id)s,
    %(dataset_id)s,
    %(tile_pathname)s,
    %(tile_class_id)s,
    %(tile_size)s,
    now()
where not exists (
    select tile_id
    from tile
    where x_index = %(x_index)s
        and y_index = %(y_index)s
        and tile_type_id = %(tile_type_id)s
        and dataset_id = %(dataset_id)s
    );
"""
                    params = {
                        "x_index": x_index,
                        "y_index": y_index,
                        "tile_type_id": tile_type_info["tile_type_id"],
                        "x_min": tile_extents[0],
                        "y_min": tile_extents[1],
                        "x_max": tile_extents[2],
                        "y_max": tile_extents[3],
                        "dataset_id": dataset_id,
                        "tile_pathname": tile_output_path,
                        "tile_class_id": 1,
                        "tile_size": tile_size,
                    }

                    log_multiline(logger.debug, db_cursor.mogrify(sql, params), "SQL", "\t")
                    db_cursor.execute(sql, params)
                    self.db_connection.commit()
                finally:
                    self.unlock_object(tile_output_path)

        logger.info("Finished creating all tiles")
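# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the tiler above): the slope/aspect
# arithmetic from the band loop, isolated on a tiny synthetic elevation grid.
# The 30 m cell size and the sample heights are assumptions made purely for
# demonstration; the per-pixel metre-grid correction is omitted because the
# toy grid is already assumed to be in metres.
import numpy
import numexpr
from scipy import ndimage

RADIANS_PER_DEGREE = numpy.pi / 180.0

elevation_array = numpy.array([[10.0, 12.0, 14.0],
                               [10.0, 13.0, 16.0],
                               [10.0, 14.0, 18.0]])  # metres, toy values
pixel_size = 30.0                                    # metres, assumed square cells

# Same formulation as the tiler: 3x3 Sobel kernels normalised by 8 * cell size.
dzdx_array = ndimage.sobel(elevation_array, axis=1) / (8.0 * pixel_size)
dzdy_array = ndimage.sobel(elevation_array, axis=0) / (8.0 * pixel_size)

# Slope in degrees from the gradient magnitude.
hypotenuse_array = numpy.hypot(dzdx_array, dzdy_array)
slope_array = numexpr.evaluate("arctan(hypotenuse_array) / RADIANS_PER_DEGREE")

# Aspect converted from conventional radians to a 0-360 compass heading.
aspect_array = numexpr.evaluate(
    "(450 - arctan2(dzdy_array, -dzdx_array) / RADIANS_PER_DEGREE) % 360"
)

print(slope_array)
print(aspect_array)
# ---------------------------------------------------------------------------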
    def __init__(self, dataset_path):
        """Opens the dataset and extracts metadata."""

        self._satellite_tag = "MT"
        self._satellite_sensor = "MODIS-Terra"

        self._dataset_file = os.path.abspath(dataset_path)
        fileName, fileExtension = os.path.splitext(self._dataset_file)

        if fileName.endswith("RBQ500"):
            self._processor_level = "RBQ500"
        else:
            self._processor_level = "MOD09"

        vrt_file = open(dataset_path, 'r')
        vrt_string = vrt_file.read()
        vrt_file.close()

        self._dataset_path = re.search('NETCDF:(.*):', vrt_string).groups(1)[0]
        self._vrt_file = dataset_path

        self._ds = gdal.Open(self._dataset_path, gdal.GA_ReadOnly)
        if not self._ds:
            raise DatasetError("Unable to open %s" % self.get_dataset_path())
        self._dataset_size = os.path.getsize(self._dataset_path)

        LOGGER.debug('Transform = %s', self._ds.GetGeoTransform())
        LOGGER.debug('Projection = %s', self._ds.GetProjection())
        LOGGER.debug('RasterXSize = %s', self._ds.RasterXSize)
        LOGGER.debug('RasterYSize = %s', self._ds.RasterYSize)

        command = "ncdump -v InputFileGlobalAttributes %s" % self._dataset_path
        result = execute(command)
        if result['returncode'] != 0:
            raise DatasetError('Unable to perform ncdump: ' +
                               '"%s" failed: %s' % (command, result['stderr']))

        s = re.sub(r"\s+", "", result['stdout'])
        LOGGER.debug('%s = %s', command, s)

        self._rangeendingdate = re.search(
            'RANGEENDINGDATE\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEENDINGDATE',
            s).groups(1)[0]
        LOGGER.debug('RangeEndingDate = %s', self._rangeendingdate)

        self._rangeendingtime = re.search(
            'RANGEENDINGTIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEENDINGTIME',
            s).groups(1)[0]
        LOGGER.debug('RangeEndingTime = %s', self._rangeendingtime)

        self._rangebeginningdate = re.search(
            'RANGEBEGINNINGDATE\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEBEGINNINGDATE',
            s).groups(1)[0]
        LOGGER.debug('RangeBeginningDate = %s', self._rangebeginningdate)

        self._rangebeginningtime = re.search(
            'RANGEBEGINNINGTIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)\\\\\"\\\\nEND_OBJECT=RANGEBEGINNINGTIME',
            s).groups(1)[0]
        LOGGER.debug('RangeBeginningTime = %s', self._rangebeginningtime)

        self.scene_start_datetime = self._rangebeginningdate + " " + self._rangebeginningtime
        self.scene_end_datetime = self._rangeendingdate + " " + self._rangeendingtime

        self._orbitnumber = int(re.search(
            'ORBITNUMBER\\\\nCLASS=\\\\\"1\\\\\"\\\\nNUM_VAL=1\\\\nVALUE=(.*)\\\\nEND_OBJECT=ORBITNUMBER',
            s).groups(1)[0])
        LOGGER.debug('OrbitNumber = %d', self._orbitnumber)

        self._cloud_cover_percentage = float(
            re.search('Cloudy:\\\\t(.*)\\\\n\\\\tMixed', s).groups(1)[0])
        LOGGER.debug('CloudCover = %f', self._cloud_cover_percentage)

        self._completion_datetime = re.search(
            'PRODUCTIONDATETIME\\\\nNUM_VAL=1\\\\nVALUE=\\\\\"(.*)Z\\\\\"\\\\nEND_OBJECT=PRODUCTIONDATETIME',
            s).groups(1)[0]
        LOGGER.debug('ProcessedTime = %s', self._completion_datetime)

        self._metadata = self._ds.GetMetadata('SUBDATASETS')

        band1 = gdal.Open(self._metadata['SUBDATASET_1_NAME'])

        # Get coordinates
        self._width = band1.RasterXSize
        self._height = band1.RasterYSize

        self._gt = band1.GetGeoTransform()
        self._minx = self._gt[0]
        self._miny = self._gt[3] + self._width * self._gt[4] + self._height * self._gt[5]
        self._maxx = self._gt[0] + self._width * self._gt[1] + self._height * self._gt[2]
        self._maxy = self._gt[3]

        LOGGER.debug('min/max x coordinates (%s, %s)', str(self._minx), str(self._maxx))
        LOGGER.debug('min/max y coordinates (%s, %s)', str(self._miny), str(self._maxy))
        LOGGER.debug('pixel size (%s, %s)', str(self._gt[1]), str(self._gt[5]))
        self._pixelX = self._width
        self._pixelY = self._height
        LOGGER.debug('pixels (%s, %s)', str(self._pixelX), str(self._pixelY))

        self._gcp_count = None
        self._mtl_text = None
        self._xml_text = None

        AbstractDataset.__init__(self)
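# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the dataset class above): how the
# doubled-backslash patterns match the ncdump output. In the dumped global
# attribute the newlines appear as a literal backslash followed by "n", and
# the whitespace has already been stripped out; the sample string and the
# date in it are made up purely for demonstration.
import re

s = r'...RANGEENDINGDATE\nNUM_VAL=1\nVALUE=\"2012-03-17\"\nEND_OBJECT=RANGEENDINGDATE...'

# Raw-string equivalent of the pattern used above: each "\\" in the regex
# matches one literal backslash in the text.
pattern = r'RANGEENDINGDATE\\nNUM_VAL=1\\nVALUE=\\"(.*)\\"\\nEND_OBJECT=RANGEENDINGDATE'
print(re.search(pattern, s).groups(1)[0])  # -> 2012-03-17
# ---------------------------------------------------------------------------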