def submit_rulecheck(sensor_name, target, date):
    sensordef = count_defs[sensor_name]
    targetdef = sensordef[target]
    submitted = []

    s = Sensors("", "ua-mac")

    if "parent" in targetdef:
        target_dir = os.path.join(sensordef[targetdef["parent"]]["path"], date)
        target_timestamps = os.listdir(target_dir)

        disp_name = s.get_display_name(targetdef["parent"])

        for ts in target_timestamps:
            if ts.find("-") > -1 and ts.find("__") > -1:
                # TODO: and os.listdir(os.path.join(target_dir, ts)):
                # Get first populated timestamp for the date that has a Clowder ID
                dataset_name = disp_name + " - " + ts
                raw_dsid = get_dsid_by_name(dataset_name)
                if raw_dsid:
                    # Submit associated Clowder ID to rulechecker
                    submit_extraction(CONN, CLOWDER_HOST, CLOWDER_KEY, raw_dsid, "ncsa.rulechecker.terra")
                    submitted.append({"name": dataset_name, "id": raw_dsid})
                    break

    return json.dumps({
        "extractor": "ncsa.rulechecker.terra",
        "datasets submitted": submitted
    })
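# Usage sketch (hypothetical values): assumes count_defs, CONN, CLOWDER_HOST and
# CLOWDER_KEY are configured at module level as submit_rulecheck() expects; the
# sensor key, target key, and date below are examples only.
def _example_submit_rulecheck():
    response = submit_rulecheck("stereoTop", "raw_data", "2018-05-01")
    print(response)  # JSON summary of datasets submitted to ncsa.rulechecker.terra
    return response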
def check_message_individual(self, connector, host, secret_key, resource, parameters):
    """This is a deprecated method that operates on a single capture, not the field mosaic"""
    ds_md = get_info(connector, host, secret_key, resource['parent']['id'])

    s = Sensors('', 'ua-mac', 'rgb_geotiff')
    if ds_md['name'].find(s.get_display_name()) > -1:
        timestamp = ds_md['name'].split(" - ")[1]
        side = 'left' if resource['name'].find("_left") > -1 else 'right'
        out_csv = self.sensors.get_sensor_path(timestamp, opts=[side], ext='csv')

        if not os.path.exists(out_csv) or self.overwrite:
            return CheckMessage.download
        else:
            logging.info("output file already exists; skipping %s" % resource['id'])

    return CheckMessage.ignore
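# Illustrative sketch of how the CheckMessage value returned above is typically
# acted on (the extractor framework normally performs this dispatch itself; the
# wrapper name and flow here are assumptions, not part of this module).
def _example_check_dispatch(extractor, connector, host, secret_key, resource, parameters):
    decision = extractor.check_message_individual(connector, host, secret_key, resource, parameters)
    if decision == CheckMessage.download:
        logging.info("dataset %s will be downloaded for processing" % resource['id'])
    else:
        logging.info("dataset %s will be ignored" % resource['id'])
    return decision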
def fullFieldMosaicStitcher(extractor, connector, host, secret_key, resource, rulemap):
    results = {}
    full_field_ready = False

    # full-field queues must have at least this percent of the raw datasets present to trigger
    tolerance_pct = 100
    # full-field queues must have at least this many datasets to trigger
    min_datasets = 200

    # Determine output dataset
    dsname = resource["dataset_info"]["name"]
    sensor = dsname.split(" - ")[0]

    # Map sensor display names to the GeoTIFF stitching target in those sensor datasets,
    # including directory to look for date subfolder to count # of datasets on that date
    if os.path.exists('/projects/arpae/terraref/sites'):
        TERRAREF_BASE = '/projects/arpae/terraref/sites'
    elif os.path.exists('/home/clowder/sites'):
        TERRAREF_BASE = '/home/clowder/sites'
    else:
        TERRAREF_BASE = '/home/extractor/sites'

    sensor_lookup = Sensors(TERRAREF_BASE, 'ua-mac')
    stitchable_sensors = {
        sensor_lookup.get_display_name('rgb_geotiff'): {
            "target": "_left.tif",
            "raw_dir": os.path.join(*(sensor_lookup.get_sensor_path('', sensor='stereoTop').split("/")[:-2]))
        },
        sensor_lookup.get_display_name('ir_geotiff'): {
            "target": ".tif",
            "raw_dir": os.path.join(*(sensor_lookup.get_sensor_path('', sensor='flirIrCamera').split("/")[:-2]))
        },
        sensor_lookup.get_display_name('laser3d_heightmap'): {
            "target": "_west.tif",
            "raw_dir": os.path.join(*(sensor_lookup.get_sensor_path('', sensor='scanner3DTop').split("/")[:-2]))
        },
        'scanner3DTop': {
            "target": "_west.tif",
            "raw_dir": os.path.join(*(sensor_lookup.get_sensor_path('', sensor='scanner3DTop').split("/")[:-2]))
        }
    }

    if sensor in stitchable_sensors.keys():
        timestamp = dsname.split(" - ")[1]
        date = timestamp.split("__")[0]
        progress_key = "Full Field -- " + sensor + " - " + date

        # Is there actually a new left geoTIFF to add to the stack?
        target_id = None
        for f in resource['files']:
            if f['filename'].endswith(stitchable_sensors[sensor]["target"]):
                target_id = f['id']
                target_path = f['filepath']
        if not target_id:
            # If not, no need to trigger anything for now.
            logging.info("no target geoTIFF found in %s" % dsname)
            for trig_extractor in rulemap["extractors"]:
                results[trig_extractor] = {
                    "process": False,
                    "parameters": {}
                }
            return results

        logging.info("[%s] found target: %s" % (progress_key, target_id))

        # Fetch all existing file IDs that would be fed into this field mosaic
        progress = rule_utils.retrieveProgressFromDB(progress_key)

        # Is current ID already included in the list? If not, add it
        submit_record = False
        if 'ids' in progress:
            ds_count = len(progress['ids'].keys())
            if target_id not in progress['ids'].keys():
                submit_record = True
                ds_count += 1
            else:
                # Already seen this geoTIFF, so skip for now.
logging.info("previously logged target geoTIFF from %s" % dsname) for trig_extractor in rulemap["extractors"]: results[trig_extractor] = { "process": False, "parameters": {} } else: submit_record = True ds_count = 1 if submit_record: for trig_extractor in rulemap["extractors"]: rule_utils.submitProgressToDB("fullFieldMosaicStitcher", trig_extractor, progress_key, target_id, target_path) if ds_count >= min_datasets: # Check to see if list of geotiffs is same length as list of raw datasets root_dir = stitchable_sensors[sensor]["raw_dir"] if len(connector.mounted_paths) > 0: for source_path in connector.mounted_paths: if root_dir.startswith(source_path): root_dir = root_dir.replace(source_path, connector.mounted_paths[source_path]) date_directory = os.path.join(root_dir, date) date_directory = ("/"+date_directory if not date_directory.startswith("/") else "") raw_file_count = float(subprocess.check_output("ls %s | wc -l" % date_directory, shell=True).strip()) logging.info("found %s raw files in %s" % (int(raw_file_count), date_directory)) if raw_file_count == 0: raise Exception("problem communicating with file system") else: # If we have enough raw files accounted for and more than min_datasets, trigger prog_pct = (len(progress['ids'])/raw_file_count)*100 if prog_pct >= tolerance_pct: full_field_ready = True else: logging.info("found %s/%s necessary geotiffs (%s%%)" % (len(progress['ids']), int(raw_file_count), "{0:.2f}".format(prog_pct))) for trig_extractor in rulemap["extractors"]: results[trig_extractor] = { "process": full_field_ready, "parameters": {} } if full_field_ready: results[trig_extractor]["parameters"]["output_dataset"] = "Full Field - "+date # Write output ID list to a text file output_dir = os.path.dirname(sensor_lookup.get_sensor_path(date, 'fullfield')) logging.info("writing %s_file_ids.json to %s" % (sensor, output_dir)) if not os.path.exists(output_dir): os.makedirs(output_dir) output_file = os.path.join(output_dir, sensor+"_file_paths.json") # Sort IDs by file path before writing to disk # TODO: Eventually alternate every other image so we have half complete and half "underneath" paths = [] for fid in progress['ids'].keys(): paths.append(progress['ids'][fid]) with open(output_file, 'w') as out: json.dump(sorted(paths), out) results[trig_extractor]["parameters"]["file_paths"] = output_file else: for trig_extractor in rulemap["extractors"]: results[trig_extractor] = { "process": False, "parameters": {} } return results
class Transformer():
    """Generic class for supporting transformers
    """
    def __init__(self, **kwargs):
        """Performs initialization of class instance
        Arguments:
            kwargs: additional parameters passed in to Transformer
        """
        # pylint: disable=unused-argument
        self.sensor = None
        self.args = None

    @property
    def default_epsg(self):
        """Returns the default EPSG code that utilities expect
        """
        return 4326

    @property
    def sensor_name(self):
        """Returns the name of the sensor we represent
        """
        return configuration.TRANSFORMER_SENSOR

    @property
    def supported_image_file_exts(self):
        """Returns the list of supported image file extension strings (in lower case)
        """
        return ['tif', 'tiff', 'jpg']

    def get_image_file_epsg(self, source_path: str) -> str:
        """Returns the EPSG of the georeferenced image file
        Arguments:
            source_path: the path to the image to load the EPSG code from
        Return:
            Returns the EPSG code loaded from the file. None is returned if there is a problem
            or the file doesn't have an EPSG code
        """
        # pylint: disable=no-self-use
        return tr_get_epsg(source_path)

    def get_image_file_geobounds(self, source_path: str) -> list:
        """Uses gdal functionality to retrieve rectilinear boundaries from the file
        Args:
            source_path(str): path of the file to get the boundaries from
        Returns:
            The upper-left and calculated lower-right boundaries of the image in a list upon
            success. The values are returned in the following order: min_y, max_y, min_x, max_x.
            A list of numpy.nan is returned if the boundaries can't be determined
        """
        # pylint: disable=no-self-use
        return tr_image_get_geobounds(source_path)

    def generate_transformer_md(self) -> dict:
        """Generates metadata about this transformer
        Returns:
            Returns the transformer metadata
        """
        # pylint: disable=no-self-use
        return {
            'version': configuration.TRANSFORMER_VERSION,
            'name': configuration.TRANSFORMER_NAME,
            'author': configuration.AUTHOR_NAME,
            'description': configuration.TRANSFORMER_DESCRIPTION,
            'repository': {'repUrl': configuration.REPOSITORY}
        }

    def add_parameters(self, parser: argparse.ArgumentParser) -> None:
        """Adds processing parameters to existing parameters
        Arguments:
            parser: instance of argparse
        """
        # pylint: disable=no-self-use
        parser.add_argument('--logging', '-l', nargs='?', default=os.getenv("LOGGING"),
                            help='file or url or logging configuration (default=None)')

        parser.epilog = configuration.TRANSFORMER_NAME + ' version ' + configuration.TRANSFORMER_VERSION + \
                        ' author ' + configuration.AUTHOR_NAME + ' ' + configuration.AUTHOR_EMAIL

    def get_transformer_params(self, args: argparse.Namespace, metadata_list: list) -> dict:
        """Returns a parameter list for processing data
        Arguments:
            args: result of calling argparse.parse_args
            metadata_list: the loaded metadata
        """
        # pylint: disable=no-self-use
        # Setup logging
        pyc_setup_logging(args.logging)

        self.args = args

        # Determine if we're using JSONLD (which we should be)
        metadata = metadata_list[0]
        if 'content' in metadata:
            parse_md = metadata['content']
        else:
            parse_md = metadata

        terraref_md = tr_get_terraref_metadata(parse_md, configuration.TRANSFORMER_SENSOR)
        if not terraref_md:
            return {'code': -5001,
                    'error': "Unable to load Gantry information from metadata for '%s'" %
                             configuration.TRANSFORMER_TYPE}

        timestamp = __internal__.get_metadata_timestamp(parse_md)
        if not timestamp:
            return {'code': -5002,
                    'error': "Unable to locate timestamp in metadata for '%s'" %
                             configuration.TRANSFORMER_TYPE}

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = \
            tr_get_season_and_experiment(__internal__.get_datestamp(timestamp),
                                         configuration.TRANSFORMER_TYPE, terraref_md)

        # Setup our sensor
        self.sensor = Sensors(base='', station='ua-mac', sensor=configuration.TRANSFORMER_SENSOR)
        leaf_name = self.sensor.get_display_name()

        # Get our trimmed metadata
        terraref_md_trim = tr_get_terraref_metadata(parse_md)
        if updated_experiment is not None:
            terraref_md_trim['experiment_metadata'] = updated_experiment

        # Get the list of files, if there are some
        file_list = []
        if args.file_list:
            for one_file in args.file_list:
                # Filter out arguments that are obviously not files
                if not one_file.startswith('-'):
                    file_list.append(one_file)

        # Prepare our parameters
        check_md = {'timestamp': timestamp,
                    'season': season_name,
                    'experiment': experiment_name,
                    'container_name': None,
                    'target_container_name': leaf_name,  # TODO: Is this needed?
                    'trigger_name': None,
                    'context_md': terraref_md_trim,
                    'working_folder': args.working_space,
                    'list_files': lambda: file_list
                   }

        return {'check_md': check_md,
                'transformer_md': tr_get_extractor_metadata(terraref_md, configuration.TRANSFORMER_NAME),
                'full_md': [parse_md]
               }
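# Illustrative driver (a sketch only, not the real entry point): shows the order in
# which the methods above are assumed to be called. The working_space and file_list
# arguments are normally supplied by the hosting entry point, and the metadata file
# name is hypothetical.
def _example_transformer_run():
    transformer = Transformer()

    parser = argparse.ArgumentParser(description=configuration.TRANSFORMER_DESCRIPTION)
    transformer.add_parameters(parser)
    parser.add_argument('--working_space', default='/tmp')  # assumed to be added by the entry point
    parser.add_argument('file_list', nargs='*')             # assumed to be added by the entry point
    args = parser.parse_args()

    with open('cleaned_metadata.json', 'r') as in_file:     # hypothetical metadata file
        metadata = json.load(in_file)

    params = transformer.get_transformer_params(args, [metadata])
    if 'error' in params:
        logging.error(params['error'])
    return params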
def bin2tif(filename: str, metadata: str, working_space: str) -> dict:
    """Converts the bin file to a geotiff file
    Arguments:
        filename: the path to the .bin file
        metadata: the path to the cleaned metadata file
        working_space: the path to our working space
    """
    result = {}

    loaded_json = do_load_json_file(metadata)
    if not loaded_json:
        msg = "Unable to load JSON from file '%s'" % metadata
        logging.error(msg)
        logging.error("    JSON may be missing or invalid. Returning an error")
        result['error'] = {'message': msg}
        result['code'] = -1
        return result

    if 'content' in loaded_json:
        parse_json = loaded_json['content']
    else:
        parse_json = loaded_json

    terra_md_full = do_get_terraref_metadata(parse_json, EXTRACTOR_NAME)
    if not terra_md_full:
        msg = "Unable to find %s metadata in JSON file '%s'" % (EXTRACTOR_NAME, metadata)
        logging.error(msg)
        logging.error("    JSON may be missing or invalid. Returning an error")
        result['error'] = {'message': msg}
        result['code'] = -2
        return result

    timestamp = get_metadata_timestamp(terra_md_full)
    if not timestamp:
        msg = "Unable to find timestamp in JSON file '%s'" % filename
        logging.error(msg)
        logging.error("    JSON may be missing or invalid. Returning an error")
        result['error'] = {'message': msg}
        result['code'] = -3
        return result

    # Fetch experiment name from terra metadata
    _, _, updated_experiment = do_get_season_and_experiment(timestamp, 'stereoTop', terra_md_full)
    # if None in [season_name, experiment_name]:
    #     raise ValueError("season and experiment could not be determined")
    #
    # # Determine output directory
    # self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name,
    #                                                                          self.sensors.get_display_name(),
    #                                                                          timestamp[:4], timestamp[5:7],
    #                                                                          timestamp[8:10], timestamp))
    # target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
    #                                             self.clowderspace, season_name, experiment_name,
    #                                             self.sensors.get_display_name(),
    #                                             timestamp[:4], timestamp[5:7], timestamp[8:10],
    #                                             leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    sensor = Sensors(base='', station='ua-mac', sensor='rgb_geotiff')
    leaf_name = sensor.get_display_name()

    bin_type = 'left' if filename.endswith('_left.bin') else 'right' if filename.endswith('_right.bin') else None
    if not bin_type:
        msg = "Bin file must be a left or right file: '%s'" % filename
        logging.error(msg)
        logging.error("    Returning an error")
        result['error'] = {'message': msg}
        result['code'] = -4
        return result

    terra_md_trim = do_get_terraref_metadata(parse_json)
    if updated_experiment is not None:
        terra_md_trim['experiment_metadata'] = updated_experiment
    terra_md_trim['raw_data_source'] = filename

    tiff_filename = os.path.splitext(os.path.basename(filename))[0] + '.tif'
    tiff_path = os.path.join(working_space, tiff_filename)

    try:
        bin_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, bin_type)
        gps_bounds_bin = do_geojson_to_tuples(terra_md_full['spatial_metadata'][bin_type]['bounding_box'])
    except KeyError:
        msg = "Spatial metadata is not properly identified. Unable to continue"
Unable to continue" logging.error(msg) logging.error(" Returning an error") result['error'] = {'message': msg} result['code'] = -5 return result # Extractor info extractor_info = { 'name': EXTRACTOR_NAME, 'version': EXTRCTOR_VERSION, 'author': "*****@*****.**", 'description': "Maricopa agricultural gantry bin to geotiff converter", 'repository': [{ "repType": "git", "repUrl": "https://github.com/terraref/extractors-stereo-rgb.git" }] } # Perform actual processing new_image = terraref.stereo_rgb.process_raw(bin_shape, filename, None) do_create_geotiff(new_image, gps_bounds_bin, tiff_path, None, True, extractor_info, terra_md_full, compress=True) # level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset') context = ['https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld'] terra_md_trim['extractor_version'] = EXTRCTOR_VERSION new_md = { '@context': context, 'content': terra_md_trim, 'filename': tiff_filename, 'agent': { '@type': 'cat:extractor', 'version': EXTRCTOR_VERSION, 'name': EXTRACTOR_NAME } } # Setup the result result['container'] = [{ 'name': leaf_name, 'exists': False, 'metadata': { 'replace': True, 'data': new_md }, 'file': [{ 'path': tiff_path, 'key': sensor.sensor }] }] result['code'] = 0 return result
def perform_uploads(self, connector, host, secret_key, resource, default_dsid, content,
                    season_name, experiment_name, timestamp):
    """Performs the uploading of all the files we've put onto the upload list
    Args:
        connector(obj): the message queue connector instance
        host(str): the URI of the host making the connection
        secret_key(str): used with the host API
        resource(dict): the resource currently being processed (used for logging)
        default_dsid(str): the default dataset to load files to
        content(str): content information for the files we're uploading
        season_name(str): the name of the season
        experiment_name(str): the name of the experiment
        timestamp(str): the timestamp string associated with the source dataset
    Notes:
        We loop through the files, compressing and remapping the names as needed. If the
        sensor associated with a file is missing, we upload the file to the default dataset.
        Otherwise, we use the dataset associated with the sensor and create the dataset if
        necessary.
    """
    for one_file in self.files_to_upload:
        sourcefile = os.path.join(one_file["source_path"], one_file["source_name"])

        # Make sure we have the original file, then compress it if needed or rename it
        if os.path.isfile(sourcefile):
            # Make sure we have the full destination path
            if not os.path.exists(one_file["dest_path"]):
                os.makedirs(one_file["dest_path"])

            resultfile = os.path.join(one_file["dest_path"], one_file["dest_name"])
            if one_file["compress"]:
                resultfile = resultfile + ".zip"
                with open(sourcefile, 'rb') as f_in:
                    with gzip.open(resultfile, 'wb') as f_out:
                        shutil.copyfileobj(f_in, f_out)
            elif not sourcefile == resultfile:
                shutil.move(sourcefile, resultfile)

            # Find or create the target dataset for this entry if it doesn't exist
            cur_dataset_id = default_dsid
            if "sensor" in one_file:
                sensor_type = one_file["sensor"]
                if sensor_type in self.sensor_dsid_map:
                    cur_dataset_id = self.sensor_dsid_map[sensor_type]
                else:
                    new_sensor = Sensors(base=self.sensors.base, station=self.sensors.station,
                                         sensor=sensor_type)
                    sensor_leaf_name = new_sensor.get_display_name() + ' - ' + timestamp

                    ds_exists = get_datasetid_by_name(host, secret_key, sensor_leaf_name)
                    new_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user,
                                                             self.clowder_pass, self.clowderspace,
                                                             season_name, experiment_name,
                                                             new_sensor.get_display_name(),
                                                             timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                             leaf_ds_name=sensor_leaf_name)

                    if (self.overwrite_ok or not ds_exists) and self.experiment_metadata:
                        self.update_dataset_extractor_metadata(connector, host, secret_key, new_dsid,
                                                               prepare_pipeline_metadata(self.experiment_metadata),
                                                               self.extractor_info['name'])

                    self.sensor_dsid_map[sensor_type] = new_dsid
                    cur_dataset_id = new_dsid

            # Check if the file already exists in the dataset
            file_in_dataset = check_file_in_dataset(connector, host, secret_key, cur_dataset_id,
                                                    resultfile, remove=False)

            # If the file is already in the dataset, determine if we need to delete it first
            if self.overwrite_ok and file_in_dataset:
                # Delete the file from the dataset before uploading the new copy
                self.log_info(resource, "Removing existing file in dataset " + resultfile)
                check_file_in_dataset(connector, host, secret_key, cur_dataset_id, resultfile, remove=True)
            elif not self.overwrite_ok and file_in_dataset:
                # We won't overwrite an existing file
                self.log_skip(resource, "Not overwriting existing file in dataset " + resultfile)
                continue

            # Upload the file to the dataset
            fid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                    cur_dataset_id, resultfile)

            # Generate our metadata
            meta = build_metadata(host, self.extractor_info, fid, content, 'file')
            # Upload the metadata to the dataset
            upload_metadata(connector, host, secret_key, fid, meta)

            self.created += 1
            self.bytes += os.path.getsize(resultfile)
        else:
            raise Exception("%s was not found" % sourcefile)
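    def _example_upload_entry(self):
        """Illustrative only: the shape of one entry on self.files_to_upload as
           perform_uploads() reads it. All values are hypothetical; "sensor" is
           optional and the default dataset is used when it is absent.
        """
        return {
            "source_path": "/tmp/extracted",         # directory the file currently lives in
            "source_name": "canopy_cover.csv",       # name of the file to pick up
            "dest_path": "/home/extractor/output",   # destination directory (created if missing)
            "dest_name": "canopy_cover.csv",         # final name; ".zip" is appended when compressed
            "compress": False,                       # gzip-compress before uploading when True
            "sensor": "canopy_cover"                 # optional: route to this sensor's dataset
        }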