def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    if not contains_required_files(resource, ['raw', 'raw.hdr', 'image.jpg', 'frameIndex.txt', 'settings.txt']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_fullname = 'swir_netcdf'
    else:
        sensor_fullname = 'vnir_netcdf'

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
            # Make sure outputs properly exist
            out_nc = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname)
            if file_exists(out_nc):
                self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version'])
                return CheckMessage.ignore
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")
        # See if we can recover it from disk
        if sensor_fullname == 'vnir_netcdf':
            date = timestamp.split("__")[0]
            source_dir = "/home/extractor/sites/ua-mac/raw_data/VNIR/%s/%s/" % (date, timestamp)
            for f in os.listdir(source_dir):
                if f.endswith("_metadata.json"):
                    self.log_info(resource, "updating metadata from %s" % f)
                    raw_dsmd = load_json_file(os.path.join(source_dir, f))
                    clean_md = clean_metadata(raw_dsmd, 'VNIR')
                    complete_md = build_metadata(host, self.extractor_info, resource['id'], clean_md, 'dataset')
                    remove_metadata(connector, host, secret_key, resource['id'])
                    upload_metadata(connector, host, secret_key, resource['id'], complete_md)
                    return CheckMessage.download
        return CheckMessage.ignore
def file_is_image_type(identify_binary, filename, metadata_filename=None):
    """Uses the identify application to generate the MIME type of the file and
    looks for an image MIME type.

    If a metadata filename is specified, the JSON in the file is loaded first and
    the MIME type is looked for. If the metadata filename is not specified, or a
    MIME type was not found in the metadata, the identify application is used.

    Args:
        identify_binary(str): path to the executable which will return a MIME type on an image file
        filename(str): the path to the file to check
        metadata_filename(str): the path to JSON metadata associated with the file in which to
            look for a 'contentType' tag containing the MIME type

    Returns:
        True is returned if the file is a MIME image type.
        False is returned upon failure or if the file is not a type of image.
    """
    logger = logging.getLogger(__name__)

    # Try to determine the file type from its JSON information (metadata if from Clowder API)
    try:
        if metadata_filename and file_exists(metadata_filename):
            file_md = load_json_file(metadata_filename)
            if file_md and 'contentType' in file_md:
                if file_md['contentType'].startswith('image'):
                    return True
    # pylint: disable=broad-except
    except Exception as ex:
        logger.info("Exception caught: %s", str(ex))
    # pylint: enable=broad-except

    # Try to determine the file type locally
    try:
        is_image_type = find_image_mime_type(
            subprocess.check_output([identify_binary, "-verbose", filename],
                                    stderr=subprocess.STDOUT))
        if is_image_type is not None:
            return is_image_type
    # pylint: disable=broad-except
    except Exception as ex:
        logger.info("Exception caught: %s", str(ex))
    # pylint: enable=broad-except

    return False
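# A minimal usage sketch for file_is_image_type. The binary path and file names
# below are hypothetical, and "identify" is assumed to be ImageMagick's identify
# tool; none of these values appear elsewhere in this repo.
def _demo_file_is_image_type():
    if file_is_image_type("/usr/bin/identify", "/data/capture.jpg",
                          metadata_filename="/data/capture.json"):
        print("capture.jpg looks like an image type")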
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.bin'):
            img_left = fname
        elif fname.endswith('_right.bin'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'stereoTop', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name,
                                                                             self.sensors.get_display_name(),
                                                                             timestamp[:4], timestamp[5:7],
                                                                             timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                season_name, experiment_name, self.sensors.get_display_name(),
                                                timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    # Attach LemnaTec source metadata to Level_1 product if necessary
    target_md = download_metadata(connector, host, secret_key, target_dsid)
    if not get_extractor_metadata(target_md, self.extractor_info['name']):
        self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
        remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
        terra_md_trim = get_terraref_metadata(all_dsmd)
        if updated_experiment is not None:
            terra_md_trim['experiment_metadata'] = updated_experiment
        terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
        level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, level1_md)

    try:
        left_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'left')
        gps_bounds_left = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'right')
        gps_bounds_right = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
    except KeyError:
        self.log_error(resource, "spatial metadata not properly identified; sending to cleaner")
        submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
        return

    if (not file_exists(left_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % left_tiff)
        left_image = terraref.stereo_rgb.process_raw(left_shape, img_left, None)
        create_geotiff(left_image, gps_bounds_left, left_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(left_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if (not file_exists(right_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % right_tiff)
        right_image = terraref.stereo_rgb.process_raw(right_shape, img_right, None)
        create_geotiff(right_image, gps_bounds_right, right_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(right_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % right_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Trigger additional extractors
    self.log_info(resource, "triggering downstream extractors")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.rgbmask")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.nrmac")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
            "files_created": uploaded_file_ids
        }, 'dataset')
        self.log_info(resource, "uploading extractor metadata to raw dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        try:
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
        except Exception:
            self.log_info(resource, "problem uploading extractor metadata...")

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    sensor_type, timestamp = resource['name'].split(" - ")

    # First, re-check metadata to verify it hasn't been added in the meantime
    ds_md = download_metadata(connector, host, secret_key, resource['id'])
    terra_md = get_terraref_metadata(ds_md)
    if terra_md:
        self.log_info(resource, "Found TERRA-REF metadata; not cleaning")
        return

    # These datasets do not have TERRA md
    uncleanables = ["Full Field"]
    if sensor_type in uncleanables:
        self.log_info(resource, "Cannot clean metadata for %s" % sensor_type)
        return

    # For these datasets, we must get TERRA md from the raw_data source
    lv1_types = {"RGB GeoTIFFs": "stereoTop", "Thermal IR GeoTIFFs": "flirIrCamera"}
    if sensor_type in lv1_types:
        raw_equiv = resource['name'].replace(sensor_type, lv1_types[sensor_type])
        source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(raw_equiv))
    else:
        # Search for metadata.json source file
        source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(resource['name']))
    source_dir = self.remapMountPath(connector, source_dir)

    if self.delete:
        # Delete all existing metadata from this dataset
        self.log_info(resource, "Deleting existing metadata")
        delete_dataset_metadata(host, self.clowder_user, self.clowder_pass, resource['id'])

    # TODO: split between the PLY files (in Level_1) and metadata.json files - unique to this sensor
    if sensor_type == "scanner3DTop":
        source_dir = source_dir.replace("Level_1", "raw_data")

    self.log_info(resource, "Searching for metadata.json in %s" % source_dir)
    if os.path.isdir(source_dir):
        md_file = None
        for f in os.listdir(source_dir):
            if f.endswith("metadata.json"):
                md_file = os.path.join(source_dir, f)
        if md_file:
            self.log_info(resource, "Found metadata.json; cleaning")
            md_json = clean_metadata(load_json_file(md_file), sensor_type)
            format_md = {
                "@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                             {"@vocab": "https://terraref.ncsa.illinois.edu/metadata/uamac#"}],
                "content": md_json,
                "agent": {
                    "@type": "cat:user",
                    "user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % self.userid
                }
            }
            self.log_info(resource, "Uploading cleaned metadata")
            upload_metadata(connector, host, secret_key, resource['id'], format_md)

            # Now trigger a callback extraction if given
            if len(self.callback) > 0:
                self.log_info(resource, "Submitting callback extraction to %s" % self.callback)
                submit_extraction(connector, host, secret_key, resource['id'], self.callback)
            else:
                callbacks = self.get_callbacks_by_sensor(sensor_type)
                if callbacks:
                    for c in callbacks:
                        self.log_info(resource, "Submitting callback extraction to %s" % c)
                        submit_extraction(connector, host, secret_key, resource['id'], c)
                else:
                    self.log_info(resource, "No default callback found for %s" % sensor_type)
        else:
            self.log_error(resource, "metadata.json not found in %s" % source_dir)
    else:
        self.log_error(resource, "%s could not be found" % source_dir)

    # TODO: Have extractor check for existence of Level_1 output product and delete if exists?
    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, metadata = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            metadata = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.bin'):
            img_left = fname
        elif fname.endswith('_right.bin'):
            img_right = fname
    if None in [img_left, img_right, metadata]:
        self.log_error(resource, "could not locate each of left+right+metadata in processing")
        raise ValueError("could not locate each of left+right+metadata in processing")

    # Determine output location & filenames
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    self.log_info(resource, "determining image shapes & gps bounds")
    left_shape = bin2tiff.get_image_shape(metadata, 'left')
    right_shape = bin2tiff.get_image_shape(metadata, 'right')
    left_gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['left']['bounding_box'])
    right_gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['right']['bounding_box'])
    out_tmp_tiff = os.path.join(tempfile.gettempdir(), resource['id'].encode('utf8'))

    target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                          self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[5:7], timestamp[8:10],
                                          leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

    if (not os.path.isfile(left_tiff)) or self.overwrite:
        self.log_info(resource, "creating & uploading %s" % left_tiff)
        left_image = bin2tiff.process_image(left_shape, img_left, None)
        # Rename output.tif after creation to avoid long path errors
        create_geotiff(left_image, left_gps_bounds, out_tmp_tiff, None, False, self.extractor_info, metadata)
        # TODO: we're moving zero byte files
        shutil.move(out_tmp_tiff, left_tiff)
        if left_tiff not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        else:
            self.log_info(resource, "file found in dataset already; not re-uploading")
        self.created += 1
        self.bytes += os.path.getsize(left_tiff)

    if (not os.path.isfile(right_tiff)) or self.overwrite:
        self.log_info(resource, "creating & uploading %s" % right_tiff)
        right_image = bin2tiff.process_image(right_shape, img_right, None)
        create_geotiff(right_image, right_gps_bounds, out_tmp_tiff, None, False, self.extractor_info, metadata)
        shutil.move(out_tmp_tiff, right_tiff)
        if right_tiff not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        else:
            self.log_info(resource, "file found in dataset already; not re-uploading")
        self.created += 1
        self.bytes += os.path.getsize(right_tiff)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                              {"files_created": uploaded_file_ids}, 'dataset')
    self.log_info(resource, "uploading extractor metadata")
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    # Upload original LemnaTec metadata to new Level_1 dataset
    md = get_terraref_metadata(all_dsmd)
    md['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
    lemna_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
    self.log_info(resource, "uploading LemnaTec metadata")
    upload_metadata(connector, host, secret_key, target_dsid, lemna_md)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get bin files and metadata
    metadata = None
    for f in resource['local_paths']:
        # First check metadata attached to dataset in Clowder for item of interest
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            metadata = get_terraref_metadata(all_dsmd, "ps2Top")
        # Otherwise, check if metadata was uploaded as a .json file
        elif f.endswith('_metadata.json') and f.find('/_metadata.json') == -1 and metadata is None:
            metadata = load_json_file(f)
    frames = {}
    for ind in range(0, 101):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        for f in resource['local_paths']:
            if f.endswith(format_ind + '.bin'):
                frames[ind] = f
    if metadata is None or len(frames) < 101:
        self.log_error(resource, 'could not find all of frames/metadata')
        return

    # Determine output directory
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    hist_path = self.sensors.create_sensor_path(timestamp, opts=['combined_hist'])
    coloredImg_path = self.sensors.create_sensor_path(timestamp, opts=['combined_pseudocolored'])
    uploaded_file_ids = []

    target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                          self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[5:7], timestamp[8:10],
                                          leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

    (img_width, img_height) = self.get_image_dimensions(metadata)
    gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['ps2Top']['bounding_box'])
    self.log_info(resource, "image dimensions (w, h): (%s, %s)" % (img_width, img_height))

    png_frames = {}
    # skip 0101.bin since 101 is an XML file that lists the frame times
    for ind in range(0, 101):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        png_path = self.sensors.create_sensor_path(timestamp, opts=[format_ind])
        tif_path = png_path.replace(".png", ".tif")
        png_frames[ind] = png_path
        if not os.path.exists(png_path) or self.overwrite:
            self.log_info(resource, "generating and uploading %s" % png_path)
            pixels = np.fromfile(frames[ind], np.dtype('uint8')).reshape([int(img_height), int(img_width)])
            create_image(pixels, png_path)
            create_geotiff(pixels, gps_bounds, tif_path, None, False, self.extractor_info, metadata)
            if png_path not in resource['local_paths']:
                fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
                uploaded_file_ids.append(fileid)
            self.created += 1
            self.bytes += os.path.getsize(png_path)

    # Generate aggregate outputs
    self.log_info(resource, "generating aggregates")
    if not (os.path.exists(hist_path) and os.path.exists(coloredImg_path)) or self.overwrite:
        # TODO: Coerce histogram and pseudocolor to geotiff?
        self.analyze(int(img_width), int(img_height), png_frames, hist_path, coloredImg_path)
        self.created += 2
        self.bytes += os.path.getsize(hist_path) + os.path.getsize(coloredImg_path)
    if hist_path not in resource['local_paths']:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, hist_path)
        uploaded_file_ids.append(fileid)
    if coloredImg_path not in resource['local_paths']:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, coloredImg_path)
        uploaded_file_ids.append(fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    metadata = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    self.log_info(resource, "uploading extractor metadata")
    upload_metadata(connector, host, secret_key, resource['id'], metadata)

    self.end_message(resource)
    df = df.append(pd.Series(new_entry, index=indices), ignore_index=True)

    logging.info("Writing %s" % output_file)
    df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
    df.sort_values(by=['date'], inplace=True, ascending=True)
    df.to_csv(output_file, index=False)
    SCAN_LOCK = False

    return psql_conn


if __name__ == '__main__':
    logger = logging.getLogger('counter')

    config = load_json_file(os.path.join(app_dir, "config_default.json"))
    if os.path.exists(os.path.join(app_dir, "data/config_custom.json")):
        print("...loading configuration from config_custom.json")
        config = update_nested_dict(config, load_json_file(os.path.join(app_dir, "data/config_custom.json")))
        try:
            DEFAULT_COUNT_START = str(config["default_count_start"])
            DEFAULT_COUNT_END = str(config["default_count_end"])
            print(DEFAULT_COUNT_START, DEFAULT_COUNT_END)
            print("default start and end provided")
        except KeyError:
            print("No default values for start and end")
    else:
        print("...no custom configuration file found. using default values")

    # Initialize logger handlers
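# A hypothetical data/config_custom.json override for the block above; only the
# two keys read there are shown, but any key present in config_default.json can
# be overridden the same way via update_nested_dict:
#
#   {
#       "default_count_start": "2018-04-01",
#       "default_count_end": "2018-04-30"
#   }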
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get BIN file and metadata
    bin_file, terra_md_full = None, None
    for f in resource['local_paths']:
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            terra_md_full = get_terraref_metadata(all_dsmd, 'flirIrCamera')
        elif f.endswith('_ir.bin'):
            bin_file = f
    if None in [bin_file, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'flirIrCamera', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name,
                                                                             self.sensors.get_display_name(),
                                                                             timestamp[:4], timestamp[5:7],
                                                                             timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                season_name, experiment_name, self.sensors.get_display_name(),
                                                timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    tiff_path = self.sensors.create_sensor_path(timestamp)
    png_path = tiff_path.replace(".tif", ".png")
    uploaded_file_ids = []

    # Attach LemnaTec source metadata to Level_1 product
    self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
    remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
    terra_md_trim = get_terraref_metadata(all_dsmd)
    if updated_experiment is not None:
        terra_md_trim['experiment_metadata'] = updated_experiment
    terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
    level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
    upload_metadata(connector, host, secret_key, target_dsid, level1_md)

    skipped_png = False
    if not file_exists(png_path) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating & uploading %s" % png_path)
        raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
        raw_data = numpy.rot90(raw_data, 3)
        create_image(raw_data, png_path, self.scale_values)
        self.created += 1
        self.bytes += os.path.getsize(png_path)
    else:
        skipped_png = True
    # Only upload the newly generated file to Clowder if it isn't already in dataset
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, png_path, remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not file_exists(tiff_path) or self.overwrite:
        # Generate temperature matrix and perform actual processing
        self.log_info(resource, "creating & uploading %s" % tiff_path)
        gps_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['flirIrCamera']['bounding_box'])
        if skipped_png:
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
        tc = getFlir.rawData_to_temperature(raw_data, terra_md_full)  # get temperature
        create_geotiff(tc, gps_bounds, tiff_path, None, True, self.extractor_info, terra_md_full)
        self.created += 1
        self.bytes += os.path.getsize(tiff_path)
    # Only upload the newly generated file to Clowder if it isn't already in dataset
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, tiff_path, remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Trigger additional extractors
    self.log_info(resource, "triggering downstream extractors")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
            "files_created": uploaded_file_ids
        }, 'dataset')
        self.log_info(resource, "uploading extractor metadata to raw dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    # Get bin files and metadata
    metadata = None
    for f in resource['local_paths']:
        # First check metadata attached to dataset in Clowder for item of interest
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            metadata = get_extractor_metadata(all_dsmd)
        # Otherwise, check if metadata was uploaded as a .json file
        elif f.endswith('_metadata.json') and f.find('/_metadata.json') == -1 and metadata is None:
            metadata = load_json_file(f)
    frames = {}
    for ind in range(0, 101):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        for f in resource['files']:
            if f['filename'].endswith(format_ind + '.bin'):
                frames[ind] = f['filename']
    if metadata is None or len(frames) < 101:
        logging.error('could not find all of frames/metadata')
        return

    # Determine output directory
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    hist_path = self.sensors.create_sensor_path(timestamp, opts=['combined_hist'])
    coloredImg_path = self.sensors.create_sensor_path(timestamp, opts=['combined_pseudocolored'])
    uploaded_file_ids = []

    target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                                          self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[:7], timestamp[:10],
                                          leaf_ds_name=resource['dataset_info']['name'])

    img_width = 1936
    img_height = 1216

    png_frames = {}
    # skip 0101.bin since 101 is an XML file that lists the frame times
    for ind in range(0, 101):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        png_path = self.sensors.create_sensor_path(timestamp, opts=[format_ind])
        png_frames[ind] = png_path
        if not os.path.exists(png_path) or self.overwrite:
            logging.info("...generating and uploading %s" % png_path)
            pixels = numpy.fromfile(frames[ind], numpy.dtype('uint8')).reshape([img_height, img_width])
            create_image(pixels, png_path)
            if png_path not in resource['local_paths']:
                fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
                uploaded_file_ids.append(fileid)
            self.created += 1
            self.bytes += os.path.getsize(png_path)

    # Generate aggregate outputs
    logging.info("...generating aggregates")
    if not (os.path.exists(hist_path) and os.path.exists(coloredImg_path)) or self.overwrite:
        psiiCore.psii_analysis(png_frames, hist_path, coloredImg_path)
        self.created += 2
        self.bytes += os.path.getsize(hist_path)
        self.bytes += os.path.getsize(coloredImg_path)
    if hist_path not in resource['local_paths']:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, hist_path)
        uploaded_file_ids.append(fileid)
    if coloredImg_path not in resource['local_paths']:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, coloredImg_path)
        uploaded_file_ids.append(fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    metadata = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['id'], metadata)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.tif'):
            img_left = fname
        elif fname.endswith('_right.tif'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    target_dsid = resource['id']
    left_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
    right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])

    if not file_exists(left_rgb_enh_tiff) or self.overwrite:
        self.log_info(resource, "creating %s" % left_rgb_enh_tiff)
        EI = getEnhancedImage(img_left)
        create_geotiff(EI, left_bounds, left_rgb_enh_tiff)
        self.created += 1
        self.bytes += os.path.getsize(left_rgb_enh_tiff)
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                          left_rgb_enh_tiff, remove=self.overwrite)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_rgb_enh_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_rgb_enh_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not file_exists(right_rgb_enh_tiff) or self.overwrite:
        self.log_info(resource, "creating %s" % right_rgb_enh_tiff)
        EI = getEnhancedImage(img_right)
        create_geotiff(EI, right_bounds, right_rgb_enh_tiff)
        self.created += 1
        self.bytes += os.path.getsize(right_rgb_enh_tiff)
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                          right_rgb_enh_tiff, remove=self.overwrite)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % right_rgb_enh_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_rgb_enh_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    ext_meta = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    self.log_info(resource, "uploading extractor metadata")
    remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, target_dsid, ext_meta)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    # Get BIN file and metadata
    bin_file, metadata = None, None
    for f in resource['local_paths']:
        # First check metadata attached to dataset in Clowder for item of interest
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            metadata = get_terraref_metadata(all_dsmd, 'flirIrCamera')
        # Otherwise, check if this is the binary image file
        elif f.endswith('_ir.bin'):
            bin_file = f
    if None in [bin_file, metadata]:
        logging.getLogger(__name__).error('could not find both ir.bin and metadata')
        return

    # Determine output directory
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    png_path = self.sensors.create_sensor_path(timestamp, ext='png')
    tiff_path = self.sensors.create_sensor_path(timestamp)
    uploaded_file_ids = []

    target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                          self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[5:7], timestamp[8:10],
                                          leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

    skipped_png = False
    if not os.path.exists(png_path) or self.overwrite:
        logging.getLogger(__name__).info("Generating %s" % png_path)
        # get raw data from bin file
        raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
        raw_data = numpy.rot90(raw_data, 3)
        create_image(raw_data, png_path, self.scale_values)
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        if png_path not in resource["local_paths"]:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        self.created += 1
        self.bytes += os.path.getsize(png_path)
    else:
        skipped_png = True

    if not os.path.exists(tiff_path) or self.overwrite:
        logging.getLogger(__name__).info("Generating temperature matrix")
        gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['flirIrCamera']['bounding_box'])
        if skipped_png:
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
        tc = getFlir.rawData_to_temperature(raw_data, metadata)  # get temperature

        logging.getLogger(__name__).info("Creating %s" % tiff_path)
        # Rename temporary tif after creation to avoid long path errors
        out_tmp_tiff = os.path.join(tempfile.gettempdir(), resource['id'].encode('utf8'))
        create_geotiff(tc, gps_bounds, out_tmp_tiff, None, True, self.extractor_info, metadata)
        shutil.move(out_tmp_tiff, tiff_path)
        if tiff_path not in resource["local_paths"]:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        self.created += 1
        self.bytes += os.path.getsize(tiff_path)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    metadata = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['id'], metadata)

    # Upload original LemnaTec metadata to new Level_1 dataset
    md = get_terraref_metadata(all_dsmd)
    md['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
    lemna_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
    upload_metadata(connector, host, secret_key, target_dsid, lemna_md)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # clean tmp directory from any potential failed previous runs
    flist = os.listdir("/tmp")
    for f in flist:
        try:
            os.remove(os.path.join("/tmp", f))
        except OSError:
            pass

    """ if file is above configured limit, skip it
    max_gb = 24  # RAM has 4x requirement, e.g. 24GB requires 96GB RAM
    for fname in resource['local_paths']:
        if fname.endswith('raw'):
            rawfile = fname
    rawsize = os.stat(rawfile).st_size
    if rawsize > max_gb * 1000000000:
        self.log_skip(resource, "filesize %sGB exceeds available RAM" % int(rawsize/1000000000))
        return False
    """

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_rawname = 'SWIR'
        sensor_fullname = 'swir_netcdf'
        soil_mask = None
    else:
        sensor_rawname = 'VNIR'
        sensor_fullname = 'vnir_netcdf'
        # Check for corresponding soil mask to include in workflow.sh if available
        soil_mask = self.sensors.get_sensor_path(timestamp, sensor='vnir_soil_masks', opts=['soil_mask'])
    out_nc = self.sensors.create_sensor_path(timestamp, sensor=sensor_fullname)
    xps_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['xps'])
    ind_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['ind'])
    csv_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname.replace("_netcdf", "_traits"))

    raw_file, terra_md_full = None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, sensor_rawname)
        elif fname.endswith('raw'):
            raw_file = fname
    if None in [raw_file, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, sensor_rawname, terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    print_name = self.sensors.get_display_name(sensor=sensor_fullname)
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, print_name,
                                                                             timestamp[:4], timestamp[5:7],
                                                                             timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                season_name, experiment_name, print_name,
                                                timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    uploaded_file_ids = []

    # Perform actual processing
    if (not file_exists(out_nc)) or self.overwrite:
        """TODO: OLD AND NOT USED
        self.log_info(resource, 'invoking hyperspectral_workflow.sh to create: %s' % out_nc)
        if soil_mask and file_exists(soil_mask):
            # If soil mask exists, we can generate an _ind indices file
            returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
                                          "-m", soil_mask, "--output_xps_img", xps_file,
                                          "-i", raw_file, "-o", out_nc])  # disable --new_clb_mth
        else:
            # Otherwise we cannot, and need to trigger soilmask extractor and circle back later
            returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
                                          "--output_xps_img", xps_file,
                                          "-i", raw_file, "-o", out_nc])  # disable --new_clb_mth
        if returncode != 0:
            raise ValueError('script encountered an error')
        """

        self.log_info(resource, 'invoking python calibration to create: %s' % out_nc)
        create_empty_netCDF(raw_file, out_nc)
        self.log_info(resource, 'applying calibration to: %s' % out_nc)
        apply_calibration(raw_file, out_nc)
        self.log_info(resource, '...done with %s' % raw_file)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_nc, remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_nc)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
    self.created += 1
    self.bytes += os.path.getsize(out_nc)

    # TODO: Still compatible?
    #if not soil_mask:
    #    self.log_info(resource, "triggering soil mask extractor on %s" % fileid)
    #    submit_extraction(connector, host, secret_key, fileid, "terra.sunshade.soil_removal")

    # TODO: Send output to BETYdb
    """
    # Send indices to betyDB
    if file_exists(ind_file):
        # TODO: Use ncks to trim ind_file to plots before this step
        plot_no = 'Full Field'

        with Dataset(ind_file, "r") as netCDF_handle:
            ndvi = netCDF_handle.get_variables_by_attributes(
                standard_name='normalized_difference_chlorophyll_index_750_705')
            NDVI705 = ndvi[0].getValue().ravel()[0]

            # TODO: Map the remaining ~50 variables in BETY to create indices file
            # TODO: In netCDF header,
            csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
                         'citation_author,citation_year,citation_title,method'
            csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
                       'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
                           timestamp, NDVI705, plot_no)
            with open(csv_file, 'w') as c:
                c.write(csv_header + '\n' + csv_vals)

        # TODO: Send this CSV to betydb & geostreams extractors instead
        submit_traits(csv_file, bety_key=self.bety_key)
    """

    self.end_message(resource)
def upload_dataset(dataset_path, level, product, timestamp, sess, logfile):
    contents = os.listdir(dataset_path)
    if len(contents) == 0:
        logfile.write('%s,%s,"%s",%s\n' % (level, product, dataset_path, "ERR: No files found"))
        return False

    # Find and prepare the metadata
    clean_md = None
    if product == "scanner3DTop" and level == "Level_1":
        # Special check between Level_1/raw_data for scanner3DTop only
        path3d = dataset_path.replace("Level_1", "raw_data")
        contents3d = os.listdir(path3d)
        for f in contents3d:
            if f.endswith("_metadata.json"):
                md = load_json_file(os.path.join(path3d, f))
                clean_md = clean_metadata(md, product)
                if dry_run:
                    print("...%s successfully cleaned." % os.path.join(path3d, f))
    else:
        for f in contents:
            if f.endswith("_metadata.json"):
                md = load_json_file(os.path.join(dataset_path, f))
                clean_md = clean_metadata(md, product)
                if dry_run:
                    print("...%s successfully cleaned." % os.path.join(dataset_path, f))
            elif f.endswith("_metadata_cleaned.json"):
                clean_md = load_json_file(os.path.join(dataset_path, f))
                if dry_run:
                    print("...%s successfully loaded." % os.path.join(dataset_path, f))
    if clean_md is None and product != "EnvironmentLogger":
        logfile.write('%s,%s,"%s",%s\n' % (level, product, dataset_path, "ERR: No metadata found"))
        return False

    # Create the dataset in Clowder
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, product, clean_md)
    YYYY = timestamp[:4]
    MM = timestamp[5:7]
    DD = timestamp[8:10]
    dataset_name = "%s - %s" % (product, timestamp)
    if not dry_run:
        dsid = build_dataset_hierarchy_crawl(clowder_host, clowder_admin_key, clowder_user, clowder_pass, clowder_space,
                                             season_name, experiment_name, product, YYYY, MM, DD, dataset_name)
    else:
        dsid = "JustPretend"
    logfile.write('%s,%s,"%s",%s\n' % (level, product, dataset_path, "OK: %s" % dsid))

    # Upload metadata
    if not dry_run and product != "EnvironmentLogger":
        sess.post("%sapi/datasets/%s/metadata.jsonld" % (clowder_host, dsid),
                  headers={'Content-Type': 'application/json'},
                  data=json.dumps({
                      "@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                                   {"@vocab": "https://terraref.ncsa.illinois.edu/metadata/uamac#"}],
                      "content": clean_md,
                      "agent": {
                          "@type": "cat:user",
                          "user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % clowder_userid
                      }
                  }))

    # Add each file
    for f in contents:
        if not (f.endswith("_metadata.json") or f.endswith("_metadata_cleaned.json")):
            filepath = os.path.join(dataset_path, f)
            if not dry_run:
                upload_to_dataset(conn, clowder_host, clowder_user, clowder_pass, dsid, filepath)
            else:
                print("...would upload %s" % f)

    return True
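# A minimal driver sketch for upload_dataset. The paths, timestamp, and log file
# below are hypothetical; it assumes the module-level globals the function reads
# (dry_run, clowder_host, clowder_user, clowder_pass, conn, etc.) are already
# configured, and that sess is a requests.Session like the one whose .post() is
# used above.
def _demo_upload_dataset():
    import requests
    sess = requests.Session()
    with open("/tmp/upload_log.csv", "w") as logfile:
        upload_dataset("/home/extractor/sites/ua-mac/raw_data/stereoTop/2018-04-01/2018-04-01__12-00-00-000",
                       "raw_data", "stereoTop", "2018-04-01__12-00-00-000", sess, logfile)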
import logging
import os
import sys

lib_path = os.path.abspath(os.path.join('..', '..', 'scanner_3d'))
sys.path.append(lib_path)

from terrautils.metadata import get_terraref_metadata, clean_metadata
from terrautils.extractors import load_json_file
from scanner_3d.ply2las import generate_las_from_pdal, combine_east_west_las, geo_referencing_las, \
    geo_referencing_las_for_eachpoint_in_mac

test_id = '85f9c8c2-fa68-48a6-b63c-375daa438414'
path = os.path.join(os.path.dirname(__file__), 'test_ply2las_doc', test_id)
dire = os.path.join(os.path.dirname(__file__), 'test_ply2las_doc')
all_dsmd = load_json_file(path + '_metadata.json')
cleanmetadata = clean_metadata(all_dsmd, "scanner3DTop")
terra_md = get_terraref_metadata(cleanmetadata, 'scanner3DTop')

in_east = '/data/' + test_id + '__Top-heading-east_0.ply'
in_west = '/data/' + test_id + '__Top-heading-west_0.ply'

pdal_base = "docker run -v %s:/data pdal/pdal:1.5 " % dire
tmp_east_las = "/data/east_temp.las"
tmp_west_las = "/data/west_temp.las"
merge_las = "/data/merged.las"
convert_las = dire + "/converted.las"
convert_pt_las = dire + "/converted_pts.las"


def test_east_las():
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.tif'):
            img_left = fname
        elif fname.endswith('_right.tif'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    target_dsid = resource['id']
    left_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []
    right_ratio, left_ratio = 0, 0

    left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
    right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
    #qual_md = get_extractor_metadata(all_dsmd, "terra.stereo-rgb.nrmac")

    if (not file_exists(left_rgb_mask_tiff)) or self.overwrite:
        self.log_info(resource, "creating %s" % left_rgb_mask_tiff)
        #if qual_md and 'left_quality_score' in qual_md:
        #    left_ratio, left_rgb = gen_cc_enhanced(img_left, quality_score=int(qual_md['left_quality_score']))
        left_ratio, left_rgb = gen_cc_enhanced(img_left)

        if left_ratio is not None and left_rgb is not None:
            # Bands must be reordered to avoid swapping R and B
            left_rgb = cv2.cvtColor(left_rgb, cv2.COLOR_BGR2RGB)
            create_geotiff(left_rgb, left_bounds, left_rgb_mask_tiff, None, False, self.extractor_info, terra_md_full)
            compress_geotiff(left_rgb_mask_tiff)
            self.created += 1
            self.bytes += os.path.getsize(left_rgb_mask_tiff)
        else:
            # If the masked version was not generated, delete any old version as well
            self.log_info(resource, "a faulty version exists; deleting %s" % left_rgb_mask_tiff)
            os.remove(left_rgb_mask_tiff)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_rgb_mask_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_rgb_mask_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_rgb_mask_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not self.leftonly:
        if (not file_exists(right_rgb_mask_tiff)) or self.overwrite:
            right_ratio, right_rgb = gen_cc_enhanced(img_right)

            if right_ratio is not None and right_rgb is not None:
                # Bands must be reordered to avoid swapping R and B
                right_rgb = cv2.cvtColor(right_rgb, cv2.COLOR_BGR2RGB)
                create_geotiff(right_rgb, right_bounds, right_rgb_mask_tiff, None, False, self.extractor_info, terra_md_full)
                compress_geotiff(right_rgb_mask_tiff)
                self.created += 1
                self.bytes += os.path.getsize(right_rgb_mask_tiff)
            else:
                # If the masked version was not generated, delete any old version as well
                self.log_info(resource, "a faulty version exists; deleting %s" % right_rgb_mask_tiff)
                os.remove(right_rgb_mask_tiff)

        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_rgb_mask_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % right_rgb_mask_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_rgb_mask_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        md = {
            "files_created": uploaded_file_ids,
            "left_mask_ratio": left_ratio
        }
        if not self.leftonly:
            md["right_mask_ratio"] = right_ratio
        extractor_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
        self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Load metadata from dataset
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd)
            if 'spatial_metadata' in terra_md_full:
                spatial_meta = terra_md_full['spatial_metadata']
            else:
                spatial_meta = None
    if not spatial_meta:
        raise ValueError("No spatial metadata found.")

    # Determine which files in dataset need clipping
    files_to_process = {}
    for f in resource['local_paths']:
        if f.startswith("ir_geotiff") and f.endswith(".tif"):
            sensor_name = "ir_geotiff"
            filename = os.path.basename(f)
            files_to_process[filename] = {
                "path": f,
                "bounds": spatial_meta['flirIrCamera']['bounding_box']
            }
        elif f.startswith("rgb_geotiff") and f.endswith(".tif"):
            sensor_name = "rgb_geotiff"
            filename = os.path.basename(f)
            if f.endswith("_left.tif"):
                side = "left"
            else:
                side = "right"
            files_to_process[filename] = {
                "path": f,
                "bounds": spatial_meta[side]['bounding_box']
            }
        elif f.endswith(".las"):
            sensor_name = "laser3d_las"
            filename = os.path.basename(f)
            files_to_process[filename] = {
                "path": f,
                "bounds": get_las_extents(f)
            }
        # TODO: Add case for laser3d heightmap

    # Fetch experiment name from terra metadata
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'plotclipper', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine script name
    target_scan = "unknown_scan"
    if 'gantry_variable_metadata' in terra_md_full:
        if 'script_name' in terra_md_full['gantry_variable_metadata']:
            target_scan = terra_md_full['gantry_variable_metadata']['script_name']
        if 'script_hash' in terra_md_full['gantry_variable_metadata']:
            target_scan += ' ' + terra_md_full['gantry_variable_metadata']['script_hash']

    all_plots = get_site_boundaries(timestamp.split("__")[0], city='Maricopa')
    uploaded_file_ids = []

    for filename in files_to_process:
        file_path = files_to_process[filename]["path"]
        file_bounds = files_to_process[filename]["bounds"]

        overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True)
        if len(overlap_plots) > 0:
            self.log_info(resource, "Attempting to clip %s into %s plot shards" % (filename, len(overlap_plots)))
            for plotname in overlap_plots:
                plot_bounds = overlap_plots[plotname]
                tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))

                plot_display_name = self.sensors.get_display_name(sensor=sensor_name) + " (By Plot)"
                leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split("__")[0]
                self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name,
                                                                                         plot_display_name,
                                                                                         timestamp[:4], timestamp[5:7],
                                                                                         timestamp[8:10], leaf_dataset))
                target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                            self.clowderspace, season_name, experiment_name,
                                                            plot_display_name,
                                                            timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                            leaf_ds_name=leaf_dataset)

                out_file = self.sensors.create_sensor_path(timestamp, plot=plotname, subsensor=sensor_name, filename=filename)
                if not os.path.exists(os.path.dirname(out_file)):
                    os.makedirs(os.path.dirname(out_file))

                if filename.endswith(".tif") and (not file_exists(out_file) or self.overwrite):
                    """If file is a geoTIFF, simply clip it and upload it to Clowder"""
                    clip_raster(file_path, tuples, out_path=out_file)

                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                elif filename.endswith(".las"):
                    """If file is LAS, we can merge with any existing scan+plot output safely"""
                    merged_out = os.path.join(os.path.dirname(out_file), target_scan + "_merged.las")
                    merged_txt = merged_out.replace(".las", "_contents.txt")

                    already_merged = False
                    if os.path.exists(merged_txt):
                        # Check if contents
                        with open(merged_txt, 'r') as contents:
                            for entry in contents.readlines():
                                if entry.strip() == file_path:
                                    already_merged = True
                                    break
                    if not already_merged:
                        clip_las(file_path, tuples, out_path=out_file, merged_path=merged_out)
                        with open(merged_txt, 'a') as contents:
                            contents.write(file_path + "\n")

                    # Upload the individual plot shards for optimizing las2height later
                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                    # Upload the merged result if necessary
                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out,
                                                          remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(merged_out)

                    # Trigger las2height extractor
                    submit_extraction(connector, host, secret_key, target_dsid, "terra.3dscanner.las2height")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    extractor_md = build_metadata(host, self.extractor_info, resource['id'], {
        "files_created": uploaded_file_ids
    }, 'dataset')
    self.log_info(resource, "uploading extractor metadata to Level_1 dataset")
    remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.tif'):
            img_left = fname
        elif fname.endswith('_right.tif'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    target_dsid = resource['id']
    left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    self.log_info(resource, "determining image quality")
    left_qual = getImageQuality(img_left)
    if not self.leftonly:
        right_qual = getImageQuality(img_right)

    left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
    right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])

    if not file_exists(left_nrmac_tiff) or self.overwrite:
        self.log_info(resource, "creating %s" % left_nrmac_tiff)
        create_geotiff(np.array([[left_qual, left_qual], [left_qual, left_qual]]), left_bounds,
                       left_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(left_nrmac_tiff)
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_nrmac_tiff,
                                          remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        self.log_info(resource, "uploading %s" % left_nrmac_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_nrmac_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not self.leftonly:
        if not file_exists(right_nrmac_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % right_nrmac_tiff)
            create_geotiff(np.array([[right_qual, right_qual], [right_qual, right_qual]]), right_bounds,
                           right_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(right_nrmac_tiff)
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_nrmac_tiff,
                                              remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            self.log_info(resource, "uploading %s" % right_nrmac_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_nrmac_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    md = {
        "files_created": uploaded_file_ids,
        "left_quality_score": left_qual
    }
    if not self.leftonly:
        md["right_quality_score"] = right_qual
    extractor_md = build_metadata(host, self.extractor_info, resource['id'], md, 'file')
    self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
    remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)