def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    f = resource['local_paths'][0]

    self.log_info(resource, "determining image quality")
    qual = getImageQuality(f)

    self.log_info(resource, "creating output image")
    md = download_ds_metadata(connector, host, secret_key, resource['parent']['id'])
    terramd = get_terraref_metadata(md)
    if "left" in f:
        bounds = geojson_to_tuples(terramd['spatial_metadata']['left']['bounding_box'])
    else:
        bounds = geojson_to_tuples(terramd['spatial_metadata']['right']['bounding_box'])
    output = f.replace(".tif", "_nrmac.tif")
    # Write the quality score as a tiny 2x2 geotiff with the source image's bounds
    create_geotiff(np.array([[qual, qual], [qual, qual]]), bounds, output)
    upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                      resource['parent']['id'], output)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                              {"quality_score": qual}, 'file')
    self.log_info(resource, "uploading extractor metadata")
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check for a left and right TIF file - skip if not found
    if not contains_required_files(resource, ['_left.tif', '_right.tif']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if not self.force:
            # Skip unless the NRMAC quality score is available and within threshold
            nrmac_md = get_extractor_metadata(md, "terra.stereo-rgb.nrmac")
            if not (nrmac_md and 'left_quality_score' in nrmac_md):
                self.log_skip(resource, "NRMAC quality score not available")
                return CheckMessage.ignore
            elif float(nrmac_md['left_quality_score']) > self.threshold:
                self.log_skip(resource, "NRMAC quality score %s is above threshold of %s"
                              % (float(nrmac_md['left_quality_score']), self.threshold))
                return CheckMessage.ignore

        if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            left_enh_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
            right_enh_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
            if file_exists(left_enh_tiff) and file_exists(right_enh_tiff):
                if contains_required_files(resource, [os.path.basename(left_enh_tiff),
                                                      os.path.basename(right_enh_tiff)]):
                    self.log_skip(resource, "metadata v%s and outputs already exist"
                                  % self.extractor_info['version'])
                    return CheckMessage.ignore
                else:
                    self.log_info(resource, "output files exist but not yet uploaded")
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_error(resource, "no terraref metadata found")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check for a left and right BIN file - skip if not found
    if not contains_required_files(resource, ['_left.bin', '_right.bin']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if not self.overwrite and get_extractor_metadata(md, self.extractor_info['name'],
                                                         self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
            right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
            if file_exists(left_tiff) and file_exists(right_tiff):
                self.log_skip(resource, "metadata v%s and outputs already exist"
                              % self.extractor_info['version'])
                return CheckMessage.ignore
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_error(resource, "no terraref metadata found; sending to cleaner")
        submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check for an _ir.bin file before beginning processing
    if not contains_required_files(resource, ['_ir.bin']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            tif = self.sensors.get_sensor_path(timestamp)
            png = tif.replace(".tif", ".png")
            if file_exists(png) and file_exists(tif):
                self.log_skip(resource, "metadata v%s and outputs already exist"
                              % self.extractor_info['version'])
                return CheckMessage.ignore
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")
        return CheckMessage.ignore
def process_message_individual(self, connector, host, secret_key, resource, parameters):
    """Deprecated method that operates on a single capture, not the field mosaic."""
    self.start_message()

    input_image = resource['local_paths'][0]

    # Create output in same directory as input, but check name
    ds_md = get_info(connector, host, secret_key, resource['parent']['id'])
    terra_md = get_terraref_metadata(
        download_metadata(connector, host, secret_key, resource['parent']['id']), 'stereoTop')
    dataset_name = ds_md['name']
    timestamp = dataset_name.split(" - ")[1]

    # Is this the left or right half?
    side = 'left' if resource['name'].find("_left") > -1 else 'right'
    gps_bounds = geojson_to_tuples(terra_md['spatial_metadata'][side]['bounding_box'])

    out_csv = self.sensors.create_sensor_path(timestamp, opts=[side], ext='csv')
    out_dgci = out_csv.replace(".csv", "_dgci.png")
    out_edge = out_csv.replace(".csv", "_edge.png")
    out_label = out_csv.replace(".csv", "_label.png")
    out_dgci_tif = out_dgci.replace('.png', '.tif')
    out_edge_tif = out_edge.replace('.png', '.tif')
    out_label_tif = out_label.replace('.png', '.tif')

    self.generate_all_outputs(input_image, out_csv, out_dgci, out_edge, out_label, gps_bounds)

    fileids = []
    for file_to_upload in [out_csv, out_dgci_tif, out_edge_tif, out_label_tif]:
        if os.path.isfile(file_to_upload):
            if file_to_upload not in resource['local_paths']:
                # TODO: Should this be written to a separate dataset?
                #target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                #                                      self.sensors.get_display_name(),
                #                                      timestamp[:4], timestamp[5:7], timestamp[8:10],
                #                                      leaf_ds_name=dataset_name)

                # Send output to Clowder source dataset
                fileids.append(upload_to_dataset(connector, host, secret_key,
                                                 resource['parent']['id'], file_to_upload))
            self.created += 1
            self.bytes += os.path.getsize(file_to_upload)

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(host, self.extractor_info, resource['parent']['id'],
                              {"files_created": fileids}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['parent']['id'], ext_meta)

    self.end_message()
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    if not contains_required_files(resource, ['raw', 'raw.hdr', 'image.jpg',
                                              'frameIndex.txt', 'settings.txt']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_fullname = 'swir_netcdf'
    else:
        sensor_fullname = 'vnir_netcdf'

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
            # Make sure outputs properly exist
            out_nc = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname)
            if file_exists(out_nc):
                self.log_skip(resource, "metadata v%s and outputs already exist"
                              % self.extractor_info['version'])
                return CheckMessage.ignore
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")

        # See if we can recover it from disk
        if sensor_fullname == 'vnir_netcdf':
            date = timestamp.split("__")[0]
            source_dir = "/home/extractor/sites/ua-mac/raw_data/VNIR/%s/%s/" % (date, timestamp)
            for f in os.listdir(source_dir):
                if f.endswith("_metadata.json"):
                    self.log_info(resource, "updating metadata from %s" % f)
                    raw_dsmd = load_json_file(os.path.join(source_dir, f))
                    clean_md = clean_metadata(raw_dsmd, 'VNIR')
                    complete_md = build_metadata(host, self.extractor_info, resource['id'],
                                                 clean_md, 'dataset')
                    remove_metadata(connector, host, secret_key, resource['id'])
                    upload_metadata(connector, host, secret_key, resource['id'], complete_md)
                    return CheckMessage.download
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check for a left and right TIF file - skip if not found
    if not contains_required_files(resource, ['_left.tif', '_right.tif']):
        self.log_skip(resource, "missing required files")

        # Check for raw_data_source in metadata and resubmit to bin2tif if available
        md = download_metadata(connector, host, secret_key, resource['id'])
        terra_md = get_terraref_metadata(md)
        if 'raw_data_source' in terra_md:
            raw_id = str(terra_md['raw_data_source'].split("/")[-1])
            self.log_info(resource, "submitting raw source %s to bin2tif" % raw_id)
            submit_extraction(connector, host, secret_key, raw_id, "terra.stereo-rgb.bin2tif")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
            right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
            if (self.leftonly and file_exists(left_nrmac_tiff)) or \
                    (not self.leftonly and file_exists(left_nrmac_tiff) and file_exists(right_nrmac_tiff)):
                if contains_required_files(resource, [os.path.basename(left_nrmac_tiff)]):
                    self.log_skip(resource, "metadata v%s and outputs already exist"
                                  % self.extractor_info['version'])
                    return CheckMessage.ignore
                else:
                    self.log_info(resource, "output file exists but not yet uploaded")
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check for a left and right BIN file - skip if not found
    found_left = False
    found_right = False
    for f in resource['files']:
        if 'filename' in f:
            if f['filename'].endswith('_left.bin'):
                found_left = True
            elif f['filename'].endswith('_right.bin'):
                found_right = True
    if not (found_left and found_right):
        self.log_skip(resource, "found left: %s, right: %s" % (found_left, found_right))
        return CheckMessage.ignore

    # Check if outputs already exist unless overwrite is forced - skip if found
    if not self.overwrite:
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        lbase = self.sensors.get_sensor_path(timestamp, opts=['left'], ext='')
        rbase = self.sensors.get_sensor_path(timestamp, opts=['right'], ext='')
        out_dir = os.path.dirname(lbase)
        if os.path.isfile(lbase + 'tif') and os.path.isfile(rbase + 'tif'):
            self.log_skip(resource, "outputs found in %s" % out_dir)
            return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_extractor_metadata(md, self.extractor_info['name']) and not self.overwrite:
        self.log_skip(resource, "metadata indicates it was already processed")
        return CheckMessage.ignore
    if get_terraref_metadata(md):
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")
        return CheckMessage.ignore
def do_work(left_file, right_file, json_file):
    """Make the calls to convert the files
    Args:
        left_file(str): Path to the left BIN file
        right_file(str): Path to the right BIN file
        json_file(str): Path to the JSON file
    """
    out_left = os.path.splitext(left_file)[0] + ".tif"
    out_right = os.path.splitext(right_file)[0] + ".tif"

    file_name, file_ext = os.path.splitext(json_file)
    out_json = file_name + "_updated" + file_ext

    # Load the JSON
    with open(json_file, "r") as infile:
        metadata = json.load(infile)
    if not metadata:
        raise RuntimeError("JSON file appears to be invalid: " + json_file)
    md_len = len(metadata)
    if md_len <= 0:
        raise RuntimeError("JSON file is empty: " + json_file)

    # Prepare the metadata
    clean_md = get_terraref_metadata(clean_metadata(metadata, 'stereoTop'), 'stereoTop')

    # Pull out the information we need from the JSON
    try:
        left_shape = terraref.stereo_rgb.get_image_shape(clean_md, 'left')
        gps_bounds_left = geojson_to_tuples(clean_md['spatial_metadata']['left']['bounding_box'])
        right_shape = terraref.stereo_rgb.get_image_shape(clean_md, 'right')
        gps_bounds_right = geojson_to_tuples(clean_md['spatial_metadata']['right']['bounding_box'])
    except KeyError:
        print("ERROR: Spatial metadata not properly identified in JSON file")
        return

    # Make the conversion calls
    print("creating %s" % out_left)
    left_image = terraref.stereo_rgb.process_raw(left_shape, left_file, None)
    create_geotiff(left_image, gps_bounds_left, out_left, asfloat=False,
                   system_md=clean_md, compress=False)

    print("creating %s" % out_right)
    right_image = terraref.stereo_rgb.process_raw(right_shape, right_file, None)
    create_geotiff(right_image, gps_bounds_right, out_right, asfloat=False,
                   system_md=clean_md, compress=True)

    # Write the metadata
    print("creating %s" % out_json)
    with open(out_json, "w") as outfile:
        json.dump(clean_md, outfile, indent=4)
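# A minimal sketch of a command-line entry point for do_work(), assuming the script
# is invoked with explicit left/right BIN paths plus the capture's metadata JSON.
# The argument names here are illustrative, not part of the original script.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Convert a stereo BIN pair to GeoTIFF")
    parser.add_argument("left_file", help="path to the left BIN file")
    parser.add_argument("right_file", help="path to the right BIN file")
    parser.add_argument("json_file", help="path to the LemnaTec metadata JSON file")
    args = parser.parse_args()

    do_work(args.left_file, args.right_file, args.json_file)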
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    if not is_latest_file(resource):
        return CheckMessage.ignore

    # Check for an ir.BIN file and metadata before beginning processing
    found_ir = None
    found_md = None
    for f in resource['files']:
        if 'filename' in f and f['filename'].endswith('_ir.bin'):
            found_ir = f['filepath']
        elif 'filename' in f and f['filename'].endswith('_metadata.json'):
            found_md = f['filepath']

    if found_ir:
        # Check if outputs already exist
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        png_path = self.sensors.get_sensor_path(timestamp, ext='png')
        tiff_path = self.sensors.get_sensor_path(timestamp)
        if os.path.exists(png_path) and os.path.exists(tiff_path) and not self.overwrite:
            logging.getLogger(__name__).info("skipping dataset %s, outputs already exist"
                                             % resource['id'])
            return CheckMessage.ignore

        # If we don't find a _metadata.json file, check if we have metadata attached to the dataset instead
        if not found_md:
            md = download_metadata(connector, host, secret_key, resource['id'])
            if get_extractor_metadata(md, self.extractor_info['name']) and not self.overwrite:
                logging.getLogger(__name__).info("skipping dataset %s, already processed"
                                                 % resource['id'])
                return CheckMessage.ignore
            if get_terraref_metadata(md):
                return CheckMessage.download
            return CheckMessage.ignore
        else:
            return CheckMessage.download
    return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    # Check for 0000-0101 bin files before beginning processing
    if len(resource['files']) < 102:
        self.log_skip(resource, "less than 102 files found")
        return CheckMessage.ignore

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    hist_path = self.sensors.get_sensor_path(timestamp, opts=['combined_hist'])
    coloredImg_path = self.sensors.get_sensor_path(timestamp, opts=['combined_pseudocolored'])

    # Count number of bin files in dataset, as well as number of existing outputs
    ind_add = 0
    ind_output = 0
    for ind in range(0, 102):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        for f in resource['files']:
            if f['filename'].endswith(format_ind + '.bin'):
                ind_add += 1
                out_png = self.sensors.get_sensor_path(timestamp, opts=[format_ind])
                if os.path.exists(out_png) and not self.overwrite:
                    ind_output += 1
                break

    # Do the outputs already exist?
    if ind_output == 102 and os.path.exists(hist_path) and os.path.exists(coloredImg_path):
        self.log_skip(resource, "outputs already exist")
        return CheckMessage.ignore

    # Do we have too few input BIN files?
    if ind_add < 102:
        self.log_skip(resource, "less than 102 .bin files found")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_extractor_metadata(md, self.extractor_info['name']) and not self.overwrite:
        self.log_skip(resource, "metadata indicates it was already processed")
        return CheckMessage.ignore
    if get_terraref_metadata(md):
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")
        return CheckMessage.ignore
def check_message(self, connector, host, secret_key, resource, parameters):
    #if not is_latest_file(resource):
    #    return CheckMessage.ignore

    # Adjust sensor path based on VNIR vs SWIR
    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_fullname = 'swir_netcdf'
    else:
        sensor_fullname = 'vnir_netcdf'

    if has_all_files(resource):
        # Check if output already exists
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        outFilePath = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname)

        if os.path.exists(outFilePath) and not self.overwrite:
            logging.getLogger(__name__).info('skipping dataset %s, output file already exists'
                                             % resource['id'])
            return CheckMessage.ignore
        else:
            # Check if we have necessary metadata, either as a .json file or attached to the dataset
            md = download_metadata(connector, host, secret_key, resource['id'],
                                   self.extractor_info['name'])
            if get_extractor_metadata(md, self.extractor_info['name']) and not self.overwrite:
                logging.getLogger(__name__).info("skipping dataset %s, already processed"
                                                 % resource['id'])
                return CheckMessage.ignore
            elif get_terraref_metadata(md):
                return CheckMessage.download
            else:
                for f in resource['files']:
                    if f['filename'] == 'metadata.json':
                        return CheckMessage.download
                return CheckMessage.ignore
    else:
        logging.getLogger(__name__).info('skipping dataset %s, not all input files are ready'
                                         % resource['id'])
        return CheckMessage.ignore
def get_clowder_metadata(key, timestamp):
    # Look up the stereoTop dataset for this timestamp by exact title match
    resp = requests.get("https://terraref.ncsa.illinois.edu/clowder/api/datasets"
                        "?key=%s&exact=true&title=stereoTop - %s" % (key, timestamp))
    resp.raise_for_status()
    datasetId = resp.json()[0]["id"]

    # Fetch the JSON-LD metadata attached to that dataset
    resp = requests.get("https://terraref.ncsa.illinois.edu/clowder/api/datasets/%s/metadata.jsonld"
                        "?key=%s" % (datasetId, key))
    resp.raise_for_status()
    content = resp.json()[1]["content"]

    metadata = get_terraref_metadata(content, 'stereoTop')
    return metadata
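# Example usage sketch; the API key and timestamp below are hypothetical, shown only
# for illustration. Both requests raise on HTTP errors and an IndexError is possible
# if no dataset matches the title, so callers may want to wrap this in a try/except.
if __name__ == "__main__":
    md = get_clowder_metadata("my-clowder-api-key", "2017-06-01__10-00-00-000")
    print(md['spatial_metadata']['left']['bounding_box'])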
def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    self.start_check(resource)

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    # Check metadata to verify we have what we need
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        # Check for a left and right TIF file - skip if not found
        # If we're only processing the left files, don't check for the right file
        needed_files = ['_left.tif']
        if not self.leftonly:
            needed_files.append('_right.tif')
        if not contains_required_files(resource, needed_files):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
            # Make sure outputs properly exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            left_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
            right_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
            if (self.leftonly and file_exists(left_mask_tiff)) or \
                    (not self.leftonly and file_exists(left_mask_tiff) and file_exists(right_mask_tiff)):
                self.log_skip(resource, "metadata v%s and outputs already exist"
                              % self.extractor_info['version'])
                return CheckMessage.ignore
    # Check for other images to create a mask on
    elif not contains_required_files(resource, ['.tif']):
        self.log_skip(resource, "missing required tiff file")
        return CheckMessage.ignore

    # Have TERRA-REF metadata, but not any from this extractor
    return CheckMessage.download
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    # Find input files in dataset
    target_files = {
        'raw': None,
        'raw.hdr': None,
        'image.jpg': None,
        'frameIndex.txt': None,
        'settings.txt': None,
        '_metadata.json': None
    }
    metafile = None
    ds_metafile = None
    last_path = None
    path_match = None
    tempdir = None
    symlinks = []
    for f in resource['local_paths']:
        for fileExt in target_files.keys():
            if f.endswith(fileExt):
                if fileExt != '_metadata.json':
                    filedir = os.path.dirname(f)
                    if not last_path:
                        last_path = filedir
                    else:
                        if filedir != last_path:
                            path_match = False
                        last_path = filedir
                    target_files[fileExt] = {'filename': os.path.basename(f), 'path': f}
                else:
                    if f.endswith('/_dataset_metadata.json'):
                        ds_metafile = f
                    elif not f.endswith('/_metadata.json'):
                        metafile = f
                        target_files['_metadata.json'] = {'filename': os.path.basename(metafile),
                                                          'path': metafile}

    # Identify md file either with other dataset files, or attached to Clowder dataset
    if metafile is None:
        if ds_metafile is not None:
            # Found dataset metadata, so check for the .json file alongside other files
            logging.getLogger(__name__).info("...checking for local metadata file alongside other files")
            ds_dir = os.path.dirname(target_files['raw']['path'])
            for ds_f in os.listdir(ds_dir):
                if ds_f.endswith("_metadata.json"):
                    target_files['_metadata.json']['path'] = os.path.join(ds_dir, ds_f)
        else:
            raise ValueError('could not locate metadata for %s' % resource['id'])

    # Create symlinks in one directory if inputs aren't in the same one
    if not path_match:
        tempdir = tempfile.mkdtemp()
        for f in target_files.keys():
            currf = target_files[f]
            if currf['filename'] == '_dataset_metadata.json':
                # Open the temporary file and change the JSON content format
                with open(currf['path'], 'r') as mdfile:
                    jsondata = json.load(mdfile)
                md = get_terraref_metadata(jsondata)
                with open(currf['path'], 'w') as mdfile:
                    json.dump(md, mdfile)
                newf = os.path.join(tempdir,
                                    target_files['raw']['filename'].replace("_raw", "") + '_metadata.json')
            else:
                newf = os.path.join(tempdir, currf['filename'])
            os.symlink(currf['path'], newf)
            symlinks.append(newf)

    # Adjust sensor path based on VNIR vs SWIR and check for soil mask
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_fullname = 'swir_netcdf'
        soil_mask = None
    else:
        sensor_fullname = 'vnir_netcdf'
        # Check for corresponding soil mask to include in workflow.sh if available
        soil_mask = self.sensors.get_sensor_path(timestamp, sensor='vnir_soil_masks', opts=['soil_mask'])
    outFilePath = self.sensors.create_sensor_path(timestamp, sensor=sensor_fullname)

    # Invoke hyperspectral_workflow.sh
    logging.getLogger(__name__).info('invoking hyperspectral_workflow.sh to create: %s' % outFilePath)
    # TODO: Move this
    script_path = "/projects/arpae/terraref/shared/extractors/extractors-hyperspectral/hyperspectral/hyperspectral_workflow.sh"
    if soil_mask:
        returncode = subprocess.call(["bash", script_path, "-d", "1", "-h",
                                      "-m", soil_mask, "--new_clb_mth",
                                      "-i", target_files['raw']['path'], "-o", outFilePath])
    else:
        returncode = subprocess.call(["bash", script_path, "-d", "1", "-h", "--new_clb_mth",
                                      "-i", target_files['raw']['path'], "-o", outFilePath])

    # Verify outfile exists and upload to Clowder
    logging.getLogger(__name__).info('done creating output file (%s)' % returncode)
    if returncode != 0:
        raise ValueError('script encountered an error')
    if os.path.exists(outFilePath):
        if returncode == 0:
            if outFilePath not in resource['local_paths']:
                target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user,
                                                      self.clowder_pass, self.clowderspace,
                                                      self.sensors.get_display_name(sensor=sensor_fullname),
                                                      timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                      leaf_ds_name=self.sensors.get_display_name(sensor=sensor_fullname) + ' - ' + timestamp)
                logging.getLogger(__name__).info('uploading %s' % outFilePath)
                upload_to_dataset(connector, host, secret_key, target_dsid, outFilePath)
            self.created += 1
            self.bytes += os.path.getsize(outFilePath)
    else:
        logging.getLogger(__name__).error('no output file was produced')

    # Send indices to betyDB
    ind_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['_ind'])
    with Dataset(ind_file, "r") as netCDF_handle:
        ndvi = netCDF_handle.get_variables_by_attributes(
            standard_name='normalized_difference_chlorophyll_index_750_705')
        NDVI705 = ndvi[0].getValue().ravel()[0]

        # TODO: Create CSV using ndviVal as primary key
        tmp_csv = 'traits.csv'
        plot_no = 'Full Field'
        csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
                     'citation_author,citation_year,citation_title,method'
        csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
                   'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
                       timestamp, NDVI705, plot_no)
        with open(tmp_csv, 'w') as c:
            c.write(csv_header + '\n' + csv_vals)
        submit_traits(tmp_csv, bety_key=self.bety_key)

    # Remove symlinks and temp directory
    for sym in symlinks:
        os.remove(sym)
    if tempdir:
        os.rmdir(tempdir)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    sensor_type, timestamp = resource['name'].split(" - ")

    # First, re-check metadata to verify it hasn't been added in the meantime
    ds_md = download_metadata(connector, host, secret_key, resource['id'])
    terra_md = get_terraref_metadata(ds_md)
    if terra_md:
        self.log_info(resource, "Found TERRA-REF metadata; not cleaning")
        return

    # These datasets do not have TERRA md
    uncleanables = ["Full Field"]
    if sensor_type in uncleanables:
        self.log_info(resource, "Cannot clean metadata for %s" % sensor_type)
        return

    # For these datasets, we must get TERRA md from the raw_data source
    lv1_types = {"RGB GeoTIFFs": "stereoTop",
                 "Thermal IR GeoTIFFs": "flirIrCamera"}
    if sensor_type in lv1_types:
        raw_equiv = resource['name'].replace(sensor_type, lv1_types[sensor_type])
        source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(raw_equiv))
    else:
        # Search for metadata.json source file
        source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(resource['name']))
    source_dir = self.remapMountPath(connector, source_dir)

    if self.delete:
        # Delete all existing metadata from this dataset
        self.log_info(resource, "Deleting existing metadata")
        delete_dataset_metadata(host, self.clowder_user, self.clowder_pass, resource['id'])

    # TODO: split between the PLY files (in Level_1) and metadata.json files - unique to this sensor
    if sensor_type == "scanner3DTop":
        source_dir = source_dir.replace("Level_1", "raw_data")

    self.log_info(resource, "Searching for metadata.json in %s" % source_dir)
    if os.path.isdir(source_dir):
        md_file = None
        for f in os.listdir(source_dir):
            if f.endswith("metadata.json"):
                md_file = os.path.join(source_dir, f)
        if md_file:
            self.log_info(resource, "Found metadata.json; cleaning")
            md_json = clean_metadata(load_json_file(md_file), sensor_type)
            format_md = {
                "@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                             {"@vocab": "https://terraref.ncsa.illinois.edu/metadata/uamac#"}],
                "content": md_json,
                "agent": {
                    "@type": "cat:user",
                    "user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % self.userid
                }
            }
            self.log_info(resource, "Uploading cleaned metadata")
            upload_metadata(connector, host, secret_key, resource['id'], format_md)

            # Now trigger a callback extraction if given
            if len(self.callback) > 0:
                self.log_info(resource, "Submitting callback extraction to %s" % self.callback)
                submit_extraction(connector, host, secret_key, resource['id'], self.callback)
            else:
                callbacks = self.get_callbacks_by_sensor(sensor_type)
                if callbacks:
                    for c in callbacks:
                        self.log_info(resource, "Submitting callback extraction to %s" % c)
                        submit_extraction(connector, host, secret_key, resource['id'], c)
                else:
                    self.log_info(resource, "No default callback found for %s" % sensor_type)
        else:
            self.log_error(resource, "metadata.json not found in %s" % source_dir)
    else:
        self.log_error(resource, "%s could not be found" % source_dir)

    # TODO: Have extractor check for existence of Level_1 output product and delete if it exists?
    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, metadata = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            metadata = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.bin'):
            img_left = fname
        elif fname.endswith('_right.bin'):
            img_right = fname
    if None in [img_left, img_right, metadata]:
        self.log_error(resource, "could not locate each of left+right+metadata in processing")
        raise ValueError("could not locate each of left+right+metadata in processing")

    # Determine output location & filenames
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    self.log_info(resource, "determining image shapes & gps bounds")
    left_shape = bin2tiff.get_image_shape(metadata, 'left')
    right_shape = bin2tiff.get_image_shape(metadata, 'right')
    left_gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['left']['bounding_box'])
    right_gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['right']['bounding_box'])
    out_tmp_tiff = os.path.join(tempfile.gettempdir(), resource['id'].encode('utf8'))

    target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass,
                                          self.clowderspace, self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[5:7], timestamp[8:10],
                                          leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

    if (not os.path.isfile(left_tiff)) or self.overwrite:
        self.log_info(resource, "creating & uploading %s" % left_tiff)
        left_image = bin2tiff.process_image(left_shape, img_left, None)
        # Rename output.tif after creation to avoid long path errors
        create_geotiff(left_image, left_gps_bounds, out_tmp_tiff, None, False,
                       self.extractor_info, metadata)
        # TODO: we're moving zero byte files
        shutil.move(out_tmp_tiff, left_tiff)
        if left_tiff not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                       target_dsid, left_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        else:
            self.log_info(resource, "file found in dataset already; not re-uploading")
        self.created += 1
        self.bytes += os.path.getsize(left_tiff)

    if (not os.path.isfile(right_tiff)) or self.overwrite:
        self.log_info(resource, "creating & uploading %s" % right_tiff)
        right_image = bin2tiff.process_image(right_shape, img_right, None)
        create_geotiff(right_image, right_gps_bounds, out_tmp_tiff, None, False,
                       self.extractor_info, metadata)
        shutil.move(out_tmp_tiff, right_tiff)
        if right_tiff not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                       target_dsid, right_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        else:
            self.log_info(resource, "file found in dataset already; not re-uploading")
        self.created += 1
        self.bytes += os.path.getsize(right_tiff)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                              {"files_created": uploaded_file_ids}, 'dataset')
    self.log_info(resource, "uploading extractor metadata")
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    # Upload original LemnaTec metadata to new Level_1 dataset
    md = get_terraref_metadata(all_dsmd)
    md['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
    lemna_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
    self.log_info(resource, "uploading LemnaTec metadata")
    upload_metadata(connector, host, secret_key, target_dsid, lemna_md)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Clean tmp directory from any potential failed previous runs
    flist = os.listdir("/tmp")
    for f in flist:
        try:
            os.remove(os.path.join("/tmp", f))
        except:
            pass

    """ if file is above configured limit, skip it
    max_gb = 24  # RAM has 4x requirement, e.g. 24GB requires 96GB RAM
    for fname in resource['local_paths']:
        if fname.endswith('raw'):
            rawfile = fname
    rawsize = os.stat(rawfile).st_size
    if rawsize > max_gb * 1000000000:
        self.log_skip(resource, "filesize %sGB exceeds available RAM" % int(rawsize/1000000000))
        return False
    """

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_rawname = 'SWIR'
        sensor_fullname = 'swir_netcdf'
        soil_mask = None
    else:
        sensor_rawname = 'VNIR'
        sensor_fullname = 'vnir_netcdf'
        # Check for corresponding soil mask to include in workflow.sh if available
        soil_mask = self.sensors.get_sensor_path(timestamp, sensor='vnir_soil_masks', opts=['soil_mask'])
    out_nc = self.sensors.create_sensor_path(timestamp, sensor=sensor_fullname)
    xps_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['xps'])
    ind_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['ind'])
    csv_file = self.sensors.get_sensor_path(timestamp,
                                            sensor=sensor_fullname.replace("_netcdf", "_traits"))

    raw_file, terra_md_full = None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, sensor_rawname)
        elif fname.endswith('raw'):
            raw_file = fname
    if None in [raw_file, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(
        timestamp, sensor_rawname, terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    print_name = self.sensors.get_display_name(sensor=sensor_fullname)
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s"
                  % (season_name, experiment_name, print_name,
                     timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                self.clowderspace, season_name, experiment_name,
                                                print_name, timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    uploaded_file_ids = []

    # Perform actual processing
    if (not file_exists(out_nc)) or self.overwrite:
        """TODO: OLD AND NOT USED
        self.log_info(resource, 'invoking hyperspectral_workflow.sh to create: %s' % out_nc)
        if soil_mask and file_exists(soil_mask):
            # If soil mask exists, we can generate an _ind indices file
            returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
                                          "-m", soil_mask, "--output_xps_img", xps_file,
                                          "-i", raw_file, "-o", out_nc])  # disable --new_clb_mth
        else:
            # Otherwise we cannot, and need to trigger soilmask extractor and circle back later
            returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
                                          "--output_xps_img", xps_file,
                                          "-i", raw_file, "-o", out_nc])  # disable --new_clb_mth
        if returncode != 0:
            raise ValueError('script encountered an error')
        """

        self.log_info(resource, 'invoking python calibration to create: %s' % out_nc)
        create_empty_netCDF(raw_file, out_nc)
        self.log_info(resource, 'applying calibration to: %s' % out_nc)
        apply_calibration(raw_file, out_nc)
        self.log_info(resource, 'done applying calibration to %s' % raw_file)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_nc,
                                          remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_nc)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        self.created += 1
        self.bytes += os.path.getsize(out_nc)

    # TODO: Still compatible?
    #if not soil_mask:
    #    self.log_info(resource, "triggering soil mask extractor on %s" % fileid)
    #    submit_extraction(connector, host, secret_key, fileid, "terra.sunshade.soil_removal")

    # TODO: Send output to BETYdb
    """
    # Send indices to betyDB
    if file_exists(ind_file):
        # TODO: Use ncks to trim ind_file to plots before this step
        plot_no = 'Full Field'
        with Dataset(ind_file, "r") as netCDF_handle:
            ndvi = netCDF_handle.get_variables_by_attributes(
                standard_name='normalized_difference_chlorophyll_index_750_705')
            NDVI705 = ndvi[0].getValue().ravel()[0]

            # TODO: Map the remaining ~50 variables in BETY to create indices file
            # TODO: In netCDF header,
            csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
                         'citation_author,citation_year,citation_title,method'
            csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
                       'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
                           timestamp, NDVI705, plot_no)
            with open(csv_file, 'w') as c:
                c.write(csv_header+'\n'+csv_vals)

        # TODO: Send this CSV to betydb & geostreams extractors instead
        submit_traits(csv_file, bety_key=self.bety_key)
    """

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get bin files and metadata
    metadata = None
    for f in resource['local_paths']:
        # First check metadata attached to dataset in Clowder for item of interest
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            metadata = get_terraref_metadata(all_dsmd, "ps2Top")
        # Otherwise, check if metadata was uploaded as a .json file
        elif f.endswith('_metadata.json') and f.find('/_metadata.json') == -1 and metadata is None:
            metadata = load_json_file(f)
    frames = {}
    for ind in range(0, 101):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        for f in resource['local_paths']:
            if f.endswith(format_ind + '.bin'):
                frames[ind] = f
    if metadata is None or len(frames) < 101:
        self.log_error(resource, 'could not find all of frames/metadata')
        return

    # Determine output directory
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    hist_path = self.sensors.create_sensor_path(timestamp, opts=['combined_hist'])
    coloredImg_path = self.sensors.create_sensor_path(timestamp, opts=['combined_pseudocolored'])
    uploaded_file_ids = []
    target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass,
                                          self.clowderspace, self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[5:7], timestamp[8:10],
                                          leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

    (img_width, img_height) = self.get_image_dimensions(metadata)
    gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['ps2Top']['bounding_box'])
    self.log_info(resource, "image dimensions (w, h): (%s, %s)" % (img_width, img_height))

    png_frames = {}
    # Skip 0101.bin, since file 101 is an XML file that lists the frame times
    for ind in range(0, 101):
        format_ind = "{0:0>4}".format(ind)  # e.g. 1 becomes 0001
        png_path = self.sensors.create_sensor_path(timestamp, opts=[format_ind])
        tif_path = png_path.replace(".png", ".tif")
        png_frames[ind] = png_path
        if not os.path.exists(png_path) or self.overwrite:
            self.log_info(resource, "generating and uploading %s" % png_path)
            pixels = np.fromfile(frames[ind], np.dtype('uint8')).reshape([int(img_height),
                                                                          int(img_width)])
            create_image(pixels, png_path)
            create_geotiff(pixels, gps_bounds, tif_path, None, False, self.extractor_info, metadata)
            if png_path not in resource['local_paths']:
                fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
                uploaded_file_ids.append(fileid)
            self.created += 1
            self.bytes += os.path.getsize(png_path)

    # Generate aggregate outputs
    self.log_info(resource, "generating aggregates")
    if not (os.path.exists(hist_path) and os.path.exists(coloredImg_path)) or self.overwrite:
        # TODO: Coerce histogram and pseudocolor to geotiff?
        self.analyze(int(img_width), int(img_height), png_frames, hist_path, coloredImg_path)
        self.created += 2
        self.bytes += os.path.getsize(hist_path) + os.path.getsize(coloredImg_path)
    if hist_path not in resource['local_paths']:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, hist_path)
        uploaded_file_ids.append(fileid)
    if coloredImg_path not in resource['local_paths']:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, coloredImg_path)
        uploaded_file_ids.append(fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    ext_meta = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    self.log_info(resource, "uploading extractor metadata")
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get BIN file and metadata
    bin_file, terra_md_full = None, None
    for f in resource['local_paths']:
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            terra_md_full = get_terraref_metadata(all_dsmd, 'flirIrCamera')
        elif f.endswith('_ir.bin'):
            bin_file = f
    if None in [bin_file, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(
        timestamp, 'flirIrCamera', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s"
                  % (season_name, experiment_name, self.sensors.get_display_name(),
                     timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                self.clowderspace, season_name, experiment_name,
                                                self.sensors.get_display_name(),
                                                timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    tiff_path = self.sensors.create_sensor_path(timestamp)
    png_path = tiff_path.replace(".tif", ".png")
    uploaded_file_ids = []

    # Attach LemnaTec source metadata to Level_1 product
    self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
    remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
    terra_md_trim = get_terraref_metadata(all_dsmd)
    if updated_experiment is not None:
        terra_md_trim['experiment_metadata'] = updated_experiment
    terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
    level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
    upload_metadata(connector, host, secret_key, target_dsid, level1_md)

    skipped_png = False
    if not file_exists(png_path) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating & uploading %s" % png_path)
        raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
        raw_data = numpy.rot90(raw_data, 3)
        create_image(raw_data, png_path, self.scale_values)
        self.created += 1
        self.bytes += os.path.getsize(png_path)
    else:
        skipped_png = True
    # Only upload the newly generated file to Clowder if it isn't already in the dataset
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, png_path,
                                          remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not file_exists(tiff_path) or self.overwrite:
        # Generate temperature matrix and perform actual processing
        self.log_info(resource, "creating & uploading %s" % tiff_path)
        gps_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['flirIrCamera']['bounding_box'])
        if skipped_png:
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
        tc = getFlir.rawData_to_temperature(raw_data, terra_md_full)  # get temperature
        create_geotiff(tc, gps_bounds, tiff_path, None, True, self.extractor_info, terra_md_full)
        self.created += 1
        self.bytes += os.path.getsize(tiff_path)
    # Only upload the newly generated file to Clowder if it isn't already in the dataset
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, tiff_path,
                                          remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Trigger additional extractors
    self.log_info(resource, "triggering downstream extractors")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        extractor_md = build_metadata(host, self.extractor_info, target_dsid,
                                      {"files_created": uploaded_file_ids}, 'dataset')
        self.log_info(resource, "uploading extractor metadata to raw dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
lib_path = os.path.abspath(os.path.join('..', '..', 'scanner_3d'))
sys.path.append(lib_path)

from terrautils.metadata import get_terraref_metadata, clean_metadata
from terrautils.extractors import load_json_file
from scanner_3d.ply2las import generate_las_from_pdal, combine_east_west_las, geo_referencing_las, \
    geo_referencing_las_for_eachpoint_in_mac

test_id = '85f9c8c2-fa68-48a6-b63c-375daa438414'
path = os.path.join(os.path.dirname(__file__), 'test_ply2las_doc', test_id)
dire = os.path.join(os.path.dirname(__file__), 'test_ply2las_doc')

all_dsmd = load_json_file(path + '_metadata.json')
cleanmetadata = clean_metadata(all_dsmd, "scanner3DTop")
terra_md = get_terraref_metadata(cleanmetadata, 'scanner3DTop')

in_east = '/data/' + test_id + '__Top-heading-east_0.ply'
in_west = '/data/' + test_id + '__Top-heading-west_0.ply'

pdal_base = "docker run -v %s:/data pdal/pdal:1.5 " % dire
tmp_east_las = "/data/east_temp.las"
tmp_west_las = "/data/west_temp.las"
merge_las = "/data/merged.las"
convert_las = dire + "/converted.las"
convert_pt_las = dire + "/converted_pts.las"


def test_east_las():
    generate_las_from_pdal(pdal_base, in_east, tmp_east_las)
    assert os.path.isfile(dire + '/east_temp.las')
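# A companion check for the west-facing PLY, mirroring test_east_las() above.
# This is a sketch under the same assumptions: the pdal/pdal:1.5 container and
# the /data volume mapping defined at module level (inside the container the
# output is /data/west_temp.las, which maps to dire on the host).
def test_west_las():
    generate_las_from_pdal(pdal_base, in_west, tmp_west_las)
    assert os.path.isfile(dire + '/west_temp.las')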
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    # Get BIN file and metadata
    bin_file, metadata = None, None
    for f in resource['local_paths']:
        # Check metadata attached to dataset in Clowder for the item of interest
        if f.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(f)
            metadata = get_terraref_metadata(all_dsmd, 'flirIrCamera')
        elif f.endswith('_ir.bin'):
            bin_file = f
    if None in [bin_file, metadata]:
        logging.getLogger(__name__).error('could not find both the ir.bin file and metadata')
        return

    # Determine output directory
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    png_path = self.sensors.create_sensor_path(timestamp, ext='png')
    tiff_path = self.sensors.create_sensor_path(timestamp)
    uploaded_file_ids = []
    target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass,
                                          self.clowderspace, self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[5:7], timestamp[8:10],
                                          leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

    skipped_png = False
    if not os.path.exists(png_path) or self.overwrite:
        logging.getLogger(__name__).info("Generating %s" % png_path)
        # Get raw data from the bin file
        raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
        raw_data = numpy.rot90(raw_data, 3)
        create_image(raw_data, png_path, self.scale_values)
        # Only upload the newly generated file to Clowder if it isn't already in the dataset
        if png_path not in resource["local_paths"]:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        self.created += 1
        self.bytes += os.path.getsize(png_path)
    else:
        skipped_png = True

    if not os.path.exists(tiff_path) or self.overwrite:
        logging.getLogger(__name__).info("Generating temperature matrix")
        gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['flirIrCamera']['bounding_box'])
        if skipped_png:
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
        tc = getFlir.rawData_to_temperature(raw_data, metadata)  # get temperature

        logging.getLogger(__name__).info("Creating %s" % tiff_path)
        # Rename temporary tif after creation to avoid long path errors
        out_tmp_tiff = os.path.join(tempfile.gettempdir(), resource['id'].encode('utf8'))
        create_geotiff(tc, gps_bounds, out_tmp_tiff, None, True, self.extractor_info, metadata)
        shutil.move(out_tmp_tiff, tiff_path)
        if tiff_path not in resource["local_paths"]:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
        self.created += 1
        self.bytes += os.path.getsize(tiff_path)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    ext_meta = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    # Upload original LemnaTec metadata to new Level_1 dataset
    md = get_terraref_metadata(all_dsmd)
    md['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
    lemna_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
    upload_metadata(connector, host, secret_key, target_dsid, lemna_md)

    self.end_message()
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.tif'):
            img_left = fname
        elif fname.endswith('_right.tif'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    target_dsid = resource['id']
    left_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
    right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])

    if not file_exists(left_rgb_enh_tiff) or self.overwrite:
        self.log_info(resource, "creating %s" % left_rgb_enh_tiff)
        EI = getEnhancedImage(img_left)
        create_geotiff(EI, left_bounds, left_rgb_enh_tiff)
        self.created += 1
        self.bytes += os.path.getsize(left_rgb_enh_tiff)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                          left_rgb_enh_tiff, remove=self.overwrite)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_rgb_enh_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                   target_dsid, left_rgb_enh_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not file_exists(right_rgb_enh_tiff) or self.overwrite:
        self.log_info(resource, "creating %s" % right_rgb_enh_tiff)
        EI = getEnhancedImage(img_right)
        create_geotiff(EI, right_bounds, right_rgb_enh_tiff)
        self.created += 1
        self.bytes += os.path.getsize(right_rgb_enh_tiff)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                          right_rgb_enh_tiff, remove=self.overwrite)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % right_rgb_enh_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                   target_dsid, right_rgb_enh_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    ext_meta = build_metadata(host, self.extractor_info, target_dsid,
                              {"files_created": uploaded_file_ids}, 'dataset')
    self.log_info(resource, "uploading extractor metadata")
    remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, target_dsid, ext_meta)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.bin'):
            img_left = fname
        elif fname.endswith('_right.bin'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'stereoTop', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output directory
    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (
        season_name, experiment_name, self.sensors.get_display_name(),
        timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                season_name, experiment_name, self.sensors.get_display_name(),
                                                timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
    left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    # Attach LemnaTec source metadata to Level_1 product if necessary
    target_md = download_metadata(connector, host, secret_key, target_dsid)
    if not get_extractor_metadata(target_md, self.extractor_info['name']):
        self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
        remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
        terra_md_trim = get_terraref_metadata(all_dsmd)
        if updated_experiment is not None:
            terra_md_trim['experiment_metadata'] = updated_experiment
        terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
        level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, level1_md)

    try:
        left_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'left')
        gps_bounds_left = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'right')
        gps_bounds_right = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
    except KeyError:
        self.log_error(resource, "spatial metadata not properly identified; sending to cleaner")
        submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
        return

    if (not file_exists(left_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % left_tiff)
        left_image = terraref.stereo_rgb.process_raw(left_shape, img_left, None)
        create_geotiff(left_image, gps_bounds_left, left_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(left_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if (not file_exists(right_tiff)) or self.overwrite:
        # Perform actual processing
        self.log_info(resource, "creating %s" % right_tiff)
        right_image = terraref.stereo_rgb.process_raw(right_shape, img_right, None)
        create_geotiff(right_image, gps_bounds_right, right_tiff, None, True,
                       self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(right_tiff)
    # Check if the file should be uploaded, even if it was already created
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % right_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Trigger additional extractors
    self.log_info(resource, "triggering downstream extractors")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.rgbmask")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.nrmac")
    submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        extractor_md = build_metadata(host, self.extractor_info, target_dsid,
                                      {"files_created": uploaded_file_ids}, 'dataset')
        self.log_info(resource, "uploading extractor metadata to raw dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        try:
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
        except Exception:
            self.log_info(resource, "problem uploading extractor metadata...")

    self.end_message(resource)
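# --- Illustrative sketch (not part of the extractor) ---------------------
# terraref.stereo_rgb.process_raw() converts the camera's raw .bin dump into
# an RGB array using the shape from get_image_shape(). A rough sketch of the
# idea is below; the GBRG Bayer layout is an assumption for illustration, not
# confirmed from the extractor source.
import numpy as np
import cv2

def process_raw_sketch(shape, bin_path):
    # Read the 8-bit Bayer mosaic and reshape to the sensor dimensions
    raw = np.fromfile(bin_path, dtype=np.uint8).reshape(shape)
    # Demosaic the single-channel mosaic into a 3-channel RGB image
    return cv2.cvtColor(raw, cv2.COLOR_BayerGB2RGB)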
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.tif'):
            img_left = fname
        elif fname.endswith('_right.tif'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    target_dsid = resource['id']
    left_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []
    right_ratio, left_ratio = 0, 0

    left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
    right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
    # qual_md = get_extractor_metadata(all_dsmd, "terra.stereo-rgb.nrmac")

    if (not file_exists(left_rgb_mask_tiff)) or self.overwrite:
        self.log_info(resource, "creating %s" % left_rgb_mask_tiff)
        # if qual_md and 'left_quality_score' in qual_md:
        #     left_ratio, left_rgb = gen_cc_enhanced(img_left, quality_score=int(qual_md['left_quality_score']))
        left_ratio, left_rgb = gen_cc_enhanced(img_left)

        if left_ratio is not None and left_rgb is not None:
            # Bands must be reordered to avoid swapping R and B
            left_rgb = cv2.cvtColor(left_rgb, cv2.COLOR_BGR2RGB)
            create_geotiff(left_rgb, left_bounds, left_rgb_mask_tiff, None, False,
                           self.extractor_info, terra_md_full)
            compress_geotiff(left_rgb_mask_tiff)
            self.created += 1
            self.bytes += os.path.getsize(left_rgb_mask_tiff)
        else:
            # If the masked version was not generated, delete any old version as well
            self.log_info(resource, "a faulty version exists; deleting %s" % left_rgb_mask_tiff)
            if file_exists(left_rgb_mask_tiff):
                os.remove(left_rgb_mask_tiff)

    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_rgb_mask_tiff)
    if not found_in_dest:
        self.log_info(resource, "uploading %s" % left_rgb_mask_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                   target_dsid, left_rgb_mask_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not self.leftonly:
        if (not file_exists(right_rgb_mask_tiff)) or self.overwrite:
            right_ratio, right_rgb = gen_cc_enhanced(img_right)

            if right_ratio is not None and right_rgb is not None:
                # Bands must be reordered to avoid swapping R and B
                right_rgb = cv2.cvtColor(right_rgb, cv2.COLOR_BGR2RGB)
                create_geotiff(right_rgb, right_bounds, right_rgb_mask_tiff, None, False,
                               self.extractor_info, terra_md_full)
                compress_geotiff(right_rgb_mask_tiff)
                self.created += 1
                self.bytes += os.path.getsize(right_rgb_mask_tiff)
            else:
                # If the masked version was not generated, delete any old version as well
                self.log_info(resource, "a faulty version exists; deleting %s" % right_rgb_mask_tiff)
                if file_exists(right_rgb_mask_tiff):
                    os.remove(right_rgb_mask_tiff)

        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_rgb_mask_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % right_rgb_mask_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                       target_dsid, right_rgb_mask_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    if len(uploaded_file_ids) > 0:
        md = {"files_created": uploaded_file_ids, "left_mask_ratio": left_ratio}
        if not self.leftonly:
            md["right_mask_ratio"] = right_ratio
        extractor_md = build_metadata(host, self.extractor_info, target_dsid, md, 'dataset')
        self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
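# --- Illustrative sketch (not part of the extractor) ---------------------
# gen_cc_enhanced() returns a (canopy-cover ratio, masked BGR image) pair.
# A condensed sketch of the underlying idea, an excess-green threshold mask,
# is shown below; the index and threshold used here are assumptions, not the
# values from the real implementation.
import numpy as np
import cv2

def gen_cc_enhanced_sketch(img_path, threshold=20):
    bgr = cv2.imread(img_path)
    b, g, r = [c.astype(np.int16) for c in cv2.split(bgr)]
    exg = 2 * g - r - b                              # excess-green per pixel
    mask = (exg > threshold).astype(np.uint8)        # 1 = vegetation
    ratio = float(mask.sum()) / mask.size            # canopy-cover fraction
    masked = bgr * mask[:, :, None]                  # zero out soil pixels
    return ratio, masked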
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Load metadata from dataset
    terra_md_full = None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd)
    if terra_md_full and 'spatial_metadata' in terra_md_full:
        spatial_meta = terra_md_full['spatial_metadata']
    else:
        spatial_meta = None
    if not spatial_meta:
        raise ValueError("No spatial metadata found.")

    # Determine which files in dataset need clipping
    files_to_process = {}
    for f in resource['local_paths']:
        if f.startswith("ir_geotiff") and f.endswith(".tif"):
            sensor_name = "ir_geotiff"
            filename = os.path.basename(f)
            files_to_process[filename] = {
                "path": f,
                "bounds": spatial_meta['flirIrCamera']['bounding_box']
            }
        elif f.startswith("rgb_geotiff") and f.endswith(".tif"):
            sensor_name = "rgb_geotiff"
            filename = os.path.basename(f)
            side = "left" if f.endswith("_left.tif") else "right"
            files_to_process[filename] = {
                "path": f,
                "bounds": spatial_meta[side]['bounding_box']
            }
        elif f.endswith(".las"):
            sensor_name = "laser3d_las"
            filename = os.path.basename(f)
            files_to_process[filename] = {
                "path": f,
                "bounds": get_las_extents(f)
            }
        # TODO: Add case for laser3d heightmap

    # Fetch experiment name from terra metadata
    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'plotclipper', terra_md_full)
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine script name
    target_scan = "unknown_scan"
    if 'gantry_variable_metadata' in terra_md_full:
        if 'script_name' in terra_md_full['gantry_variable_metadata']:
            target_scan = terra_md_full['gantry_variable_metadata']['script_name']
            if 'script_hash' in terra_md_full['gantry_variable_metadata']:
                target_scan += ' ' + terra_md_full['gantry_variable_metadata']['script_hash']

    all_plots = get_site_boundaries(timestamp.split("__")[0], city='Maricopa')
    uploaded_file_ids = []

    for filename in files_to_process:
        file_path = files_to_process[filename]["path"]
        file_bounds = files_to_process[filename]["bounds"]

        overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True)
        if len(overlap_plots) > 0:
            self.log_info(resource, "Attempting to clip %s into %s plot shards" % (filename, len(overlap_plots)))
            for plotname in overlap_plots:
                plot_bounds = overlap_plots[plotname]
                tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))

                plot_display_name = self.sensors.get_display_name(sensor=sensor_name) + " (By Plot)"
                leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split("__")[0]
                self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (
                    season_name, experiment_name, plot_display_name,
                    timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_dataset))
                target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                            season_name, experiment_name, plot_display_name,
                                                            timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                            leaf_ds_name=leaf_dataset)

                out_file = self.sensors.create_sensor_path(timestamp, plot=plotname, subsensor=sensor_name, filename=filename)
                if not os.path.exists(os.path.dirname(out_file)):
                    os.makedirs(os.path.dirname(out_file))

                if filename.endswith(".tif") and (not file_exists(out_file) or self.overwrite):
                    # If file is a geoTIFF, simply clip it and upload it to Clowder
                    clip_raster(file_path, tuples, out_path=out_file)

                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file, remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, out_file)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                elif filename.endswith(".las"):
                    # If file is LAS, we can merge with any existing scan+plot output safely
                    merged_out = os.path.join(os.path.dirname(out_file), target_scan + "_merged.las")
                    merged_txt = merged_out.replace(".las", "_contents.txt")

                    already_merged = False
                    if os.path.exists(merged_txt):
                        # Check contents to see if this file was already merged in
                        with open(merged_txt, 'r') as contents:
                            for entry in contents.readlines():
                                if entry.strip() == file_path:
                                    already_merged = True
                                    break
                    if not already_merged:
                        clip_las(file_path, tuples, out_path=out_file, merged_path=merged_out)
                        with open(merged_txt, 'a') as contents:
                            contents.write(file_path + "\n")

                    # Upload the individual plot shards for optimizing las2height later
                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file, remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, out_file)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(out_file)

                    # Upload the merged result if necessary
                    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out, remove=self.overwrite)
                    if not found_in_dest or self.overwrite:
                        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, merged_out)
                        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                    self.created += 1
                    self.bytes += os.path.getsize(merged_out)

                    # Trigger las2height extractor
                    submit_extraction(connector, host, secret_key, target_dsid, "terra.3dscanner.las2height")

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    extractor_md = build_metadata(host, self.extractor_info, resource['id'],
                                  {"files_created": uploaded_file_ids}, 'dataset')
    self.log_info(resource, "uploading extractor metadata to Level_1 dataset")
    remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
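# --- Illustrative sketch (not part of the extractor) ---------------------
# clip_raster() comes from the terrautils helpers. A minimal rasterio-based
# sketch of clipping a GeoTIFF to a plot's bounds tuple is below; the
# (miny, maxy, minx, maxx) ordering mirrors geojson_to_tuples elsewhere in
# this pipeline and is an assumption here.
import rasterio
from rasterio.windows import from_bounds

def clip_raster_sketch(in_path, bounds, out_path):
    miny, maxy, minx, maxx = bounds
    with rasterio.open(in_path) as src:
        # Translate geographic bounds into a pixel window on the source grid
        window = from_bounds(minx, miny, maxx, maxy, transform=src.transform)
        data = src.read(window=window)
        profile = src.profile.copy()
        profile.update(height=data.shape[1], width=data.shape[2],
                       transform=src.window_transform(window))
        with rasterio.open(out_path, 'w', **profile) as dst:
            dst.write(data)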
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get left/right files and metadata
    img_left, img_right, terra_md_full = None, None, None
    for fname in resource['local_paths']:
        if fname.endswith('_dataset_metadata.json'):
            all_dsmd = load_json_file(fname)
            terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
        elif fname.endswith('_left.tif'):
            img_left = fname
        elif fname.endswith('_right.tif'):
            img_right = fname
    if None in [img_left, img_right, terra_md_full]:
        raise ValueError("could not locate all files & metadata in processing")

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    target_dsid = resource['id']
    left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
    right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
    uploaded_file_ids = []

    self.log_info(resource, "determining image quality")
    left_qual = getImageQuality(img_left)
    if not self.leftonly:
        right_qual = getImageQuality(img_right)

    left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
    right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])

    if not file_exists(left_nrmac_tiff) or self.overwrite:
        self.log_info(resource, "creating %s" % left_nrmac_tiff)
        create_geotiff(np.array([[left_qual, left_qual], [left_qual, left_qual]]), left_bounds,
                       left_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
        self.created += 1
        self.bytes += os.path.getsize(left_nrmac_tiff)
    found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                          left_nrmac_tiff, remove=self.overwrite)
    if not found_in_dest or self.overwrite:
        self.log_info(resource, "uploading %s" % left_nrmac_tiff)
        fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                   target_dsid, left_nrmac_tiff)
        uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    if not self.leftonly:
        if not file_exists(right_nrmac_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % right_nrmac_tiff)
            create_geotiff(np.array([[right_qual, right_qual], [right_qual, right_qual]]), right_bounds,
                           right_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(right_nrmac_tiff)
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                              right_nrmac_tiff, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            self.log_info(resource, "uploading %s" % right_nrmac_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                       target_dsid, right_nrmac_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    md = {"files_created": uploaded_file_ids, "left_quality_score": left_qual}
    if not self.leftonly:
        md["right_quality_score"] = right_qual
    extractor_md = build_metadata(host, self.extractor_info, resource['id'], md, 'dataset')
    self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
    remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
    upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

    self.end_message(resource)
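# --- Illustrative sketch (not part of the extractor) ---------------------
# getImageQuality() computes the NRMAC (no-reference multiscale autocorrelation)
# score consumed by the quality gate in check_message above. A condensed sketch
# of the idea follows: a sharp image loses pixel-to-pixel correlation faster
# than a blurry one as the shift grows. The shifts and normalization here are
# illustrative assumptions, not the published coefficients.
import numpy as np
import cv2

def nrmac_sketch(img_path):
    gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE).astype(np.float64)
    d = gray - gray.mean()
    denom = (d * d).sum()
    ac1 = (d[:, :-1] * d[:, 1:]).sum() / denom   # autocorrelation, shift 1
    ac2 = (d[:, :-2] * d[:, 2:]).sum() / denom   # autocorrelation, shift 2
    return ac1 - ac2                             # larger = sharper image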