def check_message(self, connector, host, secret_key, resource, parameters):
    if "rulechecked" in parameters and parameters["rulechecked"]:
        return CheckMessage.download

    if not is_latest_file(resource):
        self.log_skip(resource, "not latest file")
        return CheckMessage.ignore

    if not contains_required_files(resource, ['raw', 'raw.hdr', 'image.jpg',
                                              'frameIndex.txt', 'settings.txt']):
        self.log_skip(resource, "missing required files")
        return CheckMessage.ignore

    if resource['dataset_info']['name'].find("SWIR") > -1:
        sensor_fullname = 'swir_netcdf'
    else:
        sensor_fullname = 'vnir_netcdf'

    timestamp = resource['dataset_info']['name'].split(" - ")[1]
    md = download_metadata(connector, host, secret_key, resource['id'])
    if get_terraref_metadata(md):
        if get_extractor_metadata(md, self.extractor_info['name'],
                                  self.extractor_info['version']):
            # Make sure outputs properly exist
            out_nc = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname)
            if file_exists(out_nc):
                self.log_skip(resource, "metadata v%s and outputs already exist" %
                              self.extractor_info['version'])
                return CheckMessage.ignore
        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
    else:
        self.log_skip(resource, "no terraref metadata found")

        # See if we can recover it from disk
        if sensor_fullname == 'vnir_netcdf':
            date = timestamp.split("__")[0]
            source_dir = "/home/extractor/sites/ua-mac/raw_data/VNIR/%s/%s/" % (date, timestamp)
            for f in os.listdir(source_dir):
                if f.endswith("_metadata.json"):
                    self.log_info(resource, "updating metadata from %s" % f)
                    raw_dsmd = load_json_file(os.path.join(source_dir, f))
                    clean_md = clean_metadata(raw_dsmd, 'VNIR')
                    complete_md = build_metadata(host, self.extractor_info,
                                                 resource['id'], clean_md, 'dataset')
                    remove_metadata(connector, host, secret_key, resource['id'])
                    upload_metadata(connector, host, secret_key, resource['id'], complete_md)
                    return CheckMessage.download

        return CheckMessage.ignore
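# A minimal sketch of how an extractor exposing the check_message/process_message
# pair above is typically started. The class name here is hypothetical, and
# start() assumes the pyclowder Extractor base class used by TERRA-REF extractors.
if __name__ == "__main__":
    extractor = HyperspectralCheckExtractor()  # hypothetical subclass
    extractor.start()  # connect to the message bus and begin dispatching messages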
def do_work(left_file, right_file, json_file):
    """Make the calls to convert the files

    Args:
        left_file(str): Path to the left BIN file
        right_file(str): Path to the right BIN file
        json_file(str): Path to the JSON file
    """
    out_left = os.path.splitext(left_file)[0] + ".tif"
    out_right = os.path.splitext(right_file)[0] + ".tif"

    file_name, file_ext = os.path.splitext(json_file)
    out_json = file_name + "_updated" + file_ext

    # Load the JSON
    with open(json_file, "r") as infile:
        metadata = json.load(infile)
        if not metadata:
            raise RuntimeError("JSON file appears to be invalid: " + json_file)
        md_len = len(metadata)
        if md_len <= 0:
            raise RuntimeError("JSON file is empty: " + json_file)

    # Prepare the metadata
    clean_md = get_terraref_metadata(clean_metadata(metadata, 'stereoTop'), 'stereoTop')

    # Pull out the information we need from the JSON
    try:
        left_shape = terraref.stereo_rgb.get_image_shape(clean_md, 'left')
        gps_bounds_left = geojson_to_tuples(clean_md['spatial_metadata']['left']['bounding_box'])
        right_shape = terraref.stereo_rgb.get_image_shape(clean_md, 'right')
        gps_bounds_right = geojson_to_tuples(clean_md['spatial_metadata']['right']['bounding_box'])
    except KeyError:
        print("ERROR: Spatial metadata not properly identified in JSON file")
        return

    # Make the conversion calls
    print("creating %s" % out_left)
    left_image = terraref.stereo_rgb.process_raw(left_shape, left_file, None)
    create_geotiff(left_image, gps_bounds_left, out_left, asfloat=False,
                   system_md=clean_md, compress=False)

    print("creating %s" % out_right)
    right_image = terraref.stereo_rgb.process_raw(right_shape, right_file, None)
    create_geotiff(right_image, gps_bounds_right, out_right, asfloat=False,
                   system_md=clean_md, compress=True)

    # Write the metadata
    print("creating %s" % out_json)
    with open(out_json, "w") as outfile:
        json.dump(clean_md, outfile, indent=4)
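# A hedged command-line wrapper for do_work. The argument names are assumptions,
# and the module-level imports (os, json, terraref.stereo_rgb, and the
# terrautils helpers) are expected to be present as in the rest of this file.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Convert a stereo BIN pair to GeoTIFFs")
    parser.add_argument("left_file", help="path to the left BIN file")
    parser.add_argument("right_file", help="path to the right BIN file")
    parser.add_argument("json_file", help="path to the raw metadata JSON file")
    args = parser.parse_args()

    do_work(args.left_file, args.right_file, args.json_file)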
import os
import sys

lib_path = os.path.abspath(os.path.join('..', '..', 'scanner_3d'))
sys.path.append(lib_path)

from terrautils.metadata import get_terraref_metadata, clean_metadata
from terrautils.extractors import load_json_file
from scanner_3d.ply2las import generate_las_from_pdal, combine_east_west_las, geo_referencing_las, \
    geo_referencing_las_for_eachpoint_in_mac

test_id = '85f9c8c2-fa68-48a6-b63c-375daa438414'
path = os.path.join(os.path.dirname(__file__), 'test_ply2las_doc', test_id)
dire = os.path.join(os.path.dirname(__file__), 'test_ply2las_doc')

all_dsmd = load_json_file(path + '_metadata.json')
cleanmetadata = clean_metadata(all_dsmd, "scanner3DTop")
terra_md = get_terraref_metadata(cleanmetadata, 'scanner3DTop')

in_east = '/data/' + test_id + '__Top-heading-east_0.ply'
in_west = '/data/' + test_id + '__Top-heading-west_0.ply'

pdal_base = "docker run -v %s:/data pdal/pdal:1.5 " % dire
tmp_east_las = "/data/east_temp.las"
tmp_west_las = "/data/west_temp.las"
merge_las = "/data/merged.las"
convert_las = dire + "/converted.las"
convert_pt_las = dire + "/converted_pts.las"


def test_east_las():
    generate_las_from_pdal(pdal_base, in_east, tmp_east_las)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    sensor_type, timestamp = resource['name'].split(" - ")

    # First, re-check metadata to verify it hasn't been added in the meantime
    ds_md = download_metadata(connector, host, secret_key, resource['id'])
    terra_md = get_terraref_metadata(ds_md)
    if terra_md:
        self.log_info(resource, "Found TERRA-REF metadata; not cleaning")
        return

    # These datasets do not have TERRA md
    uncleanables = ["Full Field"]
    if sensor_type in uncleanables:
        self.log_info(resource, "Cannot clean metadata for %s" % sensor_type)
        return

    # For these datasets, we must get TERRA md from the raw_data source
    lv1_types = {"RGB GeoTIFFs": "stereoTop",
                 "Thermal IR GeoTIFFs": "flirIrCamera"}
    if sensor_type in lv1_types:
        raw_equiv = resource['name'].replace(sensor_type, lv1_types[sensor_type])
        source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(raw_equiv))
    else:
        # Search for metadata.json source file
        source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(resource['name']))
    source_dir = self.remapMountPath(connector, source_dir)

    if self.delete:
        # Delete all existing metadata from this dataset
        self.log_info(resource, "Deleting existing metadata")
        delete_dataset_metadata(host, self.clowder_user, self.clowder_pass, resource['id'])

    # TODO: split between the PLY files (in Level_1) and metadata.json files - unique to this sensor
    if sensor_type == "scanner3DTop":
        source_dir = source_dir.replace("Level_1", "raw_data")

    self.log_info(resource, "Searching for metadata.json in %s" % source_dir)
    if os.path.isdir(source_dir):
        md_file = None
        for f in os.listdir(source_dir):
            if f.endswith("metadata.json"):
                md_file = os.path.join(source_dir, f)
        if md_file:
            self.log_info(resource, "Found metadata.json; cleaning")
            md_json = clean_metadata(load_json_file(md_file), sensor_type)
            format_md = {
                "@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                             {"@vocab": "https://terraref.ncsa.illinois.edu/metadata/uamac#"}],
                "content": md_json,
                "agent": {
                    "@type": "cat:user",
                    "user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % self.userid
                }
            }
            self.log_info(resource, "Uploading cleaned metadata")
            upload_metadata(connector, host, secret_key, resource['id'], format_md)

            # Now trigger a callback extraction if given
            if len(self.callback) > 0:
                self.log_info(resource, "Submitting callback extraction to %s" % self.callback)
                submit_extraction(connector, host, secret_key, resource['id'], self.callback)
            else:
                callbacks = self.get_callbacks_by_sensor(sensor_type)
                if callbacks:
                    for c in callbacks:
                        self.log_info(resource, "Submitting callback extraction to %s" % c)
                        submit_extraction(connector, host, secret_key, resource['id'], c)
                else:
                    self.log_info(resource, "No default callback found for %s" % sensor_type)
        else:
            self.log_error(resource, "metadata.json not found in %s" % source_dir)
    else:
        self.log_error(resource, "%s could not be found" % source_dir)

    # TODO: Have extractor check for existence of Level_1 output product and delete if exists?

    self.end_message(resource)
def notifyClowderOfCompletedTask(task):
    # Verify that globus user has a mapping to clowder credentials in config file
    globUser = task['user']
    userMap = config['clowder']['user_map']

    if globUser in userMap:
        logger.info("%s task complete; notifying Clowder" % task['globus_id'], extra={
            "globus_id": task['globus_id'],
            "action": "NOTIFYING CLOWDER OF COMPLETION"
        })
        clowder_host = config['clowder']['host']
        clowder_key = config['clowder']['secret_key']
        clowder_user = userMap[globUser]['clowder_user']
        clowder_pass = userMap[globUser]['clowder_pass']
        clowder_id = userMap[globUser]['clowder_id']
        clowder_context = userMap[globUser]['context']

        sess = requests.Session()
        sess.auth = (clowder_user, clowder_pass)

        # Response can be OK, RETRY or ERROR
        response = "OK"

        # Prepare upload object with all file(s) found
        updatedTask = safeCopy(task)

        space_id = task['contents']['space_id'] if 'space_id' in task['contents'] \
            else config['clowder']['primary_space']

        for ds in task['contents']:
            # Skip any unexpected files at root level, e.g.
            #   /home/clowder/sites/ua-mac/raw_data/GetFluorescenceValues.m
            #   /home/clowder/sites/ua-mac/raw_data/irrigation/2017-06-04/@Recycle/flowmetertotals_March-2017.csv
            if ds in ["LemnaTec - MovingSensor"] or ds.find("@Recycle") > -1:
                continue

            filesQueued = []
            fileFormData = []
            datasetMD = None
            datasetMDFile = False
            lastFile = None
            lastFileKey = None
            sensorname = ds.split(" - ")[0]

            logger.info("%s -- Processing [%s]" % (task['globus_id'], ds))

            # Assign dataset-level metadata if provided
            if "md" in task['contents'][ds]:
                datasetMD = task['contents'][ds]['md']

            # Add local files to dataset by path
            if 'files' in task['contents'][ds]:
                for fkey in task['contents'][ds]['files']:
                    fobj = task['contents'][ds]['files'][fkey]
                    if 'clowder_id' not in fobj or fobj['clowder_id'] == "":
                        if os.path.exists(fobj['path']):
                            if fobj['name'].find("metadata.json") == -1:
                                if 'md' in fobj:
                                    # Use [1,-1] to avoid json.dumps wrapping quotes
                                    # Replace \" with " to avoid json.dumps escaping quotes
                                    mdstr = ', "md":' + json.dumps(fobj['md'])[1:-1].replace('\\"', '"')
                                else:
                                    mdstr = ""
                                filesQueued.append((fobj['path'], mdstr))
                                lastFile = fobj['name']
                                lastFileKey = fkey
                            else:
                                try:
                                    datasetMD = loadJsonFile(fobj['path'])
                                    datasetMDFile = fkey
                                except:
                                    logger.error("[%s] could not decode JSON from %s" % (ds, fobj['path']))
                                    updatedTask['contents'][ds]['files'][fkey]['clowder_id'] = "FILE NOT FOUND"
                                    updatedTask['contents'][ds]['files'][fkey]['error'] = "Failed to load JSON"
                                    writeTaskToDatabase(updatedTask)
                                    if response == "OK":
                                        response = "ERROR"  # Don't overwrite a RETRY
                        else:
                            logger.error("[%s] file not found: %s" % (ds, fobj['path']))
                            updatedTask['contents'][ds]['files'][fkey]['clowder_id'] = "FILE NOT FOUND"
                            updatedTask['contents'][ds]['files'][fkey]['error'] = "File not found"
                            writeTaskToDatabase(updatedTask)
                            if response == "OK":
                                response = "ERROR"  # Don't overwrite a RETRY

            if len(filesQueued) > 0 or datasetMD:
                # Try to clean metadata first
                if datasetMD:
                    cleaned_dsmd = None
                    try:
                        cleaned_dsmd = clean_metadata(datasetMD, sensorname)
                    except Exception as e:
                        logger.error("[%s] could not clean md: %s" % (ds, str(e)))
                        task['contents'][ds]['error'] = "Could not clean metadata: %s" % str(e)
                        # TODO: possible this could be recoverable with more info from clean_metadata
                        if response == "OK":
                            response = "ERROR"  # Don't overwrite a RETRY

                if ds.find(" - ") > -1:
                    # e.g. "co2Sensor - 2016-12-25" or "VNIR - 2016-12-25__12-32-42-123"
                    c_sensor = ds.split(" - ")[0]
                    c_date = ds.split(" - ")[1]
                    c_year = c_date.split('-')[0]
                    c_month = c_date.split('-')[1]
                    if c_date.find("__") == -1:
                        # If we only have a date and not a timestamp, don't create date collection
                        c_date = None
                    else:
                        c_date = c_date.split("__")[0].split("-")[2]
                else:
                    c_sensor, c_date, c_year, c_month = ds, None, None, None

                # Get dataset from clowder, or create & associate with collections
                try:
                    hierarchy_host = clowder_host + ("/" if not clowder_host.endswith("/") else "")
                    dsid = build_dataset_hierarchy(hierarchy_host, clowder_key, clowder_user,
                                                   clowder_pass, space_id, c_sensor, c_year,
                                                   c_month, c_date, ds)
                    logger.info("   [%s] id: %s" % (ds, dsid))
                except Exception as e:
                    logger.error("[%s] could not build hierarchy: %s" % (ds, str(e)))
                    task['contents'][ds]['retry'] = "Could not build dataset hierarchy: %s" % str(e)
                    response = "RETRY"
                    continue

                if dsid:
                    dsFileList = fetchDatasetFileList(dsid, sess)
                    # Only send files not already present in dataset by path
                    for queued in filesQueued:
                        alreadyStored = False
                        for storedFile in dsFileList:
                            if queued[0] == storedFile['filepath']:
                                logger.info("   skipping file %s (already uploaded)" % queued[0])
                                alreadyStored = True
                                break
                        if not alreadyStored:
                            fileFormData.append(("file", '{"path":"%s"%s}' % (queued[0], queued[1])))

                    if datasetMD and cleaned_dsmd:
                        # Check for existing metadata from the site user
                        alreadyAttached = False
                        md_existing = download_metadata(None, hierarchy_host, clowder_key, dsid)
                        for mdobj in md_existing:
                            if 'agent' in mdobj and 'user_id' in mdobj['agent']:
                                if mdobj['agent']['user_id'] == \
                                        "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % clowder_id:
                                    logger.info("   skipping metadata (already attached)")
                                    alreadyAttached = True
                                    break

                        if not alreadyAttached:
                            md = {
                                "@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                                             {"@vocab": clowder_context}],
                                "content": cleaned_dsmd,
                                "agent": {
                                    "@type": "cat:user",
                                    "user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % clowder_id
                                }
                            }
                            dsmd = sess.post(clowder_host + "/api/datasets/" + dsid + "/metadata.jsonld",
                                             headers={'Content-Type': 'application/json'},
                                             data=json.dumps(md))

                            if dsmd.status_code in [500, 502, 504]:
                                logger.error("[%s] failed to attach metadata (%s: %s)" %
                                             (ds, dsmd.status_code, dsmd.text))
                                updatedTask['contents'][ds]['files'][datasetMDFile]['retry'] = \
                                    "%s: %s" % (dsmd.status_code, dsmd.text)
                                response = "RETRY"
                            elif dsmd.status_code != 200:
                                logger.error("[%s] failed to attach metadata (%s: %s)" %
                                             (ds, dsmd.status_code, dsmd.text))
                                updatedTask['contents'][ds]['files'][datasetMDFile]['error'] = \
                                    "%s: %s" % (dsmd.status_code, dsmd.text)
                                response = "ERROR"
                            else:
                                if datasetMDFile:
                                    logger.info("   [%s] added metadata from .json file" % ds, extra={
                                        "dataset_name": ds,
                                        "dataset_id": dsid,
                                        "action": "METADATA ADDED",
                                        "metadata": datasetMD
                                    })
                                    updatedTask['contents'][ds]['files'][datasetMDFile]['metadata_loaded'] = True
                                    updatedTask['contents'][ds]['files'][datasetMDFile]['clowder_id'] = \
                                        "attached to dataset"
                                    writeTaskToDatabase(updatedTask)
                                else:
                                    # Remove metadata from activeTasks on success even if file upload
                                    # fails in next step, so we don't repeat md
                                    logger.info("   [%s] added metadata" % ds, extra={
                                        "dataset_name": ds,
                                        "dataset_id": dsid,
                                        "action": "METADATA ADDED",
                                        "metadata": datasetMD
                                    })
                                    del updatedTask['contents'][ds]['md']
                                    writeTaskToDatabase(updatedTask)

                    if len(fileFormData) > 0:
                        # Upload collected files for this dataset
                        # Boundary encoding from http://stackoverflow.com/questions/17982741/python-using-reuests-library-for-multipart-form-data
                        logger.info("   [%s] uploading unprocessed files" % ds, extra={
                            "dataset_id": dsid,
                            "dataset_name": ds,
                            "action": "UPLOADING FILES",
                            "filelist": fileFormData
                        })
                        (content, header) = encode_multipart_formdata(fileFormData)
                        fi = sess.post(clowder_host + "/api/uploadToDataset/" + dsid,
                                       headers={'Content-Type': header},
                                       data=content)

                        if fi.status_code in [104, 500, 502, 504]:
                            logger.error("[%s] failed to attach files (%s: %s)" %
                                         (ds, fi.status_code, fi.text))
                            updatedTask['contents'][ds]['files'][datasetMDFile]['retry'] = \
                                "%s: %s" % (fi.status_code, fi.text)
                            response = "RETRY"
                        elif fi.status_code != 200:
                            logger.error("[%s] failed to attach files (%s: %s)" %
                                         (ds, fi.status_code, fi.text))
                            updatedTask['contents'][ds]['files'][datasetMDFile]['error'] = \
                                "%s: %s" % (fi.status_code, fi.text)
                            response = "ERROR"
                        else:
                            loaded = fi.json()
                            if 'ids' in loaded:
                                for fobj in loaded['ids']:
                                    logger.info("   [%s] added file %s" % (ds, fobj['name']))
                                    for fkey in updatedTask['contents'][ds]['files']:
                                        if updatedTask['contents'][ds]['files'][fkey]['name'] == fobj['name']:
                                            updatedTask['contents'][ds]['files'][fkey]['clowder_id'] = fobj['id']
                                            # remove any previous retry/error messages
                                            if 'retry' in updatedTask['contents'][ds]['files'][fkey]:
                                                del updatedTask['contents'][ds]['files'][fkey]['retry']
                                            if 'error' in updatedTask['contents'][ds]['files'][fkey]:
                                                del updatedTask['contents'][ds]['files'][fkey]['error']
                                            break
                                    writeTaskToDatabase(updatedTask)
                            else:
                                logger.info("   [%s] added file %s" % (ds, lastFile))
                                updatedTask['contents'][ds]['files'][lastFileKey]['clowder_id'] = loaded['id']
                                # remove any previous retry/error messages
                                if 'retry' in updatedTask['contents'][ds]['files'][lastFileKey]:
                                    del updatedTask['contents'][ds]['files'][lastFileKey]['retry']
                                if 'error' in updatedTask['contents'][ds]['files'][lastFileKey]:
                                    del updatedTask['contents'][ds]['files'][lastFileKey]['error']
                                writeTaskToDatabase(updatedTask)

        return response
    else:
        logger.error("%s task: no credentials for Globus user %s" % (task['globus_id'], globUser))
        return "ERROR"
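# The OK/RETRY/ERROR bookkeeping above repeats the same guard ("don't overwrite
# a RETRY") at several call sites. A minimal sketch of how that escalation could
# be factored out; this helper is hypothetical, not part of the original module.
# RETRY outranks ERROR because a retryable failure must keep the task queued
# even if an unrecoverable error also occurred.
RESPONSE_RANK = {"OK": 0, "ERROR": 1, "RETRY": 2}

def escalate(current, proposed):
    """Return the more severe of two task responses."""
    return proposed if RESPONSE_RANK[proposed] > RESPONSE_RANK[current] else current

# usage at each failure site: response = escalate(response, "ERROR")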
action="store_true") args = parser.parse_args() if args.verbose: logger.setLevel(logging.DEBUG) for inputfile in [args.left, args.right, args.meta]: if not os.path.isfile(inputfile): logger.error("Input does not exist: %s" % inputfile) sys.exit(1) logger.debug("Cleaning metadata.json contents") sensors = Sensors(base=args.output, station="ua-mac", sensor="rgb_geotiff") with open(args.meta, 'r') as mdfile: j = json.load(mdfile) md = clean_metadata(j, "stereoTop", fixed=True) logger.debug("Preparing embedded geotiff metadata") experiment_names = [] for e in md["experiment_metadata"]: experiment_names.append(e["name"]) tif_meta = { "datetime": str(md["gantry_variable_metadata"]["datetime"]), "sensor_id": str(md["sensor_fixed_metadata"]["sensor_id"]), "experiment_name": ", ".join(experiment_names), "extractor_name": "terra.stereo-rgb.bin2tif", "extractor_version": "1.1", "extractor_author": "Max Burnette <*****@*****.**>", "extractor_description": "Stereo RGB Image Bin to GeoTIFF Converter", "extractor_repo": "https://github.com/terraref/extractors-stereo-rgb.git" }
# Need one additional loop if there is a timestamp-level directory
if TIMESTAMP_FOLDER and os.path.isdir(DATE_DIR):
    log("Scanning datasets in %s" % DATE_DIR)
    for timestamp in os.listdir(DATE_DIR):
        if timestamp.startswith('.'):
            continue
        TIMESTAMP_DIR = os.path.join(DATE_DIR, timestamp)
        DS_FILES = []
        DS_META = {}

        # Find files and metadata in the directory
        for filename in os.listdir(TIMESTAMP_DIR):
            if filename[0] != ".":
                FILEPATH = os.path.join(TIMESTAMP_DIR, filename)
                if filename.find("metadata.json") > -1:
                    DS_META = clean_metadata(loadJsonFile(FILEPATH), sensor)
                else:
                    DS_FILES.append(FILEPATH)

        upload_ds(CONN, CLOWDER_HOST, CLOWDER_KEY, sensor, date, timestamp, DS_FILES, DS_META)

# Otherwise the date is the dataset level
elif os.path.isdir(DATE_DIR):
    log("Scanning datasets in %s" % SENSOR_DIR)
    DS_FILES = []
    DS_META = {}
    for filename in os.listdir(DATE_DIR):
        if filename[0] != ".":
            FILEPATH = os.path.join(DATE_DIR, filename)
def read_metadata():
    # os.path.join handles dire with or without a trailing separator
    md_file = os.path.join(dire, 'metadata.json')
    with open(md_file, 'r') as jsonfile:
        md_data = json.load(jsonfile)
    cleanmetadata = clean_metadata(md_data, "scanner3DTop")
    return cleanmetadata
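# A quick usage sketch mirroring the module-level test setup above, assuming
# dire points at a directory containing metadata.json:
md = read_metadata()
terra_md = get_terraref_metadata(md, 'scanner3DTop')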
def upload_dataset(dataset_path, level, product, timestamp, sess, logfile):
    contents = os.listdir(dataset_path)
    if len(contents) == 0:
        logfile.write('%s,%s,"%s",%s\n' % (level, product, dataset_path, "ERR: No files found"))
        return False

    # Find and prepare the metadata
    clean_md = None
    if product == "scanner3DTop" and level == "Level_1":
        # Special check between Level_1/raw_data for scanner3DTop only
        path3d = dataset_path.replace("Level_1", "raw_data")
        contents3d = os.listdir(path3d)
        for f in contents3d:
            if f.endswith("_metadata.json"):
                md = load_json_file(os.path.join(path3d, f))
                clean_md = clean_metadata(md, product)
                if dry_run:
                    print("...%s successfully cleaned." % os.path.join(path3d, f))
    else:
        for f in contents:
            if f.endswith("_metadata.json"):
                md = load_json_file(os.path.join(dataset_path, f))
                clean_md = clean_metadata(md, product)
                if dry_run:
                    print("...%s successfully cleaned." % os.path.join(dataset_path, f))
            elif f.endswith("_metadata_cleaned.json"):
                clean_md = load_json_file(os.path.join(dataset_path, f))
                if dry_run:
                    print("...%s successfully loaded." % os.path.join(dataset_path, f))

    if clean_md is None and product != "EnvironmentLogger":
        logfile.write('%s,%s,"%s",%s\n' % (level, product, dataset_path, "ERR: No metadata found"))
        return False

    # Create the dataset in Clowder
    season_name, experiment_name, updated_experiment = get_season_and_experiment(
        timestamp, product, clean_md)

    YYYY = timestamp[:4]
    MM = timestamp[5:7]
    DD = timestamp[8:10]
    dataset_name = "%s - %s" % (product, timestamp)

    if not dry_run:
        dsid = build_dataset_hierarchy_crawl(clowder_host, clowder_admin_key, clowder_user,
                                             clowder_pass, clowder_space, season_name,
                                             experiment_name, product, YYYY, MM, DD, dataset_name)
    else:
        dsid = "JustPretend"
    logfile.write('%s,%s,"%s",%s\n' % (level, product, dataset_path, "OK: %s" % dsid))

    # Upload metadata
    if not dry_run and product != "EnvironmentLogger":
        sess.post("%sapi/datasets/%s/metadata.jsonld" % (clowder_host, dsid),
                  headers={'Content-Type': 'application/json'},
                  data=json.dumps({
                      "@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
                                   {"@vocab": "https://terraref.ncsa.illinois.edu/metadata/uamac#"}],
                      "content": clean_md,
                      "agent": {
                          "@type": "cat:user",
                          "user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % clowder_userid
                      }
                  }))

    # Add each file
    for f in contents:
        if not (f.endswith("_metadata.json") or f.endswith("_metadata_cleaned.json")):
            filepath = os.path.join(dataset_path, f)
            if not dry_run:
                upload_to_dataset(conn, clowder_host, clowder_user, clowder_pass, dsid, filepath)
            else:
                print("...would upload %s" % f)

    return True
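# A hedged driver sketch for upload_dataset; the directory layout, product
# name, and CSV log path are assumptions modeled on paths used elsewhere in
# this section, and credentials are assumed to be configured at module level.
import os
import requests

sess = requests.Session()
sess.auth = (clowder_user, clowder_pass)
with open("upload_log.csv", "w") as logfile:
    root = "/home/extractor/sites/ua-mac/Level_1/scanner3DTop"  # assumed layout
    for date in sorted(os.listdir(root)):
        date_dir = os.path.join(root, date)
        for ts in sorted(os.listdir(date_dir)):
            upload_dataset(os.path.join(date_dir, ts), "Level_1",
                           "scanner3DTop", ts, sess, logfile)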