Example #1
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        f = resource['local_paths'][0]

        self.log_info(resource, "determining image quality")
        qual = getImageQuality(f)

        self.log_info(resource, "creating output image")
        md = download_ds_metadata(connector, host, secret_key,
                                  resource['parent']['id'])
        terramd = get_terraref_metadata(md)
        if "left" in f:
            bounds = geojson_to_tuples(
                terramd['spatial_metadata']['left']['bounding_box'])
        else:
            bounds = geojson_to_tuples(
                terramd['spatial_metadata']['right']['bounding_box'])
        output = f.replace(".tif", "_nrmac.tif")
        create_geotiff(np.array([[qual, qual], [qual, qual]]), bounds, output)
        upload_to_dataset(connector, host, self.clowder_user,
                          self.clowder_pass, resource['parent']['id'], output)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                                  {"quality_score": qual}, 'file')
        self.log_info(resource, "uploading extractor metadata")
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        self.end_message(resource)
Example #2
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for a left and right TIF file - skip if not found
        if not contains_required_files(resource, ['_left.tif', '_right.tif']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if not self.force:
                # Require an NRMAC quality score at or below the threshold before proceeding
                nrmac_md = get_extractor_metadata(md, "terra.stereo-rgb.nrmac")
                if not (nrmac_md and 'left_quality_score' in nrmac_md):
                    self.log_skip(resource,
                                  "NRMAC quality score not available")
                    return CheckMessage.ignore
                elif float(nrmac_md['left_quality_score']) > self.threshold:
                    self.log_skip(
                        resource,
                        "NRMAC quality score %s is above threshold of %s" %
                        (float(nrmac_md['left_quality_score']), self.threshold))
                    return CheckMessage.ignore

            if get_extractor_metadata(md, self.extractor_info['name'],
                                      self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_enh_tiff = self.sensors.create_sensor_path(timestamp,
                                                                opts=['left'])
                right_enh_tiff = self.sensors.create_sensor_path(
                    timestamp, opts=['right'])
                if file_exists(left_enh_tiff) and file_exists(right_enh_tiff):
                    if contains_required_files(resource, [
                            os.path.basename(left_enh_tiff),
                            os.path.basename(right_enh_tiff)
                    ]):
                        self.log_skip(
                            resource,
                            "metadata v%s and outputs already exist" %
                            self.extractor_info['version'])
                        return CheckMessage.ignore
                    else:
                        self.log_info(
                            resource,
                            "output files exist but not yet uploaded")
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_error(resource, "no terraref metadata found")
            return CheckMessage.ignore
Example #3
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for a left and right BIN file - skip if not found
        if not contains_required_files(resource, ['_left.bin', '_right.bin']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if not self.overwrite and get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
                right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
                if file_exists(left_tiff) and file_exists(right_tiff):
                    self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version'])
                    return CheckMessage.ignore
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_error(resource, "no terraref metadata found; sending to cleaner")
            submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
            return CheckMessage.ignore
Example #4
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for an _ir.bin file before beginning processing
        if not contains_required_files(resource, ['_ir.bin']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                tif = self.sensors.get_sensor_path(timestamp)
                png = tif.replace(".tif", ".png")
                if file_exists(png) and file_exists(tif):
                    self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version'])
                    return CheckMessage.ignore
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_skip(resource, "no terraref metadata found")
            return CheckMessage.ignore
Example #5
    def process_message_individual(self, connector, host, secret_key, resource,
                                   parameters):
        """This is deprecated method that operates on single capture, not field mosaic"""
        self.start_message()

        input_image = resource['local_paths'][0]

        # Create output in same directory as input, but check name
        ds_md = get_info(connector, host, secret_key, resource['parent']['id'])
        terra_md = get_terraref_metadata(
            download_metadata(connector, host, secret_key,
                              resource['parent']['id']), 'stereoTop')
        dataset_name = ds_md['name']
        timestamp = dataset_name.split(" - ")[1]

        # Is this left or right half?
        side = 'left' if resource['name'].find("_left") > -1 else 'right'
        gps_bounds = geojson_to_tuples(
            terra_md['spatial_metadata'][side]['bounding_box'])
        out_csv = self.sensors.create_sensor_path(timestamp,
                                                  opts=[side],
                                                  ext='csv')
        out_dgci = out_csv.replace(".csv", "_dgci.png")
        out_edge = out_csv.replace(".csv", "_edge.png")
        out_label = out_csv.replace(".csv", "_label.png")
        out_dgci_tif = out_dgci.replace('.png', '.tif')
        out_edge_tif = out_edge.replace('.png', '.tif')
        out_label_tif = out_label.replace('.png', '.tif')

        self.generate_all_outputs(input_image, out_csv, out_dgci, out_edge,
                                  out_label, gps_bounds)

        fileids = []
        for file_to_upload in [
                out_csv, out_dgci_tif, out_edge_tif, out_label_tif
        ]:
            if os.path.isfile(file_to_upload):
                if file_to_upload not in resource['local_paths']:
                    # TODO: Should this be written to a separate dataset?
                    #target_dsid = build_dataset_hierarchy(connector, host, secret_key, self.clowderspace,
                    #                                      self.sensors.get_display_name(),
                    #                                      timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_ds_name=dataset_name)

                    # Send output to Clowder source dataset
                    fileids.append(
                        upload_to_dataset(connector, host, secret_key,
                                          resource['parent']['id'],
                                          file_to_upload))
                self.created += 1
                self.bytes += os.path.getsize(file_to_upload)

        # Add metadata to original dataset indicating this was run
        ext_meta = build_metadata(host, self.extractor_info,
                                  resource['parent']['id'],
                                  {"files_created": fileids}, 'dataset')
        upload_metadata(connector, host, secret_key, resource['parent']['id'],
                        ext_meta)

        self.end_message()
Example #6
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        if not contains_required_files(
                resource,
                ['raw', 'raw.hdr', 'image.jpg', 'frameIndex.txt', 'settings.txt']):
            self.log_skip(resource, "missing required files")
            return CheckMessage.ignore

        if resource['dataset_info']['name'].find("SWIR") > -1:
            sensor_fullname = 'swir_netcdf'
        else:
            sensor_fullname = 'vnir_netcdf'

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if get_extractor_metadata(md, self.extractor_info['name'],
                                      self.extractor_info['version']):
                # Make sure outputs properly exist
                out_nc = self.sensors.get_sensor_path(timestamp,
                                                      sensor=sensor_fullname)
                if file_exists(out_nc):
                    self.log_skip(
                        resource, "metadata v%s and outputs already exist" %
                        self.extractor_info['version'])
                    return CheckMessage.ignore
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_skip(resource, "no terraref metadata found")
            # See if we can recover it from disk
            if sensor_fullname == 'vnir_netcdf':
                date = timestamp.split("__")[0]
                source_dir = "/home/extractor/sites/ua-mac/raw_data/VNIR/%s/%s/" % (
                    date, timestamp)
                for f in os.listdir(source_dir):
                    if f.endswith("_metadata.json"):
                        self.log_info(resource,
                                      "updating metadata from %s" % f)
                        raw_dsmd = load_json_file(os.path.join(source_dir, f))
                        clean_md = clean_metadata(raw_dsmd, 'VNIR')
                        complete_md = build_metadata(host, self.extractor_info,
                                                     resource['id'], clean_md,
                                                     'dataset')
                        remove_metadata(connector, host, secret_key,
                                        resource['id'])
                        upload_metadata(connector, host, secret_key,
                                        resource['id'], complete_md)
                        return CheckMessage.download
            return CheckMessage.ignore
Example #7
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for a left and right TIF file - skip if not found
        if not contains_required_files(resource, ['_left.tif', '_right.tif']):
            self.log_skip(resource, "missing required files")
            # Check for raw_data_source in metadata and resubmit to bin2tif if available...
            md = download_metadata(connector, host, secret_key, resource['id'])
            terra_md = get_terraref_metadata(md)
            if terra_md and 'raw_data_source' in terra_md:
                raw_id = str(terra_md['raw_data_source'].split("/")[-1])
                self.log_info(resource, "submitting raw source %s to bin2tif" % raw_id)
                submit_extraction(connector, host, secret_key, raw_id, "terra.stereo-rgb.bin2tif")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            if get_extractor_metadata(md, self.extractor_info['name'], self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
                right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
                if (self.leftonly and file_exists(left_nrmac_tiff)) or \
                        (not self.leftonly and file_exists(left_nrmac_tiff) and file_exists(right_nrmac_tiff)):
                    if contains_required_files(resource, [os.path.basename(left_nrmac_tiff)]):
                        self.log_skip(resource, "metadata v%s and outputs already exist" % self.extractor_info['version'])
                        return CheckMessage.ignore
                    else:
                        self.log_info(resource, "output file exists but not yet uploaded")
            # Have TERRA-REF metadata, but not any from this extractor
            return CheckMessage.download
        else:
            self.log_skip(resource, "no terraref metadata found")
            return CheckMessage.ignore
Example #8
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download
        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check for a left and right BIN file - skip if not found
        found_left = False
        found_right = False
        for f in resource['files']:
            if 'filename' in f:
                if f['filename'].endswith('_left.bin'):
                    found_left = True
                elif f['filename'].endswith('_right.bin'):
                    found_right = True
        if not (found_left and found_right):
            self.log_skip(
                resource,
                "found left: %s, right: %s" % (found_left, found_right))
            return CheckMessage.ignore

        # Check if outputs already exist unless overwrite is forced - skip if found
        if not self.overwrite:
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            lbase = self.sensors.get_sensor_path(timestamp,
                                                 opts=['left'],
                                                 ext='')
            rbase = self.sensors.get_sensor_path(timestamp,
                                                 opts=['right'],
                                                 ext='')
            out_dir = os.path.dirname(lbase)
            if (os.path.isfile(lbase + 'tif')
                    and os.path.isfile(rbase + 'tif')):
                self.log_skip(resource, "outputs found in %s" % out_dir)
                return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_extractor_metadata(
                md, self.extractor_info['name']) and not self.overwrite:
            self.log_skip("metadata indicates it was already processed")
            return CheckMessage.ignore
        if get_terraref_metadata(md):
            return CheckMessage.download
        else:
            self.log_skip("no terraref metadata found")
            return CheckMessage.ignore
Example #9
def do_work(left_file, right_file, json_file):
    """Make the calls to convert the files
    Args:
        left_file(str): Path to the left BIN file
        right_file(str): Path to the right BIN file
        json_file(str): Path to the JSON file
    """
    out_left = os.path.splitext(left_file)[0] + ".tif"
    out_right = os.path.splitext(right_file)[0] + ".tif"
    file_name, file_ext = os.path.splitext(json_file)
    out_json = file_name + "_updated" + file_ext

    # Load the JSON
    with open(json_file, "r") as infile:
        metadata = json.load(infile)
        if not metadata:
            raise RuntimeError("JSON file is empty or invalid: " + json_file)

    # Prepare the metadata
    clean_md = get_terraref_metadata(clean_metadata(metadata, 'stereoTop'), 'stereoTop')

    # Pull out the information we need from the JSON
    try:
        left_shape = terraref.stereo_rgb.get_image_shape(clean_md, 'left')
        gps_bounds_left = geojson_to_tuples(clean_md['spatial_metadata']['left']['bounding_box'])
        right_shape = terraref.stereo_rgb.get_image_shape(clean_md, 'right')
        gps_bounds_right = geojson_to_tuples(clean_md['spatial_metadata']['right']['bounding_box'])
    except KeyError:
        print("ERROR: Spatial metadata not properly identified in JSON file")
        return
 
    # Make the conversion calls
    print("creating %s" % out_left)
    left_image = terraref.stereo_rgb.process_raw(left_shape, left_file, None)
    create_geotiff(left_image, gps_bounds_left, out_left, asfloat=False, system_md=clean_md, compress=False)

    print("creating %s" % out_right)
    right_image = terraref.stereo_rgb.process_raw(right_shape, right_file, None)
    create_geotiff(right_image, gps_bounds_right, out_right, asfloat=False, system_md=clean_md, compress=True)

    # Write the metadata
    print("creating %s" % out_json)
    with open(out_json, "w") as outfile:
        json.dump(clean_md, outfile, indent=4)
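
A minimal command-line driver for do_work might look like the sketch below. It is not part of the original snippet: the script name, the argument order (left BIN, right BIN, metadata JSON) and the existence checks are assumptions added here for illustration, and do_work is assumed to be defined in (or imported into) the same module.

import os
import sys

def main():
    # Expect exactly three inputs: the left BIN, the right BIN and the metadata JSON
    if len(sys.argv) != 4:
        sys.exit("usage: python do_work_cli.py <left.bin> <right.bin> <metadata.json>")
    left_file, right_file, json_file = sys.argv[1:4]
    # Fail early with a clear message if any input path is missing
    for path in (left_file, right_file, json_file):
        if not os.path.isfile(path):
            sys.exit("file not found: %s" % path)
    do_work(left_file, right_file, json_file)

if __name__ == "__main__":
    main()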
Example #10
    def check_message(self, connector, host, secret_key, resource, parameters):
        if parameters["rulechecked"]:
            return CheckMessage.download

        if not is_latest_file(resource):
            return CheckMessage.ignore

        # Check for an ir.BIN file and metadata before beginning processing
        found_ir = None
        found_md = None
        for f in resource['files']:
            if 'filename' in f and f['filename'].endswith('_ir.bin'):
                found_ir = f['filepath']
            elif 'filename' in f and f['filename'].endswith('_metadata.json'):
                found_md = f['filepath']

        if found_ir:
            # Check if outputs already exist
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            png_path = self.sensors.get_sensor_path(timestamp, ext='png')
            tiff_path = self.sensors.get_sensor_path(timestamp)

            if os.path.exists(png_path) and os.path.exists(
                    tiff_path) and not self.overwrite:
                logging.getLogger(__name__).info(
                    "skipping dataset %s, outputs already exist" %
                    resource['id'])
                return CheckMessage.ignore

            # If we don't find _metadata.json file, check if we have metadata attached to dataset instead
            if not found_md:
                md = download_metadata(connector, host, secret_key,
                                       resource['id'])
                if get_extractor_metadata(
                        md,
                        self.extractor_info['name']) and not self.overwrite:
                    logging.getLogger(__name__).info(
                        "skipping dataset %s, already processed" %
                        resource['id'])
                    return CheckMessage.ignore
                if get_terraref_metadata(md):
                    return CheckMessage.download
                return CheckMessage.ignore
            else:
                return CheckMessage.download
        return CheckMessage.ignore
Example #11
    def check_message(self, connector, host, secret_key, resource, parameters):
        # Check for 0000-0101 bin files before beginning processing
        if len(resource['files']) < 102:
            self.log_skip(resource, "less than 102 files found")
            return CheckMessage.ignore
        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        hist_path = self.sensors.get_sensor_path(timestamp, opts=['combined_hist'])
        coloredImg_path = self.sensors.get_sensor_path(timestamp, opts=['combined_pseudocolored'])

        # Count number of bin files in dataset, as well as number of existing outputs
        ind_add = 0
        ind_output = 0
        for ind in range(0, 102):
            format_ind = "{0:0>4}".format(ind) # e.g. 1 becomes 0001
            for f in resource['files']:
                if f['filename'].endswith(format_ind+'.bin'):
                    ind_add += 1
                    out_png = self.sensors.get_sensor_path(timestamp, opts=[format_ind])
                    if os.path.exists(out_png) and not self.overwrite:
                        ind_output += 1
                    break

        # Do the outputs already exist?
        if ind_output == 102 and os.path.exists(hist_path) and os.path.exists(coloredImg_path):
            self.log_skip(resource, "outputs already exist")
            return CheckMessage.ignore
        # Do we have too few input BIN files?
        if ind_add < 102:
            self.log_skip(resource, "less than 102 .bin files found")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_extractor_metadata(md, self.extractor_info['name']) and not self.overwrite:
            self.log_skip(resource, "metadata indicates it was already processed")
            return CheckMessage.ignore
        if get_terraref_metadata(md):
            return CheckMessage.download
        else:
            self.log_skip(resource, "no terraref metadata found")
            return CheckMessage.ignore
Example #12
    def check_message(self, connector, host, secret_key, resource, parameters):
        #if not is_latest_file(resource):
        #	return CheckMessage.ignore

        # Adjust sensor path based on VNIR vs SWIR
        if resource['dataset_info']['name'].find("SWIR") > -1:
            sensor_fullname = 'swir_netcdf'
        else:
            sensor_fullname = 'vnir_netcdf'

        if has_all_files(resource):
            # Check if output already exists
            timestamp = resource['dataset_info']['name'].split(" - ")[1]
            outFilePath = self.sensors.get_sensor_path(timestamp,
                                                       sensor=sensor_fullname)

            if os.path.exists(outFilePath) and not self.overwrite:
                logging.getLogger(__name__).info(
                    'skipping dataset %s, output file already exists' %
                    resource['id'])
                return CheckMessage.ignore
            else:
                # Check if we have necessary metadata, either as a .json file or attached to dataset
                md = download_metadata(connector, host, secret_key,
                                       resource['id'],
                                       self.extractor_info['name'])
                if get_extractor_metadata(
                        md,
                        self.extractor_info['name']) and not self.overwrite:
                    logging.getLogger(__name__).info(
                        "skipping dataset %s, already processed" %
                        resource['id'])
                    return CheckMessage.ignore
                elif get_terraref_metadata(md):
                    return CheckMessage.download
                else:
                    for f in resource['files']:
                        if f['filename'] == 'metadata.json':
                            return CheckMessage.download
                    return CheckMessage.ignore
        else:
            logging.getLogger(__name__).info(
                'skipping dataset %s, not all input files are ready' %
                resource['id'])
            return CheckMessage.ignore
Example #13
def get_clowder_metadata(key, timestamp):
    # Look up the stereoTop dataset whose title exactly matches this timestamp
    resp = requests.get(
        "https://terraref.ncsa.illinois.edu/clowder/api/datasets?key=%s&exact=true&title=stereoTop - %s"
        % (key, timestamp))
    resp.raise_for_status()

    datasetId = resp.json()[0]["id"]

    # Fetch the JSON-LD metadata records attached to that dataset
    resp = requests.get(
        "https://terraref.ncsa.illinois.edu/clowder/api/datasets/%s/metadata.jsonld?key=%s"
        % (datasetId, key))
    resp.raise_for_status()

    # The second metadata record carries the TERRA-REF content block
    content = resp.json()[1]["content"]

    return get_terraref_metadata(content, 'stereoTop')
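
A hedged usage sketch for get_clowder_metadata follows. The CLOWDER_KEY environment variable and the example timestamp are placeholders, not values from the original; the spatial_metadata access mirrors the layout used by the other examples on this page.

import os

key = os.environ["CLOWDER_KEY"]  # assumed to hold a valid Clowder API key
md = get_clowder_metadata(key, "2017-05-15__12-00-00-000")  # illustrative timestamp
if md:
    # bounding_box layout matches the geojson_to_tuples calls in the examples above
    print(md['spatial_metadata']['left']['bounding_box'])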
Example #14
    def check_message(self, connector, host, secret_key, resource, parameters):
        if "rulechecked" in parameters and parameters["rulechecked"]:
            return CheckMessage.download

        self.start_check(resource)

        if not is_latest_file(resource):
            self.log_skip(resource, "not latest file")
            return CheckMessage.ignore

        # Check metadata to verify we have what we need
        md = download_metadata(connector, host, secret_key, resource['id'])
        if get_terraref_metadata(md):
            # Check for a left and right TIF file - skip if not found
            # If we're only processing the left files, don't check for the right file
            needed_files = ['_left.tif']
            if not self.leftonly:
                needed_files.append('_right.tif')
            if not contains_required_files(resource, needed_files):
                self.log_skip(resource, "missing required files")
                return CheckMessage.ignore

            if get_extractor_metadata(md, self.extractor_info['name'],
                                      self.extractor_info['version']):
                # Make sure outputs properly exist
                timestamp = resource['dataset_info']['name'].split(" - ")[1]
                left_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
                right_mask_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
                if (self.leftonly and file_exists(left_mask_tiff)) or \
                   (not self.leftonly and file_exists(left_mask_tiff) and file_exists(right_mask_tiff)):
                    self.log_skip(resource, "metadata v%s and outputs already exist" % \
                                  self.extractor_info['version'])
                    return CheckMessage.ignore
        # Check for other images to create a mask on
        elif not contains_required_files(resource, ['.tif']):
            self.log_skip(resource, "missing required tiff file")
            return CheckMessage.ignore

        # Have TERRA-REF metadata, but not any from this extractor
        return CheckMessage.download
Example #15
	def process_message(self, connector, host, secret_key, resource, parameters):
		self.start_message()

		# Find input files in dataset
		target_files = {
			'raw': None,
			'raw.hdr': None,
			'image.jpg': None,
			'frameIndex.txt': None,
			'settings.txt': None,
			"_metadata.json": None
		}

		metafile = None
		ds_metafile = None
		last_path = None
		path_match = None
		tempdir = None
		symlinks = []
		for f in resource['local_paths']:
			for fileExt in target_files.keys():
				if f.endswith(fileExt):
					if fileExt != '_metadata.json':
						filedir = os.path.dirname(f)
						if not last_path:
							last_path = filedir
						else:
							if filedir != last_path:
								path_match = False
							last_path = filedir
						target_files[fileExt] = {'filename': os.path.basename(f), 'path': f}
					else:
						if f.endswith('/_dataset_metadata.json'):
							ds_metafile = f
						elif not f.endswith('/_metadata.json'):
							metafile = f
							target_files['_metadata.json'] = {'filename': os.path.basename(metafile),
															  'path': metafile}

		# Identify md file either with other dataset files, or attached to Clowder dataset
		if metafile is None:
			if ds_metafile is not None:
				# Found dataset metadata, so check for the .json file alongside other files
				logging.getLogger(__name__).info("...checking for local metadata file alongside other files")
				ds_dir = os.path.dirname(target_files['raw']['path'])
				for ds_f in os.listdir(ds_dir):
					if ds_f.endswith("_metadata.json"):
						target_files['_metadata.json'] = {'filename': ds_f,
														  'path': os.path.join(ds_dir, ds_f)}
			else:
				raise ValueError('could not locate metadata for %s' % resource['id'])

		# Create symlinks in one directory if inputs aren't in the same one
		if not path_match:
			tempdir = tempfile.mkdtemp()
			for f in target_files.keys():
				currf = target_files[f]
				if currf['filename'] == '_dataset_metadata.json':
					# Open the temporary file and change the JSON content format
					with open(currf['path'], 'r') as mdfile:
						jsondata = json.load(mdfile)
					md = get_terraref_metadata(jsondata)
					with open(currf['path'], 'w') as mdfile:
						json.dump(md, mdfile)
					newf = os.path.join(tempdir, target_files['raw']['filename'].replace("_raw","")+'_metadata.json')
				else:
					newf = os.path.join(tempdir, currf['filename'])
				os.symlink(currf['path'], newf)
				symlinks.append(newf)


		# Adjust sensor path based on VNIR vs SWIR and check for soil mask
		timestamp = resource['dataset_info']['name'].split(" - ")[1]
		if resource['dataset_info']['name'].find("SWIR") > -1:
			sensor_fullname = 'swir_netcdf'
			soil_mask = None
		else:
			sensor_fullname = 'vnir_netcdf'
			# Check for corresponding soil mask to include in workflow.sh if available
			soil_mask = self.sensors.get_sensor_path(timestamp, sensor='vnir_soil_masks', opts=['soil_mask'])
		outFilePath = self.sensors.create_sensor_path(timestamp, sensor=sensor_fullname)

		# Invoke terraref.sh
		logging.getLogger(__name__).info('invoking hyperspectral_workflow.sh to create: %s' % outFilePath)
		# TODO: Move this
		script_path = "/projects/arpae/terraref/shared/extractors/extractors-hyperspectral/hyperspectral/hyperspectral_workflow.sh"
		if soil_mask:
			returncode = subprocess.call(["bash", script_path, "-d", "1", "-h",
										  "-m", soil_mask, "--new_clb_mth",
										  "-i", target_files['raw']['path'], "-o", outFilePath])
		else:
			returncode = subprocess.call(["bash", script_path, "-d", "1", "-h",
										 "--new_clb_mth",
										 "-i", target_files['raw']['path'], "-o", outFilePath])

		# Verify outfile exists and upload to clowder
		logging.getLogger(__name__).info('hyperspectral_workflow.sh finished with return code %s' % returncode)
		if returncode != 0:
			raise ValueError('script encountered an error')
		if os.path.exists(outFilePath):
			if returncode == 0:
				if outFilePath not in resource['local_paths']:
					target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
														  self.sensors.get_display_name(sensor=sensor_fullname),
														  timestamp[:4], timestamp[5:7], timestamp[8:10],
														  leaf_ds_name=self.sensors.get_display_name(sensor=sensor_fullname)+' - '+timestamp)

					logging.getLogger(__name__).info('uploading %s' % outFilePath)
					upload_to_dataset(connector, host, secret_key, target_dsid, outFilePath)
				self.created += 1
				self.bytes += os.path.getsize(outFilePath)
		else:
			logging.getLogger(__name__).error('no output file was produced')

		# Send indices to betyDB
		ind_file = self.sensors.get_sensor_path(timestamp, sensor=sensor_fullname, opts=['_ind'])
		with Dataset(ind_file, "r") as netCDF_handle:
			ndvi = netCDF_handle.get_variables_by_attributes(
					standard_name='normalized_difference_chlorophyll_index_750_705')
			NDVI705 = ndvi[0].getValue().ravel()[0]

			# TODO: Create CSV using ndviVal as primary key
			tmp_csv = 'traits.csv'
			plot_no = 'Full Field'
			csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
						 'citation_author,citation_year,citation_title,method'
			csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
					   'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
							timestamp, NDVI705, plot_no)
			with open(tmp_csv, 'w') as c:
				c.write(csv_header+'\n'+csv_vals)

		submit_traits(tmp_csv, bety_key=self.bety_key)

		# Remove symlinks and temp directory
		for sym in symlinks:
			os.remove(sym)
		if tempdir:
			os.rmdir(tempdir)

		self.end_message()
Example #16
	def process_message(self, connector, host, secret_key, resource, parameters):
		self.start_message(resource)

		sensor_type, timestamp = resource['name'].split(" - ")

		# First, re-check metadata to verify it hasn't been added in meantime
		ds_md = download_metadata(connector, host, secret_key, resource['id'])
		terra_md = get_terraref_metadata(ds_md)
		if terra_md:
			self.log_info(resource, "Found TERRA-REF metadata; not cleaning")
			return

		# These datasets do not have TERRA md
		uncleanables = ["Full Field"]
		if sensor_type in uncleanables:
			self.log_info(resource, "Cannot clean metadata for %s" % sensor_type)
			return

		# For these datasets, we must get TERRA md from raw_data source
		lv1_types = {"RGB GeoTIFFs": "stereoTop",
					 "Thermal IR GeoTIFFs": "flirIrCamera"}
		if sensor_type in lv1_types:
			raw_equiv = resource['name'].replace(sensor_type, lv1_types[sensor_type])
			source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(raw_equiv))
		else:
			# Search for metadata.json source file
			source_dir = os.path.dirname(self.sensors.get_sensor_path_by_dataset(resource['name']))
		source_dir = self.remapMountPath(connector, source_dir)

		if self.delete:
			# Delete all existing metadata from this dataset
			self.log_info(resource, "Deleting existing metadata")
			delete_dataset_metadata(host, self.clowder_user, self.clowder_pass, resource['id'])

		# TODO: split between the PLY files (in Level_1) and metadata.json files - unique to this sensor
		if sensor_type == "scanner3DTop":
			source_dir = source_dir.replace("Level_1", "raw_data")

		self.log_info(resource, "Searching for metadata.json in %s" % source_dir)
		if os.path.isdir(source_dir):
			md_file = None
			for f in os.listdir(source_dir):
				if f.endswith("metadata.json"):
					md_file = os.path.join(source_dir, f)
			if md_file:
				self.log_info(resource, "Found metadata.json; cleaning")
				md_json = clean_metadata(load_json_file(md_file), sensor_type)
				format_md = {
					"@context": ["https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld",
								 {"@vocab": "https://terraref.ncsa.illinois.edu/metadata/uamac#"}],
					"content": md_json,
					"agent": {
						"@type": "cat:user",
						"user_id": "https://terraref.ncsa.illinois.edu/clowder/api/users/%s" % self.userid
					}
				}
				self.log_info(resource, "Uploading cleaned metadata")
				upload_metadata(connector, host, secret_key, resource['id'], format_md)

				# Now trigger a callback extraction if given
				if len(self.callback) > 0:
					self.log_info(resource, "Submitting callback extraction to %s" % self.callback)
					submit_extraction(connector, host, secret_key, resource['id'], self.callback)
				else:
					callbacks = self.get_callbacks_by_sensor(sensor_type)
					if callbacks:
						for c in callbacks:
							self.log_info(resource, "Submitting callback extraction to %s" % c)
							submit_extraction(connector, host, secret_key, resource['id'], c)
					else:
						self.log_info(resource, "No default callback found for %s" % sensor_type)
			else:
				self.log_error(resource, "metadata.json not found in %s" % source_dir)

		else:
			self.log_error(resource, "%s could not be found" % source_dir)

		# TODO: Have extractor check for existence of Level_1 output product and delete if exists?

		self.end_message(resource)
Example #17
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, metadata = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                metadata = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.bin'):
                img_left = fname
            elif fname.endswith('_right.bin'):
                img_right = fname
        if None in [img_left, img_right, metadata]:
            self.log_error(
                resource,
                "could not locate each of left+right+metadata in processing")
            raise ValueError(
                "could not locate each of left+right+metadata in processing")

        # Determine output location & filenames
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
        right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
        uploaded_file_ids = []

        self.log_info(resource, "determining image shapes & gps bounds")
        left_shape = bin2tiff.get_image_shape(metadata, 'left')
        right_shape = bin2tiff.get_image_shape(metadata, 'right')
        left_gps_bounds = geojson_to_tuples(
            metadata['spatial_metadata']['left']['bounding_box'])
        right_gps_bounds = geojson_to_tuples(
            metadata['spatial_metadata']['right']['bounding_box'])
        out_tmp_tiff = os.path.join(tempfile.gettempdir(),
                                    resource['id'].encode('utf8'))

        target_dsid = build_dataset_hierarchy(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            self.sensors.get_display_name(),
            timestamp[:4],
            timestamp[5:7],
            timestamp[8:10],
            leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

        if (not os.path.isfile(left_tiff)) or self.overwrite:
            self.log_info(resource, "creating & uploading %s" % left_tiff)
            left_image = bin2tiff.process_image(left_shape, img_left, None)
            # Rename output.tif after creation to avoid long path errors
            create_geotiff(left_image, left_gps_bounds, out_tmp_tiff, None,
                           False, self.extractor_info, metadata)
            # TODO: we're moving zero byte files
            shutil.move(out_tmp_tiff, left_tiff)
            if left_tiff not in resource['local_paths']:
                fileid = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid,
                                           left_tiff)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            else:
                self.log_info(
                    resource,
                    "file found in dataset already; not re-uploading")
            self.created += 1
            self.bytes += os.path.getsize(left_tiff)

        if (not os.path.isfile(right_tiff)) or self.overwrite:
            self.log_info(resource, "creating & uploading %s" % right_tiff)
            right_image = bin2tiff.process_image(right_shape, img_right, None)
            create_geotiff(right_image, right_gps_bounds, out_tmp_tiff, None,
                           False, self.extractor_info, metadata)
            shutil.move(out_tmp_tiff, right_tiff)
            if right_tiff not in resource['local_paths']:
                fileid = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid,
                                           right_tiff)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            else:
                self.log_info(
                    resource,
                    "file found in dataset already; not re-uploading")
            self.created += 1
            self.bytes += os.path.getsize(right_tiff)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        self.log_info(resource, "uploading extractor metadata")
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

        # Upload original Lemnatec metadata to new Level_1 dataset
        md = get_terraref_metadata(all_dsmd)
        md['raw_data_source'] = host + ("" if host.endswith("/") else
                                        "/") + "datasets/" + resource['id']
        lemna_md = build_metadata(host, self.extractor_info, target_dsid, md,
                                  'dataset')
        self.log_info(resource, "uploading LemnaTec metadata")
        upload_metadata(connector, host, secret_key, target_dsid, lemna_md)

        self.end_message(resource)
Example #18
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # clean tmp directory from any potential failed previous runs
        flist = os.listdir("/tmp")
        for f in flist:
            try:
                os.remove(os.path.join("/tmp", f))
            except OSError:
                pass
        """ if file is above configured limit, skip it
		max_gb = 24 # RAM has 4x requirement, e.g. 24GB requires 96GB RAM
		for fname in resource['local_paths']:
			if fname.endswith('raw'): rawfile = fname
		rawsize = os.stat(rawfile).st_size
		if rawsize > max_gb * 1000000000:
			self.log_skip(resource, "filesize %sGB exceeds available RAM" % int(rawsize/1000000000))
			return False
		"""

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        if resource['dataset_info']['name'].find("SWIR") > -1:
            sensor_rawname = 'SWIR'
            sensor_fullname = 'swir_netcdf'
            soil_mask = None
        else:
            sensor_rawname = 'VNIR'
            sensor_fullname = 'vnir_netcdf'
            # Check for corresponding soil mask to include in workflow.sh if available
            soil_mask = self.sensors.get_sensor_path(timestamp,
                                                     sensor='vnir_soil_masks',
                                                     opts=['soil_mask'])
        out_nc = self.sensors.create_sensor_path(timestamp,
                                                 sensor=sensor_fullname)
        xps_file = self.sensors.get_sensor_path(timestamp,
                                                sensor=sensor_fullname,
                                                opts=['xps'])
        ind_file = self.sensors.get_sensor_path(timestamp,
                                                sensor=sensor_fullname,
                                                opts=['ind'])
        csv_file = self.sensors.get_sensor_path(timestamp,
                                                sensor=sensor_fullname.replace(
                                                    "_netcdf", "_traits"))

        raw_file, terra_md_full = None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, sensor_rawname)
            elif fname.endswith('raw'):
                raw_file = fname
        if None in [raw_file, terra_md_full]:
            raise ValueError(
                "could not locate all files & metadata in processing")

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(
            timestamp, sensor_rawname, terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        print_name = self.sensors.get_display_name(sensor=sensor_fullname)
        self.log_info(
            resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
            (season_name, experiment_name, print_name, timestamp[:4],
             timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            season_name,
            experiment_name,
            print_name,
            timestamp[:4],
            timestamp[5:7],
            timestamp[8:10],
            leaf_ds_name=print_name + ' - ' + timestamp)
        uploaded_file_ids = []

        # Perform actual processing
        if (not file_exists(out_nc)) or self.overwrite:
            """TODO: OLD AND NOT USED
			self.log_info(resource, 'invoking hyperspectral_workflow.sh to create: %s' % out_nc)
			if soil_mask and file_exists(soil_mask):
				# If soil mask exists, we can generate an _ind indices file
				returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
										  "-m", soil_mask, "--output_xps_img", xps_file, "-i", raw_file, "-o", out_nc]) # disable --new_clb_mth
			else:
				# Otherwise we cannot, and need to trigger soilmask extractor and circle back later
				returncode = subprocess.call(["bash", "hyperspectral_workflow.sh", "-d", "1", "-h",
											  "--output_xps_img", xps_file, "-i", raw_file, "-o", out_nc]) # disable --new_clb_mth
			if returncode != 0:
				raise ValueError('script encountered an error')
			"""

            self.log_info(resource,
                          'invoking python calibration to create: %s' % out_nc)
            create_empty_netCDF(raw_file, out_nc)
            self.log_info(resource, 'applying calibration to: %s' % out_nc)
            apply_calibration(raw_file, out_nc)
            self.log_info(resource, '...done processing %s' % raw_file)

            found_in_dest = check_file_in_dataset(connector,
                                                  host,
                                                  secret_key,
                                                  target_dsid,
                                                  out_nc,
                                                  remove=self.overwrite)
            if not found_in_dest or self.overwrite:
                fileid = upload_to_dataset(connector, host, secret_key,
                                           target_dsid, out_nc)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            self.created += 1
            self.bytes += os.path.getsize(out_nc)

            # TODO: Still compatible?
            #if not soil_mask:
            #	self.log_info(resource, "triggering soil mask extractor on %s" % fileid)
            #	submit_extraction(connector, host, secret_key, fileid, "terra.sunshade.soil_removal")

            # TODO: Sent output to BETYdb
            """
			# Send indices to betyDB
			if file_exists(ind_file):
				# TODO: Use ncks to trim ind_file to plots before this step
				plot_no = 'Full Field'
	
				with Dataset(ind_file, "r") as netCDF_handle:
					ndvi = netCDF_handle.get_variables_by_attributes(standard_name='normalized_difference_chlorophyll_index_750_705')
					NDVI705 = ndvi[0].getValue().ravel()[0]
	
					# TODO: Map the remaining ~50 variables in BETY to create indices file
					# TODO: In netCDF header,
	
					csv_header = 'local_datetime,NDVI705,access_level,species,site,' \
								 'citation_author,citation_year,citation_title,method'
					csv_vals = '%s,%s,2,Sorghum bicolor,%s,"Butowsky, Henry",2016,' \
							   'Maricopa Field Station Data and Metadata,Hyperspectral NDVI705 Indices' % (
									timestamp, NDVI705, plot_no)
					with open(csv_file, 'w') as c:
						c.write(csv_header+'\n'+csv_vals)
	
				# TODO: Send this CSV to betydb & geostreams extractors instead
				submit_traits(csv_file, bety_key=self.bety_key)
			"""

        self.end_message(resource)
Example #19
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get bin files and metadata
        metadata = None
        for f in resource['local_paths']:
            # First check metadata attached to dataset in Clowder for item of interest
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                metadata = get_terraref_metadata(all_dsmd, "ps2Top")
            # Otherwise, check if metadata was uploaded as a .json file
            elif f.endswith('_metadata.json') and f.find('/_metadata.json') == -1 and metadata is None:
                metadata = load_json_file(f)
        frames = {}
        for ind in range(0, 101):
            format_ind = "{0:0>4}".format(ind) # e.g. 1 becomes 0001
            for f in resource['local_paths']:
                if f.endswith(format_ind+'.bin'):
                    frames[ind] = f
        if metadata is None or len(frames) < 101:
            self.log_error(resource, 'could not find all of frames/metadata')
            return

        # Determine output directory
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        hist_path = self.sensors.create_sensor_path(timestamp, opts=['combined_hist'])
        coloredImg_path = self.sensors.create_sensor_path(timestamp, opts=['combined_pseudocolored'])
        uploaded_file_ids = []

        target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name()+' - '+timestamp)

        (img_width, img_height) = self.get_image_dimensions(metadata)
        gps_bounds = geojson_to_tuples(metadata['spatial_metadata']['ps2Top']['bounding_box'])

        self.log_info(resource, "image dimensions (w, h): (%s, %s)" % (img_width, img_height))

        png_frames = {}
        # skip frame 0101: despite the .bin extension it is an XML file listing the frame times
        for ind in range(0, 101):
            format_ind = "{0:0>4}".format(ind) # e.g. 1 becomes 0001
            png_path = self.sensors.create_sensor_path(timestamp, opts=[format_ind])
            tif_path = png_path.replace(".png", ".tif")
            png_frames[ind] = png_path
            if not os.path.exists(png_path) or self.overwrite:
                self.log_info(resource, "generating and uploading %s" % png_path)
                pixels = np.fromfile(frames[ind], np.dtype('uint8')).reshape([int(img_height), int(img_width)])
                create_image(pixels, png_path)
                create_geotiff(pixels, gps_bounds, tif_path, None, False, self.extractor_info, metadata)

                if png_path not in resource['local_paths']:
                    fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
                    uploaded_file_ids.append(fileid)
                self.created += 1
                self.bytes += os.path.getsize(png_path)

        # Generate aggregate outputs
        self.log_info(resource, "generating aggregates")
        if not (os.path.exists(hist_path) and os.path.exists(coloredImg_path)) or self.overwrite:
            # TODO: Coerce histogram and pseudocolor to geotiff?
            self.analyze(int(img_width), int(img_height), png_frames, hist_path, coloredImg_path)
            self.created += 2
            self.bytes += os.path.getsize(hist_path) + os.path.getsize(coloredImg_path)
        if hist_path not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, hist_path)
            uploaded_file_ids.append(fileid)
        if coloredImg_path not in resource['local_paths']:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, coloredImg_path)
            uploaded_file_ids.append(fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(host, self.extractor_info, target_dsid, {
                                  "files_created": uploaded_file_ids}, 'dataset')
        self.log_info(resource, "uploading extractor metadata")
        upload_metadata(connector, host, secret_key, resource['id'], metadata)

        self.end_message(resource)
Example #20
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get BIN file and metadata
        bin_file, terra_md_full = None, None
        for f in resource['local_paths']:
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                terra_md_full = get_terraref_metadata(all_dsmd, 'flirIrCamera')
            elif f.endswith('_ir.bin'):
                bin_file = f
        if None in [bin_file, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'flirIrCamera', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(),
                                                                                 timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              season_name, experiment_name, self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name()+' - '+timestamp)
        tiff_path = self.sensors.create_sensor_path(timestamp)
        png_path = tiff_path.replace(".tif", ".png")
        uploaded_file_ids = []

        # Attach LemnaTec source metadata to Level_1 product
        self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
        remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
        terra_md_trim = get_terraref_metadata(all_dsmd)
        if updated_experiment is not None:
            terra_md_trim['experiment_metadata'] = updated_experiment
        terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
        level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, level1_md)

        skipped_png = False
        if not file_exists(png_path) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating & uploading %s" % png_path)
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
            create_image(raw_data, png_path, self.scale_values)
            self.created += 1
            self.bytes += os.path.getsize(png_path)
        else:
            skipped_png = True
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, png_path, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, png_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        if not file_exists(tiff_path) or self.overwrite:
            # Generate temperature matrix and perform actual processing
            self.log_info(resource, "creating & uploading %s" % tiff_path)
            gps_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['flirIrCamera']['bounding_box'])
            if skipped_png:
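                # The PNG step was skipped, so the raw frame was never loaded; read it again for the GeoTIFF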
                raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape([480, 640]).astype('float')
                raw_data = numpy.rot90(raw_data, 3)
            tc = getFlir.rawData_to_temperature(raw_data, terra_md_full) # get temperature
            create_geotiff(tc, gps_bounds, tiff_path, None, True, self.extractor_info, terra_md_full)
            self.created += 1
            self.bytes += os.path.getsize(tiff_path)
        # Only upload the newly generated file to Clowder if it isn't already in dataset
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, tiff_path, remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, tiff_path)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Trigger additional extractors
        self.log_info(resource, "triggering downstream extractors")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
                "files_created": uploaded_file_ids
            }, 'dataset')
            self.log_info(resource, "uploading extractor metadata to raw dataset")
            remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
            upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
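
A note on a recurring idiom: the expression host + ("" if host.endswith("/") else "/") + "files/" + fileid appears in nearly every example to build Clowder URLs. A small helper would make the intent clearer; a minimal sketch (clowder_url is our name, not part of terrautils):

def clowder_url(host, *parts):
    # Join a Clowder host and path segments with exactly one slash between them
    return host.rstrip("/") + "/" + "/".join(p.strip("/") for p in parts)

# e.g. uploaded_file_ids.append(clowder_url(host, "files", fileid))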
Example No. 21
import os
import sys

lib_path = os.path.abspath(os.path.join('..', '..', 'scanner_3d'))
sys.path.append(lib_path)

from terrautils.metadata import get_terraref_metadata, clean_metadata
from terrautils.extractors import load_json_file
from scanner_3d.ply2las import generate_las_from_pdal, combine_east_west_las, geo_referencing_las, \
    geo_referencing_las_for_eachpoint_in_mac

test_id = '85f9c8c2-fa68-48a6-b63c-375daa438414'
path = os.path.join(os.path.dirname(__file__), 'test_ply2las_doc', test_id)
dire = os.path.join(os.path.dirname(__file__), 'test_ply2las_doc')

all_dsmd = load_json_file(path + '_metadata.json')
cleanmetadata = clean_metadata(all_dsmd, "scanner3DTop")
terra_md = get_terraref_metadata(cleanmetadata, 'scanner3DTop')

in_east = '/data/' + test_id + '__Top-heading-east_0.ply'
in_west = '/data/' + test_id + '__Top-heading-west_0.ply'

pdal_base = "docker run -v %s:/data pdal/pdal:1.5 " % dire
tmp_east_las = "/data/east_temp.las"
tmp_west_las = "/data/west_temp.las"
merge_las = "/data/merged.las"
convert_las = dire + "/converted.las"
convert_pt_las = dire + "/converted_pts.las"


def test_east_las():
    generate_las_from_pdal(pdal_base, in_east, tmp_east_las)
    assert os.path.isfile(dire + '/east_temp.las')
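
The snippet ends after the east-heading test. A symmetric check for the west-heading PLY could look like the following sketch, reusing the fixtures defined above (test_west_las is our name, not from the source):

def test_west_las():
    generate_las_from_pdal(pdal_base, in_west, tmp_west_las)
    assert os.path.isfile(dire + '/west_temp.las')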
Example No. 22
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message()

        # Get BIN file and metadata
        bin_file, metadata = None, None
        for f in resource['local_paths']:
            # First check metadata attached to dataset in Clowder for item of interest
            if f.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(f)
                metadata = get_terraref_metadata(all_dsmd, 'flirIrCamera')
            # Otherwise, check if metadata was uploaded as a .json file
            elif f.endswith('_ir.bin'):
                bin_file = f
        if None in [bin_file, metadata]:
            logging.getLogger(__name__).error(
                'could not find both the ir.bin file and its metadata')
            return

        # Determine output directory
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        png_path = self.sensors.create_sensor_path(timestamp, ext='png')
        tiff_path = self.sensors.create_sensor_path(timestamp)
        uploaded_file_ids = []

        target_dsid = build_dataset_hierarchy(
            host,
            secret_key,
            self.clowder_user,
            self.clowder_pass,
            self.clowderspace,
            self.sensors.get_display_name(),
            timestamp[:4],
            timestamp[5:7],
            timestamp[8:10],
            leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

        skipped_png = False
        if not os.path.exists(png_path) or self.overwrite:
            logging.getLogger(__name__).info("Generating %s" % png_path)
            # get raw data from bin file
            raw_data = numpy.fromfile(bin_file, numpy.dtype('<u2')).reshape(
                [480, 640]).astype('float')
            raw_data = numpy.rot90(raw_data, 3)
            create_image(raw_data, png_path, self.scale_values)
            # Only upload the newly generated file to Clowder if it isn't already in dataset
            if png_path not in resource["local_paths"]:
                fileid = upload_to_dataset(connector, host, secret_key,
                                           target_dsid, png_path)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            self.created += 1
            self.bytes += os.path.getsize(png_path)
        else:
            skipped_png = True

        if not os.path.exists(tiff_path) or self.overwrite:
            logging.getLogger(__name__).info("Generating temperature matrix")
            gps_bounds = geojson_to_tuples(
                metadata['spatial_metadata']['flirIrCamera']['bounding_box'])
            if skipped_png:
                raw_data = numpy.fromfile(bin_file,
                                          numpy.dtype('<u2')).reshape(
                                              [480, 640]).astype('float')
                raw_data = numpy.rot90(raw_data, 3)
            tc = getFlir.rawData_to_temperature(raw_data,
                                                metadata)  # get temperature

            logging.getLogger(__name__).info("Creating %s" % tiff_path)
            # Rename temporary tif after creation to avoid long path errors
            out_tmp_tiff = os.path.join(tempfile.gettempdir(),
                                        str(resource['id']))
            create_geotiff(tc, gps_bounds, out_tmp_tiff, None, True,
                           self.extractor_info, metadata)
            shutil.move(out_tmp_tiff, tiff_path)
            if tiff_path not in resource["local_paths"]:
                fileid = upload_to_dataset(connector, host, secret_key,
                                           target_dsid, tiff_path)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)
            self.created += 1
            self.bytes += os.path.getsize(tiff_path)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        metadata = build_metadata(host, self.extractor_info, target_dsid,
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        upload_metadata(connector, host, secret_key, resource['id'], metadata)

        # Upload original Lemnatec metadata to new Level_1 dataset
        md = get_terraref_metadata(all_dsmd)
        md['raw_data_source'] = host + ("" if host.endswith("/") else
                                        "/") + "datasets/" + resource['id']
        lemna_md = build_metadata(host, self.extractor_info, target_dsid, md,
                                  'dataset')
        upload_metadata(connector, host, secret_key, target_dsid, lemna_md)

        self.end_message()
Example No. 23
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.tif'):
                img_left = fname
            elif fname.endswith('_right.tif'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError(
                "could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        target_dsid = resource['id']

        left_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp,
                                                            opts=['left'])
        right_rgb_enh_tiff = self.sensors.create_sensor_path(timestamp,
                                                             opts=['right'])
        uploaded_file_ids = []

        left_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['right']['bounding_box'])

        if not file_exists(left_rgb_enh_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % left_rgb_enh_tiff)
            EI = getEnhancedImage(img_left)
            create_geotiff(EI, left_bounds, left_rgb_enh_tiff)
            self.created += 1
            self.bytes += os.path.getsize(left_rgb_enh_tiff)

        found_in_dest = check_file_in_dataset(connector,
                                              host,
                                              secret_key,
                                              target_dsid,
                                              left_rgb_enh_tiff,
                                              remove=self.overwrite)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % left_rgb_enh_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       left_rgb_enh_tiff)
            uploaded_file_ids.append(host +
                                     ("" if host.endswith("/") else "/") +
                                     "files/" + fileid)

        if not file_exists(right_rgb_enh_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % right_rgb_enh_tiff)
            EI = getEnhancedImage(img_right)
            create_geotiff(EI, right_bounds, right_rgb_enh_tiff)
            self.created += 1
            self.bytes += os.path.getsize(right_rgb_enh_tiff)

        found_in_dest = check_file_in_dataset(connector,
                                              host,
                                              secret_key,
                                              target_dsid,
                                              right_rgb_enh_tiff,
                                              remove=self.overwrite)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % right_rgb_enh_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       right_rgb_enh_tiff)
            uploaded_file_ids.append(host +
                                     ("" if host.endswith("/") else "/") +
                                     "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        ext_meta = build_metadata(host, self.extractor_info, target_dsid,
                                  {"files_created": uploaded_file_ids},
                                  'dataset')
        self.log_info(resource, "uploading extractor metadata")
        remove_metadata(connector, host, secret_key, target_dsid,
                        self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, target_dsid, ext_meta)

        self.end_message(resource)
Example No. 24
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.bin'):
                img_left = fname
            elif fname.endswith('_right.bin'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'stereoTop', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine output directory
        self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(),
                                                                                 timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
        target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                              season_name, experiment_name, self.sensors.get_display_name(),
                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
                                              leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)
        left_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
        right_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
        uploaded_file_ids = []

        # Attach LemnaTec source metadata to Level_1 product if necessary
        target_md = download_metadata(connector, host, secret_key, target_dsid)
        if not get_extractor_metadata(target_md, self.extractor_info['name']):
            self.log_info(resource, "uploading LemnaTec metadata to ds [%s]" % target_dsid)
            remove_metadata(connector, host, secret_key, target_dsid, self.extractor_info['name'])
            terra_md_trim = get_terraref_metadata(all_dsmd)
            if updated_experiment is not None:
                terra_md_trim['experiment_metadata'] = updated_experiment
            terra_md_trim['raw_data_source'] = host + ("" if host.endswith("/") else "/") + "datasets/" + resource['id']
            level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
            upload_metadata(connector, host, secret_key, target_dsid, level1_md)

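        # Extract per-camera image shapes and GPS bounds; malformed spatial metadata is handed off to the cleaner extractor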
        try:
            left_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'left')
            gps_bounds_left = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
            right_shape = terraref.stereo_rgb.get_image_shape(terra_md_full, 'right')
            gps_bounds_right = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])
        except KeyError:
            self.log_error(resource, "spatial metadata not properly identified; sending to cleaner")
            submit_extraction(connector, host, secret_key, resource['id'], "terra.metadata.cleaner")
            return

        if (not file_exists(left_tiff)) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating %s" % left_tiff)
            left_image = terraref.stereo_rgb.process_raw(left_shape, img_left, None)
            create_geotiff(left_image, gps_bounds_left, left_tiff, None, True,
                           self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(left_tiff)
        # Check if the file should be uploaded, even if it was already created
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % left_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, left_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)


        if (not file_exists(right_tiff)) or self.overwrite:
            # Perform actual processing
            self.log_info(resource, "creating %s" % right_tiff)
            right_image = terraref.stereo_rgb.process_raw(right_shape, img_right, None)
            create_geotiff(right_image, gps_bounds_right, right_tiff, None, True,
                           self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(right_tiff)
        # Check if the file should be uploaded, even if it was already created
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % right_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid, right_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Trigger additional extractors
        self.log_info(resource, "triggering downstream extractors")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.rgbmask")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.stereo-rgb.nrmac")
        submit_extraction(connector, host, secret_key, target_dsid, "terra.plotclipper_tif")

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            extractor_md = build_metadata(host, self.extractor_info, target_dsid, {
                "files_created": uploaded_file_ids
            }, 'dataset')
            self.log_info(resource, "uploading extractor metadata to raw dataset")
            remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
            try:
                upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
            except Exception:
                self.log_info(resource, "problem uploading extractor metadata...")

        self.end_message(resource)
Example No. 25
    def process_message(self, connector, host, secret_key, resource,
                        parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.tif'):
                img_left = fname
            elif fname.endswith('_right.tif'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError(
                "could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        target_dsid = resource['id']

        left_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp,
                                                             opts=['left'])
        right_rgb_mask_tiff = self.sensors.create_sensor_path(timestamp,
                                                              opts=['right'])
        uploaded_file_ids = []
        right_ratio, left_ratio = 0, 0

        left_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_bounds = geojson_to_tuples(
            terra_md_full['spatial_metadata']['right']['bounding_box'])
        #qual_md = get_extractor_metadata(all_dsmd, "terra.stereo-rgb.nrmac")
        if (not file_exists(left_rgb_mask_tiff)) or self.overwrite:
            self.log_info(resource, "creating %s" % left_rgb_mask_tiff)

            #if qual_md and 'left_quality_score' in qual_md:
            #left_ratio, left_rgb = gen_cc_enhanced(img_left, quality_score=int(qual_md['left_quality_score']))
            left_ratio, left_rgb = gen_cc_enhanced(img_left)

            if left_ratio is not None and left_rgb is not None:
                # Bands must be reordered to avoid swapping R and B
                left_rgb = cv2.cvtColor(left_rgb, cv2.COLOR_BGR2RGB)
                create_geotiff(left_rgb, left_bounds, left_rgb_mask_tiff, None,
                               False, self.extractor_info, terra_md_full)
                compress_geotiff(left_rgb_mask_tiff)
                self.created += 1
                self.bytes += os.path.getsize(left_rgb_mask_tiff)
            else:
                # If the masked version was not generated, delete any stale old version as well
                if file_exists(left_rgb_mask_tiff):
                    self.log_info(resource, "a faulty version exists; deleting %s" % left_rgb_mask_tiff)
                    os.remove(left_rgb_mask_tiff)

        found_in_dest = check_file_in_dataset(connector, host, secret_key,
                                              target_dsid, left_rgb_mask_tiff)
        if not found_in_dest:
            self.log_info(resource, "uploading %s" % left_rgb_mask_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user,
                                       self.clowder_pass, target_dsid,
                                       left_rgb_mask_tiff)
            uploaded_file_ids.append(host +
                                     ("" if host.endswith("/") else "/") +
                                     "files/" + fileid)

        if not self.leftonly:
            if (not file_exists(right_rgb_mask_tiff)) or self.overwrite:

                right_ratio, right_rgb = gen_cc_enhanced(img_right)

                if right_ratio is not None and right_rgb is not None:
                    # Bands must be reordered to avoid swapping R and B
                    right_rgb = cv2.cvtColor(right_rgb, cv2.COLOR_BGR2RGB)
                    create_geotiff(right_rgb, right_bounds,
                                   right_rgb_mask_tiff, None, False,
                                   self.extractor_info, terra_md_full)
                    compress_geotiff(right_rgb_mask_tiff)
                    self.created += 1
                    self.bytes += os.path.getsize(right_rgb_mask_tiff)
                else:
                    # If the masked version was not generated, delete any stale old version as well
                    if file_exists(right_rgb_mask_tiff):
                        self.log_info(resource, "a faulty version exists; deleting %s" % right_rgb_mask_tiff)
                        os.remove(right_rgb_mask_tiff)

            found_in_dest = check_file_in_dataset(connector, host, secret_key,
                                                  target_dsid,
                                                  right_rgb_mask_tiff)
            if not found_in_dest:
                self.log_info(resource, "uploading %s" % right_rgb_mask_tiff)
                fileid = upload_to_dataset(connector, host, self.clowder_user,
                                           self.clowder_pass, target_dsid,
                                           right_rgb_mask_tiff)
                uploaded_file_ids.append(host +
                                         ("" if host.endswith("/") else "/") +
                                         "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        if len(uploaded_file_ids) > 0:
            md = {
                "files_created": uploaded_file_ids,
                "left_mask_ratio": left_ratio
            }
            if not self.leftonly:
                md["right_mask_ratio"] = right_ratio
            extractor_md = build_metadata(host, self.extractor_info,
                                          target_dsid, md, 'dataset')
            self.log_info(resource,
                          "uploading extractor metadata to Lv1 dataset")
            remove_metadata(connector, host, secret_key, resource['id'],
                            self.extractor_info['name'])
            upload_metadata(connector, host, secret_key, resource['id'],
                            extractor_md)

        self.end_message(resource)
Example No. 26
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Load metadata from dataset
        terra_md_full, spatial_meta = None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd)
                if 'spatial_metadata' in terra_md_full:
                    spatial_meta = terra_md_full['spatial_metadata']
                else:
                    spatial_meta = None
        if not spatial_meta:
            raise ValueError("No spatial metadata found.")

        # Determine which files in dataset need clipping, remembering each file's sensor type
        files_to_process = {}
        for f in resource['local_paths']:
            if f.startswith("ir_geotiff") and f.endswith(".tif"):
                filename = os.path.basename(f)
                files_to_process[filename] = {
                    "path": f,
                    "sensor_name": "ir_geotiff",
                    "bounds": spatial_meta['flirIrCamera']['bounding_box']
                }

            elif f.startswith("rgb_geotiff") and f.endswith(".tif"):
                filename = os.path.basename(f)
                side = "left" if f.endswith("_left.tif") else "right"
                files_to_process[filename] = {
                    "path": f,
                    "sensor_name": "rgb_geotiff",
                    "bounds": spatial_meta[side]['bounding_box']
                }

            elif f.endswith(".las"):
                filename = os.path.basename(f)
                files_to_process[filename] = {
                    "path": f,
                    "sensor_name": "laser3d_las",
                    "bounds": get_las_extents(f)
                }

            # TODO: Add case for laser3d heightmap

        # Fetch experiment name from terra metadata
        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, 'plotclipper', terra_md_full)
        if None in [season_name, experiment_name]:
            raise ValueError("season and experiment could not be determined")

        # Determine script name
        target_scan = "unknown_scan"
        if 'gantry_variable_metadata' in terra_md_full:
            if 'script_name' in terra_md_full['gantry_variable_metadata']:
                target_scan = terra_md_full['gantry_variable_metadata']['script_name']
                if 'script_hash' in terra_md_full['gantry_variable_metadata']:
                    target_scan += ' '+terra_md_full['gantry_variable_metadata']['script_hash']

        all_plots = get_site_boundaries(timestamp.split("__")[0], city='Maricopa')
        uploaded_file_ids = []

        for filename in files_to_process:
            file_path = files_to_process[filename]["path"]
            file_bounds = files_to_process[filename]["bounds"]
            sensor_name = files_to_process[filename]["sensor_name"]

            overlap_plots = find_plots_intersect_boundingbox(file_bounds, all_plots, fullmac=True)

            if len(overlap_plots) > 0:
                self.log_info(resource, "Attempting to clip %s into %s plot shards" % (filename, len(overlap_plots)))
                for plotname in overlap_plots:
                    plot_bounds = overlap_plots[plotname]
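                    # Plot boundaries arrive as GeoJSON strings; convert to bounding tuples for clipping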
                    tuples = geojson_to_tuples_betydb(yaml.safe_load(plot_bounds))

                    plot_display_name = self.sensors.get_display_name(sensor=sensor_name) + " (By Plot)"
                    leaf_dataset = plot_display_name + ' - ' + plotname + " - " + timestamp.split("__")[0]
                    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, plot_display_name,
                                                                                             timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_dataset))
                    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
                                                                season_name, experiment_name, plot_display_name,
                                                                timestamp[:4], timestamp[5:7], timestamp[8:10], leaf_ds_name=leaf_dataset)

                    out_file = self.sensors.create_sensor_path(timestamp, plot=plotname, subsensor=sensor_name, filename=filename)
                    if not os.path.exists(os.path.dirname(out_file)):
                        os.makedirs(os.path.dirname(out_file))

                    if filename.endswith(".tif") and (not file_exists(out_file) or self.overwrite):
                        """If file is a geoTIFF, simply clip it and upload it to Clowder"""
                        clip_raster(file_path, tuples, out_path=out_file)

                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(merged_out)

                    elif filename.endswith(".las"):
                        """If file is LAS, we can merge with any existing scan+plot output safely"""
                        merged_out = os.path.join(os.path.dirname(out_file), target_scan+"_merged.las")
                        merged_txt = merged_out.replace(".las", "_contents.txt")

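                        # A sidecar _contents.txt records which source LAS files are already in the merged plot LAS,
                        # so re-runs don't merge the same points twice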
                        already_merged = False
                        if os.path.exists(merged_txt):
                            # Check whether this source file is already listed in the sidecar
                            with open(merged_txt, 'r') as contents:
                                for entry in contents.readlines():
                                    if entry.strip() == file_path:
                                        already_merged = True
                                        break
                        if not already_merged:
                            clip_las(file_path, tuples, out_path=out_file, merged_path=merged_out)
                            with open(merged_txt, 'a') as contents:
                                contents.write(file_path+"\n")

                        # Upload the individual plot shards for optimizing las2height later
                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, out_file, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, out_file)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(out_file)

                        # Upload the merged result if necessary
                        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, merged_out, remove=self.overwrite)
                        if not found_in_dest or self.overwrite:
                            fileid = upload_to_dataset(connector, host, secret_key, target_dsid, merged_out)
                            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)
                        self.created += 1
                        self.bytes += os.path.getsize(merged_out)

                        # Trigger las2height extractor
                        submit_extraction(connector, host, secret_key, target_dsid, "terra.3dscanner.las2height")


        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        extractor_md = build_metadata(host, self.extractor_info, resource['id'], {
            "files_created": uploaded_file_ids
        }, 'dataset')
        self.log_info(resource, "uploading extractor metadata to Level_1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
Example No. 27
    def process_message(self, connector, host, secret_key, resource, parameters):
        self.start_message(resource)

        # Get left/right files and metadata
        img_left, img_right, terra_md_full = None, None, None
        for fname in resource['local_paths']:
            if fname.endswith('_dataset_metadata.json'):
                all_dsmd = load_json_file(fname)
                terra_md_full = get_terraref_metadata(all_dsmd, 'stereoTop')
            elif fname.endswith('_left.tif'):
                img_left = fname
            elif fname.endswith('_right.tif'):
                img_right = fname
        if None in [img_left, img_right, terra_md_full]:
            raise ValueError("could not locate all files & metadata in processing")

        timestamp = resource['dataset_info']['name'].split(" - ")[1]
        target_dsid = resource['id']
        left_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['left'])
        right_nrmac_tiff = self.sensors.create_sensor_path(timestamp, opts=['right'])
        uploaded_file_ids = []

        self.log_info(resource, "determining image quality")
        left_qual = getImageQuality(img_left)
        if not self.leftonly:
            right_qual = getImageQuality(img_right)

        left_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['left']['bounding_box'])
        right_bounds = geojson_to_tuples(terra_md_full['spatial_metadata']['right']['bounding_box'])

        if not file_exists(left_nrmac_tiff) or self.overwrite:
            self.log_info(resource, "creating %s" % left_nrmac_tiff)
            create_geotiff(np.array([[left_qual, left_qual],[left_qual, left_qual]]), left_bounds,
                           left_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
            self.created += 1
            self.bytes += os.path.getsize(left_nrmac_tiff)
        found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, left_nrmac_tiff,
                                              remove=self.overwrite)
        if not found_in_dest or self.overwrite:
            self.log_info(resource, "uploading %s" % left_nrmac_tiff)
            fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid,
                                       left_nrmac_tiff)
            uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)


        if not self.leftonly:
            if (not file_exists(right_nrmac_tiff) or self.overwrite):
                self.log_info(resource, "creating %s" % right_nrmac_tiff)
                create_geotiff(np.array([[right_qual, right_qual],[right_qual, right_qual]]), right_bounds,
                               right_nrmac_tiff, None, True, self.extractor_info, terra_md_full, compress=True)
                self.created += 1
                self.bytes += os.path.getsize(right_nrmac_tiff)
            found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid, right_nrmac_tiff,
                                                  remove=self.overwrite)
            if not found_in_dest or self.overwrite:
                self.log_info(resource, "uploading %s" % right_nrmac_tiff)
                fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass, target_dsid,
                                           right_nrmac_tiff)
                uploaded_file_ids.append(host + ("" if host.endswith("/") else "/") + "files/" + fileid)

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        md = {
            "files_created": uploaded_file_ids,
            "left_quality_score": left_qual
        }
        if not self.leftonly:
            md["right_quality_score"] = right_qual
        extractor_md = build_metadata(host, self.extractor_info, resource['id'], md, 'file')
        self.log_info(resource, "uploading extractor metadata to Lv1 dataset")
        remove_metadata(connector, host, secret_key, resource['id'], self.extractor_info['name'])
        upload_metadata(connector, host, secret_key, resource['id'], extractor_md)

        self.end_message(resource)
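
getImageQuality is defined elsewhere in this repository and computes the NRMAC no-reference quality score used above. As a rough illustration of what a no-reference sharpness metric looks like, here is a variance-of-Laplacian sketch (a common stand-in, not the NRMAC algorithm itself):

import numpy as np
from scipy import ndimage

def laplacian_sharpness(gray):
    # Variance of the Laplacian response: higher values indicate a sharper image
    lap = ndimage.laplace(gray.astype('float64'))
    return float(lap.var())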