Example #1
    def submit_rulecheck(sensor_name, target, date):
        sensordef = count_defs[sensor_name]
        targetdef = sensordef[target]
        submitted = []

        s = Sensors("", "ua-mac")

        if "parent" in targetdef:
            target_dir = os.path.join(sensordef[targetdef["parent"]]["path"], date)
            target_timestamps = os.listdir(target_dir)

            disp_name = s.get_display_name(targetdef["parent"])

            for ts in target_timestamps:
                if ts.find("-") > -1 and ts.find("__") > -1: # TODO: and os.listdir(os.path.join(target_dir, ts)):
                    # Get first populated timestamp for the date that has a Clowder ID
                    dataset_name = disp_name+" - "+ts
                    raw_dsid = get_dsid_by_name(dataset_name)
                    if raw_dsid:
                        # Submit associated Clowder ID to rulechecker
                        submit_extraction(CONN, CLOWDER_HOST, CLOWDER_KEY, raw_dsid, "ncsa.rulechecker.terra")
                        submitted.append({"name": dataset_name, "id": raw_dsid})
                        break

        return json.dumps({
            "extractor": "ncsa.rulechecker.terra",
            "datasets submitted": submitted
        })
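A minimal usage sketch of the dataset-naming convention relied on above (the terrautils import path, the 'stereoTop' sensor name, and the timestamp are assumptions for illustration):

    from terrautils.sensors import Sensors  # assumed import path

    s = Sensors("", "ua-mac")
    disp_name = s.get_display_name("stereoTop")                    # hypothetical sensor name
    dataset_name = disp_name + " - " + "2017-06-01__10-30-15-000"  # hypothetical timestamp
    # dataset_name now matches the "<display name> - <timestamp>" pattern used for the Clowder lookup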
Example #2
    def check_message_individual(self, connector, host, secret_key, resource,
                                 parameters):
        """This is deprecated method that operates on single capture, not field mosaic"""
        ds_md = get_info(connector, host, secret_key, resource['parent']['id'])

        s = Sensors('', 'ua-mac', 'rgb_geotiff')
        if ds_md['name'].find(s.get_display_name()) > -1:
            timestamp = ds_md['name'].split(" - ")[1]
            side = 'left' if resource['name'].find("_left") > -1 else 'right'
            out_csv = self.sensors.get_sensor_path(timestamp,
                                                   opts=[side],
                                                   ext='csv')

            if not os.path.exists(out_csv) or self.overwrite:
                return CheckMessage.download
            else:
                logging.info("output file already exists; skipping %s" %
                             resource['id'])

        return CheckMessage.ignore
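For reference, a hedged sketch of the path derivation used in this check (import path assumed; the timestamp and side are hypothetical):

    from terrautils.sensors import Sensors  # assumed import path

    s = Sensors('', 'ua-mac', 'rgb_geotiff')
    out_csv = s.get_sensor_path('2017-06-01__10-30-15-000', opts=['left'], ext='csv')
    # out_csv is the per-capture CSV path whose existence decides download vs. ignore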
Example #3
def fullFieldMosaicStitcher(extractor, connector, host, secret_key, resource, rulemap):
    results = {}
    full_field_ready = False

    # full-field queues must have at least this percent of the raw datasets present to trigger
    tolerance_pct = 100
    # full-field queues must have at least this many datasets to trigger
    min_datasets = 200

    # Determine output dataset
    dsname = resource["dataset_info"]["name"]
    sensor = dsname.split(" - ")[0]

    # Map sensor display names to the GeoTIFF stitching target in those sensor datasets,
    # including the raw directory whose date subfolder is used to count datasets for that date
    if os.path.exists('/projects/arpae/terraref/sites'):
        TERRAREF_BASE = '/projects/arpae/terraref/sites'
    elif os.path.exists('/home/clowder/sites'):
        TERRAREF_BASE = '/home/clowder/sites'
    else:
        TERRAREF_BASE = '/home/extractor/sites'

    sensor_lookup = Sensors(TERRAREF_BASE, 'ua-mac')
    stitchable_sensors = {
        sensor_lookup.get_display_name('rgb_geotiff'): {
            "target": "_left.tif",
            "raw_dir": os.path.join(*(sensor_lookup.get_sensor_path('', sensor='stereoTop').split("/")[:-2]))
        },
        sensor_lookup.get_display_name('ir_geotiff'): {
            "target": ".tif",
            "raw_dir": os.path.join(*(sensor_lookup.get_sensor_path('', sensor='flirIrCamera').split("/")[:-2]))
        },
        sensor_lookup.get_display_name('laser3d_heightmap'): {
            "target": "_west.tif",
            "raw_dir": os.path.join(*(sensor_lookup.get_sensor_path('', sensor='scanner3DTop').split("/")[:-2]))
        },
        'scanner3DTop': {
            "target": "_west.tif",
            "raw_dir": os.path.join(*(sensor_lookup.get_sensor_path('', sensor='scanner3DTop').split("/")[:-2]))
        }
    }

    if sensor in stitchable_sensors.keys():
        timestamp = dsname.split(" - ")[1]
        date = timestamp.split("__")[0]
        progress_key = "Full Field -- " + sensor + " - " + date

        # Is there actually a new target geoTIFF to add to the stack?
        target_id = None
        for f in resource['files']:
            if f['filename'].endswith(stitchable_sensors[sensor]["target"]):
                target_id = f['id']
                target_path = f['filepath']
        if not target_id:
            # If not, no need to trigger anything for now.
            logging.info("no target geoTIFF found in %s" % dsname)
            for trig_extractor in rulemap["extractors"]:
                results[trig_extractor] = {
                    "process": False,
                    "parameters": {}
                }
            return results

        logging.info("[%s] found target: %s" % (progress_key, target_id))

        # Fetch all existing file IDs that would be fed into this field mosaic
        progress = rule_utils.retrieveProgressFromDB(progress_key)

        # Is current ID already included in the list? If not, add it
        submit_record = False
        if 'ids' in progress:
            ds_count = len(progress['ids'].keys())
            if target_id not in progress['ids'].keys():
                submit_record = True
                ds_count += 1
            else:
                # Already seen this geoTIFF, so skip for now.
                logging.info("previously logged target geoTIFF from %s" % dsname)
                for trig_extractor in rulemap["extractors"]:
                    results[trig_extractor] = {
                        "process": False,
                        "parameters": {}
                    }
        else:
            submit_record = True
            ds_count = 1

        if submit_record:
            for trig_extractor in rulemap["extractors"]:
                rule_utils.submitProgressToDB("fullFieldMosaicStitcher", trig_extractor, progress_key, target_id, target_path)

        if ds_count >= min_datasets:
            # Check to see if list of geotiffs is same length as list of raw datasets
            root_dir = stitchable_sensors[sensor]["raw_dir"]
            if len(connector.mounted_paths) > 0:
                for source_path in connector.mounted_paths:
                    if root_dir.startswith(source_path):
                        root_dir = root_dir.replace(source_path, connector.mounted_paths[source_path])
            date_directory = os.path.join(root_dir, date)
            date_directory = ("/"+date_directory if not date_directory.startswith("/") else "")

            raw_file_count = float(subprocess.check_output("ls %s | wc -l" % date_directory,
                                                           shell=True).strip())
            logging.info("found %s raw files in %s" % (int(raw_file_count), date_directory))

            if raw_file_count == 0:
                raise Exception("problem communicating with file system")
            else:
                # If we have enough raw files accounted for and more than min_datasets, trigger
                prog_pct = (len(progress['ids'])/raw_file_count)*100
                if prog_pct >= tolerance_pct:
                    full_field_ready = True
                else:
                    logging.info("found %s/%s necessary geotiffs (%s%%)" % (len(progress['ids']), int(raw_file_count),
                                                                            "{0:.2f}".format(prog_pct)))
        for trig_extractor in rulemap["extractors"]:
            results[trig_extractor] = {
                "process": full_field_ready,
                "parameters": {}
            }
            if full_field_ready:
                results[trig_extractor]["parameters"]["output_dataset"] = "Full Field - "+date

                # Write output ID list to a text file
                output_dir = os.path.dirname(sensor_lookup.get_sensor_path(date, 'fullfield'))
                logging.info("writing %s_file_ids.json to %s" % (sensor, output_dir))
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                output_file = os.path.join(output_dir, sensor+"_file_paths.json")

                # Sort IDs by file path before writing to disk
                # TODO: Eventually alternate every other image so we have half complete and half "underneath"
                paths = []
                for fid in progress['ids'].keys():
                    paths.append(progress['ids'][fid])
                with open(output_file, 'w') as out:
                    json.dump(sorted(paths), out)
                results[trig_extractor]["parameters"]["file_paths"] = output_file

    else:
        for trig_extractor in rulemap["extractors"]:
            results[trig_extractor] = {
                "process": False,
                "parameters": {}
            }

    return results
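The rule returns one entry per extractor in the rulemap; a sketch of the expected shape of that return value (the extractor name and paths are hypothetical):

    # Hypothetical illustration of the contract returned by fullFieldMosaicStitcher
    results = {
        "terra.geotiff.fieldmosaic": {                   # hypothetical extractor name
            "process": True,                             # False when the mosaic is not ready
            "parameters": {
                "output_dataset": "Full Field - 2017-06-01",
                "file_paths": "/sites/ua-mac/Level_1/fullfield/2017-06-01/rgb_geotiff_file_paths.json"
            }
        }
    }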
Example #4
class Transformer():
    """Generic class for supporting transformers
    """
    def __init__(self, **kwargs):
        """Performs initialization of class instance
        Arguments:
            kwargs: additional parameters passed in to Transformer
        """
        # pylint: disable=unused-argument
        self.sensor = None
        self.args = None

    @property
    def default_epsg(self):
        """Returns the default EPSG code that utilities expect
        """
        return 4326

    @property
    def sensor_name(self):
        """Returns the name of the sensor we represent
        """
        return configuration.TRANSFORMER_SENSOR

    @property
    def supported_image_file_exts(self):
        """Returns the list of supported image file extension strings (in lower case)
        """
        return ['tif', 'tiff', 'jpg']

    def get_image_file_epsg(self, source_path: str) -> str:
        """Returns the EPSG of the georeferenced image file
        Arguments:
            source_path: the path to the image to load the EPSG code from
        Return:
            Returns the EPSG code loaded from the file. None is returned if there is a problem or the file
            doesn't have an EPSG code
        """
        # pylint: disable=no-self-use
        return tr_get_epsg(source_path)

    def get_image_file_geobounds(self, source_path: str) -> list:
        """Uses gdal functionality to retrieve rectilinear boundaries from the file
        Args:
            source_path(str): path of the file to get the boundaries from
        Returns:
            The upper-left and calculated lower-right boundaries of the image in a list upon success.
            The values are returned in following order: min_y, max_y, min_x, max_x. A list of numpy.nan
            is returned if the boundaries can't be determined
        """
        # pylint: disable=no-self-use
        return tr_image_get_geobounds(source_path)

    def generate_transformer_md(self) -> dict:
        """Generates metadata about this transformer
        Returns:
            Returns the transformer metadata
        """
        # pylint: disable=no-self-use
        return {
            'version': configuration.TRANSFORMER_VERSION,
            'name': configuration.TRANSFORMER_NAME,
            'author': configuration.AUTHOR_NAME,
            'description': configuration.TRANSFORMER_DESCRIPTION,
            'repository': {
                'repUrl': configuration.REPOSITORY
            }
        }

    def add_parameters(self, parser: argparse.ArgumentParser) -> None:
        """Adds processing parameters to existing parameters
        Arguments:
            parser: instance of argparse
        """
        # pylint: disable=no-self-use
        parser.add_argument(
            '--logging',
            '-l',
            nargs='?',
            default=os.getenv("LOGGING"),
            help='file or URL of logging configuration (default=None)')

        parser.epilog = configuration.TRANSFORMER_NAME + ' version ' + configuration.TRANSFORMER_VERSION + \
                        ' author ' + configuration.AUTHOR_NAME + ' ' + configuration.AUTHOR_EMAIL

    def get_transformer_params(self, args: argparse.Namespace,
                               metadata_list: list) -> dict:
        """Returns a parameter list for processing data
        Arguments:
            args: result of calling argparse.parse_args
            metadata_list: the list of loaded metadata dictionaries
        """
        # pylint: disable=no-self-use
        # Setup logging
        pyc_setup_logging(args.logging)

        self.args = args

        # Determine if we're using JSONLD (which we should be)
        metadata = metadata_list[0]
        if 'content' in metadata:
            parse_md = metadata['content']
        else:
            parse_md = metadata

        terraref_md = tr_get_terraref_metadata(
            parse_md, configuration.TRANSFORMER_SENSOR)
        if not terraref_md:
            return {'code': -5001, 'error': "Unable to load Gantry information from metadata for '%s'" % \
                                                                                    configuration.TRANSFORMER_TYPE}

        timestamp = __internal__.get_metadata_timestamp(parse_md)
        if not timestamp:
            return {'code': -5002, 'error': "Unable to locate timestamp in metadata for '%s'" % \
                                                                                    configuration.TRANSFORMER_TYPE}

        # Fetch experiment name from terra metadata
        season_name, experiment_name, updated_experiment = \
                                    tr_get_season_and_experiment(__internal__.get_datestamp(timestamp),
                                                                 configuration.TRANSFORMER_TYPE, terraref_md)

        # Setup our sensor
        self.sensor = Sensors(base='',
                              station='ua-mac',
                              sensor=configuration.TRANSFORMER_SENSOR)
        leaf_name = self.sensor.get_display_name()

        # Get our trimmed metadata
        terraref_md_trim = tr_get_terraref_metadata(parse_md)
        if updated_experiment is not None:
            terraref_md_trim['experiment_metadata'] = updated_experiment

        # Get the list of files, if there are some
        file_list = []
        if args.file_list:
            for one_file in args.file_list:
                # Filter out arguments that are obviously not files
                if not one_file.startswith('-'):
                    file_list.append(one_file)

        # Prepare our parameters
        check_md = {
            'timestamp': timestamp,
            'season': season_name,
            'experiment': experiment_name,
            'container_name': None,
            'target_container_name': leaf_name,  # TODO: Is this needed?
            'trigger_name': None,
            'context_md': terraref_md_trim,
            'working_folder': args.working_space,
            'list_files': lambda: file_list
        }

        return {
            'check_md':
            check_md,
            'transformer_md':
            tr_get_extractor_metadata(terraref_md,
                                      configuration.TRANSFORMER_NAME),
            'full_md': [parse_md]
        }
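A minimal sketch of driving this class directly, assuming the configuration module the class references is available (the argparse wiring mirrors add_parameters above):

    import argparse

    transformer = Transformer()
    parser = argparse.ArgumentParser()
    transformer.add_parameters(parser)
    args = parser.parse_args([])                     # --logging falls back to the LOGGING env var
    print(transformer.default_epsg)                  # 4326
    print(transformer.generate_transformer_md())     # dict assembled from the configuration module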
Example #5
def bin2tif(filename: str, metadata: str, working_space: str) -> dict:
    """Converts the bin file to a geotiff file
    Arguments:
        filename: the path to the .bin file
        metadata: the path to the cleaned metadata file
        working_space: the path to our working space
    Return:
        A dict containing a 'code' key (0 on success, negative on error) and the associated results
    """
    result = {}

    loaded_json = do_load_json_file(metadata)
    if not loaded_json:
        msg = "Unable to load JSON from file '%s'" % metadata
        logging.error(msg)
        logging.error("    JSON may be missing or invalid. Returning an error")
        result['error'] = {'message': msg}
        result['code'] = -1
        return result

    if 'content' in loaded_json:
        parse_json = loaded_json['content']
    else:
        parse_json = loaded_json
    terra_md_full = do_get_terraref_metadata(parse_json, EXTRACTOR_NAME)
    if not terra_md_full:
        msg = "Unable to find %s metadata in JSON file '%s'" % (EXTRACTOR_NAME,
                                                                metadata)
        logging.error(msg)
        logging.error("    JSON may be missing or invalid. Returning an error")
        result['error'] = {'message': msg}
        result['code'] = -2
        return result

    timestamp = get_metadata_timestamp(terra_md_full)
    if not timestamp:
        msg = "Unable to find timestamp in JSON file '%s'" % filename
        logging.error(msg)
        logging.error("    JSON may be missing or invalid. Returning an error")
        result['error'] = {'message': msg}
        result['code'] = -3
        return result

    # Fetch experiment name from terra metadata
    _, _, updated_experiment = do_get_season_and_experiment(
        timestamp, 'stereoTop', terra_md_full)
    #        if None in [season_name, experiment_name]:
    #            raise ValueError("season and experiment could not be determined")
    #
    #        # Determine output directory
    #        self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" % (season_name, experiment_name, self.sensors.get_display_name(),
    #                                                                                 timestamp[:4], timestamp[5:7], timestamp[8:10], timestamp))
    #        target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass, self.clowderspace,
    #                                              season_name, experiment_name, self.sensors.get_display_name(),
    #                                              timestamp[:4], timestamp[5:7], timestamp[8:10],
    #                                              leaf_ds_name=self.sensors.get_display_name() + ' - ' + timestamp)

    sensor = Sensors(base='', station='ua-mac', sensor='rgb_geotiff')
    leaf_name = sensor.get_display_name()

    bin_type = 'left' if filename.endswith(
        '_left.bin') else 'right' if filename.endswith('_right.bin') else None
    if not bin_type:
        msg = "Bin file must be a left or right file: '%s'" % filename
        logging.error(msg)
        logging.error("    Returning an error")
        result['error'] = {'message': msg}
        result['code'] = -4
        return result

    terra_md_trim = do_get_terraref_metadata(parse_json)
    if updated_experiment is not None:
        terra_md_trim['experiment_metadata'] = updated_experiment
    terra_md_trim['raw_data_source'] = filename

    tiff_filename = os.path.splitext(os.path.basename(filename))[0] + '.tif'
    tiff_path = os.path.join(working_space, tiff_filename)

    try:
        bin_shape = terraref.stereo_rgb.get_image_shape(
            terra_md_full, bin_type)
        gps_bounds_bin = do_geojson_to_tuples(
            terra_md_full['spatial_metadata'][bin_type]['bounding_box'])
    except KeyError:
        msg = "Spatial metadata is not properly identified. Unable to continue"
        logging.error(msg)
        logging.error("    Returning an error")
        result['error'] = {'message': msg}
        result['code'] = -5
        return result

    # Extractor info
    extractor_info = {
        'name':
        EXTRACTOR_NAME,
        'version':
        EXTRCTOR_VERSION,
        'author':
        "*****@*****.**",
        'description':
        "Maricopa agricultural gantry bin to geotiff converter",
        'repository': [{
            "repType":
            "git",
            "repUrl":
            "https://github.com/terraref/extractors-stereo-rgb.git"
        }]
    }

    # Perform actual processing
    new_image = terraref.stereo_rgb.process_raw(bin_shape, filename, None)
    do_create_geotiff(new_image,
                      gps_bounds_bin,
                      tiff_path,
                      None,
                      True,
                      extractor_info,
                      terra_md_full,
                      compress=True)

    #        level1_md = build_metadata(host, self.extractor_info, target_dsid, terra_md_trim, 'dataset')
    context = ['https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld']
    terra_md_trim['extractor_version'] = EXTRCTOR_VERSION
    new_md = {
        '@context': context,
        'content': terra_md_trim,
        'filename': tiff_filename,
        'agent': {
            '@type': 'cat:extractor',
            'version': EXTRCTOR_VERSION,
            'name': EXTRACTOR_NAME
        }
    }

    # Setup the result
    result['container'] = [{
        'name': leaf_name,
        'exists': False,
        'metadata': {
            'replace': True,
            'data': new_md
        },
        'file': [{
            'path': tiff_path,
            'key': sensor.sensor
        }]
    }]
    result['code'] = 0
    return result
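A hedged invocation sketch (all paths are hypothetical; the cleaned-metadata JSON and working directory are assumed to exist):

    result = bin2tif(filename='/data/raw/2017-06-01__10-30-15-000_left.bin',
                     metadata='/data/raw/cleaned_metadata.json',
                     working_space='/tmp/bin2tif')
    if result['code'] != 0:
        print(result['error']['message'])            # negative codes carry an error message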
Example #6
    def perform_uploads(self, connector, host, secret_key, resource,
                        default_dsid, content, season_name, experiment_name,
                        timestamp):
        """Perform the uploading of all the files we're put onto the upload list

        Args:
            connector(obj): the message queue connector instance
            host(str): the URI of the host making the connection
            secret_key(str): used with the host API
            resource(dict): the resource (source dataset) associated with this request
            default_dsid(str): the default dataset to load files to
            content(str): content information for the files we're uploading
            season_name(str): the name of the season
            experiment_name(str): the name of the experiment
            timestamp(str): the timestamp string associated with the source dataset

        Notes:
            We loop through the files, compressing, and remapping the names as needed.
            If the sensor associated with the file is missing, we upload the file to
            the default dataset. Otherwise, we use the dataset associated with the sensor
            and create the dataset if necessary
        """
        for one_file in self.files_to_upload:
            sourcefile = os.path.join(one_file["source_path"],
                                      one_file["source_name"])

            # Make sure we have the original file, then compress it if needed or rename it
            if os.path.isfile(sourcefile):
                # make sure we have the full destination path
                if not os.path.exists(one_file["dest_path"]):
                    os.makedirs(one_file["dest_path"])

                resultfile = os.path.join(one_file["dest_path"],
                                          one_file["dest_name"])
                if one_file["compress"]:
                    resultfile = resultfile + ".zip"
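                    # Note: gzip.open writes gzip-format data even though a ".zip" extension is used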
                    with open(sourcefile, 'rb') as f_in:
                        with gzip.open(resultfile, 'wb') as f_out:
                            shutil.copyfileobj(f_in, f_out)
                elif not sourcefile == resultfile:
                    shutil.move(sourcefile, resultfile)

                # Find or create the target dataset for this entry if it doesn't exist
                cur_dataset_id = default_dsid
                if "sensor" in one_file:
                    sensor_type = one_file["sensor"]
                    if sensor_type in self.sensor_dsid_map:
                        cur_dataset_id = self.sensor_dsid_map[sensor_type]
                    else:
                        new_sensor = Sensors(base=self.sensors.base,
                                             station=self.sensors.station,
                                             sensor=sensor_type)

                        sensor_leaf_name = new_sensor.get_display_name(
                        ) + ' - ' + timestamp
                        ds_exists = get_datasetid_by_name(
                            host, secret_key, sensor_leaf_name)
                        new_dsid = build_dataset_hierarchy_crawl(
                            host,
                            secret_key,
                            self.clowder_user,
                            self.clowder_pass,
                            self.clowderspace,
                            season_name,
                            experiment_name,
                            new_sensor.get_display_name(),
                            timestamp[:4],
                            timestamp[5:7],
                            timestamp[8:10],
                            leaf_ds_name=sensor_leaf_name)

                        if (self.overwrite_ok
                                or not ds_exists) and self.experiment_metadata:
                            self.update_dataset_extractor_metadata(
                                connector, host, secret_key, new_dsid,
                                prepare_pipeline_metadata(
                                    self.experiment_metadata),
                                self.extractor_info['name'])

                        self.sensor_dsid_map[sensor_type] = new_dsid
                        cur_dataset_id = new_dsid

                # Check if file already exists in the dataset
                file_in_dataset = check_file_in_dataset(connector,
                                                        host,
                                                        secret_key,
                                                        cur_dataset_id,
                                                        resultfile,
                                                        remove=False)

                # If the file is already in the dataset, determine whether we need to delete it first
                if self.overwrite_ok and file_in_dataset:
                    # Delete the file from the dataset before uploading the new copy
                    self.log_info(
                        resource,
                        "Removing existing file in dataset " + resultfile)
                    check_file_in_dataset(connector,
                                          host,
                                          secret_key,
                                          cur_dataset_id,
                                          resultfile,
                                          remove=True)
                elif not self.overwrite_ok and file_in_dataset:
                    # We won't overwrite an existing file
                    self.log_skip(
                        resource, "Not overwriting existing file in dataset " +
                        resultfile)
                    continue

                # Upload the file to the dataset
                fid = upload_to_dataset(connector, host, self.clowder_user,
                                        self.clowder_pass, cur_dataset_id,
                                        resultfile)

                # Generate our metadata
                meta = build_metadata(host, self.extractor_info, fid, content,
                                      'file')

                # Upload the metadata to the file
                upload_metadata(connector, host, secret_key, fid, meta)

                self.created += 1
                self.bytes += os.path.getsize(resultfile)
            else:
                raise Exception("%s was not found" % sourcefile)
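Each entry consumed by the loop above follows this shape (the values shown are hypothetical):

    # Hypothetical files_to_upload entry illustrating the keys read by perform_uploads()
    one_file = {
        "source_path": "/home/extractor/output",
        "source_name": "rgb_capture_left.tif",
        "dest_path": "/home/extractor/uploads/2017-06-01",
        "dest_name": "rgb_capture_left.tif",
        "compress": False,
        "sensor": "rgb_geotiff"    # optional; the default dataset is used when this key is absent
    }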