def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    with open(resource['local_paths'][0], 'r') as inputcsv:
        inputlines = inputcsv.readlines()

    if len(inputlines) <= 1:
        # first check if there is data besides header line
        self.log_info(resource, "no trait lines found in CSV; skipping upload")
    else:
        # submit CSV to BETY
        self.log_info(resource, "found %s trait lines; submitting CSV to bety" % str(len(inputlines) - 1))
        submit_traits(resource['local_paths'][0], betykey=self.bety_key)

        # Add metadata to original dataset indicating this was run
        self.log_info(resource, "updating file metadata (%s)" % resource['id'])
        ext_meta = build_metadata(host, self.extractor_info, resource['id'], {}, 'file')
        upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    successful_plots = 0
    with open(resource['local_paths'][0], 'rb') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            centroid_lonlat = [row['lon'], row['lat']]
            time_fmt = row['dp_time']
            timestamp = row['timestamp']
            dpmetadata = {
                "source": row['source'],
                "value": row['value']
            }
            trait = row['trait']

            create_datapoint_with_dependencies(connector, host, secret_key, trait,
                                               (centroid_lonlat[1], centroid_lonlat[0]),
                                               time_fmt, time_fmt, dpmetadata, timestamp)
            successful_plots += 1

    # Add metadata to original dataset indicating this was run
    self.log_info(resource, "updating file metadata (%s)" % resource['id'])
    ext_meta = build_metadata(host, self.extractor_info, resource['id'], {
        "plots_processed": successful_plots,
    }, 'file')
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
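# Illustrative sketch (not part of the extractor above): the uploader reads a geostreams
# CSV with csv.DictReader, so the file needs a header row naming the columns used above,
# plus 'site', matching the 'site,trait,lat,lon,dp_time,source,value,timestamp' header
# that the canopy cover and mean temperature extractors later in this file write.
# Every value and the output path below are made-up examples.
import csv

with open("example_geo.csv", "w") as geo_example:
    writer = csv.DictWriter(geo_example, fieldnames=[
        'site', 'trait', 'lat', 'lon', 'dp_time', 'source', 'value', 'timestamp'])
    writer.writeheader()
    writer.writerow({
        'site': 'Example Plot Range 10 Column 3',  # plot name (hypothetical)
        'trait': 'Canopy Cover',
        'lat': '33.0757',
        'lon': '-111.9747',
        'dp_time': '2017-06-01T12:00:00-07:00',
        'source': 'https://terraref.ncsa.illinois.edu/clowder/files/<file_id>',
        'value': '0.42',
        'timestamp': '2017-06-01'
    })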
def upload_to_geostreams(file, clowder_id):
    # NOTE: host and secret_key are assumed to be defined at module level.
    conn = Connector(None, mounted_paths={"/home/clowder/sites": "/home/clowder/sites"})

    successful_plots = 0
    with open(file, 'rb') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            centroid_lonlat = [row['lon'], row['lat']]
            time_fmt = row['dp_time']
            timestamp = row['timestamp']
            dpmetadata = {"source": row['source'], "value": row['value']}
            trait = row['trait']

            create_datapoint_with_dependencies(conn, host, secret_key, trait,
                                               (centroid_lonlat[1], centroid_lonlat[0]),
                                               time_fmt, time_fmt, dpmetadata, timestamp)
            successful_plots += 1

    # Extractor metadata
    extractor_info = {
        "extractor_name": "terra.geostreams",
        "extractor_version": "1.0",
        "extractor_author": "Max Burnette <*****@*****.**>",
        "extractor_description": "Geostreams CSV uploader",
        "extractor_repo": "https://github.com/terraref/computing-pipeline.git"
    }

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(host, extractor_info, clowder_id, {
        "plots_processed": successful_plots,
    }, 'file')
    upload_metadata(conn, host, secret_key, clowder_id, ext_meta)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    f = resource['local_paths'][0]

    self.log_info(resource, "determining image quality")
    qual = getImageQuality(f)

    self.log_info(resource, "creating output image")
    md = download_ds_metadata(connector, host, secret_key, resource['parent']['id'])
    terramd = get_terraref_metadata(md)
    if "left" in f:
        bounds = geojson_to_tuples(terramd['spatial_metadata']['left']['bounding_box'])
    else:
        bounds = geojson_to_tuples(terramd['spatial_metadata']['right']['bounding_box'])
    output = f.replace(".tif", "_nrmac.tif")

    create_geotiff(np.array([[qual, qual], [qual, qual]]), bounds, output)
    upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                      resource['parent']['id'], output)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                              {"quality_score": qual}, 'file')
    self.log_info(resource, "uploading extractor metadata")
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    # Put files alongside .nc file
    out_dir = os.path.dirname(resource['local_paths'][0])
    out_fname_root = resource['name'].replace('.nc', '')

    metaFilePath = os.path.join(out_dir, out_fname_root + '_metadata.cdl')
    if not os.path.isfile(metaFilePath) or self.overwrite:
        logging.info('...extracting metadata in cdl format: %s' % metaFilePath)
        with open(metaFilePath, 'w') as fmeta:
            subprocess.call(['ncks', '--cdl', '-m', '-M', resource['local_paths'][0]], stdout=fmeta)
        self.created += 1
        self.bytes += os.path.getsize(metaFilePath)
        upload_to_dataset(connector, host, secret_key, resource['parent']['id'], metaFilePath)

    metaFilePath = os.path.join(out_dir, out_fname_root + '_metadata.xml')
    if not os.path.isfile(metaFilePath) or self.overwrite:
        logging.info('...extracting metadata in xml format: %s' % metaFilePath)
        with open(metaFilePath, 'w') as fmeta:
            subprocess.call(['ncks', '--xml', '-m', '-M', resource['local_paths'][0]], stdout=fmeta)
        self.created += 1
        self.bytes += os.path.getsize(metaFilePath)
        upload_to_dataset(connector, host, secret_key, resource['parent']['id'], metaFilePath)

    metaFilePath = os.path.join(out_dir, out_fname_root + '_metadata.json')
    if not os.path.isfile(metaFilePath) or self.overwrite:
        logging.info('...extracting metadata in json format: %s' % metaFilePath)
        with open(metaFilePath, 'w') as fmeta:
            subprocess.call(['ncks', '--jsn', '-m', '-M', resource['local_paths'][0]], stdout=fmeta)
        self.created += 1
        self.bytes += os.path.getsize(metaFilePath)
        upload_to_dataset(connector, host, secret_key, resource['parent']['id'], metaFilePath)

        # Add json metadata to original netCDF file
        with open(metaFilePath, 'r') as metajson:
            metadata = build_metadata(host, self.extractor_info, resource['id'],
                                      json.load(metajson), 'dataset')
            upload_metadata(connector, host, secret_key, resource['parent']['id'], metadata)

    self.end_message()
def upload_to_bety(file, clowder_id):
    # NOTE: host, secret_key and bety_key are assumed to be defined at module level.
    conn = Connector(None, mounted_paths={"/home/clowder/sites": "/home/clowder/sites"})

    submit_traits(file, betykey=bety_key)

    # Extractor metadata
    extractor_info = {
        "extractor_name": "terra.betydb",
        "extractor_version": "1.0",
        "extractor_author": "Max Burnette <*****@*****.**>",
        "extractor_description": "BETYdb CSV uploader",
        "extractor_repo": "https://github.com/terraref/computing-pipeline.git"
    }

    # Add metadata to original dataset indicating this was run
    ext_meta = build_metadata(host, extractor_info, clowder_id, {
        "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/v1/variables?name=canopy_cover"
    }, 'file')
    upload_metadata(conn, host, secret_key, clowder_id, ext_meta)
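# Minimal usage sketch (an assumption, not part of the original module): upload_to_bety()
# above and upload_to_geostreams() earlier both reference module-level globals (host,
# secret_key, bety_key) that are never defined in these snippets. One way to supply them
# is from environment variables; the variable names, CSV paths and file ID below are
# hypothetical and only for illustration.
if __name__ == "__main__":
    import os

    host = os.environ["CLOWDER_HOST"]        # Clowder base URL, with trailing slash
    secret_key = os.environ["CLOWDER_KEY"]   # Clowder API key
    bety_key = os.environ["BETYDB_KEY"]      # BETYdb API key used by submit_traits()

    # clowder_id is the Clowder file ID the extractor metadata gets attached to
    upload_to_bety("/path/to/traits_bety.csv", clowder_id="5a1b2c3d4e5f")
    upload_to_geostreams("/path/to/traits_geo.csv", clowder_id="5a1b2c3d4e5f")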
def process_message(self, connector, host, secret_key, resource, parameters):
    logger = logging.getLogger(__name__)
    inputfile = resource["local_paths"][0]
    file_id = resource['id']

    if self._validate(inputfile):
        # set tags
        tags = {'tags': ['STNeeded', 'CKANNeeded']}
        rtags = {'tags': ['ValidationNeeded', 'ValidationFailed']}
        # set metadata
        metadata = self._make_metadata(inputfile)
        metadata = self.get_metadata(metadata, 'file', file_id, host)
        try:
            files.upload_metadata(connector, host, secret_key, file_id, metadata)
        except BaseException:
            return
    else:
        tags = {'tags': ['ValidationFailed']}
        rtags = {'tags': ['ValidationNeeded']}

    logger.debug('adding tags={}'.format(tags))
    files.upload_tags(connector, host, secret_key, file_id, tags)

    if rtags:
        logger.debug('removing tags={}'.format(rtags))
        headers = {'Content-Type': 'application/json'}
        url = '{}api/files/{}/tags?key={}'.format(host, file_id, secret_key)
        connector.delete(url, headers=headers, data=json.dumps(rtags),
                         verify=connector.ssl_verify if connector else True)
def process_message(self, connector, host, secret_key, resource, parameters):
    inputfile = resource["local_paths"][0]
    file_id = resource['id']

    metadata = self.upload(inputfile)
    self.logger.debug(metadata)
    if metadata:
        metadata = self.get_metadata(metadata, 'file', file_id, host)
        self.logger.debug(metadata)

        # upload metadata
        files.upload_metadata(connector, host, secret_key, file_id, metadata)

        # set tags
        tags = {'tags': ['SensorThings']}
        files.upload_tags(connector, host, secret_key, file_id, tags)

        connector.status_update(StatusMessage.processing, {"type": "file", "id": file_id},
                                "Deleting file tags.")

        # delete tags
        headers = {'Content-Type': 'application/json'}
        url = '{}api/files/{}/tags?key={}'.format(host, file_id, secret_key)
        tags = {'tags': ['STNeeded']}
        connector.delete(url, headers=headers, data=json.dumps(tags),
                         verify=connector.ssl_verify)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # rulechecker provided some key information for us in parameters
    if type(parameters) is str:
        parameters = json.loads(parameters)
    if 'parameters' in parameters:
        parameters = parameters['parameters']
    if type(parameters) is unicode:
        parameters = json.loads(str(parameters))
    dataset_name = parameters["output_dataset"]
    scan_name = parameters["scan_type"] if "scan_type" in parameters else ""
    timestamp = dataset_name.split(" - ")[1]

    # Input path will suggest which sensor we are seeing
    sensor_name, sensor_lookup = None, None
    for f in resource['files']:
        if f['filepath'].find("rgb_geotiff") > -1:
            sensor_name = "stereoTop"
            sensor_lookup = "rgb_fullfield"
        elif f['filepath'].find("ir_geotiff") > -1:
            sensor_name = "flirIrCamera"
            sensor_lookup = "ir_fullfield"
        elif f['filepath'].find("laser3d_heightmap") > -1:
            sensor_name = "scanner3DTop"
            sensor_lookup = "laser3d_fullfield"
        if sensor_lookup is not None:
            break

    # Fetch experiment name from terra metadata
    season_name, experiment_name, updated_experiment = get_season_and_experiment(timestamp, sensor_name, {})
    if None in [season_name, experiment_name]:
        raise ValueError("season and experiment could not be determined")

    # Determine output file paths
    out_tif_full = self.sensors.create_sensor_path(timestamp, sensor=sensor_lookup,
                                                   opts=[scan_name]).replace(" ", "_")
    out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif")
    out_tif_medium = out_tif_full.replace(".tif", "_10pct.tif")
    out_png = out_tif_full.replace(".tif", ".png")
    out_vrt = out_tif_full.replace(".tif", ".vrt")
    out_dir = os.path.dirname(out_vrt)

    # TODO: Check for L1 version of VRT and _thumb and if the JSON contents match, copy instead of regenerating

    # If outputs already exist, we don't need to do anything else
    found_all = True
    if self.thumb:
        output_files = [out_vrt, out_tif_thumb]
    else:
        output_files = [out_tif_full, out_tif_medium, out_png]
    for output_file in output_files:
        if not file_exists(output_file):
            found_all = False
            break
    if found_all and not self.overwrite:
        if self.thumb:
            self.log_info(resource, "thumb output already exists; triggering terra.geotiff.fieldmosaic_full")
            r = requests.post("%sapi/%s/%s/extractions?key=%s" % (host, 'datasets', resource['id'], secret_key),
                              headers={"Content-Type": "application/json"},
                              data=json.dumps({"extractor": 'terra.geotiff.fieldmosaic_full',
                                               "parameters": parameters}))
            r.raise_for_status()
        else:
            self.log_skip(resource, "all outputs already exist")
        return

    # Perform actual field stitching
    if not self.darker or sensor_lookup != 'rgb_fullfield':
        (nu_created, nu_bytes) = self.generateSingleMosaic(connector, host, secret_key,
                                                           out_dir, out_vrt, out_tif_thumb, out_tif_full,
                                                           out_tif_medium, parameters, resource)
    else:
        (nu_created, nu_bytes) = self.generateDarkerMosaic(connector, host, secret_key,
                                                           out_dir, out_vrt, out_tif_thumb, out_tif_full,
                                                           out_tif_medium, parameters, resource)
    self.created += nu_created
    self.bytes += nu_bytes

    if not self.thumb and os.path.isfile(out_tif_medium):
        # Create PNG thumbnail
        self.log_info(resource, "Converting 10pct to %s..." % out_png)
        cmd = "gdal_translate -of PNG %s %s" % (out_tif_medium, out_png)
        subprocess.call(cmd, shell=True)
        self.created += 1
        self.bytes += os.path.getsize(out_png)

    self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s" %
                  (season_name, experiment_name,
                   self.sensors.get_display_name(sensor=sensor_lookup),
                   timestamp[:4], timestamp[5:7]))

    # Get dataset ID or create it, creating parent collections as needed
    target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user, self.clowder_pass,
                                                self.clowderspace, season_name, experiment_name,
                                                self.sensors.get_display_name(sensor=sensor_lookup),
                                                timestamp[:4], timestamp[5:7], leaf_ds_name=dataset_name)

    # Upload full field image to Clowder
    content = {
        "comment": "This stitched image is computed based on an assumption that the scene is planar. \
There are likely to be small offsets near the boundary of two images anytime there are plants \
at the boundary (because those plants are higher than the ground plane), or where the dirt is \
slightly higher or lower than average.",
        "file_ids": parameters["file_paths"]
    }

    # If we newly created these files, upload to Clowder
    if self.thumb:
        generated_files = [out_tif_thumb]
    else:
        generated_files = [out_tif_medium, out_tif_full, out_png]
    for checked_file in generated_files:
        if os.path.isfile(checked_file):
            found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                                  checked_file)  # , replacements=[("ir_fullfield", "fullfield"), ("L2", "L1")])
            if not found_in_dest:
                id = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                       target_dsid, checked_file)
                meta = build_metadata(host, self.extractor_info, id, content, 'file')
                upload_metadata(connector, host, secret_key, id, meta)

                if checked_file == out_tif_full:
                    # Trigger downstream extractions on full resolution
                    if sensor_lookup == 'ir_fullfield':
                        submit_extraction(connector, host, secret_key, id, "terra.multispectral.meantemp")
                    elif sensor_lookup == 'rgb_fullfield' and checked_file.endswith("_mask.tif"):
                        submit_extraction(connector, host, secret_key, id, "terra.stereo-rgb.canopycover")

    if self.thumb:
        # TODO: Add parameters support to pyclowder submit_extraction()
        self.log_info(resource, "triggering terra.geotiff.fieldmosaic_full")
        r = requests.post("%sapi/%s/%s/extractions?key=%s" % (host, 'datasets', resource['id'], secret_key),
                          headers={"Content-Type": "application/json"},
                          data=json.dumps({"extractor": 'terra.geotiff.fieldmosaic_full',
                                           "parameters": parameters}))
        r.raise_for_status()

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Write the CSV to the same directory as the source file
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    time_fmt = timestamp + "T12:00:00-07:00"
    rootdir = self.sensors.create_sensor_path(timestamp, sensor="rgb_fullfield", ext=".csv")
    out_csv = os.path.join(os.path.dirname(rootdir),
                           resource['name'].replace(".tif", "_canopycover_bety.csv"))
    out_geo = os.path.join(os.path.dirname(rootdir),
                           resource['name'].replace(".tif", "_canopycover_geo.csv"))

    # TODO: What should happen if CSV already exists? If we're here, there's no completed metadata...

    self.log_info(resource, "Writing BETY CSV to %s" % out_csv)
    csv_file = open(out_csv, 'w')
    (fields, traits) = get_traits_table()
    csv_file.write(','.join(map(str, fields)) + '\n')

    self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
    geo_file = open(out_geo, 'w')
    geo_file.write(','.join(['site', 'trait', 'lat', 'lon', 'dp_time',
                             'source', 'value', 'timestamp']) + '\n')

    # Get full list of experiment plots using date as filter
    all_plots = get_site_boundaries(timestamp, city='Maricopa')
    self.log_info(resource, "found %s plots on %s" % (len(all_plots), timestamp))

    successful_plots = 0
    for plotname in all_plots:
        if plotname.find("KSU") > -1:
            self.log_info(resource, "skipping %s" % plotname)
            continue

        bounds = all_plots[plotname]
        tuples = geojson_to_tuples_betydb(yaml.safe_load(bounds))
        centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]

        # Use GeoJSON string to clip full field to this plot
        try:
            pxarray = clip_raster(resource['local_paths'][0], tuples)
            if pxarray is not None:
                if len(pxarray.shape) < 3:
                    self.log_error(resource, "unexpected array shape for %s (%s)" % (plotname, pxarray.shape))
                    continue

                ccVal = calculate_canopycover_masked(rollaxis(pxarray, 0, 3))

                if (ccVal > -1):
                    # Prepare and submit datapoint
                    geo_file.write(','.join([plotname,
                                             'Canopy Cover',
                                             str(centroid_lonlat[1]),
                                             str(centroid_lonlat[0]),
                                             time_fmt,
                                             host + ("" if host.endswith("/") else "/") + "files/" + resource['id'],
                                             str(ccVal),
                                             timestamp]) + '\n')

                successful_plots += 1
                if successful_plots % 10 == 0:
                    self.log_info(resource, "processed %s/%s plots" % (successful_plots, len(all_plots)))
            else:
                continue
        except:
            self.log_error(resource, "error generating cc for %s" % plotname)
            continue

        if (ccVal > -1):
            traits['canopy_cover'] = str(ccVal)
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = generate_traits_list(traits)
            csv_file.write(','.join(map(str, trait_list)) + '\n')

    csv_file.close()
    geo_file.close()

    # Upload this CSV to Clowder
    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                               resource['parent']['id'], out_csv)
    geoid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                              resource['parent']['id'], out_geo)

    # Add metadata to original dataset indicating this was run
    self.log_info(resource, "updating file metadata")
    ext_meta = build_metadata(host, self.extractor_info, resource['id'],
                              {"files_created": [fileid, geoid]}, 'file')
    upload_metadata(connector, host, secret_key, resource['id'], ext_meta)

    # Trigger separate extractors
    self.log_info(resource, "triggering BETY extractor on %s" % fileid)
    submit_extraction(connector, host, secret_key, fileid, "terra.betydb")
    self.log_info(resource, "triggering geostreams extractor on %s" % geoid)
    submit_extraction(connector, host, secret_key, geoid, "terra.geostreams")

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    """Performs plot level image extraction

    Args:
        connector(obj): the message queue connector instance
        host(str): the URI of the host making the connection
        secret_key(str): used with the host API
        resource(dict): dictionary containing the resources associated with the request
        parameters(json): json object of the triggering message contents
    """
    self.start_message(resource)
    super(ClipByShape, self).process_message(connector, host, secret_key, resource, parameters)

    # Handle any parameters
    if isinstance(parameters, basestring):
        parameters = json.loads(parameters)
    if isinstance(parameters, unicode):
        parameters = json.loads(str(parameters))

    # Initialize local variables
    dataset_name = parameters["datasetname"]
    season_name, experiment_name = "Unknown Season", "Unknown Experiment"
    datestamp, shape_table, plot_name_idx, shape_rows = None, None, None, None

    # Array containing the links to uploaded files
    uploaded_file_ids = []

    # Find the files we're interested in
    # pylint: disable=line-too-long
    (shapefile, shxfile, dbffile, imagefiles) = self.find_shape_image_files(resource['local_paths'],
                                                                            resource['triggering_file'])
    # pylint: enable=line-too-long
    if shapefile is None:
        self.log_skip(resource, "No shapefile found")
        return
    if shxfile is None:
        self.log_skip(resource, "No SHX file found")
        return
    num_image_files = len(imagefiles)
    if num_image_files <= 0:
        self.log_skip(resource, "No image files with geographic boundaries found")
        return

    # Get the best username, password, and space
    old_un, old_pw, old_space = (self.clowder_user, self.clowder_pass, self.clowderspace)
    self.clowder_user, self.clowder_pass, self.clowderspace = self.get_clowder_context()

    # Ensure that the clowder information is valid
    if not confirm_clowder_info(host, secret_key, self.clowderspace, self.clowder_user, self.clowder_pass):
        self.log_error(resource, "Clowder configuration is invalid. Not processing request")
        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
        return

    # Change the base path of files to include the user by tweaking the sensor's value
    sensor_old_base = None
    if self.get_terraref_metadata is None:
        _, new_base = self.get_username_with_base_path(host, secret_key, resource['id'], self.sensors.base)
        sensor_old_base = self.sensors.base
        self.sensors.base = new_base

    try:
        # Build up a list of image IDs
        image_ids = {}
        if 'files' in resource:
            for one_image in imagefiles:
                image_name = os.path.basename(one_image)
                for res_file in resource['files']:
                    if ('filename' in res_file) and ('id' in res_file) and \
                            (image_name == res_file['filename']):
                        image_ids[image_name] = res_file['id']

        # Get timestamps. Also get season and experiment information for Clowder collections
        datestamp = self.find_datestamp(dataset_name)
        timestamp = timestamp_to_terraref(self.find_timestamp(dataset_name))
        (season_name, experiment_name, _) = self.get_season_and_experiment(datestamp, self.sensor_name)

        if self.experiment_metadata:
            if 'extractors' in self.experiment_metadata:
                extractor_json = self.experiment_metadata['extractors']
                if 'shapefile' in extractor_json:
                    if 'plot_column_name' in extractor_json['shapefile']:
                        plot_name_idx = extractor_json['shapefile']['plot_column_name']

        # Check our current local variables
        if dbffile is None:
            self.log_info(resource, "DBF file not found, using default plot naming")
        self.log_info(resource, "Extracting plots using shapefile '" +
                      os.path.basename(shapefile) + "'")

        # Load the shapes and find the plot name column if we have a DBF file
        shape_in = ogr.Open(shapefile)
        layer = shape_in.GetLayer(os.path.split(os.path.splitext(shapefile)[0])[1])
        feature = layer.GetNextFeature()
        layer_ref = layer.GetSpatialRef()

        if dbffile:
            shape_table = DBF(dbffile, lowernames=True, ignore_missing_memofile=True)
            shape_rows = iter(list(shape_table))

            # Make sure if we have the column name of plot-names specified that it exists in
            # the shapefile
            column_names = shape_table.field_names
            if not plot_name_idx is None:
                if not find_all_plot_names(plot_name_idx, column_names):
                    raise ValueError(resource, "Shapefile data does not have specified plot name" +
                                     " column '" + plot_name_idx + "'")

            # Lookup a plot name field to use
            if plot_name_idx is None:
                for one_name in column_names:
                    # pylint: disable=line-too-long
                    if one_name == "observationUnitName":
                        plot_name_idx = one_name
                        break
                    elif (one_name.find('plot') >= 0) and ((one_name.find('name') >= 0) or one_name.find('id')):
                        plot_name_idx = one_name
                        break
                    elif one_name == 'id':
                        plot_name_idx = one_name
                        break
                    # pylint: enable=line-too-long
            if plot_name_idx is None:
                raise ValueError(resource, "Shapefile data does not have a plot name field '" +
                                 os.path.basename(dbffile) + "'")

        # Setup for the extracted plot images
        plot_display_name = self.sensors.get_display_name(sensor=self.sensor_name) + " (By Plot)"

        # Loop through each polygon and extract plot level data
        alternate_plot_id = 0
        while feature:
            # Current geometry to extract
            plot_poly = feature.GetGeometryRef()
            if layer_ref:
                plot_poly.AssignSpatialReference(layer_ref)
            plot_spatial_ref = plot_poly.GetSpatialReference()

            # Determine the plot name to use
            plot_name = None
            alternate_plot_id = alternate_plot_id + 1
            if shape_rows and plot_name_idx:
                try:
                    row = next(shape_rows)
                    plot_name = get_plot_name(plot_name_idx, row)
                except StopIteration:
                    pass
            if not plot_name:
                plot_name = "plot_" + str(alternate_plot_id)

            # Determine output dataset name
            leaf_dataset = plot_display_name + ' - ' + plot_name + " - " + datestamp
            self.log_info(resource, "Hierarchy: %s / %s / %s / %s / %s / %s / %s" %
                          (season_name, experiment_name, plot_display_name,
                           datestamp[:4], datestamp[5:7], datestamp[8:10], leaf_dataset))

            # Create the dataset, even if we have no data to put in it, so that the caller knows
            # it was addressed
            target_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user,
                                                        self.clowder_pass, self.clowderspace,
                                                        season_name, experiment_name, plot_display_name,
                                                        datestamp[:4], datestamp[5:7], datestamp[8:10],
                                                        leaf_ds_name=leaf_dataset)

            # Loop through all the images looking for overlap
            for filename in imagefiles:
                # Get the bounds. We also get the reference systems in case we need to convert
                # between them
                bounds = imagefiles[filename]['bounds']
                bounds_spatial_ref = bounds.GetSpatialReference()

                # Checking for geographic overlap and skip if there is none
                if not bounds_spatial_ref.IsSame(plot_spatial_ref):
                    # We need to convert coordinate system before an intersection
                    transform = osr.CoordinateTransformation(bounds_spatial_ref, plot_spatial_ref)
                    new_bounds = bounds.Clone()
                    if new_bounds:
                        new_bounds.Transform(transform)
                        intersection = plot_poly.Intersection(new_bounds)
                        new_bounds = None
                else:
                    # Same coordinate system. Simple intersection
                    intersection = plot_poly.Intersection(bounds)

                if intersection.GetArea() == 0.0:
                    self.log_info(resource, "Skipping image: " + filename)
                    continue

                # Determine where we're putting the clipped file on disk and determine overwrite
                # pylint: disable=unexpected-keyword-arg
                out_file = self.sensors.create_sensor_path(timestamp,
                                                           filename=os.path.basename(filename),
                                                           plot=plot_name,
                                                           subsensor=self.sensor_name)
                if (file_exists(out_file) and not self.overwrite):
                    # The file exists and we don't want to overwrite it
                    self.logger.warn("Skipping existing output file: %s", out_file)
                    continue

                self.log_info(resource, "Attempting to clip '" + filename +
                              "' to polygon number " + str(alternate_plot_id))

                # Create destination folder on disk if we haven't done that already
                if not os.path.exists(os.path.dirname(out_file)):
                    os.makedirs(os.path.dirname(out_file))

                # Clip the raster
                bounds_tuple = polygon_to_tuples_transform(plot_poly, bounds_spatial_ref)
                clip_pix = clip_raster(filename, bounds_tuple, out_path=out_file)
                if clip_pix is None:
                    self.log_error(resource, "Failed to clip image to plot name " + plot_name)
                    continue

                # Upload the clipped image to the dataset
                found_in_dest = check_file_in_dataset(connector, host, secret_key, target_dsid,
                                                      out_file, remove=self.overwrite)
                if not found_in_dest or self.overwrite:
                    image_name = os.path.basename(filename)
                    content = {
                        "comment": "Clipped from shapefile " + os.path.basename(shapefile),
                        "imageName": image_name
                    }
                    if image_name in image_ids:
                        content['imageID'] = image_ids[image_name]

                    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                               target_dsid, out_file)
                    uploaded_file_ids.append(fileid)

                    # Generate our metadata
                    meta = build_metadata(host, self.extractor_info, fileid, content, 'file')
                    clowder_file.upload_metadata(connector, host, secret_key, fileid, meta)
                else:
                    self.logger.warn("Skipping existing file in dataset: %s", out_file)

                self.created += 1
                self.bytes += os.path.getsize(out_file)

            # Get the next shape to extract
            feature = layer.GetNextFeature()

        # Tell Clowder this is completed so subsequent file updates don't daisy-chain
        id_len = len(uploaded_file_ids)
        if id_len > 0 or self.created > 0:
            extractor_md = build_metadata(host, self.extractor_info, resource['id'],
                                          {"files_created": uploaded_file_ids}, 'dataset')
            self.log_info(resource, "Uploading shapefile plot extractor metadata to Level_2 dataset: " +
                          str(extractor_md))
            clowder_dataset.remove_metadata(connector, host, secret_key, resource['id'],
                                            self.extractor_info['name'])
            clowder_dataset.upload_metadata(connector, host, secret_key, resource['id'], extractor_md)
        else:
            self.logger.warn("Skipping dataset metadata updating since no files were loaded")
    finally:
        # Signal end of processing message and restore changed variables. Be sure to restore
        # changed variables above with early returns
        if not sensor_old_base is None:
            self.sensors.base = sensor_old_base

        self.clowder_user, self.clowder_pass, self.clowderspace = (old_un, old_pw, old_space)
        self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    if type(parameters) is str:
        parameters = json.loads(parameters)
    if 'parameters' in parameters:
        parameters = parameters['parameters']
    if type(parameters) is unicode:
        parameters = json.loads(str(parameters))

    # Input path will suggest which sensor we are seeing
    sensor_type = None
    for f in resource['files']:
        filepath = f['filepath']
        for sens in ["rgb_geotiff", "ir_geotiff", "laser3d_heightmap"]:
            if filepath.find(sens) > -1:
                sensor_type = sens.split("_")[0]
                break
        if sensor_type is not None:
            break

    # dataset_name = "Full Field - 2017-01-01"
    dataset_name = parameters["output_dataset"]
    scan_name = parameters["scan_type"] if "scan_type" in parameters else ""
    timestamp = dataset_name.split(" - ")[1]
    out_tif_full = self.sensors.create_sensor_path(timestamp, opts=[sensor_type, scan_name])
    out_tif_thumb = out_tif_full.replace(".tif", "_thumb.tif")
    out_vrt = out_tif_full.replace(".tif", ".vrt")
    out_dir = os.path.dirname(out_vrt)

    if os.path.exists(out_vrt) and not self.overwrite:
        self.log_skip(resource, "%s already exists; ending process" % out_vrt)
        return

    if not self.darker or sensor_type != 'rgb':
        (nu_created, nu_bytes) = self.generateSingleMosaic(connector, host, secret_key,
                                                           sensor_type, out_dir, out_vrt, out_tif_thumb,
                                                           out_tif_full, parameters, resource)
    else:
        (nu_created, nu_bytes) = self.generateDarkerMosaic(connector, host, secret_key,
                                                           sensor_type, out_dir, out_vrt, out_tif_thumb,
                                                           out_tif_full, parameters, resource)
    self.created += nu_created
    self.bytes += nu_bytes

    # Get dataset ID or create it, creating parent collections as needed
    target_dsid = build_dataset_hierarchy(host, secret_key, self.clowder_user, self.clowder_pass,
                                          self.clowderspace, self.sensors.get_display_name(),
                                          timestamp[:4], timestamp[5:7], leaf_ds_name=dataset_name)

    # Upload full field image to Clowder
    content = {
        "comment": "This stitched image is computed based on an assumption that the scene is planar. \
There are likely to be small offsets near the boundary of two images anytime there are plants \
at the boundary (because those plants are higher than the ground plane), or where the dirt is \
slightly higher or lower than average.",
        "file_ids": parameters["file_paths"]
    }

    if os.path.exists(out_tif_thumb):
        thumbid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                    target_dsid, out_tif_thumb)
        thumbmeta = build_metadata(host, self.extractor_info, thumbid, content, 'file')
        upload_metadata(connector, host, secret_key, thumbid, thumbmeta)

    if os.path.exists(out_tif_full):
        fullid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                   target_dsid, out_tif_full)
        fullmeta = build_metadata(host, self.extractor_info, fullid, content, 'file')
        upload_metadata(connector, host, secret_key, fullid, fullmeta)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message()

    stream_name = 'Energy Farm Observations'
    disp_name = self.sensors.get_display_name()
    if 'Weather CEN' in resource['name']:
        curr_sens = disp_name + ' - CEN'
        stream_name += ' CEN'
        main_coords = [-88.199801, 40.062051, 0]
    elif 'WeatherNE' in resource['name']:
        curr_sens = disp_name + ' - NE'
        stream_name += ' NE'
        main_coords = [-88.193298, 40.067379, 0]
    elif 'WeatherSE' in resource['name']:
        curr_sens = disp_name + ' - SE'
        stream_name += ' SE'
        main_coords = [-88.193573, 40.056910, 0]
    geom = {"type": "Point", "coordinates": main_coords}

    # Get sensor or create if not found
    sensor_data = get_sensor_by_name(connector, host, secret_key, curr_sens)
    if not sensor_data:
        sensor_id = create_sensor(connector, host, secret_key, curr_sens, geom,
                                  {"id": "Met Station", "title": "Met Station", "sensorType": 4},
                                  "Urbana")
    else:
        sensor_id = sensor_data['id']

    # Get stream or create if not found
    stream_data = get_stream_by_name(connector, host, secret_key, stream_name)
    if not stream_data:
        stream_id = create_stream(connector, host, secret_key, stream_name, sensor_id, geom)
    else:
        stream_id = stream_data['id']

    # Check existing metadata for the last processed time; start processing the file after this time
    allmd = download_metadata(connector, host, secret_key, resource['id'])
    last_processed_time = 0
    datapoint_count = 0
    for md in allmd:
        if 'content' in md and 'last processed time' in md['content']:
            last_processed_time = md['content']['last processed time']
            if 'datapoints_created' in md['content']:
                datapoint_count = md['content']['datapoints_created']
            else:
                datapoint_count = 0
            delete_metadata(connector, host, secret_key, resource['id'],
                            md['agent']['name'].split("/")[-1])

    # Parse file and get all the records in it.
    ISO_8601_UTC_OFFSET = dateutil.tz.tzoffset("-07:00", -7 * 60 * 60)
    records = parse_file(resource["local_paths"][0], last_processed_time,
                         utc_offset=ISO_8601_UTC_OFFSET)
    # Add props to each record.
    for record in records:
        record['properties']['source_file'] = resource['id']
        record['stream_id'] = str(stream_id)

    total_dp = 0
    datapoint_list = []
    for record in records:
        datapoint_list.append({
            "start_time": record['start_time'],
            "end_time": record['end_time'],
            "type": "Point",
            "geometry": record['geometry'],
            "properties": record['properties']
        })
        if len(datapoint_list) > self.batchsize:
            create_datapoints(connector, host, secret_key, stream_id, datapoint_list)
            total_dp += len(datapoint_list)
            datapoint_list = []
    if len(datapoint_list) > 0:
        create_datapoints(connector, host, secret_key, stream_id, datapoint_list)
        total_dp += len(datapoint_list)

    # Mark dataset as processed
    metadata = build_metadata(host, self.extractor_info, resource['id'],
                              {"last processed time": records[-1]["end_time"],
                               "datapoints_created": datapoint_count + total_dp},
                              'file')
    upload_metadata(connector, host, secret_key, resource['id'], metadata)

    self.end_message()
def perform_uploads(self, connector, host, secret_key, resource, default_dsid, content,
                    season_name, experiment_name, timestamp):
    """Performs the upload of all the files placed on the upload list

    Args:
        connector(obj): the message queue connector instance
        host(str): the URI of the host making the connection
        secret_key(str): used with the host API
        resource(dict): dictionary containing the resources associated with the request
        default_dsid(str): the default dataset to load files to
        content(str): content information for the files we're uploading
        season_name(str): the name of the season
        experiment_name(str): the name of the experiment
        timestamp(str): the timestamp string associated with the source dataset

    Notes:
        We loop through the files, compressing and remapping the names as needed.
        If the sensor associated with the file is missing, we upload the file to the
        default dataset. Otherwise, we use the dataset associated with the sensor and
        create the dataset if necessary
    """
    for one_file in self.files_to_upload:
        sourcefile = os.path.join(one_file["source_path"], one_file["source_name"])

        # Make sure we have the original file and then compress it if needed, or rename it
        if os.path.isfile(sourcefile):
            # make sure we have the full destination path
            if not os.path.exists(one_file["dest_path"]):
                os.makedirs(one_file["dest_path"])

            resultfile = os.path.join(one_file["dest_path"], one_file["dest_name"])
            if one_file["compress"]:
                resultfile = resultfile + ".zip"
                with open(sourcefile, 'rb') as f_in:
                    with gzip.open(resultfile, 'wb') as f_out:
                        shutil.copyfileobj(f_in, f_out)
            elif not sourcefile == resultfile:
                shutil.move(sourcefile, resultfile)

            # Find or create the target dataset for this entry if it doesn't exist
            cur_dataset_id = default_dsid
            if "sensor" in one_file:
                sensor_type = one_file["sensor"]
                if sensor_type in self.sensor_dsid_map:
                    cur_dataset_id = self.sensor_dsid_map[sensor_type]
                else:
                    new_sensor = Sensors(base=self.sensors.base,
                                         station=self.sensors.station,
                                         sensor=sensor_type)
                    sensor_leaf_name = new_sensor.get_display_name() + ' - ' + timestamp

                    ds_exists = get_datasetid_by_name(host, secret_key, sensor_leaf_name)
                    new_dsid = build_dataset_hierarchy_crawl(host, secret_key, self.clowder_user,
                                                             self.clowder_pass, self.clowderspace,
                                                             season_name, experiment_name,
                                                             new_sensor.get_display_name(),
                                                             timestamp[:4], timestamp[5:7], timestamp[8:10],
                                                             leaf_ds_name=sensor_leaf_name)

                    if (self.overwrite_ok or not ds_exists) and self.experiment_metadata:
                        self.update_dataset_extractor_metadata(connector, host, secret_key, new_dsid,
                                                               prepare_pipeline_metadata(self.experiment_metadata),
                                                               self.extractor_info['name'])

                    self.sensor_dsid_map[sensor_type] = new_dsid
                    cur_dataset_id = new_dsid

            # Check if file already exists in the dataset
            file_in_dataset = check_file_in_dataset(connector, host, secret_key, cur_dataset_id,
                                                    resultfile, remove=False)

            # If the file is already in the dataset, determine if we need to delete it first
            if self.overwrite_ok and file_in_dataset:
                # Delete the file from the dataset before uploading the new copy
                self.log_info(resource, "Removing existing file in dataset " + resultfile)
                check_file_in_dataset(connector, host, secret_key, cur_dataset_id, resultfile, remove=True)
            elif not self.overwrite_ok and file_in_dataset:
                # We won't overwrite an existing file
                self.log_skip(resource, "Not overwriting existing file in dataset " + resultfile)
                continue

            # Upload the file to the dataset
            fid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                                    cur_dataset_id, resultfile)

            # Generate our metadata
            meta = build_metadata(host, self.extractor_info, fid, content, 'file')

            # Upload the metadata to the dataset
            upload_metadata(connector, host, secret_key, fid, meta)

            self.created += 1
            self.bytes += os.path.getsize(resultfile)
        else:
            raise Exception("%s was not found" % sourcefile)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # TODO: Get this from Clowder fixed metadata
    main_coords = [-111.974304, 33.075576, 361]
    geom = {"type": "Point", "coordinates": main_coords}
    disp_name = self.sensors.get_display_name()

    # Get sensor or create if not found
    sensor_data = get_sensor_by_name(connector, host, secret_key, disp_name)
    if not sensor_data:
        sensor_id = create_sensor(connector, host, secret_key, disp_name, geom,
                                  {"id": "MAC Met Station", "title": "MAC Met Station", "sensorType": 4},
                                  "Maricopa")
    else:
        sensor_id = sensor_data['id']

    # Get stream or create if not found
    stream_name = "Irrigation Observations"
    stream_data = get_stream_by_name(connector, host, secret_key, stream_name)
    if not stream_data:
        stream_id = create_stream(connector, host, secret_key, stream_name, sensor_id, geom)
    else:
        stream_id = stream_data['id']

    # Process records in file
    records = parse_file(resource["local_paths"][0], main_coords)
    total_dp = 0
    datapoint_list = []
    for record in records:
        record['properties']['source_file'] = resource['id']
        datapoint_list.append({
            "start_time": record['start_time'],
            "end_time": record['end_time'],
            "type": "Point",
            "geometry": record['geometry'],
            "properties": record['properties']
        })
        if len(datapoint_list) > self.batchsize:
            create_datapoints(connector, host, secret_key, stream_id, datapoint_list)
            total_dp += len(datapoint_list)
            datapoint_list = []
    if len(datapoint_list) > 0:
        create_datapoints(connector, host, secret_key, stream_id, datapoint_list)
        total_dp += len(datapoint_list)

    # Mark dataset as processed
    metadata = build_metadata(host, self.extractor_info, resource['id'],
                              {"datapoints_created": len(records)}, 'file')
    upload_metadata(connector, host, secret_key, resource['id'], metadata)

    self.end_message(resource)
def process_message(self, connector, host, secret_key, resource, parameters):
    self.start_message(resource)

    # Get full list of experiment plots using date as filter
    ds_info = get_info(connector, host, secret_key, resource['parent']['id'])
    timestamp = ds_info['name'].split(" - ")[1]
    time_fmt = timestamp + "T12:00:00-07:00"
    rootdir = self.sensors.create_sensor_path(timestamp, sensor="ir_meantemp", ext=".csv")
    out_csv = os.path.join(os.path.dirname(rootdir),
                           resource['name'].replace(".tif", "_meantemp_bety.csv"))
    out_geo = os.path.join(os.path.dirname(rootdir),
                           resource['name'].replace(".tif", "_meantemp_geo.csv"))

    self.log_info(resource, "Writing BETY CSV to %s" % out_csv)
    csv_file = open(out_csv, 'w')
    (fields, traits) = get_traits_table()
    csv_file.write(','.join(map(str, fields)) + '\n')

    self.log_info(resource, "Writing Geostreams CSV to %s" % out_geo)
    geo_file = open(out_geo, 'w')
    geo_file.write(','.join(['site', 'trait', 'lat', 'lon', 'dp_time',
                             'source', 'value', 'timestamp']) + '\n')

    successful_plots = 0
    nan_plots = 0
    all_plots = get_site_boundaries(timestamp, city='Maricopa')
    for plotname in all_plots:
        if plotname.find("KSU") > -1:
            self.log_info(resource, "skipping %s" % plotname)
            continue

        bounds = all_plots[plotname]
        tuples = geojson_to_tuples_betydb(yaml.safe_load(bounds))
        centroid_lonlat = json.loads(centroid_from_geojson(bounds))["coordinates"]

        # Use GeoJSON string to clip full field to this plot
        pxarray = clip_raster(resource['local_paths'][0], tuples, "/home/extractor/temp.tif")
        os.remove("/home/extractor/temp.tif")

        # Filter out any negative values before computing the mean temperature
        pxarray[pxarray < 0] = numpy.nan
        mean_tc = numpy.nanmean(pxarray) - 273.15

        # Create BETY-ready CSV
        if not numpy.isnan(mean_tc):
            geo_file.write(','.join([plotname,
                                     'IR Surface Temperature',
                                     str(centroid_lonlat[1]),
                                     str(centroid_lonlat[0]),
                                     time_fmt,
                                     host + ("" if host.endswith("/") else "/") + "files/" + resource['id'],
                                     str(mean_tc),
                                     timestamp]) + '\n')

            traits['surface_temperature'] = str(mean_tc)
            traits['site'] = plotname
            traits['local_datetime'] = timestamp + "T12:00:00"
            trait_list = generate_traits_list(traits)
            csv_file.write(','.join(map(str, trait_list)) + '\n')
        else:
            nan_plots += 1

        successful_plots += 1

    self.log_info(resource, "skipped %s of %s plots due to NaN" % (nan_plots, len(all_plots)))

    # submit CSV to BETY
    csv_file.close()
    geo_file.close()

    # Upload CSVs to Clowder
    fileid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                               resource['parent']['id'], out_csv)
    geoid = upload_to_dataset(connector, host, self.clowder_user, self.clowder_pass,
                              resource['parent']['id'], out_geo)

    # Tell Clowder this is completed so subsequent file updates don't daisy-chain
    self.log_info(resource, "updating file metadata")
    metadata = build_metadata(host, self.extractor_info, resource['parent']['id'],
                              {"total_plots": len(all_plots),
                               "plots_processed": successful_plots,
                               "blank_plots": nan_plots,
                               "files_created": [fileid, geoid],
                               "betydb_link": "https://terraref.ncsa.illinois.edu/bety/api/beta/variables?name=surface_temperature"},
                              'dataset')
    upload_metadata(connector, host, secret_key, resource['id'], metadata)

    # Trigger downstream extractors
    self.log_info(resource, "triggering BETY extractor on %s" % fileid)
    submit_extraction(connector, host, secret_key, fileid, "terra.betydb")
    self.log_info(resource, "triggering geostreams extractor on %s" % geoid)
    submit_extraction(connector, host, secret_key, geoid, "terra.geostreams")

    self.end_message(resource)