def create_prov_es_json(id, project, master_orbit_file, slave_orbit_file,
                        aria_dem_xml, aria_dem_file, work_dir, prov_file):
    """Create the PROV-ES provenance JSON file for an S1A interferogram.

    Loads ``<work_dir>/<id>/<id>.context.json``, registers the job inputs
    (sentinel.ini, both orbit files, the DEM XML/file and the master/slave
    zip URLs found in the context), describes the platforms, instruments,
    sensors, software and output granule, and writes the serialized
    document to ``prov_file``.

    Args:
        id: product ID; names the product directory under ``work_dir``, the
            context file inside it, and labels the output granule.
        project: project name used for the runtime context ID and label.
        master_orbit_file: master orbit file, joined onto the work URL.
        slave_orbit_file: slave orbit file, joined onto the work URL.
        aria_dem_xml: DEM XML file, joined onto the work URL.
        aria_dem_file: DEM file, joined onto the work URL.
        work_dir: job work directory (made absolute before use).
        prov_file: path of the JSON file to write.

    Raises:
        IOError/OSError: if the context file cannot be read or ``prov_file``
            cannot be written.
    """

    # resolve absolute paths and load the job context stored with the product
    work_dir = os.path.abspath(work_dir)
    prod_dir = os.path.join(work_dir, id)
    ctx_file = os.path.join(prod_dir, "%s.context.json" % id)
    with open(ctx_file) as f:
        context = json.load(f)

    # put in fake start/end times so that prov:used and prov:generated
    # are properly created by the prov lib
    fake_time = datetime.utcnow().isoformat() + 'Z'
    job_id = "create_interferogram-%s" % fake_time

    # create PROV-ES doc; no bundle is used, so every record below is
    # created with bundle=None
    doc = ProvEsDocument()
    bndl = None

    # identifiers seen so far (dicts keyed by identifier, used as sets)
    input_ids = {}
    platform_ids = {}
    instrument_ids = {}

    # full url paths
    work_url = "file://%s%s" % (socket.getfqdn(), work_dir)
    prod_url = "%s/%s" % (work_url, id)

    def add_input_file(rel_path):
        """Register a file under the work URL as an input entity."""
        file_url = "%s/%s" % (work_url, rel_path)
        ent = doc.file("hysds:%s" % get_uuid(file_url), [file_url],
                       label=os.path.basename(rel_path))
        input_ids[ent.identifier] = True

    # add sentinel.ini and orbit files
    add_input_file("sentinel.ini")
    add_input_file(master_orbit_file)
    add_input_file(slave_orbit_file)

    # describe the S1A input products listed in the context
    level = "L0"
    sensor = "eos:SAR"
    sensor_title = "Synthetic-aperture radar (SAR)"
    gov_org = "eos:ESA"
    gov_org_title = "European Space Agency"
    doc.governingOrganization(gov_org, label=gov_org_title, bundle=bndl)
    zip_urls = [context.get('master_zip_url', ''),
                context.get('slave_zip_url', '')]
    for url in zip_urls:
        match = PLATFORM_RE.search(url)
        if not match:
            # URL missing or not recognised by PLATFORM_RE: nothing to record
            continue
        pf = match.group(1)
        platform = "eos:%s" % pf
        platform_title = "Sentinel1A Satellite"
        instrument = "eos:%s-SAR" % pf
        instrument_title = "%s-SAR" % pf
        # NOTE(review): the product version is passed as None here (the
        # original defined an unused local "v1.0") -- confirm this is intended.
        input_ds = doc.product("hysds:%s" % get_uuid(url), None, [url],
                               [instrument], None, level, None,
                               label=os.path.basename(url), bundle=bndl)
        input_ids[input_ds.identifier] = True
        # declare each platform / instrument only once
        if platform not in platform_ids:
            doc.platform(platform, [instrument], label=platform_title,
                         bundle=bndl)
            platform_ids[platform] = True
        if instrument not in instrument_ids:
            doc.instrument(instrument, platform, [sensor], [gov_org],
                           label=instrument_title, bundle=bndl)
            doc.sensor(sensor, instrument, label=sensor_title, bundle=bndl)
            instrument_ids[instrument] = True

    # add dem xml, file and related provenance
    srtm_platform = "eos:SpaceShuttleEndeavour"
    srtm_platform_title = "USS Endeavour"
    srtm_instrument = "eos:SRTM"
    srtm_instrument_title = "Shuttle Radar Topography Mission (SRTM)"
    srtm_sensor = "eos:radar"
    srtm_sensor_title = "radar"
    srtm_gov_org = "eos:JPL"
    srtm_gov_org_title = "Jet Propulsion Laboratory"
    doc.governingOrganization(srtm_gov_org, label=srtm_gov_org_title,
                              bundle=bndl)
    add_input_file(aria_dem_xml)
    add_input_file(aria_dem_file)
    doc.platform(srtm_platform, [srtm_instrument], label=srtm_platform_title,
                 bundle=bndl)
    doc.instrument(srtm_instrument, srtm_platform, [srtm_sensor],
                   [srtm_gov_org], label=srtm_instrument_title, bundle=bndl)
    doc.sensor(srtm_sensor, srtm_instrument, label=srtm_sensor_title,
               bundle=bndl)
    instrument_ids[srtm_instrument] = True

    # software and algorithm
    algorithm = "eos:interferogram_generation"
    software_version = "2.0.0_201604"
    software_title = "InSAR SCE (InSAR Scientific Computing Environment) v%s" % software_version
    software = "eos:ISCE-%s" % software_version
    software_location = "https://winsar.unavco.org/isce.html"
    doc.software(software, [algorithm], software_version,
                 label=software_title, location=software_location,
                 bundle=bndl)

    # output collection and granule; list(...) hands the library real lists
    # on both Python 2 and Python 3 (where dict.keys() is only a view)
    int_level = "L2"
    int_version = "v1.0"
    int_collection = "eos:S1A-interferograms-%s" % int_version
    int_collection_shortname = "S1A-interferograms-%s" % int_version
    int_collection_label = "ISCE generated S1A interferograms %s" % int_version
    int_collection_loc = "https://aria-dst-dav.jpl.nasa.gov/products/s1a_ifg/%s" % int_version
    doc.collection(int_collection, None, int_collection_shortname,
                   int_collection_label, [int_collection_loc],
                   list(instrument_ids), int_level, int_version,
                   label=int_collection_label, bundle=bndl)
    output_ds = doc.granule("hysds:%s" % get_uuid(prod_url), None, [prod_url],
                            list(instrument_ids), int_collection, int_level,
                            int_version, label=id, bundle=bndl)

    # runtime context
    rt_ctx_id = "hysds:runtimeContext-sentinel_ifg-%s" % project
    doc.runtimeContext(rt_ctx_id, [project], label=project, bundle=bndl)

    # create process
    doc.processStep("hysds:%s" % get_uuid(job_id), fake_time, fake_time,
                    [software], None, rt_ctx_id, list(input_ids),
                    [output_ds.identifier], label=job_id, bundle=bndl,
                    prov_type="hysds:create_interferogram")

    # write
    with open(prov_file, 'w') as f:
        json.dump(json.loads(doc.serialize()), f, indent=2, sort_keys=True)
def test_ProvEsDocument():
    """Build a sample PROV-ES document end to end and print it.

    Exercises dataset(), platform(), instrument(), sensor(), software(),
    document(), algorithm(), softwareAgent(), runtimeContext() and
    processStep() on a single ProvEsDocument, then prints the serialized
    result. Nothing is asserted; the function only fails if a call raises.
    """

    # create doc
    doc = ProvEsDocument()

    # input dataset
    input_id = "hysds:INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629"
    doi = "10.5067/ARIAMH/INSAR/Scene"
    downloadURL = "https://dav.domain.com/repository/products/insar/v0.2/2014/09/22/INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629/INSAR20140922_913686_3720875"
    instrument = "eos:INSAR2-SAR"
    level = "L0"
    doc.dataset(input_id, doi, [downloadURL], [instrument], None, level)

    # input DEM
    dem_id = "hysds:srtm/version2_1/SRTM1/Region_01/N31W114"
    dem_doi = None
    dem_downloadURL = "https://dav.domain.com/repository/products/srtm/version2_1/SRTM1/Region_01/N31W114.hgt.zip"
    dem_level = "L0"
    doc.dataset(dem_id, dem_doi, [dem_downloadURL], [], None, dem_level)

    # platform
    platform = "eos:INSAR2"
    doc.platform(platform, [instrument])

    # second instrument/platform from same org
    instrument2 = "eos:INSAR4-SAR"
    platform2 = "eos:INSAR4"
    doc.platform(platform2, [instrument2])

    # instrument
    sensor = "eos:SAR"
    gov_org = "eos:ASI"
    doc.instrument(instrument, platform, [sensor], [gov_org])
    doc.sensor(sensor, instrument)
    doc.instrument(instrument2, platform2, [sensor], [gov_org])
    doc.sensor(sensor, instrument2)

    # software
    software = "eos:ISCE"
    algorithm = "eos:interferogram_creation"
    doc.software(software, [algorithm])

    # document
    atbd_id = "eos:interferogram_creation_atbd"
    atbd_doi = "10.5067/SOME/FAKE/ATBD_DOI"
    atbd_url = "http://aria.domain.com/docs/ATBD.pdf"
    doc.document(atbd_id, atbd_doi, [atbd_url])

    # algorithm
    doc.algorithm(algorithm, [software], [atbd_id])

    # output dataset
    out_id = "hysds:interferogram__T22_F314-330_INSAR1_20130828-INSAR1_20130609"
    out_doi = "10.5067/ARIAMH/INSAR/Interferogram"
    # NOTE: out_accessURL is built but never passed to the document below.
    out_accessURL = 'https://aria-search.domain.com/?source={"query":{"bool":{"must":[{"term":{"dataset":"interferogram"}},{"query_string":{"query":""interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906"","default_operator":"OR"}}]}},"sort":[{"_timestamp":{"order":"desc"}}],"fields":["_timestamp","_source"]}'
    out_downloadURL = "https://dav.domain.com/repository/products/interferograms/v0.2/2014/09/06/interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906/2014-09-22T224943.621648"
    out_level = "L1"
    doc.dataset(out_id, out_doi, [out_downloadURL], [instrument], None,
                out_level)

    # software agent
    sa_id = "hysds:ariamh-worker-32.domain.com/12353"
    pid = "12353"
    worker_node = "ariamh-worker-32.domain.com"
    doc.softwareAgent(sa_id, pid, worker_node)

    # runtime context
    rt_ctx_id = "hysds:runtime_context"
    doc.runtimeContext(rt_ctx_id, [downloadURL])

    # process step
    proc_id = "hysds:create_interferogram-INSAR20130625_673969_2940232"
    start_time = datetime.utcnow()
    end_time = start_time + timedelta(seconds=12233)
    doc.processStep(
        proc_id,
        start_time.isoformat() + "Z",
        end_time.isoformat() + "Z",
        [software],
        sa_id,
        rt_ctx_id,
        [input_id, dem_id],
        [out_id],
        wasAssociatedWithRole="softwareAgent",
    )

    # print() with one parenthesised argument behaves the same on Python 2
    # and Python 3; the bare print statement is a SyntaxError on Python 3
    print(doc.serialize(indent=2))
def create_prov_es_json(id, netsel_file, jobdesc_file, project, aria_dem_xml,
                        aria_dem_file, prod_dir, work_dir, prov_file):
    """Create the PROV-ES provenance JSON file for a CSK interferogram.

    Registers the job inputs (network selector file, job description file,
    DEM XML/file and every URL returned by ``get_netsel_urls(netsel_file)``
    that PLATFORM_RE recognises), describes the platforms, instruments,
    sensors, software and output granule, and writes the serialized
    document to ``prov_file``.

    Args:
        id: product ID; used as the label of the output granule.
        netsel_file: network selector file; passed to ``get_netsel_urls()``
            and also joined onto the work URL as an input entity.
        jobdesc_file: job description file, joined onto the work URL.
        project: project name used for the runtime context ID and label.
        aria_dem_xml: DEM XML file, joined onto the work URL.
        aria_dem_file: DEM file, joined onto the work URL.
        prod_dir: product directory, joined onto the work URL to form the
            output granule location.
        work_dir: job work directory; used verbatim in the ``file://`` URL.
        prov_file: path of the JSON file to write.

    Raises:
        IOError/OSError: if ``prov_file`` cannot be written.
    """

    # put in fake start/end times so that prov:used and prov:generated
    # are properly created by the prov lib
    fake_time = datetime.utcnow().isoformat() + 'Z'
    job_id = "create_interferogram-%s" % fake_time

    # create PROV-ES doc; no bundle is used, so every record below is
    # created with bundle=None
    doc = ProvEsDocument()
    bndl = None

    # identifiers seen so far (dicts keyed by identifier, used as sets)
    input_ids = {}
    platform_ids = {}
    instrument_ids = {}

    # full url paths
    work_url = "file://%s%s" % (socket.getfqdn(), work_dir)
    prod_url = "%s/%s" % (work_url, prod_dir)

    def add_input_file(rel_path):
        """Register a file under the work URL as an input entity."""
        file_url = "%s/%s" % (work_url, rel_path)
        ent = doc.file("hysds:%s" % get_uuid(file_url), [file_url],
                       label=os.path.basename(rel_path))
        input_ids[ent.identifier] = True

    # add network selector and job description files
    add_input_file(netsel_file)
    add_input_file(jobdesc_file)

    # describe the CSK input products listed in the network selector
    level = "L0"
    version = "v1.0"
    sensor = "eos:SAR"
    sensor_title = "Synthetic-aperture radar (SAR)"
    gov_org = "eos:ASI"
    gov_org_title = "Agenzia Spaziale Italiana"
    doc.governingOrganization(gov_org, label=gov_org_title, bundle=bndl)
    for url in get_netsel_urls(netsel_file):
        match = PLATFORM_RE.search(url)
        if not match:
            # URL not recognised by PLATFORM_RE: nothing to record
            continue
        pf = match.group(1)
        platform = "eos:%s" % pf
        # the last character of the matched platform name goes in the title
        platform_title = "COSMO-SkyMed Satellite %s" % pf[-1]
        instrument = "eos:%s-SAR" % pf
        instrument_title = "%s-SAR" % pf
        input_ds = doc.product("hysds:%s" % get_uuid(url), None, [url],
                               [instrument], None, level, version,
                               label=os.path.basename(url), bundle=bndl)
        input_ids[input_ds.identifier] = True
        # declare each platform / instrument only once
        if platform not in platform_ids:
            doc.platform(platform, [instrument], label=platform_title,
                         bundle=bndl)
            platform_ids[platform] = True
        if instrument not in instrument_ids:
            doc.instrument(instrument, platform, [sensor], [gov_org],
                           label=instrument_title, bundle=bndl)
            doc.sensor(sensor, instrument, label=sensor_title, bundle=bndl)
            instrument_ids[instrument] = True

    # add dem xml, file and related provenance
    srtm_platform = "eos:SpaceShuttleEndeavour"
    srtm_platform_title = "USS Endeavour"
    srtm_instrument = "eos:SRTM"
    srtm_instrument_title = "Shuttle Radar Topography Mission (SRTM)"
    srtm_sensor = "eos:radar"
    srtm_sensor_title = "radar"
    srtm_gov_org = "eos:JPL"
    srtm_gov_org_title = "Jet Propulsion Laboratory"
    doc.governingOrganization(srtm_gov_org, label=srtm_gov_org_title,
                              bundle=bndl)
    add_input_file(aria_dem_xml)
    add_input_file(aria_dem_file)
    doc.platform(srtm_platform, [srtm_instrument], label=srtm_platform_title,
                 bundle=bndl)
    doc.instrument(srtm_instrument, srtm_platform, [srtm_sensor],
                   [srtm_gov_org], label=srtm_instrument_title, bundle=bndl)
    doc.sensor(srtm_sensor, srtm_instrument, label=srtm_sensor_title,
               bundle=bndl)
    instrument_ids[srtm_instrument] = True

    # software and algorithm
    algorithm = "eos:interferogram_generation"
    software_version = "2.0.0_201604"
    software_title = "InSAR SCE (InSAR Scientific Computing Environment) v%s" % software_version
    software = "eos:ISCE-%s" % software_version
    software_location = "https://winsar.unavco.org/isce.html"
    doc.software(software, [algorithm], software_version,
                 label=software_title, location=software_location,
                 bundle=bndl)

    # output collection and granule; list(...) hands the library real lists
    # on both Python 2 and Python 3 (where dict.keys() is only a view)
    int_level = "L2"
    int_version = "v1.0"
    int_collection = "eos:CSK-interferograms-%s" % int_version
    int_collection_shortname = "CSK-interferograms-%s" % int_version
    int_collection_label = "ISCE generated CSK interferograms %s" % int_version
    int_collection_loc = "https://aria-dav.jpl.nasa.gov/repository/products/interferogram/%s" % int_version
    doc.collection(int_collection, None, int_collection_shortname,
                   int_collection_label, [int_collection_loc],
                   list(instrument_ids), int_level, int_version,
                   label=int_collection_label, bundle=bndl)
    output_ds = doc.granule("hysds:%s" % get_uuid(prod_url), None, [prod_url],
                            list(instrument_ids), int_collection, int_level,
                            int_version, label=id, bundle=bndl)

    # runtime context
    rt_ctx_id = "hysds:runtimeContext-ariamh-%s" % project
    doc.runtimeContext(rt_ctx_id, [project], label=project, bundle=bndl)

    # create process
    doc.processStep("hysds:%s" % get_uuid(job_id), fake_time, fake_time,
                    [software], None, rt_ctx_id, list(input_ids),
                    [output_ds.identifier], label=job_id, bundle=bndl,
                    prov_type="hysds:create_interferogram")

    # write
    with open(prov_file, 'w') as f:
        json.dump(json.loads(doc.serialize()), f, indent=2, sort_keys=True)
def test_ProvEsDocument():
    """Assemble a sample PROV-ES document and print its serialization.

    Walks through datasets, platforms, instruments, sensors, software,
    an ATBD document, an algorithm, a software agent, a runtime context
    and a process step on one ProvEsDocument. Nothing is asserted; the
    function only fails if one of the calls raises.
    """

    prov_doc = ProvEsDocument()

    # --- input scene -----------------------------------------------------
    scene_id = "hysds:INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629"
    scene_doi = "10.5067/ARIAMH/INSAR/Scene"
    scene_url = 'https://dav.domain.com/repository/products/insar/v0.2/2014/09/22/INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629/INSAR20140922_913686_3720875'
    sar_instrument = "eos:INSAR2-SAR"
    scene_level = "L0"
    prov_doc.dataset(scene_id, scene_doi, [scene_url], [sar_instrument],
                     None, scene_level)

    # --- input DEM (no DOI, no instrument) -------------------------------
    dem_id = "hysds:srtm/version2_1/SRTM1/Region_01/N31W114"
    dem_url = 'https://dav.domain.com/repository/products/srtm/version2_1/SRTM1/Region_01/N31W114.hgt.zip'
    dem_level = "L0"
    prov_doc.dataset(dem_id, None, [dem_url], [], None, dem_level)

    # --- two platforms, each with its own instrument ---------------------
    first_platform = "eos:INSAR2"
    prov_doc.platform(first_platform, [sar_instrument])

    second_instrument = "eos:INSAR4-SAR"
    second_platform = "eos:INSAR4"
    prov_doc.platform(second_platform, [second_instrument])

    # --- instruments and the sensor they share ---------------------------
    sar_sensor = "eos:SAR"
    agency = "eos:ASI"
    prov_doc.instrument(sar_instrument, first_platform, [sar_sensor],
                        [agency])
    prov_doc.sensor(sar_sensor, sar_instrument)
    prov_doc.instrument(second_instrument, second_platform, [sar_sensor],
                        [agency])
    prov_doc.sensor(sar_sensor, second_instrument)

    # --- software, its ATBD document and the algorithm -------------------
    isce = "eos:ISCE"
    ifg_algorithm = "eos:interferogram_creation"
    prov_doc.software(isce, [ifg_algorithm])

    atbd_id = "eos:interferogram_creation_atbd"
    atbd_doi = "10.5067/SOME/FAKE/ATBD_DOI"
    atbd_url = "http://aria.domain.com/docs/ATBD.pdf"
    prov_doc.document(atbd_id, atbd_doi, [atbd_url])

    prov_doc.algorithm(ifg_algorithm, [isce], [atbd_id])

    # --- output interferogram --------------------------------------------
    ifg_id = "hysds:interferogram__T22_F314-330_INSAR1_20130828-INSAR1_20130609"
    ifg_doi = "10.5067/ARIAMH/INSAR/Interferogram"
    # NOTE: the access URL is built but never passed to the document below.
    ifg_access_url = 'https://aria-search.domain.com/?source={"query":{"bool":{"must":[{"term":{"dataset":"interferogram"}},{"query_string":{"query":"\"interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906\"","default_operator":"OR"}}]}},"sort":[{"_timestamp":{"order":"desc"}}],"fields":["_timestamp","_source"]}'
    ifg_url = 'https://dav.domain.com/repository/products/interferograms/v0.2/2014/09/06/interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906/2014-09-22T224943.621648'
    ifg_level = "L1"
    prov_doc.dataset(ifg_id, ifg_doi, [ifg_url], [sar_instrument], None,
                     ifg_level)

    # --- software agent --------------------------------------------------
    agent_id = "hysds:ariamh-worker-32.domain.com/12353"
    agent_pid = "12353"
    agent_host = "ariamh-worker-32.domain.com"
    prov_doc.softwareAgent(agent_id, agent_pid, agent_host)

    # --- runtime context -------------------------------------------------
    runtime_ctx = "hysds:runtime_context"
    prov_doc.runtimeContext(runtime_ctx, [scene_url])

    # --- process step linking inputs to the output -----------------------
    step_id = "hysds:create_interferogram-INSAR20130625_673969_2940232"
    started = datetime.utcnow()
    finished = started + timedelta(seconds=12233)
    started_iso = started.isoformat() + 'Z'
    finished_iso = finished.isoformat() + 'Z'
    process_step = prov_doc.processStep(
        step_id, started_iso, finished_iso, [isce], agent_id, runtime_ctx,
        [scene_id, dem_id], [ifg_id],
        wasAssociatedWithRole="softwareAgent")

    serialized = prov_doc.serialize(indent=2)
    print(serialized)