Пример #1
0
def create_prov_es_json(id, project, master_orbit_file, slave_orbit_file,
                        aria_dem_xml, aria_dem_file, work_dir, prov_file):
    """Create the PROV-ES provenance JSON file for an interferogram product.

    Builds a ProvEsDocument that records the job inputs (sentinel.ini, both
    orbit files, the master/slave zip URLs found in the product's context
    file, the DEM xml and DEM file), the software used, the output granule
    and the process step tying them together, then writes the serialized
    document to ``prov_file``.

    Args:
        id: Product id. Names the product directory under ``work_dir``, the
            ``<id>.context.json`` file inside it, and labels the granule.
        project: Project name used in the runtime context id and label.
        master_orbit_file: Master orbit file path, appended to the work URL.
        slave_orbit_file: Slave orbit file path, appended to the work URL.
        aria_dem_xml: DEM xml path, appended to the work URL.
        aria_dem_file: DEM file path, appended to the work URL.
        work_dir: Working directory; converted to an absolute path.
        prov_file: Path of the JSON file to write.

    Raises:
        IOError/OSError: If the context file cannot be opened or
            ``prov_file`` cannot be written.
        ValueError: If the context file does not contain valid JSON.
    """

    # get abs paths
    work_dir = os.path.abspath(work_dir)
    prod_dir = os.path.join(work_dir, id)

    # get context
    ctx_file = os.path.join(prod_dir, "%s.context.json" % id)
    with open(ctx_file) as f:
        context = json.load(f)

    # put in fake start/end times so that prov:used and prov:generated
    # are properly created by the prov lib
    fake_time = datetime.utcnow().isoformat() + 'Z'
    job_id = "create_interferogram-%s" % fake_time

    # create PROV-ES doc; no bundle is created, so bundle=None is passed on
    doc = ProvEsDocument()
    bndl = None

    # identifiers registered so far; the dicts act as de-duplicating
    # collections (only their keys are ever read)
    input_ids = {}
    platform_ids = {}
    instrument_ids = {}

    # full url paths
    work_url = "file://%s%s" % (socket.getfqdn(), work_dir)
    prod_url = "%s/%s" % (work_url, id)

    # add sentinel.ini file
    ini_url = "%s/sentinel.ini" % work_url
    ini_ent = doc.file("hysds:%s" % get_uuid(ini_url), [ini_url],
                       label="sentinel.ini")
    input_ids[ini_ent.identifier] = True

    # add orbit files (master first, then slave)
    for orbit_file in (master_orbit_file, slave_orbit_file):
        orbit_url = "%s/%s" % (work_url, orbit_file)
        orbit_ent = doc.file("hysds:%s" % get_uuid(orbit_url), [orbit_url],
                             label=os.path.basename(orbit_file))
        input_ids[orbit_ent.identifier] = True

    # get list of S1A urls
    level = "L0"
    sensor = "eos:SAR"
    sensor_title = "Synthetic-aperture radar (SAR)"
    gov_org = "eos:ESA"
    gov_org_title = "European Space Agency"
    doc.governingOrganization(gov_org, label=gov_org_title, bundle=bndl)
    zip_urls = (context.get('master_zip_url', ''),
                context.get('slave_zip_url', ''))
    for url in zip_urls:
        match = PLATFORM_RE.search(url)
        if not match:
            # url does not name a recognizable platform; nothing to record
            continue
        pf = match.group(1)
        platform = "eos:%s" % pf
        # NOTE(review): the title is fixed regardless of the matched platform
        # name -- confirm this is intended if PLATFORM_RE can match others.
        platform_title = "Sentinel1A Satellite"
        instrument = "eos:%s-SAR" % pf
        instrument_title = "%s-SAR" % pf
        # NOTE(review): the last positional argument is None here; it looks
        # like the version slot -- confirm against ProvEsDocument.product.
        input_ds = doc.product("hysds:%s" % get_uuid(url),
                               None, [url], [instrument],
                               None,
                               level,
                               None,
                               label=os.path.basename(url),
                               bundle=bndl)
        input_ids[input_ds.identifier] = True
        if platform not in platform_ids:
            doc.platform(platform, [instrument],
                         label=platform_title,
                         bundle=bndl)
            platform_ids[platform] = True
        if instrument not in instrument_ids:
            doc.instrument(instrument,
                           platform, [sensor], [gov_org],
                           label=instrument_title,
                           bundle=bndl)
            doc.sensor(sensor, instrument, label=sensor_title, bundle=bndl)
            instrument_ids[instrument] = True

    # add dem xml, file and related provenance
    srtm_platform = "eos:SpaceShuttleEndeavour"
    srtm_platform_title = "USS Endeavour"
    srtm_instrument = "eos:SRTM"
    srtm_instrument_title = "Shuttle Radar Topography Mission (SRTM)"
    srtm_sensor = "eos:radar"
    srtm_sensor_title = "radar"
    srtm_gov_org = "eos:JPL"
    srtm_gov_org_title = "Jet Propulsion Laboratory"
    doc.governingOrganization(srtm_gov_org,
                              label=srtm_gov_org_title,
                              bundle=bndl)
    for dem_path in (aria_dem_xml, aria_dem_file):
        dem_url = "%s/%s" % (work_url, dem_path)
        dem_ent = doc.file("hysds:%s" % get_uuid(dem_url), [dem_url],
                           label=os.path.basename(dem_path))
        input_ids[dem_ent.identifier] = True
    doc.platform(srtm_platform, [srtm_instrument],
                 label=srtm_platform_title,
                 bundle=bndl)
    doc.instrument(srtm_instrument,
                   srtm_platform, [srtm_sensor], [srtm_gov_org],
                   label=srtm_instrument_title,
                   bundle=bndl)
    doc.sensor(srtm_sensor,
               srtm_instrument,
               label=srtm_sensor_title,
               bundle=bndl)
    instrument_ids[srtm_instrument] = True

    # software and algorithm
    algorithm = "eos:interferogram_generation"
    software_version = "2.0.0_201604"
    software_title = "InSAR SCE (InSAR Scientific Computing Environment) v%s" % software_version
    software = "eos:ISCE-%s" % software_version
    software_location = "https://winsar.unavco.org/isce.html"
    doc.software(software, [algorithm],
                 software_version,
                 label=software_title,
                 location=software_location,
                 bundle=bndl)

    # output; concrete lists are passed so the library never receives a
    # dict view (what .keys() returns on Python 3)
    int_level = "L2"
    int_version = "v1.0"
    int_collection = "eos:S1A-interferograms-%s" % int_version
    int_collection_shortname = "S1A-interferograms-%s" % int_version
    int_collection_label = "ISCE generated S1A interferograms %s" % int_version
    int_collection_loc = "https://aria-dst-dav.jpl.nasa.gov/products/s1a_ifg/%s" % int_version
    doc.collection(int_collection,
                   None,
                   int_collection_shortname,
                   int_collection_label, [int_collection_loc],
                   list(instrument_ids),
                   int_level,
                   int_version,
                   label=int_collection_label,
                   bundle=bndl)
    output_ds = doc.granule("hysds:%s" % get_uuid(prod_url),
                            None, [prod_url],
                            list(instrument_ids),
                            int_collection,
                            int_level,
                            int_version,
                            label=id,
                            bundle=bndl)

    # runtime context
    rt_ctx_id = "hysds:runtimeContext-sentinel_ifg-%s" % project
    doc.runtimeContext(rt_ctx_id, [project], label=project, bundle=bndl)

    # create process; the same fake timestamp is used as start and end time
    doc.processStep("hysds:%s" % get_uuid(job_id),
                    fake_time,
                    fake_time, [software],
                    None,
                    rt_ctx_id,
                    list(input_ids), [output_ds.identifier],
                    label=job_id,
                    bundle=bndl,
                    prov_type="hysds:create_interferogram")

    # write; round-trip through json so the output is indented and key-sorted
    with open(prov_file, 'w') as f:
        json.dump(json.loads(doc.serialize()), f, indent=2, sort_keys=True)
Пример #2
0
def test_ProvEsDocument():
    """Smoke-test ProvEsDocument by building one interferogram provenance graph.

    Registers two input datasets, two platform/instrument pairs that share a
    sensor and governing organization, a software/algorithm/document triple,
    an output dataset, a software agent, a runtime context and a process
    step, then prints the serialized document. No assertions are made; the
    test only fails if one of the calls raises.
    """

    # create doc
    doc = ProvEsDocument()

    # input dataset
    scene_id = "hysds:INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629"
    doi = "10.5067/ARIAMH/INSAR/Scene"
    downloadURL = "https://dav.domain.com/repository/products/insar/v0.2/2014/09/22/INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629/INSAR20140922_913686_3720875"
    instrument = "eos:INSAR2-SAR"
    level = "L0"
    doc.dataset(scene_id, doi, [downloadURL], [instrument], None, level)

    # input DEM (no DOI and no instrument)
    dem_id = "hysds:srtm/version2_1/SRTM1/Region_01/N31W114"
    dem_doi = None
    dem_downloadURL = "https://dav.domain.com/repository/products/srtm/version2_1/SRTM1/Region_01/N31W114.hgt.zip"
    dem_level = "L0"
    doc.dataset(dem_id, dem_doi, [dem_downloadURL], [], None, dem_level)

    # platform
    platform = "eos:INSAR2"
    doc.platform(platform, [instrument])

    # second instrument/platform from same org
    instrument2 = "eos:INSAR4-SAR"
    platform2 = "eos:INSAR4"
    doc.platform(platform2, [instrument2])

    # instrument
    sensor = "eos:SAR"
    gov_org = "eos:ASI"
    doc.instrument(instrument, platform, [sensor], [gov_org])
    doc.sensor(sensor, instrument)
    doc.instrument(instrument2, platform2, [sensor], [gov_org])
    doc.sensor(sensor, instrument2)

    # software
    software = "eos:ISCE"
    algorithm = "eos:interferogram_creation"
    doc.software(software, [algorithm])

    # document
    atbd_id = "eos:interferogram_creation_atbd"
    atbd_doi = "10.5067/SOME/FAKE/ATBD_DOI"
    atbd_url = "http://aria.domain.com/docs/ATBD.pdf"
    doc.document(atbd_id, atbd_doi, [atbd_url])

    # algorithm
    doc.algorithm(algorithm, [software], [atbd_id])

    # output dataset
    out_id = "hysds:interferogram__T22_F314-330_INSAR1_20130828-INSAR1_20130609"
    out_doi = "10.5067/ARIAMH/INSAR/Interferogram"
    out_downloadURL = "https://dav.domain.com/repository/products/interferograms/v0.2/2014/09/06/interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906/2014-09-22T224943.621648"
    out_level = "L1"
    doc.dataset(out_id, out_doi, [out_downloadURL], [instrument], None, out_level)

    # software agent
    sa_id = "hysds:ariamh-worker-32.domain.com/12353"
    pid = "12353"
    worker_node = "ariamh-worker-32.domain.com"
    doc.softwareAgent(sa_id, pid, worker_node)

    # runtime context
    rt_ctx_id = "hysds:runtime_context"
    doc.runtimeContext(rt_ctx_id, [downloadURL])

    # process step: consumes the scene and the DEM, generates the output
    proc_id = "hysds:create_interferogram-INSAR20130625_673969_2940232"
    start_time = datetime.utcnow()
    end_time = start_time + timedelta(seconds=12233)
    doc.processStep(
        proc_id,
        start_time.isoformat() + "Z",
        end_time.isoformat() + "Z",
        [software],
        sa_id,
        rt_ctx_id,
        [scene_id, dem_id],
        [out_id],
        wasAssociatedWithRole="softwareAgent",
    )

    # call form of print works on both Python 2 and Python 3
    print(doc.serialize(indent=2))
Пример #3
0
def create_prov_es_json(id, netsel_file, jobdesc_file, project, aria_dem_xml,
                        aria_dem_file, prod_dir, work_dir, prov_file):
    """Create the PROV-ES provenance JSON file for an interferogram product.

    Builds a ProvEsDocument that records the job inputs (network selector
    file, job description file, the URLs returned by
    ``get_netsel_urls(netsel_file)``, the DEM xml and DEM file), the software
    used, the output granule and the process step tying them together, then
    writes the serialized document to ``prov_file``.

    Args:
        id: Product id; used as the label of the output granule.
        netsel_file: Network selector file path, appended to the work URL and
            passed to ``get_netsel_urls``.
        jobdesc_file: Job description file path, appended to the work URL.
        project: Project name used in the runtime context id and label.
        aria_dem_xml: DEM xml path, appended to the work URL.
        aria_dem_file: DEM file path, appended to the work URL.
        prod_dir: Product directory, appended to the work URL to form the
            output granule's location.
        work_dir: Working directory; used as given to build the work URL.
        prov_file: Path of the JSON file to write.

    Raises:
        IOError/OSError: If ``prov_file`` cannot be opened for writing.
    """

    # put in fake start/end times so that prov:used and prov:generated
    # are properly created by the prov lib
    fake_time = datetime.utcnow().isoformat() + 'Z'
    job_id = "create_interferogram-%s" % fake_time

    # create PROV-ES doc; no bundle is created, so bundle=None is passed on
    doc = ProvEsDocument()
    bndl = None

    # identifiers registered so far; the dicts act as de-duplicating
    # collections (only their keys are ever read)
    input_ids = {}
    platform_ids = {}
    instrument_ids = {}

    # full url paths
    work_url = "file://%s%s" % (socket.getfqdn(), work_dir)
    prod_url = "%s/%s" % (work_url, prod_dir)

    # add network selector file, then job description file
    for job_input in (netsel_file, jobdesc_file):
        job_input_url = "%s/%s" % (work_url, job_input)
        job_input_ent = doc.file("hysds:%s" % get_uuid(job_input_url),
                                 [job_input_url],
                                 label=os.path.basename(job_input))
        input_ids[job_input_ent.identifier] = True

    # get list of CSK urls
    level = "L0"
    version = "v1.0"
    sensor = "eos:SAR"
    sensor_title = "Synthetic-aperture radar (SAR)"
    gov_org = "eos:ASI"
    gov_org_title = "Agenzia Spaziale Italiana"
    doc.governingOrganization(gov_org, label=gov_org_title, bundle=bndl)
    for url in get_netsel_urls(netsel_file):
        match = PLATFORM_RE.search(url)
        if not match:
            # url does not name a recognizable platform; nothing to record
            continue
        pf = match.group(1)
        platform = "eos:%s" % pf
        # the last character of the matched platform name goes in the title
        platform_title = "COSMO-SkyMed Satellite %s" % pf[-1]
        instrument = "eos:%s-SAR" % pf
        instrument_title = "%s-SAR" % pf
        input_ds = doc.product("hysds:%s" % get_uuid(url), None,
                               [url], [instrument], None, level, version,
                               label=os.path.basename(url), bundle=bndl)
        input_ids[input_ds.identifier] = True
        if platform not in platform_ids:
            doc.platform(platform, [instrument], label=platform_title,
                         bundle=bndl)
            platform_ids[platform] = True
        if instrument not in instrument_ids:
            doc.instrument(instrument, platform, [sensor], [gov_org],
                           label=instrument_title, bundle=bndl)
            doc.sensor(sensor, instrument, label=sensor_title, bundle=bndl)
            instrument_ids[instrument] = True

    # add dem xml, file and related provenance
    srtm_platform = "eos:SpaceShuttleEndeavour"
    srtm_platform_title = "USS Endeavour"
    srtm_instrument = "eos:SRTM"
    srtm_instrument_title = "Shuttle Radar Topography Mission (SRTM)"
    srtm_sensor = "eos:radar"
    srtm_sensor_title = "radar"
    srtm_gov_org = "eos:JPL"
    srtm_gov_org_title = "Jet Propulsion Laboratory"
    doc.governingOrganization(srtm_gov_org, label=srtm_gov_org_title, bundle=bndl)
    for dem_path in (aria_dem_xml, aria_dem_file):
        dem_url = "%s/%s" % (work_url, dem_path)
        dem_ent = doc.file("hysds:%s" % get_uuid(dem_url), [dem_url],
                           label=os.path.basename(dem_path))
        input_ids[dem_ent.identifier] = True
    doc.platform(srtm_platform, [srtm_instrument], label=srtm_platform_title,
                 bundle=bndl)
    doc.instrument(srtm_instrument, srtm_platform, [srtm_sensor], [srtm_gov_org],
                   label=srtm_instrument_title, bundle=bndl)
    doc.sensor(srtm_sensor, srtm_instrument, label=srtm_sensor_title, bundle=bndl)
    instrument_ids[srtm_instrument] = True

    # software and algorithm
    algorithm = "eos:interferogram_generation"
    software_version = "2.0.0_201604"
    software_title = "InSAR SCE (InSAR Scientific Computing Environment) v%s" % software_version
    software = "eos:ISCE-%s" % software_version
    software_location = "https://winsar.unavco.org/isce.html"
    doc.software(software, [algorithm], software_version, label=software_title,
                 location=software_location, bundle=bndl)

    # output; concrete lists are passed so the library never receives a
    # dict view (what .keys() returns on Python 3)
    int_level = "L2"
    int_version = "v1.0"
    int_collection = "eos:CSK-interferograms-%s" % int_version
    int_collection_shortname = "CSK-interferograms-%s" % int_version
    int_collection_label = "ISCE generated CSK interferograms %s" % int_version
    int_collection_loc = "https://aria-dav.jpl.nasa.gov/repository/products/interferogram/%s" % int_version
    doc.collection(int_collection, None, int_collection_shortname,
                   int_collection_label, [int_collection_loc],
                   list(instrument_ids), int_level, int_version,
                   label=int_collection_label, bundle=bndl)
    output_ds = doc.granule("hysds:%s" % get_uuid(prod_url), None, [prod_url],
                            list(instrument_ids), int_collection, int_level,
                            int_version, label=id, bundle=bndl)

    # runtime context
    rt_ctx_id = "hysds:runtimeContext-ariamh-%s" % project
    doc.runtimeContext(rt_ctx_id, [project], label=project, bundle=bndl)

    # create process; the same fake timestamp is used as start and end time
    doc.processStep("hysds:%s" % get_uuid(job_id), fake_time, fake_time,
                    [software], None, rt_ctx_id, list(input_ids),
                    [output_ds.identifier], label=job_id, bundle=bndl,
                    prov_type="hysds:create_interferogram")

    # write; round-trip through json so the output is indented and key-sorted
    with open(prov_file, 'w') as f:
        json.dump(json.loads(doc.serialize()), f, indent=2, sort_keys=True)
Пример #4
0
def test_ProvEsDocument():
    """Exercise ProvEsDocument end to end and print the serialized result.

    Registers, in order: two input datasets, two platforms, their two
    instruments (each followed by the shared sensor), software, an ATBD
    document, an algorithm, an output dataset, a software agent, a runtime
    context and a process step. Nothing is asserted.
    """

    prov_doc = ProvEsDocument()

    # input scene
    scene_id = "hysds:INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629"
    scene_url = 'https://dav.domain.com/repository/products/insar/v0.2/2014/09/22/INSAR2_RAW_HI_06_HH_RA_20140922062622_20140922062629/INSAR20140922_913686_3720875'
    sar_instrument = "eos:INSAR2-SAR"
    prov_doc.dataset(scene_id, "10.5067/ARIAMH/INSAR/Scene", [scene_url],
                     [sar_instrument], None, "L0")

    # input DEM: no DOI, no instruments
    srtm_id = "hysds:srtm/version2_1/SRTM1/Region_01/N31W114"
    srtm_url = 'https://dav.domain.com/repository/products/srtm/version2_1/SRTM1/Region_01/N31W114.hgt.zip'
    prov_doc.dataset(srtm_id, None, [srtm_url], [], None, "L0")

    # two platform/instrument pairs run by the same organization
    craft = [
        ("eos:INSAR2", sar_instrument),
        ("eos:INSAR4", "eos:INSAR4-SAR"),
    ]
    for craft_id, craft_instrument in craft:
        prov_doc.platform(craft_id, [craft_instrument])

    # each instrument is registered together with the shared sensor
    sar_sensor = "eos:SAR"
    agency = "eos:ASI"
    for craft_id, craft_instrument in craft:
        prov_doc.instrument(craft_instrument, craft_id, [sar_sensor], [agency])
        prov_doc.sensor(sar_sensor, craft_instrument)

    # software, its ATBD document, and the algorithm linking the two
    isce = "eos:ISCE"
    ifg_algorithm = "eos:interferogram_creation"
    prov_doc.software(isce, [ifg_algorithm])

    atbd = "eos:interferogram_creation_atbd"
    prov_doc.document(atbd, "10.5067/SOME/FAKE/ATBD_DOI",
                      ["http://aria.domain.com/docs/ATBD.pdf"])

    prov_doc.algorithm(ifg_algorithm, [isce], [atbd])

    # output interferogram
    ifg_id = "hysds:interferogram__T22_F314-330_INSAR1_20130828-INSAR1_20130609"
    # access URL is defined but not passed to any call below
    ifg_access_url = 'https://aria-search.domain.com/?source={"query":{"bool":{"must":[{"term":{"dataset":"interferogram"}},{"query_string":{"query":"\"interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906\"","default_operator":"OR"}}]}},"sort":[{"_timestamp":{"order":"desc"}}],"fields":["_timestamp","_source"]}'
    ifg_url = 'https://dav.domain.com/repository/products/interferograms/v0.2/2014/09/06/interferogram__T111_F330-343_INSAR1_20140922-INSAR1_20140906/2014-09-22T224943.621648'
    prov_doc.dataset(ifg_id, "10.5067/ARIAMH/INSAR/Interferogram", [ifg_url],
                     [sar_instrument], None, "L1")

    # worker that ran the job
    agent_id = "hysds:ariamh-worker-32.domain.com/12353"
    prov_doc.softwareAgent(agent_id, "12353", "ariamh-worker-32.domain.com")

    # runtime context
    context_id = "hysds:runtime_context"
    prov_doc.runtimeContext(context_id, [scene_url])

    # process step spanning 12233 seconds from "now"
    began = datetime.utcnow()
    ended = began + timedelta(seconds=12233)
    began_iso = began.isoformat() + 'Z'
    ended_iso = ended.isoformat() + 'Z'
    step = prov_doc.processStep(
        "hysds:create_interferogram-INSAR20130625_673969_2940232",
        began_iso,
        ended_iso,
        [isce],
        agent_id,
        context_id,
        [scene_id, srtm_id],
        [ifg_id],
        wasAssociatedWithRole="softwareAgent")

    print(prov_doc.serialize(indent=2))