Example #1
def query_grq(doc_id):
    """
    This function queries ES
    :param endpoint: the value specifies which ES endpoint to send query
     can be MOZART or GRQ
    :param doc_id: id of product or job
    :return: result from elasticsearch
    """
    es_url, es_index = None, None
    '''
    if endpoint == GRQ_ES_ENDPOINT:
        es_url = app.conf["GRQ_ES_URL"]
        es_index = "grq"
    if endpoint == MOZART_ES_ENDPOINT:
        es_url = app.conf['JOBS_ES_URL']
        es_index = "job_status-current"
    '''

    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url
    es_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url
    es_index = uu.grq_index_prefix

    query = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"_id": doc_id}}
                    # add job status:
                ]
            }
        }
    }
    #print(query)

    if es_url.endswith('/'):
        search_url = '%s%s/_search' % (es_url, es_index)
    else:
        search_url = '%s/%s/_search' % (es_url, es_index)
    r = requests.post(search_url, data=json.dumps(query))

    if r.status_code != 200:
        print("Failed to query %s:\n%s" % (search_url, r.text))
        print("query: %s" % json.dumps(query, indent=2))
        r.raise_for_status()

    result = r.json()
    print(result['hits']['total'])
    return result['hits']['hits']
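A usage sketch for the example above: query_grq() boils down to one exact-match term query against the GRQ index and returns the raw hits. The endpoint and index below are placeholders rather than values from the original module, so treat this as a minimal illustration, not working configuration.

import json
import requests

GRQ_URL = "http://localhost:9200"   # assumed Elasticsearch endpoint
GRQ_INDEX = "grq"                   # assumed index name/prefix

def lookup_by_id(doc_id):
    """Return hits for an exact _id match, mirroring query_grq() above."""
    query = {"query": {"bool": {"must": [{"term": {"_id": doc_id}}]}}}
    url = "{}/{}/_search".format(GRQ_URL.rstrip('/'), GRQ_INDEX)
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    return r.json()['hits']['hits']

if __name__ == "__main__":
    print(lookup_by_id("some-product-id"))  # placeholder id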
Example #2
def run_auditor(context, dataset="ifg"):
    '''
    Route auditor for dataset type
    '''
    try:
        #Read existing
        with open(context, "r") as fh1:
            context = json.load(fh1)
        try:
            coordinates = json.loads(context["audit_coordinates"])
        except TypeError as ex:
            coordinates = context["audit_coordinates"]
        context["query"] = {
            "query":
            get_audit_input_query(context["audit_starttime"],
                                  context["audit_endtime"], coordinates)
        }
        #Write out new context
        enum_context = "enum_context.json"
        with open(enum_context, "w") as fh2:
            json.dump(context, fh2)
        #Call the pair-gen code with new context
        LOGGER.info("Enumerating IFGs")
        if dataset == "ifg":
            cfgs = enumerate_topsapp_cfgs.get_topsapp_cfgs("enum_context.json")
        elif dataset == "slcp":
            cfgs = enumerate_topsapp_cfgs.get_topsapp_cfgs_rsp(
                "enum_context.json")
        else:
            raise RuntimeError("Unknown dataset type for auditor: %s" %
                               dataset)

        # query docs
        url_util = UU()
        LOGGER.info("rest_url: %s" % url_util.rest_url)
        LOGGER.info("grq_index_prefix: %s" % url_util.grq_index_prefix)
        LOGGER.info("version: %s" % url_util.version)
        # get normalized rest url
        rest_url = url_util.rest_url[:-1] if url_util.rest_url.endswith(
            '/') else url_util.rest_url
        return audit(cfgs, rest_url, url_util.grq_index_prefix,
                     url_util.version)
    except Exception as ex:
        with open('_alt_error.txt', 'w') as fh1:
            fh1.write("{}\n".format(ex))
        with open('_alt_traceback.txt', 'w') as fh2:
            fh2.write("{}\n".format(traceback.format_exc()))
        LOGGER.error("Exception of type %s occured with message %s" %
                     (type(ex), ex))
        LOGGER.error("Traceback:\n%s" % traceback.format_exc())
        raise
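The dataset routing in run_auditor() is a plain string dispatch to one of two enumerators. Below is a sketch of the same routing expressed as a lookup table; the import path for enumerate_topsapp_cfgs is assumed here, since only the module name appears in the original code.

import enumerate_topsapp_cfgs  # assumed import path; the original only shows the module name

ENUMERATORS = {
    "ifg": enumerate_topsapp_cfgs.get_topsapp_cfgs,
    "slcp": enumerate_topsapp_cfgs.get_topsapp_cfgs_rsp,
}

def enumerate_cfgs(dataset, ctx_file="enum_context.json"):
    """Dispatch to the enumerator for `dataset`, as run_auditor() does above."""
    try:
        return ENUMERATORS[dataset](ctx_file)
    except KeyError:
        raise RuntimeError("Unknown dataset type for auditor: %s" % dataset)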
Example #3
def get_topsapp_cfgs(context_file,
                     temporalBaseline=72,
                     id_tmpl=IFG_ID_TMPL,
                     minMatch=0,
                     covth=.95):
    """Return all possible topsApp configurations."""
    # get context
    with open(context_file) as f:
        context = json.load(f)

    # get args
    event_time = context['event_time']
    start_time = context['start_time']
    end_time = context['end_time']
    project = context['project']
    sso = get_bool_param(context, 'singlesceneOnly')
    auto_bbox = get_bool_param(context, 'auto_bbox')
    precise_orbit_only = get_bool_param(context, 'precise_orbit_only')
    query = context['query']

    # pair direction:
    #   forward => reference scene is slave
    #   backward => reference scene is master
    pre_ref_pd = get_pair_direction(context, 'preReferencePairDirection')
    pre_search = False if pre_ref_pd == 'none' else True
    post_ref_pd = get_pair_direction(context, 'postReferencePairDirection')
    post_search = False if post_ref_pd == 'none' else True

    # overwrite temporal baseline from context
    if 'temporalBaseline' in context:
        temporalBaseline = int(context['temporalBaseline'])

    # overwrite minMatch
    if 'minMatch' in context:
        minMatch = int(context['minMatch'])

    # overwrite covth
    if 'covth' in context:
        covth = float(context['covth'])

    # log enumerator params
    logging.info("event_time: %s" % event_time)
    logging.info("start_time: %s" % start_time)
    logging.info("end_time: %s" % end_time)
    logging.info("project: %s" % project)
    logging.info("singleceneOnly: %s" % sso)
    logging.info("auto_bbox: %s" % auto_bbox)
    logging.info("preReferencePairDirection: %s" % pre_ref_pd)
    logging.info("postReferencePairDirection: %s" % post_ref_pd)
    logging.info("temporalBaseline: %s" % temporalBaseline)
    logging.info("minMatch: %s" % minMatch)
    logging.info("covth: %s" % covth)

    # get bbox from query
    coords = None
    bbox = [-90., 90., -180., 180.]
    if 'and' in query.get('query', {}).get('filtered', {}).get('filter', {}):
        filts = query['query']['filtered']['filter']['and']
    elif 'geo_shape' in query.get('query', {}).get('filtered',
                                                   {}).get('filter', {}):
        filts = [{
            "geo_shape": query['query']['filtered']['filter']['geo_shape']
        }]
    else:
        filts = []
    for filt in filts:
        if 'geo_shape' in filt:
            coords = filt['geo_shape']['location']['shape']['coordinates']
            roi = {
                'type': 'Polygon',
                'coordinates': coords,
            }
            logger.info("query filter ROI: %s" % json.dumps(roi))
            roi_geom = ogr.CreateGeometryFromJson(json.dumps(roi))
            roi_x_min, roi_x_max, roi_y_min, roi_y_max = roi_geom.GetEnvelope()
            bbox = [roi_y_min, roi_y_max, roi_x_min, roi_x_max]
            logger.info("query filter bbox: %s" % bbox)
            break

    # query docs
    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("dav_url: {}".format(uu.dav_url))
    logger.info("version: {}".format(uu.version))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url
    rest_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url

    # get index name and url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(
        rest_url, uu.grq_index_prefix)
    logger.info("idx: {}".format(uu.grq_index_prefix))
    logger.info("url: {}".format(url))

    # query hits
    query.update({"partial_fields": {
        "partial": {
            "exclude": "city",
        }
    }})
    #logger.info("query: {}".format(json.dumps(query, indent=2)))
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    scan_result = r.json()
    count = scan_result['hits']['total']
    scroll_id = scan_result['_scroll_id']
    ref_hits = []
    while True:
        r = requests.post('%s/_search/scroll?scroll=60m' % rest_url,
                          data=scroll_id)
        res = r.json()
        scroll_id = res['_scroll_id']
        if len(res['hits']['hits']) == 0: break
        ref_hits.extend(res['hits']['hits'])

    # extract reference ids
    ref_ids = {h['_id']: True for h in ref_hits}
    logger.info("ref_ids: {}".format(json.dumps(ref_ids, indent=2)))
    logger.info("ref_hits count: {}".format(len(ref_hits)))

    # group ref hits by track and date
    grouped_refs = group_frames_by_track_date(ref_hits)

    # dedup any reprocessed reference SLCs
    dedup_reprocessed_slcs(grouped_refs['grouped'], grouped_refs['metadata'])

    #logger.info("ref hits: {}".format(json.dumps(grouped_refs['hits'], indent=2)))
    #logger.info("ref sorted_hits: {}".format(pformat(grouped_refs['grouped'])))
    #logger.info("ref slc_dates: {}".format(pformat(grouped_refs['dates'])))
    #logger.info("ref slc_footprints: {}".format(json.dumps(grouped_refs['footprints'], indent=2)))

    # build list reference scenes
    ref_scenes = []
    for track in grouped_refs['grouped']:
        logger.info("track: %s" % track)
        for ref_dt in grouped_refs['grouped'][track]:
            logger.info("reference date: %s" % ref_dt.isoformat())
            if sso:
                for ref_id in grouped_refs['grouped'][track][ref_dt]:
                    ref_scenes.append({
                        'id': [ref_id],
                        'track': track,
                        'date': ref_dt,
                        'location': grouped_refs['footprints'][ref_id],
                        'pre_matches': None,
                        'post_matches': None
                    })
            else:
                union_poly = get_union_geometry(
                    grouped_refs['grouped'][track][ref_dt],
                    grouped_refs['footprints'])
                if len(union_poly['coordinates']) > 1:
                    logger.warn(
                        "Stitching %s will result in a disjoint geometry." %
                        grouped_refs['grouped'][track][ref_dt])
                    logger.warn("Skipping.")
                else:
                    ref_scenes.append({
                        'id': grouped_refs['grouped'][track][ref_dt],
                        'track': track,
                        'date': ref_dt,
                        'location': union_poly,
                        'pre_matches': None,
                        'post_matches': None
                    })

    # find reference scene matches
    projects = []
    stitched_args = []
    auto_bboxes = []
    ifg_ids = []
    master_zip_urls = []
    master_orbit_urls = []
    slave_zip_urls = []
    slave_orbit_urls = []
    swathnums = []
    bboxes = []
    mrpe_dict = {}
    for ref_scene in ref_scenes:
        for ref_id in ref_scene['id']:
            logger.info("#" * 80)
            logger.info("ref id: %s" % ref_id)
            logger.info("ref date: %s" % ref_scene['date'])
            logger.info("ref scene: %s" % pformat(ref_scene))
            mrpe_hits = get_mrpe_hits(rest_url, ref_scene, start_time,
                                      event_time)
            for mrpe_hit in mrpe_hits:
                if mrpe_hit['_id'] in mrpe_dict: continue
                mrpe_dict[mrpe_hit['_id']] = True
                logger.info("mrpe_hit: %s" % pformat(mrpe_hit))
                new_query = {
                    "query": {
                        "bool": {
                            "must": [{
                                "term": {
                                    "_id": mrpe_hit['_id'],
                                }
                            }, {
                                "term": {
                                    "system_version.raw":
                                    mrpe_hit['fields']['partial'][0]
                                    ['system_version'],
                                }
                            }]
                        }
                    }
                }
                new_context = deepcopy(context)
                new_context['query'] = new_query
                tmp_ctx_file = "%s.context.json" % ref_id
                with open('%s.context.json' % ref_id, 'w') as f:
                    json.dump(new_context, f, indent=2)
                (tmp_projects, tmp_stitched_args, tmp_auto_bboxes, tmp_ifg_ids,
                 tmp_master_zip_urls, tmp_master_orbit_urls,
                 tmp_slave_zip_urls, tmp_slave_orbit_urls, tmp_swathnums,
                 tmp_bboxes) = gtc(tmp_ctx_file,
                                   temporalBaseline=temporalBaseline,
                                   id_tmpl=id_tmpl,
                                   minMatch=minMatch,
                                   covth=covth)
                projects.extend(tmp_projects)
                stitched_args.extend(tmp_stitched_args)
                auto_bboxes.extend(tmp_auto_bboxes)
                ifg_ids.extend(tmp_ifg_ids)
                master_zip_urls.extend(tmp_master_zip_urls)
                master_orbit_urls.extend(tmp_master_orbit_urls)
                slave_zip_urls.extend(tmp_slave_zip_urls)
                slave_orbit_urls.extend(tmp_slave_orbit_urls)
                swathnums.extend(tmp_swathnums)
                bboxes.extend(tmp_bboxes)

    return (projects, stitched_args, auto_bboxes, ifg_ids, master_zip_urls,
            master_orbit_urls, slave_zip_urls, slave_orbit_urls, swathnums,
            bboxes)
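The scan/scroll paging in this enumerator (and in the stitch and reprocessing enumerators below) always follows the same pattern: issue a search_type=scan request, then keep posting the scroll id back until an empty page of hits comes back. A sketch of that loop factored into a generator, assuming the same legacy Elasticsearch 1.x scan/scroll API the original code targets:

import json
import requests

def scroll_hits(rest_url, index, query, size=100):
    """Yield every hit for `query`, following the scroll cursor until it is exhausted."""
    rest_url = rest_url.rstrip('/')
    url = "{}/{}/_search?search_type=scan&scroll=60&size={}".format(rest_url, index, size)
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    scroll_id = r.json()['_scroll_id']
    while True:
        r = requests.post('%s/_search/scroll?scroll=60m' % rest_url, data=scroll_id)
        r.raise_for_status()
        res = r.json()
        scroll_id = res['_scroll_id']
        if not res['hits']['hits']:
            break
        for hit in res['hits']['hits']:
            yield hit

Used in place of the inline loop above, ref_hits would simply be list(scroll_hits(rest_url, uu.grq_index_prefix, query)).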
Example #4
def pair_selector(id, margin=0.2, overlap_min=.5, frame_id_margin=3):
    """Return last matching temporal S1 pair."""

    uu = UU()
    print("S1 ID is {}".format(id))
    print("rest_url: {}".format(uu.rest_url))
    print("dav_url: {}".format(uu.dav_url))
    print("version: {}".format(uu.version))
    print("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # extract info
    match = ID_RE.search(id)
    if match is None:
        raise RuntimeError("Swath number extraction error: {}.".format(id))
    swath_num = int(match.group(1))
    vtype = match.group(2)
    yr = int(match.group(3))
    mo = int(match.group(4))
    dy = int(match.group(5))
    hr = int(match.group(6))
    mn = int(match.group(7))
    ss = int(match.group(8))

    # get index name and url
    idx = "{}_{}_s1-swath".format(uu.grq_index_prefix,
                                  uu.version.replace('.', ''))
    url = "{}{}/_search".format(uu.rest_url, idx)
    print("idx: {}".format(idx))
    print("url: {}".format(url))

    # get metadata
    query = {
        "query": {
            "term": {
                "_id": id
            }
        },
        "partial_fields" : {
            "partial" : {
                "exclude" : "city",
            }
        }
    }
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    res = r.json()
    if res['hits']['total'] != 1:
        raise RuntimeError("Failed to find exactly 1 result for {}:\n\n{}".format(id, json.dumps(res, indent=2)))
    hit = res['hits']['hits'][0]['fields']['partial'][0]
    #print(json.dumps(hit, indent=2))

    # find matching ones within +-50 days
    sensingStart = datetime.strptime(hit['metadata']['sensingStart'], '%Y-%m-%dT%H:%M:%S.%f')
    query_start = (sensingStart - timedelta(days=50)).isoformat()
    query_stop = (sensingStart + timedelta(days=50)).isoformat()
    query = {
        "query": {
            "bool": {
                "must": [
                    {
                        "term": {
                            "system_version": hit['system_version']
                        }
                    }, 
                    {
                        "term": {
                            "metadata.trackNumber": hit['metadata']['trackNumber']
                        }
                    }, 
                    {
                        "range": {
                            "metadata.frameID": {
                                "from": int(hit['metadata']['frameID']) - frame_id_margin,
                                "to": int(hit['metadata']['frameID']) + frame_id_margin
                            }
                        }
                    }, 
                    {
                        "bool": {
                            "should": [
                                {
                                    "range": {
                                        "metadata.sensingStart": {
                                            "from": query_start,
                                            "to": query_stop
                                        }
                                    }
                                }, 
                                {
                                    "range": {
                                        "metadata.sensingStop": {
                                            "from": query_start,
                                            "to": query_stop
                                        }
                                    }
                                }
                            ]
                        }
                    }
                ]
            }
        },
        "sort": [
            {
                "starttime": {
                    "order": "desc"
                }
            }
        ], 
        "partial_fields" : {
            "partial" : {
                "exclude" : "city",
            }
        }
    }
    #print(json.dumps(query, indent=2))
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    res = r.json()
    print("total matches: {}".format(res['hits']['total']))
    matches = res['hits']['hits']
    print("matches: {}".format([m['_id'] for m in matches]))

    # filter matches
    filtered_matches = []
    for m in matches:
        h = m['fields']['partial'][0]
        #print("h: {}".format(json.dumps(h, indent=2)))
        if h['id'] == id:
            print("Filtering self: %s" % h['id'])
            continue
        match = ID_RE.search(h['id'])
        if match is None:
            print("Filtering unrecognized id: %s" % h['id'])
            continue
        sn = int(match.group(1))
        if sn != swath_num:
            print("Filtering %s due to unmatched swath number. Got %s but should be %s." % (h['id'], sn, swath_num))
            continue
        vt = match.group(2)
        if vt != vtype:
            print("Filtering %s due to unmatched vtype. Got %s but should be %s." % (h['id'], vt, vtype))
            continue
        overlap_pct = get_overlap(hit['location'], h['location'])
        print("overlap_pct is: %s" % overlap_pct)
        if overlap_pct < overlap_min:
            print("Filtering %s since overlap_pct < min overlap threshold of %s." % (h['id'], overlap_min))
            continue
        filtered_matches.append(h)
    print("total filtered_matches: {}".format(len(filtered_matches)))
    print("filtered_matches: {}".format([fm['id'] for fm in filtered_matches]))

    # return if no filtered matches
    if len(filtered_matches) == 0:
        return filtered_matches

    # get bbox arg
    bbox = np.array(hit['metadata']['bbox'])
    bbox_str = "{0:.2f} {1:.2f} {2:.2f} {3:.2f}".format(
        bbox[:,0].min() - margin,
        bbox[:,0].max() + margin,
        bbox[:,1].min() - margin,
        bbox[:,1].max() + margin)

    # get orbit URL
    orbit_url = fetch(hit['starttime'], hit['endtime'], dry_run=True)
    if orbit_url is None:
        raise RuntimeError("Failed to query for an orbit URL for {}.".format(
                           os.path.basename(hit['metadata']['archive_url'])))

    # result json
    ret_list = []
    for filtered_match in filtered_matches:
        j = {
            "swath": swath_num,
            "bbox_str": bbox_str,
            "id": [ id ],
            "bbox": [ hit['metadata']['bbox'] ],
            "archive_url": [ hit['metadata']['archive_url'] ],
            "frameID": [ hit['metadata']['frameID'] ],
            "trackNumber": [ hit['metadata']['trackNumber'] ],
            "orbit_url": [ orbit_url ],
        }
        #print("filtered match: {}".format(json.dumps(filtered_match, indent=2)))
        st_time = datetime.strptime(filtered_match['metadata']['sensingStart'], '%Y-%m-%dT%H:%M:%S.%f')

        # extract info
        match = ID_RE.search(filtered_match['id'])
        if match is None:
            raise RuntimeError("Swath number extraction error: {}.".format(filtered_match['id']))
        match_swath_num = int(match.group(1))
        match_vtype = match.group(2)
        match_yr = int(match.group(3))
        match_mo = int(match.group(4))
        match_dy = int(match.group(5))
        match_hr = int(match.group(6))
        match_mn = int(match.group(7))
        match_ss = int(match.group(8))

        # get orbit URL
        match_orbit_url = fetch(filtered_match['starttime'], filtered_match['endtime'], dry_run=True)
        if match_orbit_url is None:
            raise RuntimeError("Failed to query for an orbit URL for {}.".format(
                               os.path.basename(filtered_match['metadata']['archive_url'])))

        # each pair is (master, slave); determine which is which
        if st_time > sensingStart:
            ifg_start_dt = datetime(yr, mo, dy, hr, mn, ss)
            ifg_end_dt = datetime(match_yr, match_mo, match_dy, match_hr, match_mn, match_ss)
            j['id'].append(filtered_match['id'])
            j['bbox'].append(filtered_match['metadata']['bbox'])
            j['archive_url'].append(filtered_match['metadata']['archive_url'])
            j['frameID'].append(filtered_match['metadata']['frameID'])
            j['trackNumber'].append(filtered_match['metadata']['trackNumber'])
            j['orbit_url'].append(match_orbit_url)
        else:
            ifg_start_dt = datetime(match_yr, match_mo, match_dy, match_hr, match_mn, match_ss)
            ifg_end_dt = datetime(yr, mo, dy, hr, mn, ss)
            j['id'].insert(0, filtered_match['id'])
            j['bbox'].insert(0, filtered_match['metadata']['bbox'])
            j['archive_url'].insert(0, filtered_match['metadata']['archive_url'])
            j['frameID'].insert(0, filtered_match['metadata']['frameID'])
            j['trackNumber'].insert(0, filtered_match['metadata']['trackNumber'])
            j['orbit_url'].insert(0, match_orbit_url)

        # get ifg orbit type
        orbit_type = 'poeorb'
        for u in j['orbit_url']:
            if RESORB_RE.search(u):
                orbit_type = 'resorb'
                break
        j['orbit_type'] = orbit_type

        # generate ifg id
        ifg_id_tmpl = "S1-IFG_FID{:03d}_TN{:03d}_{:%Y%m%dT%H%M%S}-{:%Y%m%dT%H%M%S}_s{}-{}"
        j['ifg_id'] = ifg_id_tmpl.format(
            filtered_match['metadata']['frameID'],
            filtered_match['metadata']['trackNumber'],
            ifg_start_dt, ifg_end_dt, swath_num, orbit_type)

        # append
        ret_list.append(j)

    # write out pair info
    with open('pair.json', 'w') as f:
        json.dump({'pairs': ret_list}, f, indent=2, sort_keys=True)

    return ret_list
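In the loop above, each output pair is ordered so the earlier acquisition comes first (the master) and the IFG id runs from the earlier to the later sensing date. A small sketch of just that ordering rule, with made-up ids and times for illustration:

from datetime import datetime

def order_pair(ref_id, ref_start, match_id, match_start):
    """Return (master_id, slave_id): the earlier acquisition is the master."""
    if match_start > ref_start:
        return (ref_id, match_id)
    return (match_id, ref_id)

# illustrative values only
print(order_pair("S1A_swath1_ref", datetime(2017, 1, 1),
                 "S1A_swath1_match", datetime(2017, 1, 13)))
# -> ('S1A_swath1_ref', 'S1A_swath1_match')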
Example #5
def get_stitch_cfgs(context_file):
    """Return all possible stitch interferogram configurations."""

    # get context
    with open(context_file) as f:
        context = json.load(f)

    # get args
    project = context['project']
    direction = context.get('direction', 'along')
    subswaths = [int(i) for i in context.get('subswaths', "1 2 3").split()]
    subswaths.sort()
    min_stitch_count = int(context['min_stitch_count'])
    extra_products = [i.strip() for i in context.get('extra_products', 'los.rdr.geo').split()]
    orig_query = context['query']
    logger.info("orig_query: %s" % json.dumps(orig_query, indent=2))

    # cleanse query of ids from triggered rules
    query = clean_query(orig_query)
    logger.info("clean query: %s" % json.dumps(query, indent=2))

    # log enumerator params
    logger.info("project: %s" % project)
    logger.info("direction: %s" % direction)
    logger.info("subswaths: %s" % subswaths)
    logger.info("min_stitch_count: %s" % min_stitch_count)
    logger.info("extra_products: %s" % extra_products)

    # get bbox from query
    coords = None
    bbox = [-90., 90., -180., 180.]
    if 'and' in query.get('query', {}).get('filtered', {}).get('filter', {}):
        filts = query['query']['filtered']['filter']['and']
    elif 'geo_shape' in query.get('query', {}).get('filtered', {}).get('filter', {}):
        filts = [ { "geo_shape": query['query']['filtered']['filter']['geo_shape'] } ]
    else: filts = []
    for filt in filts:
        if 'geo_shape' in filt:
            coords = filt['geo_shape']['location']['shape']['coordinates']
            roi = {
                'type': 'Polygon',
                'coordinates': coords,
            }
            logger.info("query filter ROI: %s" % json.dumps(roi))
            roi_geom = ogr.CreateGeometryFromJson(json.dumps(roi))
            roi_x_min, roi_x_max, roi_y_min, roi_y_max = roi_geom.GetEnvelope()
            bbox = [ roi_y_min, roi_y_max, roi_x_min, roi_x_max ]
            logger.info("query filter bbox: %s" % bbox)
            break

    # query docs
    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("dav_url: {}".format(uu.dav_url))
    logger.info("version: {}".format(uu.version))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url
    rest_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url

    # get index name and url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(rest_url, uu.grq_index_prefix)
    logger.info("idx: {}".format(uu.grq_index_prefix))
    logger.info("url: {}".format(url))

    # query hits
    query.update({
        "partial_fields" : {
            "partial" : {
                "exclude" : "city",
            }
        }
    })
    #logger.info("query: {}".format(json.dumps(query, indent=2)))
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    scan_result = r.json()
    count = scan_result['hits']['total']
    scroll_id = scan_result['_scroll_id']
    hits = []
    while True:
        r = requests.post('%s/_search/scroll?scroll=60m' % rest_url, data=scroll_id)
        res = r.json()
        scroll_id = res['_scroll_id']
        if len(res['hits']['hits']) == 0: break
        hits.extend(res['hits']['hits'])

    # remove partial fields and reformat metadata as expected by stitcher_utils
    #hits = remove_partials(hits)

    # extract reference ids
    ids = { h['_id']: True for h in hits }
    logger.info("ids: {}".format(json.dumps(ids, indent=2)))
    logger.info("hits count: {}".format(len(hits)))

    # dump metadata
    valid_meta_ts_out_file = "valid_meta_ts_out.json"
    with open(valid_meta_ts_out_file, 'w') as f:
        json.dump(hits, f, indent=2)

    # group frames by track and date pairs
    grouped = group_frames_by_track_date(hits)
    logger.info("grouped: %s" % json.dumps(grouped, indent=2))

    # enumerate configs
    projects = []
    directions = []
    extra_products_list = []
    filenames = []
    filename_urls = []
    ifg_ids = []
    base_products = ['filt_topophase.unw.geo', 'filt_topophase.unw.conncomp.geo', 'phsig.cor.geo']
    base_products.extend(extra_products)
    for track in sorted(grouped['grouped']):
        for dt_pair in sorted(grouped['grouped'][track]):
            stitch_count = 0

            # filter scenes without all requested subswaths
            swath_check = {}
            for swath in subswaths:
                if swath not in grouped['grouped'][track][dt_pair]:
                    raise RuntimeError("Did not find singlescene IFGs for subswath %s for track %s dates %s. Check your query results." %
                                       (swath, track, dt_pair))
                for tr, id in grouped['grouped'][track][dt_pair][swath]:
                    swath_check.setdefault(tr, {})[swath] = id
            skip_tr = {}
            for tr in sorted(swath_check):
                for swath in subswaths:
                    if swath not in swath_check[tr]: skip_tr[tr] = True
                
            furls = []
            swathnums = []
            ifg_sts = set()
            ifg_ets = set()
            fnames_tr = {}

            for swath in subswaths:
                swathnums.append(swath)
                for tr, id in grouped['grouped'][track][dt_pair][swath]:
                    if tr in skip_tr:
                        logger.warning("Skipping %s for scene %s since only subswaths %s exist." %
                                       (id, tr, sorted(swath_check[tr].keys())))
                        continue
                    bisect.insort(fnames_tr.setdefault(tr, []),
                                  os.path.join(id, 'merged', 'filt_topophase.unw.geo'))
                    for prod_file in base_products:
                        furls.append({
                            'url': "%s/merged/%s" % (grouped['hits'][id], prod_file),
                            'local_path': "%s/merged/" % id,
                        })
                        furls.append({
                            'url': "%s/merged/%s.xml" % (grouped['hits'][id], prod_file),
                            'local_path': "%s/merged/" % id,
                        })
                    furls.append({
                        'url': "%s/fine_interferogram.xml" % grouped['hits'][id],
                        'local_path': "%s/" % id,
                    })
                    furls.append({
                        'url': "%s/%s.dataset.json" % (grouped['hits'][id], id),
                        'local_path': "%s/_%s.dataset.json" % (id, id),
                    })
                    furls.append({
                        'url': "%s/%s.met.json" % (grouped['hits'][id], id),
                        'local_path': "%s/_%s.met.json" % (id, id),
                    })
                    stitch_count += 1
                    st, et = tr.split('_')
                    ifg_sts.add(st)
                    ifg_ets.add(et)
            ifg_sts = list(ifg_sts)
            ifg_sts.sort()
            ifg_ets = list(ifg_ets)
            ifg_ets.sort()

            # check minimum stitch count met
            if stitch_count < min_stitch_count:
                logger.warning("Failed to find minimum stitch count of %s for track %s date pair %s: %s" %
                               (min_stitch_count, track, dt_pair, stitch_count))
                continue

            # build job params
            projects.append(project)
            directions.append(direction)
            extra_products_list.append(extra_products)
            filenames.append([fnames_tr[tr] for tr in sorted(fnames_tr)])
            filename_urls.append(furls)
            ifg_hash = hashlib.md5(json.dumps([
                projects[-1],
                directions[-1],
                extra_products_list[-1],
                filenames[-1],
                filename_urls[-1],
            ], sort_keys=True)).hexdigest()
            ifg_ids.append(ID_TMPL.format(int(track), ifg_sts[0], ifg_ets[-1], 
                           ''.join(map(str, swathnums)), direction, ifg_hash[0:4]))
    logger.info("projects: %s" % projects)
    logger.info("directions: %s" % directions)
    logger.info("extra_products: %s" % extra_products_list)
    logger.info("filenames: %s" % json.dumps(filenames, indent=2))
    logger.info("filename_urls: %s" % json.dumps(filename_urls, indent=2))
    logger.info("ifg_ids: %s" % ifg_ids)
    return ( projects, directions, extra_products_list, filenames, filename_urls, ifg_ids )
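A date pair is only stitched when an IFG exists for every requested subswath; scenes with a partial set are skipped. A compact sketch of that completeness check, using a swath_check mapping shaped like the one built above (the ids and date pairs are made up):

def scenes_to_skip(swath_check, subswaths):
    """Return the scene keys that are missing at least one requested subswath."""
    return {tr for tr, found in swath_check.items()
            if any(sw not in found for sw in subswaths)}

# illustrative data only
swath_check = {
    "20170101_20170113": {1: "ifg-a", 2: "ifg-b", 3: "ifg-c"},
    "20170113_20170125": {1: "ifg-d", 3: "ifg-e"},  # subswath 2 missing
}
print(scenes_to_skip(swath_check, [1, 2, 3]))  # -> the scene missing subswath 2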
def get_topsapp_cfgs(context_file, id_tmpl=IFG_ID_TMPL):
    """Return all possible topsApp configurations that can be reprocessed with precise orbit."""
    # get context
    with open(context_file) as f:
        context = json.load(f)

    # get dataset type to query
    if id_tmpl == IFG_ID_TMPL: dataset = "S1-IFG"
    elif id_tmpl == RSP_ID_TMPL: dataset = "S1-SLCP"
    else: raise RuntimeError("Failed to recognize dataset from id template: %s" % id_tmpl)

    # get params
    ifg_version = context['ifg_version']
    starttime = context['starttime']
    endtime = context['endtime']
    orb_ds_url = context['url']
    orb_file = context['orbit_file']
    platform = context['platform']

    # get precise orbit date
    orb_dt = get_orbit_date(orb_file)

    # query docs
    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("dav_url: {}".format(uu.dav_url))
    logger.info("version: {}".format(uu.version))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url
    rest_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url

    # get index name and url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(rest_url, uu.grq_index_prefix)
    logger.info("idx: {}".format(uu.grq_index_prefix))
    logger.info("url: {}".format(url))

    # build query
    query = {
        "query": {
            "bool": {
                "must": [
                    {
                        "term": {
                            "dataset.raw": dataset
                        }
                    },
                    {
                        "term": {
                            "version.raw": ifg_version
                        }
                    },
                    {
                        "term": {
                            "metadata.orbit_type.raw": "resorb"
                        }
                    },
                    {
                        "term": {
                            "metadata.platform.raw": platform
                        }
                    },
                    {
                        "bool": {
                            "should": [
                                {
                                    "range": {
                                        "starttime": {
                                            "from": starttime,
                                            "to": endtime
                                        }
                                    }
                                },
                                {
                                    "range": {
                                        "endtime": {
                                            "from": starttime,
                                            "to": endtime
                                        }
                                    }
                                }
                            ]
                        }
                    }
                ]
            }
        },
        "partial_fields" : {
            "partial" : {
                "exclude" : "city",
            }
        }
    }
    logger.info("query: {}".format(json.dumps(query, indent=2)))
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    scan_result = r.json()
    count = scan_result['hits']['total']
    scroll_id = scan_result['_scroll_id']
    hits = []
    while True:
        r = requests.post('%s/_search/scroll?scroll=60m' % rest_url, data=scroll_id)
        res = r.json()
        scroll_id = res['_scroll_id']
        if len(res['hits']['hits']) == 0: break
        hits.extend(res['hits']['hits'])
    #logger.info("hits: {}".format(json.dumps(hits, indent=2)))
    logger.info("hits count: {}".format(len(hits)))

    # collect topsapps cfgs
    projects = []
    stitched_args = []
    auto_bboxes = []
    master_zip_urls = []
    slave_zip_urls = []
    swathnums = []
    bboxes = []
    master_orbit_urls = []
    slave_orbit_urls = []
    ifg_ids = []
    for hit in hits:
        # propagate unmodified params
        ifg_ctx = hit['fields']['partial'][0]['metadata']['context']
        sfl_ifg_ctx = ifg_ctx.get('context', {})

        # old id
        ifg_id = ifg_ctx['id']

        # determine orbit to replace
        logger.info("latest precise orbit file date: {}".format(orb_dt.isoformat('T')))
        mo_dt = get_orbit_date(ifg_ctx['master_orbit_file'])
        logger.info("original master orbit file date: {}".format(mo_dt.isoformat('T')))
        so_dt = get_orbit_date(ifg_ctx['slave_orbit_file'])
        logger.info("original slave orbit file date: {}".format(so_dt.isoformat('T')))
        if orb_dt == mo_dt:
            master_orbit_urls.append(os.path.join(orb_ds_url, orb_file))
            slave_orbit_urls.append(ifg_ctx['slave_orbit_url'])
        elif orb_dt == so_dt:
            master_orbit_urls.append(ifg_ctx['master_orbit_url'])
            slave_orbit_urls.append(os.path.join(orb_ds_url, orb_file))
        else:
            logger.info("Precise orbit file {} doesn't align with S1-IFG {}. Skipping.".format(orb_file, ifg_id))
            continue

        logger.info("sfl_ifg_ctx: {}".format(json.dumps(sfl_ifg_ctx, indent=2)))
    
        # carry over the rest of the params
        projects.append(ifg_ctx['project'])
        stitched_args.append(False if len(ifg_ctx['master_zip_url']) == 1 or len(ifg_ctx['slave_zip_url']) == 1 else True)
        auto_bboxes.append(ifg_ctx['auto_bbox'])
        master_zip_urls.append(ifg_ctx['master_zip_url'])
        slave_zip_urls.append(ifg_ctx['slave_zip_url'])
        swathnums.append(ifg_ctx['swathnum'])
        bboxes.append(ifg_ctx['bbox'])

        # determine orbit type of product in case both master and slave orbits were restituted
        if POEORB_RE.search(master_orbit_urls[-1]) and POEORB_RE.search(slave_orbit_urls[-1]):
            ifg_id = ifg_id.replace('resorb', 'poeorb')        

        # calculate hash and new ifg id
        ifg_hash = hashlib.md5(json.dumps([
            id_tmpl,
            stitched_args[-1],
            master_zip_urls[-1],
            master_orbit_urls[-1],
            slave_zip_urls[-1],
            slave_orbit_urls[-1],
            swathnums[-1],
            #bboxes[-1],
            #auto_bboxes[-1],
            projects[-1],
            ifg_ctx.get('azimuth_looks', sfl_ifg_ctx.get('azimuth_looks', 3)),
            ifg_ctx.get('range_looks', sfl_ifg_ctx.get('range_looks', 7)),
            ifg_ctx.get('filter_strength', sfl_ifg_ctx.get('filter_strength', 0.5)),
            ifg_ctx.get('dem_type', sfl_ifg_ctx.get('dem_type', 'SRTM')),
        ])).hexdigest()
        ifg_id = ifg_id[0:-4] + ifg_hash[0:4]
        ifg_ids.append(ifg_id)

    logger.info("Found {} {} datasets to reprocess.".format(len(ifg_ids), dataset))

    return ( projects, stitched_args, auto_bboxes, ifg_ids, master_zip_urls,
             master_orbit_urls, slave_zip_urls, slave_orbit_urls, swathnums,
             bboxes )
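Both enumerators in this example derive the trailing four characters of the product id by hashing a JSON dump of the job parameters (the stitch enumerator passes sort_keys=True, the reprocessing one does not). A sketch of that idiom; the original code is Python 2 and hashes the string directly, so the .encode() here is only needed to make the sketch also run on Python 3:

import hashlib
import json

def ifg_hash_suffix(params, length=4):
    """Return the first `length` hex characters of the md5 of the params dump."""
    digest = hashlib.md5(json.dumps(params, sort_keys=True).encode("utf-8"))
    return digest.hexdigest()[:length]

# illustrative values only
print(ifg_hash_suffix(["my-project", False, ["master.zip"], ["slave.zip"], 1]))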
Example #7
def initiate_standard_product_job(context_file):
    # get context
    with open(context_file) as f:
        context = json.load(f)

    # get args
    project = context['project']
    master_ids = [i.strip() for i in context['master_ids']]
    slave_ids = [i.strip() for i in context['slave_ids']]
    subswaths = [1, 2, 3]  #context['subswaths']
    azimuth_looks = int(context['azimuth_looks'])
    range_looks = int(context['range_looks'])
    filter_strength = float(context['filter_strength'])
    precise_orbit_only = get_bool_param(context, 'precise_orbit_only')
    job_priority = int(context['priority'])

    # log inputs
    logger.info("project: {}".format(project))
    logger.info("master_ids: {}".format(master_ids))
    logger.info("slave_ids: {}".format(slave_ids))
    logger.info("subswaths: {}".format(subswaths))
    logger.info("azimuth_looks: {}".format(azimuth_looks))
    logger.info("range_looks: {}".format(range_looks))
    logger.info("filter_strength: {}".format(filter_strength))
    logger.info("precise_orbit_only: {}".format(precise_orbit_only))

    # query docs
    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("dav_url: {}".format(uu.dav_url))
    logger.info("version: {}".format(uu.version))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url
    rest_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url

    # get index name and url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(
        rest_url, uu.grq_index_prefix)
    logger.info("idx: {}".format(uu.grq_index_prefix))
    logger.info("url: {}".format(url))

    # get metadata
    master_md = {i: get_metadata(i, rest_url, url) for i in master_ids}
    #logger.info("master_md: {}".format(json.dumps(master_md, indent=2)))
    slave_md = {i: get_metadata(i, rest_url, url) for i in slave_ids}
    #logger.info("slave_md: {}".format(json.dumps(slave_md, indent=2)))

    # get tracks
    track = get_track(master_md)
    logger.info("master_track: {}".format(track))
    slave_track = get_track(slave_md)
    logger.info("slave_track: {}".format(slave_track))
    if track != slave_track:
        raise RuntimeError(
            "Slave track {} doesn't match master track {}.".format(
                slave_track, track))

    ref_scene = master_md
    if len(master_ids) == 1:
        ref_scene = master_md
    elif len(slave_ids) == 1:
        ref_scene = slave_md
    elif len(master_ids) > 1 and len(slave_ids) > 1:
        raise RuntimeError("Single Scene Reference Required.")

    # get urls (prefer s3)
    master_urls = get_urls(master_md)
    logger.info("master_urls: {}".format(master_urls))
    slave_urls = get_urls(slave_md)
    logger.info("slave_ids: {}".format(slave_urls))

    # get dem_type
    dem_type = get_dem_type(master_md)
    logger.info("master_dem_type: {}".format(dem_type))
    slave_dem_type = get_dem_type(slave_md)
    logger.info("slave_dem_type: {}".format(slave_dem_type))
    if dem_type != slave_dem_type:
        dem_type = "SRTM+v3"

    # get orbits
    master_orbit_url = get_orbit(master_ids)
    logger.info("master_orbit_url: {}".format(master_orbit_url))
    slave_orbit_url = get_orbit(slave_ids)
    logger.info("slave_orbit_url: {}".format(slave_orbit_url))

    # get orbit type
    orbit_type = 'poeorb'
    for o in (master_orbit_url, slave_orbit_url):
        if RESORB_RE.search(o):
            orbit_type = 'resorb'
            break

    # fail if we expect only precise orbits
    #if precise_orbit_only and orbit_type == 'resorb':
    #raise RuntimeError("Precise orbit required.")

    # get ifg start and end dates
    ifg_master_dt, ifg_slave_dt = get_ifg_dates(master_ids, slave_ids)

    #submit jobs
    projects = []
    stitched_args = []
    ifg_ids = []
    master_zip_urls = []
    master_orbit_urls = []
    slave_zip_urls = []
    slave_orbit_urls = []
    swathnums = []
    bboxes = []
    auto_bboxes = []
    dem_types = []
    job_priorities = []
    orbit_dict = {}

    # generate job configs
    bbox = [-90., 90., -180., 180.]
    auto_bbox = True
    id_tmpl = IFG_ID_TMPL

    stitched_args.append(
        False if len(master_ids) == 1 or len(slave_ids) == 1 else True)
    master_zip_urls.append(master_urls)
    master_orbit_urls.append(master_orbit_url)
    slave_zip_urls.append(slave_urls)
    slave_orbit_urls.append(slave_orbit_url)
    swathnums.append(subswaths)
    bboxes.append(bbox)
    auto_bboxes.append(auto_bbox)
    projects.append(project)
    dem_types.append(dem_type)
    job_priorities.append(job_priority)

    ifg_hash = hashlib.md5(
        json.dumps([
            id_tmpl,
            stitched_args[-1],
            master_zip_urls[-1],
            master_orbit_urls[-1],
            slave_zip_urls[-1],
            slave_orbit_urls[-1],
            #swathnums[-1],
            #bboxes[-1],
            #auto_bboxes[-1],
            projects[-1],
            #azimuth_looks,
            #range_looks,
            filter_strength,
            dem_type
        ])).hexdigest()
    ifg_ids.append(
        id_tmpl.format('M', len(master_ids), len(slave_ids), track,
                       ifg_master_dt, ifg_slave_dt, orbit_type, ifg_hash[0:4]))

    logger.info("\n\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n" %
                (projects, stitched_args, auto_bboxes, ifg_ids,
                 master_zip_urls, master_orbit_urls, slave_zip_urls,
                 slave_orbit_urls, swathnums, bboxes, dem_types))
    return (projects, stitched_args, auto_bboxes, ifg_ids, master_zip_urls,
            master_orbit_urls, slave_zip_urls, slave_orbit_urls, swathnums,
            bboxes, dem_types, job_priorities)
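The orbit-type decision above marks the pair as 'poeorb' only when both the master and slave orbit URLs are precise; a single restituted orbit makes the whole pair 'resorb'. A sketch of that check; RESORB_PATTERN below is a stand-in for the module-level RESORB_RE, which is defined outside these excerpts, and the pattern is an assumption based on the RESORB marker in Sentinel-1 orbit file names.

import re

RESORB_PATTERN = re.compile(r'RESORB')  # assumed stand-in for RESORB_RE

def pair_orbit_type(master_orbit_url, slave_orbit_url):
    """Return 'resorb' if either orbit is restituted, otherwise 'poeorb'."""
    for url in (master_orbit_url, slave_orbit_url):
        if RESORB_PATTERN.search(url):
            return 'resorb'
    return 'poeorb'

# illustrative file names only
print(pair_orbit_type("S1A_OPER_AUX_POEORB_example.EOF",
                      "S1A_OPER_AUX_RESORB_example.EOF"))  # -> resorb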
Example #8
def get_topsapp_cfgs(context_file,
                     temporalBaseline=72,
                     id_tmpl=IFG_ID_TMPL,
                     minMatch=0,
                     covth=.95):
    """Return all possible topsApp configurations."""
    # get context
    with open(context_file) as f:
        context = json.load(f)

    # get args
    project = context['project']
    sso = get_bool_param(context, 'singlesceneOnly')
    auto_bbox = get_bool_param(context, 'auto_bbox')
    precise_orbit_only = get_bool_param(context, 'precise_orbit_only')
    query = context['query']

    # pair direction:
    #   forward => reference scene is slave
    #   backward => reference scene is master
    pre_ref_pd = get_pair_direction(context, 'preReferencePairDirection')
    pre_search = False if pre_ref_pd == 'none' else True
    post_ref_pd = get_pair_direction(context, 'postReferencePairDirection')
    post_search = False if post_ref_pd == 'none' else True

    # overwrite temporal baseline from context
    if 'temporalBaseline' in context:
        temporalBaseline = int(context['temporalBaseline'])

    # overwrite minMatch
    if 'minMatch' in context:
        minMatch = int(context['minMatch'])

    # overwrite covth
    if 'covth' in context:
        covth = float(context['covth'])

    # log enumerator params
    logging.info("project: %s" % project)
    logging.info("singleceneOnly: %s" % sso)
    logging.info("auto_bbox: %s" % auto_bbox)
    logging.info("preReferencePairDirection: %s" % pre_ref_pd)
    logging.info("postReferencePairDirection: %s" % post_ref_pd)
    logging.info("temporalBaseline: %s" % temporalBaseline)
    logging.info("minMatch: %s" % minMatch)
    logging.info("covth: %s" % covth)

    # get bbox from query
    coords = None
    bbox = [-90., 90., -180., 180.]
    if 'and' in query.get('query', {}).get('filtered', {}).get('filter', {}):
        filts = query['query']['filtered']['filter']['and']
    elif 'geo_shape' in query.get('query', {}).get('filtered',
                                                   {}).get('filter', {}):
        filts = [{
            "geo_shape": query['query']['filtered']['filter']['geo_shape']
        }]
    else:
        filts = []
    for filt in filts:
        if 'geo_shape' in filt:
            coords = filt['geo_shape']['location']['shape']['coordinates']
            roi = {
                'type': 'Polygon',
                'coordinates': coords,
            }
            logger.info("query filter ROI: %s" % json.dumps(roi))
            roi_geom = ogr.CreateGeometryFromJson(json.dumps(roi))
            roi_x_min, roi_x_max, roi_y_min, roi_y_max = roi_geom.GetEnvelope()
            bbox = [roi_y_min, roi_y_max, roi_x_min, roi_x_max]
            logger.info("query filter bbox: %s" % bbox)
            break

    # query docs
    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("dav_url: {}".format(uu.dav_url))
    logger.info("version: {}".format(uu.version))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url
    rest_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url

    # get index name and url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(
        rest_url, uu.grq_index_prefix)
    logger.info("idx: {}".format(uu.grq_index_prefix))
    logger.info("url: {}".format(url))

    # query hits
    query.update({"partial_fields": {
        "partial": {
            "exclude": "city",
        }
    }})
    #logger.info("query: {}".format(json.dumps(query, indent=2)))
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    scan_result = r.json()
    count = scan_result['hits']['total']
    scroll_id = scan_result['_scroll_id']
    ref_hits = []
    while True:
        r = requests.post('%s/_search/scroll?scroll=60m' % rest_url,
                          data=scroll_id)
        res = r.json()
        scroll_id = res['_scroll_id']
        if len(res['hits']['hits']) == 0: break
        ref_hits.extend(res['hits']['hits'])

    # extract reference ids
    ref_ids = {h['_id']: True for h in ref_hits}
    logger.info("ref_ids: {}".format(json.dumps(ref_ids, indent=2)))
    logger.info("ref_hits count: {}".format(len(ref_hits)))

    # group ref hits by track and date
    grouped_refs = group_frames_by_track_date(ref_hits)

    # dedup any reprocessed reference SLCs
    dedup_reprocessed_slcs(grouped_refs['grouped'], grouped_refs['metadata'])

    #logger.info("ref hits: {}".format(json.dumps(grouped_refs['hits'], indent=2)))
    #logger.info("ref sorted_hits: {}".format(pformat(grouped_refs['grouped'])))
    #logger.info("ref slc_dates: {}".format(pformat(grouped_refs['dates'])))
    #logger.info("ref slc_footprints: {}".format(json.dumps(grouped_refs['footprints'], indent=2)))

    # build list reference scenes
    ref_scenes = []
    for track in grouped_refs['grouped']:
        logger.info("track: %s" % track)
        for ref_dt in grouped_refs['grouped'][track]:
            logger.info("reference date: %s" % ref_dt.isoformat())
            if sso:
                for ref_id in grouped_refs['grouped'][track][ref_dt]:
                    ref_scenes.append({
                        'id': [ref_id],
                        'track': track,
                        'date': ref_dt,
                        'location': grouped_refs['footprints'][ref_id],
                        'pre_matches': None,
                        'post_matches': None
                    })
            else:
                union_poly = get_union_geometry(
                    grouped_refs['grouped'][track][ref_dt],
                    grouped_refs['footprints'])
                if len(union_poly['coordinates']) > 1:
                    logger.warn(
                        "Stitching %s will result in a disjoint geometry." %
                        grouped_refs['grouped'][track][ref_dt])
                    logger.warn("Skipping.")
                else:
                    ref_scenes.append({
                        'id': grouped_refs['grouped'][track][ref_dt],
                        'track': track,
                        'date': ref_dt,
                        'location': union_poly,
                        'pre_matches': None,
                        'post_matches': None
                    })

    # find reference scene matches
    for ref_scene in ref_scenes:
        logger.info("#" * 80)
        logger.info("ref id: %s" % ref_scene['id'])
        logger.info("ref date: %s" % ref_scene['date'])
        if pre_search:
            logger.info("*" * 80)
            pre_matches = group_frames_by_track_date(
                get_pair_hits(rest_url,
                              ref_scene,
                              'pre',
                              temporal_baseline=temporalBaseline,
                              min_match=minMatch,
                              covth=covth))
            dedup_reprocessed_slcs(pre_matches['grouped'],
                                   pre_matches['metadata'])
            ref_scene['pre_matches'] = pre_matches
        if post_search:
            logger.info("*" * 80)
            post_matches = group_frames_by_track_date(
                get_pair_hits(rest_url,
                              ref_scene,
                              'post',
                              temporal_baseline=temporalBaseline,
                              min_match=minMatch,
                              covth=covth))
            dedup_reprocessed_slcs(post_matches['grouped'],
                                   post_matches['metadata'])
            ref_scene['post_matches'] = post_matches

    #logger.info("ref_scenes: {}".format(pformat(ref_scenes)))
    #logger.info("ref_scenes count: {}".format(len(ref_scenes)))

    #submit jobs
    projects = []
    stitched_args = []
    ifg_ids = []
    master_zip_urls = []
    master_orbit_urls = []
    slave_zip_urls = []
    slave_orbit_urls = []
    swathnums = []
    bboxes = []
    auto_bboxes = []
    orbit_dict = {}
    for ref_scene in ref_scenes:
        ref_ids = ref_scene['id']
        track = ref_scene['track']
        ref_dts = []
        for i in ref_ids:
            ref_dts.extend(grouped_refs['dates'][i])
        #logger.info("ref_ids: %s" % ref_ids)
        #logger.info("ref_dts: %s" % ref_dts)

        # set orbit urls and cache for reference dates
        ref_dt_orb = "%s_%s" % (ref_dts[0].isoformat(),
                                ref_dts[-1].isoformat())
        if ref_dt_orb not in orbit_dict:
            match = SLC_RE.search(ref_ids[0])
            if not match:
                raise RuntimeError("Failed to recognize SLC ID %s." %
                                   ref_ids[0])
            mission = match.group('mission')
            orbit_dict[ref_dt_orb] = fetch("%s.0" % ref_dts[0].isoformat(),
                                           "%s.0" % ref_dts[-1].isoformat(),
                                           mission=mission,
                                           dry_run=True)
            if orbit_dict[ref_dt_orb] is None:
                raise RuntimeError(
                    "Failed to query for an orbit URL for track {} {} {}.".
                    format(track, ref_dts[0], ref_dts[-1]))

        # generate jobs for pre-reference pairs
        if ref_scene['pre_matches'] is not None:
            if track in ref_scene['pre_matches']['grouped']:
                matched_days = ref_scene['pre_matches']['grouped'][track]
                for matched_day, matched_ids in matched_days.iteritems():
                    matched_dts = []
                    for i in matched_ids:
                        matched_dts.extend(
                            ref_scene['pre_matches']['dates'][i])
                    #logger.info("pre_matches matched_ids: %s" % matched_ids)
                    #logger.info("pre_matches matched_dts: %s" % matched_dts)
                    all_dts = list(chain(ref_dts, matched_dts))
                    all_dts.sort()

                    # set orbit urls and cache for matched dates
                    matched_dt_orb = "%s_%s" % (matched_dts[0].isoformat(),
                                                matched_dts[-1].isoformat())
                    if matched_dt_orb not in orbit_dict:
                        match = SLC_RE.search(matched_ids[0])
                        if not match:
                            raise RuntimeError(
                                "Failed to recognize SLC ID %s." %
                                matched_ids[0])
                        mission = match.group('mission')
                        orbit_dict[matched_dt_orb] = fetch(
                            "%s.0" % matched_dts[0].isoformat(),
                            "%s.0" % matched_dts[-1].isoformat(),
                            mission=mission,
                            dry_run=True)
                        if orbit_dict[matched_dt_orb] is None:
                            raise RuntimeError(
                                "Failed to query for an orbit URL for track {} {} {}."
                                .format(track, matched_dts[0],
                                        matched_dts[-1]))

                    # get orbit type
                    orbit_type = 'poeorb'
                    for o in [
                            orbit_dict[ref_dt_orb], orbit_dict[matched_dt_orb]
                    ]:
                        if RESORB_RE.search(o):
                            orbit_type = 'resorb'
                            break

                    # filter if we expect only precise orbits
                    if precise_orbit_only and orbit_type == 'resorb':
                        logger.info(
                            "Precise orbit required. Filtering job configured with restituted orbit."
                        )
                    else:
                        # create jobs for backwards pair
                        if pre_ref_pd in ('backward', 'both'):
                            ifg_master_dt = all_dts[-1]
                            ifg_slave_dt = all_dts[0]
                            for swathnum in [1, 2, 3]:
                                stitched_args.append(
                                    False if len(ref_ids) == 1
                                    or len(matched_ids) == 1 else True)
                                master_zip_urls.append(
                                    [grouped_refs['hits'][i] for i in ref_ids])
                                master_orbit_urls.append(
                                    orbit_dict[ref_dt_orb])
                                slave_zip_urls.append([
                                    ref_scene['pre_matches']['hits'][i]
                                    for i in matched_ids
                                ])
                                slave_orbit_urls.append(
                                    orbit_dict[matched_dt_orb])
                                swathnums.append(swathnum)
                                bboxes.append(bbox)
                                auto_bboxes.append(auto_bbox)
                                projects.append(project)
                                ifg_hash = hashlib.md5(
                                    json.dumps([
                                        id_tmpl,
                                        stitched_args[-1],
                                        master_zip_urls[-1],
                                        master_orbit_urls[-1],
                                        slave_zip_urls[-1],
                                        slave_orbit_urls[-1],
                                        swathnums[-1],
                                        #bboxes[-1],
                                        #auto_bboxes[-1],
                                        projects[-1],
                                        context['azimuth_looks'],
                                        context['range_looks'],
                                        context['filter_strength'],
                                        context.get('dem_type', 'SRTM+v3'),
                                    ])).hexdigest()
                                ifg_ids.append(
                                    id_tmpl.format('M', len(ref_ids),
                                                   len(matched_ids), track,
                                                   ifg_master_dt, ifg_slave_dt,
                                                   swathnum, orbit_type,
                                                   ifg_hash[0:4]))

                        # create jobs for forward pair
                        if pre_ref_pd in ('forward', 'both'):
                            ifg_master_dt = all_dts[0]
                            ifg_slave_dt = all_dts[-1]
                            for swathnum in [1, 2, 3]:
                                stitched_args.append(
                                    False if len(ref_ids) == 1
                                    or len(matched_ids) == 1 else True)
                                master_zip_urls.append([
                                    ref_scene['pre_matches']['hits'][i]
                                    for i in matched_ids
                                ])
                                master_orbit_urls.append(
                                    orbit_dict[matched_dt_orb])
                                slave_zip_urls.append(
                                    [grouped_refs['hits'][i] for i in ref_ids])
                                slave_orbit_urls.append(orbit_dict[ref_dt_orb])
                                swathnums.append(swathnum)
                                bboxes.append(bbox)
                                auto_bboxes.append(auto_bbox)
                                projects.append(project)
                                ifg_hash = hashlib.md5(
                                    json.dumps([
                                        id_tmpl,
                                        stitched_args[-1],
                                        master_zip_urls[-1],
                                        master_orbit_urls[-1],
                                        slave_zip_urls[-1],
                                        slave_orbit_urls[-1],
                                        swathnums[-1],
                                        #bboxes[-1],
                                        #auto_bboxes[-1],
                                        projects[-1],
                                        context['azimuth_looks'],
                                        context['range_looks'],
                                        context['filter_strength'],
                                        context.get('dem_type', 'SRTM+v3'),
                                    ])).hexdigest()
                                ifg_ids.append(
                                    id_tmpl.format('S', len(matched_ids),
                                                   len(ref_ids), track,
                                                   ifg_master_dt, ifg_slave_dt,
                                                   swathnum, orbit_type,
                                                   ifg_hash[0:4]))

        # generate jobs for post-reference pairs
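        # note: this block appears to mirror the pre-reference block above with
        # the reference scene's role flipped: "backward" pairs take the later
        # acquisition as master, "forward" pairs take the earlier one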
        if ref_scene['post_matches'] is not None:
            if track in ref_scene['post_matches']['grouped']:
                matched_days = ref_scene['post_matches']['grouped'][track]
                for matched_day, matched_ids in matched_days.iteritems():
                    matched_dts = []
                    for i in matched_ids:
                        matched_dts.extend(
                            ref_scene['post_matches']['dates'][i])
                    #logger.info("post_matches matched_ids: %s" % matched_ids)
                    #logger.info("post_matches matched_dts: %s" % matched_dts)
                    all_dts = list(chain(ref_dts, matched_dts))
                    all_dts.sort()

                    # set orbit urls and cache for matched dates
                    matched_dt_orb = "%s_%s" % (matched_dts[0].isoformat(),
                                                matched_dts[-1].isoformat())
                    if matched_dt_orb not in orbit_dict:
                        match = SLC_RE.search(matched_ids[0])
                        if not match:
                            raise RuntimeError(
                                "Failed to recognize SLC ID %s." %
                                matched_ids[0])
                        mission = match.group('mission')
                        orbit_dict[matched_dt_orb] = fetch(
                            "%s.0" % matched_dts[0].isoformat(),
                            "%s.0" % matched_dts[-1].isoformat(),
                            mission=mission,
                            dry_run=True)
                        if orbit_dict[matched_dt_orb] is None:
                            raise RuntimeError(
                                "Failed to query for an orbit URL for track {} {} {}."
                                .format(track, matched_dts[0],
                                        matched_dts[-1]))

                    # get orbit type
                    orbit_type = 'poeorb'
                    for o in [
                            orbit_dict[ref_dt_orb], orbit_dict[matched_dt_orb]
                    ]:
                        if RESORB_RE.search(o):
                            orbit_type = 'resorb'
                            break

                    # filter if we expect only precise orbits
                    if precise_orbit_only and orbit_type == 'resorb':
                        logger.info(
                            "Precise orbit required. Filtering job configured with restituted orbit."
                        )
                    else:
                        # create jobs for backwards pair
                        if post_ref_pd in ('backward', 'both'):
                            ifg_master_dt = all_dts[-1]
                            ifg_slave_dt = all_dts[0]
                            for swathnum in [1, 2, 3]:
                                stitched_args.append(
                                    False if len(ref_ids) == 1
                                    or len(matched_ids) == 1 else True)
                                master_zip_urls.append([
                                    ref_scene['post_matches']['hits'][i]
                                    for i in matched_ids
                                ])
                                master_orbit_urls.append(
                                    orbit_dict[matched_dt_orb])
                                slave_zip_urls.append(
                                    [grouped_refs['hits'][i] for i in ref_ids])
                                slave_orbit_urls.append(orbit_dict[ref_dt_orb])
                                swathnums.append(swathnum)
                                bboxes.append(bbox)
                                auto_bboxes.append(auto_bbox)
                                projects.append(project)
                                ifg_hash = hashlib.md5(
                                    json.dumps([
                                        id_tmpl,
                                        stitched_args[-1],
                                        master_zip_urls[-1],
                                        master_orbit_urls[-1],
                                        slave_zip_urls[-1],
                                        slave_orbit_urls[-1],
                                        swathnums[-1],
                                        #bboxes[-1],
                                        #auto_bboxes[-1],
                                        projects[-1],
                                        context['azimuth_looks'],
                                        context['range_looks'],
                                        context['filter_strength'],
                                        context.get('dem_type', 'SRTM+v3'),
                                    ])).hexdigest()
                                ifg_ids.append(
                                    id_tmpl.format('S', len(matched_ids),
                                                   len(ref_ids), track,
                                                   ifg_master_dt, ifg_slave_dt,
                                                   swathnum, orbit_type,
                                                   ifg_hash[0:4]))

                        # create jobs for forward pair
                        if post_ref_pd in ('forward', 'both'):
                            ifg_master_dt = all_dts[0]
                            ifg_slave_dt = all_dts[-1]
                            for swathnum in [1, 2, 3]:
                                stitched_args.append(
                                    False if len(ref_ids) == 1
                                    or len(matched_ids) == 1 else True)
                                master_zip_urls.append(
                                    [grouped_refs['hits'][i] for i in ref_ids])
                                master_orbit_urls.append(
                                    orbit_dict[ref_dt_orb])
                                slave_zip_urls.append([
                                    ref_scene['post_matches']['hits'][i]
                                    for i in matched_ids
                                ])
                                slave_orbit_urls.append(
                                    orbit_dict[matched_dt_orb])
                                swathnums.append(swathnum)
                                bboxes.append(bbox)
                                auto_bboxes.append(auto_bbox)
                                projects.append(project)
                                ifg_hash = hashlib.md5(
                                    json.dumps([
                                        id_tmpl,
                                        stitched_args[-1],
                                        master_zip_urls[-1],
                                        master_orbit_urls[-1],
                                        slave_zip_urls[-1],
                                        slave_orbit_urls[-1],
                                        swathnums[-1],
                                        #bboxes[-1],
                                        #auto_bboxes[-1],
                                        projects[-1],
                                        context['azimuth_looks'],
                                        context['range_looks'],
                                        context['filter_strength'],
                                        context.get('dem_type', 'SRTM+v3'),
                                    ])).hexdigest()
                                ifg_ids.append(
                                    id_tmpl.format('M', len(ref_ids),
                                                   len(matched_ids), track,
                                                   ifg_master_dt, ifg_slave_dt,
                                                   swathnum, orbit_type,
                                                   ifg_hash[0:4]))

    return (projects, stitched_args, auto_bboxes, ifg_ids, master_zip_urls,
            master_orbit_urls, slave_zip_urls, slave_orbit_urls, swathnums,
            bboxes)
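The ten lists returned above are parallel: index i across all of them describes one candidate interferogram job. Below is a minimal sketch of how a caller might regroup them into per-job dictionaries; the build_job_payloads name and the field grouping are illustrative assumptions, not part of the original code.

def build_job_payloads(cfgs):
    """Zip the parallel lists returned by the enumerator into one dict per job.

    Illustrative helper only; the choice of dict keys is an assumption.
    """
    (projects, stitched_args, auto_bboxes, ifg_ids, master_zip_urls,
     master_orbit_urls, slave_zip_urls, slave_orbit_urls, swathnums,
     bboxes) = cfgs
    payloads = []
    for i, ifg_id in enumerate(ifg_ids):
        payloads.append({
            "project": projects[i],
            "stitched": stitched_args[i],
            "auto_bbox": auto_bboxes[i],
            "ifg_id": ifg_id,
            "master_zip_urls": master_zip_urls[i],
            "master_orbit_url": master_orbit_urls[i],
            "slave_zip_urls": slave_zip_urls[i],
            "slave_orbit_url": slave_orbit_urls[i],
            "swathnum": swathnums[i],
            "bbox": bboxes[i],
        })
    return payloads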
Example #9
0
def get_topsapp_cfg(context_file, id_tmpl=IFG_ID_TMPL):
    """Return all possible topsApp configurations."""
    # get context
    with open(context_file) as f:
        context = json.load(f)

    # get args
    project = context['project']
    master_ids = [i.strip() for i in context['master_ids'].split()]
    slave_ids = [i.strip() for i in context['slave_ids'].split()]
    subswaths = [int(i.strip()) for i in context['subswaths'].split()]
    azimuth_looks = int(context['azimuth_looks'])
    range_looks = int(context['range_looks'])
    precise_orbit_only = get_bool_param(context, 'precise_orbit_only')

    # log inputs
    logger.info("project: {}".format(project))
    logger.info("master_ids: {}".format(master_ids))
    logger.info("slave_ids: {}".format(slave_ids))
    logger.info("subswaths: {}".format(subswaths))
    logger.info("azimuth_looks: {}".format(azimuth_looks))
    logger.info("range_looks: {}".format(range_looks))
    logger.info("precise_orbit_only: {}".format(precise_orbit_only))

    # query docs
    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("dav_url: {}".format(uu.dav_url))
    logger.info("version: {}".format(uu.version))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url
    rest_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url

    # get index name and url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(rest_url, uu.grq_index_prefix)
    logger.info("idx: {}".format(uu.grq_index_prefix))
    logger.info("url: {}".format(url))

    # get metadata
    master_md = { i:get_metadata(i, rest_url, url) for i in master_ids }
    #logger.info("master_md: {}".format(json.dumps(master_md, indent=2)))
    slave_md = { i:get_metadata(i, rest_url, url) for i in slave_ids }
    #logger.info("slave_md: {}".format(json.dumps(slave_md, indent=2)))

    # get tracks
    track = get_track(master_md)
    logger.info("master_track: {}".format(track))
    slave_track = get_track(slave_md)
    logger.info("slave_track: {}".format(slave_track))
    if track != slave_track:
        raise RuntimeError("Slave track {} doesn't match master track {}.".format(slave_track, track))

    # get urls (prefer s3)
    master_urls = get_urls(master_md) 
    logger.info("master_urls: {}".format(master_urls))
    slave_urls = get_urls(slave_md) 
    logger.info("slave_ids: {}".format(slave_urls))

    # get orbits
    master_orbit_url = get_orbit(master_ids)
    logger.info("master_orbit_url: {}".format(master_orbit_url))
    slave_orbit_url = get_orbit(slave_ids)
    logger.info("slave_orbit_url: {}".format(slave_orbit_url))

    # get orbit type
    orbit_type = 'poeorb'
    for o in (master_orbit_url, slave_orbit_url):
        if RESORB_RE.search(o):
            orbit_type = 'resorb'
            break

    # fail if we expect only precise orbits
    if precise_orbit_only and orbit_type == 'resorb':
        raise RuntimeError("Precise orbit required.")

    # get ifg start and end dates
    ifg_master_dt, ifg_slave_dt = get_ifg_dates(master_ids, slave_ids)

    #submit jobs
    projects = []
    stitched_args = []
    ifg_ids = []
    master_zip_urls = []
    master_orbit_urls = []
    slave_zip_urls = []
    slave_orbit_urls = []
    swathnums = []
    bboxes = []
    auto_bboxes = []
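    # note: orbit_dict is initialized here but never referenced below in this
    # single-pair variant; it appears to be carried over from the enumeration
    # functions above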
    orbit_dict = {}

    # generate job configs
    bbox = [-90., 90., -180., 180.]
    auto_bbox = True
    for subswath in subswaths:
        stitched_args.append(False if len(master_ids) == 1 or len(slave_ids) == 1 else True)
        master_zip_urls.append(master_urls)
        master_orbit_urls.append(master_orbit_url)
        slave_zip_urls.append(slave_urls)
        slave_orbit_urls.append(slave_orbit_url)
        swathnums.append(subswath)
        bboxes.append(bbox)
        auto_bboxes.append(auto_bbox)
        projects.append(project)
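        # note: hashing json.dumps() output directly works on Python 2, where
        # str is a byte string; on Python 3 the string would need .encode()
        # before being passed to hashlib.md5()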
        ifg_hash = hashlib.md5(json.dumps([
            id_tmpl,
            stitched_args[-1],
            master_zip_urls[-1],
            master_orbit_urls[-1],
            slave_zip_urls[-1],
            slave_orbit_urls[-1],
            swathnums[-1],
            bboxes[-1],
            auto_bboxes[-1],
            projects[-1],
            azimuth_looks,
            range_looks,
        ])).hexdigest()
        ifg_ids.append(id_tmpl.format('M', len(master_ids), len(slave_ids),
                                      track, ifg_master_dt,
                                      ifg_slave_dt, subswath,
                                      orbit_type, ifg_hash[0:4]))

    return ( projects, stitched_args, auto_bboxes, ifg_ids, master_zip_urls,
             master_orbit_urls, slave_zip_urls, slave_orbit_urls, swathnums,
             bboxes )
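A minimal driver sketch for get_topsapp_cfg follows, assuming the function above is importable from this module; the _context.json path and command-line handling are placeholders, not part of the original code.

if __name__ == "__main__":
    import sys
    # the context file is expected to carry the keys read at the top of
    # get_topsapp_cfg: project, master_ids, slave_ids, subswaths,
    # azimuth_looks, range_looks, precise_orbit_only
    ctx_file = sys.argv[1] if len(sys.argv) > 1 else "_context.json"
    (projects, stitched_args, auto_bboxes, ifg_ids, master_zip_urls,
     master_orbit_urls, slave_zip_urls, slave_orbit_urls, swathnums,
     bboxes) = get_topsapp_cfg(ctx_file)
    for i, ifg_id in enumerate(ifg_ids):
        print("job %d: %s (swath %s)" % (i, ifg_id, swathnums[i]))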