def query_grq(doc_id):
    """Query the GRQ elasticsearch endpoint for a product or job.

    The GRQ ES url and index prefix are resolved via UrlUtils (UU), so no
    endpoint argument is needed.

    :param doc_id: id of product or job
    :return: list of hits from elasticsearch
    """
    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url and index
    es_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url
    es_index = uu.grq_index_prefix

    query = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"_id": doc_id}}
                    # add job status:
                ]
            }
        }
    }

    # es_url is already normalized to have no trailing slash
    search_url = '%s/%s/_search' % (es_url, es_index)
    r = requests.post(search_url, data=json.dumps(query))
    if r.status_code != 200:
        logger.error("Failed to query %s:\n%s" % (es_url, r.text))
        logger.error("query: %s" % json.dumps(query, indent=2))
        r.raise_for_status()
    result = r.json()
    logger.info("total hits: %s" % result['hits']['total'])
    return result['hits']['hits']

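# --- Illustrative usage (not part of the original module) ---
# A minimal sketch of calling query_grq(); the doc id below is a made-up
# placeholder, and the fields accessed assume the standard ES hit layout
# ({'_id': ..., '_source': {...}}) returned by /_search.
def _demo_query_grq():
    hits = query_grq("S1A_IW_SLC__EXAMPLE_ID")  # hypothetical id
    for hit in hits:
        logger.info("%s -> %s" % (hit['_id'],
                                  hit.get('_source', {}).get('dataset')))
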
def run_auditor(context_file, dataset="ifg"):
    '''Route auditor for dataset type'''
    try:
        # read existing context
        with open(context_file, "r") as fh:
            context = json.load(fh)
        try:
            coordinates = json.loads(context["audit_coordinates"])
        except TypeError:
            # already parsed (list), not a JSON string
            coordinates = context["audit_coordinates"]
        context["query"] = {
            "query": get_audit_input_query(context["audit_starttime"],
                                           context["audit_endtime"],
                                           coordinates)
        }

        # write out new context
        enum_context = "enum_context.json"
        with open(enum_context, "w") as fh:
            json.dump(context, fh)

        # call the pair-gen code with the new context
        LOGGER.info("Enumerating IFGs")
        if dataset == "ifg":
            cfgs = enumerate_topsapp_cfgs.get_topsapp_cfgs(enum_context)
        elif dataset == "slcp":
            cfgs = enumerate_topsapp_cfgs.get_topsapp_cfgs_rsp(enum_context)
        else:
            raise RuntimeError("Unknown dataset type for auditor: %s" % dataset)

        # query docs
        url_util = UU()
        LOGGER.info("rest_url: %s" % url_util.rest_url)
        LOGGER.info("grq_index_prefix: %s" % url_util.grq_index_prefix)
        LOGGER.info("version: %s" % url_util.version)

        # get normalized rest url
        rest_url = url_util.rest_url[:-1] if url_util.rest_url.endswith('/') \
            else url_util.rest_url

        return audit(cfgs, rest_url, url_util.grq_index_prefix, url_util.version)
    except Exception as ex:
        with open('_alt_error.txt', 'w') as fh:
            fh.write("{}\n".format(ex))
        with open('_alt_traceback.txt', 'w') as fh:
            fh.write("{}\n".format(traceback.format_exc()))
        LOGGER.error("Exception of type %s occurred with message %s" %
                     (type(ex), ex))
        LOGGER.error("Traceback:\n%s" % traceback.format_exc())
        raise

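# --- Illustrative input (not part of the original module) ---
# A hypothetical minimal context file accepted by run_auditor(); only the
# keys read above are shown and every value is a placeholder. Note that
# audit_coordinates may arrive either as a JSON string or as an already
# parsed list, which is why the function tolerates both.
#
# {
#   "audit_starttime": "2017-01-01T00:00:00",
#   "audit_endtime": "2017-02-01T00:00:00",
#   "audit_coordinates": "[[[-118.5, 33.2], [-117.2, 33.2], [-117.2, 34.1], [-118.5, 34.1], [-118.5, 33.2]]]"
# }
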
def get_topsapp_cfgs(context_file, temporalBaseline=72, id_tmpl=IFG_ID_TMPL,
                     minMatch=0, covth=.95):
    """Return all possible topsApp configurations."""
    # get context
    with open(context_file) as f:
        context = json.load(f)

    # get args
    event_time = context['event_time']
    start_time = context['start_time']
    end_time = context['end_time']
    project = context['project']
    sso = get_bool_param(context, 'singlesceneOnly')
    auto_bbox = get_bool_param(context, 'auto_bbox')
    precise_orbit_only = get_bool_param(context, 'precise_orbit_only')
    query = context['query']

    # pair direction:
    #   forward  => reference scene is slave
    #   backward => reference scene is master
    pre_ref_pd = get_pair_direction(context, 'preReferencePairDirection')
    pre_search = False if pre_ref_pd == 'none' else True
    post_ref_pd = get_pair_direction(context, 'postReferencePairDirection')
    post_search = False if post_ref_pd == 'none' else True

    # overwrite temporal baseline from context
    if 'temporalBaseline' in context:
        temporalBaseline = int(context['temporalBaseline'])

    # overwrite minMatch
    if 'minMatch' in context:
        minMatch = int(context['minMatch'])

    # overwrite covth
    if 'covth' in context:
        covth = float(context['covth'])

    # log enumerator params
    logger.info("event_time: %s" % event_time)
    logger.info("start_time: %s" % start_time)
    logger.info("end_time: %s" % end_time)
    logger.info("project: %s" % project)
    logger.info("singlesceneOnly: %s" % sso)
    logger.info("auto_bbox: %s" % auto_bbox)
    logger.info("preReferencePairDirection: %s" % pre_ref_pd)
    logger.info("postReferencePairDirection: %s" % post_ref_pd)
    logger.info("temporalBaseline: %s" % temporalBaseline)
    logger.info("minMatch: %s" % minMatch)
    logger.info("covth: %s" % covth)

    # get bbox from query
    coords = None
    bbox = [-90., 90., -180., 180.]
    if 'and' in query.get('query', {}).get('filtered', {}).get('filter', {}):
        filts = query['query']['filtered']['filter']['and']
    elif 'geo_shape' in query.get('query', {}).get('filtered', {}).get('filter', {}):
        filts = [{"geo_shape": query['query']['filtered']['filter']['geo_shape']}]
    else:
        filts = []
    for filt in filts:
        if 'geo_shape' in filt:
            coords = filt['geo_shape']['location']['shape']['coordinates']
            roi = {
                'type': 'Polygon',
                'coordinates': coords,
            }
            logger.info("query filter ROI: %s" % json.dumps(roi))
            roi_geom = ogr.CreateGeometryFromJson(json.dumps(roi))
            roi_x_min, roi_x_max, roi_y_min, roi_y_max = roi_geom.GetEnvelope()
            bbox = [roi_y_min, roi_y_max, roi_x_min, roi_x_max]
            logger.info("query filter bbox: %s" % bbox)
            break

    # query docs
    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("dav_url: {}".format(uu.dav_url))
    logger.info("version: {}".format(uu.version))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url
    rest_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url

    # get index name and url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(
        rest_url, uu.grq_index_prefix)
    logger.info("idx: {}".format(uu.grq_index_prefix))
    logger.info("url: {}".format(url))

    # query hits
    query.update({"partial_fields": {
        "partial": {
            "exclude": "city",
        }
    }})
    #logger.info("query: {}".format(json.dumps(query, indent=2)))
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    scan_result = r.json()
    count = scan_result['hits']['total']
    scroll_id = scan_result['_scroll_id']
    ref_hits = []
    while True:
        r = requests.post('%s/_search/scroll?scroll=60m' % rest_url,
                          data=scroll_id)
        res = r.json()
        scroll_id = res['_scroll_id']
        if len(res['hits']['hits']) == 0:
            break
        ref_hits.extend(res['hits']['hits'])

    # extract reference ids
    ref_ids = {h['_id']: True for h in ref_hits}
    logger.info("ref_ids: {}".format(json.dumps(ref_ids, indent=2)))
    logger.info("ref_hits count: {}".format(len(ref_hits)))

    # group ref hits by track and date
    grouped_refs = group_frames_by_track_date(ref_hits)

    # dedup any reprocessed reference SLCs
    dedup_reprocessed_slcs(grouped_refs['grouped'], grouped_refs['metadata'])
    #logger.info("ref hits: {}".format(json.dumps(grouped_refs['hits'], indent=2)))
    #logger.info("ref sorted_hits: {}".format(pformat(grouped_refs['grouped'])))
    #logger.info("ref slc_dates: {}".format(pformat(grouped_refs['dates'])))
    #logger.info("ref slc_footprints: {}".format(json.dumps(grouped_refs['footprints'], indent=2)))

    # build list of reference scenes
    ref_scenes = []
    for track in grouped_refs['grouped']:
        logger.info("track: %s" % track)
        for ref_dt in grouped_refs['grouped'][track]:
            logger.info("reference date: %s" % ref_dt.isoformat())
            if sso:
                for ref_id in grouped_refs['grouped'][track][ref_dt]:
                    ref_scenes.append({
                        'id': [ref_id],
                        'track': track,
                        'date': ref_dt,
                        'location': grouped_refs['footprints'][ref_id],
                        'pre_matches': None,
                        'post_matches': None
                    })
            else:
                union_poly = get_union_geometry(
                    grouped_refs['grouped'][track][ref_dt],
                    grouped_refs['footprints'])
                if len(union_poly['coordinates']) > 1:
                    logger.warn("Stitching %s will result in a disjoint geometry." %
                                grouped_refs['grouped'][track][ref_dt])
                    logger.warn("Skipping.")
                else:
                    ref_scenes.append({
                        'id': grouped_refs['grouped'][track][ref_dt],
                        'track': track,
                        'date': ref_dt,
                        'location': union_poly,
                        'pre_matches': None,
                        'post_matches': None
                    })

    # find reference scene matches
    projects = []
    stitched_args = []
    auto_bboxes = []
    ifg_ids = []
    master_zip_urls = []
    master_orbit_urls = []
    slave_zip_urls = []
    slave_orbit_urls = []
    swathnums = []
    bboxes = []
    mrpe_dict = {}
    for ref_scene in ref_scenes:
        for ref_id in ref_scene['id']:
            logger.info("#" * 80)
            logger.info("ref id: %s" % ref_id)
            logger.info("ref date: %s" % ref_scene['date'])
            logger.info("ref scene: %s" % pformat(ref_scene))
            mrpe_hits = get_mrpe_hits(rest_url, ref_scene, start_time, event_time)
            for mrpe_hit in mrpe_hits:
                if mrpe_hit['_id'] in mrpe_dict:
                    continue
                mrpe_dict[mrpe_hit['_id']] = True
                logger.info("mrpe_hit: %s" % pformat(mrpe_hit))
                new_query = {
                    "query": {
                        "bool": {
                            "must": [
                                {"term": {"_id": mrpe_hit['_id']}},
                                {"term": {
                                    "system_version.raw":
                                        mrpe_hit['fields']['partial'][0]['system_version'],
                                }}
                            ]
                        }
                    }
                }
                new_context = deepcopy(context)
                new_context['query'] = new_query
                tmp_ctx_file = "%s.context.json" % ref_id
                with open(tmp_ctx_file, 'w') as f:
                    json.dump(new_context, f, indent=2)
                (tmp_projects, tmp_stitched_args, tmp_auto_bboxes, tmp_ifg_ids,
                 tmp_master_zip_urls, tmp_master_orbit_urls, tmp_slave_zip_urls,
                 tmp_slave_orbit_urls, tmp_swathnums, tmp_bboxes) = gtc(
                    tmp_ctx_file, temporalBaseline=temporalBaseline,
                    id_tmpl=id_tmpl, minMatch=minMatch, covth=covth)
                projects.extend(tmp_projects)
                stitched_args.extend(tmp_stitched_args)
                auto_bboxes.extend(tmp_auto_bboxes)
                ifg_ids.extend(tmp_ifg_ids)
                master_zip_urls.extend(tmp_master_zip_urls)
                master_orbit_urls.extend(tmp_master_orbit_urls)
                slave_zip_urls.extend(tmp_slave_zip_urls)
                slave_orbit_urls.extend(tmp_slave_orbit_urls)
                swathnums.extend(tmp_swathnums)
                bboxes.extend(tmp_bboxes)

    return (projects, stitched_args, auto_bboxes, ifg_ids, master_zip_urls,
            master_orbit_urls, slave_zip_urls, slave_orbit_urls, swathnums,
            bboxes)

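# --- Illustrative helper (not part of the original module) ---
# The scan/scroll pagination above is repeated verbatim in several of these
# enumerators. A minimal, self-contained sketch of the same ES 1.x-style
# pattern; it assumes only `requests` and `json` (already imported here)
# and an ES 1.x endpoint that supports search_type=scan.
def scroll_all_hits(rest_url, idx, query, page_size=100):
    """Collect all hits for `query` from index `idx` via scan/scroll."""
    url = "%s/%s/_search?search_type=scan&scroll=60m&size=%d" % (
        rest_url, idx, page_size)
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    scroll_id = r.json()['_scroll_id']
    hits = []
    while True:
        r = requests.post("%s/_search/scroll?scroll=60m" % rest_url,
                          data=scroll_id)
        r.raise_for_status()
        res = r.json()
        scroll_id = res['_scroll_id']
        if not res['hits']['hits']:
            break
        hits.extend(res['hits']['hits'])
    return hits
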
def pair_selector(id, margin=0.2, overlap_min=.5, frame_id_margin=3):
    """Return last matching temporal S1 pair."""
    uu = UU()
    print("S1 ID is {}".format(id))
    print("rest_url: {}".format(uu.rest_url))
    print("dav_url: {}".format(uu.dav_url))
    print("version: {}".format(uu.version))
    print("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # extract info
    match = ID_RE.search(id)
    if match is None:
        raise RuntimeError("Swath number extraction error: {}.".format(id))
    swath_num = int(match.group(1))
    vtype = match.group(2)
    yr = int(match.group(3))
    mo = int(match.group(4))
    dy = int(match.group(5))
    hr = int(match.group(6))
    mn = int(match.group(7))
    ss = int(match.group(8))

    # get index name and url
    idx = "{}_{}_s1-swath".format(uu.grq_index_prefix,
                                  uu.version.replace('.', ''))
    url = "{}{}/_search".format(uu.rest_url, idx)
    print("idx: {}".format(idx))
    print("url: {}".format(url))

    # get metadata
    query = {
        "query": {
            "term": {"_id": id}
        },
        "partial_fields": {
            "partial": {
                "exclude": "city",
            }
        }
    }
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    res = r.json()
    if res['hits']['total'] != 1:
        raise RuntimeError("Failed to find exactly 1 result for {}:\n\n{}".format(
            id, json.dumps(res, indent=2)))
    hit = res['hits']['hits'][0]['fields']['partial'][0]
    #print(json.dumps(hit, indent=2))

    # find matching scenes within +/- 50 days
    sensingStart = datetime.strptime(hit['metadata']['sensingStart'],
                                     '%Y-%m-%dT%H:%M:%S.%f')
    query_start = (sensingStart - timedelta(days=50)).isoformat()
    query_stop = (sensingStart + timedelta(days=50)).isoformat()
    query = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"system_version": hit['system_version']}},
                    {"term": {"metadata.trackNumber": hit['metadata']['trackNumber']}},
                    {"range": {
                        "metadata.frameID": {
                            "from": int(hit['metadata']['frameID']) - frame_id_margin,
                            "to": int(hit['metadata']['frameID']) + frame_id_margin
                        }
                    }},
                    {"bool": {
                        "should": [
                            {"range": {
                                "metadata.sensingStart": {
                                    "from": query_start,
                                    "to": query_stop
                                }
                            }},
                            {"range": {
                                "metadata.sensingStop": {
                                    "from": query_start,
                                    "to": query_stop
                                }
                            }}
                        ]
                    }}
                ]
            }
        },
        "sort": [{"starttime": {"order": "desc"}}],
        "partial_fields": {
            "partial": {
                "exclude": "city",
            }
        }
    }
    #print(json.dumps(query, indent=2))
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    res = r.json()
    print("total matches: {}".format(res['hits']['total']))
    matches = res['hits']['hits']
    print("matches: {}".format([m['_id'] for m in matches]))

    # filter matches
    filtered_matches = []
    for m in matches:
        h = m['fields']['partial'][0]
        #print("h: {}".format(json.dumps(h, indent=2)))
        if h['id'] == id:
            print("Filtering self: %s" % h['id'])
            continue
        match = ID_RE.search(h['id'])
        if match is None:
            print("Filtering unrecognized id: %s" % h['id'])
            continue
        sn = int(match.group(1))
        if sn != swath_num:
            print("Filtering %s due to unmatched swath number. Got %s but should be %s." %
                  (h['id'], sn, swath_num))
            continue
        vt = match.group(2)
        if vt != vtype:
            print("Filtering %s due to unmatched vtype. Got %s but should be %s." %
                  (h['id'], vt, vtype))
            continue
        overlap_pct = get_overlap(hit['location'], h['location'])
        print("overlap_pct is: %s" % overlap_pct)
        if overlap_pct < overlap_min:
            print("Filtering %s since overlap_pct < min overlap threshold of %s." %
                  (h['id'], overlap_min))
            continue
        filtered_matches.append(h)
    print("total filtered_matches: {}".format(len(filtered_matches)))
    print("filtered_matches: {}".format([fm['id'] for fm in filtered_matches]))

    # return if no filtered matches
    if len(filtered_matches) == 0:
        return filtered_matches

    # get bbox arg
    bbox = np.array(hit['metadata']['bbox'])
    bbox_str = "{0:.2f} {1:.2f} {2:.2f} {3:.2f}".format(
        bbox[:, 0].min() - margin, bbox[:, 0].max() + margin,
        bbox[:, 1].min() - margin, bbox[:, 1].max() + margin)

    # get orbit URL
    orbit_url = fetch(hit['starttime'], hit['endtime'], dry_run=True)
    if orbit_url is None:
        raise RuntimeError("Failed to query for an orbit URL for {}.".format(
            os.path.basename(hit['metadata']['archive_url'])))

    # result json
    ret_list = []
    for filtered_match in filtered_matches:
        j = {
            "swath": swath_num,
            "bbox_str": bbox_str,
            "id": [id],
            "bbox": [hit['metadata']['bbox']],
            "archive_url": [hit['metadata']['archive_url']],
            "frameID": [hit['metadata']['frameID']],
            "trackNumber": [hit['metadata']['trackNumber']],
            "orbit_url": [orbit_url],
        }
        #print("filtered match: {}".format(json.dumps(filtered_match, indent=2)))
        st_time = datetime.strptime(filtered_match['metadata']['sensingStart'],
                                    '%Y-%m-%dT%H:%M:%S.%f')

        # extract info
        match = ID_RE.search(filtered_match['id'])
        if match is None:
            raise RuntimeError("Swath number extraction error: {}.".format(
                filtered_match['id']))
        match_swath_num = int(match.group(1))
        match_vtype = match.group(2)
        match_yr = int(match.group(3))
        match_mo = int(match.group(4))
        match_dy = int(match.group(5))
        match_hr = int(match.group(6))
        match_mn = int(match.group(7))
        match_ss = int(match.group(8))

        # get orbit URL
        match_orbit_url = fetch(filtered_match['starttime'],
                                filtered_match['endtime'], dry_run=True)
        if match_orbit_url is None:
            raise RuntimeError("Failed to query for an orbit URL for {}.".format(
                os.path.basename(filtered_match['metadata']['archive_url'])))

        # each pair is (master, slave); determine which is which
        if st_time > sensingStart:
            ifg_start_dt = datetime(yr, mo, dy, hr, mn, ss)
            ifg_end_dt = datetime(match_yr, match_mo, match_dy,
                                  match_hr, match_mn, match_ss)
            j['id'].append(filtered_match['id'])
            j['bbox'].append(filtered_match['metadata']['bbox'])
            j['archive_url'].append(filtered_match['metadata']['archive_url'])
            j['frameID'].append(filtered_match['metadata']['frameID'])
            j['trackNumber'].append(filtered_match['metadata']['trackNumber'])
            j['orbit_url'].append(match_orbit_url)
        else:
            ifg_start_dt = datetime(match_yr, match_mo, match_dy,
                                    match_hr, match_mn, match_ss)
            ifg_end_dt = datetime(yr, mo, dy, hr, mn, ss)
            j['id'].insert(0, filtered_match['id'])
            j['bbox'].insert(0, filtered_match['metadata']['bbox'])
            j['archive_url'].insert(0, filtered_match['metadata']['archive_url'])
            j['frameID'].insert(0, filtered_match['metadata']['frameID'])
            j['trackNumber'].insert(0, filtered_match['metadata']['trackNumber'])
            j['orbit_url'].insert(0, match_orbit_url)

        # get ifg orbit type
        orbit_type = 'poeorb'
        for u in j['orbit_url']:
            if RESORB_RE.search(u):
                orbit_type = 'resorb'
                break
        j['orbit_type'] = orbit_type

        # generate ifg id; cast frameID/trackNumber to int for the
        # zero-padded {:03d} fields
        ifg_id_tmpl = "S1-IFG_FID{:03d}_TN{:03d}_{:%Y%m%dT%H%M%S}-{:%Y%m%dT%H%M%S}_s{}-{}"
        j['ifg_id'] = ifg_id_tmpl.format(
            int(filtered_match['metadata']['frameID']),
            int(filtered_match['metadata']['trackNumber']),
            ifg_start_dt, ifg_end_dt, swath_num, orbit_type)

        # append
        ret_list.append(j)

    # write out pair info
    with open('pair.json', 'w') as f:
        json.dump({'pairs': ret_list}, f, indent=2, sort_keys=True)

    return ret_list

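# --- Illustrative example (not part of the original module) ---
# How the bbox_str above is derived, in isolation: metadata['bbox'] appears
# to hold [lat, lon] corner points (the output reads as an SNWE-style
# "min_lat max_lat min_lon max_lon" string padded by `margin` degrees).
# The corner values below are made up.
def _demo_bbox_str(margin=0.2):
    bbox = np.array([[34.1, -118.5], [34.1, -117.2],
                     [33.2, -117.2], [33.2, -118.5]])  # hypothetical corners
    return "{0:.2f} {1:.2f} {2:.2f} {3:.2f}".format(
        bbox[:, 0].min() - margin, bbox[:, 0].max() + margin,
        bbox[:, 1].min() - margin, bbox[:, 1].max() + margin)
    # -> "33.00 34.30 -118.70 -117.00"
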
def get_stitch_cfgs(context_file):
    """Return all possible stitch interferogram configurations."""
    # get context
    with open(context_file) as f:
        context = json.load(f)

    # get args
    project = context['project']
    direction = context.get('direction', 'along')
    subswaths = [int(i) for i in context.get('subswaths', "1 2 3").split()]
    subswaths.sort()
    min_stitch_count = int(context['min_stitch_count'])
    extra_products = [i.strip() for i in
                      context.get('extra_products', 'los.rdr.geo').split()]
    orig_query = context['query']
    logger.info("orig_query: %s" % json.dumps(orig_query, indent=2))

    # cleanse query of ids from triggered rules
    query = clean_query(orig_query)
    logger.info("clean query: %s" % json.dumps(query, indent=2))

    # log enumerator params
    logger.info("project: %s" % project)
    logger.info("direction: %s" % direction)
    logger.info("subswaths: %s" % subswaths)
    logger.info("min_stitch_count: %s" % min_stitch_count)
    logger.info("extra_products: %s" % extra_products)

    # get bbox from query
    coords = None
    bbox = [-90., 90., -180., 180.]
    if 'and' in query.get('query', {}).get('filtered', {}).get('filter', {}):
        filts = query['query']['filtered']['filter']['and']
    elif 'geo_shape' in query.get('query', {}).get('filtered', {}).get('filter', {}):
        filts = [{"geo_shape": query['query']['filtered']['filter']['geo_shape']}]
    else:
        filts = []
    for filt in filts:
        if 'geo_shape' in filt:
            coords = filt['geo_shape']['location']['shape']['coordinates']
            roi = {
                'type': 'Polygon',
                'coordinates': coords,
            }
            logger.info("query filter ROI: %s" % json.dumps(roi))
            roi_geom = ogr.CreateGeometryFromJson(json.dumps(roi))
            roi_x_min, roi_x_max, roi_y_min, roi_y_max = roi_geom.GetEnvelope()
            bbox = [roi_y_min, roi_y_max, roi_x_min, roi_x_max]
            logger.info("query filter bbox: %s" % bbox)
            break

    # query docs
    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("dav_url: {}".format(uu.dav_url))
    logger.info("version: {}".format(uu.version))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url
    rest_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url

    # get index name and url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(
        rest_url, uu.grq_index_prefix)
    logger.info("idx: {}".format(uu.grq_index_prefix))
    logger.info("url: {}".format(url))

    # query hits
    query.update({"partial_fields": {
        "partial": {
            "exclude": "city",
        }
    }})
    #logger.info("query: {}".format(json.dumps(query, indent=2)))
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    scan_result = r.json()
    count = scan_result['hits']['total']
    scroll_id = scan_result['_scroll_id']
    hits = []
    while True:
        r = requests.post('%s/_search/scroll?scroll=60m' % rest_url,
                          data=scroll_id)
        res = r.json()
        scroll_id = res['_scroll_id']
        if len(res['hits']['hits']) == 0:
            break
        hits.extend(res['hits']['hits'])

    # remove partial fields and reformat metadata as expected by stitcher_utils
    #hits = remove_partials(hits)

    # extract reference ids
    ids = {h['_id']: True for h in hits}
    logger.info("ids: {}".format(json.dumps(ids, indent=2)))
    logger.info("hits count: {}".format(len(hits)))

    # dump metadata
    valid_meta_ts_out_file = "valid_meta_ts_out.json"
    with open(valid_meta_ts_out_file, 'w') as f:
        json.dump(hits, f, indent=2)

    # group frames by track and date pairs
    grouped = group_frames_by_track_date(hits)
    logger.info("grouped: %s" % json.dumps(grouped, indent=2))

    # enumerate configs
    projects = []
    directions = []
    extra_products_list = []
    filenames = []
    filename_urls = []
    ifg_ids = []
    base_products = ['filt_topophase.unw.geo',
                     'filt_topophase.unw.conncomp.geo',
                     'phsig.cor.geo']
    base_products.extend(extra_products)
    for track in sorted(grouped['grouped']):
        for dt_pair in sorted(grouped['grouped'][track]):
            stitch_count = 0

            # filter scenes without all requested subswaths
            swath_check = {}
            for swath in subswaths:
                if swath not in grouped['grouped'][track][dt_pair]:
                    raise RuntimeError("Did not find single-scene IFGs for "
                                       "subswath %s for track %s dates %s. "
                                       "Check your query results." %
                                       (swath, track, dt_pair))
                for tr, id in grouped['grouped'][track][dt_pair][swath]:
                    swath_check.setdefault(tr, {})[swath] = id
            skip_tr = {}
            for tr in sorted(swath_check):
                for swath in subswaths:
                    if swath not in swath_check[tr]:
                        skip_tr[tr] = True

            furls = []
            swathnums = []
            ifg_sts = set()
            ifg_ets = set()
            fnames_tr = {}
            for swath in subswaths:
                swathnums.append(swath)
                for tr, id in grouped['grouped'][track][dt_pair][swath]:
                    if tr in skip_tr:
                        logger.warning("Skipping %s for scene %s since only "
                                       "subswaths %s exist." %
                                       (id, tr, sorted(swath_check[tr].keys())))
                        continue
                    bisect.insort(fnames_tr.setdefault(tr, []),
                                  os.path.join(id, 'merged',
                                               'filt_topophase.unw.geo'))
                    for prod_file in base_products:
                        furls.append({
                            'url': "%s/merged/%s" % (grouped['hits'][id], prod_file),
                            'local_path': "%s/merged/" % id,
                        })
                        furls.append({
                            'url': "%s/merged/%s.xml" % (grouped['hits'][id], prod_file),
                            'local_path': "%s/merged/" % id,
                        })
                    furls.append({
                        'url': "%s/fine_interferogram.xml" % grouped['hits'][id],
                        'local_path': "%s/" % id,
                    })
                    furls.append({
                        'url': "%s/%s.dataset.json" % (grouped['hits'][id], id),
                        'local_path': "%s/_%s.dataset.json" % (id, id),
                    })
                    furls.append({
                        'url': "%s/%s.met.json" % (grouped['hits'][id], id),
                        'local_path': "%s/_%s.met.json" % (id, id),
                    })
                    stitch_count += 1
                    st, et = tr.split('_')
                    ifg_sts.add(st)
                    ifg_ets.add(et)
            ifg_sts = list(ifg_sts)
            ifg_sts.sort()
            ifg_ets = list(ifg_ets)
            ifg_ets.sort()

            # check minimum stitch count met
            if stitch_count < min_stitch_count:
                logger.warning("Failed to find minimum stitch count of %s for "
                               "track %s date pair %s: %s" %
                               (min_stitch_count, track, dt_pair, stitch_count))
                continue

            # build job params
            projects.append(project)
            directions.append(direction)
            extra_products_list.append(extra_products)
            filenames.append([fnames_tr[tr] for tr in sorted(fnames_tr)])
            filename_urls.append(furls)
            ifg_hash = hashlib.md5(json.dumps([
                projects[-1],
                directions[-1],
                extra_products_list[-1],
                filenames[-1],
                filename_urls[-1],
            ], sort_keys=True)).hexdigest()
            ifg_ids.append(ID_TMPL.format(int(track), ifg_sts[0], ifg_ets[-1],
                                          ''.join(map(str, swathnums)),
                                          direction, ifg_hash[0:4]))
    logger.info("projects: %s" % projects)
    logger.info("directions: %s" % directions)
    logger.info("extra_products: %s" % extra_products_list)
    logger.info("filenames: %s" % json.dumps(filenames, indent=2))
    logger.info("filename_urls: %s" % json.dumps(filename_urls, indent=2))
    logger.info("ifg_ids: %s" % ifg_ids)
    return (projects, directions, extra_products_list, filenames,
            filename_urls, ifg_ids)

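# --- Illustrative example (not part of the original module) ---
# The ifg_hash suffix above makes IDs deterministic: md5 over the
# sort_keys-canonicalized JSON of the job parameters yields the same
# 4-hex-char tag for identical configurations, so re-enumeration produces
# stable IDs instead of duplicates. Minimal sketch with made-up params
# (Python 2 style md5 of a str, matching this module; Python 3 would need
# json.dumps(...).encode() first):
def _demo_ifg_hash_tag():
    params = ["my_project", "along", ["los.rdr.geo"],
              [["a/merged/filt_topophase.unw.geo"]]]  # hypothetical job params
    return hashlib.md5(json.dumps(params, sort_keys=True)).hexdigest()[0:4]
    # same params -> same tag on every run
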
def get_topsapp_cfgs(context_file, id_tmpl=IFG_ID_TMPL):
    """Return all possible topsApp configurations that can be reprocessed
    with a precise orbit."""
    # get context
    with open(context_file) as f:
        context = json.load(f)

    # get dataset type to query
    if id_tmpl == IFG_ID_TMPL:
        dataset = "S1-IFG"
    elif id_tmpl == RSP_ID_TMPL:
        dataset = "S1-SLCP"
    else:
        raise RuntimeError("Failed to recognize dataset from id template: %s" %
                           id_tmpl)

    # get params
    ifg_version = context['ifg_version']
    starttime = context['starttime']
    endtime = context['endtime']
    orb_ds_url = context['url']
    orb_file = context['orbit_file']
    platform = context['platform']

    # get precise orbit date
    orb_dt = get_orbit_date(orb_file)

    # query docs
    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("dav_url: {}".format(uu.dav_url))
    logger.info("version: {}".format(uu.version))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url
    rest_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url

    # get index name and url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(
        rest_url, uu.grq_index_prefix)
    logger.info("idx: {}".format(uu.grq_index_prefix))
    logger.info("url: {}".format(url))

    # build query
    query = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"dataset.raw": dataset}},
                    {"term": {"version.raw": ifg_version}},
                    {"term": {"metadata.orbit_type.raw": "resorb"}},
                    {"term": {"metadata.platform.raw": platform}},
                    {"bool": {
                        "should": [
                            {"range": {"starttime": {"from": starttime,
                                                     "to": endtime}}},
                            {"range": {"endtime": {"from": starttime,
                                                   "to": endtime}}}
                        ]
                    }}
                ]
            }
        },
        "partial_fields": {
            "partial": {
                "exclude": "city",
            }
        }
    }
    logger.info("query: {}".format(json.dumps(query, indent=2)))
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    scan_result = r.json()
    count = scan_result['hits']['total']
    scroll_id = scan_result['_scroll_id']
    hits = []
    while True:
        r = requests.post('%s/_search/scroll?scroll=60m' % rest_url,
                          data=scroll_id)
        res = r.json()
        scroll_id = res['_scroll_id']
        if len(res['hits']['hits']) == 0:
            break
        hits.extend(res['hits']['hits'])
    #logger.info("hits: {}".format(json.dumps(hits, indent=2)))
    logger.info("hits count: {}".format(len(hits)))

    # collect topsapp cfgs
    projects = []
    stitched_args = []
    auto_bboxes = []
    master_zip_urls = []
    slave_zip_urls = []
    swathnums = []
    bboxes = []
    master_orbit_urls = []
    slave_orbit_urls = []
    ifg_ids = []
    for hit in hits:
        # propagate unmodified params
        ifg_ctx = hit['fields']['partial'][0]['metadata']['context']
        sfl_ifg_ctx = ifg_ctx.get('context', {})

        # old id
        ifg_id = ifg_ctx['id']

        # determine orbit to replace
        logger.info("latest precise orbit file date: {}".format(
            orb_dt.isoformat('T')))
        mo_dt = get_orbit_date(ifg_ctx['master_orbit_file'])
        logger.info("original master orbit file date: {}".format(
            mo_dt.isoformat('T')))
        so_dt = get_orbit_date(ifg_ctx['slave_orbit_file'])
        logger.info("original slave orbit file date: {}".format(
            so_dt.isoformat('T')))
        if orb_dt == mo_dt:
            master_orbit_urls.append(os.path.join(orb_ds_url, orb_file))
            slave_orbit_urls.append(ifg_ctx['slave_orbit_url'])
        elif orb_dt == so_dt:
            master_orbit_urls.append(ifg_ctx['master_orbit_url'])
            slave_orbit_urls.append(os.path.join(orb_ds_url, orb_file))
        else:
            logger.info("Precise orbit file {} doesn't align with S1-IFG {}. "
                        "Skipping.".format(orb_file, ifg_id))
            continue
        logger.info("sfl_ifg_ctx: {}".format(json.dumps(sfl_ifg_ctx, indent=2)))

        # carry over the rest of the params
        projects.append(ifg_ctx['project'])
        stitched_args.append(False if len(ifg_ctx['master_zip_url']) == 1 or
                             len(ifg_ctx['slave_zip_url']) == 1 else True)
        auto_bboxes.append(ifg_ctx['auto_bbox'])
        master_zip_urls.append(ifg_ctx['master_zip_url'])
        slave_zip_urls.append(ifg_ctx['slave_zip_url'])
        swathnums.append(ifg_ctx['swathnum'])
        bboxes.append(ifg_ctx['bbox'])

        # determine orbit type of product in case both master and slave
        # orbits were restituted
        if POEORB_RE.search(master_orbit_urls[-1]) and \
                POEORB_RE.search(slave_orbit_urls[-1]):
            ifg_id = ifg_id.replace('resorb', 'poeorb')

        # calculate hash and new ifg id
        ifg_hash = hashlib.md5(json.dumps([
            id_tmpl,
            stitched_args[-1],
            master_zip_urls[-1],
            master_orbit_urls[-1],
            slave_zip_urls[-1],
            slave_orbit_urls[-1],
            swathnums[-1],
            #bboxes[-1],
            #auto_bboxes[-1],
            projects[-1],
            ifg_ctx.get('azimuth_looks', sfl_ifg_ctx.get('azimuth_looks', 3)),
            ifg_ctx.get('range_looks', sfl_ifg_ctx.get('range_looks', 7)),
            ifg_ctx.get('filter_strength', sfl_ifg_ctx.get('filter_strength', 0.5)),
            ifg_ctx.get('dem_type', sfl_ifg_ctx.get('dem_type', 'SRTM')),
        ])).hexdigest()
        ifg_id = ifg_id[0:-4] + ifg_hash[0:4]
        ifg_ids.append(ifg_id)
    logger.info("Found {} {} datasets to reprocess.".format(len(ifg_ids), dataset))
    return (projects, stitched_args, auto_bboxes, ifg_ids, master_zip_urls,
            master_orbit_urls, slave_zip_urls, slave_orbit_urls, swathnums,
            bboxes)

def initiate_standard_product_job(context_file):
    # get context
    with open(context_file) as f:
        context = json.load(f)

    # get args
    project = context['project']
    master_ids = [i.strip() for i in context['master_ids']]
    slave_ids = [i.strip() for i in context['slave_ids']]
    subswaths = [1, 2, 3]  #context['subswaths']
    azimuth_looks = int(context['azimuth_looks'])
    range_looks = int(context['range_looks'])
    filter_strength = float(context['filter_strength'])
    precise_orbit_only = get_bool_param(context, 'precise_orbit_only')
    job_priority = int(context['priority'])

    # log inputs
    logger.info("project: {}".format(project))
    logger.info("master_ids: {}".format(master_ids))
    logger.info("slave_ids: {}".format(slave_ids))
    logger.info("subswaths: {}".format(subswaths))
    logger.info("azimuth_looks: {}".format(azimuth_looks))
    logger.info("range_looks: {}".format(range_looks))
    logger.info("filter_strength: {}".format(filter_strength))
    logger.info("precise_orbit_only: {}".format(precise_orbit_only))

    # query docs
    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("dav_url: {}".format(uu.dav_url))
    logger.info("version: {}".format(uu.version))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url
    rest_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url

    # get index name and url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(
        rest_url, uu.grq_index_prefix)
    logger.info("idx: {}".format(uu.grq_index_prefix))
    logger.info("url: {}".format(url))

    # get metadata
    master_md = {i: get_metadata(i, rest_url, url) for i in master_ids}
    #logger.info("master_md: {}".format(json.dumps(master_md, indent=2)))
    slave_md = {i: get_metadata(i, rest_url, url) for i in slave_ids}
    #logger.info("slave_md: {}".format(json.dumps(slave_md, indent=2)))

    # get tracks
    track = get_track(master_md)
    logger.info("master_track: {}".format(track))
    slave_track = get_track(slave_md)
    logger.info("slave_track: {}".format(slave_track))
    if track != slave_track:
        raise RuntimeError("Slave track {} doesn't match master track {}.".format(
            slave_track, track))

    # require a single-scene reference
    ref_scene = master_md
    if len(master_ids) == 1:
        ref_scene = master_md
    elif len(slave_ids) == 1:
        ref_scene = slave_md
    elif len(master_ids) > 1 and len(slave_ids) > 1:
        raise RuntimeError("Single scene reference required.")

    # get urls (prefer s3)
    master_urls = get_urls(master_md)
    logger.info("master_urls: {}".format(master_urls))
    slave_urls = get_urls(slave_md)
    logger.info("slave_urls: {}".format(slave_urls))

    # get dem type; fall back to SRTM+v3 on mismatch
    dem_type = get_dem_type(master_md)
    logger.info("master_dem_type: {}".format(dem_type))
    slave_dem_type = get_dem_type(slave_md)
    logger.info("slave_dem_type: {}".format(slave_dem_type))
    if dem_type != slave_dem_type:
        dem_type = "SRTM+v3"

    # get orbits
    master_orbit_url = get_orbit(master_ids)
    logger.info("master_orbit_url: {}".format(master_orbit_url))
    slave_orbit_url = get_orbit(slave_ids)
    logger.info("slave_orbit_url: {}".format(slave_orbit_url))

    # get orbit type
    orbit_type = 'poeorb'
    for o in (master_orbit_url, slave_orbit_url):
        if RESORB_RE.search(o):
            orbit_type = 'resorb'
            break

    # fail if we expect only precise orbits
    #if precise_orbit_only and orbit_type == 'resorb':
    #    raise RuntimeError("Precise orbit required.")

    # get ifg start and end dates
    ifg_master_dt, ifg_slave_dt = get_ifg_dates(master_ids, slave_ids)

    # submit jobs
    projects = []
    stitched_args = []
    ifg_ids = []
    master_zip_urls = []
    master_orbit_urls = []
    slave_zip_urls = []
    slave_orbit_urls = []
    swathnums = []
    bboxes = []
    auto_bboxes = []
    dem_types = []
    job_priorities = []
    orbit_dict = {}

    # generate job configs
    bbox = [-90., 90., -180., 180.]
    auto_bbox = True
    id_tmpl = IFG_ID_TMPL
    stitched_args.append(False if len(master_ids) == 1 or
                         len(slave_ids) == 1 else True)
    master_zip_urls.append(master_urls)
    master_orbit_urls.append(master_orbit_url)
    slave_zip_urls.append(slave_urls)
    slave_orbit_urls.append(slave_orbit_url)
    swathnums.append(subswaths)
    bboxes.append(bbox)
    auto_bboxes.append(auto_bbox)
    projects.append(project)
    dem_types.append(dem_type)
    job_priorities.append(job_priority)
    ifg_hash = hashlib.md5(json.dumps([
        id_tmpl,
        stitched_args[-1],
        master_zip_urls[-1],
        master_orbit_urls[-1],
        slave_zip_urls[-1],
        slave_orbit_urls[-1],
        #swathnums[-1],
        #bboxes[-1],
        #auto_bboxes[-1],
        projects[-1],
        #azimuth_looks,
        #range_looks,
        filter_strength,
        dem_type
    ])).hexdigest()
    ifg_ids.append(id_tmpl.format('M', len(master_ids), len(slave_ids), track,
                                  ifg_master_dt, ifg_slave_dt, orbit_type,
                                  ifg_hash[0:4]))

    logger.info("\n\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n" %
                (projects, stitched_args, auto_bboxes, ifg_ids,
                 master_zip_urls, master_orbit_urls, slave_zip_urls,
                 slave_orbit_urls, swathnums, bboxes, dem_types))
    return (projects, stitched_args, auto_bboxes, ifg_ids, master_zip_urls,
            master_orbit_urls, slave_zip_urls, slave_orbit_urls, swathnums,
            bboxes, dem_types, job_priorities)

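# --- Illustrative consumer (not part of the original module) ---
# The enumerators in this section all return parallel lists, one entry per
# job. A hypothetical consumer can zip them into per-job dicts; the key
# names below are illustrative only, not a documented interface.
def _iter_jobs(cfgs):
    keys = ("project", "stitched", "auto_bbox", "ifg_id", "master_zips",
            "master_orbit", "slave_zips", "slave_orbit", "swathnums",
            "bbox", "dem_type", "priority")
    for values in zip(*cfgs):
        yield dict(zip(keys, values))
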
def get_topsapp_cfgs(context_file, temporalBaseline=72, id_tmpl=IFG_ID_TMPL,
                     minMatch=0, covth=.95):
    """Return all possible topsApp configurations."""
    # get context
    with open(context_file) as f:
        context = json.load(f)

    # get args
    project = context['project']
    sso = get_bool_param(context, 'singlesceneOnly')
    auto_bbox = get_bool_param(context, 'auto_bbox')
    precise_orbit_only = get_bool_param(context, 'precise_orbit_only')
    query = context['query']

    # pair direction:
    #   forward  => reference scene is slave
    #   backward => reference scene is master
    pre_ref_pd = get_pair_direction(context, 'preReferencePairDirection')
    pre_search = False if pre_ref_pd == 'none' else True
    post_ref_pd = get_pair_direction(context, 'postReferencePairDirection')
    post_search = False if post_ref_pd == 'none' else True

    # overwrite temporal baseline from context
    if 'temporalBaseline' in context:
        temporalBaseline = int(context['temporalBaseline'])

    # overwrite minMatch
    if 'minMatch' in context:
        minMatch = int(context['minMatch'])

    # overwrite covth
    if 'covth' in context:
        covth = float(context['covth'])

    # log enumerator params
    logger.info("project: %s" % project)
    logger.info("singlesceneOnly: %s" % sso)
    logger.info("auto_bbox: %s" % auto_bbox)
    logger.info("preReferencePairDirection: %s" % pre_ref_pd)
    logger.info("postReferencePairDirection: %s" % post_ref_pd)
    logger.info("temporalBaseline: %s" % temporalBaseline)
    logger.info("minMatch: %s" % minMatch)
    logger.info("covth: %s" % covth)

    # get bbox from query
    coords = None
    bbox = [-90., 90., -180., 180.]
    if 'and' in query.get('query', {}).get('filtered', {}).get('filter', {}):
        filts = query['query']['filtered']['filter']['and']
    elif 'geo_shape' in query.get('query', {}).get('filtered', {}).get('filter', {}):
        filts = [{"geo_shape": query['query']['filtered']['filter']['geo_shape']}]
    else:
        filts = []
    for filt in filts:
        if 'geo_shape' in filt:
            coords = filt['geo_shape']['location']['shape']['coordinates']
            roi = {
                'type': 'Polygon',
                'coordinates': coords,
            }
            logger.info("query filter ROI: %s" % json.dumps(roi))
            roi_geom = ogr.CreateGeometryFromJson(json.dumps(roi))
            roi_x_min, roi_x_max, roi_y_min, roi_y_max = roi_geom.GetEnvelope()
            bbox = [roi_y_min, roi_y_max, roi_x_min, roi_x_max]
            logger.info("query filter bbox: %s" % bbox)
            break

    # query docs
    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("dav_url: {}".format(uu.dav_url))
    logger.info("version: {}".format(uu.version))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url
    rest_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url

    # get index name and url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(
        rest_url, uu.grq_index_prefix)
    logger.info("idx: {}".format(uu.grq_index_prefix))
    logger.info("url: {}".format(url))

    # query hits
    query.update({"partial_fields": {
        "partial": {
            "exclude": "city",
        }
    }})
    #logger.info("query: {}".format(json.dumps(query, indent=2)))
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    scan_result = r.json()
    count = scan_result['hits']['total']
    scroll_id = scan_result['_scroll_id']
    ref_hits = []
    while True:
        r = requests.post('%s/_search/scroll?scroll=60m' % rest_url,
                          data=scroll_id)
        res = r.json()
        scroll_id = res['_scroll_id']
        if len(res['hits']['hits']) == 0:
            break
        ref_hits.extend(res['hits']['hits'])

    # extract reference ids
    ref_ids = {h['_id']: True for h in ref_hits}
    logger.info("ref_ids: {}".format(json.dumps(ref_ids, indent=2)))
    logger.info("ref_hits count: {}".format(len(ref_hits)))

    # group ref hits by track and date
    grouped_refs = group_frames_by_track_date(ref_hits)

    # dedup any reprocessed reference SLCs
    dedup_reprocessed_slcs(grouped_refs['grouped'], grouped_refs['metadata'])
    #logger.info("ref hits: {}".format(json.dumps(grouped_refs['hits'], indent=2)))
    #logger.info("ref sorted_hits: {}".format(pformat(grouped_refs['grouped'])))
    #logger.info("ref slc_dates: {}".format(pformat(grouped_refs['dates'])))
    #logger.info("ref slc_footprints: {}".format(json.dumps(grouped_refs['footprints'], indent=2)))

    # build list of reference scenes
    ref_scenes = []
    for track in grouped_refs['grouped']:
        logger.info("track: %s" % track)
        for ref_dt in grouped_refs['grouped'][track]:
            logger.info("reference date: %s" % ref_dt.isoformat())
            if sso:
                for ref_id in grouped_refs['grouped'][track][ref_dt]:
                    ref_scenes.append({
                        'id': [ref_id],
                        'track': track,
                        'date': ref_dt,
                        'location': grouped_refs['footprints'][ref_id],
                        'pre_matches': None,
                        'post_matches': None
                    })
            else:
                union_poly = get_union_geometry(
                    grouped_refs['grouped'][track][ref_dt],
                    grouped_refs['footprints'])
                if len(union_poly['coordinates']) > 1:
                    logger.warn("Stitching %s will result in a disjoint geometry." %
                                grouped_refs['grouped'][track][ref_dt])
                    logger.warn("Skipping.")
                else:
                    ref_scenes.append({
                        'id': grouped_refs['grouped'][track][ref_dt],
                        'track': track,
                        'date': ref_dt,
                        'location': union_poly,
                        'pre_matches': None,
                        'post_matches': None
                    })

    # find reference scene matches
    for ref_scene in ref_scenes:
        logger.info("#" * 80)
        logger.info("ref id: %s" % ref_scene['id'])
        logger.info("ref date: %s" % ref_scene['date'])
        if pre_search:
            logger.info("*" * 80)
            pre_matches = group_frames_by_track_date(
                get_pair_hits(rest_url, ref_scene, 'pre',
                              temporal_baseline=temporalBaseline,
                              min_match=minMatch, covth=covth))
            dedup_reprocessed_slcs(pre_matches['grouped'],
                                   pre_matches['metadata'])
            ref_scene['pre_matches'] = pre_matches
        if post_search:
            logger.info("*" * 80)
            post_matches = group_frames_by_track_date(
                get_pair_hits(rest_url, ref_scene, 'post',
                              temporal_baseline=temporalBaseline,
                              min_match=minMatch, covth=covth))
            dedup_reprocessed_slcs(post_matches['grouped'],
                                   post_matches['metadata'])
            ref_scene['post_matches'] = post_matches
    #logger.info("ref_scenes: {}".format(pformat(ref_scenes)))
    #logger.info("ref_scenes count: {}".format(len(ref_scenes)))

    # submit jobs
    projects = []
    stitched_args = []
    ifg_ids = []
    master_zip_urls = []
    master_orbit_urls = []
    slave_zip_urls = []
    slave_orbit_urls = []
    swathnums = []
    bboxes = []
    auto_bboxes = []
    orbit_dict = {}
    for ref_scene in ref_scenes:
        ref_ids = ref_scene['id']
        track = ref_scene['track']
        ref_dts = []
        for i in ref_ids:
            ref_dts.extend(grouped_refs['dates'][i])
        #logger.info("ref_ids: %s" % ref_ids)
        #logger.info("ref_dts: %s" % ref_dts)

        # set orbit urls and cache for reference dates
        ref_dt_orb = "%s_%s" % (ref_dts[0].isoformat(), ref_dts[-1].isoformat())
        if ref_dt_orb not in orbit_dict:
            match = SLC_RE.search(ref_ids[0])
            if not match:
                raise RuntimeError("Failed to recognize SLC ID %s." % ref_ids[0])
            mission = match.group('mission')
            orbit_dict[ref_dt_orb] = fetch("%s.0" % ref_dts[0].isoformat(),
                                           "%s.0" % ref_dts[-1].isoformat(),
                                           mission=mission, dry_run=True)
            if orbit_dict[ref_dt_orb] is None:
                raise RuntimeError(
                    "Failed to query for an orbit URL for track {} {} {}.".format(
                        track, ref_dts[0], ref_dts[-1]))

        # generate jobs for pre-reference pairs
        if ref_scene['pre_matches'] is not None:
            if track in ref_scene['pre_matches']['grouped']:
                matched_days = ref_scene['pre_matches']['grouped'][track]
                for matched_day, matched_ids in matched_days.iteritems():
                    matched_dts = []
                    for i in matched_ids:
                        matched_dts.extend(ref_scene['pre_matches']['dates'][i])
                    #logger.info("pre_matches matched_ids: %s" % matched_ids)
                    #logger.info("pre_matches matched_dts: %s" % matched_dts)
                    all_dts = list(chain(ref_dts, matched_dts))
                    all_dts.sort()

                    # set orbit urls and cache for matched dates
                    matched_dt_orb = "%s_%s" % (matched_dts[0].isoformat(),
                                                matched_dts[-1].isoformat())
                    if matched_dt_orb not in orbit_dict:
                        match = SLC_RE.search(matched_ids[0])
                        if not match:
                            raise RuntimeError("Failed to recognize SLC ID %s." %
                                               matched_ids[0])
                        mission = match.group('mission')
                        orbit_dict[matched_dt_orb] = fetch(
                            "%s.0" % matched_dts[0].isoformat(),
                            "%s.0" % matched_dts[-1].isoformat(),
                            mission=mission, dry_run=True)
                        if orbit_dict[matched_dt_orb] is None:
                            raise RuntimeError(
                                "Failed to query for an orbit URL for track "
                                "{} {} {}.".format(track, matched_dts[0],
                                                   matched_dts[-1]))

                    # get orbit type
                    orbit_type = 'poeorb'
                    for o in [orbit_dict[ref_dt_orb], orbit_dict[matched_dt_orb]]:
                        if RESORB_RE.search(o):
                            orbit_type = 'resorb'
                            break

                    # filter if we expect only precise orbits
                    if precise_orbit_only and orbit_type == 'resorb':
                        logger.info("Precise orbit required. Filtering job "
                                    "configured with restituted orbit.")
                    else:
                        # create jobs for backwards pair
                        if pre_ref_pd in ('backward', 'both'):
                            ifg_master_dt = all_dts[-1]
                            ifg_slave_dt = all_dts[0]
                            for swathnum in [1, 2, 3]:
                                stitched_args.append(
                                    False if len(ref_ids) == 1 or
                                    len(matched_ids) == 1 else True)
                                master_zip_urls.append(
                                    [grouped_refs['hits'][i] for i in ref_ids])
                                master_orbit_urls.append(orbit_dict[ref_dt_orb])
                                slave_zip_urls.append(
                                    [ref_scene['pre_matches']['hits'][i]
                                     for i in matched_ids])
                                slave_orbit_urls.append(orbit_dict[matched_dt_orb])
                                swathnums.append(swathnum)
                                bboxes.append(bbox)
                                auto_bboxes.append(auto_bbox)
                                projects.append(project)
                                ifg_hash = hashlib.md5(json.dumps([
                                    id_tmpl,
                                    stitched_args[-1],
                                    master_zip_urls[-1],
                                    master_orbit_urls[-1],
                                    slave_zip_urls[-1],
                                    slave_orbit_urls[-1],
                                    swathnums[-1],
                                    #bboxes[-1],
                                    #auto_bboxes[-1],
                                    projects[-1],
                                    context['azimuth_looks'],
                                    context['range_looks'],
                                    context['filter_strength'],
                                    context.get('dem_type', 'SRTM+v3'),
                                ])).hexdigest()
                                ifg_ids.append(id_tmpl.format(
                                    'M', len(ref_ids), len(matched_ids), track,
                                    ifg_master_dt, ifg_slave_dt, swathnum,
                                    orbit_type, ifg_hash[0:4]))

                        # create jobs for forward pair
                        if pre_ref_pd in ('forward', 'both'):
                            ifg_master_dt = all_dts[0]
                            ifg_slave_dt = all_dts[-1]
                            for swathnum in [1, 2, 3]:
                                stitched_args.append(
                                    False if len(ref_ids) == 1 or
                                    len(matched_ids) == 1 else True)
                                master_zip_urls.append(
                                    [ref_scene['pre_matches']['hits'][i]
                                     for i in matched_ids])
                                master_orbit_urls.append(orbit_dict[matched_dt_orb])
                                slave_zip_urls.append(
                                    [grouped_refs['hits'][i] for i in ref_ids])
                                slave_orbit_urls.append(orbit_dict[ref_dt_orb])
                                swathnums.append(swathnum)
                                bboxes.append(bbox)
                                auto_bboxes.append(auto_bbox)
                                projects.append(project)
                                ifg_hash = hashlib.md5(json.dumps([
                                    id_tmpl,
                                    stitched_args[-1],
                                    master_zip_urls[-1],
                                    master_orbit_urls[-1],
                                    slave_zip_urls[-1],
                                    slave_orbit_urls[-1],
                                    swathnums[-1],
                                    #bboxes[-1],
                                    #auto_bboxes[-1],
                                    projects[-1],
                                    context['azimuth_looks'],
                                    context['range_looks'],
                                    context['filter_strength'],
                                    context.get('dem_type', 'SRTM+v3'),
                                ])).hexdigest()
                                ifg_ids.append(id_tmpl.format(
                                    'S', len(matched_ids), len(ref_ids), track,
                                    ifg_master_dt, ifg_slave_dt, swathnum,
                                    orbit_type, ifg_hash[0:4]))

        # generate jobs for post-reference pairs
        if ref_scene['post_matches'] is not None:
            if track in ref_scene['post_matches']['grouped']:
                matched_days = ref_scene['post_matches']['grouped'][track]
                for matched_day, matched_ids in matched_days.iteritems():
                    matched_dts = []
                    for i in matched_ids:
                        matched_dts.extend(ref_scene['post_matches']['dates'][i])
                    #logger.info("post_matches matched_ids: %s" % matched_ids)
                    #logger.info("post_matches matched_dts: %s" % matched_dts)
                    all_dts = list(chain(ref_dts, matched_dts))
                    all_dts.sort()

                    # set orbit urls and cache for matched dates
                    matched_dt_orb = "%s_%s" % (matched_dts[0].isoformat(),
                                                matched_dts[-1].isoformat())
                    if matched_dt_orb not in orbit_dict:
                        match = SLC_RE.search(matched_ids[0])
                        if not match:
                            raise RuntimeError("Failed to recognize SLC ID %s." %
                                               matched_ids[0])
                        mission = match.group('mission')
                        orbit_dict[matched_dt_orb] = fetch(
                            "%s.0" % matched_dts[0].isoformat(),
                            "%s.0" % matched_dts[-1].isoformat(),
                            mission=mission, dry_run=True)
                        if orbit_dict[matched_dt_orb] is None:
                            raise RuntimeError(
                                "Failed to query for an orbit URL for track "
                                "{} {} {}.".format(track, matched_dts[0],
                                                   matched_dts[-1]))

                    # get orbit type
                    orbit_type = 'poeorb'
                    for o in [orbit_dict[ref_dt_orb], orbit_dict[matched_dt_orb]]:
                        if RESORB_RE.search(o):
                            orbit_type = 'resorb'
                            break

                    # filter if we expect only precise orbits
                    if precise_orbit_only and orbit_type == 'resorb':
                        logger.info("Precise orbit required. Filtering job "
                                    "configured with restituted orbit.")
                    else:
                        # create jobs for backwards pair
                        if post_ref_pd in ('backward', 'both'):
                            ifg_master_dt = all_dts[-1]
                            ifg_slave_dt = all_dts[0]
                            for swathnum in [1, 2, 3]:
                                stitched_args.append(
                                    False if len(ref_ids) == 1 or
                                    len(matched_ids) == 1 else True)
                                master_zip_urls.append(
                                    [ref_scene['post_matches']['hits'][i]
                                     for i in matched_ids])
                                master_orbit_urls.append(orbit_dict[matched_dt_orb])
                                slave_zip_urls.append(
                                    [grouped_refs['hits'][i] for i in ref_ids])
                                slave_orbit_urls.append(orbit_dict[ref_dt_orb])
                                swathnums.append(swathnum)
                                bboxes.append(bbox)
                                auto_bboxes.append(auto_bbox)
                                projects.append(project)
                                ifg_hash = hashlib.md5(json.dumps([
                                    id_tmpl,
                                    stitched_args[-1],
                                    master_zip_urls[-1],
                                    master_orbit_urls[-1],
                                    slave_zip_urls[-1],
                                    slave_orbit_urls[-1],
                                    swathnums[-1],
                                    #bboxes[-1],
                                    #auto_bboxes[-1],
                                    projects[-1],
                                    context['azimuth_looks'],
                                    context['range_looks'],
                                    context['filter_strength'],
                                    context.get('dem_type', 'SRTM+v3'),
                                ])).hexdigest()
                                ifg_ids.append(id_tmpl.format(
                                    'S', len(matched_ids), len(ref_ids), track,
                                    ifg_master_dt, ifg_slave_dt, swathnum,
                                    orbit_type, ifg_hash[0:4]))

                        # create jobs for forward pair
                        if post_ref_pd in ('forward', 'both'):
                            ifg_master_dt = all_dts[0]
                            ifg_slave_dt = all_dts[-1]
                            for swathnum in [1, 2, 3]:
                                stitched_args.append(
                                    False if len(ref_ids) == 1 or
                                    len(matched_ids) == 1 else True)
                                master_zip_urls.append(
                                    [grouped_refs['hits'][i] for i in ref_ids])
                                master_orbit_urls.append(orbit_dict[ref_dt_orb])
                                slave_zip_urls.append(
                                    [ref_scene['post_matches']['hits'][i]
                                     for i in matched_ids])
                                slave_orbit_urls.append(orbit_dict[matched_dt_orb])
                                swathnums.append(swathnum)
                                bboxes.append(bbox)
                                auto_bboxes.append(auto_bbox)
                                projects.append(project)
                                ifg_hash = hashlib.md5(json.dumps([
                                    id_tmpl,
                                    stitched_args[-1],
                                    master_zip_urls[-1],
                                    master_orbit_urls[-1],
                                    slave_zip_urls[-1],
                                    slave_orbit_urls[-1],
                                    swathnums[-1],
                                    #bboxes[-1],
                                    #auto_bboxes[-1],
                                    projects[-1],
                                    context['azimuth_looks'],
                                    context['range_looks'],
                                    context['filter_strength'],
                                    context.get('dem_type', 'SRTM+v3'),
                                ])).hexdigest()
                                ifg_ids.append(id_tmpl.format(
                                    'M', len(ref_ids), len(matched_ids), track,
                                    ifg_master_dt, ifg_slave_dt, swathnum,
                                    orbit_type, ifg_hash[0:4]))

    return (projects, stitched_args, auto_bboxes, ifg_ids, master_zip_urls,
            master_orbit_urls, slave_zip_urls, slave_orbit_urls, swathnums,
            bboxes)

def get_topsapp_cfg(context_file, id_tmpl=IFG_ID_TMPL):
    """Return all possible topsApp configurations."""
    # get context
    with open(context_file) as f:
        context = json.load(f)

    # get args
    project = context['project']
    master_ids = [i.strip() for i in context['master_ids'].split()]
    slave_ids = [i.strip() for i in context['slave_ids'].split()]
    subswaths = [int(i.strip()) for i in context['subswaths'].split()]
    azimuth_looks = int(context['azimuth_looks'])
    range_looks = int(context['range_looks'])
    precise_orbit_only = get_bool_param(context, 'precise_orbit_only')

    # log inputs
    logger.info("project: {}".format(project))
    logger.info("master_ids: {}".format(master_ids))
    logger.info("slave_ids: {}".format(slave_ids))
    logger.info("subswaths: {}".format(subswaths))
    logger.info("azimuth_looks: {}".format(azimuth_looks))
    logger.info("range_looks: {}".format(range_looks))
    logger.info("precise_orbit_only: {}".format(precise_orbit_only))

    # query docs
    uu = UU()
    logger.info("rest_url: {}".format(uu.rest_url))
    logger.info("dav_url: {}".format(uu.dav_url))
    logger.info("version: {}".format(uu.version))
    logger.info("grq_index_prefix: {}".format(uu.grq_index_prefix))

    # get normalized rest url
    rest_url = uu.rest_url[:-1] if uu.rest_url.endswith('/') else uu.rest_url

    # get index name and url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(
        rest_url, uu.grq_index_prefix)
    logger.info("idx: {}".format(uu.grq_index_prefix))
    logger.info("url: {}".format(url))

    # get metadata
    master_md = {i: get_metadata(i, rest_url, url) for i in master_ids}
    #logger.info("master_md: {}".format(json.dumps(master_md, indent=2)))
    slave_md = {i: get_metadata(i, rest_url, url) for i in slave_ids}
    #logger.info("slave_md: {}".format(json.dumps(slave_md, indent=2)))

    # get tracks
    track = get_track(master_md)
    logger.info("master_track: {}".format(track))
    slave_track = get_track(slave_md)
    logger.info("slave_track: {}".format(slave_track))
    if track != slave_track:
        raise RuntimeError("Slave track {} doesn't match master track {}.".format(
            slave_track, track))

    # get urls (prefer s3)
    master_urls = get_urls(master_md)
    logger.info("master_urls: {}".format(master_urls))
    slave_urls = get_urls(slave_md)
    logger.info("slave_urls: {}".format(slave_urls))

    # get orbits
    master_orbit_url = get_orbit(master_ids)
    logger.info("master_orbit_url: {}".format(master_orbit_url))
    slave_orbit_url = get_orbit(slave_ids)
    logger.info("slave_orbit_url: {}".format(slave_orbit_url))

    # get orbit type
    orbit_type = 'poeorb'
    for o in (master_orbit_url, slave_orbit_url):
        if RESORB_RE.search(o):
            orbit_type = 'resorb'
            break

    # fail if we expect only precise orbits
    if precise_orbit_only and orbit_type == 'resorb':
        raise RuntimeError("Precise orbit required.")

    # get ifg start and end dates
    ifg_master_dt, ifg_slave_dt = get_ifg_dates(master_ids, slave_ids)

    # submit jobs
    projects = []
    stitched_args = []
    ifg_ids = []
    master_zip_urls = []
    master_orbit_urls = []
    slave_zip_urls = []
    slave_orbit_urls = []
    swathnums = []
    bboxes = []
    auto_bboxes = []
    orbit_dict = {}

    # generate job configs
    bbox = [-90., 90., -180., 180.]
    auto_bbox = True
    for subswath in subswaths:
        stitched_args.append(False if len(master_ids) == 1 or
                             len(slave_ids) == 1 else True)
        master_zip_urls.append(master_urls)
        master_orbit_urls.append(master_orbit_url)
        slave_zip_urls.append(slave_urls)
        slave_orbit_urls.append(slave_orbit_url)
        swathnums.append(subswath)
        bboxes.append(bbox)
        auto_bboxes.append(auto_bbox)
        projects.append(project)
        ifg_hash = hashlib.md5(json.dumps([
            id_tmpl,
            stitched_args[-1],
            master_zip_urls[-1],
            master_orbit_urls[-1],
            slave_zip_urls[-1],
            slave_orbit_urls[-1],
            swathnums[-1],
            bboxes[-1],
            auto_bboxes[-1],
            projects[-1],
            azimuth_looks,
            range_looks,
        ])).hexdigest()
        ifg_ids.append(id_tmpl.format('M', len(master_ids), len(slave_ids),
                                      track, ifg_master_dt, ifg_slave_dt,
                                      subswath, orbit_type, ifg_hash[0:4]))

    return (projects, stitched_args, auto_bboxes, ifg_ids, master_zip_urls,
            master_orbit_urls, slave_zip_urls, slave_orbit_urls, swathnums,
            bboxes)

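# --- Illustrative input (not part of the original module) ---
# A hypothetical minimal *_context.json accepted by get_topsapp_cfg();
# every value is a placeholder and only the keys read above are shown.
# master_ids/slave_ids/subswaths are whitespace-delimited strings here,
# matching the .split() parsing above.
#
# {
#   "project": "my_project",
#   "master_ids": "S1A_IW_SLC__EXAMPLE_A",
#   "slave_ids": "S1A_IW_SLC__EXAMPLE_B",
#   "subswaths": "1 2 3",
#   "azimuth_looks": "3",
#   "range_looks": "7",
#   "precise_orbit_only": "true"
# }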