def create_target_identification_nodes(
    bundle_db: BundleDB,
    target_identifications: List[TargetIdentification],
    reference_type: str,
) -> List[NodeBuilder]:
    """
    Build one Target_Identification node per TargetIdentification
    record (from db).

    For every record a "target" context product is created in the
    bundle database, a target dictionary is assembled from the record
    and handed to get_target().  The resulting nodes are returned in
    input order; they get inserted into the XML when passed into
    make_label.

    reference_type must be one of "data", "collection" or "bundle";
    any other value raises KeyError once the first record is processed.
    """
    reference_type_dict = {
        "data": "data_to_target",
        "collection": "collection_to_target",
        "bundle": "bundle_to_target",
    }
    nodes: List[NodeBuilder] = []
    for record in target_identifications:
        context_lidvid = get_target_lidvid([record.type, record.name])
        bundle_db.create_context_product(context_lidvid, "target")
        # The lookup stays inside the loop on purpose: an unknown
        # reference_type only fails when there is a record to process.
        target_dict: Dict[str, Any] = {
            "name": record.name,
            "type": record.type,
            "alternate_designations": record.alternate_designations,
            "description": record.description,
            "lid": record.lid_reference,
            "reference_type": reference_type_dict[reference_type],
        }
        nodes.append(get_target(target_dict))
    return nodes
def _fill_in_old_browse_collection(
    db: BundleDB,
    changes_dict: ChangesDict,
    bundle_lidvid: LIDVID,
    data_collection_lidvid: LIDVID,
) -> None:
    """
    Record an unchanged browse collection and its products.

    The browse collection LID is derived from the data collection's LID
    and shares the data collection's VID.  The collection is marked as
    unchanged in changes_dict and linked to the bundle in the database;
    then every product the database lists for that browse collection is
    marked as unchanged in changes_dict too.

    Exceptions raised while logging or walking the products are handed
    to PDS_LOGGER.exception() rather than handled here; the logger is
    closed in every case.
    """
    browse_collection_lid = data_collection_lidvid.lid().to_browse_lid()
    browse_collection_vid = data_collection_lidvid.vid()
    browse_collection_lidvid = LIDVID.create_from_lid_and_vid(
        browse_collection_lid, browse_collection_vid
    )

    changes_dict.set(browse_collection_lid, browse_collection_vid, False)
    db.create_bundle_collection_link(
        str(bundle_lidvid), str(browse_collection_lidvid)
    )

    try:
        PDS_LOGGER.open("Fill in old browse collection")
        PDS_LOGGER.log(
            "info", f"Created link and change for {browse_collection_lidvid}"
        )
        for product in db.get_collection_products(str(browse_collection_lidvid)):
            product_lidvid = LIDVID(product.lidvid)
            changes_dict.set(product_lidvid.lid(), product_lidvid.vid(), False)
            PDS_LOGGER.log("info", f"Created link and change for {product_lidvid}")
    except Exception as e:
        PDS_LOGGER.exception(e)
    finally:
        PDS_LOGGER.close()
def make_schema_collection_label(
    bundle_db: BundleDB,
    info: Citation_Information,
    collection_lidvid: str,
    bundle_lidvid: str,
    verify: bool,
    mod_date: str,
) -> bytes:
    """
    Build the label for the schema collection with this LIDVID from
    the contents of the bundle database.

    Raises ValueError when the database holds no schema products and
    LabelError when building the label fails.  When verify is True the
    label is verified against its XML and Schematron schemas; an
    exception is raised if either fails.
    """
    # TODO this is sloppy; is there a better way?
    schema_products = bundle_db.get_schema_products()
    record_count = len(schema_products)
    if record_count <= 0:
        raise ValueError(f"{collection_lidvid} has no schema products.")

    collection_lid = lidvid_to_lid(collection_lidvid)
    collection_vid = lidvid_to_vid(collection_lidvid)
    # The result is not used; the lookup is kept because the original
    # code performs it at this point.
    bundle_db.get_collection(collection_lidvid)
    proposal_id = bundle_db.get_bundle(bundle_lidvid).proposal_id
    instruments = ",".join(bundle_db.get_instruments_of_the_bundle()).upper()

    title_fields = {
        "instrument": instruments,
        "proposal_id": str(proposal_id),
    }
    title: NodeBuilder = make_schema_collection_title(title_fields)
    inventory_name = get_collection_inventory_name(bundle_db, collection_lidvid)

    try:
        label_fields = {
            "collection_lid": collection_lid,
            "collection_vid": collection_vid,
            "record_count": record_count,
            "title": title,
            "mod_date": mod_date,
            "proposal_id": str(proposal_id),
            "Citation_Information": make_citation_information(info),
            "inventory_name": inventory_name,
            "Context_Area": combine_nodes_into_fragment([]),
            "Reference_List": combine_nodes_into_fragment([]),
            "collection_type": "Schema",
        }
        label = make_label(label_fields).toxml().encode()
    except Exception as e:
        raise LabelError(collection_lidvid) from e

    return pretty_and_verify(label, verify)
def _populate_bundle(changes_dict: ChangesDict, db: BundleDB) -> LIDVID:
    """
    Create the bundle in the database for the first changed bundle LID
    found in changes_dict and return its LIDVID.

    Raises RuntimeError when changes_dict holds no changed bundle LID.
    """
    for lid, (vid, changed) in changes_dict.items():
        if not changed or not lid.is_bundle_lid():
            continue
        bundle_lidvid = LIDVID.create_from_lid_and_vid(lid, vid)
        db.create_bundle(str(bundle_lidvid))
        # there's only one, so stop at the first match
        return bundle_lidvid
    raise RuntimeError("No changed bundle LID in changes_dict.")
def _populate_schema_collection(db: BundleDB, bundle_lidvid: str) -> None:
    """
    Create the bundle's "schema" collection at version 1.0 in the
    database, then one schema product for each of DISP_LIDVID,
    HST_LIDVID and PDS4_LIDVID.
    """
    # TODO We're assuming here that there will only ever be one schema
    # collection.  I'm not sure that's true.
    schema_lid = LIDVID(bundle_lidvid).lid().extend_lid("schema")
    schema_lidvid = LIDVID.create_from_lid_and_vid(schema_lid, VID("1.0"))
    db.create_schema_collection(str(schema_lidvid), bundle_lidvid)

    # TODO Hardcoded here.  Is this what we want to do?
    for schema_product_lidvid in (DISP_LIDVID, HST_LIDVID, PDS4_LIDVID):
        db.create_schema_product(schema_product_lidvid)
def populate_database_from_browse_file(
    db: BundleDB,
    browse_product_lidvid: str,
    fits_product_lidvid: str,
    collection_lidvid: str,
    os_filepath: str,
    basename: str,
    byte_size: int,
) -> None:
    """
    Create the browse product in the database, then the browse file
    that belongs to it.
    """
    # The product is created first, then the file that refers to it.
    db.create_browse_product(
        browse_product_lidvid, fits_product_lidvid, collection_lidvid
    )
    db.create_browse_file(
        os_filepath, basename, browse_product_lidvid, byte_size
    )
def make_document_product_label(
    bundle_db: BundleDB,
    info: Citation_Information,
    document_product_lidvid: str,
    bundle_lidvid: str,
    verify: bool,
    publication_date: Optional[str] = None,
) -> bytes:
    """
    Build the label for the document product with this LIDVID from the
    contents of the bundle database.

    A falsy publication_date is replaced by today's date in ISO format.
    Raises LabelError when building the label fails.  When verify is
    True the label is verified against its XML and Schematron schemas;
    an exception is raised if either fails.
    """
    proposal_id = bundle_db.get_bundle(bundle_lidvid).proposal_id
    title = f"Summary of the observation plan for HST proposal {proposal_id}"
    investigation_lidvid = (
        f"urn:nasa:pds:context:investigation:individual.hst_{proposal_id:05}::1.0"
    )

    product_lid = lidvid_to_lid(document_product_lidvid)
    product_vid = lidvid_to_vid(document_product_lidvid)

    if not publication_date:
        publication_date = date.today().isoformat()

    product_files: List[File] = bundle_db.get_product_files(document_product_lidvid)
    document_file_basenames = [
        product_file.basename for product_file in product_files
    ]

    try:
        label_fields = {
            "investigation_lidvid": investigation_lidvid,
            "product_lid": product_lid,
            "product_vid": product_vid,
            "title": title,
            "publication_date": publication_date,
            "Citation_Information": make_doc_citation_information(info),
            "Document_Edition": make_document_edition(
                "0.0", document_file_basenames
            ),
        }
        label = make_label(label_fields).toxml().encode()
    except Exception as e:
        raise LabelError(document_product_lidvid) from e

    return pretty_and_verify(label, verify)
def make_browse_product_label(
    bundle_db: BundleDB,
    browse_collection_lidvid: str,
    browse_product_lidvid: str,
    browse_file_basename: str,
    bundle_lidvid: str,
    verify: bool,
) -> bytes:
    """
    Build the label for the browse product with the given LIDVID from
    the contents of the bundle database.

    Raises TypeError when the product, file or collection fetched from
    the database is not a BrowseProduct, BrowseFile or OtherCollection
    respectively, and LabelError when building the label fails.  When
    verify is True the label is verified against its XML and Schematron
    schemas; an exception is raised if either fails.
    """
    product: Product = bundle_db.get_product(browse_product_lidvid)
    if not isinstance(product, BrowseProduct):
        raise TypeError(f"{product} is not a BrowseProduct.")
    fits_product_lidvid = product.fits_product_lidvid

    db_file: File = bundle_db.get_file(browse_file_basename, browse_product_lidvid)
    if not isinstance(db_file, BrowseFile):
        raise TypeError(f"{db_file} is not a BrowseFile.")

    collection: Collection = bundle_db.get_collection(browse_collection_lidvid)
    if not isinstance(collection, OtherCollection):
        raise TypeError(f"{collection} is not a OtherCollection.")

    bundle = bundle_db.get_bundle(bundle_lidvid)

    try:
        label_fields = {
            "proposal_id": bundle.proposal_id,
            "suffix": collection.suffix,
            "browse_lid": lidvid_to_lid(browse_product_lidvid),
            "browse_vid": lidvid_to_vid(browse_product_lidvid),
            "data_lidvid": fits_product_lidvid,
            "browse_file_name": browse_file_basename,
            "object_length": db_file.byte_size,
        }
        label = make_label(label_fields).toxml().encode()
    except Exception as e:
        raise LabelError(browse_product_lidvid, browse_file_basename) from e

    return pretty_and_verify(label, verify)
def make_collection_label(
    bundle_db: BundleDB,
    info: Citation_Information,
    collection_lidvid: str,
    bundle_lidvid: str,
    verify: bool,
    use_mod_date_for_testing: bool = False,
) -> bytes:
    """
    Build the label for the collection with this LIDVID from the
    contents of the bundle database, delegating to the label maker
    chosen by switch_on_collection_subtype().

    When verify is True the label is verified against its XML and
    Schematron schemas; an exception is raised if either fails.
    """
    collection = bundle_db.get_collection(collection_lidvid)

    # Labels built for comparison against pre-made XML in tests use the
    # fixed MOD_DATE_FOR_TESTESING; otherwise the modification date is
    # the date on which the label is created.
    mod_date = (
        MOD_DATE_FOR_TESTESING
        if use_mod_date_for_testing
        else get_current_date()
    )

    # make_other_collection_label is passed in both the second and the
    # fourth slot.
    label_maker = switch_on_collection_subtype(
        collection,
        make_context_collection_label,
        make_other_collection_label,
        make_schema_collection_label,
        make_other_collection_label,
    )
    return label_maker(
        bundle_db, info, collection_lidvid, bundle_lidvid, verify, mod_date
    )
def get_collection_inventory_name(bundle_db: BundleDB, collection_lidvid: str) -> str:
    """
    Return the inventory file name for the collection with this LIDVID.

    Three of the names are fixed ("collection_context.csv",
    "collection.csv", "collection_schema.csv"); the fourth is built
    from the collection's prefix, instrument and suffix.
    """

    # switch_on_collection_subtype() is given one callable per slot, so
    # even the constant names have to be wrapped in functions.
    def _context_name(_collection: Collection) -> str:
        return "collection_context.csv"

    def _document_name(_collection: Collection) -> str:
        return "collection.csv"

    def _schema_name(_collection: Collection) -> str:
        return "collection_schema.csv"

    def _other_name(coll: Collection) -> str:
        other = cast(OtherCollection, coll)
        prefix = other.prefix
        instrument = other.instrument
        suffix = other.suffix
        return f"collection_{prefix}_{instrument}_{suffix}.csv"

    collection: Collection = bundle_db.get_collection(collection_lidvid)
    name_getter = switch_on_collection_subtype(
        collection,
        _context_name,
        _document_name,
        _schema_name,
        _other_name,
    )
    return name_getter(collection)
def make_transfer_manifest(
    bundle_db: BundleDB, bundle_lidvid: str, lidvid_to_dirpath: _LTD
) -> str:
    """
    Return the transfer manifest text for the bundle.

    One (LIDVID, filepath) pair is collected for the bundle, for each
    of its collections and for each product of those collections.  The
    sorted pairs are written one per line, the LIDVID left-justified
    and padded to the width of the longest LIDVID.
    """
    bundle = bundle_db.get_bundle(bundle_lidvid)
    pairs = [_make_bundle_pair(bundle, lidvid_to_dirpath)]
    for collection in bundle_db.get_bundle_collections(str(bundle.lidvid)):
        pairs.append(
            _make_collection_pair(bundle_db, collection, lidvid_to_dirpath)
        )
        collection_products = bundle_db.get_collection_products(
            str(collection.lidvid)
        )
        for product in collection_products:
            pairs.append(_make_product_pair(product, lidvid_to_dirpath))

    pairs.sort()
    max_width = max(len(lidvid) for (lidvid, _filepath) in pairs)
    manifest_lines = [
        f"{lidvid!s:<{max_width}} {filepath!s}\n"
        for (lidvid, filepath) in pairs
    ]
    return "".join(manifest_lines)
def _populate_target_identification(
    changes_dict: ChangesDict, db: BundleDB, sv_deltas: COWFS
) -> None:
    """
    For every changed product in changes_dict, create a target
    identification record in the database from each ".fits" file in
    the product's directory whose stem is accepted by
    has_suffix_shm_spt_shf().
    """

    def _is_target_fits(filename: str) -> bool:
        # Stem and extension are both compared in lower case.
        stem, extension = fs.path.splitext(filename)
        return extension.lower() == ".fits" and has_suffix_shm_spt_shf(
            stem.lower()
        )

    for lid, (vid, changed) in changes_dict.items():
        if not changed or not lid.is_product_lid():
            continue
        lidvid = LIDVID.create_from_lid_and_vid(lid, vid)
        product_path = lid_to_dirpath(lidvid.lid())

        # Select the matching files first, then register them.
        target_fits_files = [
            filename
            for filename in sv_deltas.listdir(product_path)
            if _is_target_fits(filename)
        ]

        # The database is given the file's path on the real filesystem.
        for filename in target_fits_files:
            fits_os_path = sv_deltas.getsyspath(
                fs.path.join(product_path, filename)
            )
            db.create_target_identification(fits_os_path)
def populate_database_from_fits_file(
    db: BundleDB, os_filepath: str, fits_product_lidvid: str
) -> None:
    """
    Record a FITS file in the database together with its HDUs,
    associations and cards.

    If opening or processing the file raises OSError, the file is
    recorded as a bad FITS file along with the error message instead.
    """
    file_basename = basename(os_filepath)
    try:
        # The context manager closes the file even when populating the
        # database fails part-way through.
        with astropy.io.fits.open(os_filepath) as fits:
            hdu_count = len(fits)
            db.create_fits_file(
                os_filepath, file_basename, fits_product_lidvid, hdu_count
            )
            _populate_hdus_associations_and_cards(
                db, fits, file_basename, fits_product_lidvid
            )
    except OSError as e:
        db.create_bad_fits_file(
            os_filepath, file_basename, fits_product_lidvid, str(e)
        )
def make_document_collection_inventory(
    bundle_db: BundleDB, collection_lidvid: str
) -> bytes:
    """
    Build the inventory text for the document collection with this
    LIDVID from the contents of the bundle database.

    Each product of the collection contributes one "P," line.  Each
    instrument of the bundle contributes two "S," lines, one for its
    data handbook (-dhb) and one for its instrument handbook (-ihb).
    Lines end with CR LF.
    """
    inventory_lines: List[str] = []
    for product in bundle_db.get_collection_products(collection_lidvid):
        inventory_lines.append(f"P,{product.lidvid}\r\n")

    # Include handbooks in the document collection csv
    for inst in bundle_db.get_instruments_of_the_bundle():
        inventory_lines.extend(
            (
                f"S,urn:nasa:pds:hst-support:document:{inst}-dhb\r\n",
                f"S,urn:nasa:pds:hst-support:document:{inst}-ihb\r\n",
            )
        )

    return "".join(inventory_lines).encode()
def _directory_siblings(
    working_dir: str, bundle_db: BundleDB, product_lidvid: str
) -> List[str]:
    """
    Return the sorted basenames of all files that share a directory
    with the product's file.

    The tree under <working_dir>/mastDownload is searched for a
    directory containing a file named like the product's file; the
    sorted list of filenames in the first such directory is returned.
    An empty list is returned when no directory contains it.

    The product's basename is looked up once, before the walk, instead
    of once per visited directory.  As a result the lookup also happens
    when mastDownload is missing or empty.
    """
    product_basename = bundle_db.get_product_file(product_lidvid).basename
    mast_download_dir = os.path.join(working_dir, "mastDownload")
    for _dirpath, _dirnames, filenames in os.walk(mast_download_dir):
        if product_basename in filenames:
            return sorted(filenames)
    return []
def make_schema_collection_inventory(
    bundle_db: BundleDB, collection_lidvid: str
) -> bytes:
    """
    Build the inventory text for the schema collection from the
    contents of the bundle database: one "S," line, ending with CR LF,
    per schema product.

    collection_lidvid is not used by this function.
    """
    inventory_lines: List[str] = []
    for product in bundle_db.get_schema_products():
        inventory_lines.append(f"S,{product.lidvid}\r\n")
    return "".join(inventory_lines).encode()
def make_collection_inventory(bundle_db: BundleDB, collection_lidvid: str) -> bytes:
    """
    Build the inventory text for the collection with this LIDVID from
    the contents of the bundle database, delegating to the inventory
    maker chosen by switch_on_collection_subtype().
    """
    collection = bundle_db.get_collection(collection_lidvid)
    inventory_maker = switch_on_collection_subtype(
        collection,
        make_context_collection_inventory,
        make_document_collection_inventory,
        make_schema_collection_inventory,
        make_other_collection_inventory,
    )
    return inventory_maker(bundle_db, collection_lidvid)
def make_checksum_manifest(
    bundle_db: BundleDB, bundle_lidvid: str, lidvid_to_dirpath: _LTD
) -> str:
    """
    Return the checksum manifest text for the bundle.

    Pairs are collected for the bundle label, for every collection
    label and collection inventory, for every product label, for every
    context product label and for every product file.  The sorted
    pairs are written one per line as "<second item> <first item>".
    """
    bundle = bundle_db.get_bundle(bundle_lidvid)
    # From here on, the LIDVID as stored on the bundle record is used.
    stored_bundle_lidvid = str(bundle.lidvid)

    product_files: List[File] = []
    label_pairs = [
        make_bundle_label_pair(
            bundle_db.get_bundle_label(stored_bundle_lidvid), lidvid_to_dirpath
        )
    ]

    for collection in bundle_db.get_bundle_collections(stored_bundle_lidvid):
        collection_lidvid = str(collection.lidvid)
        collection_label = bundle_db.get_collection_label(collection_lidvid)
        label_pairs.append(
            make_collection_label_pair(collection_label, lidvid_to_dirpath)
        )
        collection_inventory = bundle_db.get_collection_inventory(
            collection_lidvid
        )
        label_pairs.append(
            make_collection_inventory_pair(collection_inventory, lidvid_to_dirpath)
        )

        for product in bundle_db.get_collection_products(collection_lidvid):
            product_lidvid = str(product.lidvid)
            product_label = bundle_db.get_product_label(product_lidvid)
            label_pairs.append(
                make_product_label_pair(product_label, lidvid_to_dirpath)
            )
            product_files.extend(bundle_db.get_product_files(product_lidvid))

    for context_label in bundle_db.get_context_product_labels():
        label_pairs.append(
            make_context_product_pair(context_label, lidvid_to_dirpath)
        )

    file_pairs = [
        make_checksum_pair(product_file, lidvid_to_dirpath)
        for product_file in product_files
    ]
    sorted_pairs = sorted(file_pairs + label_pairs)
    return "".join(
        f"{checksum} {path}\n" for (path, checksum) in sorted_pairs
    )
def _populate_products(
    changes_dict: ChangesDict, db: BundleDB, sv_deltas: COWFS
) -> None:
    """
    Populate the database with the products listed in changes_dict.

    A changed product in a "document" collection becomes a document
    product with one document file per file whose extension is in
    DOCUMENT_SUFFIXES.  Any other changed product becomes a FITS
    product, populated from each of its ".fits" files.  An unchanged
    product is only linked to its collection, and only when that
    collection itself changed.
    """
    for lid, (vid, changed) in changes_dict.items():
        if not lid.is_product_lid():
            continue

        lidvid = LIDVID.create_from_lid_and_vid(lid, vid)
        collection_lidvid = changes_dict.parent_lidvid(lidvid)

        if not changed:
            if changes_dict.changed(collection_lidvid.lid()):
                db.create_collection_product_link(
                    str(collection_lidvid), str(lidvid)
                )
            continue

        product_path = lid_to_dirpath(lidvid.lid())
        if collection_lidvid.lid().collection_id == "document":
            db.create_document_product(str(lidvid), str(collection_lidvid))
            document_names = [
                name
                for name in sv_deltas.listdir(product_path)
                if fs.path.splitext(name)[1].lower() in DOCUMENT_SUFFIXES
            ]
            for name in document_names:
                sys_filepath = sv_deltas.getsyspath(
                    fs.path.join(product_path, name)
                )
                db.create_document_file(sys_filepath, name, str(lidvid))
        else:
            db.create_fits_product(str(lidvid), str(collection_lidvid))
            fits_names = [
                name
                for name in sv_deltas.listdir(product_path)
                if fs.path.splitext(name)[1].lower() == ".fits"
            ]
            for name in fits_names:
                fits_os_path = sv_deltas.getsyspath(
                    fs.path.join(product_path, name)
                )
                populate_database_from_fits_file(db, fits_os_path, str(lidvid))
def make_doc_citation_information2(
    bundle_db: BundleDB, bundle_lid: str, proposal_id: int
) -> NodeBuilder:
    """
    Create a ``<Citation_Information />`` element for the proposal ID,
    filled in from the proposal info stored for the bundle.
    """
    proposal_info = bundle_db.get_proposal_info(bundle_lid)
    author_list = proposal_info.author_list
    publication_year = proposal_info.publication_year
    description = _make_proposal_description(
        proposal_id,
        proposal_info.proposal_title,
        proposal_info.pi_name,
        proposal_info.proposal_year,
    )
    return _citation_information_template(
        {
            "author_list": author_list,
            "publication_year": publication_year,
            "description": description,
        }
    )
def _populate_collections(changes_dict: ChangesDict, db: BundleDB) -> None:
    """
    Populate the database with the collections listed in changes_dict.

    A changed collection is created according to its collection_id:
    "document" as a document collection, "schema" through
    _populate_schema_collection(), anything else as an "other"
    collection.  An unchanged collection is only linked to its bundle,
    and only when that bundle itself changed.
    """
    for lid, (vid, changed) in changes_dict.items():
        if not lid.is_collection_lid():
            continue

        lidvid = LIDVID.create_from_lid_and_vid(lid, vid)
        bundle_lidvid = changes_dict.parent_lidvid(lidvid)

        if not changed:
            if changes_dict.changed(bundle_lidvid.lid()):
                db.create_bundle_collection_link(str(bundle_lidvid), str(lidvid))
            continue

        collection_id = lid.collection_id
        if collection_id == "document":
            db.create_document_collection(str(lidvid), str(bundle_lidvid))
        elif collection_id == "schema":
            # The schema collection is created by its own helper.
            _populate_schema_collection(db, str(bundle_lidvid))
        else:
            db.create_other_collection(str(lidvid), str(bundle_lidvid))
def get_file_offsets(
    bundle_db: BundleDB, fits_product_lidvid: str
) -> List[Tuple[int, int, int, int]]:
    """
    Return the file offsets that the bundle database holds for the
    FITS product; a plain delegation to BundleDB.get_file_offsets().
    """
    offsets = bundle_db.get_file_offsets(fits_product_lidvid)
    return offsets
def get_card_dictionaries(
    bundle_db: BundleDB, fits_product_lidvid: str, file_basename: str
) -> List[Dict[str, Any]]:
    """
    Return the card dictionaries that the bundle database holds for
    the given file of the FITS product; a plain delegation to
    BundleDB.get_card_dictionaries().
    """
    card_dicts = bundle_db.get_card_dictionaries(
        fits_product_lidvid, file_basename
    )
    return card_dicts
def make_fits_product_label(
    working_dir: str,
    bundle_db: BundleDB,
    collection_lidvid: str,
    product_lidvid: str,
    bundle_lidvid: str,
    file_basename: str,
    verify: bool,
    use_mod_date_for_testing: bool = False,
) -> bytes:
    """
    Create the label text for one file of a FITS product using the
    bundle database, and return it after pretty_and_verify().

    Besides building the label this function writes to the database:
    it stores the product's start/stop times and wavelength range,
    creates context products for the investigation, instrument host,
    instrument and targets, and (when a title format is found) stores
    the collection title.

    Raises LabelError (chained to the original exception) when anything
    other than an AssertionError goes wrong while building the label.
    """
    try:
        # NOTE(review): `product` is never read afterwards; the lookup
        # itself is the only effect of this line.
        product = bundle_db.get_product(product_lidvid)
        collection = bundle_db.get_collection(collection_lidvid)
        if not isinstance(collection, OtherCollection):
            raise TypeError(f"{collection} is not OtherCollection.")
        instrument = collection.instrument
        suffix = collection.suffix

        # If a label is created for testing purpose to compare with pre-made XML
        # we will use MOD_DATE_FOR_TESTESING as the modification date.
        if not use_mod_date_for_testing:
            # Get the date when the label is created
            mod_date = get_current_date()
        else:
            mod_date = MOD_DATE_FOR_TESTESING

        card_dicts = bundle_db.get_card_dictionaries(product_lidvid, file_basename)
        lookup = DictLookup(file_basename, card_dicts)
        # The lookups below are built from this file plus the files
        # that sit next to it in the download directory.
        siblings = _directory_siblings(working_dir, bundle_db, product_lidvid)
        hdu_lookups = _find_RAWish_lookups(bundle_db, product_lidvid,
                                           file_basename, siblings)
        shm_lookup = _find_SHMish_lookup(bundle_db, product_lidvid,
                                         file_basename, siblings)

        start_date_time, stop_date_time = get_start_stop_date_times(
            hdu_lookups, shm_lookup)
        exposure_duration = get_exposure_duration(hdu_lookups, shm_lookup)
        start_stop_times = {
            "start_date_time": start_date_time,
            "stop_date_time": stop_date_time,
            "exposure_duration": exposure_duration,
        }

        # Store start/stop time for each fits_product in fits_products table.
        # The min/max will be pulled out for roll-up in data collection/bundle.
        bundle_db.update_fits_product_time(product_lidvid, start_date_time,
                                           stop_date_time)

        hst_parameters = get_hst_parameters(hdu_lookups, shm_lookup)
        bundle = bundle_db.get_bundle(bundle_lidvid)
        proposal_id = bundle.proposal_id

        investigation_area_name = mk_Investigation_Area_name(proposal_id)
        investigation_area_lidvid = mk_Investigation_Area_lidvid(proposal_id)
        # Register the context products this label refers to.
        bundle_db.create_context_product(investigation_area_lidvid,
                                         "investigation")
        bundle_db.create_context_product(instrument_host_lidvid(),
                                         "instrument_host")
        bundle_db.create_context_product(observing_system_lidvid(instrument),
                                         "instrument")

        # Fetch target identifications from db
        target_id = shm_lookup["TARG_ID"]
        target_identifications = bundle_db.get_target_identifications_based_on_id(
            target_id)
        # At this stage, target identifications should be in the db
        if len(target_identifications) == 0:
            raise ValueError("Target identification is not stored in db.")

        # NOTE(review): the empty-list initialisation is immediately
        # overwritten by the call below.
        target_identification_nodes: List[NodeBuilder] = []
        target_identification_nodes = create_target_identification_nodes(
            bundle_db, target_identifications, "data")

        # Get wavelength
        instrument_id = get_instrument_id(hdu_lookups, shm_lookup)
        detector_ids = get_detector_ids(hdu_lookups, shm_lookup)
        filter_name = get_filter_name(hdu_lookups, shm_lookup)
        wavelength_range = wavelength_ranges(instrument_id, detector_ids,
                                             filter_name)
        bundle_db.update_wavelength_range(product_lidvid, wavelength_range)

        # Get title
        channel_id = get_channel_id(hdu_lookups, shm_lookup)
        try:
            titles = get_titles_format(instrument_id, channel_id, suffix)
            # titles[0] is the product title format, titles[1] the
            # collection title format; both are filled in with the
            # same I/F/P fields.
            product_title = titles[0] + "."
            product_title = product_title.format(I=instrument_id + "/" +
                                                 channel_id,
                                                 F=file_basename,
                                                 P=proposal_id)
            collection_title = titles[1] + "."
            collection_title = collection_title.format(I=instrument_id + "/" +
                                                       channel_id,
                                                       F=file_basename,
                                                       P=proposal_id)
            # save data/misc collection title to OtherCollection table
            bundle_db.update_fits_product_collection_title(
                collection_lidvid, collection_title)
        except KeyError:
            # If product_title doesn't exist in SUFFIX_TITLES, we use the
            # following text as the product_title.
            # In this case no collection title is stored.
            product_title = (
                f"{instrument_id} data file {file_basename} " +
                f"obtained by the HST Observing Program {proposal_id}.")

        # Dictionary used for primary result summary
        processing_level = get_processing_level(suffix, instrument_id,
                                                channel_id)
        primary_result_dict: Dict[str, Any] = {}
        primary_result_dict["processing_level"] = processing_level
        primary_result_dict["description"] = product_title
        primary_result_dict["wavelength_range"] = wavelength_range

        # Dictionary passed into templates. Use the same data dictionary for
        # either data label template or misc label template
        data_dict = {
            "lid": lidvid_to_lid(product_lidvid),
            "vid": lidvid_to_vid(product_lidvid),
            "title": product_title,
            "mod_date": mod_date,
            "file_name": file_basename,
            "file_contents": get_file_contents(bundle_db, card_dicts,
                                               instrument, product_lidvid),
            "Investigation_Area": investigation_area(investigation_area_name,
                                                     investigation_area_lidvid,
                                                     "data"),
            "Observing_System": observing_system(instrument),
            "Time_Coordinates": get_time_coordinates(start_stop_times),
            "Target_Identification":
            combine_nodes_into_fragment(target_identification_nodes),
            "HST": hst_parameters,
            "Primary_Result_Summary":
            primary_result_summary(primary_result_dict),
            "Reference_List": make_document_reference_list([instrument],
                                                           "data"),
        }

        # Pass the data_dict to either data label or misc label based on
        # collection_type
        # NOTE(review): `label` is only bound for "data" and
        # "miscellaneous"; any other collection_type would leave it
        # unbound and the final return would raise UnboundLocalError.
        collection_type = get_collection_type(suffix, instrument_id,
                                              channel_id)
        if collection_type == "data":
            label = make_data_label(data_dict).toxml().encode()
        elif collection_type == "miscellaneous":
            label = make_misc_label(data_dict).toxml().encode()
    except AssertionError:
        # NOTE(review): the missing-target check above raises
        # ValueError, not AssertionError, so this handler only sees
        # assertions raised by callees; it replaces them with this
        # message, which may not describe the real failure.
        raise AssertionError(
            f"{product_lidvid} has no target identifications stored in DB.")
    except Exception as e:
        print(str(e))
        # NOTE(review): lookup, hdu_lookups and shm_lookup are assigned
        # inside the try block.  If the failure happened before they
        # were bound (e.g. the TypeError above), or hdu_lookups is
        # empty, building this tuple itself raises and hides `e`.
        raise LabelError(product_lidvid, file_basename,
                         (lookup, hdu_lookups[0], shm_lookup)) from e

    return pretty_and_verify(label, verify)
def _build_browse_collection(
    db: BundleDB,
    changes_dict: ChangesDict,
    browse_deltas: COWFS,
    bundle_lidvid: LIDVID,
    data_collection_lidvid: LIDVID,
    bundle_path: str,
) -> None:
    """
    Build the browse collection that corresponds to a data collection.

    The browse collection gets the data collection's VID, is marked as
    changed in changes_dict, gets a directory in browse_deltas and is
    created in (and linked to the bundle in) the database.  Then, for
    each "$"-marked product directory of the data collection:

    - if the data product changed, a browse product is created and a
      .jpg is generated by picmaker for every file in the data
      product's directory, each recorded as a browse file;
    - otherwise the browse product is marked unchanged and only linked
      to the browse collection.

    Raises a plain Exception (carrying the file name and traceback
    text) when picmaker raises IndexError.
    """
    bundle_segment = bundle_lidvid.lid().parts()[0]
    collection_segment = data_collection_lidvid.lid().parts()[1]
    browse_collection_lid = data_collection_lidvid.lid().to_browse_lid()
    # Directory names in this filesystem carry a trailing "$".
    collection_path = f"{bundle_path}{collection_segment}$/"
    browse_collection_segment = browse_collection_lid.collection_id
    browse_collection_path = f"{bundle_path}{browse_collection_segment}$/"
    # The browse collection shares the data collection's version.
    browse_collection_vid = data_collection_lidvid.vid()
    browse_collection_lidvid = LIDVID.create_from_lid_and_vid(
        browse_collection_lid, browse_collection_vid)

    changes_dict.set(browse_collection_lid, browse_collection_vid, True)

    browse_deltas.makedirs(browse_collection_path, recreate=True)

    db.create_other_collection(str(browse_collection_lidvid),
                               str(bundle_lidvid))
    db.create_bundle_collection_link(str(bundle_lidvid),
                                     str(browse_collection_lidvid))

    # Entries containing "$" are taken to be product directories; the
    # last character of each entry is dropped to get the segment name.
    product_segments = [
        str(prod[:-1]) for prod in browse_deltas.listdir(collection_path)
        if "$" in prod
    ]
    for product_segment in product_segments:
        # These product_segments are from the data_collection
        product_lid = LID.create_from_parts(
            [bundle_segment, collection_segment, product_segment])
        product_vid = changes_dict.vid(product_lid)

        product_path = f"{collection_path}{product_segment}$/"
        browse_product_path = f"{browse_collection_path}{product_segment}$/"

        # The browse product takes the data product's VID.
        browse_product_lidvid = _extend_lidvid(browse_collection_lid,
                                               product_vid, product_segment)

        if changes_dict.changed(product_lid):
            # NOTE(review): the FITS product LIDVID is built with the
            # data *collection's* VID, not product_vid — confirm that
            # this is intended.
            fits_product_lidvid = _extend_lidvid(
                data_collection_lidvid.lid(),
                data_collection_lidvid.vid(),
                product_segment,
            )

            bpl = LIDVID(browse_product_lidvid)
            changes_dict.set(bpl.lid(), bpl.vid(), True)

            browse_deltas.makedirs(browse_product_path, recreate=True)
            db.create_browse_product(
                browse_product_lidvid,
                fits_product_lidvid,
                str(browse_collection_lidvid),
            )
            db.create_collection_product_link(str(browse_collection_lidvid),
                                              browse_product_lidvid)

            # Every file in the data product's directory is processed;
            # no extension filter is applied here.
            for fits_file in browse_deltas.listdir(product_path):
                fits_filepath = fs.path.join(product_path, fits_file)
                fits_os_filepath = browse_deltas.getsyspath(fits_filepath)

                browse_file = fs.path.splitext(fits_file)[0] + ".jpg"
                browse_filepath = fs.path.join(browse_product_path,
                                               browse_file)

                # In a COWFS, a directory does not have a
                # syspath, only files.  So we write a stub
                # file into the directory, find its syspath
                # and its directory's syspath.  Then we remove
                # the stub file.
                browse_deltas.touch(browse_filepath)
                browse_product_os_filepath = browse_deltas.getsyspath(
                    browse_filepath)
                browse_deltas.remove(browse_filepath)

                browse_product_os_dirpath = fs.path.dirname(
                    browse_product_os_filepath)

                # Picmaker expects a list of strings.  If you give it
                # str, it'll index into it and complain about '/'
                # not being a file.  So don't do that!
                try:
                    picmaker.ImagesToPics(
                        [str(fits_os_filepath)],
                        browse_product_os_dirpath,
                        filter="None",
                        percentiles=(1, 99),
                    )
                except IndexError as e:
                    # The traceback text is embedded in the message;
                    # the new exception is not chained with `from e`.
                    tb = traceback.format_exc()
                    message = f"File {fits_file}: {e}\n{tb}"
                    raise Exception(message)

                # The .jpg is expected in the directory that was
                # passed to picmaker, under the name computed above.
                browse_os_filepath = fs.path.join(browse_product_os_dirpath,
                                                  browse_file)
                size = os.stat(browse_os_filepath).st_size
                db.create_browse_file(browse_os_filepath, browse_file,
                                      browse_product_lidvid, size)
        else:
            # Unchanged data product: record the browse product as
            # unchanged and only link it to the browse collection.
            bpl = LIDVID(browse_product_lidvid)
            changes_dict.set(bpl.lid(), bpl.vid(), False)
            db.create_collection_product_link(str(browse_collection_lidvid),
                                              browse_product_lidvid)
def make_other_collection_label(
    bundle_db: BundleDB,
    info: Citation_Information,
    collection_lidvid: str,
    bundle_lidvid: str,
    verify: bool,
    mod_date: str,
) -> bytes:
    """
    Create the label text for the document, browse, and data collection
    having this LIDVID using the bundle database.  If verify is True,
    verify the label against its XML and Schematron schemas.  Raise an
    exception if either fails.

    Raises ValueError when the collection has no products, or when a
    data collection has no start/stop time stored in the database, and
    LabelError when building the label fails.
    """
    # TODO this is sloppy; is there a better way?
    products = bundle_db.get_collection_products(collection_lidvid)
    record_count = len(products)
    if record_count <= 0:
        raise ValueError(f"{collection_lidvid} has no products.")

    collection_lid = lidvid_to_lid(collection_lidvid)
    collection_vid = lidvid_to_vid(collection_lidvid)
    collection: Collection = bundle_db.get_collection(collection_lidvid)

    proposal_id = bundle_db.get_bundle(bundle_lidvid).proposal_id
    instruments = ",".join(bundle_db.get_instruments_of_the_bundle()).upper()

    # One title builder per slot of switch_on_collection_subtype().
    # The first three ignore their argument and use the same fields.
    def make_ctxt_coll_title(_coll: Collection) -> NodeBuilder:
        return make_context_collection_title(
            {
                "instrument": instruments,
                "proposal_id": str(proposal_id),
            }
        )

    def make_doc_coll_title(_coll: Collection) -> NodeBuilder:
        return make_document_collection_title(
            {
                "instrument": instruments,
                "proposal_id": str(proposal_id),
            }
        )

    def make_sch_coll_title(_coll: Collection) -> NodeBuilder:
        return make_schema_collection_title(
            {
                "instrument": instruments,
                "proposal_id": str(proposal_id),
            }
        )

    def make_other_coll_title(coll: Collection) -> NodeBuilder:
        other_collection = cast(OtherCollection, coll)
        if other_collection.prefix == "browse":
            # Browse collection titles are generated here.
            collection_title = (
                f"{other_collection.prefix.capitalize()} "
                + f"collection of {other_collection.instrument.upper()} "
                + f"observations obtained from HST Observing Program {proposal_id}."
            )
        else:
            # Get the data/misc collection title from db.
            collection_title = str(other_collection.title)
        return make_other_collection_title({"collection_title": collection_title})

    title: NodeBuilder = switch_on_collection_subtype(
        collection,
        make_ctxt_coll_title,
        make_doc_coll_title,
        make_sch_coll_title,
        make_other_coll_title,
    )(collection)

    inventory_name = get_collection_inventory_name(bundle_db, collection_lidvid)

    # Properly assign collection type for Document, Browse, or Data collection.
    # Context node only exists in Data collection label.
    # Reference_List only exists in Data collection label.
    context_node: List[NodeBuilder] = []
    reference_list_node: List[NodeBuilder] = []
    collection_type: str = ""
    # NOTE(review): dispatch is on the exact class name, so subclasses
    # of these two classes would match neither branch and keep
    # collection_type == "".
    type_name = type(collection).__name__
    if type_name == "DocumentCollection":
        collection_type = "Document"
        # For document collection, we need to add all handbooks in the csv but
        # we won't create the label for it.
        # Two handbook entries are counted per instrument.
        inst_list = bundle_db.get_instruments_of_the_bundle()
        record_count += 2 * len(inst_list)
    elif type_name == "OtherCollection":
        collection_type = cast(OtherCollection, collection).prefix.capitalize()
        suffix = cast(OtherCollection, collection).suffix
        instrument = cast(OtherCollection, collection).instrument

        # Roll-up (Context node) only exists in data collection
        if collection_type == "Data":
            # Get min start_time and max stop_time
            start_time, stop_time = bundle_db.get_roll_up_time_from_db(suffix)
            # Make sure start/stop time exists in db.
            if start_time is None:
                raise ValueError("Start time is not stored in FitsProduct table.")
            if stop_time is None:
                raise ValueError("Stop time is not stored in FitsProduct table.")

            start_stop_times = {
                "start_date_time": start_time,
                "stop_date_time": stop_time,
            }
            time_coordinates_node = get_time_coordinates(start_stop_times)

            # Dictionary used for primary result summary
            primary_result_dict: Dict[str, Any] = {}
            # Check if it's raw or calibrated image, we will update this later
            processing_level = get_processing_level(
                suffix=suffix, instrument_id=instrument.upper()
            )
            primary_result_dict["processing_level"] = processing_level

            p_title = bundle_db.get_fits_product_collection_title(collection_lidvid)
            primary_result_dict["description"] = p_title
            # Get unique wavelength names for roll-up in data collection
            wavelength_range = bundle_db.get_wavelength_range_from_db(suffix)
            primary_result_dict["wavelength_range"] = wavelength_range
            primary_result_summary_node = primary_result_summary(primary_result_dict)

            # Get the list of target identifications nodes for the collection
            target_identifications = bundle_db.get_all_target_identification()
            # NOTE(review): the empty-list initialisation is
            # immediately overwritten by the call below.
            target_identification_nodes: List[NodeBuilder] = []
            target_identification_nodes = create_target_identification_nodes(
                bundle_db, target_identifications, "collection"
            )

            # Get the investigation node for the collection
            investigation_area_name = mk_Investigation_Area_name(proposal_id)
            investigation_area_lidvid = mk_Investigation_Area_lidvid(proposal_id)
            investigation_area_node = investigation_area(
                investigation_area_name, investigation_area_lidvid, "collection"
            )

            # Get the observing system node for the collection
            observing_system_node = observing_system(instrument)

            context_node = [
                make_collection_context_node(
                    time_coordinates_node,
                    primary_result_summary_node,
                    investigation_area_node,
                    observing_system_node,
                    target_identification_nodes,
                )
            ]

            # document reference list only exists in data collection
            reference_list_node = [
                make_document_reference_list([instrument], "collection")
            ]

    # context_node and reference_list_node are still empty lists here
    # unless the data-collection branch above filled them in.
    try:
        label = (
            make_label(
                {
                    "collection_lid": collection_lid,
                    "collection_vid": collection_vid,
                    "record_count": record_count,
                    "title": title,
                    "mod_date": mod_date,
                    "proposal_id": str(proposal_id),
                    "Citation_Information": make_citation_information(info),
                    "inventory_name": inventory_name,
                    "Context_Area": combine_nodes_into_fragment(context_node),
                    "collection_type": collection_type,
                    "Reference_List": combine_nodes_into_fragment(reference_list_node),
                }
            )
            .toxml()
            .encode()
        )
    except Exception as e:
        raise LabelError(collection_lidvid) from e

    return pretty_and_verify(label, verify)
def make_investigation_label(
    bundle_db: BundleDB,
    bundle_lidvid: str,
    info: Citation_Information,
    verify: bool,
    use_mod_date_for_testing: bool = False,
) -> bytes:
    """
    Build the label text for the context investigation that belongs
    to the bundle with the given LIDVID, reading what it needs from
    the bundle database.

    bundle_db -- database queried for the bundle, the rolled-up
        start/stop times and the reference context products.
    bundle_lidvid -- LIDVID of the bundle whose proposal id selects
        the investigation.
    info -- citation information supplying the title parts and the
        formatted abstract.
    verify -- passed through to pretty_and_verify; if True, the label
        is verified against its XML and Schematron schemas and an
        exception is raised if either fails.
    use_mod_date_for_testing -- if True, use the fixed testing
        modification date instead of the current date.

    Raises ValueError when the start or stop time is missing from the
    database, and LabelError when assembling the label fails.
    """
    bundle = bundle_db.get_bundle(bundle_lidvid)
    proposal_id = bundle.proposal_id

    # The title is assembled from pieces of the citation information.
    title = "".join(
        (
            info.title,
            ", HST Cycle ",
            str(info.cycle),
            " Program ",
            str(info.propno),
            ", ",
            info.publication_year,
            ".",
        )
    )

    investigation_lid = mk_Investigation_Area_lid(proposal_id)
    investigation_lidvid = mk_Investigation_Area_lidvid(proposal_id)

    # Earliest start time and latest stop time; both must be present.
    start_time, stop_time = bundle_db.get_roll_up_time_from_db()
    if start_time is None:
        raise ValueError("Start time is not stored in FitsProduct table.")
    if stop_time is None:
        raise ValueError("Stop time is not stored in FitsProduct table.")
    start_date = date_time_to_date(start_time)
    stop_date = date_time_to_date(stop_time)

    # One internal reference per context product that refers to this
    # investigation.
    internal_reference_nodes: List[NodeBuilder] = [
        make_internal_ref(
            lidvid_to_lid(context_product.lidvid),
            f"investigation_to_{context_product.ref_type}",
        )
        for context_product in bundle_db.get_reference_context_products(
            investigation_lidvid
        )
    ]

    # Fall back to an indented "None" when there is no abstract.
    abstract_lines = info.abstract_formatted(indent=8)  # type: ignore
    description_text = (
        " " * 8 + "None"
        if len(abstract_lines) == 0
        else "\n".join(abstract_lines)
    )
    description_nodes: List[NodeBuilder] = [make_description(description_text)]

    # Either the fixed testing date or the date the label is created.
    mod_date = (
        MOD_DATE_FOR_TESTESING
        if use_mod_date_for_testing
        else get_current_date()
    )

    try:
        # The field values are computed inside the try so that any
        # failure while building them is reported as a LabelError too.
        label_fields = {
            "investigation_lid": investigation_lid,
            "bundle_vid": lidvid_to_vid(bundle.lidvid),
            "title": title,
            "mod_date": mod_date,
            "start_date": start_date,
            "stop_date": stop_date,
            "internal_reference": combine_nodes_into_fragment(
                internal_reference_nodes
            ),
            "description": combine_nodes_into_fragment(description_nodes),
        }
        label = make_label(label_fields).toxml().encode()
    except Exception as e:
        raise LabelError(investigation_lid) from e

    return pretty_and_verify(label, verify)
def _populate_citation_info(
    changes_dict: ChangesDict, db: BundleDB, info_param: Tuple
) -> None:
    """
    For every entry of changes_dict that is marked as changed and
    whose LID is a bundle LID, call db.create_citation with the
    entry's LIDVID string and info_param.
    """
    for lid, vid_and_changed in changes_dict.items():
        vid, changed = vid_and_changed
        # Skip unchanged entries first; the LID kind is only checked
        # for entries that did change.
        if not changed or not lid.is_bundle_lid():
            continue
        bundle_lidvid = LIDVID.create_from_lid_and_vid(lid, vid)
        db.create_citation(str(bundle_lidvid), info_param)
def make_bundle_label(
    bundle_db: BundleDB,
    bundle_lidvid: str,
    info: Citation_Information,
    verify: bool,
    use_mod_date_for_testing: bool = False,
) -> bytes:
    """
    Build the label text for the bundle with the given LIDVID from
    the contents of the bundle database.

    bundle_db -- database queried for the bundle, its collections,
        target identifications, roll-up times, instruments and
        wavelength range.
    bundle_lidvid -- LIDVID of the bundle to label.
    info -- citation information supplying the title parts and the
        Citation_Information fragment.
    verify -- passed through to pretty_and_verify; if True, the label
        is verified against its XML and Schematron schemas and an
        exception is raised if either fails.
    use_mod_date_for_testing -- if True, use the fixed testing
        modification date instead of the current date.

    Raises ValueError when the start or stop time is missing from the
    database or when the generated label does not start with an XML
    declaration, and LabelError when assembling the label fails.
    """
    bundle = bundle_db.get_bundle(bundle_lidvid)
    proposal_id = bundle.proposal_id

    def get_ref_type(collection: Collection) -> str:
        # Map a collection to its bundle member reference type.
        ref_type = switch_on_collection_subtype(
            collection,
            "bundle_has_context_collection",
            "bundle_has_document_collection",
            "bundle_has_schema_collection",
            "bundle_has_other_collection",
        )
        if ref_type != "bundle_has_other_collection":
            return ref_type
        # "Other" collections are named after their own prefix.
        prefix = cast(OtherCollection, collection).prefix
        return f"bundle_has_{prefix}_collection"

    # One bundle member entry per collection linked to the bundle.
    reduced_collections = []
    for member in bundle_db.get_bundle_collections(bundle.lidvid):
        entry = {
            "collection_lidvid": member.lidvid,
            "ref_type": get_ref_type(member),
        }
        reduced_collections.append(make_bundle_entry_member(entry))

    # The title is assembled from pieces of the citation information.
    title = "".join(
        (
            info.title,
            ", HST Cycle ",
            str(info.cycle),
            " Program ",
            str(info.propno),
            ", ",
            info.publication_year,
            ".",
        )
    )

    # Target identification nodes for the bundle.
    target_identification_nodes: List[NodeBuilder] = (
        create_target_identification_nodes(
            bundle_db, bundle_db.get_all_target_identification(), "bundle"
        )
    )

    # Investigation area node for the bundle.
    investigation_area_node = investigation_area(
        mk_Investigation_Area_name(proposal_id),
        mk_Investigation_Area_lidvid(proposal_id),
        "bundle",
    )

    # Earliest start time and latest stop time; both must be present.
    start_time, stop_time = bundle_db.get_roll_up_time_from_db()
    if start_time is None:
        raise ValueError("Start time is not stored in FitsProduct table.")
    if stop_time is None:
        raise ValueError("Stop time is not stored in FitsProduct table.")
    time_coordinates_node = get_time_coordinates(
        {"start_date_time": start_time, "stop_date_time": stop_time}
    )

    instruments_list = bundle_db.get_instruments_of_the_bundle()
    instruments = ", ".join(instruments_list).upper()
    p_title = (
        f"{instruments} observations obtained by the HST "
        f"Observing Program {proposal_id}."
    )
    # Unique wavelength names rolled up over the bundle.
    wavelength_range = bundle_db.get_wavelength_range_from_db()

    # The processing level is a dummy value that awaits an update.
    primary_result_dict: Dict[str, Any] = {
        "processing_level": "Raw",
        "description": p_title,
        "wavelength_range": wavelength_range,
    }
    primary_result_summary_node = primary_result_summary(primary_result_dict)

    # One observing system node per instrument of the bundle.
    observing_system_nodes: List[NodeBuilder] = []
    for instrument in instruments_list:
        observing_system_nodes.append(observing_system(instrument))

    context_node: List[NodeBuilder] = [
        make_bundle_context_node(
            time_coordinates_node,
            primary_result_summary_node,
            investigation_area_node,
            observing_system_nodes,
            target_identification_nodes,
        )
    ]

    # Either the fixed testing date or the date the label is created.
    mod_date = (
        MOD_DATE_FOR_TESTESING
        if use_mod_date_for_testing
        else get_current_date()
    )

    try:
        # The field values are computed inside the try so that any
        # failure while building them is reported as a LabelError too.
        label_fields = {
            "bundle_lid": lidvid_to_lid(bundle.lidvid),
            "bundle_vid": lidvid_to_vid(bundle.lidvid),
            "proposal_id": str(proposal_id),
            "title": title,
            "Citation_Information": make_citation_information(
                info, is_for_bundle=True
            ),
            "mod_date": mod_date,
            "Bundle_Member_Entries": combine_nodes_into_fragment(
                reduced_collections
            ),
            "Context_Area": combine_nodes_into_fragment(context_node),
            "Reference_List": make_document_reference_list(
                instruments_list, "bundle"
            ),
        }
        label = make_label(label_fields).toxml().encode()
    except Exception as e:
        raise LabelError(bundle.lidvid) from e

    # Sanity check: the serialized label must open with an XML
    # declaration.
    if not label.startswith(b"<?xml "):
        raise ValueError("Bundle label is not XML.")

    return pretty_and_verify(label, verify)
def make_context_target_label(
    bundle_db: BundleDB,
    target: str,
    verify: bool,
    use_mod_date_for_testing: bool = False,
) -> bytes:
    """
    Create the label text for a context target using the bundle
    database.

    bundle_db -- database queried for the target identification; a
        context product for the target is also created in it.
    target -- final segment of the target LID; it is appended to
        "urn:nasa:pds:context:target:" to form the LID.  The label
        always uses version 1.0.
    verify -- passed through to pretty_and_verify; if True, the label
        is verified against its XML and Schematron schemas and an
        exception is raised if either fails.
    use_mod_date_for_testing -- if True, use the fixed testing
        modification date instead of the current date.

    Raises LabelError when assembling the label fails.
    """
    target_lid = f"urn:nasa:pds:context:target:{target}"
    target_lidvid = f"{target_lid}::1.0"
    target_identification = bundle_db.get_target_identification_based_on_lid(
        target_lid
    )
    bundle_db.create_context_product(
        get_target_lidvid(
            [target_identification.type, target_identification.name]
        ),
        "target",
    )

    # Alternate designations are stored one per line.  An empty string
    # means the target has no aliases: use an empty list so the label
    # is built without alias nodes.  (Previously alias_list was left
    # unassigned in that case, which raised UnboundLocalError.)
    # NOTE(review): a stored value of None becomes the string "None"
    # here and is emitted as an alias -- confirm whether that can occur.
    alias = str(target_identification.alternate_designations)
    alias_list: List[str] = alias.split("\n") if alias else []
    alias_nodes: List[NodeBuilder] = [
        make_alias(alias_name) for alias_name in alias_list
    ]

    target_description = str(target_identification.description)
    if target_description:
        # Properly align multi-line text nodes with 8 spaces.
        indent = " " * 8
        target_description = indent + target_description.replace(
            "\n", "\n" + indent
        )
    else:
        target_description = " " * 8 + "None"
    description_nodes: List[NodeBuilder] = [
        make_description(target_description)
    ]

    if not use_mod_date_for_testing:
        # Get the date when the label is created.
        mod_date = get_current_date()
    else:
        mod_date = MOD_DATE_FOR_TESTESING

    try:
        label = (
            make_label(
                {
                    "target_lid": target_lid,
                    "target_vid": "1.0",
                    "title": target_identification.name,
                    "alias": combine_nodes_into_fragment(alias_nodes),
                    "name": target_identification.name,
                    "type": target_identification.type,
                    "description": combine_nodes_into_fragment(
                        description_nodes
                    ),
                    "mod_date": mod_date,
                }
            )
            .toxml()
            .encode()
        )
    except Exception as e:
        raise LabelError(target_lidvid) from e

    return pretty_and_verify(label, verify)