def test_range():
    """Expands slash-separated year ranges into every year they span."""
    four_digit_ranges = ["1937/1939", "1942/1943"]
    assert year_parser.integer_years(four_digit_ranges) == [
        1937, 1938, 1939, 1942, 1943,
    ]

    # A three-digit start year (YYY/YYYY) is expanded the same way.
    expected_years = list(range(990, 1001))
    assert year_parser.integer_years(["990/1000"]) == expected_years
def test_duplicates():
    """Reports each year once even when several inputs resolve to it."""
    overlapping_values = ["1934-06/1935-07", "1934-06-01", "1934"]
    parsed_years = year_parser.integer_years(overlapping_values)
    assert parsed_years == [1934, 1935]
def test_multiple_dates():
    """Turns a list of several input values into a list of years."""
    input_values = ["1941-10-01", "1935", "1945"]
    parsed_years = year_parser.integer_years(input_values)
    # The expected list is in ascending order, unlike the input.
    assert parsed_years == [1935, 1941, 1945]
def test_range():
    """Expands YYYY/YYYY values into the full run of years they cover."""
    expected_years = [1937, 1938, 1939, 1942, 1943]
    parsed_years = year_parser.integer_years(["1937/1939", "1942/1943"])
    assert parsed_years == expected_years
def map_record(row: DLCSRecord, config: typing.Dict) -> UrsusRecord:
    """Builds the Ursus Solr document for one CSV metadata row.

    Args:
        row: A mapping representing the CSV record.
        config: Settings forwarded to the field-mapping and thumbnail
            helpers. Its "collection_names" entry is looked up with the
            row's "Parent ARK" value.

    Returns:
        A mapping representing the record to submit to Solr.
    """
    # One mapped value per field listed in mapper.FIELD_MAPPING.
    record: UrsusRecord = {}
    for field_name in mapper.FIELD_MAPPING:
        record[field_name] = map_field_value(row, field_name, config=config)

    # Thumbnail: keep the mapped value if there is one, otherwise try the
    # child-derived thumbnail, and finally the manifest-derived one.
    thumbnail = record.get("thumbnail_url_ss")
    if not thumbnail:
        thumbnail = thumbnail_from_child(record, config=config)
        if not thumbnail:
            thumbnail = thumbnail_from_manifest(record)
    record["thumbnail_url_ss"] = thumbnail

    # Collection name: only set when the row's parent ARK is a known key.
    if "Parent ARK" in row:
        parent_ark = row["Parent ARK"]
        if parent_ark in config["collection_names"]:
            record["dlcs_collection_name_tesim"] = [
                config["collection_names"][parent_ark]
            ]

    # Facet fields: (destination, source) pairs, copied in this order.
    facet_copies = (
        ("features_sim", "features_tesim"),
        ("genre_sim", "genre_tesim"),
        ("human_readable_language_sim", "human_readable_language_tesim"),
        ("human_readable_resource_type_sim", "resource_type_tesim"),
        ("location_sim", "location_tesim"),
        ("member_of_collections_ssim", "dlcs_collection_name_tesim"),
        ("named_subject_sim", "named_subject_tesim"),
        ("place_of_origin_sim", "place_of_origin_tesim"),
        ("script_sim", "script_tesim"),
        ("subject_sim", "subject_tesim"),
        ("support_sim", "support_tesim"),
        ("writing_system_sim", "writing_system_tesim"),
    )
    for facet_key, source_key in facet_copies:
        record[facet_key] = record.get(source_key)

    normalized_dates = record.get("normalized_date_tesim")
    record["year_isim"] = year_parser.integer_years(normalized_dates)

    # Sort fields: first title, and the earliest year.
    title_values = record.get("title_tesim")
    if isinstance(title_values, typing.Sequence) and len(title_values) > 0:
        record["sort_title_ssort"] = title_values[0]

    year_values = record.get("year_isim")
    if isinstance(year_values, typing.Sequence) and len(year_values) > 0:
        record["sort_year_isi"] = min(year_values)

    return record
def map_record(row: DLCSRecord, solr_client: Solr,
               config: typing.Dict) -> UrsusRecord:
    """Builds the Ursus Solr document for one CSV metadata row.

    Args:
        row: A mapping representing the CSV record.
        solr_client: Solr client handed to solr_transformed_dates when the
            date field is built.
        config: Settings forwarded to the field-mapping and thumbnail
            helpers. Its "collection_names" entry is looked up with the
            row's "Parent ARK" value.

    Returns:
        A mapping representing the record to submit to Solr.
    """
    # One mapped value per field listed in mapper.FIELD_MAPPING.
    record: UrsusRecord = {}
    for field_name in mapper.FIELD_MAPPING:
        record[field_name] = map_field_value(row, field_name, config=config)

    def copy_fields(pairs):
        """Copies record[source] into record[destination] for each pair."""
        for destination_key, source_key in pairs:
            record[destination_key] = record.get(source_key)

    # THUMBNAIL: keep the mapped value if there is one, otherwise try the
    # child-derived thumbnail, and finally the manifest-derived one.
    thumbnail = record.get("thumbnail_url_ss")
    if not thumbnail:
        thumbnail = thumbnail_from_child(record, config=config)
        if not thumbnail:
            thumbnail = thumbnail_from_manifest(record)
    record["thumbnail_url_ss"] = thumbnail

    # COLLECTION NAME: only set when the row's parent ARK is a known key.
    if "Parent ARK" in row:
        parent_ark = row["Parent ARK"]
        if parent_ark in config["collection_names"]:
            record["dlcs_collection_name_tesim"] = [
                config["collection_names"][parent_ark]
            ]

    # FIELDS: the copies below run in the same order as the helper calls
    # that read the record, so each helper sees the same record state.
    copy_fields((
        ("uniform_title_sim", "uniform_title_tesim"),
        ("architect_sim", "architect_tesim"),
        ("author_sim", "author_tesim"),
        ("illuminator_sim", "illuminator_tesim"),
        ("scribe_sim", "scribe_tesim"),
        ("rubricator_sim", "rubricator_tesim"),
        ("commentator_sim", "commentator_tesim"),
        ("translator_sim", "translator_tesim"),
        ("lyricist_sim", "lyricist_tesim"),
        ("composer_sim", "composer_tesim"),
        ("illustrator_sim", "illustrator_tesim"),
        ("editor_sim", "editor_tesim"),
        ("calligrapher_sim", "calligrapher_tesim"),
        ("engraver_sim", "engraver_tesim"),
        ("printmaker_sim", "printmaker_tesim"),
        ("human_readable_language_sim", "human_readable_language_tesim"),
    ))

    record["names_sim"] = name_fields(record)
    record["keywords_sim"] = keywords_fields(record)

    copy_fields((
        ("features_sim", "features_tesim"),
        ("script_sim", "script_tesim"),
        ("writing_system_sim", "writing_system_tesim"),
    ))

    # Year and date fields are both derived from the normalized date values.
    record["year_isim"] = year_parser.integer_years(
        record.get("normalized_date_tesim"))
    parsed_dates = date_parser.get_dates(record.get("normalized_date_tesim"))
    record["date_dtsim"] = solr_transformed_dates(solr_client, parsed_dates)

    copy_fields((
        ("place_of_origin_sim", "place_of_origin_tesim"),
        ("associated_name_sim", "associated_name_tesim"),
        ("form_sim", "form_ssi"),
        ("support_sim", "support_tesim"),
        ("genre_sim", "genre_tesim"),
        ("subject_sim", "subject_tesim"),
        ("location_sim", "location_tesim"),
        ("named_subject_sim", "named_subject_tesim"),
        ("human_readable_resource_type_sim", "resource_type_tesim"),
        ("member_of_collections_ssim", "dlcs_collection_name_tesim"),
    ))

    # SINAI INDEX
    record["header_index_tesim"] = header_fields(record)
    record["name_fields_index_tesim"] = name_fields_index(record)

    # SORT FIELDS: first title, earliest year, first date.
    # Shelfmark sorting is not set here; a Solr copyField is used for it.
    title_values = record.get("title_tesim")
    if isinstance(title_values, typing.Sequence) and len(title_values) > 0:
        record["sort_title_ssort"] = title_values[0]

    year_values = record.get("year_isim")
    if isinstance(year_values, typing.Sequence) and len(year_values) > 0:
        record["sort_year_isi"] = min(year_values)

    date_values = record.get("date_dtsim")
    if isinstance(date_values, typing.Sequence) and len(date_values) > 0:
        record["date_dtsort"] = date_values[0]

    return record
def test_iso_8601():
    """Extracts the year from a full ISO 8601 date string (YYYY-MM-DD)."""
    parsed_years = year_parser.integer_years(["1941-10-01"])
    assert parsed_years == [1941]
def test_range_with_months():
    """Range endpoints may carry months; only their years are used."""
    same_year_range = ["1934-06/1934-07"]
    assert year_parser.integer_years(same_year_range) == [1934]
def test_unparseable():
    """Skips values that cannot be parsed while still parsing the rest."""
    mixed_values = ["1953", "[between 1928-1939]"]
    parsed_years = year_parser.integer_years(mixed_values)
    assert parsed_years == [1953]
def test_empty():
    """An empty input list produces an empty list of years."""
    no_values = []
    assert year_parser.integer_years(no_values) == []
def test_year_and_month():
    """Extracts the year from a YYYY-MM value."""
    parsed_years = year_parser.integer_years(["1953-10"])
    assert parsed_years == [1953]
def test_just_year():
    """A bare four-digit year is returned as a single integer year."""
    parsed_years = year_parser.integer_years(["1953"])
    assert parsed_years == [1953]