Exemplo n.º 1
0
def test_mets_dnx_with_json_structmap_IDs():
    """Regression test: no two fptr elements may share a FILEID.

    A bug once gave every fptr element in the structMap the fid of the
    first file. This builds a METS from a two-file JSON description and
    verifies that the FILEID values of all fptr elements are distinct.
    """
    pm_json = """[
        {"fileOriginalName": "img1.jpg",
         "fileOriginalPath": "path/to/files/img1.jpg",
         "MD5": "aff64bf1391ac627edb3234a422f9a77",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image One",
         "note": "This is a note for image 1",
         "fileSizeBytes": "119191"},
         {"fileOriginalName": "img2.jpg",
         "fileOriginalPath": "path/to/files/img2.jpg",
         "MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image Two",
         "note": "This is a note for image 2",
         "fileSizeBytes": "119192"}
    ]"""
    ie_dc_dict = {"dc:title": "test title"}
    batch_dir = os.path.join(CURRENT_DIR, 'data', 'test_batch_2')

    mets = mdf.build_mets_from_json(
        ie_dmd_dict=ie_dc_dict,
        pres_master_json=pm_json,
        input_dir=batch_dir,
        digital_original=True)

    file_ids = [
        fptr.attrib['FILEID']
        for fptr in mets.findall('.//{http://www.loc.gov/METS/}fptr')
    ]
    # A set drops repeats, so equal sizes means every FILEID is unique.
    assert len(set(file_ids)) == len(file_ids)
Exemplo n.º 2
0
def test_mets_dnx_with_json_supply_filesizebytes():
    """Check that fileSizeBytes values supplied in the JSON reach the METS.

    Builds a METS from a JSON document in which each file carries an
    explicit "fileSizeBytes" value, then verifies that every ``key``
    element with id "fileSizeBytes" holds one of the supplied sizes.
    """
    pm_json = """[
        {"fileOriginalName": "img1.jpg",
         "fileOriginalPath": "path/to/files/img1.jpg",
         "MD5": "aff64bf1391ac627edb3234a422f9a77",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image One",
         "note": "This is a note for image 1",
         "fileSizeBytes": "119191"},
         {"fileOriginalName": "img2.jpg",
         "fileOriginalPath": "path/to/files/img2.jpg",
         "MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image Two",
         "note": "This is a note for image 2",
         "fileSizeBytes": "119192"}
    ]"""
    ie_dc_dict = {"dc:title": "test title"}
    batch_dir = os.path.join(CURRENT_DIR, 'data', 'test_batch_2')

    mets = mdf.build_mets_from_json(
        ie_dmd_dict=ie_dc_dict,
        pres_master_json=pm_json,
        input_dir=batch_dir,
        digital_original=True)
    # Dump the generated document to stdout as a debugging aid.
    print(ET.tounicode(mets, pretty_print=True))

    supplied_sizes = ('119191', '119192')
    # NOTE(review): this loop passes vacuously when nothing matches --
    # confirm the unqualified ``key`` XPath really finds the size keys.
    for size_key in mets.findall('.//key[@id="fileSizeBytes"]'):
        assert size_key.text in supplied_sizes
Exemplo n.º 3
0
def test_mm_and_ad_for_json_mets():
    """Check structMaps exist when PM, MM and AD JSON are all supplied.

    Builds a METS from preservation-master, modified-master and
    access-derivative JSON descriptions and asserts that structMap
    elements with the IDs "rep1-1", "rep2-1" and "rep3-1" are all present.
    Presumably there is one per representation; which ID maps to which
    representation is not established by this test.
    """
    ie_dc_dict = {"dc:title": "test title"}

    pm_json = """[
        {"fileOriginalName": "presmaster.jpg",
         "fileOriginalPath": "pm/presmaster.jpg",
         "MD5": "aff64bf1391ac627edb3234a422f9a77",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image One",
         "note": "This is a note for image 1",
         "fileSizeBytes": "119191"}]"""
    mm_json = """[{"fileOriginalName": "modifiedmaster.jpg",
         "fileOriginalPath": "mm/modifiedmaster.jpg",
         "MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image Two",
         "note": "This is a note for image 2",
         "fileSizeBytes": "119192"}]"""
    ad_json = """[{"fileOriginalName": "deriv.jpg",
         "fileOriginalPath": "ad/deriv.jpg",
         "MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image Three",
         "note": "This is a note for image 3",
         "fileSizeBytes": "119192"},
         {"fileOriginalName": "deriv_2.jpg",
         "fileOriginalPath": "ad/deriv_2.jpg",
         "MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image Three",
         "note": "This is a note for image 3",
         "fileSizeBytes": "119192"}
    ]"""
    mets = mdf.build_mets_from_json(ie_dmd_dict=ie_dc_dict,
                                    pres_master_json=pm_json,
                                    modified_master_json=mm_json,
                                    access_derivative_json=ad_json,
                                    input_dir=os.path.join(
                                        CURRENT_DIR, 'data', 'test_batch_3'),
                                    digital_original=True)
    # Dump the document before asserting so it is printed even when one
    # of the assertions below fails.
    print(ET.tounicode(mets, pretty_print=True))

    for rep_id in ("rep1-1", "rep2-1", "rep3-1"):
        struct_map = mets.find(
            './{http://www.loc.gov/METS/}structMap[@ID="%s"]' % rep_id)
        # Identity check: find() returns None when nothing matches.
        assert struct_map is not None, (
            "no structMap with ID %r found" % rep_id)
Exemplo n.º 4
0
def test_mets_dnx_with_json_structmap_IDs():
    """Make sure files appear in the correct order in the structMap.

    Builds a METS from a three-file JSON description and checks that the
    LABEL attributes of the nested structMap divs come back in the same
    order as the files were listed in the JSON.
    """
    # NOTE(review): another test with this exact name appears earlier in
    # this listing. If both live in the same module, this definition
    # replaces the earlier one and only this test runs -- consider
    # renaming one of them.
    ie_dc_dict = {"dc:title": "test title"}

    pm_json = """[
        {"fileOriginalName": "img1.jpg",
         "fileOriginalPath": "img1.jpg",
         "MD5": "aff64bf1391ac627edb3234a422f9a77",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image One",
         "note": "This is a note for image 1",
         "fileSizeBytes": "119191"},
         {"fileOriginalName": "img2.jpg",
         "fileOriginalPath": "img2.jpg",
         "MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image Two",
         "note": "This is a note for image 2",
         "fileSizeBytes": "119192"},
         {"fileOriginalName": "img3.jpg",
         "fileOriginalPath": "img3.jpg",
         "MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image Three",
         "note": "This is a note for image 3",
         "fileSizeBytes": "119192"}
    ]"""
    mets = mdf.build_mets_from_json(ie_dmd_dict=ie_dc_dict,
                                    pres_master_json=pm_json,
                                    input_dir=os.path.join(
                                        CURRENT_DIR, 'data', 'test_batch_3'),
                                    digital_original=True)
    # Serialise through the module-level helper, as the sibling tests do;
    # the object returned by build_mets_from_json is passed to
    # ET.tounicode() elsewhere rather than offering a tounicode() method.
    print(ET.tounicode(mets, pretty_print=True))

    # Divs nested at least three levels deep.
    div_list = mets.findall(
        './/{http://www.loc.gov/METS/}div'
        '/{http://www.loc.gov/METS/}div'
        '//{http://www.loc.gov/METS/}div'
    )
    div_labels = [div.attrib['LABEL'] for div in div_list]
    assert div_labels == ["Image One", "Image Two", "Image Three"]
Exemplo n.º 5
0
def test_structmap_has_table_of_contents_div_for_json():
    """Make sure the structMap contains a 'Table of Contents' div.

    Builds a METS from a three-file JSON description and checks that the
    first div nested directly inside a structMap's top-level div has the
    LABEL 'Table of Contents'.
    """
    ie_dc_dict = {"dc:title": "test title"}

    pm_json = """[
        {"fileOriginalName": "img1.jpg",
         "fileOriginalPath": "img1.jpg",
         "MD5": "aff64bf1391ac627edb3234a422f9a77",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image One",
         "note": "This is a note for image 1",
         "fileSizeBytes": "119191"},
         {"fileOriginalName": "img2.jpg",
         "fileOriginalPath": "img2.jpg",
         "MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image Two",
         "note": "This is a note for image 2",
         "fileSizeBytes": "119192"},
         {"fileOriginalName": "img3.jpg",
         "fileOriginalPath": "img3.jpg",
         "MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image Three",
         "note": "This is a note for image 3",
         "fileSizeBytes": "119192"}
    ]"""
    mets = mdf.build_mets_from_json(ie_dmd_dict=ie_dc_dict,
                                    pres_master_json=pm_json,
                                    input_dir=os.path.join(
                                        CURRENT_DIR, 'data', 'test_batch_3'),
                                    digital_original=True)
    toc_div = mets.find(
        './/{http://www.loc.gov/METS/}structMap'
        '/{http://www.loc.gov/METS/}div'
        '/{http://www.loc.gov/METS/}div'
    )
    # find() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the attribute lookup below.
    assert toc_div is not None, "no structMap/div/div element found"
    print(toc_div.attrib['LABEL'])
    assert toc_div.attrib['LABEL'] == 'Table of Contents'
Exemplo n.º 6
0
def test_mets_dnx_with_json_for_admid_in_filesec_files():
    """Check ADMID/ID wiring between fileSec files and amdSec elements.

    Builds a METS from a JSON description of two files and verifies that
    every file element under fileSec/fileGrp has an ADMID attribute
    ending in "-amd", and that every amdSec element has an ID attribute
    ending in "-amd".
    """
    pm_json = """[
        {"fileOriginalName": "img1.jpg",
         "fileOriginalPath": "path/to/files/img1.jpg",
         "MD5": "aff64bf1391ac627edb3234a422f9a77",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image One",
         "note": "This is a note for image 1"},
         {"fileOriginalName": "img2.jpg",
         "fileOriginalPath": "path/to/files/img2.jpg",
         "MD5": "9d09f20ab8e37e5d32cdd1508b49f0a9",
         "fileCreationDate": "1st of January, 1601",
         "fileModificationDate": "1st of January, 1601",
         "label": "Image Two",
         "note": "This is a note for image 2"}
    ]"""
    ie_dc_dict = {"dc:title": "test title"}
    batch_dir = os.path.join(CURRENT_DIR, 'data', 'test_batch_2')

    mets = mdf.build_mets_from_json(
        ie_dmd_dict=ie_dc_dict,
        pres_master_json=pm_json,
        input_dir=batch_dir,
        digital_original=True)
    # Dump the generated document to stdout as a debugging aid.
    print(ET.tounicode(mets, pretty_print=True))

    mets_ns = "{http://www.loc.gov/METS/}"

    # Every file entry in the fileSec must reference an "-amd" section.
    file_xpath = ".//" + mets_ns + "fileSec/" + mets_ns + "fileGrp/" \
        + mets_ns + "file"
    for file_el in mets.findall(file_xpath):
        assert "ADMID" in file_el.attrib
        assert file_el.attrib['ADMID'].endswith('-amd')

    # Every amdSec must carry an ID with the same "-amd" suffix.
    for amd_el in mets.findall(".//" + mets_ns + "amdSec"):
        assert "ID" in amd_el.attrib
        assert amd_el.attrib["ID"].endswith("-amd")