Exemplo n.º 1
0
 def test_group_id_derived(self):
     """A derived entry reports the group ID of the entry it derives from."""
     source_uuid = str(uuid.uuid4())
     source = metsrw.FSEntry('level1.txt', file_uuid=source_uuid)
     derivative = metsrw.FSEntry(
         'level3.txt',
         file_uuid=str(uuid.uuid4()),
         derived_from=source,
     )
     expected_group = 'Group-' + source_uuid
     # The group is keyed on the source's UUID, not on the derivative's own.
     assert derivative.group_id() == expected_group
     assert derivative.group_id() == source.group_id()
Exemplo n.º 2
0
 def test_serialize_structmap_child_empty(self):
     """A child that yields no structMap entry adds nothing to its parent.

     The child is a pathless ``deletion`` entry; the parent div is expected
     to carry only TYPE and LABEL and to have no child elements.
     """
     directory = metsrw.FSEntry('dir', type='Directory')
     deleted_file = metsrw.FSEntry(
         use='deletion', file_uuid=str(uuid.uuid4()))
     directory.add_child(deleted_file)

     div = directory.serialize_structmap(recurse=True)

     assert div.tag == '{http://www.loc.gov/METS/}div'
     attributes = div.attrib
     assert attributes['TYPE'] == 'Directory'
     assert attributes['LABEL'] == 'dir'
     assert len(attributes) == 2
     assert len(div) == 0
def add_md_dir_to_structmap(sip):
    """Attach a ``metadata`` directory containing ``dataset.json`` to *sip*.

    A directory entry with path ``metadata`` is added as a child of ``sip``,
    and an entry for ``metadata/dataset.json`` (use ``metadata``, fresh
    UUID) is added beneath it. The same ``sip`` object is returned.
    """
    metadata_dir = metsrw.FSEntry(path="metadata", use=None, type="Directory")
    sip.add_child(metadata_dir)
    # dataset.json is registered under the "metadata" use and parented to the
    # directory entry created above.
    dataset_json_uuid = str(uuid.uuid4())
    metadata_dir.add_child(
        metsrw.FSEntry(
            path="metadata/dataset.json",
            use="metadata",
            file_uuid=dataset_json_uuid,
        )
    )
    return sip
Exemplo n.º 4
0
 def test_filesec(self):
     """Entries are gathered into one mets:fileGrp per distinct USE value."""
     filegrp_tag = '{http://www.loc.gov/METS/}fileGrp'
     first_original = metsrw.FSEntry(
         'objects/file1.txt', file_uuid=str(uuid.uuid4()))
     # The USE string only has to match the assertion further down.
     derivative = metsrw.FSEntry(
         'objects/file1-preservation.txt',
         use='preservaton',
         file_uuid=str(uuid.uuid4()))
     second_original = metsrw.FSEntry(
         'objects/file2.txt', file_uuid=str(uuid.uuid4()))
     document = metsrw.METSDocument()

     filesec = document._filesec(
         [first_original, derivative, second_original])

     assert isinstance(filesec, etree._Element)
     assert filesec.tag == '{http://www.loc.gov/METS/}fileSec'
     assert len(filesec) == 2  # 2 groups
     expected_uses = ('original', 'preservaton')
     for position, expected_use in enumerate(expected_uses):
         group = filesec[position]
         assert group.tag == filegrp_tag
         assert group.get('USE') == expected_use
Exemplo n.º 5
0
 def test_serialize_structmap_no_recurse(self):
     """
     Without recursion only the entry's own mets:div is produced.
     The div carries TYPE and LABEL.
     The div has no child elements even though the entry has a child.
     """
     folder = metsrw.FSEntry('dir', type='Directory')
     folder.add_child(
         metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4())))

     div = folder.serialize_structmap(recurse=False)

     assert div.tag == '{http://www.loc.gov/METS/}div'
     attributes = div.attrib
     assert attributes['TYPE'] == 'Directory'
     assert attributes['LABEL'] == 'dir'
     assert len(div) == 0
Exemplo n.º 6
0
 def create_test_pointer_file(self):
     """Build and return a METS pointer-file document from the example data.

     The document holds a single ``FSEntry`` carrying one PREMIS object,
     one PREMIS (compression) event and two PREMIS agents, all taken from
     the ``c.EX_*`` constants.
     """
     # PREMIS side: the compression event and the object describing the
     # package; the object's creating application comes from the event.
     compression_event = premisrw.PREMISEvent(data=c.EX_COMPR_EVT)
     details = compression_event.compression_details
     _, program_version, tool_name = details
     premis_object = premisrw.PREMISObject(
         xsi_type=c.EX_PTR_XSI_TYPE,
         identifier_value=c.EX_PTR_IDENTIFIER_VALUE,
         message_digest_algorithm=c.EX_PTR_MESSAGE_DIGEST_ALGORITHM,
         message_digest=c.EX_PTR_MESSAGE_DIGEST,
         size=c.EX_PTR_SIZE,
         format_name=c.EX_PTR_FORMAT_NAME,
         format_registry_key=c.EX_PTR_FORMAT_REGISTRY_KEY,
         creating_application_name=tool_name,
         creating_application_version=program_version,
         date_created_by_application=c.EX_PTR_DATE_CREATED_BY_APPLICATION)
     decompression_files = (
         compression_event.get_decompression_transform_files())

     # METS side: one entry for the package, decorated with the PREMIS data.
     document = metsrw.METSDocument()
     pointer_entry = metsrw.FSEntry(
         path=c.EX_PTR_PATH,
         file_uuid=c.EX_PTR_IDENTIFIER_VALUE,
         use=c.EX_PTR_PACKAGE_TYPE,
         type=c.EX_PTR_PACKAGE_TYPE,
         transform_files=decompression_files,
         mets_div_type=c.EX_PTR_AIP_SUBTYPE)
     pointer_entry.add_premis_object(premis_object.serialize())
     pointer_entry.add_premis_event(compression_event.serialize())
     for agent_data in (c.EX_AGT_1, c.EX_AGT_2):
         pointer_entry.add_premis_agent(premisrw.data_to_premis(agent_data))
     document.append_file(pointer_entry)
     return document
Exemplo n.º 7
0
 def test_collect_mdsec_elements(self):
     """Collected metadata sections list every dmdSec before any amdSec."""
     first = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
     first.amdsecs.append(metsrw.AMDSec())
     first.dmdsecs.append(metsrw.SubSection('dmdSec', None))
     second = metsrw.FSEntry('file2.txt', file_uuid=str(uuid.uuid4()))
     second.dmdsecs.append(metsrw.SubSection('dmdSec', None))
     document = metsrw.METSDocument()

     collected = document._collect_mdsec_elements([first, second])

     assert isinstance(collected, list)
     assert len(collected) == 3
     # The two dmdSecs come first, the single amdSec last.
     for position in (0, 1):
         assert isinstance(collected[position], metsrw.SubSection)
         assert collected[position].subsection == 'dmdSec'
     assert isinstance(collected[2], metsrw.AMDSec)
Exemplo n.º 8
0
 def test_serialize_filesec_no_use(self):
     """
     An entry whose USE is None serializes to no mets:file element.
     """
     entry = metsrw.FSEntry(
         'file1.txt', use=None, file_uuid=str(uuid.uuid4()))
     serialized = entry.serialize_filesec()
     assert serialized is None
Exemplo n.º 9
0
    def test_add_metadata_to_fsentry(self):
        """Each ``add_*`` helper files its metadata under the right section.

        Dublin Core lands in a dmdSec; the PREMIS helpers append, in call
        order, to the subsections of the entry's first amdSec.
        """
        entry = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))

        def amd_subsection(position):
            # Resolved afresh on every call so each check sees current state.
            return entry.amdsecs[0].subsections[position]

        entry.add_dublin_core('<dc />')
        assert entry.dmdsecs
        assert len(entry.dmdsecs) == 1
        assert entry.dmdsecs[0].subsection == 'dmdSec'
        assert entry.dmdsecs[0].contents.mdtype == 'DC'

        # Only one amdSec is allowed, so everything below nests inside it.
        entry.add_premis_object('<premis>object</premis>')
        assert entry.amdsecs
        assert entry.amdsecs[0].subsections
        assert amd_subsection(0).subsection == 'techMD'
        assert amd_subsection(0).contents.mdtype == 'PREMIS:OBJECT'

        entry.add_premis_event('<premis>event</premis>')
        assert amd_subsection(1).subsection == 'digiprovMD'
        assert amd_subsection(1).contents.mdtype == 'PREMIS:EVENT'

        entry.add_premis_agent('<premis>agent</premis>')
        assert amd_subsection(2).subsection == 'digiprovMD'
        assert amd_subsection(2).contents.mdtype == 'PREMIS:AGENT'

        entry.add_premis_rights('<premis>rights</premis>')
        assert amd_subsection(3).subsection == 'rightsMD'
        assert amd_subsection(3).contents.mdtype == 'PREMIS:RIGHTS'

        assert len(entry.amdsecs[0].subsections) == 4
Exemplo n.º 10
0
def test_record_backlog_event(tmp_path):
    """Recording a backlog event adds one PREMIS event to the METS file.

    A transfer with one file and a matching single-file METS document are
    created, ``_record_backlog_event`` is run against them, and the METS is
    re-read to check the event type and date.
    """
    transfer = Transfer.objects.create(uuid="756db89c-1380-459d-83bc-d3772f1e7dd8")
    agent_user = User.objects.create(id=1)
    transfer.update_active_agent(user_id=agent_user.id)
    db_file = File.objects.create(
        uuid="3c567bc8-0847-4d12-a77d-0ed3a0361c0a", transfer=transfer
    )

    # The function under test expects the METS file to be on disk already,
    # so write a minimal one that references the file created above.
    submission_dir = tmp_path / "metadata/submissionDocumentation"
    submission_dir.mkdir(parents=True)
    mets_path = str(submission_dir / "METS.xml")
    item_entry = metsrw.FSEntry(
        path="foobar.jpg", label="foobar", type="Item", file_uuid=db_file.uuid
    )
    written_mets = metsrw.METSDocument()
    written_mets.append_file(item_entry)
    written_mets.write(mets_path, pretty_print=True)

    move_to_backlog._record_backlog_event(transfer.uuid, str(tmp_path), "2019-03-12")

    # Read the document back and inspect the events on its only entry.
    reloaded_mets = metsrw.METSDocument().fromfile(mets_path)
    reloaded_entry = next(iter(reloaded_mets.all_files()))
    recorded_events = reloaded_entry.get_premis_events()
    assert len(recorded_events) == 1
    backlog_event = recorded_events[0]
    assert backlog_event.event_type == "placement in backlog"
    assert backlog_event.event_date_time == "2019-03-12"
Exemplo n.º 11
0
 def __init__(self, root_path, db_base_path, transfer):
     """Store the given context and start with an empty entry tree.

     ``root_path``, ``db_base_path`` and ``transfer`` are kept as-is on the
     instance. The root node is a ``Directory`` entry whose path is the
     base name of ``root_path``; both index dictionaries start empty.
     """
     self.transfer = transfer
     self.db_base_path = db_base_path
     self.root_path = root_path
     self.file_index, self.dir_index = {}, {}
     root_name = os.path.basename(root_path)
     self.root_node = metsrw.FSEntry(path=root_name, type="Directory")
Exemplo n.º 12
0
 def test_files(self):
     """``all_files`` returns every entry, however deeply it is nested."""
     deep_file = metsrw.FSEntry('level3.txt', file_uuid=str(uuid.uuid4()))
     inner_dir = metsrw.FSEntry(
         'dir2', type='Directory', children=[deep_file])
     mid_file = metsrw.FSEntry('level2.txt', file_uuid=str(uuid.uuid4()))
     outer_dir = metsrw.FSEntry(
         'dir1', type='Directory', children=[inner_dir, mid_file])
     top_file = metsrw.FSEntry('level1.txt', file_uuid=str(uuid.uuid4()))
     root = metsrw.FSEntry(
         'root', type='Directory', children=[outer_dir, top_file])
     document = metsrw.METSDocument()
     document.append_file(root)

     collected = document.all_files()
     assert collected
     assert len(collected) == 6
     expected_entries = (
         root, top_file, outer_dir, mid_file, inner_dir, deep_file)
     for expected in expected_entries:
         assert expected in collected

     # A second top-level entry appended later is picked up as well.
     extra_file = metsrw.FSEntry('file4.txt', file_uuid=str(uuid.uuid4()))
     document.append_file(extra_file)
     collected = document.all_files()
     assert len(collected) == 7
     assert extra_file in collected
Exemplo n.º 13
0
    def test_is_empty_dir(self):
        """``is_empty_dir`` is true only for directories holding no files.

        Layout used here: ``root`` contains ``left`` and ``right``; ``left``
        contains ``left_leaf`` which holds the only file; ``right`` contains
        two childless directories.
        """
        root = metsrw.FSEntry('dir', type='Directory')
        left = metsrw.FSEntry('dir', type='Directory')
        right = metsrw.FSEntry('dir', type='Directory')
        left_leaf = metsrw.FSEntry('dir', type='Directory')
        right_leaf_a = metsrw.FSEntry('dir', type='Directory')
        right_leaf_b = metsrw.FSEntry('dir', type='Directory')
        only_file = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
        for parent_entry, child_entry in (
                (root, left),
                (root, right),
                (left, left_leaf),
                (right, right_leaf_a),
                (right, right_leaf_b),
                (left_leaf, only_file)):
            parent_entry.add_child(child_entry)

        assert right_leaf_a.is_empty_dir
        assert not right_leaf_a.children
        assert not left_leaf.is_empty_dir
        assert len(left_leaf.children) == 1
        assert not left.is_empty_dir
        assert not root.is_empty_dir
        assert not only_file.is_empty_dir
        # ``right`` has children, but they are all empty directories, so it
        # still counts as empty itself.
        assert right.is_empty_dir
        assert len(right.children) == 2
Exemplo n.º 14
0
 def test_admids(self):
     """``admids`` is empty at first and never grows beyond one entry."""
     entry = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
     assert len(entry.admids) == 0

     entry.add_premis_object('<premis>object</premis>')
     assert len(entry.admids) == 1

     # Further PREMIS metadata reuses the single amdSec.
     entry.add_premis_event('<premis>event</premis>')
     assert len(entry.admids) == 1
Exemplo n.º 15
0
 def test_serialize_filesec_not_item(self):
     """
     A Directory-typed entry serializes to no mets:file element.
     """
     directory_entry = metsrw.FSEntry(
         'file1.txt', type='Directory', file_uuid=str(uuid.uuid4()))
     serialized = directory_entry.serialize_filesec()
     assert serialized is None
Exemplo n.º 16
0
    def test_remove_child(self):
        """
        Removing a child drops it from the parent's children.
        Removing a child clears the child's parent link.
        The remaining sibling stays in place.
        """
        folder = metsrw.FSEntry('dir', type='Directory')
        removed = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
        kept = metsrw.FSEntry('file2.txt', file_uuid=str(uuid.uuid4()))
        for child in (removed, kept):
            folder.add_child(child)
        assert removed in folder.children
        assert removed.parent is folder
        assert len(folder.children) == 2

        folder.remove_child(removed)

        assert removed not in folder.children
        assert removed.parent is None
        assert len(folder.children) == 1
def convert_dataverse_to_mets(
    job,
    unit_path,
    dataset_md_name="dataset.json",
    output_md_path=None,
    output_md_name=None,
):
    """Create a transfer METS file from a Dataverse's dataset.json file.

    :param job: passed through to ``add_ddi_xml`` and
        ``add_dataset_files_to_md``.
    :param unit_path: unit directory the dataset metadata is loaded from and
        that is handed to ``write_mets_to_file``.
    :param dataset_md_name: name of the dataset metadata file.
    :param output_md_path: forwarded to ``write_mets_to_file``.
    :param output_md_name: forwarded to ``write_mets_to_file``.
    :raises ConvertDataverseError: when the metadata cannot be loaded, the
        latest-version section is missing, the citation title cannot be
        gathered, or the DDI / dataset files cannot be added.
    :returns: ``None`` on success.
    """
    logger.info(
        "Convert Dataverse structure called with '%s' unit directory", unit_path
    )

    json_metadata = load_md_and_return_json(unit_path, dataset_md_name)
    if json_metadata is None:
        raise ConvertDataverseError("Unable to the load Dataverse metadata file")
    dataset_md_latest = get_latest_version_metadata(json_metadata)
    if dataset_md_latest is None:
        raise ConvertDataverseError(
            "Unable to find the dataset metadata section from dataset.json"
        )

    # If a dataset is restricted we may not have access to all the files. We
    # may also want to flag this dataset to the users of this service. We
    # can do this here and below. We do not yet know whether this microservice
    # should fail because we don't know how all datasets behave when some
    # restrictions are placed on them.
    contact_information = retrieve_terms_of_access(dataset_md_latest)

    # Create METS
    try:
        sip = metsrw.FSEntry(
            path="None",
            label=get_ddi_title(dataset_md_latest),
            use=None,
            type="Directory",
        )
    except TypeError as err:
        # Build a real message string: the previous code created a
        # (format, err) tuple, so the log line and the exception both carried
        # the unformatted tuple instead of readable text.
        citation_msg = "Unable to gather citation data from dataset.json: {}".format(
            err
        )
        logger.error(citation_msg)
        raise ConvertDataverseError(citation_msg)

    sip = add_ddi_xml(job, sip, json_metadata, dataset_md_latest)
    if sip is None:
        raise ConvertDataverseError("Error creating SIP from Dataverse DDI")

    sip = add_metadata_ref(sip, dataset_md_name, "metadata/{}".format(dataset_md_name))

    sip = add_dataset_files_to_md(job, sip, dataset_md_latest, contact_information)
    if sip is None:
        raise ConvertDataverseError("Error adding Dataset files to METS")

    # On success of the following two functions, the module will return None
    # to JobContext which expects non-zero as a failure code only.
    sip = add_md_dir_to_structmap(sip)
    write_mets_to_file(sip, unit_path, output_md_path, output_md_name)
Exemplo n.º 18
0
 def test_serialize_structmap_recurse(self):
     """
     With recursion the directory's mets:div is produced.
     The div carries TYPE and LABEL.
     The div contains one child mets:div for the file, holding an fptr.
     """
     div_tag = '{http://www.loc.gov/METS/}div'
     folder = metsrw.FSEntry('dir', type='Directory')
     folder.add_child(
         metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4())))

     folder_div = folder.serialize_structmap(recurse=True)

     assert folder_div.tag == div_tag
     assert folder_div.attrib['TYPE'] == 'Directory'
     assert folder_div.attrib['LABEL'] == 'dir'
     assert len(folder_div) == 1
     file_div = folder_div[0]
     assert file_div.tag == div_tag
     assert file_div.attrib['TYPE'] == 'Item'
     assert file_div.attrib['LABEL'] == 'file1.txt'
     assert len(file_div) == 1
     file_pointer = file_div[0]
     assert file_pointer.tag == '{http://www.loc.gov/METS/}fptr'
     assert file_pointer.attrib['FILEID'].startswith('file-')
Exemplo n.º 19
0
    def test_add_child(self):
        """
        Adding a child puts it in the parent's children.
        Adding a child sets the child's parent link.
        Adding the same child twice does not duplicate it.
        Adding a child to a non-directory entry raises ValueError.
        """
        folder = metsrw.FSEntry('dir', type='Directory')
        item = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))

        # The second pass repeats the add; the outcome must be unchanged.
        for _attempt in range(2):
            folder.add_child(item)
            assert item in folder.children
            assert len(folder.children) == 1
            assert item.parent is folder

        with pytest.raises(ValueError):
            item.add_child(folder)
Exemplo n.º 20
0
    def build_tree(self, path, parent=None):
        """Recursively mirror the directory at ``path`` as ``FSEntry`` nodes.

        Entries are visited in name order. Each directory becomes a
        ``Directory`` entry, is recorded in ``self.dir_index`` and is
        descended into; every other entry becomes an ``Item`` entry recorded
        in ``self.file_index``. Index keys are ``self.db_base_path`` followed
        by the path relative to ``self.root_path`` (directories get a
        trailing path separator).

        :param path: directory to scan.
        :param parent: entry the scanned entries are attached to. When
            omitted, ``self.root_node`` is used.
        """
        if parent is None:
            # The default used to reach ``parent.add_child`` as None and
            # raise AttributeError; fall back to the tree root instead.
            parent = self.root_node

        dir_entries = sorted(os.scandir(path), key=lambda d: d.name)
        for dir_entry in dir_entries:
            entry_relative_path = os.path.relpath(dir_entry.path,
                                                  start=self.root_path)
            if dir_entry.is_dir():
                fsentry = metsrw.FSEntry(path=entry_relative_path,
                                         label=dir_entry.name,
                                         type="Directory")
                db_path = "".join(
                    [self.db_base_path, entry_relative_path, os.path.sep])
                self.dir_index[db_path] = fsentry
                # Populate the subtree before attaching it to its parent.
                self.build_tree(dir_entry.path, parent=fsentry)
            else:
                fsentry = metsrw.FSEntry(path=entry_relative_path,
                                         label=dir_entry.name,
                                         type="Item")
                db_path = "".join([self.db_base_path, entry_relative_path])
                self.file_index[db_path] = fsentry

            parent.add_child(fsentry)
Exemplo n.º 21
0
    def test_add_file_to_child(self):
        """A file added to a nested directory later shows up in all_files."""
        nested_file = metsrw.FSEntry(
            'level2.txt', file_uuid=str(uuid.uuid4()))
        nested_dir = metsrw.FSEntry(
            'dir1', type='Directory', children=[nested_file])
        top_file = metsrw.FSEntry('level1.txt', file_uuid=str(uuid.uuid4()))
        root = metsrw.FSEntry(
            'root', type='Directory', children=[nested_dir, top_file])
        document = metsrw.METSDocument()
        document.append_file(root)

        collected = document.all_files()
        assert collected
        assert len(collected) == 4
        for expected in (root, top_file, nested_dir, nested_file):
            assert expected in collected

        # Attach directly to the nested directory, not via the document.
        late_file = metsrw.FSEntry('level3.txt', file_uuid=str(uuid.uuid4()))
        nested_dir.add_child(late_file)
        collected = document.all_files()
        assert len(collected) == 5
        assert late_file in collected
Exemplo n.º 22
0
 def test_remove_file(self):
     """Removing an entry drops it, and anything beneath it, from the doc.

     Covers removal of a single file, of a directory with contents, and of
     the root entry.
     """
     # Tree: root -> (dir1 -> (dir2 -> level3.txt, level2.txt), level1.txt)
     deep_uuid = str(uuid.uuid4())
     deep_file = metsrw.FSEntry('dir1/dir2/level3.txt', file_uuid=deep_uuid)
     inner_dir = metsrw.FSEntry(
         'dir1/dir2', type='Directory', children=[deep_file])
     mid_file = metsrw.FSEntry(
         'dir1/level2.txt', file_uuid=str(uuid.uuid4()))
     outer_dir = metsrw.FSEntry(
         'dir1', type='Directory', children=[inner_dir, mid_file])
     top_file = metsrw.FSEntry('level1.txt', file_uuid=str(uuid.uuid4()))
     root = metsrw.FSEntry(
         'root', type='Directory', children=[outer_dir, top_file])
     document = metsrw.METSDocument()
     document.append_file(root)
     assert len(document.all_files()) == 6

     # Single file
     document.remove_entry(deep_file)
     assert len(document.all_files()) == 5
     assert document.get_file(file_uuid=deep_uuid) is None
     assert deep_file not in inner_dir.children
     assert deep_file not in document.all_files()

     # Directory together with what is left inside it
     document.remove_entry(outer_dir)
     assert len(document.all_files()) == 2
     assert document.get_file(path='dir1') is None
     assert outer_dir not in root.children
     for gone in (outer_dir, mid_file, inner_dir):
         assert gone not in document.all_files()
     assert top_file in root.children

     # Root entry
     document.remove_entry(root)
     assert len(document.all_files()) == 0
Exemplo n.º 23
0
 def test_serialize_filesec_no_path(self):
     """
     A pathless entry still serializes to a mets:file element.
     The element carries only ID and GROUPID.
     The element has no child mets:FLocat.
     """
     entry_uuid = str(uuid.uuid4())
     entry = metsrw.FSEntry(file_uuid=entry_uuid, use='deletion')

     file_el = entry.serialize_filesec()

     assert file_el.tag == '{http://www.loc.gov/METS/}file'
     attributes = file_el.attrib
     assert attributes['ID'] == 'file-' + entry_uuid
     assert attributes['GROUPID'] == 'Group-' + entry_uuid
     assert len(attributes) == 2
     assert len(file_el) == 0
Exemplo n.º 24
0
    def test_structmap(self):
        """
        The document yields a physical structMap element.
        It contains a single div for the directory.
        The directory div contains one div per child file, each with an fptr.
        The pathless deleted file contributes no div.
        """
        fptr_tag = '{http://www.loc.gov/METS/}fptr'
        first_file = metsrw.FSEntry(
            'objects/file1.txt', file_uuid=str(uuid.uuid4()))
        second_file = metsrw.FSEntry(
            'objects/file2.txt', file_uuid=str(uuid.uuid4()))
        objects_dir = metsrw.FSEntry(
            'objects', type='Directory', children=[first_file, second_file])
        deleted_file = metsrw.FSEntry(
            use='deletion', file_uuid=str(uuid.uuid4()))

        document = metsrw.METSDocument()
        document.append_file(objects_dir)
        document.append_file(deleted_file)
        structmap = document._structmap()

        assert structmap.tag == '{http://www.loc.gov/METS/}structMap'
        assert structmap.attrib['TYPE'] == 'physical'
        assert structmap.attrib['ID'] == 'structMap_1'
        assert structmap.attrib['LABEL'] == 'Archivematica default'
        assert len(structmap.attrib) == 3
        assert len(structmap) == 1

        directory_div = structmap[0]
        assert directory_div.tag == '{http://www.loc.gov/METS/}div'
        assert directory_div.attrib['LABEL'] == 'objects'
        assert directory_div.attrib['TYPE'] == 'Directory'
        assert len(directory_div.attrib) == 2
        assert len(directory_div) == 2

        for position, label in enumerate(('file1.txt', 'file2.txt')):
            file_div = directory_div[position]
            assert file_div.attrib['LABEL'] == label
            assert file_div.attrib['TYPE'] == 'Item'
            assert len(file_div.attrib) == 2
            assert file_div.find(fptr_tag) is not None
Exemplo n.º 25
0
 def test_serialize_structmap_file(self):
     """
     A file entry serializes to a mets:div element.
     The div carries TYPE, LABEL and a single DMDID.
     The div contains one mets:fptr child with a FILEID.
     """
     item = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
     item.add_dublin_core('<dc />')

     div = item.serialize_structmap(recurse=False)

     assert div.tag == '{http://www.loc.gov/METS/}div'
     assert div.attrib['TYPE'] == 'Item'
     assert div.attrib['LABEL'] == 'file1.txt'
     dmd_ids = div.attrib['DMDID'].split()
     assert len(dmd_ids) == 1
     assert len(div) == 1
     file_pointer = div[0]
     assert file_pointer.tag == '{http://www.loc.gov/METS/}fptr'
     assert file_pointer.attrib['FILEID'].startswith('file-')
Exemplo n.º 26
0
 def test_serialize_filesec_metadata(self):
     """
     An entry with PREMIS metadata serializes to a mets:file element.
     The element has an ID attribute.
     The element has exactly one ADMID.
     The element has a child mets:FLocat pointing at the entry's path.
     """
     href_attr = '{http://www.w3.org/1999/xlink}href'
     item = metsrw.FSEntry('file1.txt', file_uuid=str(uuid.uuid4()))
     item.add_premis_object('<premis>object</premis>')

     file_el = item.serialize_filesec()

     assert file_el.tag == '{http://www.loc.gov/METS/}file'
     assert file_el.attrib['ID'].startswith('file-')
     adm_ids = file_el.attrib['ADMID'].split()
     assert len(adm_ids) == 1
     assert len(file_el) == 1
     location = file_el[0]
     assert location.tag == '{http://www.loc.gov/METS/}FLocat'
     assert location.attrib['LOCTYPE'] == 'OTHER'
     assert location.attrib['OTHERLOCTYPE'] == 'SYSTEM'
     assert location.attrib[href_attr] == 'file1.txt'
Exemplo n.º 27
0
 def test_get_file(self):
     """``get_file`` finds entries by UUID, path, label or a combination.

     A lookup that matches nothing returns None, and entries appended after
     the first lookups are found too.
     """
     # Tree: root -> (dir1 -> (dir2 -> level3.txt, level2.txt), level1.txt)
     deep_uuid = str(uuid.uuid4())
     deep_file = metsrw.FSEntry('dir1/dir2/level3.txt', file_uuid=deep_uuid)
     inner_dir = metsrw.FSEntry(
         'dir1/dir2', type='Directory', children=[deep_file])
     mid_uuid = str(uuid.uuid4())
     mid_file = metsrw.FSEntry('dir1/level2.txt', file_uuid=mid_uuid)
     outer_dir = metsrw.FSEntry(
         'dir1', type='Directory', children=[inner_dir, mid_file])
     top_uuid = str(uuid.uuid4())
     top_file = metsrw.FSEntry('level1.txt', file_uuid=top_uuid)
     root = metsrw.FSEntry(
         'root', type='Directory', children=[outer_dir, top_file])
     document = metsrw.METSDocument()
     document.append_file(root)

     # By UUID
     for wanted_uuid, expected in (
             (deep_uuid, deep_file),
             (mid_uuid, mid_file),
             (top_uuid, top_file)):
         assert document.get_file(file_uuid=wanted_uuid) == expected
     assert document.get_file(file_uuid='does not exist') is None

     # By path
     for wanted_path, expected in (
             ('dir1/dir2/level3.txt', deep_file),
             ('dir1/dir2', inner_dir),
             ('dir1/level2.txt', mid_file),
             ('dir1', outer_dir),
             ('level1.txt', top_file)):
         assert document.get_file(path=wanted_path) == expected
     assert document.get_file(path='does not exist') is None

     # By label
     for wanted_label, expected in (
             ('level3.txt', deep_file),
             ('dir2', inner_dir),
             ('level2.txt', mid_file),
             ('dir1', outer_dir),
             ('level1.txt', top_file)):
         assert document.get_file(label=wanted_label) == expected
     assert document.get_file(label='does not exist') is None

     # By several criteria at once
     combined = document.get_file(
         label='level3.txt', path='dir1/dir2/level3.txt')
     assert combined == deep_file
     assert document.get_file(label='dir2', type='Directory') == inner_dir
     assert document.get_file(label='level2.txt', type='Item') == mid_file
     assert document.get_file(file_uuid=None, type='Item') is None

     # Entries appended afterwards are found as well
     extra_uuid = str(uuid.uuid4())
     extra_file = metsrw.FSEntry('file4.txt', file_uuid=extra_uuid)
     document.append_file(extra_file)
     assert document.get_file(file_uuid=extra_uuid) == extra_file
     assert document.get_file(path='file4.txt') == extra_file
Exemplo n.º 28
0
def create_dip_mets(aip_dir, aip_name, fsentries, mets, dip_mets_file):
    """Creates DIP METS file for AtoM/default upload.

    Every entry in ``fsentries`` is removed from ``mets`` except the AIP
    directory entry (a directory labelled with the base name of ``aip_dir``)
    and the ``objects`` directory entry. A new entry for
    ``objects/<aip_name>.zip`` is then added under the ``objects`` entry and
    the document is written to ``dip_mets_file``.

    :param aip_dir: AIP directory; only its base name is used.
    :param aip_name: name used to build the ZIP entry's label and path.
    :param fsentries: iterable of entries to filter.
    :param mets: document the entries are removed from and that is written.
    :param dip_mets_file: destination passed to ``mets.write``.
    :returns: ``None`` in every case; failures are logged, not raised.
    """

    LOGGER.info("Creating DIP METS file for AtoM/default upload.")
    # Loop-invariant, so compute it once.
    aip_label = os.path.basename(aip_dir)
    objects_entry = None
    for fsentry in fsentries:
        # Do not delete AIP entry
        if fsentry.label == aip_label and fsentry.type.lower() == "directory":
            continue

        # Do not delete objects entry and save it for parenting
        if fsentry.label == "objects" and fsentry.type.lower() == "directory":
            objects_entry = fsentry
            continue

        # Delete all the others
        mets.remove_entry(fsentry)

    if not objects_entry:
        LOGGER.error("Could not find objects entry in METS file")
        return

    # Create new entry for ZIP file
    entry = metsrw.FSEntry(
        label="{}.zip".format(aip_name),
        path="objects/{}.zip".format(aip_name),
        file_uuid=str(uuid.uuid4()),
    )

    # Add new entry to objects directory
    objects_entry.add_child(entry)

    # Create DIP METS file
    try:
        mets.write(dip_mets_file, fully_qualified=True, pretty_print=True)
    except Exception:
        # Still best-effort, but keep the traceback in the log so the cause
        # of the failed write is not lost.
        LOGGER.exception("Could not create DIP METS file")
        return
Exemplo n.º 29
0
 def test_serialize_filesec_basic(self):
     """
     A plain file entry serializes to a mets:file element.
     The element has an ID attribute and the given checksum attributes.
     The element has no ADMID.
     The element has a child mets:FLocat pointing at the entry's path.
     """
     href_attr = '{http://www.w3.org/1999/xlink}href'
     item = metsrw.FSEntry(
         'file1.txt',
         file_uuid=str(uuid.uuid4()),
         checksumtype='MD5',
         checksum='daa05c683a4913b268653f7a7e36a5b4')

     file_el = item.serialize_filesec()

     assert file_el.tag == '{http://www.loc.gov/METS/}file'
     attributes = file_el.attrib
     assert attributes['ID'].startswith('file-')
     assert attributes['CHECKSUM'] == 'daa05c683a4913b268653f7a7e36a5b4'
     assert attributes['CHECKSUMTYPE'] == 'MD5'
     assert attributes.get('ADMID') is None
     assert len(file_el) == 1
     location = file_el[0]
     assert location.tag == '{http://www.loc.gov/METS/}FLocat'
     assert location.attrib['LOCTYPE'] == 'OTHER'
     assert location.attrib['OTHERLOCTYPE'] == 'SYSTEM'
     assert location.attrib[href_attr] == 'file1.txt'
Exemplo n.º 30
0
    def test_dependency_injection(self):
        """Test the dependency injection (DI) infrastructure for metsrw plugins.

        - client: metsrw.FSEntry
        - services: classes for reading and writing metadata elements, e.g.,
          the PREMISObject class of metsrw.plugins.premisrw or other classes
          exposing the same interface.
        - injector: this test code or the code in metsrw/di.py which calls
          ``provide`` on the ``feature_broker`` singleton.

        The ``FSEntry`` class declares its dependency on the class attributes
        ``premis_object_class``, ``premis_event_class``, and
        ``premis_agent_class`` and further requires that these return classes
        with ``fromtree`` and ``serialize`` methods::

            >>> premis_object_class = Dependency(
            ...     has_methods('serialize'),
            ...     has_class_methods('fromtree'),
            ...     is_class)

        The feature broker is restored to its default state when the test
        ends, whether it passes or fails.
        """

        feature_broker = metsrw.feature_broker
        assert len(feature_broker) == 3
        try:
            # Clear the feature broker and then register/provide the premisrw
            # plugin classes (services) with the feature broker.
            feature_broker.clear()
            assert not feature_broker
            feature_broker.provide(
                'premis_object_class', premisrw.PREMISObject)
            feature_broker.provide('premis_event_class', premisrw.PREMISEvent)
            feature_broker.provide('premis_agent_class', premisrw.PREMISAgent)
            assert len(feature_broker) == 3

            # Create premisrw instances.
            compression_premis_event = premisrw.PREMISEvent(data=EX_COMPR_EVT)
            premis_events = [compression_premis_event]
            premis_agents = [
                premisrw.PREMISAgent(data=x) for x in [EX_AGT_1, EX_AGT_2]
            ]
            _, compression_program_version, archive_tool = (
                compression_premis_event.compression_details)
            premis_object = premisrw.PREMISObject(
                xsi_type=EX_PTR_XSI_TYPE,
                identifier_value=EX_PTR_IDENTIFIER_VALUE,
                message_digest_algorithm=EX_PTR_MESSAGE_DIGEST_ALGORITHM,
                message_digest=EX_PTR_MESSAGE_DIGEST,
                size=EX_PTR_SIZE,
                format_name=EX_PTR_FORMAT_NAME,
                format_registry_key=EX_PTR_FORMAT_REGISTRY_KEY,
                creating_application_name=archive_tool,
                creating_application_version=compression_program_version,
                date_created_by_application=EX_PTR_DATE_CREATED_BY_APPLICATION)
            transform_files = (
                compression_premis_event.get_decompression_transform_files())

            # Create metsrw ``METSDocument`` and ``FSEntry`` instances.
            mets_doc = metsrw.METSDocument()
            fs_entry = metsrw.FSEntry(path=EX_PTR_PATH,
                                      file_uuid=EX_PTR_IDENTIFIER_VALUE,
                                      use=EX_PTR_PACKAGE_TYPE,
                                      type=EX_PTR_PACKAGE_TYPE,
                                      transform_files=transform_files,
                                      mets_div_type=EX_PTR_AIP_SUBTYPE)
            mets_doc.append_file(fs_entry)

            # Use the ``add_premis_...`` methods to add the PREMIS metadata
            # elements to the ``FSEntry`` instance. This will assert that each
            # PREMIS instance is of the correct type (e.g., that
            # ``premis_object`` is an instance of
            # ``FSEntry().premis_object_class``) and will call the instance's
            # ``serialize`` method and incorporate the resulting
            # ``lxml.etree._ElementTree`` instance into the ``FSEntry``
            # instance appropriately.
            fs_entry.add_premis_object(premis_object)
            for premis_event in premis_events:
                fs_entry.add_premis_event(premis_event)
            for premis_agent in premis_agents:
                fs_entry.add_premis_agent(premis_agent)

            # Assert that the instances returned by the
            # ``FSEntry().get_premis_...`` methods are of the anticipated
            # type, equal to the originals, but distinct objects.
            new_premis_agents = fs_entry.get_premis_agents()
            for new_premis_agent in new_premis_agents:
                assert isinstance(new_premis_agent, premisrw.PREMISAgent)
                assert new_premis_agent in premis_agents
                assert id(new_premis_agent) not in [
                    id(pa) for pa in premis_agents]
            new_premis_events = fs_entry.get_premis_events()
            for new_premis_event in new_premis_events:
                assert isinstance(new_premis_event, premisrw.PREMISEvent)
                assert new_premis_event in premis_events
                assert id(new_premis_event) not in [
                    id(pe) for pe in premis_events]
            new_premis_objects = fs_entry.get_premis_objects()
            for new_premis_object in new_premis_objects:
                assert isinstance(new_premis_object, premisrw.PREMISObject)
                assert new_premis_object == premis_object
                # Identity check on the objects themselves; comparing
                # ``id(...)`` (an int) to the object was always true.
                assert new_premis_object is not premis_object

            # Assert that the resulting mets XML contains a
            # premis:objectIdentifierValue in the anticipated location in the
            # structure with the anticipated value.
            mets_doc_el = mets_doc.serialize()
            xpath = (
                'mets:amdSec/mets:techMD/mets:mdWrap[@MDTYPE="PREMIS:OBJECT"]'
                '/mets:xmlData/premis:object/premis:objectIdentifier/'
                'premis:objectIdentifierValue')
            a = mets_doc_el.find(xpath, namespaces=metsrw.NAMESPACES)
            assert a.text == EX_PTR_IDENTIFIER_VALUE

            # Now change the feature broker so that ``FSEntry``'s dependency
            # on a ``premis_object_class`` class attribute is being fulfilled
            # by a new class: ``BetterPREMISObject``.
            feature_broker.provide('premis_object_class', BetterPREMISObject)

            # Now create a new PREMIS object
            premis_object_tree = premis_object.serialize()
            better_premis_object = BetterPREMISObject.fromtree(
                premis_object_tree)

            # And re-create the ``METSDocument`` and ``FSEntry`` instances.
            mets_doc = metsrw.METSDocument()
            fs_entry = metsrw.FSEntry(path=EX_PTR_PATH,
                                      file_uuid=EX_PTR_IDENTIFIER_VALUE,
                                      use=EX_PTR_PACKAGE_TYPE,
                                      type=EX_PTR_PACKAGE_TYPE,
                                      transform_files=transform_files,
                                      mets_div_type=EX_PTR_AIP_SUBTYPE)
            mets_doc.append_file(fs_entry)

            # Add the PREMIS metadata again, but this time use the instance
            # of ``BetterPREMISObject``.
            fs_entry.add_premis_object(better_premis_object)
            for premis_event in premis_events:
                fs_entry.add_premis_event(premis_event)
            for premis_agent in premis_agents:
                fs_entry.add_premis_agent(premis_agent)

            # Assert that the instances returned by the
            # ``FSEntry().get_premis_...`` methods are of the anticipated
            # type.
            new_premis_objects = fs_entry.get_premis_objects()
            for new_premis_object in new_premis_objects:
                assert isinstance(new_premis_object, BetterPREMISObject)

            # Make sure we can still find the PREMIS object id value.
            mets_doc_el = mets_doc.serialize()
            assert (mets_doc_el.find(
                xpath,
                namespaces=metsrw.NAMESPACES).text == EX_PTR_IDENTIFIER_VALUE)
        finally:
            # Reset the feature broker to its default state so subsequent
            # tests don't break, even when an assertion above fails.
            metsrw.set_feature_broker_to_default_state(feature_broker)