def compile_structmap(**kwargs):
    """Build the METS file section and structural map and store them in
    the workspace.

    The keyword arguments are first passed through ``_attribute_values``.
    A structmap creation event is then recorded, the reference lists are
    collected, and finally the fileSec and structMap documents are
    generated and written to ``filesec.xml`` and ``structmap.xml`` in the
    workspace directory.

    :kwargs: Given arguments:
             workspace: Workspace directory
             structmap_type: Type of structmap
             root_type: Type of root div
             dmdsec_loc: Location of structured descriptive metadata
             file_ids: Dict to be populated with file paths and IDs
             stdout: True to print output to stdout
    """
    attrs = _attribute_values(kwargs)

    # The creation event must be documented before the reference lists are
    # read, so the order of these two calls matters.
    _create_event(workspace=attrs["workspace"],
                  structmap_type=attrs["structmap_type"],
                  root_type=attrs["root_type"])
    attrs = get_reference_lists(**attrs)

    if attrs["structmap_type"] != 'EAD3-logical':
        filesec, path_to_id = create_filesec(**attrs)

        # Hand the file path -> file ID mapping on to the structmap creation
        attrs['file_ids'] = path_to_id
        structmap = create_structmap(filesec.getroot(), **attrs)
    else:
        # With structured descriptive metadata the fileSec starts out with
        # an empty fileGrp; create_ead3_structmap is given the fileGrp so
        # that it can populate it.
        filegrp = mets.filegrp()
        filesec = mets.mets(
            child_elements=[mets.filesec(child_elements=[filegrp])])
        structmap = create_ead3_structmap(filegrp, attrs)

    if attrs["stdout"]:
        for document in (filesec, structmap):
            print(xml_utils.serialize(document).decode("utf-8"))

    sm_path = os.path.join(attrs["workspace"], 'structmap.xml')
    fs_path = os.path.join(attrs["workspace"], 'filesec.xml')
    targets = ((sm_path, structmap), (fs_path, filesec))

    # Make sure the target directory of each output file exists
    for path, _ in targets:
        directory = os.path.dirname(path)
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Serialized XML is written in binary mode, structmap first
    for path, document in targets:
        with open(path, 'wb+') as outfile:
            outfile.write(xml_utils.serialize(document))

    print("compile_structmap created files: %s %s" % (sm_path, fs_path))
示例#2
0
def create_filesec(workspace, filelist):
    """Build a METS document element tree whose content is a fileSec.

    An empty fileGrp is wrapped in a fileSec element and handed to
    ``create_filegrp`` together with the workspace and the file list. The
    fileSec is then placed under a mets root element, whose namespaces are
    cleaned up.

    :workspace: Workspace directory, passed on to ``create_filegrp``
    :filelist: File list, passed on to ``create_filegrp``
    :returns: ElementTree with the mets element as its root
    """
    group = mets.filegrp()
    section = mets.filesec(child_elements=[group])

    create_filegrp(workspace, group, filelist)

    root = mets.mets(child_elements=[section])
    ET.cleanup_namespaces(root)
    return ET.ElementTree(root)
示例#3
0
def main(arguments=None):
    """The main method for compile_structmap.

    Parses the arguments, builds the structMap and fileSec METS documents
    and writes them as ``structmap.xml`` and ``filesec.xml`` into the
    workspace directory. With the stdout option both documents are also
    printed.

    :arguments: Arguments handed to ``parse_arguments``
    :returns: 0
    """
    args = parse_arguments(arguments)

    structmap = mets.structmap(type_attr=args.type_attr)
    mets_structmap = mets.mets(child_elements=[structmap])

    filegrp = mets.filegrp()
    filesec = mets.filesec(child_elements=[filegrp])
    mets_filesec = mets.mets(child_elements=[filesec])

    # Only the second item of the returned pair (the dmdSec ID) is needed
    _, dmdsec_id = ids_for_files(args.workspace,
                                 None,
                                 'dmdsec.xml',
                                 dash_count=0)

    if args.dmdsec_struct == 'ead3':
        container_div = mets.div(type_attr='logical')
        structmap.append(container_div)
        create_ead3_structmap(args.dmdsec_loc, args.workspace, container_div,
                              filegrp, dmdsec_id)
    else:
        amdids = get_links_event_agent(args.workspace, None)
        container_div = mets.div(type_attr='directory',
                                 dmdid=dmdsec_id,
                                 admid=amdids)
        structmap.append(container_div)
        divs = div_structure(args.workspace)
        create_structmap(args.workspace, divs, container_div, filegrp)

    # print is called in its function form with a single argument: the
    # output is the same on Python 2, and the code is no longer a syntax
    # error on Python 3.
    if args.stdout:
        print(h.serialize(mets_filesec))
        print(h.serialize(mets_structmap))

    output_sm_file = os.path.join(args.workspace, 'structmap.xml')
    output_fs_file = os.path.join(args.workspace, 'filesec.xml')

    # Both output files are joined onto the same workspace path, so one
    # directory check covers them both.
    output_dir = os.path.dirname(output_sm_file)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(output_sm_file, 'w+') as outfile:
        outfile.write(h.serialize(mets_structmap))

    with open(output_fs_file, 'w+') as outfile:
        outfile.write(h.serialize(mets_filesec))

    print("compile_structmap created files: %s %s" % (output_sm_file,
                                                      output_fs_file))

    return 0
示例#4
0
def compile_structmap(workspace="./workspace/",
                      structmap_type=None,
                      root_type=None,
                      dmdsec_loc=None,
                      stdout=False):
    """Build the METS file section and structural map and store them in
    the workspace.

    The object list of the workspace is read first. The fileSec and
    structMap documents are then generated and written to ``filesec.xml``
    and ``structmap.xml`` in the workspace directory.

    :workspace: Workspace directory
    :structmap_type: Type of structmap; 'EAD3-logical' selects the EAD3
                     structmap creation
    :root_type: Passed on to ``create_structmap``
    :dmdsec_loc: Passed on to ``create_ead3_structmap``
    :stdout: True to also print both documents
    """
    objects = get_objectlist(workspace)

    if structmap_type != 'EAD3-logical':
        filesec = create_filesec(workspace, objects)
        structmap = create_structmap(workspace, filesec.getroot(), objects,
                                     structmap_type, root_type)
    else:
        # With structured descriptive metadata the fileSec starts out with
        # an empty fileGrp; create_ead3_structmap is given the fileGrp so
        # that it can populate it.
        filegrp = mets.filegrp()
        filesec = mets.mets(
            child_elements=[mets.filesec(child_elements=[filegrp])])
        structmap = create_ead3_structmap(dmdsec_loc, workspace, filegrp,
                                          objects, structmap_type)

    if stdout:
        for document in (filesec, structmap):
            print(xml_utils.serialize(document).decode("utf-8"))

    sm_path = os.path.join(workspace, 'structmap.xml')
    fs_path = os.path.join(workspace, 'filesec.xml')
    targets = ((sm_path, structmap), (fs_path, filesec))

    # Make sure the target directory of each output file exists
    for path, _ in targets:
        directory = os.path.dirname(path)
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Serialized XML is written in binary mode, structmap first
    for path, document in targets:
        with open(path, 'wb+') as outfile:
            outfile.write(xml_utils.serialize(document))

    print("compile_structmap created files: %s %s" % (sm_path, fs_path))
示例#5
0
def test_get_fileid():
    """Test the get_fileid function.

    A fileGrp with three file elements is built, each with its own
    identifier and path, and the function is expected to return the
    identifier of the file that matches the given path. Note that the
    hrefs are written with a '+' in the file name while the lookup path
    uses a space.
    """
    file_elements = []
    for index in range(3):
        file_elements.append(
            mets.file_elem(file_id='identifier%s' % index,
                           admid_elements=['foo', 'bar'],
                           loctype='foo',
                           xlink_href='file://path/to/file+name%s' % index,
                           xlink_type='foo'))

    group = mets.filegrp(child_elements=file_elements)

    found_id = compile_structmap.get_fileid(group, 'path/to/file name1')
    assert found_id == 'identifier1'
def test_add_fptrs_div_ead(testpath, run_cli, hrefs, length, child_elem,
                           order):
    """Test the add_fptrs_div_ead function.

    The function is run against an empty mets:div element, and the
    returned div is checked for the expected child element type, the
    number of children and fptr elements, and the placement of the ORDER
    attribute.
    """
    create_test_data(testpath, run_cli, order=order)

    empty_div = ET.fromstring(
        '<mets:div xmlns:mets="http://www.loc.gov/METS/"></mets:div>')

    # The same reference data is used for both reference entries
    references = read_md_references(
        testpath, "import-object-md-references.jsonl")
    attrs = {
        "all_amd_refs": references,
        "object_refs": references,
        "workspace": testpath,
        "filelist": [
            'tests/data/structured/Publication files/publication.txt',
            'tests/data/structured/Software files/koodi.java'
        ],
    }
    filegrp = mets.filegrp()
    c_div = compile_structmap.add_fptrs_div_ead(empty_div, hrefs, filegrp,
                                                attrs)

    children = c_div.xpath('./*')
    first_child = children[0]

    # Child elements are either new divs or fptrs
    assert first_child.tag == '{http://www.loc.gov/METS/}%s' % child_elem

    # Number of child elements should equal the number of valid hrefs
    assert len(children) == length

    # Number of fptr elements should equal the number of valid hrefs
    fptrs = c_div.findall('.//{http://www.loc.gov/METS/}fptr')
    assert len(fptrs) == length

    # If file properties exist, they are written to the divs: to the div
    # itself when there is a single child, otherwise to the child divs
    if not order:
        assert 'ORDER' not in c_div.attrib
    elif length == 1:
        assert 'ORDER' in c_div.attrib
    else:
        assert 'ORDER' in first_child.attrib
        assert first_child.get('TYPE') == 'dao'
def create_filesec(**attributes):
    """
    Build a METS document element tree whose content is a fileSec.

    The given attributes are passed through ``_attribute_values`` and
    ``get_reference_lists``. ``add_file_to_filesec`` is then called with
    the fileGrp for every path of the file list, and the file ID it
    returns is stored under that path.

    :attributes: Attribute values as a dict
                 all_amd_refs: XML element tree of administrative metadata
                               references
                 filelist: Sorted list of digital objects (file paths)
    :returns: A tuple of METS XML Element tree including file section
              element and a dict of file paths and identifiers
    """
    values = _attribute_values(attributes)
    attributes = get_reference_lists(**values)

    group = mets.filegrp()
    section = mets.filesec(child_elements=[group])

    ids_by_path = {}
    for file_path in attributes["filelist"]:
        ids_by_path[file_path] = add_file_to_filesec(
            attributes["all_amd_refs"], attributes["object_refs"],
            file_path, group)

    root = mets.mets(child_elements=[section])
    ET.cleanup_namespaces(root)
    return ET.ElementTree(root), ids_by_path