def compile_structmap(**kwargs):
    """Build the METS file section and structural map and write them out.

    The given keyword arguments are first resolved with
    ``_attribute_values``. A structmap creation event is created and only
    after that the reference lists are collected. For the structmap type
    ``EAD3-logical`` the structmap is built with ``create_ead3_structmap``,
    otherwise with ``create_filesec`` and ``create_structmap``. The results
    are written to ``structmap.xml`` and ``filesec.xml`` in the workspace.

    :kwargs: Given arguments:
             workspace: Workspace directory
             structmap_type: Type of structmap
             root_type: Type of root div
             dmdsec_loc: Location of structured descriptive metadata
             file_ids: Dict to be populated with file paths and IDs
             stdout: True to print output to stdout
    """
    attributes = _attribute_values(kwargs)

    # The creation event must exist before the reference lists are read
    _create_event(workspace=attributes["workspace"],
                  structmap_type=attributes["structmap_type"],
                  root_type=attributes["root_type"])
    attributes = get_reference_lists(**attributes)

    if attributes["structmap_type"] != 'EAD3-logical':
        filesec, file_ids = create_filesec(**attributes)

        # Make the file path and ID dict available for the structmap
        attributes['file_ids'] = file_ids
        structmap = create_structmap(filesec.getroot(), **attributes)
    else:
        # With structured descriptive metadata the fileGrp element starts
        # out empty and is populated by create_ead3_structmap.
        filegrp = mets.filegrp()
        filesec = mets.mets(
            child_elements=[mets.filesec(child_elements=[filegrp])])
        structmap = create_ead3_structmap(filegrp, attributes)

    if attributes["stdout"]:
        for document in (filesec, structmap):
            print(xml_utils.serialize(document).decode("utf-8"))

    workspace = attributes["workspace"]
    output_sm_file = os.path.join(workspace, 'structmap.xml')
    output_fs_file = os.path.join(workspace, 'filesec.xml')

    for path in (output_sm_file, output_fs_file):
        directory = os.path.dirname(path)
        if not os.path.exists(directory):
            os.makedirs(directory)

    for path, document in ((output_sm_file, structmap),
                           (output_fs_file, filesec)):
        with open(path, 'wb+') as outfile:
            outfile.write(xml_utils.serialize(document))

    print("compile_structmap created files: %s %s" %
          (output_sm_file, output_fs_file))
def main(arguments=None):
    """The main method for import_description.

    Reads the descriptive metadata XML file given in the arguments, wraps
    it into METS ``xmlData``, ``mdWrap`` and ``dmdSec`` elements and writes
    the resulting METS document to the workspace. The metadata type,
    other metadata type and version are looked up from ``METS_MDTYPES``
    using the namespace of the first wrapped element.

    :arguments: Arguments passed to ``parse_arguments``
    :returns: 0 after the file has been written
    :raises TypeError: If the namespace is not found in ``METS_MDTYPES``
    """
    args = parse_arguments(arguments)

    if args.dmdsec_target:
        url_t_path = encode_path(args.dmdsec_target, suffix='-dmdsec.xml')
    else:
        url_t_path = 'dmdsec.xml'

    # Read the XML as bytes: on Python 3 lxml refuses to parse a text
    # string that carries an XML encoding declaration.
    with open(args.dmdsec_location, 'rb') as content_file:
        content = content_file.read()

    _mets = mets.mets()

    tree = lxml.etree.fromstring(content)

    # With desc_root "remove" the children of the root element are wrapped
    # instead of the root element itself.
    if args.desc_root == 'remove':
        childs = tree.findall('*')
    else:
        childs = [tree]
    xmldata_e = mets.xmldata(child_elements=childs)
    ns = h.get_namespace(childs[0])

    if ns not in METS_MDTYPES:
        raise TypeError("Invalid namespace: %s" % ns)

    md_type = METS_MDTYPES[ns]
    mdt = md_type['mdtype']
    if 'othermdtype' in md_type:
        mdo = md_type['othermdtype']
    else:
        mdo = None
    mdv = md_type['version']

    mdwrap_e = mets.mdwrap(mdtype=mdt,
                           othermdtype=mdo,
                           mdtypeversion=mdv,
                           child_elements=[xmldata_e])
    dmdsec_e = mets.dmdsec(encode_id(url_t_path), child_elements=[mdwrap_e])

    _mets.append(dmdsec_e)

    # print() with a single argument behaves the same on Python 2 and 3
    if args.stdout:
        print(h.serialize(_mets))

    output_file = os.path.join(args.workspace, url_t_path)
    output_dir = os.path.dirname(output_file)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Binary mode: the serialized document is written out unmodified
    with open(output_file, 'wb+') as outfile:
        outfile.write(h.serialize(_mets))

    print("import_description created file: %s" % output_file)

    return 0
示例#3
0
def compile_mets(**kwargs):
    """
    Merge partial METS documents in workspace directory into
    one METS document.

    The document is created with ``create_mets`` and written to
    ``mets.xml`` in the workspace.

    :kwargs: Given arguments:
             mets_profile: METS profile (mandatory)
             organization_name: Creator name (mandatory)
             contractid: Contract ID (mandatory)
             objid: Unique identifier for the package
             contentid: Identifier of the content
             create_date: Package creation date
             last_moddate: Last modification date
             workspace: Workspace path
             base_path: Base path of the digital objects
             record_status: Record status
             label: Short description about the package
             clean: True for cleaning the workspace from temporary files
             copy_files: True copies the digital objects from base_path to
                         workspace
             stdout: True prints the output to stdout
             packagingservice: Packaging service specific parameter
    """
    attributes = _attribute_values(kwargs, True)

    mets_document = create_mets(**attributes)

    # Serialize once; the same result is used for stdout and for the file
    serialized = xml_utils.serialize(mets_document.getroot())

    if attributes["stdout"]:
        # Decode before printing, as compile_structmap does, so that the
        # XML text is printed instead of the repr of a bytes object.
        print(serialized.decode("utf-8"))

    output_file = os.path.join(attributes["workspace"], 'mets.xml')

    output_dir = os.path.dirname(output_file)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(output_file, 'wb+') as outfile:
        outfile.write(serialized)

    print("compile_mets created file: %s" % output_file)

    if attributes["copy_files"]:
        copy_objects(attributes["workspace"], attributes["base_path"])
        print("compile_mets copied objects from %s to "
              "workspace" % attributes["base_path"])

    if attributes["clean"]:
        clean_metsparts(attributes["workspace"])
        print("compile_mets cleaned work files from workspace.")
def test_serialize():
    """Serializing a parsed element gives the expected bytes.

    The expected output has an XML declaration and the child element
    indented on its own line.
    """
    element = ET.fromstring('<a:x xmlns:a="b"><a:y/></a:x>')
    expected = (b'<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
                b'<a:x xmlns:a="b">\n  <a:y/>\n</a:x>\n')
    assert u.serialize(element) == expected
示例#5
0
def compile_structmap(workspace="./workspace/",
                      structmap_type=None,
                      root_type=None,
                      dmdsec_loc=None,
                      stdout=False):
    """Build the METS file section and structural map and write them out.

    The object list is read from the workspace with ``get_objectlist``.
    For the structmap type ``EAD3-logical`` the structmap is built with
    ``create_ead3_structmap``, otherwise with ``create_filesec`` and
    ``create_structmap``. The results are written to ``structmap.xml`` and
    ``filesec.xml`` in the workspace.

    :workspace: Workspace directory
    :structmap_type: Type of structmap
    :root_type: Passed to ``create_structmap``
    :dmdsec_loc: Passed to ``create_ead3_structmap``
    :stdout: True to print both documents to stdout
    """
    filelist = get_objectlist(workspace)

    if structmap_type != 'EAD3-logical':
        filesec = create_filesec(workspace, filelist)
        structmap = create_structmap(workspace, filesec.getroot(), filelist,
                                     structmap_type, root_type)
    else:
        # With structured descriptive metadata the fileGrp element starts
        # out empty and is populated by create_ead3_structmap.
        filegrp = mets.filegrp()
        filesec = mets.mets(
            child_elements=[mets.filesec(child_elements=[filegrp])])
        structmap = create_ead3_structmap(dmdsec_loc, workspace, filegrp,
                                          filelist, structmap_type)

    if stdout:
        for document in (filesec, structmap):
            print(xml_utils.serialize(document).decode("utf-8"))

    output_sm_file = os.path.join(workspace, 'structmap.xml')
    output_fs_file = os.path.join(workspace, 'filesec.xml')

    for path in (output_sm_file, output_fs_file):
        directory = os.path.dirname(path)
        if not os.path.exists(directory):
            os.makedirs(directory)

    for path, document in ((output_sm_file, structmap),
                           (output_fs_file, filesec)):
        with open(path, 'wb+') as outfile:
            outfile.write(xml_utils.serialize(document))

    print("compile_structmap created files: %s %s" %
          (output_sm_file, output_fs_file))
示例#6
0
def main(arguments=None):
    """The main method for argparser.

    Collects the file paths given in the arguments and, for each file,
    creates a PREMIS object wrapped in METS ``mdWrap``, ``techMD`` and
    ``amdSec`` elements. Each METS document is written to the workspace
    using the encoded relative file path with the suffix ``-techmd.xml``
    as the file name.

    :arguments: Arguments passed to ``parse_arguments``
    :returns: 0 after all files have been processed
    """
    args = parse_arguments(arguments)

    # Loop files and create premis objects
    for filename in collect_filepaths(dirs=args.files, base=args.base_path):
        if args.base_path != '':
            filerel = os.path.relpath(filename, args.base_path)
        else:
            filerel = filename

        xmldata = mets.xmldata()
        # The return value is not used here; presumably the created PREMIS
        # object is attached to xmldata by the call -- TODO confirm.
        create_premis_object(
            xmldata, filename, args.skip_inspection, args.format_name,
            args.format_version, args.digest_algorithm, args.message_digest,
            args.date_created, args.charset)

        # The same encoded name gives both the techMD ID and the file name
        techmd_name = encode_path(filerel, suffix="-techmd.xml")

        mdwrap = mets.mdwrap('PREMIS:OBJECT', '2.3', child_elements=[xmldata])
        techmd = mets.techmd(encode_id(techmd_name),
                             child_elements=[mdwrap])
        amdsec = mets.amdsec(child_elements=[techmd])
        _mets = mets.mets(child_elements=[amdsec])

        # print() with a single argument behaves the same on Python 2 and 3
        if args.stdout:
            print(h.serialize(_mets))

        if not os.path.exists(args.workspace):
            os.makedirs(args.workspace)

        # Binary mode: the serialized document is written out unmodified
        output_path = os.path.join(args.workspace, techmd_name)
        with open(output_path, 'wb+') as outfile:
            outfile.write(h.serialize(_mets))
            print("Wrote METS technical metadata to file %s" % outfile.name)

    return 0
示例#7
0
def main(arguments=None):
    """The main method for compile_structmap.

    Creates a METS structural map and a METS file section and writes them
    to ``structmap.xml`` and ``filesec.xml`` in the workspace. When the
    ``dmdsec_struct`` argument is ``ead3`` the structmap is built with
    ``create_ead3_structmap`` under a div of type ``logical``; otherwise
    it is built with ``create_structmap`` under a div of type
    ``directory``.

    :arguments: Arguments passed to ``parse_arguments``
    :returns: 0 after the files have been written
    """
    args = parse_arguments(arguments)

    structmap = mets.structmap(type_attr=args.type_attr)
    mets_structmap = mets.mets(child_elements=[structmap])

    filegrp = mets.filegrp()
    filesec = mets.filesec(child_elements=[filegrp])
    mets_filesec = mets.mets(child_elements=[filesec])

    _, dmdsec_id = ids_for_files(args.workspace,
                                 None,
                                 'dmdsec.xml',
                                 dash_count=0)

    if args.dmdsec_struct == 'ead3':
        container_div = mets.div(type_attr='logical')
        structmap.append(container_div)
        create_ead3_structmap(args.dmdsec_loc, args.workspace, container_div,
                              filegrp, dmdsec_id)
    else:
        amdids = get_links_event_agent(args.workspace, None)
        container_div = mets.div(type_attr='directory',
                                 dmdid=dmdsec_id,
                                 admid=amdids)
        structmap.append(container_div)
        divs = div_structure(args.workspace)
        create_structmap(args.workspace, divs, container_div, filegrp)

    # print() with a single argument behaves the same on Python 2 and 3
    if args.stdout:
        print(h.serialize(mets_filesec))
        print(h.serialize(mets_structmap))

    output_sm_file = os.path.join(args.workspace, 'structmap.xml')
    output_fs_file = os.path.join(args.workspace, 'filesec.xml')

    for path in (output_sm_file, output_fs_file):
        directory = os.path.dirname(path)
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Binary mode: the serialized documents are written out unmodified
    with open(output_sm_file, 'wb+') as outfile:
        outfile.write(h.serialize(mets_structmap))

    with open(output_fs_file, 'wb+') as outfile:
        outfile.write(h.serialize(mets_filesec))

    print("compile_structmap created files: %s %s" % (output_sm_file,
                                                      output_fs_file))

    return 0
def test_construct_catalog_xml(tmpdir, rewrite_rules, next_catalogs):
    """Check that the constructed catalog matches the given parameters.

    The catalog is serialized to a file and parsed back. The base
    attribute, the number of child elements and each rewriteURI and
    nextCatalog entry are then compared with the input.
    """
    catalog_file = tmpdir.mkdir('test').join('foo.xml')
    base_dir = tmpdir.mkdir('base_catalog')
    catalog = construct_catalog_xml(base_path=base_dir.strpath,
                                    rewrite_rules=rewrite_rules,
                                    next_catalogs=next_catalogs)

    # Round trip through a file: write the serialized catalog, parse it back
    with open(catalog_file.strpath, 'wb') as in_file:
        in_file.write(serialize(catalog))
    with open(catalog_file.strpath, 'rb') as out_file:
        tree = ET.fromstring(out_file.read())

    for key in tree.attrib:
        if not key.endswith('base'):
            continue
        assert tree.attrib[key].rstrip('/') == base_dir.strpath

    assert len(tree) == len(rewrite_rules) + len(next_catalogs)

    # Convert the keys and values of the rules to text so that they can be
    # compared with the parsed output. Stays None when no rules were given.
    decoded_rules = None
    if rewrite_rules:
        decoded_rules = {ensure_text(key): ensure_text(value)
                         for key, value in rewrite_rules.items()}

    for element in tree:
        attributes = element.attrib
        if 'rewriteURI' in element.tag:
            start_string = attributes['uriStartString']
            assert attributes['rewritePrefix'] == decoded_rules[start_string]
            # An evaluated entry is dropped from the expected rules
            del decoded_rules[start_string]
        if 'nextCatalog' in element.tag:
            assert attributes['catalog'] in next_catalogs
            # An evaluated entry is dropped from the expected catalogs
            next_catalogs.remove(attributes['catalog'])

    # Everything expected must have been seen, leaving both empty or None
    assert not decoded_rules
    assert not next_catalogs
    def run(self):
        """Write a METS document that contains the logical structural map.

        The DMDID read from the physical structmap file and the provenance
        ids are attached to a wrapper div of type ``logical``. Every file
        category gets its own div, with one file pointer for each file of
        the category.

        :returns: ``None``
        """
        # The physical structmap has been written to file earlier
        structmap_path = os.path.join(self.sip_creation_path, 'structmap.xml')
        physical_structmap = ET.parse(structmap_path)

        # The DMDID is read from the first grandchild of the root element
        physical_root = physical_structmap.getroot()
        dmdsec_id = physical_root[0][0].attrib['DMDID']

        provenance_ids = self.get_provenance_ids()

        # Empty logical structmap inside a METS root element
        logical_structmap = mets.structmap(type_attr='Fairdata-logical')
        mets_structmap = mets.mets(child_elements=[logical_structmap])

        # One div per category, all of them under a single wrapper div
        categories = self.find_file_categories()
        wrapper_div = mets.div(type_attr='logical',
                               dmdid=[dmdsec_id],
                               admid=provenance_ids)
        for category in categories:
            category_div = mets.div(type_attr=category)
            for filename in categories.get(category):
                encoded_name = encode_path(filename, safe='/')
                category_div.append(mets.fptr(self.get_fileid(encoded_name)))
            wrapper_div.append(category_div)
        logical_structmap.append(wrapper_div)

        with self.output().open('wb') as output:
            output.write(h.serialize(mets_structmap))
示例#10
0
def main(arguments=None):
    """The main method for premis_event.

    Creates a PREMIS event and writes it inside a METS document to the
    workspace. If an agent name is given, a PREMIS agent is created and
    written to its own METS document first, and the event is created with
    the linking agent identifier of that agent. The file names are built
    from the event type, prefixed with the event target when one is given.

    :arguments: Arguments passed to ``parse_arguments``
    :returns: 0 after the files have been written
    """
    args = parse_arguments(arguments)

    def _write_mets(document, output_file):
        """Print the document if requested and write it to output_file."""
        # print() with a single argument behaves the same on Python 2 and 3
        if args.stdout:
            print(h.serialize(document))

        directory = os.path.dirname(output_file)
        if not os.path.exists(directory):
            os.makedirs(directory)

        # Binary mode: the serialized document is written out unmodified
        with open(output_file, 'wb+') as outfile:
            outfile.write(h.serialize(document))

        print("premis_event created file: %s" % output_file)

    # Common start of the agent and event file names
    if args.event_target:
        prefix = '%s-%s' % (args.event_target, args.event_type)
    else:
        prefix = '%s' % (args.event_type)

    linking_agent_identifier = None
    if args.agent_name:
        _mets = mets.mets()
        amdsec = mets.amdsec()
        _mets.append(amdsec)

        # The encoded file name is also the source of the agent ID
        agent_file = encode_path(prefix + '-agent.xml')
        agent_id = encode_id(agent_file)
        linking_agent_identifier = create_premis_agent(amdsec, agent_id,
                                                       args.agent_name,
                                                       args.agent_type)

        _write_mets(_mets, os.path.join(args.workspace, agent_file))

    # Create event
    _mets = mets.mets()
    amdsec = mets.amdsec()
    _mets.append(amdsec)

    # The encoded file name is also the source of the event ID
    event_file = encode_path(prefix + '-event.xml')
    event_id = encode_id(event_file)

    create_premis_event(amdsec, args.event_type, args.event_datetime,
                        args.event_detail, args.event_outcome,
                        args.event_outcome_detail, linking_agent_identifier,
                        event_id)

    _write_mets(_mets, os.path.join(args.workspace, event_file))

    return 0
示例#11
0
def main(arguments=None):
    """The main method for compile_mets.

    Creates a METS root element with a metsHdr element, collects the
    first child element of the root of each partial METS file found in
    the workspace, merges the amdSec elements, sorts the elements and
    writes the complete document to ``mets.xml`` in the workspace.
    Optionally copies the digital objects to the workspace and cleans the
    partial METS files.

    :arguments: Arguments passed to ``parse_arguments``
    :returns: 0 after the document has been written
    """
    args = parse_arguments(arguments)

    # Create mets header
    _mets = mets.mets(METS_PROFILE[args.mets_profile],
                      objid=args.objid,
                      label=args.label,
                      namespaces=NAMESPACES)
    _mets = mets_extend(_mets, METS_CATALOG, METS_SPECIFICATION,
                        args.contentid, args.contractid)
    # Create list of additional agent elements if packagingservice is defined
    _agents = [mets.agent(args.organization_name)]
    if args.packagingservice:
        _agents.append(
            mets.agent(args.organization_name, agent_role='ARCHIVIST'))
        _agents.append(
            mets.agent(args.packagingservice,
                       agent_type='OTHER',
                       agent_role='CREATOR',
                       othertype='SOFTWARE'))
    _metshdr = mets.metshdr(args.create_date, args.last_moddate,
                            args.record_status, _agents)
    _mets.append(_metshdr)

    # Collect elements from workspace XML files
    part_suffixes = ('-techmd.xml', '-agent.xml', '-event.xml', 'dmdsec.xml',
                     'structmap.xml', 'filesec.xml', 'rightsmd.xml',
                     '-othermd.xml')
    elements = []
    for entry in scandir(args.workspace):
        if entry.name.endswith(part_suffixes) and entry.is_file():
            # Only the first child of the root element is collected
            elements.append(lxml.etree.parse(entry.path).getroot()[0])

    elements = mets.merge_elements('{%s}amdSec' % NAMESPACES['mets'], elements)
    elements.sort(key=mets.order)

    for element in elements:
        _mets.append(element)

    # print() with a single argument behaves the same on Python 2 and 3
    if args.stdout:
        print(h.serialize(_mets))

    output_file = os.path.join(args.workspace, 'mets.xml')

    output_dir = os.path.dirname(output_file)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Binary mode: the serialized document is written out unmodified
    with open(output_file, 'wb+') as outfile:
        outfile.write(h.serialize(_mets))

    print("compile_mets created file: %s" % output_file)

    if args.copy_files:
        copy_files(args.workspace, args.base_path)
        print("compile_mets copied objects from %s to workspace" %
              args.base_path)

    if args.clean:
        clean_metsparts(args.workspace)
        print("compile_mets cleaned work files from workspace")

    return 0
示例#12
0
def compile_mets(mets_profile,
                 organization_name,
                 contractid,
                 objid=None,
                 label=None,
                 contentid=None,
                 create_date=None,
                 last_moddate=None,
                 record_status="submission",
                 workspace="./workspace",
                 clean=False,
                 copy_files=False,
                 base_path=".",
                 stdout=False,
                 packagingservice=None):
    """Merge partial METS documents in workspace directory into
    one METS document.

    The document is created with ``create_mets`` and written to
    ``mets.xml`` in the workspace.

    :mets_profile: Value of the METS PROFILE attribute
    :organization_name: Passed to ``create_mets`` as the organization
    :contractid: Contract ID, given to METS as ``urn:uuid:<contractid>``
    :objid: Value of the OBJID attribute, a random UUID if not given
    :label: Value of the LABEL attribute
    :contentid: Value of the CONTENTID attribute
    :create_date: Value of the CREATEDATE attribute, current UTC time in
                  ISO format if not given
    :last_moddate: Value of the LASTMODDATE attribute
    :record_status: Value of the RECORDSTATUS attribute
    :workspace: Workspace directory
    :clean: True to run ``clean_metsparts`` on the workspace
    :copy_files: True to run ``copy_objects`` from base_path to workspace
    :base_path: Base path given to ``copy_objects``
    :stdout: True to print the METS document to stdout
    :packagingservice: Passed to ``create_mets``
    """
    contract = "urn:uuid:%s" % contractid

    if not objid:
        objid = six.text_type(uuid.uuid4())

    if not create_date:
        create_date = datetime.datetime.utcnow().isoformat()

    mets_document = create_mets(workspace,
                                mets_attributes={
                                    'PROFILE': mets_profile,
                                    'OBJID': objid,
                                    'LABEL': label,
                                    "CONTENTID": contentid,
                                    "CONTRACTID": contract
                                },
                                metshdr_attributes={
                                    "CREATEDATE": create_date,
                                    "LASTMODDATE": last_moddate,
                                    "RECORDSTATUS": record_status
                                },
                                organization=organization_name,
                                packagingservice=packagingservice)

    # Serialize once; the same result is used for stdout and for the file
    serialized = xml_utils.serialize(mets_document.getroot())

    if stdout:
        # Decode before printing, as compile_structmap does, so that the
        # XML text is printed instead of the repr of a bytes object.
        print(serialized.decode("utf-8"))

    output_file = os.path.join(workspace, 'mets.xml')

    output_dir = os.path.dirname(output_file)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    with open(output_file, 'wb+') as outfile:
        outfile.write(serialized)

    print("compile_mets created file: %s" % output_file)

    if copy_files:
        copy_objects(workspace, base_path)
        print("compile_mets copied objects from %s to workspace" % base_path)

    if clean:
        clean_metsparts(workspace)
        print("compile_mets cleaned work files from workspace")
示例#13
0
    def run(self):
        """Create the file section and the physical structural map.

        The metadata ids of the premis event reference files of the input
        targets are first merged into one reference file in the SIP
        creation directory. The fileSec element is then created and
        written to the first output target, and the physical structure
        map based on it is written to the second output target.

        :returns: ``None``
        """
        # Gather the metadata ids of the root path from every input target
        md_ids = []
        inputs = ('create_provenance_information',
                  'create_descriptive_metadata',
                  'create_technical_metadata')
        for input_target in inputs:
            target_references = read_md_references(
                self.workspace, self.input()[input_target].path)
            md_ids += target_references['.']['md_ids']

        # Write the merged ids as a single reference entry for the root path
        merged_path = os.path.join(self.sip_creation_path,
                                   'premis-event-md-references.jsonl')
        with open(merged_path, 'w') as references:
            merged_references = {
                ".": {
                    "path_type": "directory",
                    "streams": {},
                    "md_ids": md_ids
                }
            }
            references.write(json.dumps(merged_references))

        # Reference lists and supplementary file information are needed
        # for both the fileSec and the structmap.
        reference_lists = get_reference_lists(
            workspace=self.sip_creation_path)
        (all_amd_refs, all_dmd_refs, object_refs, filelist,
         file_properties) = reference_lists
        supplementary = iter_supplementary(file_properties=file_properties)
        (supplementary_files, supplementary_types) = supplementary

        # File section goes to the first output target
        filesec_result = compile_structmap.create_filesec(
            all_amd_refs=all_amd_refs,
            object_refs=object_refs,
            file_properties=file_properties,
            supplementary_files=supplementary_files,
            supplementary_types=supplementary_types)
        (filesec, file_ids) = filesec_result
        with self.output()[0].open('wb') as filesecxml:
            filesecxml.write(serialize(filesec))

        # Physical structmap goes to the second output target
        structmap = compile_structmap.create_structmap(
            filesec=filesec,
            structmap_type='Fairdata-physical',
            file_ids=file_ids,
            all_amd_refs=all_amd_refs,
            all_dmd_refs=all_dmd_refs,
            filelist=filelist,
            supplementary_files=supplementary_files,
            supplementary_types=supplementary_types,
            file_properties=file_properties,
            workspace=self.sip_creation_path)
        with self.output()[1].open('wb') as structmapxml:
            structmap.write(structmapxml,
                            pretty_print=True,
                            xml_declaration=True,
                            encoding='UTF-8')