def test_get_dublincore_none_found(self):
        """getDublinCore should yield no element when no info is found."""
        # UUID for which this test expects the lookup to find nothing.
        unknown_sip_uuid = 'dnednedn-5bd2-4249-84a1-2f00f725b981'

        result = archivematicaCreateMETS2.getDublinCore(
            self.siptypeuuid,
            unknown_sip_uuid,
        )

        assert result is None
Exemplo n.º 2
0
def update_dublincore(mets, sip_uuid):
    """
    Add a new dmdSec for the Dublin Core (DC) info that applies to the SIP.

    Outcomes, in the order they are checked:

    - A DC row with METADATA_STATUS_REINGEST exists for the SIP:
      the METS is returned unchanged.
    - getDublinCore() returns None and the 'objects' directory entry has no
      dmdSecs: the METS is returned unchanged.
    - getDublinCore() returns None but the 'objects' directory entry has
      dmdSecs: an empty dublincore element is added to record the deletion.
    - getDublinCore() returns an element: it is added as a new dmdSec.

    Whenever a dmdSec is added and the 'objects' entry then holds more than
    one dmdSec, replace_with() is called on the second-to-last one, passing
    the new dmdSec.

    :param mets: METS document object; must provide
        get_file(label=..., type=...).
    :param sip_uuid: Identifier the DC rows are filtered on.
    :return: The same ``mets`` object that was passed in.
    """
    applies_to_type = createmets2.SIPMetadataAppliesToType

    # A row still carrying METADATA_STATUS_REINGEST is treated as "nothing
    # changed", so there is nothing to add.
    dc_untouched = models.DublinCore.objects.filter(
        metadataappliestoidentifier=sip_uuid,
        metadataappliestotype_id=applies_to_type,
        status=models.METADATA_STATUS_REINGEST,
    ).exists()
    if dc_untouched:
        print('No updated or new DC metadata found')
        return mets

    # Entry for the 'objects' directory, which carries the SIP-level dmdSecs
    sip_div = mets.get_file(label='objects', type='Directory')
    print('Existing dmdIds for DC metadata:', sip_div.dmdids)

    # Build the DC element from the current database contents
    new_dc = createmets2.getDublinCore(applies_to_type, sip_uuid)

    if new_dc is None:
        if not sip_div.dmdsecs:
            # Nothing in the DB and nothing in the METS
            print('No updated or new DC metadata found')
            return mets

        # The METS has DC but the DB no longer does: add an empty element
        print('DC metadata was deleted')
        schema_location = (
            ns.dctermsNS +
            " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd"
        )
        new_dc = etree.Element(
            ns.dctermsBNS + "dublincore",
            nsmap={"dcterms": ns.dctermsNS, 'dc': ns.dcNS},
        )
        new_dc.set(ns.xsiBNS + "schemaLocation", schema_location)

    new_dmdsec = sip_div.add_dublin_core(new_dc)
    print('Adding new DC in dmdSec with ID', new_dmdsec.id_string())

    # With more than one dmdSec present, hand the new one to the previous one
    if len(sip_div.dmdsecs) > 1:
        sip_div.dmdsecs[-2].replace_with(new_dmdsec)

    return mets
Exemplo n.º 3
0
    def test_get_dublincore(self):
        """It should create a Dublin Core element from the database info."""
        sipuuid = '8b891d7c-5bd2-4249-84a1-2f00f725b981'
        siptypeuuid = '3e48343d-e2d2-4956-aaa3-b54d26eb9761'

        dc = '{http://purl.org/dc/elements/1.1/}'
        dcterms = '{http://purl.org/dc/terms/}'
        # Expected (tag, text) of each child, in document order
        expected_children = [
            (dc + 'title', 'Yamani Weapons'),
            (dc + 'creator', 'Keladry of Mindelan'),
            (dc + 'subject', 'Glaives'),
            (dc + 'description', 'Glaives are cool'),
            (dc + 'publisher', 'Tortall Press'),
            (dc + 'contributor', 'Yuki'),
            (dc + 'date', '2015'),
            (dc + 'type', 'Archival Information Package'),
            (dc + 'format', 'parchement'),
            (dc + 'identifier', '42/1'),
            (dc + 'source', "Numair's library"),
            (dc + 'relation', 'None'),
            (dc + 'language', 'en'),
            (dc + 'rights', 'Public Domain'),
            (dcterms + 'isPartOf', 'AIC#42'),
        ]

        # Generate DC element from DB
        dc_elem = archivematicaCreateMETS2.getDublinCore(siptypeuuid, sipuuid)

        # Verify created correctly.
        # Compare with None explicitly: the truth value of an element
        # reflects whether it has children, not whether it exists.
        assert dc_elem is not None
        assert dc_elem.tag == dcterms + 'dublincore'
        assert len(dc_elem) == len(expected_children)
        for index, (tag, text) in enumerate(expected_children):
            child = dc_elem[index]
            assert child.tag == tag, 'unexpected tag at index %d' % index
            assert child.text == text, 'unexpected text at index %d' % index
    def test_get_dublincore(self):
        """getDublinCore should build the full DC element from the database."""
        elements_ns = '{http://purl.org/dc/elements/1.1/}'
        terms_ns = '{http://purl.org/dc/terms/}'
        # (tag, text) expected for each child, listed in document order
        expected = (
            (elements_ns + 'title', 'Yamani Weapons'),
            (elements_ns + 'creator', 'Keladry of Mindelan'),
            (elements_ns + 'subject', 'Glaives'),
            (elements_ns + 'description', 'Glaives are cool'),
            (elements_ns + 'publisher', 'Tortall Press'),
            (elements_ns + 'contributor', 'Yuki'),
            (elements_ns + 'date', '2015'),
            (elements_ns + 'type', 'Archival Information Package'),
            (elements_ns + 'format', 'parchement'),
            (elements_ns + 'identifier', '42/1'),
            (elements_ns + 'source', "Numair's library"),
            (elements_ns + 'relation', 'None'),
            (elements_ns + 'language', 'en'),
            (elements_ns + 'rights', 'Public Domain'),
            (terms_ns + 'isPartOf', 'AIC#42'),
        )

        # Build the element from the fixture data
        dc_elem = archivematicaCreateMETS2.getDublinCore(
            self.siptypeuuid, self.sipuuid)

        # Container element first, then every child in order
        assert dc_elem is not None
        assert dc_elem.tag == terms_ns + 'dublincore'
        assert len(dc_elem) == 15
        for position, (want_tag, want_text) in enumerate(expected):
            child = dc_elem[position]
            assert child.tag == want_tag
            assert child.text == want_text
    def test_get_dublincore_none_found(self):
        """getDublinCore should return None when nothing is found."""
        # UUID for which this test expects the lookup to find nothing.
        missing_uuid = 'dnednedn-5bd2-4249-84a1-2f00f725b981'

        lookup = archivematicaCreateMETS2.getDublinCore
        assert lookup(self.siptypeuuid, missing_uuid) is None
Exemplo n.º 6
0
def create_mets_file(aic, aips):
    """Create the AIC METS file describing its AIPs and register it.

    Builds a METS document with a Dublin Core dmdSec, one ``file`` entry
    and one structMap ``div`` per AIP, prints it, and writes it to
    ``<aic['dir']>/metadata/METS.<uuid>.xml``. The new file is then recorded
    through ``fileOperations.addFileToSIP`` and
    ``databaseFunctions.insertIntoDerivations``, and the number of AIPs is
    stored in a ``UnitVariable`` named "AIPsinAIC".

    :param aic: Mapping with the keys 'uuid' and 'dir'.
    :param aips: Sized iterable of mappings with the keys 'name', 'uuid'
        and 'label'; 'name' is used as the div label when 'label' is falsy.
    """

    # Prepare constants
    nsmap = {
        'mets': ns.metsNS,
        'xlink': ns.xlinkNS,
        'xsi': ns.xsiNS,
    }
    now = timezone.now().strftime("%Y-%m-%dT%H:%M:%S")

    # Set up structure
    E = ElementMaker(namespace=ns.metsNS, nsmap=nsmap)
    mets = (
        E.mets(
            E.metsHdr(CREATEDATE=now),
            E.dmdSec(
                E.mdWrap(
                    E.xmlData(),
                    MDTYPE="DC",  # mdWrap
                ),
                ID='dmdSec_1',  # dmdSec
            ),
            E.fileSec(E.fileGrp(), ),
            E.structMap(
                E.div(
                    TYPE="Archival Information Collection",
                    DMDID="dmdSec_1",
                ),
                TYPE='logical',  # structMap
            ),
        ))
    mets.attrib['{{{ns}}}schemaLocation'.format(
        ns=nsmap['xsi']
    )] = "http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version18/mets.xsd"

    # Add Dublin Core info
    xml_data = mets.find('mets:dmdSec/mets:mdWrap/mets:xmlData',
                         namespaces=ns.NSMAP)
    # NOTE(review): the result is used without a None check, so
    # SubElement() below fails if no element is returned - confirm that an
    # AIC always has Dublin Core metadata at this point.
    dublincore = archivematicaCreateMETS2.getDublinCore(
        archivematicaCreateMETS2.SIPMetadataAppliesToType, aic['uuid'])
    # Add <extent> with number of AIPs
    extent = etree.SubElement(dublincore, ns.dctermsBNS + 'extent')
    extent.text = "{} AIPs".format(len(aips))
    xml_data.append(dublincore)

    # Add elements for each AIP
    file_grp = mets.find('mets:fileSec/mets:fileGrp', namespaces=ns.NSMAP)
    struct_div = mets.find('mets:structMap/mets:div', namespaces=ns.NSMAP)
    for aip in aips:
        file_id = '{name}-{uuid}'.format(name=aip['name'], uuid=aip['uuid'])
        etree.SubElement(file_grp, ns.metsBNS + 'file', ID=file_id)

        label = aip['label'] or aip['name']
        div = etree.SubElement(struct_div, ns.metsBNS + 'div', LABEL=label)
        etree.SubElement(div, ns.metsBNS + 'fptr', FILEID=file_id)

    # Serialize once and reuse the result for both the log and the file.
    # tostring() returns a byte string, so decode it for printing; print()
    # with a single argument behaves the same on Python 2 and 3.
    mets_xml = etree.tostring(mets, pretty_print=True)
    print(mets_xml.decode('utf-8'))

    # Write out the file
    file_uuid = str(uuid.uuid4())
    basename = os.path.join('metadata', "METS.{}.xml".format(file_uuid))
    filename = os.path.join(aic['dir'], basename)
    # Binary mode: the serialized document is bytes, not text
    with open(filename, 'wb') as f:
        f.write(mets_xml)
    fileOperations.addFileToSIP(
        filePathRelativeToSIP='%SIPDirectory%' + basename,
        fileUUID=file_uuid,
        sipUUID=aic['uuid'],
        taskUUID=str(uuid.uuid4()),  # Unsure what should go here
        date=now,
        sourceType="aip creation",
        use='metadata')
    # To make this work with the createMETS2 (for SIPs)
    databaseFunctions.insertIntoDerivations(file_uuid, file_uuid)

    # Insert the count of AIPs in the AIC into UnitVariables, so it can be
    # indexed later
    UnitVariable.objects.create(unittype="SIP",
                                unituuid=aic['uuid'],
                                variable="AIPsinAIC",
                                variablevalue=str(len(aips)))
Exemplo n.º 7
0
def update_dublincore(root, sip_uuid, now):
    """
    Add new dmdSec for updated Dublin Core info relating to entire SIP.

    Case: No DC in DB: Do nothing
    Case: DC in DB is untouched (METADATA_STATUS_REINGEST): Do nothing
    Case: New DC in DB with METADATA_STATUS_ORIGINAL: Add new DC
    Case: DC in DB with METADATA_STATUS_UPDATED: mark old, create updated

    :param root: Root element of the METS document; modified in place.
    :param sip_uuid: Identifier the Dublin Core rows are filtered on.
    :param now: Value written to the CREATED attribute of the new dmdSec.
    :return: The same ``root`` element that was passed in.
    """

    # Check for DC in DB with METADATA_STATUS_UPDATED
    updated = models.DublinCore.objects.filter(
        metadataappliestoidentifier=sip_uuid,
        metadataappliestotype_id=createmets2.SIPMetadataAppliesToType,
        status=models.METADATA_STATUS_UPDATED).exists()

    # If no updated DC, check for a newly added DC
    if not updated:
        new = models.DublinCore.objects.filter(
            metadataappliestoidentifier=sip_uuid,
            metadataappliestotype_id=createmets2.SIPMetadataAppliesToType,
            status=models.METADATA_STATUS_ORIGINAL).exists()
        if not new:
            # No new or updated DC found - return early
            print('No updated or new DC metadata found')
            return root

    print('DC form metadata was updated:', updated)

    # Get structMap element related to SIP DC info
    objects_div = root.find(
        'mets:structMap/mets:div[@TYPE="Directory"]/mets:div[@TYPE="Directory"][@LABEL="objects"]',
        namespaces=ns.NSMAP)
    ids = objects_div.get('DMDID', '')
    print('Existing dmdIds for DC metadata:', ids)

    # Create element
    dc_elem = createmets2.getDublinCore(createmets2.SIPMetadataAppliesToType,
                                        sip_uuid)
    count_dmdsecs = int(root.xpath('count(mets:dmdSec)', namespaces=ns.NSMAP))
    # DMDID should be larger than any existing one
    dmdid = "dmdSec_%d" % (count_dmdsecs + 1)
    dmd_sec = etree.Element(ns.metsBNS + "dmdSec", ID=dmdid, CREATED=now)
    print('Adding new DC in dmdSec with ID', dmdid)
    if updated:
        dmd_sec.set('STATUS', 'updated')
        # Update old DC: find the dmdSecs already associated with the SIP.
        # Matching on a set of IDs (rather than a string-built XPath
        # predicate) also copes with an empty DMDID, which would otherwise
        # produce the invalid expression 'mets:dmdSec[]'.
        previous_ids = set(ids.split())
        for d in root.findall('mets:dmdSec', namespaces=ns.NSMAP):
            if d.get('ID') not in previous_ids:
                continue
            # If no status (not updated), set to original
            if not d.get('STATUS'):
                print(d.get('ID'), 'status set to original')
                d.set('STATUS', 'original')

    mdWrap = etree.SubElement(dmd_sec, ns.metsBNS + "mdWrap", MDTYPE="DC")
    xmlData = etree.SubElement(mdWrap, ns.metsBNS + "xmlData")
    xmlData.append(dc_elem)

    # Append to document: after the last dmdSec, or after the metsHdr when
    # the document has no dmdSec yet
    try:
        add_after = root.findall('mets:dmdSec', namespaces=ns.NSMAP)[-1]
    except IndexError:
        add_after = root.find('mets:metsHdr', namespaces=ns.NSMAP)
    add_after.addnext(dmd_sec)

    # Update structMap
    ids = ids + ' ' + dmdid if ids else dmdid
    objects_div.set('DMDID', ids)

    return root
Exemplo n.º 8
0
    def test_get_dublincore_none_found(self):
        """getDublinCore should return None when nothing is found."""
        lookup_args = (
            '3e48343d-e2d2-4956-aaa3-b54d26eb9761',  # SIP type UUID
            'dnednedn-5bd2-4249-84a1-2f00f725b981',  # SIP UUID
        )

        assert archivematicaCreateMETS2.getDublinCore(*lookup_args) is None