def event(event_id, event_type, event_date_time, event_detail,
          child_elements=None, linking_objects=None, linking_agents=None):
    """Build a PREMIS event element.

    :event_id: PREMIS event identifier element, appended as first child
    :event_type: Text for the eventType element
    :event_date_time: Text for the eventDateTime element
    :event_detail: Text for the eventDetail element
    :child_elements: Elements appended after eventDetail (default=None)
    :linking_objects: Elements whose objectIdentifierType and
                      objectIdentifierValue texts are used to build a
                      linkingObject identifier each (default=None)
    :linking_agents: Elements whose agentIdentifierType and
                     agentIdentifierValue texts are used to build a
                     linkingAgent identifier each (default=None)
    :returns: The event element

    Linking agent identifiers are appended before linking object
    identifiers. The result has the following structure::

        <premis:event>
            {{ event_id }}
            <premis:eventType>digital signature validation</premis:eventType>
            <premis:eventDateTime>2015-02-03T13:04:25</premis:eventDateTime>
            <premis:eventDetail>
                Submission information package digital signature validation
            </premis:eventDetail>
            {{ child elements }}
            {{ linking agent identifiers }}
            {{ linking object identifiers }}
        </premis:event>

    """
    event_elem = _element('event')
    event_elem.append(event_id)

    # Mandatory text children, created in this fixed order.
    text_children = (
        ('eventType', event_type),
        ('eventDateTime', event_date_time),
        ('eventDetail', event_detail),
    )
    for tag, text in text_children:
        child = _subelement(event_elem, tag)
        child.text = decode_utf8(text)

    for extra in child_elements or ():
        event_elem.append(extra)

    for agent_elem in linking_agents or ():
        event_elem.append(identifier(
            agent_elem.findtext('.//' + premis_ns('agentIdentifierType')),
            agent_elem.findtext('.//' + premis_ns('agentIdentifierValue')),
            'linkingAgent'))

    for object_elem in linking_objects or ():
        event_elem.append(identifier(
            object_elem.findtext('.//' + premis_ns('objectIdentifierType')),
            object_elem.findtext('.//' + premis_ns('objectIdentifierValue')),
            'linkingObject'))

    return event_elem
def test_decode_utf8():
    """Check decode_utf8 with a UTF-8 byte string and a Unicode string.

    Both inputs must yield the same Unicode string.
    """
    expected = "tähti"
    for raw in (b't\xc3\xa4hti', "tähti"):
        assert u.decode_utf8(raw) == expected
def delimfileformat(recordseparator, fieldseparatingchar, quotingchar=None):
    """Create the ADDML delimFileFormat section.

    :recordseparator: the character separating the records
    :fieldseparatingchar: the character separating the fields
    :quotingchar: the quoting character used around the fields; the
                  quotingChar element is only added when this is truthy
                  (default=None)

    Returns the following structure:

        <addml:delimFileFormat>
            <addml:recordSeparator>CR+LF</addml:recordSeparator>
            <addml:fieldSeparatingChar>;</addml:fieldSeparatingChar>
            <addml:quotingChar>'</addml:quotingChar>
        </addml:delimFileFormat>

    """
    root = _element('delimFileFormat')

    # (tag, text) pairs in the order the children are created.
    fields = [
        ('recordSeparator', recordseparator),
        ('fieldSeparatingChar', fieldseparatingchar),
    ]
    if quotingchar:
        fields.append(('quotingChar', quotingchar))

    for tag, text in fields:
        child = _subelement(root, tag)
        child.text = h.decode_utf8(text)

    return root
def parse_element_with_id(root, identifier, section=None):
    """Return the single element with the given ID from the given section.

    If no section is given (or the section is not one of the names
    listed below), the ID is searched from everywhere in the document,
    which is extremely slow if the file is large.

    The ID is passed to the XPath engine as a variable instead of being
    interpolated into the query text, so an identifier containing quote
    characters can neither break nor alter the query.

    :root: Root element; must provide an lxml-style ``xpath`` method
    :identifier: ID as string
    :section: "amdSec", "dmdSec" or "fileSec" (default=None)
    :returns: The referenced element, or None unless exactly one element
              matches
    """
    identifier = decode_utf8(identifier)
    if section:
        section = decode_utf8(section)

    # Section-specific queries; "$ident" is bound in the xpath() call.
    section_queries = {
        "amdSec": "/mets:mets/mets:amdSec/*[@ID=$ident]",
        "dmdSec": "/mets:mets/mets:dmdSec[@ID=$ident]",
        "fileSec": "/mets:mets/mets:fileSec/mets:fileGrp/"
                   "mets:file[@ID=$ident]",
    }
    query = section_queries.get(section, "//*[@ID=$ident]")

    results = root.xpath(query, namespaces=NAMESPACES, ident=identifier)
    return results[0] if len(results) == 1 else None
def mets(profile='local', objid=None, label=None, namespaces=None,
         child_elements=None):
    """Create the METS root element.

    :profile: Value for the PROFILE attribute (default='local')
    :objid: Value for the OBJID attribute; a random UUID4 string is
            generated when None (default=None)
    :label: Value for the LABEL attribute, set only when truthy
            (default=None)
    :namespaces: Namespace mapping passed to the element constructor;
                 NAMESPACES is used when None (default=None)
    :child_elements: Elements appended to the root (default=None)
    :returns: The mets element
    """
    ns_map = NAMESPACES if namespaces is None else namespaces
    object_id = six.text_type(uuid.uuid4()) if objid is None else objid

    root = _element('mets', ns=ns_map)
    root.set(
        xsi_ns('schemaLocation'),
        'http://www.loc.gov/METS/ '
        'http://www.loc.gov/standards/mets/mets.xsd')
    root.set('PROFILE', decode_utf8(profile))
    root.set('OBJID', decode_utf8(object_id))
    if label:
        root.set('LABEL', decode_utf8(label))

    for child in child_elements or ():
        root.append(child)

    return root
def environment(characteristic=None, purposes=None, notes=None,
                child_elements=None):
    """Build the PREMIS environment structure.

    Children are added in this order: environmentCharacteristic,
    environmentPurpose elements, environmentNote elements and finally
    the given child elements. Falsy arguments are skipped.

    :param characteristic: PREMIS environment characteristic as a string
    :param purposes: A list of environment purposes to be appended
    :param notes: A list of environment notes to be appended
    :param child_elements: A list of child elements to be appended
    :returns: The environment element
    """
    env = _element('environment')

    def _add_text(tag, text):
        """Append a text-only child named ``tag`` to the environment."""
        child = _subelement(env, tag)
        child.text = decode_utf8(text)

    if characteristic:
        _add_text('environmentCharacteristic', characteristic)

    for purpose in purposes or ():
        _add_text('environmentPurpose', purpose)

    for note in notes or ():
        _add_text('environmentNote', note)

    for child in child_elements or ():
        env.append(child)

    return env
def mptr(loctype=None, xlink_href=None, xlink_type=None):
    """Return the mptr element.

    All three attributes are always set; each value is passed through
    decode_utf8 first.

    :loctype: Value for the LOCTYPE attribute
    :xlink_href: Value for the xlink:href attribute
    :xlink_type: Value for the xlink:type attribute
    :returns: The mptr element
    """
    pointer = _element('mptr', ns={'xlink': XLINK_NS})

    attributes = (
        ('LOCTYPE', loctype),
        (xlink_ns('href'), xlink_href),
        (xlink_ns('type'), xlink_type),
    )
    for name, value in attributes:
        pointer.set(name, decode_utf8(value))

    return pointer
def structmap(type_attr=None, label=None):
    """Return the structMap element.

    :type_attr: Value for the TYPE attribute, set only when truthy
    :label: Value for the LABEL attribute, set only when truthy
    :returns: The structMap element
    """
    struct_map = _element('structMap')

    for name, value in (('TYPE', type_attr), ('LABEL', label)):
        if value:
            struct_map.set(name, decode_utf8(value))

    return struct_map
def format_registry(registry_name, registry_key):
    """Build the formatRegistry element.

    :param registry_name: Text for the formatRegistryName child
    :param registry_key: Text for the formatRegistryKey child
    :return: Element object for format registry.
    """
    registry = _element('formatRegistry')

    children = (
        ('formatRegistryName', registry_name),
        ('formatRegistryKey', registry_key),
    )
    for tag, text in children:
        child = _subelement(registry, tag)
        child.text = decode_utf8(text)

    return registry
def outcome(outcome, detail_note=None, detail_extension=None,
            single_extension_element=False):
    """Create PREMIS event outcome DOM structure.

    :outcome: Event outcome (success, failure)
    :detail_note: String description for the event outcome; the note
                  element is added whenever this is not None
    :detail_extension: List of detail extension etree elements
    :single_extension_element:
        True: all element trees in detail_extension are placed in a
        single eventOutcomeDetailExtension element.
        False: each element tree in detail_extension is placed in a
        separate eventOutcomeDetailExtension element.

    The eventOutcomeDetail element is always created, even when neither
    a note nor extensions are given.

    Returns the following structure::

        <premis:eventOutcomeInformation>
            <premis:eventOutcome>success</premis:eventOutcome>
            <premis:eventOutcomeDetail>
                <premis:eventOutcomeDetailNote>
                    mets.xml sha1 4d0c38dedcb5e5fc93586cfa2b7ebedbd63 OK
                </premis:eventOutcomeDetailNote>
            </premis:eventOutcomeDetail>
        </premis:eventOutcomeInformation>

    """
    information = _element('eventOutcomeInformation')

    outcome_elem = _subelement(information, 'eventOutcome')
    outcome_elem.text = decode_utf8(outcome)

    detail = _subelement(information, 'eventOutcomeDetail')

    if detail_note is not None:
        note_elem = _subelement(detail, 'eventOutcomeDetailNote')
        note_elem.text = decode_utf8(detail_note)

    if not detail_extension:
        return information

    if single_extension_element:
        # One shared wrapper holds every extension.
        shared_wrapper = _subelement(detail, 'eventOutcomeDetailExtension')
        for extension in detail_extension:
            shared_wrapper.append(extension)
    else:
        # A fresh wrapper per extension.
        for extension in detail_extension:
            own_wrapper = _subelement(detail, 'eventOutcomeDetailExtension')
            own_wrapper.append(extension)

    return information
def format_designation(format_name, format_version=None):
    """Build the formatDesignation element.

    :param format_name: Text for the formatName child
    :param format_version: Text for the formatVersion child; the child
        is only added when this is truthy
    :return: Element object for format designation.
    """
    designation = _element('formatDesignation')

    children = [('formatName', format_name)]
    if format_version:
        children.append(('formatVersion', format_version))

    for tag, text in children:
        child = _subelement(designation, tag)
        child.text = decode_utf8(text)

    return designation
def dmdsec(element_id, child_elements=None, created_date=None):
    """Return the dmdSec element.

    :element_id: Value for the ID attribute
    :child_elements: Elements appended to the section (default=None)
    :created_date: Value for the CREATED attribute; the result of
                   current_iso_datetime() is used when None
    :returns: The dmdSec element
    """
    created = current_iso_datetime() if created_date is None else created_date

    section = _element('dmdSec')
    section.set('ID', decode_utf8(element_id))
    section.set('CREATED', decode_utf8(created))

    for child in child_elements or ():
        section.append(child)

    return section
def parse_wrap_mdtype(wrap):
    """Return the MDTYPE, OTHERMDTYPE and MDTYPEVERSION attributes from
    an element.

    :wrap: Element whose attributes are read
    :returns: Dict with keys 'mdtype', 'othermdtype' and
              'mdtypeversion'; a missing attribute yields None
    """
    key_to_attribute = (
        ('mdtype', 'MDTYPE'),
        ('othermdtype', 'OTHERMDTYPE'),
        ('mdtypeversion', 'MDTYPEVERSION'),
    )

    parsed = {}
    for key, attribute in key_to_attribute:
        value = wrap.attrib.get(attribute, None)
        parsed[key] = None if value is None else decode_utf8(value)
    return parsed
def premis_ns(tag, prefix=""):
    """Prefix ElementTree tags with PREMIS namespace.

    object -> {info:lc...premis}object

    When a prefix is given, the first character of the tag is
    upper-cased and the prefix is put in front of it.

    :tag: Tag name as string
    :prefix: Optional string placed before the tag name (default="")
    :returns: Prefixed tag
    """
    tag = decode_utf8(tag)
    if not prefix:
        return '{%s}%s' % (PREMIS_NS, tag)

    prefix = decode_utf8(prefix)
    capitalised = tag[0].upper() + tag[1:]
    return '{%s}%s%s' % (PREMIS_NS, prefix, capitalised)
def techmd(element_id, created_date=None, child_elements=None):
    """Return the techMD element.

    :element_id: Value for the ID attribute
    :created_date: Value for the CREATED attribute; the result of
                   current_iso_datetime() is used when None
    :child_elements: Elements appended to the section (default=None)
    :returns: The techMD element
    """
    created = current_iso_datetime() if created_date is None else created_date

    section = _element('techMD')
    section.set('ID', decode_utf8(element_id))
    section.set('CREATED', decode_utf8(created))

    for child in child_elements or ():
        section.append(child)

    return section
def mets_ns(tag, prefix=""):
    """Prefix ElementTree tags with METS namespace.

    object -> {http://...}object

    When a prefix is given, the first character of the tag is
    upper-cased and the prefix is put in front of it.

    :tag: Tag name as string
    :prefix: Optional string placed before the tag name (default="")
    :returns: Prefixed tag
    """
    tag = decode_utf8(tag)
    if not prefix:
        return '{%s}%s' % (METS_NS, tag)

    prefix = decode_utf8(prefix)
    capitalised = tag[0].upper() + tag[1:]
    return '{%s}%s%s' % (METS_NS, prefix, capitalised)
def fptr(fileid=None):
    """Return the fptr element.

    :fileid: Value for the FILEID attribute; always set, after being
             passed through decode_utf8
    :returns: The fptr element
    """
    pointer = _element('fptr')
    file_id = decode_utf8(fileid)
    pointer.set('FILEID', file_id)
    return pointer
def parse_note(agent):
    """Return the text of the first agentNote found under the agent.

    :param agent: Agent Element object.
    :return: Unicode string
    :raises IndexError: when the XPath query yields no agentNote text
    """
    note_texts = agent.xpath(".//premis:agentNote/text()",
                             namespaces=NAMESPACES)
    first_note = note_texts[0]
    return decode_utf8(first_note)
def parse_identifier_type_value(id_elem, prefix='object'):
    """Return identifier type and value texts from given PREMIS id.

    ``id_elem`` may be the identifier element itself or an element that
    has it as a child; in the latter case the first matching child is
    used. With prefix 'relatedObject' the relatedObjectIdentification
    element and its relatedObjectIdentifierType /
    relatedObjectIdentifierValue children are read instead of the
    ``<prefix>Identifier`` family.

    :id_elem: Premis identifier, or an element containing one
    :prefix: Identifier prefix (default='object')
    :returns: (identifier_type, identifier_value), or None when no
              identifier element is found
    """
    prefix = decode_utf8(prefix)

    if prefix == 'relatedObject':
        # Full tag names are spelled out; no prefix is passed on.
        tag_names = ('relatedObjectIdentification',
                     'relatedObjectIdentifierType',
                     'relatedObjectIdentifierValue')
        ns_args = ()
    else:
        tag_names = ('Identifier', 'IdentifierType', 'IdentifierValue')
        ns_args = (prefix,)
    container_name, type_name, value_name = tag_names

    container_tag = premis_ns(container_name, *ns_args)
    if id_elem.tag != container_tag:
        id_elem = id_elem.find(container_tag)
    if id_elem is None:
        return None

    type_elem = id_elem.find('./' + premis_ns(type_name, *ns_args))
    value_elem = id_elem.find('./' + premis_ns(value_name, *ns_args))
    return (type_elem.text, value_elem.text)
def date_created(date):
    """Build the dateCreatedByApplication element.

    :param date: Text for the element, passed through decode_utf8
    :return: Element object for date created.
    """
    created = _element('dateCreatedByApplication')
    date_text = decode_utf8(date)
    created.text = date_text
    return created
def parse_objid(mets_el):
    """Return the OBJID attribute of the given `mets` element.

    :mets_el: Element whose OBJID attribute is read
    :returns: The attribute value passed through decode_utf8
    """
    objid = mets_el.get("OBJID")
    return decode_utf8(objid)
def relationship(relationship_type, relationship_subtype, related_object):
    """Create PREMIS relationship DOM segment.

    :relationship_type: Relationship type from PREMIS vocabulary
    :relationship_subtype: Relationship subtype from PREMIS vocabulary
    :related_object: Related object linked to relationship; its
                     identifier type and value are read with
                     parse_identifier_type_value
    :returns: The relationship element, or None when related_object is
              None

    Produces the following PREMIS segment::

        <premis:relationship>
            <premis:relationshipType>structural</premis:relationshipType>
            <premis:relationshipSubType>
                is included in
            </premis:relationshipSubType>
            {{ premis_identifier(prefix=related) }}
        </premis:relationship>

    """
    if related_object is None:
        return None

    relation = _element('relationship')

    text_children = (
        ('relationshipType', relationship_type),
        ('relationshipSubType', relationship_subtype),
    )
    for tag, text in text_children:
        child = _subelement(relation, tag)
        child.text = decode_utf8(text)

    (id_type, id_value) = parse_identifier_type_value(related_object)
    relation.append(identifier(id_type, id_value, prefix='relatedObject'))

    return relation
def addml_basic_elem(tag, contents):
    """Create an ADDML basic element, i.e. an element that only holds
    text as its value.

    The element is created only if the supplied tag is one of the
    supported tags ('charset', 'dataType').

    :tag: Name of the element to create
    :contents: Text for the element
    :returns: The element, or None for an unsupported tag
    """
    if tag not in ('charset', 'dataType'):
        return None

    basic_elem = _element(tag)
    basic_elem.text = h.decode_utf8(contents)
    return basic_elem
def filegrp(use=None, child_elements=None):
    """Return the fileGrp element.

    :use: Value for the USE attribute, set only when truthy
    :child_elements: Elements appended to the group (default=None)
    :returns: The fileGrp element
    """
    group = _element('fileGrp')

    if use:
        group.set('USE', decode_utf8(use))

    for child in child_elements or ():
        group.append(child)

    return group
def parse_identifier(section, prefix='object'):
    """Find the first identifier element anywhere below the section.

    For prefix 'relatedObject' the ``Identification`` element is
    searched, for any other prefix the ``Identifier`` element.

    :param section: Element to search from
    :param prefix: Identifier prefix (default='object')
    :return: Element object, or None when nothing matches.
    """
    prefix = decode_utf8(prefix)
    suffix = 'Identification' if prefix == 'relatedObject' else 'Identifier'
    return section.find('.//' + premis_ns(suffix, prefix))
def mdwrap(mdtype, mdtypeversion, othermdtype="", child_elements=None):
    """Create an mdWrap element with the mandatory attributes and append
    the child elements to the element.

    The MDTYPE value is decoded before it is compared with 'OTHER', so
    a byte string ``b'OTHER'`` sets OTHERMDTYPE just like the text
    string ``'OTHER'`` does.

    :mdtype: value for the MDTYPE attribute
    :mdtypeversion: value for the MDTYPEVERSION attribute
    :othermdtype: value for the optional OTHERMDTYPE attribute (use if
                  MDTYPE='OTHER')
    :child_elements: the child elements as a list

    :returns: the mets:mdWrap element as XML
    """
    mdwrap_e = _element('mdWrap')

    # Decode once and reuse the result for both the attribute and the
    # comparison below; comparing the raw argument would never match
    # for byte strings on Python 3.
    mdtype = decode_utf8(mdtype)
    mdwrap_e.set('MDTYPE', mdtype)
    mdwrap_e.set('MDTYPEVERSION', decode_utf8(mdtypeversion))
    if mdtype == 'OTHER':
        mdwrap_e.set('OTHERMDTYPE', decode_utf8(othermdtype))

    for elem in child_elements or ():
        mdwrap_e.append(elem)

    return mdwrap_e
def fixity(message_digest, digest_algorithm='MD5'):
    """Build the fixity element.

    The digest is passed through decode_utf8 like every other text
    value, so byte-string digests are handled the same way as the
    algorithm name. A digest of None is passed through unchanged and
    leaves the messageDigest element without text.

    :param message_digest: Text for the messageDigest child
    :param digest_algorithm: Text for the messageDigestAlgorithm child
        (default='MD5')
    :return: Element object for fixity.
    """
    fixity_el = _element('fixity')

    fixity_algorithm = _subelement(fixity_el, 'messageDigestAlgorithm')
    fixity_algorithm.text = decode_utf8(digest_algorithm)

    fixity_checksum = _subelement(fixity_el, 'messageDigest')
    # None is kept as-is so that callers relying on an empty element
    # keep working.
    if message_digest is not None:
        message_digest = decode_utf8(message_digest)
    fixity_checksum.text = message_digest

    return fixity_el
def agent(agent_id, agent_name, agent_type, note=None):
    """Build a PREMIS agent element.

    :agent_id: PREMIS identifier element for the agent, appended as
               first child
    :agent_name: Agent name
    :agent_type: Agent type
    :note: Text for an agentNote child, added whenever this is not None
           (default=None)

    Returns the following structure::

        <premis:agent>
            <premis:agentIdentifier>
                <premis:agentIdentifierType>
                    preservation-agent-id</premis:agentIdentifierType>
                <premis:agentIdentifierValue>
                    preservation-agent-check_virus_clamscan.py-0.63-1422
                </premis:agentIdentifierValue>
            </premis:agentIdentifier>
            <premis:agentName>check_virus_clamscan.py</premis:agentName>
            <premis:agentType>software</premis:agentType>
        </premis:agent>

    """
    agent_elem = _element('agent')
    agent_elem.append(agent_id)

    text_children = [
        ('agentName', agent_name),
        ('agentType', agent_type),
    ]
    if note is not None:
        text_children.append(('agentNote', note))

    for tag, text in text_children:
        child = _subelement(agent_elem, tag)
        child.text = decode_utf8(text)

    return agent_elem
def object_characteristics(composition_level='0', child_elements=None):
    """Build the objectCharacteristics element.

    :param composition_level: Text for the compositionLevel child
        (default='0')
    :param child_elements: Elements appended after compositionLevel
    :return: Element object for object characteristics.
    """
    characteristics = _element('objectCharacteristics')

    level_elem = _subelement(characteristics, 'compositionLevel')
    level_elem.text = decode_utf8(composition_level)

    for child in child_elements or ():
        characteristics.append(child)

    return characteristics
def agents_with_type(agents, agent_type='organization'):
    """Yield all agents from `agents` that have the given `agent_type`.

    :agents: Iterable of agent elements to search from
    :agent_type: Agent type to match against each agent's agentType
                 text (default='organization')
    :returns: Generator object which iterates all
              (agent_type, agent_name) tuples
    """
    wanted_type = decode_utf8(agent_type)
    name_tag = 'agentName'
    type_tag = 'agentType'

    for candidate in agents:
        name = candidate.findtext(premis_ns(name_tag))
        found_type = candidate.findtext(premis_ns(type_tag))
        if found_type != wanted_type:
            continue
        yield (wanted_type, name)