def createDMDIDSFromCSVParsedMetadataPart2(keys, values):
    """
    Create dmdSec(s) for one row of parsed CSV metadata.

    Keys starting with "dc." or "dcterms." are written as children of a
    single <dublincore> element inside a dmdSec whose mdWrap has
    MDTYPE="DC". Every other key is written, under the name returned by
    normalizeNonDcElementName(), into a second dmdSec whose mdWrap has
    MDTYPE="OTHER" / OTHERMDTYPE="CUSTOM". Each dmdSec is only created
    when the first key of its kind is seen, is appended to the
    module-level ``dmdSecs`` list, and bumps ``globalDmdSecCounter``.

    :param keys: sequence of column names; index 0 is skipped
    :param values: sequence of values parallel to ``keys``; each value is
        decoded with ``.decode('utf-8')``, so UTF-8 byte strings are expected
    :return: space separated string of the dmdSec IDs created by this
        call (empty string if nothing was created)
    :raises UnicodeDecodeError: if a value is not valid UTF-8
    """
    global globalDmdSecCounter
    global dmdSecs
    dc = None
    other = None
    ret = []
    # Index 0 is deliberately not processed.
    # NOTE(review): presumably that column identifies the object the row
    # describes -- confirm against the caller.
    for i in range(1, len(keys)):
        key = keys[i]
        value = values[i]
        if key.startswith("dc.") or key.startswith("dcterms."):
            if dc is None:
                # First Dublin Core term: build the DC dmdSec lazily.
                globalDmdSecCounter += 1
                dmdSec = etree.Element("dmdSec")
                dmdSecs.append(dmdSec)
                ID = "dmdSec_" + str(globalDmdSecCounter)
                ret.append(ID)
                dmdSec.set("ID", ID)
                mdWrap = newChild(dmdSec, "mdWrap")
                mdWrap.set("MDTYPE", "DC")
                xmlData = newChild(mdWrap, "xmlData")
                dc = etree.Element("dublincore", nsmap={None: dctermsNS})
                dc.set(
                    xsiBNS + "schemaLocation",
                    dctermsNS + " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd"
                )
                xmlData.append(dc)
            # Strip only the leading namespace prefix from the term name.
            if key.startswith("dc."):
                key2 = key.replace("dc.", "", 1)
            else:
                key2 = key.replace("dcterms.", "", 1)
            etree.SubElement(dc, key2).text = value.decode('utf-8')
        else:
            # Not a Dublin Core item.
            if other is None:
                # First custom term: build the OTHER/CUSTOM dmdSec lazily.
                globalDmdSecCounter += 1
                dmdSec = etree.Element("dmdSec")
                dmdSecs.append(dmdSec)
                ID = "dmdSec_" + str(globalDmdSecCounter)
                ret.append(ID)
                dmdSec.set("ID", ID)
                mdWrap = newChild(dmdSec, "mdWrap")
                mdWrap.set("MDTYPE", "OTHER")
                mdWrap.set("OTHERMDTYPE", "CUSTOM")
                other = newChild(mdWrap, "xmlData")
            # Decode exactly like the Dublin Core branch does: lxml only
            # accepts unicode or pure-ASCII text, so an undecoded byte
            # string holding non-ASCII characters would be rejected.
            etree.SubElement(other, normalizeNonDcElementName(key)).text = value.decode('utf-8')
    return " ".join(ret)
def createDMDIDSFromCSVParsedMetadataPart2(keys, values):
    """
    Turn one row of parsed CSV metadata into dmdSec elements.

    Dublin Core terms (keys prefixed "dc." or "dcterms.") end up in a
    dmdSec with MDTYPE="DC"; all remaining keys end up in a dmdSec with
    MDTYPE="OTHER" and OTHERMDTYPE="CUSTOM", using the element name
    produced by normalizeNonDcElementName(). A dmdSec is created only
    once the first key of its kind is encountered. Every dmdSec created
    is appended to the module-level ``dmdSecs`` list and increments
    ``globalDmdSecCounter``.

    :param keys: sequence of column names; the entry at index 0 is skipped
    :param values: sequence of values parallel to ``keys``; each value is
        decoded with ``.decode('utf-8')``, so UTF-8 byte strings are expected
    :return: the IDs of the dmdSecs created here, joined by single spaces
    :raises UnicodeDecodeError: if a value is not valid UTF-8
    """
    global globalDmdSecCounter
    global dmdSecs

    def openSection(mdtype):
        """Register a new dmdSec and return (ID, mdWrap element)."""
        global globalDmdSecCounter
        globalDmdSecCounter += 1
        section = etree.Element("dmdSec")
        dmdSecs.append(section)
        sectionID = "dmdSec_" + str(globalDmdSecCounter)
        section.set("ID", sectionID)
        wrap = newChild(section, "mdWrap")
        wrap.set("MDTYPE", mdtype)
        return sectionID, wrap

    dc = None
    other = None
    ret = []
    # Start at 1: the first column is never emitted as metadata.
    # NOTE(review): presumably it names the described object -- confirm.
    for i in range(1, len(keys)):
        key = keys[i]
        value = values[i]
        if key.startswith(("dc.", "dcterms.")):
            if dc is None:
                sectionID, mdWrap = openSection("DC")
                ret.append(sectionID)
                xmlData = newChild(mdWrap, "xmlData")
                dc = etree.Element("dublincore", nsmap={None: dctermsNS})
                dc.set(
                    xsiBNS + "schemaLocation",
                    dctermsNS + " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd"
                )
                xmlData.append(dc)
            # Only the leading namespace prefix is removed from the name.
            prefix = "dc." if key.startswith("dc.") else "dcterms."
            etree.SubElement(dc, key.replace(prefix, "", 1)).text = value.decode('utf-8')
        else:
            # Not a Dublin Core item.
            if other is None:
                sectionID, mdWrap = openSection("OTHER")
                ret.append(sectionID)
                mdWrap.set("OTHERMDTYPE", "CUSTOM")
                other = newChild(mdWrap, "xmlData")
            # Custom values are decoded the same way as DC values; lxml
            # refuses byte strings that contain non-ASCII characters.
            etree.SubElement(other, normalizeNonDcElementName(key)).text = value.decode('utf-8')
    return " ".join(ret)
def _newDmdSecXmlData(mdtype, othermdtype=None):
    """
    Create and register an empty METS dmdSec.

    Increments ``globalDmdSecCounter``, builds
    dmdSec/mdWrap/xmlData and appends the dmdSec to the module-level
    ``dmdSecs`` list.

    :param mdtype: value for the mdWrap MDTYPE attribute
    :param othermdtype: value for OTHERMDTYPE, or None to omit it
    :return: tuple (dmdSec element, xmlData element)
    """
    global globalDmdSecCounter
    globalDmdSecCounter += 1
    dmdSec = etree.Element(ns.metsBNS + "dmdSec", ID="dmdSec_" + str(globalDmdSecCounter))
    dmdSecs.append(dmdSec)
    mdWrap = etree.SubElement(dmdSec, ns.metsBNS + "mdWrap")
    mdWrap.set("MDTYPE", mdtype)
    if othermdtype is not None:
        mdWrap.set("OTHERMDTYPE", othermdtype)
    xmlData = etree.SubElement(mdWrap, ns.metsBNS + "xmlData")
    return dmdSec, xmlData


def createDmdSecsFromCSVParsedMetadata(metadata):
    """
    Create dmdSec(s) from the provided metadata.

    Keys are routed by prefix: "dc."/"dcterms." go to a Dublin Core
    dmdSec (MDTYPE="DC"), "pbcore." to a PBCore dmdSec
    (MDTYPE="PBCore"), and everything else to a custom dmdSec
    (MDTYPE="OTHER", OTHERMDTYPE="CUSTOM"). Each dmdSec is created the
    first time a key of its kind is seen; as a side effect it is
    appended to the module-level ``dmdSecs`` list and
    ``globalDmdSecCounter`` is incremented. Values that cannot be
    decoded as UTF-8 are reported on stderr and skipped.

    :param metadata: OrderedDict with the metadata keys and a list of values
    :return: List of dmdSec Elements created
    """
    dc = None
    pbcore = None
    other = None
    ret = []

    # Archivematica does not support refined Dublin Core, e.g.
    # multitiered terms in the format dc.description.abstract
    # Once the namespace prefix has been removed, a leading "word."
    # segment is dropped as well.
    # e.g., dc.description.abstract is mapped to <dc:abstract>
    refinement_regex = re.compile(r'\w+\.(.+)')

    for key, value in metadata.items():
        if key.startswith("dc.") or key.startswith("dcterms."):
            if dc is None:
                dmdSec, xmlData = _newDmdSecXmlData("DC")
                ret.append(dmdSec)
                dc = etree.Element(ns.dctermsBNS + "dublincore", nsmap={"dcterms": ns.dctermsNS, 'dc': ns.dcNS})
                dc.set(ns.xsiBNS + "schemaLocation", ns.dctermsNS + " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd")
                xmlData.append(dc)
            if key.startswith("dc."):
                key = key.replace("dc.", "", 1)
                elem_namespace = ns.dcBNS
            else:
                key = key.replace("dcterms.", "", 1)
                elem_namespace = ns.dctermsBNS
            match = refinement_regex.match(key)
            if match:
                key = match.group(1)
            for v in value:
                try:
                    etree.SubElement(dc, elem_namespace + key).text = v.decode('utf-8')
                except UnicodeDecodeError:
                    sys.stderr.write("Skipping DC value; not valid UTF-8: {}\n".format(v))
        elif key.startswith("pbcore."):
            if pbcore is None:
                dmdSec, xmlData = _newDmdSecXmlData("PBCore")
                ret.append(dmdSec)
                pbcore = etree.Element(ns.pbcoreBNS + "pbcoreInstantiationDocument", nsmap={'pbcore': ns.pbcoreNS})
                pbcore.set(ns.xsiBNS + "schemaLocation", "http://www.pbcore.org/PBCore/PBCoreNamespace.html http://pbcore.org/xsd/pbcore-2.0.xsd")
                xmlData.append(pbcore)
            # The enclosing elif already guarantees the "pbcore." prefix.
            key = key.replace("pbcore.", "", 1)
            match = refinement_regex.match(key)
            if match:
                key = match.group(1)
            for v in value:
                try:
                    etree.SubElement(pbcore, ns.pbcoreBNS + key).text = v.decode('utf-8')
                except UnicodeDecodeError:
                    sys.stderr.write("Skipping pbcore value; not valid UTF-8: {}\n".format(v))
        else:
            # Neither Dublin Core nor PBCore: store as custom metadata.
            if other is None:
                dmdSec, other = _newDmdSecXmlData("OTHER", "CUSTOM")
                ret.append(dmdSec)
            for v in value:
                try:
                    etree.SubElement(other, normalizeNonDcElementName(key)).text = v.decode('utf-8')
                except UnicodeDecodeError:
                    # Label the value correctly; it is not a DC value.
                    sys.stderr.write("Skipping custom value; not valid UTF-8: {}\n".format(v))
    return ret
def createDmdSecsFromCSVParsedMetadata(metadata):
    """
    Create dmdSec(s) from the provided metadata.

    Keys starting with "dc." or "dcterms." are written to a Dublin Core
    dmdSec (MDTYPE="DC"); all other keys are written to a custom dmdSec
    (MDTYPE="OTHER", OTHERMDTYPE="CUSTOM") under the element name
    returned by normalizeNonDcElementName(). Each dmdSec is created the
    first time a key of its kind is seen; it is then appended to the
    module-level ``dmdSecs`` list and ``globalDmdSecCounter`` is
    incremented. Values that cannot be decoded as UTF-8 are reported on
    stderr and skipped.

    :param metadata: OrderedDict with the metadata keys and a list of values
    :return: List of dmdSec Elements created
    """
    global globalDmdSecCounter
    global dmdSecs
    dc = None
    other = None
    ret = []

    # Archivematica does not support refined Dublin Core, e.g.
    # multitiered terms in the format dc.description.abstract
    # Once the namespace prefix has been removed, a leading "word."
    # segment is dropped as well.
    # e.g., dc.description.abstract is mapped to <dc:abstract>
    refinement_regex = re.compile(r'\w+\.(.+)')

    for key, value in metadata.items():
        if key.startswith("dc.") or key.startswith("dcterms."):
            if dc is None:
                # First Dublin Core term: build the DC dmdSec lazily.
                globalDmdSecCounter += 1
                ID = "dmdSec_" + str(globalDmdSecCounter)
                dmdSec = etree.Element(ns.metsBNS + "dmdSec", ID=ID)
                dmdSecs.append(dmdSec)
                ret.append(dmdSec)
                mdWrap = etree.SubElement(dmdSec, ns.metsBNS + "mdWrap")
                mdWrap.set("MDTYPE", "DC")
                xmlData = etree.SubElement(mdWrap, ns.metsBNS + "xmlData")
                dc = etree.Element(ns.dctermsBNS + "dublincore", nsmap={"dcterms": ns.dctermsNS, 'dc': ns.dcNS})
                dc.set(ns.xsiBNS + "schemaLocation", ns.dctermsNS + " http://dublincore.org/schemas/xmls/qdc/2008/02/11/dcterms.xsd")
                xmlData.append(dc)
            if key.startswith("dc."):
                key = key.replace("dc.", "", 1)
                elem_namespace = ns.dcBNS
            else:
                key = key.replace("dcterms.", "", 1)
                elem_namespace = ns.dctermsBNS
            match = refinement_regex.match(key)
            if match:
                key = match.group(1)
            for v in value:
                try:
                    etree.SubElement(dc, elem_namespace + key).text = v.decode('utf-8')
                except UnicodeDecodeError:
                    print("Skipping DC value; not valid UTF-8: {}".format(v), file=sys.stderr)
        else:
            # Not a Dublin Core item: store as custom metadata.
            if other is None:
                globalDmdSecCounter += 1
                ID = "dmdSec_" + str(globalDmdSecCounter)
                dmdSec = etree.Element(ns.metsBNS + "dmdSec", ID=ID)
                dmdSecs.append(dmdSec)
                ret.append(dmdSec)
                mdWrap = etree.SubElement(dmdSec, ns.metsBNS + "mdWrap")
                mdWrap.set("MDTYPE", "OTHER")
                mdWrap.set("OTHERMDTYPE", "CUSTOM")
                other = etree.SubElement(mdWrap, ns.metsBNS + "xmlData")
            for v in value:
                try:
                    etree.SubElement(other, normalizeNonDcElementName(key)).text = v.decode('utf-8')
                except UnicodeDecodeError:
                    # Label the value correctly; it is not a DC value.
                    print("Skipping custom value; not valid UTF-8: {}".format(v), file=sys.stderr)
    return ret