Пример #1
0
	def fixRelations (self):
		"""
		get relation elements and then fix each in turn
		-- if href:
			url = value
			type = Has part
		-- else
			label = value
			type = Is related
		"""
		nodes  = self.getFieldElements ("relation")
		if not nodes: return
		
		print "\n%s" % self.getId()
		for r in nodes:
			value = XmlUtils.getText(r)
			if not value: return
			XmlUtils.setText (r,"")
			if value.startswith ("http://"):
				r.setAttribute ("type", "Has part")
				r.setAttribute ("url", value)
			else:
				r.setAttribute ("type", "Is related")
				r.setAttribute ("title", value)
			print r.toxml()
		if 0:
			self.write()
			print "wrote record"
Пример #2
0
def action(osmRecord):
    """
	- remember the status value
	- remove the status node
	- ?? create an empty date element if one does not exist for this status value ??
	"""

    if verbose:
        print '\n-- task 11 action ---'

    modified = False

    copyrightNotice = osmRecord.selectSingleNode(
        osmRecord.dom, "record/rights/copyrightNotice")
    if not copyrightNotice:
        # raise Exception, "I have to have a copyright!"
        rights = osmRecord.selectSingleNode(osmRecord.dom, 'record/rights')
        if not rights:
            rights = XmlUtils.addElement(osmRecord.dom, osmRecord.doc,
                                         'rights')
        copyrightNotice = XmlUtils.addElement(osmRecord.dom, rights,
                                              'copyrightNotice')
        copyrightNotice.setAttribute('holder', 'UCAR')
        copyrightNotice.setAttribute('url', termsOfUseUrl)

    if verbose:
        print copyrightNotice.toxml()
    XmlUtils.setText(copyrightNotice, copyrightBlurb)
    modified = True

    return modified
Пример #3
0
def replaceVocabTerm(badTerm, goodTerm, vocab, osmRec):
    """
	field used to obtain vocab info from vocab_data
	"""
    vocabField = getVocabField(vocab)

    # say the field is 'instName'
    # print 'type of indexField: %s' % type(vocabField)
    # print vocabField
    if type(vocabField) == type('') or type(vocabField) == type(u''):
        vocabField = [vocabField]

    for indexField in vocabField:
        xpath = getFieldXpath(indexField)
        # print 'xpath: %s' % xpath
        vocabNodes = osmRec.selectNodes(osmRec.dom, xpath)
        # print '%d vocabNodes found' % len(vocabNodes)
        for node in vocabNodes:
            value = XmlUtils.getText(node)
            if value == badTerm:
                # print 'old:', value
                XmlUtils.setText(node, goodTerm)
                # print 'new:', XmlUtils.getText(node).encode('utf-8')
                print ' .. replaced'
    return osmRec
Пример #4
0
	def finalizeXml (self):
		self.doc.setAttribute ("xmlns:"+self.schema_instance_namespace, \
								self.SCHEMA_INSTANCE_URI)
		self.setNoNamespaceSchemaLocation ( \
			"http://www.dls.ucar.edu/people/ostwald/Metadata/webcat/webcat-record.xsd")
		
		accessionNum = self.getAccessionNum ()
		
		url = "http://www.library.ucar.edu/uhtbin/hyperion-image/" + accessionNum
		urlElement = self.dom.createElement ("url")
		XmlUtils.setText(urlElement, url)
		try:
			id = makeId (accessionNum, self.prefix)
		except:
			id = "ERROR"
			msg = "Error processing " + self.url
			print msg
			print sys.exc_info()[0], sys.exc_info()[1]
			
		idElement = self.dom.createElement ("recordID")
		XmlUtils.setText(idElement, id)
		
		children = XmlUtils.getChildElements (self.doc)
		self.doc.insertBefore (urlElement, children[0])
		self.doc.insertBefore (idElement, urlElement)
Пример #5
0
	def rewriteUrls(self, selectFn, urlTestFn, rewriteFn):
		"""
		NOT TESTED!
		for each node selected by selectFn (e.g., getBSCSUrlNodes)
		- if url at that node passes 
		rewrite each Url that matches testFn with a falue that
		is computed by writeFn:
		  - base_protected_url + self.collection + protectedAssetFileName
		  
		returns True if a change was made, False otherwise
		  
		 """
		recordChanged = False
		for urlNode in selectFn(self):
			url = XmlUtils.getText(urlNode)
			
			assetPath = getAssetPath (url)
	
			newProtectedCollPath = os.path.join (getReorgProtectedDir(), self.collection)
			newAssetPath = os.path.join (newProtectedCollPath, os.path.basename(assetPath))
			newProtectedUrl = os.path.join (base_protected_url, self.collection, os.path.basename(assetPath))

			if self.verbose:
				print '\n- assetPath:', assetPath
				print '- newAssetPath:', newAssetPath
				print '- oldUrl:', url
				print '- newProtectedUrl:', newProtectedUrl
				
			if urlTestFn(self, url):
				new_url = writeFn (self, url)
				XmlUtils.setText(urlNode, new_url)
				recordChanged = True
				
		return recordChanged	
Пример #6
0
    def finalizeXml(self):
        """
		doctor the metadata with information contained in the folderNode
		"""
        if not self.node:
            return

        # get TN issue from title, or if not from parent's title
        self.tn_issue = self.getTN() or self.tn_issue
        if self.tn_issue:
            print "ADDING %s" % self.tn_issue
            tnElement = XmlUtils.addElement(self.dom, self.doc, "tn_isssue")
            XmlUtils.setText(tnElement, self.tn_issue)
            self.title = webcatUtils.stripIssue(self.title, self.tn_issue)
            self.setFieldValue("title", self.title)

        childrenElement = XmlUtils.addElement(self.dom, self.doc, "children")
        for child in self.node.children:
            # XmlUtils.addChild  (self.dom, "child", child.title, childrenElement)

            md = child.getMetadata(None)
            id = md.getAccessionNum()
            print id
            childElement = XmlUtils.addChild(self.dom, "child", child.title,
                                             childrenElement)
            childElement.setAttribute("accessionNum", id)
        children = XmlUtils.getChildElements(self.doc)
        self.doc.appendChild(childrenElement)
Пример #7
0
    def processRecord(self, rec):
        """
			add namespace info
			add RecordID, Url elements
		"""

        rec.doc.setAttribute ("xmlns:"+rec.schema_instance_namespace, \
              rec.SCHEMA_INSTANCE_URI)
        rec.setNoNamespaceSchemaLocation ( \
         "http://www.dls.ucar.edu/people/ostwald/Metadata/webcat/webcat-record.xsd")

        accessionNum = self.getAccessionNum(rec)
        # print "%d (%s)" % (idNum, type(idNum))
        # print accessionNum, id

        url = "http://www.library.ucar.edu/uhtbin/hyperion-image/" + accessionNum
        urlElement = rec.dom.createElement("Url")
        XmlUtils.setText(urlElement, url)

        id = makeId(accessionNum)
        idElement = rec.dom.createElement("RecordID")
        XmlUtils.setText(idElement, id)

        children = XmlUtils.getChildElements(rec.doc)
        rec.doc.insertBefore(urlElement, children[0])
        rec.doc.insertBefore(idElement, urlElement)

        # print rec
        rec.write()
        print accessionNum
Пример #8
0
    def doFormulas(self, parent):
        """
        Evaluate the formulas found below parent, recursively.

        Formulas are expressed as attribute values,
        e.g., <TxtHeight F="Height*0.861111">

        The variables used are 'Width' and 'Height', so these must be
        available to eval; they are bound as locals from self.Width and
        self.Height.

        Every element having a formula ("F") attribute is assigned a text
        value that is the result of evaluating the formula. Formulas that
        start with 'NURBS' are not evaluated; the formula string itself
        becomes the text.
        """
        # Width and Height must keep exactly these local names: eval()
        # below resolves the names used in a formula against this scope.
        Width = getattr(self, 'Width')
        Height = self.Height
        # print 'Width is a %s' % type(Width)
        for child in XmlUtils.getChildElements(parent):
            f = child.getAttribute("F")
            if f:
                # print '- %s - "%s"' % (child.tagName, f)
                if f.startswith('NURBS'):
                    val = f
                else:
                    # NOTE(security): eval() executes whatever expression
                    # the attribute holds -- only safe when the XML being
                    # processed is trusted.
                    val = eval(f)
                # print " -> ", val
                XmlUtils.setText(child, str(val))
            # descend only into elements that have child elements
            if XmlUtils.getChildElements(child):
                self.doFormulas(child)
Пример #9
0
 def asCatalogElement(self):
     """
     Return an 'asnDocument' element describing this object.

     The element's "id" attribute is self.asnUri; it gets one child per
     field (title, topic, author, created, status) whose text is the
     field's value, or '' when the value is false.
     """
     catalogEl = XmlUtils.createElement('asnDocument')
     catalogEl.setAttribute("id", self.asnUri)
     for fieldName in ('title', 'topic', 'author', 'created', 'status'):
         fieldEl = XmlUtils.createElement(fieldName)
         XmlUtils.setText(fieldEl, getattr(self, fieldName) or '')
         catalogEl.appendChild(fieldEl)
     return catalogEl
Пример #10
0
    def asElement(self):
        """
        Render this PubNameSpec as an XML element so it can be put in an
        XML document containing multiple PubNameSpecs.

        The returned "pubNameSpec" element carries recId, collection,
        xmlFormat and pubType as attributes and self.term as its text.
        """
        element = XmlUtils.createElement("pubNameSpec")
        for attr in ['recId', 'collection', 'xmlFormat', 'pubType']:
            element.setAttribute(attr, getattr(self, attr))
        # the text does not depend on the attributes, so it is set once,
        # after the loop, rather than once per attribute
        XmlUtils.setText(element, self.term)
        return element
Пример #11
0
    def updateOsmRecord(self, osmRecord, before, after):
        """
        Replace mapped person and organization values in osmRecord.

        For every node selected by self.person_field and self.org_field,
        a text value that is a key of self.dataTable.beforeMap is replaced
        by self.dataTable.getAfter(value).

        The before and after parameters are accepted but not used.

        returns osmRecord (the same object that was passed in)
        """
        for fieldPath in (self.person_field, self.org_field):
            for node in osmRecord.selectNodes(osmRecord.dom, fieldPath):
                current = XmlUtils.getText(node)
                if not self.dataTable.beforeMap.has_key(current):
                    continue
                XmlUtils.setText(node, self.dataTable.getAfter(current))

        return osmRecord
Пример #12
0
 def asElement(self):
     """
     Return a 'relation' element describing this relation.

     Attributes: relationship, num (only when self.num is true),
     objectTitle (converted with unicode()) and object. The element has
     one 'id' child whose text is self.id and whose 'type' attribute is
     self.idType.
     """
     relationEl = XmlUtils.createElement('relation')
     relationEl.setAttribute('relationship', self.relationship)
     if self.num:
         relationEl.setAttribute('num', self.num)
     relationEl.setAttribute('objectTitle', unicode(self.objectTitle))
     relationEl.setAttribute('object', self.object)

     newIdEl = XmlUtils.createElement('id')
     idChild = relationEl.appendChild(newIdEl)
     XmlUtils.setText(idChild, self.id)
     idChild.setAttribute('type', self.idType)
     return relationEl
Пример #13
0
	def addInstDivVocab (self, instDivVocab):
		"""
		Add the cumulative segments of instDivVocab to this affiliation.

		instDivVocab is split on ":"; for every prefix made of two or
		more segments (e.g. "a:b:c" gives "a:b" and "a:b:c") an
		"instDivision" element is appended to self.element, unless
		self.getInstDiv reports that the prefix already exists.
		NOTE: the first segment on its own is never added.
		"""
		segments = instDivVocab.split(":")
		for end in range (2, len(segments) + 1):
			prefix = ':'.join(segments[:end])
			if self.getInstDiv (prefix):
				continue
			newDiv = XmlUtils.createElement("instDivision")
			XmlUtils.setText (newDiv, prefix)
			self.element.appendChild (newDiv)
Пример #14
0
def action(itemRecord):
    """
	replace	terms of use urls
	"""

    if verbose:
        print "-- %s action --" % task_name

    rec_changed = False
    for licenseUrlEl in itemRecord.getLicenseUrlNodes():
        val = XmlUtils.getText(licenseUrlEl)
        if val.find(find_str) != find_str:
            XmlUtils.setText(licenseUrlEl, val.replace(find_str, replace_str))
            rec_changed = True
Пример #15
0
    def asElement(self):
        """
        Return a 'person' element for this author.

        The element has role 'Author' and, when self.authororder is not
        None, an 'order' attribute. Each name in self.attrs whose value on
        this object is true becomes a child element with that value as
        text.
        """
        personEl = XmlUtils.createElement('person')
        personEl.setAttribute('role', 'Author')

        if self.authororder is not None:
            personEl.setAttribute('order', str(self.authororder))

        for fieldName in self.attrs:
            fieldValue = getattr(self, fieldName)
            if not fieldValue:
                # empty fields produce no child element
                continue
            fieldEl = personEl.appendChild(XmlUtils.createElement(fieldName))
            XmlUtils.setText(fieldEl, fieldValue)
        return personEl
Пример #16
0
    def toXml(self):
        """
        Return an "enumeration" schema element for this term.

        The element's "value" attribute is self.value; self.description is
        stored as the text of a nested annotation/documentation element.
        """
        enumEl = createSchemaElement("enumeration")
        enumEl.setAttribute("value", self.value)

        annotationEl = createSchemaElement("annotation")
        annotationEl = enumEl.appendChild(annotationEl)

        documentationEl = createSchemaElement("documentation")
        documentationEl = annotationEl.appendChild(documentationEl)
        # the description is stored as-is (original note: it is already
        # unicode)
        XmlUtils.setText(documentationEl, self.description)

        return enumEl
Пример #17
0
    def getUnknownCopyrightNoticeElement(self):
        """
        Return a copyrightNotice element for unknown copyright, i.e.

        <copyrightNotice type="Unknown" holder="Unknown"
                url="http://www.ucar.edu/legal/terms_of_use.shtml">
            Copyright information is unknown. Please contact the creator,
            author or publisher for further information.
        </copyrightNotice>
        """
        blurb = "Copyright information is unknown. Please contact the creator, author or publisher for further information."
        attributes = (
            ('type', 'Unknown'),
            ('holder', 'Unknown'),
            ('url', 'http://www.ucar.edu/legal/terms_of_use.shtml'),
        )

        notice = XmlUtils.createElement("copyrightNotice")
        XmlUtils.setText(notice, blurb)
        for attrName, attrValue in attributes:
            notice.setAttribute(attrName, attrValue)
        return notice
Пример #18
0
def action(itemRecord):
    """
	
	For each entry, move the contents of /record/resources/relation/@title and
	@description to a new /record/resource/description field. The description
	field should have the title content, then a colon followed by description
	content. If there is only description content and no title content, skip the
	title content and colon.
	"""

    if verbose:
        print "-- %s action --" % task_name

    for contrib in itemRecord.getContributorElements('Publisher'):
        if XmlUtils.getText(contrib) == find_str:
            XmlUtils.setText(contrib, replace_str)
Пример #19
0
	def getAffiliation (self, instName):
		"""
		Return the Affiliation for instName, creating it if necessary.

		The first affiliation element whose 'instName' child has text
		equal to instName is used. When there is none, a new
		'affiliation' element with an 'instName' child is appended to
		self.element. Either way the element is returned wrapped in an
		Affiliation instance.
		"""
		found = None
		for candidate in self.getAffiliationElements():
			nameEl = XmlUtils.getChild('instName', candidate)
			if XmlUtils.getText(nameEl) != instName:
				continue
			found = candidate
			break
		if not found:
			# no affiliation yet for this instName: build one
			found = self.element.appendChild (XmlUtils.createElement('affiliation'))
			nameEl = found.appendChild (XmlUtils.createElement('instName'))
			XmlUtils.setText(nameEl, instName)
		return Affiliation (found)
Пример #20
0
def action(osmRecord):
    """
	1 - set the copyright blurb
	2 - set /record/rights/copyrightNotice/@url to http://www.ucar.edu/legal/terms_of_use.shtml
	"""
    if verbose:
        print '\n-- task 6 action ---'

    copyrightNotice = osmRecord.selectSingleNode(
        osmRecord.dom, "record/rights/copyrightNotice")
    if not copyrightNotice:
        return False  # but we would expect there to be one ....

    if verbose:
        print copyrightNotice.toxml()
    XmlUtils.setText(copyrightNotice, copyrightBlurb)
    copyrightNotice.setAttribute("url", termsOfUseUrl)

    return True
Пример #21
0
	def setDate (self, dateStr, dateType):
		"""
		Set the coverage date of the given type to dateStr.

		The record/coverage node is created when missing. The date
		element whose "type" attribute equals dateType is reused (the
		last one, when several match); when there is none, a new "date"
		element with that type is inserted as the first child of the
		coverage node.

		dateStr  -- text to store in the date element
		dateType -- value of the date element's "type" attribute

		returns the date element that was updated or created
		"""
		coverageNode = self.selectSingleNode (self.dom, 'record/coverage')
		if not coverageNode:
			coverageNode = XmlUtils.addElement(self.dom, self.doc, "coverage")
		targetDateElement = None
		dateNodes = self.getDateNodes()
		if dateNodes:
			for node in dateNodes:
				# compare the VALUE of the "type" attribute -- the same
				# attribute that is set on a newly created date element
				# below. hasAttribute (dateType) would look for an
				# attribute *named* after the type and never match.
				if node.getAttribute ("type") == dateType:
					targetDateElement = node
		if targetDateElement is None:
			targetDateElement = XmlUtils.createElement ("date")
			targetDateElement.setAttribute ("type", dateType)
			coverageChildren = XmlUtils.getChildElements (coverageNode)
			if coverageChildren:
				firstChild = coverageChildren[0]
				coverageNode.insertBefore (targetDateElement, firstChild)
			else:
				coverageNode.appendChild (targetDateElement)
		XmlUtils.setText (targetDateElement, dateStr)
		return targetDateElement
    def rewriteProtectedUrls(self):
        """
		rewrite each protectected Url in this record with a new_url:
		  - base_protected_url + self.collection + protectedAssetFileName
		  
		returns True if a change was made, False otherwise
		  
		 """
        recordChanged = False
        for urlNode in self.getProtectedUrlNodes():
            url = XmlUtils.getText(urlNode)

            assetPath = getAssetPath(url)
            fileName = os.path.basename(url)

            collection = systemGeneratedKeysMap.has_key(self.collection) and \
                systemGeneratedKeysMap[self.collection]['key'] or \
                self.collection

            newAssetPath = os.path.join(getReorgProtectedDir(), collection,
                                        fileName)
            newProtectedUrl = os.path.join(base_protected_url, collection,
                                           fileName)

            if self.verbose:
                print '\n- assetPath:', assetPath
                print '- newAssetPath:', newAssetPath
                print '- oldUrl:', url
                print '- newProtectedUrl:', newProtectedUrl
                print '- self.collection: ' + self.collection
                sys.exit()
            if url != newProtectedUrl:
                XmlUtils.setText(urlNode, newProtectedUrl)
                recordChanged = True

        return recordChanged
Пример #23
0
def makeTerm(title, docCount):
    """
    Return a Term wrapping a new 'term' element.

    The element's text is title, its docCount attribute is
    str(docCount) and its termCount attribute is always '0'.
    """
    termEl = XmlUtils.createElement('term')
    attributes = (
        ('docCount', str(docCount)),
        ('termCount', '0'),  # termCount is not used, so a fixed value
    )
    for attrName, attrValue in attributes:
        termEl.setAttribute(attrName, attrValue)
    XmlUtils.setText(termEl, title)
    return Term(termEl)
Пример #24
0
                unique_values.append(value)

    return unique_values


if __name__ == '__main__':
    # Ad-hoc driver: for every record file in one hard-coded collection
    # directory, replace the text of each isPartOf url node with its
    # mapping from BPPBMappings and write the records that changed.
    from bppb_rules import BPPBMappings
    from nsdl.formats import NcsItemRecord
    mappings = BPPBMappings()
    print 'table has %d entries' % len(mappings)

    # NOTE(review): not read again anywhere in this block
    unique_values = []

    # directory holding the record files to process
    colDir = '/Users/ostwald/Documents/Work/NSDL/TNS Transition-Fall-2011/repo/ncs_item/1239144881424/'
    for filename in os.listdir(colDir):
        path = os.path.join(colDir, filename)
        rec = NcsItemRecord(path=path)
        rec_changed = False
        for node in rec.getIsPartOfUrlNodes():
            value = XmlUtils.getText(node)
            mapping = mappings.getMapping(value)
            if mapping is None:
                # no mapping: report the value and leave the node as it is
                print "NO mapping", value
            else:
                print "MAPPING", mapping
                XmlUtils.setText(node, mapping)
                rec_changed = True
        if rec_changed:
            # only records with at least one replaced value are written
            # (dry-run alternative: print 'WOULD HAVE WRITTEN', rec.getId())
            rec.write()
Пример #25
0
    def update(record):
        """
		- rewrite all the protectedUrls in this record
		- move the cataloged assets to the new protected directory
		"""
        recordChanged = False

        for urlNode in record.getProtectedUrlNodes():
            url = XmlUtils.getText(urlNode)

            filename = os.path.basename(url)

            assetPath = getAssetPath(url)

            if 0:
                print '\n- assetPath:', assetPath
                print '- oldUrl:', url

            # Now copy the asset to new protectedDir at newAssetPath

            # did the protected url in metadata resolve to an existing asset?
            if not os.path.exists(assetPath):
                ## Missing Asset
                # print 'asset does NOT exist at %s' % assetPath
                # raise AssetNotFoundException, assetPath
                missing_assets.append(url)
                continue
            """
			We only want to store one copy of each asset.
			- where dups are determined by filename
			
			existing_assets holds the assets that have been written
			to the reorgProctedDir. 
			"""
            if existing_assets.has_key(filename):
                newAssetPath = existing_assets[filename]
                # print 'asset already exists for %s:\n\t%s' % (filename, newAssetPath)
                already_existing.append(filename)
            else:
                # newAssetPath = os.path.join(getNewProtectedDir(), record.collection, filename)
                newAssetPath = os.path.join(getReorgProtectedDir(),
                                            record.collection, filename)

            # newProtectedUrl = os.path.join (base_protected_url, record.collection, os.path.basename(assetPath))
            newProtectedUrl = getProtectedUrlForPath(newAssetPath, True)

            if 0:
                print '- newProtectedUrl:', newProtectedUrl
                print '- assetPath:', assetPath
                print '- newAssetPath:', newAssetPath
                print '- DOWRITES:', dowrites

            # update the url if necessary
            if newProtectedUrl != url:
                XmlUtils.setText(urlNode, newProtectedUrl)
                recordChanged = True

            if dowrites and not os.path.exists(newAssetPath):

                try:
                    newProtectedCollPath = os.path.dirname(newAssetPath)
                    if not os.path.exists(newProtectedCollPath):
                        os.mkdir(newProtectedCollPath)
                except Exception, msg:
                    print 'ERROR: %s' % msg
                    print ' -- newProtectedCollPath: %s' % newProtectedCollPath
                    sys.exit()

                # copy the asset to newAssetPath
                try:
                    shutil.copyfile(assetPath, newAssetPath)
                    existing_assets[filename] = newAssetPath
                except OSError, msg:
                    print '- ERROR: could not copy asset: %s' % msg
                    print ' - assetPath: ', assetPath
                    print ' - newAssetPath:', newAssetPath

                print 'wrote to newAssetPath:', newAssetPath
Пример #26
0
def action (osmRecord):
	"""
	
	For each entry, move the contents of 
		/record/resources/relation/@title and @description to a new
	/record/general/description element
		
	The description content should have the title content, then a colon followed
	by description content. If there is only description content and no title
	content, skip the title content and colon.
	
	e.g., 
		<description>f:this is great</description>

	
	then DELETE the original relation
	
	"""
	
	
	if verbose > 1:
		print '%s HAS IMAGE: %s' % (getModuleName(), osmRecord.getId())
		
	# print osmRecord.__class__.__name__
	# if not osmRecord.getId():
		# print osmRecord
	modified = False
	
	relations = osmRecord.selectNodes (osmRecord.dom, 'record/resources/relation')
	if verbose:
		print '\n-- task 2 action fired ---'
		if verbose > 1:
			print "%d relations found" % len(relations)
	
	for relation in relations:
		if relation.getAttribute ("type") == 'Has image':
			title = ''
			if relation.hasAttribute ('title'):
				title = relation.getAttribute ("title")
				
			description = ''
			if relation.hasAttribute('description'):
				description = relation.getAttribute ("description")
				
			if verbose > 1:
				print 'title: "%s"' % title
				print 'description: "%s"' % description
				
			if title or description:
				msg = ''
				if title and description:
					msg = title + ':' + description
				else:
					msg = title + description
					
				description = osmRecord.addGeneralChild('description')
				XmlUtils.setText(description, msg)
			
			if verbose > 1:
				print 'WOULD HAVE DELETED RELATION'
				print "\n", relation.toxml()
				
			## delete the 'Has image' relation
			osmRecord.deleteElement(relation)
			modified = True
				
	return modified
Пример #27
0
	def setChildElementValue (self, tag, value):
		"""
		Set the text of the child element named tag to value.

		The child is looked up in self.element; when there is none, a new
		element with that tag is appended first.
		"""
		target = XmlUtils.getChild (tag, self.element)
		if target:
			XmlUtils.setText (target, value)
			return
		created = self.element.appendChild(XmlUtils.createElement(tag))
		XmlUtils.setText (created, value)
Пример #28
0
 def setNameLast(self, name):
     """
     Set the text of this element's 'nameLast' child to name, appending
     a new 'nameLast' child first when there is none.
     """
     existing = XmlUtils.getChild('nameLast', self.element)
     if existing:
         target = existing
     else:
         target = XmlUtils.createElement('nameLast')
         self.element.appendChild(target)
     XmlUtils.setText(target, name)