Пример #1
0
class RecordMaker:
    """
    Create a valid collection record from an invalid one by
    - extracting certain fields from the provided (invalid) record
    - writing them into a valid "template" record
    - changing the ID prefix and writing the new record to disk
    """

    # ID prefix found in the source records, and the prefix that replaces it
    old_prefix = "DCS-COLLECTION"
    new_prefix = "NCAR-COLLECTION"

    def __init__(self, path):
        """
        Read the source record and build its replacement (self.newRec).

        path -- path to the invalid record. Only its base name is used to
                load the record, which is read from the ``recs`` directory
                (a name defined elsewhere in this module); the directory
                part of ``path`` is the default destination for write().
        """
        print(path)
        self.path = path
        self.recname = os.path.basename(path)
        self.baseRec = DleseCollectRecord(
            path=os.path.join(recs, self.recname))
        self.fullTitle = self.baseRec.getFullTitle()
        self.shortTitle = self.baseRec.getShortTitle()
        self.description = self.baseRec.getDescription()
        self.key = self.baseRec.getKey()
        self.id = self.baseRec.getId()
        self.newId = self.getNewId(self.id)

        self.newRec = self._makeNewRec()

    def report(self, rec):
        """
        Print the key fields of the provided record, one per line.
        """
        # Each getter is called right before its line is printed. The two
        # spaces after the colon reproduce the output of the former
        # ``print 'label: ', value`` statements.
        fields = (
            ('id', rec.getId),
            ('key', rec.getKey),
            ('fullTitle', rec.getFullTitle),
            ('shortTitle', rec.getShortTitle),
            ('description', rec.getDescription),
        )
        for label, getter in fields:
            print('%s:  %s' % (label, getter()))

    def getNewId(self, id):
        """
        Return ``id`` with every occurrence of old_prefix replaced by
        new_prefix. An id that does not contain old_prefix is returned
        unchanged.
        """
        return id.replace(self.old_prefix, self.new_prefix)

    def _makeNewRec(self):
        """
        Return a record loaded from ``templatepath`` (defined elsewhere in
        this module) and filled in with the new id and the source record's
        titles and key.
        """
        newRec = DleseCollectRecord(path=templatepath)
        newRec.setId(self.newId)
        newRec.setFullTitle(self.fullTitle)
        newRec.setShortTitle(self.shortTitle)
        # the description is derived from the short title; the source
        # record's own description (self.description) is not copied
        newRec.setDescription("NCAR Library " + self.shortTitle)
        newRec.setKey(self.key)
        return newRec

    def write(self, path=None):
        """
        Write the new record to ``path``.

        path -- destination file; defaults to "<newId>.xml" in the directory
                of the path given to the constructor.
        """
        if path is None:
            path = os.path.join(os.path.dirname(self.path),
                                self.newId + ".xml")
        DleseCollectRecord.write(self.newRec, path)
        print("wrote to " + path)
Пример #2
0
    def __init__(self, instance):
        """
        Load the collection records of ``instance`` into this mapping,
        keyed by each record's collection key.

        instance -- object whose ``path`` attribute is the directory that
                    contains "dlese_collect/collect"

        Only files whose name ends in ".xml" (compared case-insensitively)
        are read. Records are stored with ``self[key] = record``, in
        os.listdir order.
        """
        UserDict.__init__(self)
        self.instance = instance
        self.collectPath = os.path.join(instance.path, "dlese_collect",
                                        "collect")

        for entry in os.listdir(self.collectPath):
            if entry.lower().endswith(".xml"):
                record = DleseCollectRecord(
                    path=os.path.join(self.collectPath, entry))
                self[record.getKey()] = record
Пример #3
0
def findDleseCollectionRecord(field, value):
    """
    Return the first DleseCollectRecord whose ``field`` equals ``value``.

    Candidates are the files under <dlese_records>/dlese_collect/collect
    whose name ends in 'xml', visited in os.listdir order. Returns None
    when no record matches.
    """
    collect_dir = os.path.join(dlese_records, 'dlese_collect', 'collect')
    for name in os.listdir(collect_dir):
        if not name.endswith('xml'):
            continue
        candidate = DleseCollectRecord(path=os.path.join(collect_dir, name))
        if candidate.get(field) == value:
            return candidate
    return None
Пример #4
0
    def __init__(self, path):
        """
        Read the source record and build its replacement (self.newRec).

        path -- path to the invalid record. Only its base name is used to
                load the record, which is read from the ``recs`` directory
                (a name defined elsewhere in this module).
        """
        # print(...) with a single argument behaves the same on Python 2
        # and 3, unlike the former print statement
        print(path)
        self.path = path
        self.recname = os.path.basename(path)
        self.baseRec = DleseCollectRecord(
            path=os.path.join(recs, self.recname))
        self.fullTitle = self.baseRec.getFullTitle()
        self.shortTitle = self.baseRec.getShortTitle()
        self.description = self.baseRec.getDescription()
        self.key = self.baseRec.getKey()
        self.id = self.baseRec.getId()
        self.newId = self.getNewId(self.id)

        self.newRec = self._makeNewRec()
Пример #5
0
    def merge(self):
        """
        Copy collection records from self.src_dir into self.dst_dir, naming
        each destination file "<collection key>.xml".

        File names are filtered through self.acceptItem and collection keys
        through self.acceptCollectionKey (per the original note, records
        whose collection key contains 'hsbio' are ignored).

        The module-level ``dowrites`` flag selects the mode:
        - dowrites False (dry run): nothing is written; collisions and the
          copies that would have been made are printed.
        - dowrites True: each accepted record gets its id set to its
          collection key and is written to the destination path.

        Raises Exception when dowrites is True and the destination file
        already exists.
        """
        for filename in filter(self.acceptItem, os.listdir(self.src_dir)):
            src_path = os.path.join(self.src_dir, filename)
            src_rec = DleseCollectRecord(path=src_path)
            src_key = src_rec.getKey()

            if not self.acceptCollectionKey(src_key):
                if verbose:
                    print('SKIPPING: %s' % (src_key,))
                continue

            dst_path = os.path.join(self.dst_dir, src_key + '.xml')

            if os.path.exists(dst_path):
                # collision: fatal when writing, only reported in a dry run
                if dowrites:
                    raise Exception('dst_path exists at %s' % dst_path)
                print('COLLISION: dst_path exists at %s' % dst_path)
                continue

            if dowrites:
                src_rec.setId(src_key)
                src_rec.write(dst_path)
            else:
                # always reported in a dry run: the former guard was
                # ``if 1 or verbose``, which is true regardless of verbose
                print('would have copied %s to ...\n\t%s' % (
                    os.path.basename(src_path), dst_path))
    def __init__(self, dcr_path, id=None):
        """
        Build an NCS collect record (self.ncr) from the DLESE collect record
        found at ``dcr_path`` (self.dcr).

        dcr_path -- path of the DLESE collect record to read
        id       -- optional id for the NCS record; applied only when truthy

        self.initialized is False while construction is in progress and
        becomes True as the very last step.
        """
        self.initialized = False
        self.dcr = DleseCollectRecord(path=dcr_path)
        self.ncr = NCSCollectRecord(path=self.ncs_collect_template)

        self.process_field_mappings()
        self.injectContributors()

        # (libraryFormat, view context, metadataPrefix); any other format
        # leaves the NCS record untouched at this step
        format_settings = (
            ('adn', 'DLESECollections', 'nsdl_dc'),
            ('dlese_anno', 'DLESEAnnotations', 'comm_anno'),
        )
        library_format = self.dcr.get('libraryFormat')
        for format_name, view_context, metadata_prefix in format_settings:
            if library_format == format_name:
                self.ncr.addViewContext(view_context)
                self.ncr.set('metadataPrefix', metadata_prefix)
                break

        if id:
            self.ncr.setId(id)

        # set the destination (ncr) path for writing
        output_name = self.ncr.getId() + '.xml'
        self.ncr.path = os.path.join(self.tmp_output_path, output_name)
        self.initialized = True
Пример #7
0
 def write(self, path=None):
     """
     Write the new record (self.newRec) to ``path``.

     path -- destination file; defaults to "<newId>.xml" in the directory
             of self.path.
     """
     if path is None:
         path = os.path.join(os.path.dirname(self.path),
                             self.newId + ".xml")
     DleseCollectRecord.write(self.newRec, path)
     # single-argument print(...) gives the same output on Python 2 and 3
     print("wrote to " + path)
Пример #8
0
 def _makeNewRec(self):
     """
     Return a record loaded from ``templatepath`` and filled in with the
     new id plus the titles and key stored on this object.
     """
     record = DleseCollectRecord(path=templatepath)
     record.setId(self.newId)
     record.setFullTitle(self.fullTitle)
     record.setShortTitle(self.shortTitle)
     # the description is built from the short title
     record.setDescription("NCAR Library " + self.shortTitle)
     record.setKey(self.key)
     return record
Пример #9
0
	def updateCollectionRecord(self, new_key, new_name=None):
		"""
		Re-key the collection record whose key equals self.key.

		- the records under <self.repo>/dlese_collect/collect are examined
		  one by one until the matching one is found
		- its key and id are both set to new_key
		- if new_name is given, its short and full titles are set to it
		- if self.dowrites: the record is written and its file is renamed
		  to "<new_key>.xml"; otherwise the record is only printed

		Only the first matching record is processed. Nothing happens when
		no record matches.
		"""
		collect = os.path.join(self.repo, "dlese_collect", "collect")
		for filename in filter(lambda x: x.endswith('xml'), os.listdir(collect)):
			path = os.path.join(collect, filename)
			rec = DleseCollectRecord(path=path)
			old_key = rec.getKey()
			if old_key != self.key:
				continue

			print('old key: %s' % old_key)
			rec.setKey(new_key)
			rec.setId(new_key)
			if new_name:
				rec.setShortTitle(new_name)
				rec.setFullTitle(new_name)

			if self.dowrites:
				# write in place first, then move the file to its new name
				rec.write()
				os.rename(path, os.path.join(collect, new_key + '.xml'))
				print('wrote collection record: %s' % rec.getId())
			else:
				print(rec)
				print('WOULD have written collection record: %s' % rec.getId())
			return
class DleseToNcsCollectTransform:
    """
    Build an NCS collect record (NCSCollectRecord) from a DLESE collect
    record (DleseCollectRecord), starting from a template NCS record.
    """

    # template file the new NCS record is loaded from (machine-specific
    # absolute path)
    ncs_collect_template = '/Users/ostwald/devel/python/python-lib/uconn/ncs_collect_template.xml'
    # directory used to build the output path "<id>.xml" of the new record
    # (machine-specific absolute path)
    tmp_output_path = '/Users/ostwald/tmp/dlese_to_ncs_collect'
    """
	field_mappings documentation
	"""
    # Each entry is either a single field name or a two-item list
    # [dlese_collect field, ncs_collect field].
    # NOTE(review): presumably a single name means the same field name is
    # used on both sides, and process_field_mappings feeds the entries to
    # injectField - confirm against process_field_mappings.
    field_mappings = [
        # dlese_collect -> ncs_collect
        'id',  # we do id by hand
        'description',
        ['collectionLocation', 'url'],
        ['fullTitle', 'title'],
        ['id', 'collSetSpec'],
        ['created', 'dateTime'],  # involves a massage
        'libraryFormat',
        ['key', 'oaiSetSpec']
    ]

    def __init__(self, dcr_path, id=None):
        """
        Build an NCS collect record (self.ncr) from the DLESE collect record
        found at ``dcr_path`` (self.dcr).

        dcr_path -- path of the DLESE collect record to read
        id       -- optional id for the NCS record; applied only when truthy

        self.initialized is False while construction is in progress and
        becomes True as the very last step.
        """
        self.initialized = False
        self.dcr = DleseCollectRecord(path=dcr_path)
        self.ncr = NCSCollectRecord(path=self.ncs_collect_template)

        self.process_field_mappings()
        self.injectContributors()

        # (libraryFormat, view context, metadataPrefix); any other format
        # leaves the NCS record untouched at this step
        format_settings = (
            ('adn', 'DLESECollections', 'nsdl_dc'),
            ('dlese_anno', 'DLESEAnnotations', 'comm_anno'),
        )
        library_format = self.dcr.get('libraryFormat')
        for format_name, view_context, metadata_prefix in format_settings:
            if library_format == format_name:
                self.ncr.addViewContext(view_context)
                self.ncr.set('metadataPrefix', metadata_prefix)
                break

        if id:
            self.ncr.setId(id)

        # set the destination (ncr) path for writing
        output_name = self.ncr.getId() + '.xml'
        self.ncr.path = os.path.join(self.tmp_output_path, output_name)
        self.initialized = True

    def injectField(self, dcr_field, ncr_field=None):
        if ncr_field is None:
            ncr_field = dcr_field

        # get value from dcr_field
        try:
            value = self.dcr.get(dcr_field)
            if not value:
                # msg = 'WARN: injectField - no value for "%s" in %s' % (dcr_field, self.dcr.getId())
                # print msg
                raise Exception, "no value in metadata"
        except Exception, msg:
            print "WARN ingest did NOT get value for %s at '%s': %s" % (
                self.dcr.getId(), dcr_field, msg)
            value = ""

        # kludges for certain fields
        if dcr_field == 'created':
            value += 'T00:00:00Z'

        if ncr_field == 'collSetSpec':
            value = 'ncs-' + value

        # inject value in ncr_field
        try:
            print 'setting "%s" at "%s"' % (value, ncr_field)
            self.ncr.set(ncr_field, value)
        except Exception, msg:
            print "ERROR setting value at '%s': %s" % (ncr_field, msg)