Exemplo n.º 1
0
    def processDescription(self):
        """
        Clean up the values of the "dc:description" field of this record.

        Each description is stripped of surrounding whitespace and then
        handled according to how it ends (case-insensitive):

        - ends with "pdf": the 3-character suffix (and any whitespace left
          in front of it) is removed, then
            * manuscripts: the description is dropped entirely
            * monographs whose remaining text contains 'ASR': the text is
              added to "library_dc:altTitle" and dropped from descriptions
            * any other collection (e.g. technotes): the remaining text
              is kept
        - ends with ".jpg": the description is dropped entirely
        - anything else is kept unchanged

        None values and values that end up empty are discarded. The field
        is always rewritten with the surviving descriptions, which may be
        an empty list.
        """
        rec = self.lib_dc_rec
        desc_field = "dc:description"
        issue_field = "library_dc:issue"
        altTitle_field = "library_dc:altTitle"
        # issue_field is not modified below, but it is still validated here
        for field in [desc_field, issue_field, altTitle_field]:
            utils.validateField(field)

        new_descriptions = []
        for desc in rec.getFieldValues(desc_field):
            if desc is None:
                continue
            desc = desc.strip()
            lowered = desc.lower()
            if lowered.endswith('pdf'):
                # Remove the suffix together with the whitespace that
                # separated it from the text: "Report pdf" -> "Report",
                # not "Report " with a dangling space.
                desc = desc[:-3].rstrip()
                if self.collection == 'manuscripts':
                    continue
                if self.collection == 'monographs' and 'ASR' in desc:
                    rec.addFieldValue(altTitle_field, desc)
                    continue
            elif lowered.endswith('.jpg'):
                continue
            if desc:
                new_descriptions.append(desc)
        rec.setFieldValues(desc_field, new_descriptions)
Exemplo n.º 2
0
    def toTitleCase(self, field):
        """
        Rewrite every value of `field` in title-case.

        Each value is stripped of surrounding whitespace and passed through
        titlecase(). A field with no values is left untouched.
        """
        utils.validateField(field)
        originals = self.lib_dc_rec.getFieldValues(field)
        titled = [titlecase(raw.strip()) for raw in originals]
        if not titled:
            return
        self.lib_dc_rec.setFieldValues(field, titled)
Exemplo n.º 3
0
def showVocabs(rp):
    print "\n%s" % rp.recId
    rec = rp.lib_dc_rec
    field_list = [
        "library_dc:instName", 'library_dc:instDivision',
        'library_dc:libraryType'
    ]

    for field in field_list:
        utils.validateField(field)
        showFieldValues(field, rec.getFieldValues(field))
Exemplo n.º 4
0
def periods(rp, field="dc:title"):
    rec = rp.lib_dc_rec
    utils.validateField(field)
    field_vals = rec.getFieldValues(field)
    period_vals = []
    for val in field_vals:
        if val[-1] == '.':
            period_vals.append(val)
    if period_vals:
        print "\n%s (%s)" % (rp.recId, field)
        for val in period_vals:
            print "\t'%s'" % val
Exemplo n.º 5
0
    def dedup(self, field):
        """
        Collapse repeated values of `field`, keeping the first occurrence
        of each value and preserving the original order.

        NOTE (from the original author): this does not need to be used.
        """
        utils.validateField(field)
        record = self.lib_dc_rec
        unique = []
        for candidate in record.getFieldValues(field):
            if candidate in unique:
                continue
            unique.append(candidate)
        record.setFieldValues(field, unique)
Exemplo n.º 6
0
def multiFields(rp):
    rec = rp.lib_dc_rec

    multis = {}
    for field in field_list:
        utils.validateField(field)
        vals = rec.getFieldValues(field)
        if len(vals) > 1:
            multis[field] = vals
    if multis:
        print "\n", rp.recId
        for field in multis.keys():
            showFieldValues(field, multis[field])
Exemplo n.º 7
0
    def normalizeField(self, field, fn):
        """
        Replace the values of `field` with their normalized forms.

        `fn` is called once per existing value, in order, and returns the
        normalized value. Falsy results (None, '', ...) are dropped, so
        the field may end up with fewer values than it started with.
        """
        record = self.lib_dc_rec
        utils.validateField(field)
        existing = record.getFieldValues(field)
        kept = [result
                for result in (fn(value) for value in existing)
                if result]
        record.setFieldValues(field, kept)
Exemplo n.º 8
0
    def removeDupValues(self, field1, field2):
        """
        Drop from field1 every value that also appears in field2.

        The comparison is case-insensitive (both sides are upper-cased);
        surviving values keep their original case and order. field2 is
        not modified.
        """
        utils.validateField(field1)
        utils.validateField(field2)
        record = self.lib_dc_rec
        candidates = record.getFieldValues(field1)
        blocked = [string.upper(other)
                   for other in record.getFieldValues(field2)]
        kept = [item for item in candidates if item.upper() not in blocked]
        record.setFieldValues(field1, kept)
Exemplo n.º 9
0
def fieldsHaveMatchingValues(rp, field1, field2, verbose=True):
    utils.validateField(field1)
    utils.validateField(field2)
    rec = rp.lib_dc_rec
    vals1 = map(string.upper, rec.getFieldValues(field1))
    vals2 = map(string.upper, rec.getFieldValues(field2))
    for val in vals1:
        if val in vals2:
            if verbose:
                print "\n", rp.recId
                showFieldValues(field1, vals1)
                showFieldValues(field2, vals2)
            return 1
    return 0
Exemplo n.º 10
0
    def normalizeDateDigitized(self):
        """
        Reduce "library_dc:date_digitized" to a SINGLE value.

        The values are compared as integers and the largest one (i.e. the
        LATEST date, the policy as of 10/27/08) replaces all of them.
        A record with no values for the field is left untouched.
        """
        field = "library_dc:date_digitized"
        record = self.lib_dc_rec
        utils.validateField(field)
        current = record.getFieldValues(field)
        if not current:
            return
        # -1 is the floor the result can never drop below
        latest = max([-1] + [int(raw) for raw in current])
        record.removeField(field)
        record.setFieldValue(field, str(latest))
Exemplo n.º 11
0
    def massageTitleAndAltTitle(self):
        """
        Swap the title and altTitle values where the collection calls
        for it. Nothing happens unless both fields have values.

        manuscripts: when each field has exactly one value and the title
            starts with the altTitle (minus its last character), the two
            values are exchanged.
        monographs: when a title contains 'ASR' and an altTitle contains
            'Annual Scientific Report', the altTitle field is removed,
            the title is set to that altTitle and the altTitle is set to
            the 'ASR' title. If several values match, the last one wins.

        Other collections ('technotes', 'theses', ...) are not processed.
        """
        rec = self.lib_dc_rec
        title_field = 'dc:title'
        altTitle_field = 'library_dc:altTitle'
        utils.validateField(title_field)
        utils.validateField(altTitle_field)

        titles = rec.getFieldValues(title_field)
        alt_titles = rec.getFieldValues(altTitle_field)
        if not (titles and alt_titles):
            return

        # manuscripts case
        if (self.collection == "manuscripts"
                and len(titles) == 1 and len(alt_titles) == 1):
            only_title = titles[0]
            only_alt = alt_titles[0]
            # the last character of the altTitle is ignored because some
            # altTitles end in a period and some don't
            if only_title.startswith(only_alt[:-1]):
                rec.setFieldValue(title_field, only_alt)
                rec.setFieldValue(altTitle_field, only_title)

        if self.collection == "monographs":
            # last title containing 'ASR', if any
            asr_title = None
            for candidate in titles:
                if "ASR" in candidate:
                    asr_title = candidate
            # last altTitle containing 'Annual Scientific Report', if any
            asr_alt = None
            for candidate in alt_titles:
                if 'Annual Scientific Report' in candidate:
                    asr_alt = candidate
            if asr_title and asr_alt:
                rec.removeField(altTitle_field)
                rec.setFieldValue(title_field, asr_alt)
                rec.setFieldValue(altTitle_field, asr_title)
Exemplo n.º 12
0
def showTitleStuff(rp):
    print "\n%s" % rp.recId
    rec = rp.lib_dc_rec
    for field in field_list:
        utils.validateField(field)
        showFieldValues(field, rec.getFieldValues(field))