class BookNumber(AlphaNumericSymbol):
    """Book-number portion of a call number (see ``definition``).

    The template accepts zero or more ``Component`` units in a single
    ``parts`` group, with ``Slash`` as the group's inner separator type.
    """

    definition = ('information that helps differentiate items with the same '
                  'class stem from each other, such as volume numbers or '
                  'agency-specific designators')
    short_description = ('a string containing any characters; letters sort '
                         'before numbers')

    # A single component: one or more letter, number, dash, or (non-slash)
    # formatting units.
    Component = AlphaNumericSymbol.derive(
        classname='BookNumber.Component',
        groups=[
            {
                'name': 'parts',
                'min': 1,
                'max': None,
                'possible_types': [
                    LettersFirst, Numeric, Dash, FormattingNoSlash
                ],
            },
        ]
    )

    template = CompoundTemplate(
        groups=[
            {
                'name': 'parts',
                'min': 0,
                'max': None,
                'inner_sep_type': Slash,
                'type': Component,
            },
        ]
    )
class DateString(BaseDate):
    """A date expressed in any one of several supported layouts.

    The template holds exactly one ``date`` group; the parsed unit may be
    any of the types listed under ``possible_types``.
    """

    definition = 'any string that represents a date, in a non-specific format'

    template = CompoundTemplate(
        separator_type=DEFAULT_SEPARATOR_TYPE,
        groups=[
            {
                'name': 'date',
                'min': 1,
                'max': 1,
                'possible_types': [
                    DateMDY, DateDMY, DateYMD, DateYDM, DateMY, DateYM,
                    Month.AlphaMonthLong, Month.AbbreviatedMonth
                ],
            },
        ]
    )

    def _get_date_prop(self, prop):
        """Return the ``prop`` component of the parsed date, or ``None``.

        If ``self.date`` has an attribute named ``prop``, that attribute is
        returned. Otherwise, if ``self.date.category`` equals ``prop``, the
        ``date`` unit itself is returned. In any other case the result is
        ``None``.
        """
        parsed = self.date
        if hasattr(parsed, prop):
            return getattr(parsed, prop)
        if parsed.category == prop:
            return parsed
        return None
class Day(CompoundDatePart):
    """Day of the month: a numeric day with an optional ordinal suffix."""

    short_description = ('cardinal or ordinal number, from 1 to 31, '
                         'representing the day of the month')
    category = 'day'

    # The pattern accepts 10-29, 30-31, or 1-9 with an optional leading zero.
    NumericDay = NumericDatePart.derive(
        classname='NumericDay',
        short_description='numeric day of the month, 1 to 31',
        min_length=1,
        max_length=1,
        base_pattern=r'(?:[1-2][0-9]|3[0-1]|0?[1-9])',
        category='day'
    )

    template = CompoundTemplate(
        groups=[
            {'name': 'wholenumber', 'min': 1, 'max': 1, 'type': NumericDay},
            {'name': 'suffix', 'min': 0, 'max': 1,
             'type': OrdinalNumber.OrdinalSuffix},
        ]
    )

    @property
    def value(self):
        """Value of the numeric part; the suffix does not contribute."""
        numeric_part = self.wholenumber
        return numeric_part.value
class LcClass(AlphaNumericSymbol):
    """Class letters followed by a class number (see ``short_description``).

    The template requires exactly one ``letters`` unit and exactly one
    ``number`` unit.
    """

    short_description = ('a string with 1 to 3 letters followed by a number '
                         'with up to 4 digits and optionally up to 4 '
                         'decimal places; the alphabetic and numeric parts '
                         'may optionally be separated by whitespace')

    ClassLetters = Alphabetic.derive(
        classname='LcClass.ClassLetters',
        min_length=1,
        max_length=3
    )

    # ``thousands=None`` is passed through to ``Number.derive``.
    ClassNumber = Number.derive(
        classname='LcClass.ClassNumber',
        definition=('A number between 1 and 9999.9999, with 0 to 4 decimal '
                    'places'),
        max_val=9999.9999,
        max_decimal_places=4,
        thousands=None
    )

    template = CompoundTemplate(
        separator_type=DEFAULT_SEPARATOR_TYPE,
        groups=[
            {'name': 'letters', 'min': 1, 'max': 1, 'type': ClassLetters},
            {'name': 'number', 'min': 1, 'max': 1, 'type': ClassNumber},
        ]
    )
class WholeNumUSGB1000sSep(BaseCompoundNumber):
    """Whole number of at least 1000 written with comma-separated groups.

    The template is a leading ``UpToThreeDigits__1To999`` group, one
    required thousands separator, then one or more ``ThreeDigits`` groups
    with the thousands separator as their inner separator type.
    """

    min_val = 1000

    template = CompoundTemplate(
        short_description=('a string representing a whole number that is '
                           '>=1000 and uses a comma as the thousands '
                           'separator'),
        groups=[
            {
                'name': 'thousand1',
                'min': 1,
                'max': 1,
                'type': UpToThreeDigits__1To999,
            },
            {
                'name': 'thousand_sep1',
                'min': 1,
                'max': 1,
                'type': USGBThousandsSeparator,
                'is_separator': True,
            },
            {
                'name': 'last_groups',
                'min': 1,
                'max': None,
                'type': ThreeDigits,
                'inner_sep_type': USGBThousandsSeparator,
            },
        ]
    )

    @classmethod
    def string_to_value(cls, cnstr):
        """Convert ``cnstr`` to an int after removing every comma."""
        digits_only = cnstr.replace(',', '')
        return int(digits_only)

    def for_sort(self):
        """Return the integer value left-padded to ``numeric_zfill``."""
        whole = int(self.value)
        return str(whole).zfill(self.numeric_zfill)
class Dewey(AlphaNumericSymbol):
    """Dewey Decimal call number.

    Groups, in order: one ``classification``, one or two ``cutters``, an
    optional ``edition``, and an optional ``item``.
    """

    definition = ('a Dewey Decimal call number')

    template = CompoundTemplate(
        separator_type=DEFAULT_SEPARATOR_TYPE,
        groups=[
            {
                'name': 'classification',
                'min': 1,
                'max': 1,
                'type': DeweyClass,
            },
            {
                'name': 'cutters',
                'min': 1,
                'max': 2,
                'inner_sep_type': DEFAULT_SEPARATOR_TYPE,
                'type': DeweyCutter,
            },
            {'name': 'edition', 'min': 0, 'max': 1, 'type': Edition},
            {'name': 'item', 'min': 0, 'max': 1, 'type': Item},
        ]
    )
class Cutter(AlphaNumericSymbol):
    """Cutter code: a ``letters`` unit followed by a ``number`` unit."""

    definition = ('a compact alphanumeric code used to arrange things '
                  'alphabetically')

    Letters = Alphabetic.derive(
        classname='Cutter.Letters',
        short_description='1 to 3 letters',
        min_length=1,
        max_length=3
    )

    # Described as sorting like a decimal; capped just below 1.
    StringNumber = Numeric.derive(
        classname='Cutter.StringNumber',
        short_description='a number that sorts as a decimal',
        max_val=.99999999
    )

    template = CompoundTemplate(
        short_description=('a string with 1 to 3 letters followed by a '
                           'number; the alphabetic and numeric portions '
                           'can be separated by optional whitespace'),
        separator_type=DEFAULT_SEPARATOR_TYPE,
        groups=[
            {
                'name': 'letters',
                'min': 1,
                'max': 1,
                'type': Letters,
            },
            {
                'name': 'number',
                'min': 1,
                'max': 1,
                'type': StringNumber,
            },
        ]
    )
class SuDoc(AlphaNumericSymbol):
    """US Superintendent of Documents call number.

    Groups, in order: a ``stem`` (``XjhsAgency`` or ``AgencyDotSeries``), a
    ``colon``, and a ``book_number``.
    """

    definition = ('a call number that uses the US Superintendent of Documents '
                  'Classification scheme')

    template = CompoundTemplate(
        separator_type=DEFAULT_SEPARATOR_TYPE,
        groups=[
            {
                'name': 'stem',
                'min': 1,
                'max': 1,
                'possible_types': [XjhsAgency, AgencyDotSeries],
            },
            {'name': 'colon', 'min': 1, 'max': 1, 'type': Colon},
            {'name': 'book_number', 'min': 1, 'max': 1, 'type': BookNumber},
        ]
    )

    def __init__(self, cnstr, name='default', **options):
        """Parse ``cnstr``, then pad a stem series lacking a related series.

        When the stem has a ``series`` whose ``related_series`` is falsy, an
        empty ``Formatting`` unit whose ``for_sort`` returns
        ``CompoundUnit.sort_break`` is appended to the series' ``_parts``.
        Presumably this keeps sort keys aligned with stems that do carry a
        related series -- TODO confirm.
        """
        super(SuDoc, self).__init__(cnstr, name, **options)
        if not hasattr(self.stem, 'series'):
            return
        series = self.stem.series
        if series.related_series:
            return
        BlankRelatedSeries = Formatting.derive(
            min_length=0,
            for_sort=lambda unit: CompoundUnit.sort_break
        )
        series._parts.append(BlankRelatedSeries(''))
class NumericSymbol(CompoundUnit):
    """Compound unit built from one or more Numeric or Formatting units."""

    # Start from Numeric's option defaults, then layer Formatting's on top.
    options_defaults = Numeric.options_defaults.copy()
    options_defaults.update(Formatting.options_defaults)

    template = CompoundTemplate(
        separator_type=DEFAULT_SEPARATOR_TYPE,
        groups=[
            {
                'name': 'parts',
                'min': 1,
                'max': None,
                'possible_types': [Numeric, Formatting],
            },
        ]
    )
class AlphaNumeric(CompoundUnit):
    """Compound unit built from one or more Alphabetic or Numeric units.

    The template is created with ``separator_type=None``.
    """

    # Start from Alphabetic's option defaults, then layer Numeric's on top.
    options_defaults = Alphabetic.options_defaults.copy()
    options_defaults.update(Numeric.options_defaults)

    template = CompoundTemplate(
        separator_type=None,
        groups=[
            {
                'name': 'parts',
                'min': 1,
                'max': None,
                'possible_types': [Alphabetic, Numeric],
            },
        ]
    )
class Local(AlphaNumericSymbol):
    """Local call number with no fixed structure.

    One or more Alphabetic, Number, or Formatting units form a single
    ``parts`` group. The template reuses the short description of
    ``AlphaNumericSymbol.template``.
    """

    definition = 'a local call number with a non-specific structure'

    template = CompoundTemplate(
        short_description=AlphaNumericSymbol.template.short_description,
        groups=[
            {
                'name': 'parts',
                'min': 1,
                'max': None,
                'inner_sep_type': DEFAULT_SEPARATOR_TYPE,
                'possible_types': [Alphabetic, Number, Formatting],
            },
        ]
    )
class Series(AlphaNumericSymbol):
    """Series designation: a main series plus an optional related series.

    Groups, in order: one ``main_series`` (a ``Cutter`` or a
    ``NumericSeries``), an optional ``slash`` separator, and an optional
    ``related_series``.
    """

    # NOTE: this must be a plain string. A trailing comma after the closing
    # parenthesis would turn the value into a one-element tuple.
    short_description = ('a Cutter number or a number denoting the category/'
                         'series of the publication, optionally followed '
                         'by a forward slash and one or two alphabetic or '
                         'numeric codes separated by a hyphen, which denote '
                         'a related series')

    NumericSeries = Numeric.derive(classname='Series.NumericSeries')

    # One or more letter/number codes, with Dash as the inner separator type.
    RelatedSeries = AlphaNumericSymbol.derive(
        classname='Series.RelatedSeries',
        groups=[
            {
                'name': 'parts',
                'min': 1,
                'max': None,
                'possible_types': [LettersFirst, Numeric],
                'inner_sep_type': Dash,
            },
        ]
    )

    template = CompoundTemplate(
        separator_type=DEFAULT_SEPARATOR_TYPE,
        groups=[
            {
                'name': 'main_series',
                'min': 1,
                'max': 1,
                'possible_types': [Cutter, NumericSeries],
            },
            {
                'name': 'slash',
                'min': 0,
                'max': 1,
                'type': Slash,
                'is_separator': True,
            },
            {
                'name': 'related_series',
                'min': 0,
                'max': 1,
                'type': RelatedSeries,
            },
        ]
    )
class BaseDate(AlphaNumericSymbol):
    """Base class for date units exposing ``year``, ``month`` and ``day``.

    The base template declares ``prop_month``, an optional ``prop_day`` and
    ``prop_year`` groups; the public properties read those ``prop_*``
    attributes through ``_get_date_prop``.
    """

    template = CompoundTemplate(
        separator_type=None,
        groups=[
            {'name': 'prop_month', 'min': 1, 'max': 1, 'type': SimpleUnit},
            {'name': 'prop_day', 'min': 0, 'max': 1, 'type': SimpleUnit},
            {'name': 'prop_year', 'min': 1, 'max': 1, 'type': SimpleUnit},
        ]
    )

    def _get_date_prop(self, prop):
        """Return the ``prop_<prop>`` attribute, or ``None`` if absent."""
        attr_name = 'prop_{}'.format(prop)
        if not hasattr(self, attr_name):
            return None
        return getattr(self, attr_name)

    @property
    def year(self):
        """The year component, or ``None``."""
        return self._get_date_prop('year')

    @property
    def month(self):
        """The month component, or ``None``."""
        return self._get_date_prop('month')

    @property
    def day(self):
        """The day component, or ``None``."""
        return self._get_date_prop('day')

    @property
    def normalized_datestring(self):
        """The date as a zero-padded ``YYYYMMDD`` string.

        A falsy component contributes 0. The result is computed once and
        cached on the instance as ``_normalized_datestring``.
        """
        if hasattr(self, '_normalized_datestring'):
            return self._normalized_datestring
        numbers = []
        for category in ('year', 'month', 'day'):
            unit = getattr(self, category)
            numbers.append(unit.value if unit else 0)
        self._normalized_datestring = '{:04d}{:02d}{:02d}'.format(*numbers)
        return self._normalized_datestring

    def for_sort(self):
        """Sort as a ``Numeric`` built from the normalized date string.

        Only the options accepted by ``Numeric.filter_valid_useropts`` are
        forwarded to the ``Numeric`` instance.
        """
        numeric_opts = Numeric.filter_valid_useropts(self.options)
        as_number = Numeric(self.normalized_datestring, **numeric_opts)
        return as_number.for_sort()
class Edition(AlphaNumeric):
    """Edition statement: a ``year`` unit with an optional ``letters`` unit."""

    definition = 'information identifying the edition of an item'

    # Exactly four digits.
    Year = Numeric.derive(
        classname='Edition.Year',
        min_length=4,
        max_length=4
    )

    template = CompoundTemplate(
        short_description=('a 4-digit year, optionally followed by one or '
                           'more letters (no whitespace between them)'),
        separator_type=None,
        groups=[
            {'name': 'year', 'min': 1, 'max': 1, 'type': Year},
            {'name': 'letters', 'min': 0, 'max': 1, 'type': Alphabetic},
        ]
    )
class LC(AlphaNumericSymbol):
    """Library of Congress call number.

    Groups, in order: ``classification``, an optional ``period`` separator,
    one or more ``cutters``, an optional ``edition``, a ``space`` separator,
    and an optional ``item``.
    """

    definition = 'a Library of Congress call number'

    # The default separator type, derived with a minimum length of 1.
    Space = DEFAULT_SEPARATOR_TYPE.derive(min_length=1)

    template = CompoundTemplate(
        separator_type=DEFAULT_SEPARATOR_TYPE,
        groups=[
            {
                'name': 'classification',
                'min': 1,
                'max': 1,
                'type': LcClass,
            },
            {
                'name': 'period',
                'min': 0,
                'max': 1,
                'type': CutterPeriod,
                'is_separator': True,
            },
            {
                'name': 'cutters',
                'min': 1,
                'max': None,
                'inner_sep_type': DEFAULT_SEPARATOR_TYPE,
                'type': Cutter,
            },
            {'name': 'edition', 'min': 0, 'max': 1, 'type': Edition},
            {
                'name': 'space',
                'min': 1,
                'max': 1,
                'type': Space,
                'is_separator': True,
            },
            {'name': 'item', 'min': 0, 'max': 1, 'type': Item},
        ]
    )
class OrdinalNumber(BaseCompoundNumber):
    """Ordinal number: a whole number immediately followed by its suffix.

    Both the ``wholenumber`` and ``suffix`` groups are required, and the
    template is created with ``separator_type=None``.
    """

    definition = 'an ordinal number (1st, 2nd, 3rd, 4th ... 1,000th, etc.)'
    min_val = 1
    max_decimal_places = 0

    WholeNumber = Number.derive(
        classname='WholeNumber',
        definition=('a non-negative whole number, formatted based on US or '
                    'British conventions, with a comma as an optional '
                    'thousands separator'),
        max_decimal_places=0,
        min_val=1,
    )

    # The suffix's ``for_sort`` always returns an empty string.
    OrdinalSuffix = Alphabetic.derive(
        classname='OrdinalSuffix',
        short_description=('a 2-character string: \'st\', \'nd\', \'rd\', or '
                           '\'th\''),
        min_length=1,
        max_length=1,
        base_pattern=r'(?:[sS][tT]|[nNrR][dD]|[tT][hH])',
        for_sort=lambda unit: '',
    )

    template = CompoundTemplate(
        separator_type=None,
        groups=[
            {
                'name': 'wholenumber',
                'min': 1,
                'max': 1,
                'type': WholeNumber,
            },
            {
                'name': 'suffix',
                'min': 1,
                'max': 1,
                'type': OrdinalSuffix,
            },
        ]
    )
class Number(BaseCompoundNumber):
    """Non-negative integer or decimal number in US/British notation.

    The template is a required ``wholenumber`` group (comma-grouped or plain
    digits) and an optional ``decimal`` group, joined by
    ``USGBDecimalSeparator``. Use ``derive`` to build subclasses restricted
    to a value range, a number of decimal places, or a thousands-separator
    policy.
    """

    definition = ('a non-negative integer or floating point number, formatted '
                  'based on US or British conventions: a period is used as a '
                  'decimal point, if needed, and commas may be used as '
                  'thousands separators (but are optional)')
    max_numeric_zfill = settings.DEFAULT_MAX_NUMERIC_ZFILL
    min_val = 0
    max_val = float(math.pow(10, max_numeric_zfill) - 1) + .999999999
    min_interval = .000000001
    is_whole_number = False
    min_decimal_places = 0
    max_decimal_places = 9

    template = CompoundTemplate(
        separator_type=USGBDecimalSeparator,
        groups=[
            {
                'name': 'wholenumber',
                'min': 1,
                'max': 1,
                'possible_types': [WholeNumUSGB1000sSep, Numeric],
            },
            {'name': 'decimal', 'min': 0, 'max': 1, 'type': Decimal},
        ]
    )

    @classmethod
    def string_to_value(cls, cnstr):
        """Convert ``cnstr`` to a number after removing commas.

        Returns an ``int`` when the value has no fractional part; otherwise
        returns the result of ``cls.create_decimal``.
        """
        val = float(''.join(cnstr.split(',')))
        if val.is_integer():
            return int(val)
        return cls.create_decimal(val)

    @classmethod
    def derive(cls, **attr):
        """Build a ``Number`` subclass with a tailored template.

        Besides the attributes forwarded to the parent ``derive``, these
        keyword arguments are recognised:

        thousands -- ``'required'``, ``'optional'`` (default) or ``None``;
            selects whether comma-grouped and/or plain-digit whole-number
            types are allowed.
        min_decimal_places -- default 0; when 0 the decimal group is
            optional, otherwise it is required.
        max_decimal_places -- default 9; when 0 the decimal group and the
            decimal separator are removed from the template.
        min_val, max_val -- read (not removed) from ``attr`` to bound the
            whole-number and decimal types.
        """
        thousands = attr.pop('thousands', 'optional')
        min_dec_places = attr.pop('min_decimal_places', 0)
        max_dec_places = attr.pop('max_decimal_places', 9)

        # Split each bound into its whole and fractional digits via its
        # string form.
        # NOTE(review): converting the fractional digits with int() drops
        # leading zeros (a bound of 1.05 yields a decimal bound of .5, not
        # .05) -- confirm whether that is intended.
        mn, mx = str(attr.get('min_val', 0)), str(attr.get('max_val', None))
        min_val, min_dec = mn.split('.') if '.' in mn else (mn, '0')
        max_val, max_dec = mx.split('.') if '.' in mx else (mx, 'None')
        min_val, min_dec = int(min_val), int(min_dec)
        max_val = None if max_val == 'None' else int(max_val)
        max_dec = None if max_dec == 'None' else int(max_dec)

        separator_type = USGBDecimalSeparator
        groups = copy.deepcopy(cls.template.groups)
        types = []
        if thousands in ('required', 'optional'):
            # Comma-grouped form, for values of 1000 and above. The
            # unbounded case is handled first so that ``max_val`` is never
            # compared with an int while it is None (a TypeError on
            # Python 3).
            if max_val is None:
                if min_val < 1000:
                    types.append(WholeNumUSGB1000sSep)
                else:
                    types.append(WholeNumUSGB1000sSep.derive(min_val=min_val))
            elif max_val > 999:
                if min_val < 1000:
                    types.append(WholeNumUSGB1000sSep.derive(max_val=max_val))
                else:
                    types.append(
                        WholeNumUSGB1000sSep.derive(max_val=max_val,
                                                    min_val=min_val))
            # Ungrouped one-to-three digit form, for values below 1000.
            if min_val == 0:
                types.append(UpToThreeDigits)
            elif min_val < 1000 and (max_val is None or max_val > 999):
                types.append(UpToThreeDigits.derive(min_val=min_val))
            elif min_val < 1000 and max_val < 1000:
                types.append(
                    UpToThreeDigits.derive(min_val=min_val, max_val=max_val))
        if thousands in (None, 'optional'):
            types.append(Numeric.derive(min_val=min_val, max_val=max_val))
        groups[0] = {
            'name': 'wholenumber',
            'min': 1,
            'max': 1,
            'possible_types': types,
        }

        if max_dec_places == 0:
            # Whole numbers only: no decimal separator and no decimal group.
            separator_type = None
            del groups[1]
        else:
            attr['max_decimal_places'] = max_dec_places
            min_max_text = u.min_max_to_text(min_dec_places or 1,
                                             max_dec_places)
            # From here on min_val/max_val bound the decimal part only.
            min_val = 0 if min_val == 0 else float('.{}'.format(min_dec))
            max_val = None if max_dec is None else float('.{}'.format(max_dec))
            NewDecimal = Decimal.derive(
                short_description=('a numeric string representing {} decimal '
                                   'places'.format(min_max_text)),
                min_length=min_dec_places or 1,
                max_length=max_dec_places,
                min_val=min_val,
                max_val=max_val)
            group_min = 0 if min_dec_places == 0 else 1
            groups[1] = {
                'name': 'decimal',
                'min': group_min,
                'max': 1,
                'type': NewDecimal,
            }
        attr['separator_type'] = separator_type
        attr['groups'] = groups
        return super(Number, cls).derive(**attr)

    def for_sort(self):
        """Return the parent's sort string, minus an all-zero decimal part."""
        sortval = super(Number, self).for_sort()
        if '.' in sortval:
            (whole, dec) = sortval.split('.')
            if int(dec) == 0:
                return whole
        return sortval
class Month(CompoundDatePart):
    """A month given as a number, a full name, or an abbreviation.

    The single ``fullmonth`` group accepts a ``NumericMonth``, an
    ``AlphaMonthLong`` or an ``AbbreviatedMonth``. Season names are mapped
    to month numbers in the ``months`` tables below.
    """

    short_description = 'a numeric or alphabetic month'
    category = 'month'

    Period = Formatting.derive(
        classname='Period',
        short_description='a period',
        min_length=1,
        max_length=1,
        base_pattern=r'\.'
    )

    # 10-12, or 1-9 with an optional leading zero; the post-pattern rejects
    # a match that is immediately followed by another digit.
    NumericMonth = NumericDatePart.derive(
        classname='NumericMonth',
        short_description='a numeric month, 1 to 12',
        min_length=1,
        max_length=1,
        base_pattern=r'(?:1[0-2]|0?[1-9])',
        post_pattern=r'(?![0-9])',
        category='month'
    )

    AlphaMonthLong = AlphaMonth.derive(
        classname='AlphaMonthLong',
        months={
            'january': 1,
            'february': 2,
            'march': 3,
            'april': 4,
            'may': 5,
            'june': 6,
            'july': 7,
            'august': 8,
            'september': 9,
            'october': 10,
            'november': 11,
            'december': 12,
            'winter': 1,
            'spring': 3,
            'summer': 6,
            'fall': 9
        },
        category='month'
    )

    AlphaMonthShort = AlphaMonth.derive(
        classname='AlphaMonthShort',
        months={
            'jan': 1,
            'feb': 2,
            'febr': 2,
            'mar': 3,
            'apr': 4,
            'jun': 6,
            'jul': 7,
            'aug': 8,
            'sep': 9,
            'sept': 9,
            'oct': 10,
            'nov': 11,
            'dec': 12,
            'win': 1,
            'wint': 1,
            'spr': 3,
            'sum': 6,
            'summ': 6
        },
        category='month'
    )

    # A short month name with an optional trailing period; its value is
    # taken from the ``alphamonth`` part.
    AbbreviatedMonth = CompoundDatePart.derive(
        classname='AbbreviatedMonth',
        short_description='a month abbreviation and optional period',
        separator_type=None,
        groups=[
            {
                'name': 'alphamonth',
                'min': 1,
                'max': 1,
                'type': AlphaMonthShort,
            },
            {'name': 'period', 'min': 0, 'max': 1, 'type': Period},
        ],
        category='month',
        value=property(lambda unit: unit.alphamonth.value)
    )

    template = CompoundTemplate(
        groups=[
            {
                'name': 'fullmonth',
                'min': 1,
                'max': 1,
                'possible_types': [
                    NumericMonth, AlphaMonthLong, AbbreviatedMonth
                ],
            },
        ]
    )

    @property
    def value(self):
        """Value of whichever month unit was matched."""
        matched = self.fullmonth
        return matched.value
class Item(AlphaNumericSymbol):
    """Item-level information at the end of a call number.

    The template is one or more ``parts``, each a ``DateString``, a
    ``NumberThenLabel``, a ``LabelThenNumber`` or an ``AnythingButSpace``
    unit, with ``Separator`` as the inner separator type. The nested types
    below are the building blocks of those parts.
    """

    definition = ('information such as volume number, copy number, opus '
                  'number, etc., that may be included at the end of a call '
                  'number to help further differentiate an item from others '
                  'with the same call number')

    Separator = Formatting.derive(
        min_length=1,
        max_length=None
    )

    # Formatting characters other than whitespace; both sort and search
    # formatting flags are switched off.
    FormattingNoSpace = Formatting.derive(
        classname='Item.FormattingNoSpace',
        short_description='any non-alphanumeric, non-whitespace character',
        min_length=1,
        max_length=None,
        base_pattern=r'[^A-Za-z0-9\s]',
        use_formatting_in_sort=False,
        use_formatting_in_search=False
    )

    SpaceOrPeriod = Formatting.derive(
        classname='Item.SpaceOrPeriod',
        short_description='a period followed by optional whitespace',
        min_length=1,
        max_length=1,
        base_pattern=r'(?:(?:\.|\s)\s*)'
    )

    Space = Formatting.derive(
        classname='Item.Space',
        short_description='whitespace',
        min_length=1,
        max_length=1,
        base_pattern=r'\s'
    )

    AnythingButSpace = AlphaNumericSymbol.derive(
        classname='Item.AnythingButSpace',
        short_description=('any combination of letters, symbols, and numbers '
                           'with no whitespace'),
        groups=[
            {
                'name': 'parts',
                'min': 1,
                'max': None,
                'inner_sep_type': None,
                'possible_types': [Alphabetic, Numeric, FormattingNoSpace],
            },
        ]
    )

    # A label yields an empty string for both sorting and searching.
    Label = Alphabetic.derive(
        classname='Item.Label',
        for_sort=lambda unit: '',
        for_search=lambda unit: ''
    )

    IdString = AlphaNumericSymbol.derive(
        classname='Item.IdString',
        short_description=('a string with at least one number; can have any '
                           'characters except whitespace'),
        separator_type=None,
        groups=[
            {
                'name': 'pre_number',
                'min': 0,
                'max': None,
                'inner_sep_type': None,
                'possible_types': [Alphabetic, FormattingNoSpace],
            },
            {
                'name': 'first_number',
                'min': 1,
                'max': 1,
                'type': Number,
            },
            {
                'name': 'everything_else',
                'min': 0,
                'max': None,
                'inner_sep_type': None,
                'possible_types': [Alphabetic, Number, FormattingNoSpace],
            },
        ]
    )

    # The sort value is ``CompoundUnit.sort_break`` followed by the regular
    # AlphaNumericSymbol sort value.
    LabelThenNumber = AlphaNumericSymbol.derive(
        classname='Item.LabelThenNumber',
        short_description=('a string with a one-word label (which can contain '
                           'formatting), followed by a period and/or '
                           'whitespace, followed by one or more numbers (and '
                           'possibly letters and formatting), such as '
                           '\'Op. 1\', \'volume 1a\', or \'no. A-1\'; when '
                           'sorting, the label is ignored so that, e.g., '
                           '\'Volume 1\' sorts before \'vol 2\''),
        separator_type=SpaceOrPeriod,
        groups=[
            {
                'name': 'label',
                'min': 0,
                'max': None,
                'inner_sep_type': None,
                'possible_types': [Label, FormattingNoSpace],
            },
            {'name': 'number', 'min': 1, 'max': 1, 'type': IdString},
        ],
        for_sort=lambda unit: '{}{}'.format(
            CompoundUnit.sort_break, AlphaNumericSymbol.for_sort(unit))
    )

    # Same sort-value construction as LabelThenNumber.
    NumberThenLabel = AlphaNumericSymbol.derive(
        classname='Item.NumberThenLabel',
        short_description=('a string with an ordinal number and then a one-'
                           'word label, like \'101st Congress\' or \'2nd '
                           'vol.\'; when sorting, the label is ignored so '
                           'that, e.g., \'1st Congress\' sorts before \'2nd '
                           'CONG.\''),
        separator_type=Space,
        groups=[
            {'name': 'number', 'min': 1, 'max': 1, 'type': OrdinalNumber},
            {'name': 'label', 'min': 1, 'max': 1, 'type': Label},
        ],
        for_sort=lambda unit: '{}{}'.format(
            CompoundUnit.sort_break, AlphaNumericSymbol.for_sort(unit))
    )

    template = CompoundTemplate(
        short_description=('a string (any string) that gets parsed into '
                           'groups of labeled numbers and other groups of '
                           'words, symbols, and dates, where labels are '
                           'ignored for sorting; \'Volume 1 Copy 1\' sorts '
                           'before \'v. 1 c. 2\', which sorts before '
                           '\'VOL 1 CP 2 SUPP\'; dates are normalized to '
                           'YYYYMMDD format for sorting'),
        groups=[
            {
                'name': 'parts',
                'min': 1,
                'max': None,
                'inner_sep_type': Separator,
                'possible_types': [
                    DateString, NumberThenLabel, LabelThenNumber,
                    AnythingButSpace
                ],
            },
        ]
    )
class Agency(AlphaNumericSymbol):
    """Agency designation: a department plus an optional numeric office.

    The ``department`` group is either the literal ``XaDepartment`` form or
    an alphabetic ``Department`` code.
    """

    short_description = ('either \'X/A\' (for the Congressional Record) or '
                         'a 1- to 4-letter alphabetic department code and '
                         'optional numeric code for a subordinate office')

    Department = Alphabetic.derive(
        classname='Agency.Department',
        min_length=1,
        max_length=4,
    )

    # Three parts: a single X (either case), a slash, a single A (either
    # case).
    XaDepartment = AlphaNumericSymbol.derive(
        classname='Agency.XaDepartment',
        short_description='\'X/A\'',
        separator_type=DEFAULT_SEPARATOR_TYPE,
        groups=[
            {
                'name': 'x',
                'min': 1,
                'max': 1,
                'type': Alphabetic.derive(
                    min_length=1, max_length=1, base_pattern=r'[Xx]'),
            },
            {'name': 'slash', 'min': 1, 'max': 1, 'type': Slash},
            {
                'name': 'a',
                'min': 1,
                'max': 1,
                'type': LettersFirst.derive(
                    min_length=1, max_length=1, base_pattern=r'[Aa]'),
            },
        ]
    )

    Office = Numeric.derive(classname='Agency.Office')

    template = CompoundTemplate(
        separator_type=DEFAULT_SEPARATOR_TYPE,
        groups=[
            {
                'name': 'department',
                'min': 1,
                'max': 1,
                'possible_types': [XaDepartment, Department],
            },
            {'name': 'office', 'min': 0, 'max': 1, 'type': Office},
        ]
    )