示例#1
0
文件: model.py 项目: rerowep/rero-ils
def marc21_to_subjects_imported(self, key, value):
    """Get subjects_imported or genreForm_imported from an imported field.

    The original note on this rule says the data comes from field 919
    (L53, L54).

    Behaviour, depending on the first $2 of the field:

    - no $2: the lettered subfields are joined with ' - ' and, when the
      result is not empty, stored in ``subjects_imported`` without source.
    - $2 ``chrero``: the field is kept only if the first $v contains one of
      ``[carte postale]``, ``[affiche]`` or ``[document photographique]``;
      it goes to ``genreForm_imported`` when the first $9 contains ``655``,
      otherwise to ``subjects_imported``.
    - $2 ``gatbegr`` or ``gnd-content``: stored in ``genreForm_imported``.
    - any other $2 accepted by the vocabulary pattern: stored in
      ``subjects_imported``.
    - any other $2: the field is ignored.

    :param key: field key (not used by this rule).
    :param value: subfields of the field, read with ``value.get(code)``.
    :returns: None; the result is appended to ``self[field_key]``.
    """
    specific_contains_regexp = \
        re.compile(r'\[(carte postale|affiche|document photographique)\]')
    contains_specific_voc_regexp = re.compile(
        r'^(chrero|rerovoc|ram|rameau|gnd|rerovoc|gatbegr|gnd-content)$')
    all_letter_codes = 'abcdefghijklmnopqrstuvwxyz'

    subfields_2 = utils.force_list(value.get('2'))
    data_imported = None
    field_key = 'subjects_imported'
    if subfields_2:
        subfield_2 = subfields_2[0]
        if contains_specific_voc_regexp.search(subfield_2):
            add_data_imported = False
            if subfield_2 == 'chrero':
                subfields_v = utils.force_list(value.get('v'))
                if subfields_v and \
                        specific_contains_regexp.search(subfields_v[0]):
                    add_data_imported = True
                    # $9 is only needed here, and may be absent: read it
                    # lazily and guard it so that a field without $9 no
                    # longer raises on ``subfields_9[0]``.
                    subfields_9 = utils.force_list(value.get('9'))
                    if subfields_9 and '655' in subfields_9[0]:
                        field_key = 'genreForm_imported'
            else:
                add_data_imported = True
                if subfield_2 in ('gatbegr', 'gnd-content'):
                    field_key = 'genreForm_imported'
            if add_data_imported:
                # NOTE(review): unlike the no-$2 case below, an empty term
                # is still stored here; kept as in the original.
                data_imported = {
                    'type': 'bf:Topic',
                    'source': subfield_2,
                    'term': build_string_from_subfields(
                        value, all_letter_codes, ' - ')
                }
    else:
        term_string = build_string_from_subfields(
            value, all_letter_codes, ' - ')
        if term_string:
            data_imported = {'type': 'bf:Topic', 'term': term_string}
    if data_imported:
        imported = self.get(field_key, [])
        imported.append(data_imported)
        self[field_key] = imported
示例#2
0
def marc21_to_subjects_imported(self, key, value):
    """Append a ``subjects_imported`` entry built from a 6XX field.

    The field is processed when its first $2 is ``lcsh`` or when its second
    indicator is '0', '2' or '7'. The entry contains:

    - ``type``: derived from the tag (first three characters of ``key``),
    - ``source``: 'LCSH' when $2 is ``lcsh``, otherwise derived from the
      second indicator,
    - ``preferred_name`` / ``title`` / ``term`` (depending on the tag): the
      subfields a-u and w joined with ' - ', trailing dots removed,
    - the $v, $x, $y, $z subdivisions, each in its own list,
    - ``conference`` for tags 610 (False) and 611 (True).

    Nothing is added when the joined term is empty.

    :param key: field key; ``key[:3]`` is read as the tag and ``key[4]`` as
        the second indicator.
    :param value: subfields of the field, read with ``value.get(code)``.
    :returns: None; the entry is appended to ``self['subjects_imported']``.
    :raises KeyError: if the tag is not one of the tags in ``type_per_tag``.
    """
    def perform_subdivisions(field):
        """Copy the $v, $x, $y, $z subdivisions of the MARC field."""
        subdivisions = {
            'v': 'genreForm_subdivisions',
            'x': 'topic_subdivisions',
            'y': 'temporal_subdivisions',
            'z': 'place_subdivisions'
        }
        for code, subdivision in subdivisions.items():
            for subfield_value in utils.force_list(value.get(code, [])):
                field.setdefault(subdivision, []).append(subfield_value)

    type_per_tag = {
        '600': 'bf:Person',
        '610': 'bf:Organisation',
        '611': 'bf:Organisation',
        '600t': 'bf:Work',
        '610t': 'bf:Work',
        '611t': 'bf:Work',
        '630': 'bf:Work',
        '650': 'bf:Topic',  # or bf:Temporal, changed by code
        '651': 'bf:Place',
        '655': 'bf:Topic'
    }

    field_data_per_tag = {
        '600': 'preferred_name',
        '610': 'preferred_name',
        '611': 'preferred_name',
        '600t': 'title',
        '610t': 'title',
        '611t': 'title',
        '630': 'title',
        '650': 'term',
        '651': 'preferred_name',
        '655': 'term'
    }

    conference_per_tag = {'610': False, '611': True}
    # '0' was missing although the guard below accepts it, which raised a
    # KeyError; in MARC 21 a second indicator '0' means LCSH.
    # NOTE(review): '7' means "source specified in $2"; it is mapped to
    # 'LCSH' as in the original even when $2 is not 'lcsh' - confirm.
    source_per_indicator_2 = {'0': 'LCSH', '2': 'MeSH', '7': 'LCSH'}

    indicator_2 = key[4]
    tag_key = key[:3]
    subfields_2 = utils.force_list(value.get('2'))
    subfield_2 = subfields_2[0] if subfields_2 else None

    if subfield_2 != 'lcsh' and indicator_2 not in source_per_indicator_2:
        return

    term_string = build_string_from_subfields(value,
                                              'abcdefghijklmnopqrstuw',
                                              ' - ')
    if not term_string:
        return

    source = 'LCSH' if subfield_2 == 'lcsh' else \
        source_per_indicator_2[indicator_2]
    subject_imported = {
        'type': type_per_tag[tag_key],
        'source': source,
        field_data_per_tag[tag_key]: term_string.rstrip('.')
    }
    perform_subdivisions(subject_imported)
    if tag_key in conference_per_tag:
        subject_imported['conference'] = conference_per_tag[tag_key]
    subjects_imported = self.get('subjects_imported', [])
    subjects_imported.append(subject_imported)
    self['subjects_imported'] = subjects_imported
示例#3
0
文件: model.py 项目: rerowep/rero-ils
def marc21_to_subjects(self, key, value):
    """Convert a 6XX field into a subject, genreForm or imported subject.

    One entry is appended to one of these lists of ``self``:

    - ``genreForm``: field 655 with $2 ``rero``,
    - ``subjects``: any other 6xx with $2 ``rero``,
    - ``subjects_imported``: 6xx with $2 ``rerovoc``, or with second
      indicator '0' or '2'.

    For $2 ``rero``, a person or organization having a $0 is replaced by a
    ``$ref`` when ``get_contribution_link`` returns one; otherwise the entry
    carries the built string and, when available, an ``identifiedBy``.

    :param key: field key; ``key[:3]`` is read as the tag and ``key[4]`` as
        the second indicator.
    :param value: subfields of the field, read with ``value.get(code)``.
    :returns: None; ``self`` is updated in place.
    :raises KeyError: if the tag is not one of the tags in ``type_per_tag``.
    """
    type_per_tag = {
        '600': 'bf:Person',
        '610': 'bf:Organization',
        '611': 'bf:Organization',
        '600t': 'bf:Work',
        '610t': 'bf:Work',
        '611t': 'bf:Work',
        '630': 'bf:Work',
        '650': 'bf:Topic',  # or bf:Temporal, changed by code
        '651': 'bf:Place',
        '655': 'bf:Topic'
    }

    field_data_per_tag = {
        '600': 'preferred_name',
        '610': 'preferred_name',
        '611': 'preferred_name',
        '600t': 'title',
        '610t': 'title',
        '611t': 'title',
        '630': 'title',
        '650': 'term',
        '651': 'preferred_name',
        '655': 'term'
    }

    subfield_code_per_tag = {
        '600': 'abcd',
        '610': 'ab',
        '611': 'acden',
        '600t': 'tpn',
        '610t': 'tpn',
        '611t': 't',
        '630': 'apn',
        '650': 'a',
        '651': 'a',
        '655': 'a'
    }

    conference_per_tag = {'610': False, '611': True}
    source_per_indicator_2 = {'0': 'LCSH', '2': 'MeSH'}

    indicator_2 = key[4]
    tag_key = key[:3]
    subfields_2 = utils.force_list(value.get('2'))
    subfield_2 = subfields_2[0] if subfields_2 else None

    if subfield_2 == 'rero':
        # A name field carrying a $t describes a work, not an agent.
        if tag_key in ('600', '610', '611') and value.get('t'):
            tag_key += 't'
        data_type = type_per_tag[tag_key]

        # A 650 whose $a starts with a digit is a temporal subject.
        if tag_key == '650':
            start_with_digit_regexp = re.compile(r'^\d')
            subfields_a = utils.force_list(value.get('a', []))
            if any(start_with_digit_regexp.search(subfield_a)
                   for subfield_a in subfields_a):
                data_type = 'bf:Temporal'

        subject = {
            'type': data_type,
        }

        string_build = build_string_from_subfields(
            value, subfield_code_per_tag[tag_key])
        if tag_key == '655':
            # remove the square brackets
            string_build = re.sub(r'^\[(.*)\]$', r'\1', string_build)
        subject[field_data_per_tag[tag_key]] = string_build

        if tag_key in ('610', '611'):
            subject['conference'] = conference_per_tag[tag_key]

        if tag_key in ('600t', '610t', '611t'):
            creator_tag_key = tag_key[:3]  # to keep only tag:  600, 610, 611
            subject['creator'] = remove_trailing_punctuation(
                build_string_from_subfields(
                    value, subfield_code_per_tag[creator_tag_key]), '.', '.')
        field_key = 'genreForm' if tag_key == '655' else 'subjects'

        subfields_0 = utils.force_list(value.get('0'))
        # The spelling must match ``type_per_tag``: the previous test used
        # 'bf:Organisation' and therefore never linked 610/611 fields.
        if data_type in ('bf:Person', 'bf:Organization') and subfields_0:
            ref = get_contribution_link(marc21.bib_id, marc21.rero_id,
                                        subfields_0[0], key)
            if ref:
                subject = {
                    '$ref': ref,
                    'type': data_type,
                }
        if not subject.get('$ref'):
            identifier = build_identifier(value)
            if identifier:
                subject['identifiedBy'] = identifier

        # Keep the subject only if it is a link or has a non-empty value.
        if subject.get('$ref') or subject.get(field_data_per_tag[tag_key]):
            subjects = self.get(field_key, [])
            subjects.append(subject)
            self[field_key] = subjects
    elif subfield_2 == 'rerovoc' or indicator_2 in ('0', '2'):
        term_string = build_string_from_subfields(
            value, 'abcdefghijklmnopqrstuvwxyz', ' - ')
        if term_string:
            if subfield_2 == 'rerovoc':
                source = 'rerovoc'
            else:
                source = source_per_indicator_2[indicator_2]
            subject_imported = {
                'type': type_per_tag[tag_key],
                'source': source,
                field_data_per_tag[tag_key]: term_string
            }
            if tag_key in ('610', '611'):
                subject_imported['conference'] = conference_per_tag[tag_key]
            subjects_imported = self.get('subjects_imported', [])
            subjects_imported.append(subject_imported)
            self['subjects_imported'] = subjects_imported
示例#4
0
文件: model.py 项目: rerowep/rero-ils
def marc21_to_subjects_imported(self, key, value):
    """Append a ``subjects_imported`` entry built from a 6XX field.

    The field is processed when its first $2 is ``lcsh`` or when its second
    indicator is '0', '2' or '7'. The entry contains:

    - ``type``: derived from the tag (first three characters of ``key``),
    - ``source``: 'LCSH' when $2 is ``lcsh``, otherwise derived from the
      second indicator,
    - ``preferred_name`` / ``title`` / ``term`` (depending on the tag): all
      lettered subfields joined with ' - ', trailing dots removed,
    - ``conference`` for tags 610 (False) and 611 (True).

    Nothing is added when the joined term is empty.

    :param key: field key; ``key[:3]`` is read as the tag and ``key[4]`` as
        the second indicator.
    :param value: subfields of the field, read with ``value.get(code)``.
    :returns: None; the entry is appended to ``self['subjects_imported']``.
    :raises KeyError: if the tag is not one of the tags in ``type_per_tag``.
    """
    type_per_tag = {
        '600': 'bf:Person',
        '610': 'bf:Organization',
        '611': 'bf:Organization',
        '600t': 'bf:Work',
        '610t': 'bf:Work',
        '611t': 'bf:Work',
        '630': 'bf:Work',
        '650': 'bf:Topic',  # or bf:Temporal, changed by code
        '651': 'bf:Place',
        '655': 'bf:Topic'
    }

    field_data_per_tag = {
        '600': 'preferred_name',
        '610': 'preferred_name',
        '611': 'preferred_name',
        '600t': 'title',
        '610t': 'title',
        '611t': 'title',
        '630': 'title',
        '650': 'term',
        '651': 'preferred_name',
        '655': 'term'
    }

    conference_per_tag = {'610': False, '611': True}
    # '0' was missing although the guard below accepts it, which raised a
    # KeyError; in MARC 21 a second indicator '0' means LCSH.
    # NOTE(review): '7' means "source specified in $2"; it is mapped to
    # 'LCSH' as in the original even when $2 is not 'lcsh' - confirm.
    source_per_indicator_2 = {'0': 'LCSH', '2': 'MeSH', '7': 'LCSH'}

    indicator_2 = key[4]
    tag_key = key[:3]
    subfields_2 = utils.force_list(value.get('2'))
    subfield_2 = subfields_2[0] if subfields_2 else None

    if subfield_2 != 'lcsh' and indicator_2 not in source_per_indicator_2:
        return

    term_string = build_string_from_subfields(
        value, 'abcdefghijklmnopqrstuvwxyz', ' - ')
    if not term_string:
        return

    if subfield_2 == 'lcsh':
        source = 'LCSH'
    else:
        source = source_per_indicator_2[indicator_2]
    subject_imported = {
        'type': type_per_tag[tag_key],
        'source': source,
        field_data_per_tag[tag_key]: term_string.rstrip('.')
    }
    if tag_key in conference_per_tag:
        subject_imported['conference'] = conference_per_tag[tag_key]
    subjects_imported = self.get('subjects_imported', [])
    subjects_imported.append(subject_imported)
    self['subjects_imported'] = subjects_imported
示例#5
0
def marc21_to_series(self, key, value):
    """Build a series statement from a 490 or a 440 field.

    A 490 field is delegated to
    ``marc21.extract_series_statement_from_marc_field``.

    Any other field is handled as a 440 series statement and is ignored
    when it has no $a. Otherwise:

        440$a = seriesTitle

        1. if $p:
            440$n$p = subseriesTitle
            440$v = subseriesEnumeration

        2. if $v and $n but no $p:
            440$n,$v = seriesEnumeration

        3. if $n but no $p and no $v:
            440$n = subseriesTitle

        4. if no $p and no $n:
            440$v = seriesEnumeration

    Punctuation is stripped from the title and from the built strings
    through ``remove_punctuation``.

    :param key: field key; ``key[:3]`` is read as the tag.
    :param value: subfields of the field, read with ``value.get(code)``.
    :returns: None; the statement is appended to ``self['seriesStatement']``.
    """
    if key[:3] == '490':
        marc21.extract_series_statement_from_marc_field(key, value, self)
        return None

    # From here on we have a 440 series statement.
    if not value.get('a'):
        return None

    def joined_enumeration():
        """Join every cleaned $v with a slash."""
        return '/'.join(
            remove_punctuation(item)
            for item in utils.force_list(value.get('v'))
        )

    first_title = utils.force_list(value.get('a'))[0]
    statement = {
        'seriesTitle': [{
            'value': remove_punctuation(first_title, with_dot=True)
        }]
    }
    sub_statement = {}

    if value.get('p'):
        # case 1: $n$p is the subseries title, $v its enumeration
        sub_title = build_string_from_subfields(value, 'np', ' ')
        sub_statement['subseriesTitle'] = [{
            'value': remove_punctuation(sub_title, with_dot=True).rstrip()
        }]
        if value.get('v'):
            sub_statement['subseriesEnumeration'] = [{
                'value': joined_enumeration()
            }]
        statement['subseriesStatement'] = [sub_statement]
    elif value.get('n'):
        if value.get('v'):
            # case 2: $n and $v together form the series enumeration
            enumeration = build_string_from_subfields(value, 'nv')
            statement['seriesEnumeration'] = [{
                'value':
                remove_punctuation(enumeration, with_dot=True).rstrip()
            }]
        else:
            # case 3: $n alone is the subseries title
            sub_statement['subseriesTitle'] = [{
                'value': ''.join(utils.force_list(value.get('n')))
            }]
            statement['subseriesStatement'] = [sub_statement]
    elif value.get('v'):
        # case 4: $v alone is the series enumeration
        statement['seriesEnumeration'] = [{'value': joined_enumeration()}]

    existing_statements = self.get('seriesStatement', [])
    self['seriesStatement'] = existing_statements
    self['seriesStatement'].append(statement)
    return None
示例#6
0
def marc21_to_subjects_6XX(self, key, value):
    """Convert a 6XX field into a subject, genreForm or imported subject.

    One entry is appended to one of these lists of ``self``:

    - ``genreForm``: field 655 with $2 ``rero``, ``gnd`` or ``idref``,
    - ``subjects``: any other 6xx with $2 ``rero``, ``gnd`` or ``idref``,
    - ``subjects_imported``: other 6xx with second indicator '0' or '2'.

    For the first two cases, a person or organisation having a $0 is
    replaced by a ``$ref`` when ``get_contribution_link`` returns one;
    otherwise the entry carries the built string, an ``identifiedBy`` when
    available, and the $v, $x, $y, $z subdivisions.

    :param key: field key; ``key[:3]`` is read as the tag and ``key[4]`` as
        the second indicator.
    :param value: subfields of the field, read with ``value.get(code)``.
    :returns: None; ``self`` is updated in place.
    :raises KeyError: if the tag is not one of the tags in ``type_per_tag``.
    """

    def perform_subdivisions(field):
        """Copy the $v, $x, $y, $z subdivisions of the MARC field."""
        subdivisions = {
            'v': 'genreForm_subdivisions',
            'x': 'topic_subdivisions',
            'y': 'temporal_subdivisions',
            'z': 'place_subdivisions'
        }
        for code, subdivision in subdivisions.items():
            for subfield_value in utils.force_list(value.get(code, [])):
                field.setdefault(subdivision, []).append(subfield_value)

    type_per_tag = {
        '600': DocumentSubjectType.PERSON,
        '610': DocumentSubjectType.ORGANISATION,
        '611': DocumentSubjectType.ORGANISATION,
        '600t': DocumentSubjectType.WORK,
        '610t': DocumentSubjectType.WORK,
        '611t': DocumentSubjectType.WORK,
        '630': DocumentSubjectType.WORK,
        '650': DocumentSubjectType.TOPIC,  # or bf:Temporal, changed by code
        '651': DocumentSubjectType.PLACE,
        '655': DocumentSubjectType.TOPIC
    }

    field_data_per_tag = {
        '600': 'preferred_name',
        '610': 'preferred_name',
        '611': 'preferred_name',
        '600t': 'title',
        '610t': 'title',
        '611t': 'title',
        '630': 'title',
        '650': 'term',
        '651': 'preferred_name',
        '655': 'term'
    }

    subfield_code_per_tag = {
        '600': 'abcd',
        '610': 'ab',
        '611': 'acden',
        '600t': 'tpn',
        '610t': 'tpn',
        '611t': 't',
        '630': 'apn',
        '650': 'a',
        '651': 'a',
        '655': 'a'
    }

    conference_per_tag = {
        '610': False,
        '611': True
    }
    source_per_indicator_2 = {
        '0': 'LCSH',
        '2': 'MeSH'
    }

    indicator_2 = key[4]
    tag_key = key[:3]
    subfields_2 = utils.force_list(value.get('2'))
    subfield_2 = subfields_2[0] if subfields_2 else None
    subfields_a = utils.force_list(value.get('a', []))

    if subfield_2 in ['rero', 'gnd', 'idref']:
        # A name field carrying a $t describes a work, not an agent.
        if tag_key in ['600', '610', '611'] and value.get('t'):
            tag_key += 't'
        data_type = type_per_tag[tag_key]

        # `data_type` is Temporal if tag is 650 and a $a start with digit.
        # The slice (instead of index 0) keeps an empty $a from raising
        # an IndexError.
        if tag_key == '650' and any(
                subfield_a[:1].isdigit() for subfield_a in subfields_a):
            data_type = 'bf:Temporal'

        subject = {
            'type': data_type,
        }

        string_build = build_string_from_subfields(
            value, subfield_code_per_tag[tag_key])
        if tag_key == '655':
            # remove the square brackets
            string_build = re.sub(r'^\[(.*)\]$', r'\1', string_build)
        subject[field_data_per_tag[tag_key]] = string_build

        if tag_key in ['610', '611']:
            subject['conference'] = conference_per_tag[tag_key]

        if tag_key in ['600t', '610t', '611t']:
            creator_tag_key = tag_key[:3]  # to keep only tag:  600, 610, 611
            subject['creator'] = remove_trailing_punctuation(
                build_string_from_subfields(
                    value, subfield_code_per_tag[creator_tag_key]), '.', '.')
        field_key = 'genreForm' if tag_key == '655' else 'subjects'
        subfields_0 = utils.force_list(value.get('0'))
        if data_type in [DocumentSubjectType.PERSON,
                         DocumentSubjectType.ORGANISATION] and subfields_0:
            ref = get_contribution_link(marc21.bib_id, marc21.rero_id,
                                        subfields_0[0], key)
            if ref:
                subject = {
                    '$ref': ref,
                    'type': data_type,
                }
        if not subject.get('$ref'):
            identifier = build_identifier(value)
            if identifier:
                subject['identifiedBy'] = identifier
            perform_subdivisions(subject)

        # Keep the subject only if it is a link or has a non-empty value.
        if subject.get('$ref') or subject.get(field_data_per_tag[tag_key]):
            subjects = self.get(field_key, [])
            subjects.append(subject)
            self[field_key] = subjects
    elif indicator_2 in ['0', '2']:
        term_string = build_string_from_subfields(
            value, 'abcdefghijklmnopqrstuw', ' - ')
        if term_string:
            subject_imported = {
                'type': type_per_tag[tag_key],
                'source': source_per_indicator_2[indicator_2],
                field_data_per_tag[tag_key]: term_string.rstrip('.')
            }
            perform_subdivisions(subject_imported)
            if tag_key in ['610', '611']:
                subject_imported['conference'] = conference_per_tag[tag_key]
            subjects_imported = self.get('subjects_imported', [])
            subjects_imported.append(subject_imported)
            self['subjects_imported'] = subjects_imported
示例#7
0
文件: model.py 项目: rerowep/rero-ils
def marc21_to_tableOfContents(self, key, value):
    """Append the $t content of the field to ``tableOfContents``.

    The original note on this rule says the source is the repetitive
    field 464. Nothing is stored when the built string is empty.
    """
    contents = build_string_from_subfields(value, 't')
    if not contents:
        return
    entries = self.setdefault('tableOfContents', [])
    entries.append(contents)