示例#1
0
def unimarc_title(self, key, value):
    """Get title data.

    field 200: non repetitive
        $a : repetitive
        $e : repetitive
        $f : repetitive
        $g : repetitive
        $h : repetitive
        $i : repetitive
    field 510,512,514,515,516,517,518,519,532: repetitive
        $a : non repetitive
        $e : repetitive
        $h : repetitive
        $i : repetitive

    Every title field of the record is read through the module-level
    ``unimarc`` object, so ``key`` and ``value`` are not used here.

    Side effect: the values built from $f and $g of field 200 are appended
    to ``self['responsibilityStatement']``.

    :param key: not used.
    :param value: not used.
    :returns: the list of title dicts (``mainTitle``, ``subtitle``,
        ``type`` and ``part`` keys), or ``None`` when ``self`` already
        holds a non-empty ``title``.
    """
    # This function is called for each field 200, but all the title fields
    # are processed during the first run: when the title has already been
    # built there is nothing left to do.
    if self.get('title', []):
        return None

    title_tags = ('200', '510', '512', '514', '515', '516', '517', '518',
                  '519', '532')
    # every tag not listed here is a variant title
    title_types = {'200': 'bf:Title', '510': 'bf:ParallelTitle'}
    title_list = []
    responsibilities = []
    for tag in title_tags:
        title_type = title_types.get(tag, 'bf:VariantTitle')
        for field in unimarc.get_alt_graphic_fields(tag=tag):
            title_data = {}
            part_list = TitlePartList(part_number_code='h',
                                      part_name_code='i')
            subfields_6 = unimarc.get_subfields(field, '6')
            link = subfields_6[0] if subfields_6 else ''
            # build title parts
            index = 1
            for blob_key, blob_value in get_field_items(field['subfields']):
                # $f and $g are only meaningful on field 200
                is_responsibility = tag == '200' and blob_key in ('f', 'g')
                if blob_key in ('a', 'e') or is_responsibility:
                    value_data = \
                        unimarc.build_value_with_alternate_graphic(
                            tag, blob_key, blob_value,
                            index, link, ',.', ':;/-=')
                    if blob_key == 'a':
                        title_data['mainTitle'] = value_data
                    elif blob_key == 'e':
                        title_data['subtitle'] = value_data
                    else:
                        responsibilities.append(value_data)
                elif blob_key in ('h', 'i'):
                    part_list.update_part([dict(value=blob_value)],
                                          blob_key, blob_value)
                # '__order__' is not a subfield: it must not shift the index
                if blob_key != '__order__':
                    index += 1
            title_data['type'] = title_type
            the_part_list = part_list.get_part_list()
            if the_part_list:
                title_data['part'] = the_part_list
            # title_data always holds at least its 'type'
            title_list.append(title_data)

    # extract responsibilities
    if responsibilities:
        new_responsibility = self.get('responsibilityStatement', [])
        new_responsibility.extend(responsibilities)
        self['responsibilityStatement'] = new_responsibility
    return title_list or None
示例#2
0
    field_245_a_end_with_colon = re.search(r'\s*:\s*$', subfield_245_a)
    field_245_a_end_with_semicolon = re.search(r'\s*;\s*$', subfield_245_a)
    field_245_b_contains_equal = re.search(r'=', subfield_245_b)

    fields_246 = marc21.get_fields(tag='246')
    subfield_246_a = ''
    if fields_246:
        if subfields_246_a := marc21.get_subfields(fields_246[0], 'a'):
            subfield_246_a = subfields_246_a[0]

    tag_link, link = get_field_link_data(value)
    items = get_field_items(value)
    index = 1
    title_list = []
    title_data = {}
    part_list = TitlePartList(part_number_code='n', part_name_code='p')
    parallel_titles = []
    pararalel_title_data_list = []
    pararalel_title_string_set = set()
    responsibility = {}

    subfield_selection = {'a', 'b', 'c', 'n', 'p'}
    for blob_key, blob_value in items:
        if blob_key in subfield_selection:
            value_data = marc21.build_value_with_alternate_graphic(
                '245', blob_key, blob_value, index, link, ',.', ':;/-=')
            if blob_key in {'a', 'b', 'c'}:
                subfield_selection.remove(blob_key)
            if blob_key == 'a':
                title_data['mainTitle'] = value_data
            elif blob_key == 'b':
示例#3
0
def marc21_to_title(self, key, value):
    """Get title data.

    The title data are extracted from the following fields:
    field 245:
        $a : non repetitive
        $b : non repetitive
        $c : non repetitive
        $n : repetitive
        $p : repetitive
        $6 : non repetitive
    field 246:
        $a : non repetitive
        $n : repetitive
        $p : repetitive
        $6 : non repetitive

    Side effect: the responsibility data built from $c is appended to
    ``self['responsibilityStatement']``.

    :param key: not used.
    :param value: the field data, handed to ``get_field_link_data`` and
        ``get_field_items``.
    :returns: the list of title dicts: the main title first, then the
        parallel titles extracted from $b, then the variant titles.
    """
    # extraction and initialization of data for further processing
    subfield_245_a = ''
    fields_245 = marc21.get_fields(tag='245')
    if fields_245:
        subfields_245_a = marc21.get_subfields(fields_245[0], 'a')
        if subfields_245_a:
            subfield_245_a = subfields_245_a[0]
    field_245_a_end_with_equal = re.search(r'\s*=\s*$', subfield_245_a)

    subfield_246_a = ''
    fields_246 = marc21.get_fields(tag='246')
    if fields_246:
        subfields_246_a = marc21.get_subfields(fields_246[0], 'a')
        if subfields_246_a:
            subfield_246_a = subfields_246_a[0]

    # only the link is needed here, the linked tag is ignored
    _tag_link, link = get_field_link_data(value)
    items = get_field_items(value)
    index = 1
    title_list = []
    title_data = {}
    part_list = TitlePartList(part_number_code='n', part_name_code='p')
    parallel_titles = []
    parallel_title_string_set = set()
    responsibility = {}

    # parse field 245 subfields for extracting:
    # main title, subtitle, parallel titles and the title parts
    subfield_selection = {'a', 'b', 'c', 'n', 'p'}
    for blob_key, blob_value in items:
        if blob_key in subfield_selection:
            value_data = marc21.build_value_with_alternate_graphic(
                '245', blob_key, blob_value, index, link, ',.', ':;/-=')
            # $a, $b and $c are non repetitive: only the first occurrence
            # of each one is taken into account
            if blob_key in {'a', 'b', 'c'}:
                subfield_selection.remove(blob_key)
            if blob_key == 'a':
                if value_data:
                    title_data['mainTitle'] = value_data
            elif blob_key == 'b':
                if subfield_246_a:
                    # with a 246 $a present, $b is split into a subtitle
                    # and parallel titles
                    subtitle, parallel_titles, parallel_title_string_set = \
                        extract_subtitle_and_parallel_titles_from_field_245_b(
                            value_data, field_245_a_end_with_equal)
                    if subtitle:
                        title_data['subtitle'] = subtitle
                elif value_data:
                    title_data['subtitle'] = value_data
            elif blob_key == 'c':
                responsibility = build_responsibility_data(value_data)
            elif blob_key in ['n', 'p']:
                part_list.update_part(value_data, blob_key, blob_value)

        # '__order__' is not a subfield: it must not shift the index
        if blob_key != '__order__':
            index += 1
    title_data['type'] = 'bf:Title'
    the_part_list = part_list.get_part_list()
    if the_part_list:
        title_data['part'] = the_part_list
    # title_data always holds at least its 'type'
    title_list.append(title_data)
    title_list.extend(parallel_titles)

    # extract variant titles
    title_list.extend(
        marc21.build_variant_title_data(parallel_title_string_set))

    # extract responsibilities
    if responsibility:
        new_responsibility = self.get('responsibilityStatement', [])
        new_responsibility.extend(responsibility)
        self['responsibilityStatement'] = new_responsibility
    return title_list or None
示例#4
0
def marc21_to_work_access_point(self, key, value):
    """Get work access point.

    Fields 130 and 730:

    * "date_of_work": "[130$f|730$f]"
    * "miscellaneous_information": "[130$g|130$s|730$g|730$s]"
    * "language": "[130$l|730$l]"
    * "form_subdivision": ["[130$k|730$k]"]
    * "medium_of_performance_for_music": ["[130$m|730$m]"]
    * "arranged_statement_for_music": "[130$o|730$o]"
    * "key_for_music": "[130$r|730$r]"

    [1] Clean the string: remove the trailing punctuation "/:;.,=", and
    in particular remove the string ". - "

    Field 240: $a is mapped through ``_WORK_ACCESS_POINT`` and an
    ``agent`` is built from the subfields of the first field 100.

    For all these fields $n and $p are collected as ``part``.

    Side effect: a non-empty work is appended to
    ``self['work_access_point']``; nothing is returned.

    :param key: the field key; its first 3 characters are the tag.
    :param value: the field data, handed to ``get_field_items``.
    """
    work = {}
    tag_key = key[:3]
    part_list = TitlePartList(part_number_code='n', part_name_code='p')
    part_selection = {'n', 'p'}
    items = get_field_items(value)
    if tag_key in ['130', '730']:
        work_selection = {'a', 'f', 'k', 'l', 'm', 'o', 'r'}
        # subfields whose values are accumulated in a list
        list_selection = {'k', 'm'}
        miscellaneous_selection = {'g', 's'}
        miscellaneous_parts = []
        for blob_key, blob_value in items:
            if blob_key in work_selection:
                work_key = _WORK_ACCESS_POINT.get(blob_key)
                if blob_key in list_selection:
                    work.setdefault(work_key, []).append(blob_value)
                else:
                    work[work_key] = blob_value
            elif blob_key in miscellaneous_selection:
                miscellaneous_parts.append(blob_value)
            elif blob_key in part_selection:
                part_list.update_part(blob_value, blob_key, blob_value)
        if miscellaneous_parts:
            work['miscellaneous_information'] = '. '.join(miscellaneous_parts)
    elif tag_key == '240':
        for blob_key, blob_value in items:
            if blob_key == 'a':
                work[_WORK_ACCESS_POINT.get(blob_key)] = blob_value
            elif blob_key in part_selection:
                part_list.update_part(blob_value, blob_key, blob_value)

        field_100 = marc21.get_fields('100')
        if field_100:
            agent = {}
            subfields_100 = field_100[0].get('subfields')
            for blob_key, blob_value in subfields_100.items():
                # set inside the loop: without any subfield the agent
                # stays empty
                agent['type'] = 'bf:Person'
                if blob_key == 'a':
                    agent['preferred_name'] = remove_trailing_punctuation(
                        blob_value)
                elif blob_key == 'b':
                    agent['numeration'] = remove_trailing_punctuation(
                        blob_value)
                elif blob_key == 'c':
                    agent['qualifier'] = remove_trailing_punctuation(
                        blob_value)
                elif blob_key == 'd':
                    # the text before the first '-' is the birth date and
                    # the text after it, if any, is the death date
                    date = blob_value.rstrip(',')
                    dates = remove_trailing_punctuation(date).split('-')
                    date_of_birth = dates[0].strip()
                    if date_of_birth:
                        agent['date_of_birth'] = date_of_birth
                    if len(dates) > 1:
                        date_of_death = dates[1].strip()
                        if date_of_death:
                            agent['date_of_death'] = date_of_death
                elif blob_key == 'q':
                    # drop the parentheses surrounding the fuller form
                    agent['fuller_form_of_name'] = remove_trailing_punctuation(
                        blob_value).lstrip('(').rstrip(')')
            work['agent'] = agent

    the_part_list = part_list.get_part_list()
    if the_part_list:
        work['part'] = the_part_list

    if work:
        work_access_points = self.get('work_access_point', [])
        work_access_points.append(work)
        self['work_access_point'] = work_access_points