Exemplo n.º 1
0
def delete_field(rec, fnum, findex):
    """Remove one field instance from a record.

    Thin wrapper that forwards to record_delete_field, passing the index
    as the local field position.

    @param rec: a record dictionary structure
    @param fnum: 3-character string holding the field tag number
    @param findex: position of the field within the group of fields that
        share the tag
    """
    local_position = findex
    record_delete_field(rec, fnum, field_position_local=local_position)
Exemplo n.º 2
0
 def process_record(self, record):
     """@see: BaseFieldCommand.process_record"""
     if not self._condition:
         # No condition configured: hand the tag and indicator pair straight
         # to bibrecord and count the call as one modification.
         bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2)
         self._modifications += 1
         return
     # A condition is configured: the per-field decision (and the
     # modification counting) is done by the helper.
     self._delete_field_condition(record)
Exemplo n.º 3
0
def delete_field(rec, fnum, findex):
    """Remove one field instance from a record.

    Thin wrapper that forwards to record_delete_field, passing the index
    as the local field position.

    @param rec: a record dictionary structure
    @param fnum: 3-character string holding the field tag number
    @param findex: position of the field within the group of fields that
        share the tag
    """
    local_position = findex
    record_delete_field(rec, fnum, field_position_local=local_position)
Exemplo n.º 4
0
def compare_references(test, a, b):
    """Assert that two reference records produce the same XML output.

    Both inputs are parsed with create_record. The 999 C 6 field is deleted
    from the first record only, then the XML outputs are compared.

    @param test: test case object providing assertEqual
    @param a: first record, in a form accepted by create_record
    @param b: second record, in a form accepted by create_record
    """
    from invenio.legacy.bibrecord import (
        create_record,
        record_delete_field,
        record_xml_output,
    )
    # Normalize the records: remove the Invenio refextract signature.
    record_a = create_record(a)[0]
    record_b = create_record(b)[0]
    record_delete_field(record_a, '999', 'C', '6')
    test.assertEqual(record_xml_output(record_a), record_xml_output(record_b))
Exemplo n.º 5
0
def compare_references(test, a, b):
    """Assert that two reference records produce the same XML output.

    Both inputs are parsed with create_record. The 999 C 6 field is deleted
    from the first record only, then the XML outputs are compared.

    @param test: test case object providing assertEqual
    @param a: first record, in a form accepted by create_record
    @param b: second record, in a form accepted by create_record
    """
    from invenio.legacy.bibrecord import (
        create_record,
        record_delete_field,
        record_xml_output,
    )
    # Normalize the records: remove the Invenio refextract signature.
    record_a = create_record(a)[0]
    record_b = create_record(b)[0]
    record_delete_field(record_a, '999', 'C', '6')
    test.assertEqual(record_xml_output(record_a), record_xml_output(record_b))
Exemplo n.º 6
0
    def _filter_fields(self, record, output_fields):
        """Removes from the record all the fields
        that are not output_fields.

        @param record: record structure (@see: bibrecord.py for details)
        @param output_fields: list of fields that should remain in the record

        @return: record containing only fields among output_fields
        """
        # Tibor's new implementation:
        for tag in record.keys():
            if tag not in output_fields:
                bibrecord.record_delete_fields(record, tag)
        return record

        # Rado's old implementation that leads to bibrecord-related
        # bug, see <https://savannah.cern.ch/task/?10267>:
        record_keys = record.keys()

        # Check if any of the tags, fields or subfields match
        # any value in output_fields. In case of match we leave
        # the element and its children in the record.
        #
        # If the element and all its children are not among the
        # output fields, it is deleted
        for tag in record_keys:
            tag = tag.lower()
            if tag not in output_fields:
                for (subfields, ind1, ind2, value,
                     field_number) in record[tag]:
                    current_field = tag + ind1.strip() + ind2.strip()
                    current_field = current_field.lower()
                    if current_field not in output_fields:
                        delete_parents = True

                        for (code, value) in subfields:
                            current_subfield = current_field + code
                            current_subfield = current_subfield.lower()
                            if current_subfield not in output_fields:
                                bibrecord.record_delete_subfield(
                                    record, tag, code, ind1, ind2)
                            else:
                                delete_parents = False

                        if delete_parents:
                            bibrecord.record_delete_field(
                                record, tag, ind1, ind2)
        return record
Exemplo n.º 7
0
def main():
    """Rewrite the 'b' subfield of 'language_code' 690 fields and re-upload.

    For each record id in RECORDS that has a truthy entry in VALUES, every
    690 field whose first subfield is ('a', 'language_code') and whose
    second subfield has code 'b' is rebuilt with the second value replaced
    by VALUES[record id]. When that changes the list of 690 fields, the old
    and new fields are printed and, after interactive confirmation, the
    record is uploaded with bibupload ('delete', then 'replace').
    """
    from invenio.legacy.search_engine import get_record
    from invenio.legacy.bibupload.engine import (
        bibupload,
    )
    from invenio.legacy.bibrecord import (
        record_add_field,
        record_delete_field,
    )

    # Loop through list of records
    for r in RECORDS:
        # Fetched twice: old_rec is only read for display further down.
        old_rec = get_record(r)
        rec = get_record(r)

        # Stop at the first id for which get_record returns nothing.
        if not rec:
            break

        print('Processing record: {0}'.format(r))

        new_value = VALUES.get(r)
        old_690 = [f[0] for f in rec.get('690', [])]
        new_690 = []
        for f in old_690:
            # A field with fewer than two subfields cannot hold the
            # (a=language_code, b=<value>) pair; it is kept unchanged
            # instead of failing with an IndexError on f[1].
            if (new_value and len(f) >= 2 and
                    f[0][0] == 'a' and
                    f[0][1] == 'language_code' and
                    f[1][0] == 'b'):
                # NOTE: only the first two subfields are carried over into
                # the rewritten field.
                new_690.append([f[0], (f[1][0], new_value)])
            else:
                new_690.append(f)

        if new_690 != old_690:
            record_delete_field(rec, '690')
            for f in new_690:
                record_add_field(rec, '690', subfields=f)

            print('\nOld 690:')
            pprint(old_rec.get('690'))
            print('\nNew 690:')
            pprint(rec.get('690'))

            if raw_input('Bibupload (y/n)? ') == 'y':
                bibupload(rec, 'delete')
                sleep(5)
                bibupload(rec, 'replace')
Exemplo n.º 8
0
    def _filter_fields(self, record, output_fields):
        """Removes from the record all the fields
        that are not output_fields.

        @param record: record structure (@see: bibrecord.py for details)
        @param output_fields: list of fields that should remain in the record

        @return: record containing only fields among output_fields
        """
        # Tibor's new implementation:
        for tag in record.keys():
            if tag not in output_fields:
                bibrecord.record_delete_fields(record, tag)
        return record

        # Rado's old implementation that leads to bibrecord-related
        # bug, see <https://savannah.cern.ch/task/?10267>:
        record_keys = record.keys()

        # Check if any of the tags, fields or subfields match
        # any value in output_fields. In case of match we leave
        # the element and its children in the record.
        #
        # If the element and all its children are not among the
        # output fields, it is deleted
        for tag in record_keys:
            tag = tag.lower()
            if tag not in output_fields:
                for (subfields, ind1, ind2, value, field_number) in record[tag]:
                    current_field = tag + ind1.strip() + ind2.strip()
                    current_field = current_field.lower()
                    if current_field not in output_fields:
                        delete_parents = True

                        for (code, value) in subfields:
                            current_subfield = current_field + code
                            current_subfield = current_subfield.lower()
                            if current_subfield not in output_fields:
                                bibrecord.record_delete_subfield(record, tag, code, ind1, ind2)
                            else:
                                delete_parents = False

                        if delete_parents:
                            bibrecord.record_delete_field(record, tag, ind1, ind2)
        return record
Exemplo n.º 9
0
 def _delete_field_condition(self, record):
     """Checks if a subfield meets the condition for the
     field to be deleted
     """
     try:
         for field in record[self._tag]:
             subfield_exists = False
             for subfield in field[0]:
                 if subfield[0] == self._conditionSubfield:
                     subfield_exists = True
                     if self._condition_does_not_exist == True:
                         break
                     if self._condition_exact_match:
                         if self._condition == subfield[1]:
                             bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2, field_position_global=field[4])
                             self._modifications += 1
                             break
                     else:
                         if self._condition in subfield[1]:
                             bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2, field_position_global=field[4])
                             self._modifications += 1
                             break
             if subfield_exists == False and self._condition_does_not_exist:
                 bibrecord.record_delete_field(record, self._tag, self._ind1, self._ind2, field_position_global=field[4])
                 self._modifications += 1
     except KeyError:
         pass
Exemplo n.º 10
0
    def compare_records(self, record1, record2, opt_mode=None):
        """
        Compares two records to identify added/modified/deleted tags.

        The records are either the upload record or existing record or
        record archived.

        @param record1: record whose extra tags are reported as added
        @param record2: record whose extra tags are reported as deleted
        @param opt_mode: deleted tags are only reported when this is
            'replace' or 'delete'

        Returns a dictionary with the non-empty patches stored under the
        keys 'MOD', 'ADD' and 'DEL'.
        """
        # Group the tags; each list keeps the key order of the record it
        # is built from.
        common_tags = [tag for tag in record1 if tag in record2]
        # additional tags in record1
        added_tags = [tag for tag in record1 if tag not in record2]
        # tags in record2 not present in record1
        deleted_tags = [tag for tag in record2 if tag not in record1]

        result = {}

        mod_patch = {}
        if common_tags:
            mod_patch = self.find_modified_tags(common_tags, record1, record2)
        if mod_patch:
            result['MOD'] = mod_patch

        add_patch = {}
        for tag in added_tags:
            add_patch[tag] = record1[tag]
        if add_patch:
            result['ADD'] = add_patch

        # Deleted tags only matter in replace/delete mode. The grouping is
        # explicit here instead of relying on and/or operator precedence.
        del_patch = {}
        if deleted_tags and opt_mode in ('replace', 'delete'):
            for tag in deleted_tags:
                del_patch[tag] = record2[tag]

        if del_patch:
            # for a tag that has been deleted in the upload record in replace
            # mode, loop through all the fields of the tag and add additional
            # subfield with code '0' and value '__DELETE_FIELDS__'
            # NOTE Indicators taken into consideration while deleting fields
            #
            # Snapshots are iterated because del_patch and its field lists
            # are modified by the calls inside the loops.
            for tag in list(del_patch):
                for data_tuple in list(del_patch[tag]):
                    ind1 = data_tuple[1]
                    ind2 = data_tuple[2]
                    record_delete_field(del_patch, tag, ind1, ind2)
                    record_add_field(del_patch, tag, ind1, ind2, "", [
                        (CFG_BIBUPLOAD_DELETE_CODE, CFG_BIBUPLOAD_DELETE_VALUE)
                    ])
            result['DEL'] = del_patch

        return result
Exemplo n.º 11
0
def merge_field_group(rec1,
                      rec2,
                      fnum,
                      ind1='',
                      ind2='',
                      merge_conflicting_fields=False):
    """Merges fields of a specific tag from 'rec2' into 'rec1'.

    By default only non-conflicting fields are merged: for every match in
    the field diff the field of rec1 is kept when there is one, otherwise
    the field of rec2 is taken. With merge_conflicting_fields set, the
    field of rec2 is taken first and the field of rec1 is added as well
    when its subfields differ.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: whether to merge conflicting fields or not
    @raise Exception: when the requested indicator pair is missing from the
        diff result
    """
    # Both indicators empty means that the merge covers all indicators.
    merging_all_indicators = not ind1 and not ind2

    # Nothing to merge when rec2 has no field with this tag.
    if not record_has_field(rec2, fnum):
        return

    # Get the fields of rec2.
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, '%', '%')
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if not fields2:
        return

    # The tag does not exist in rec1 at all: simply add the fields of rec2.
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    # Compare the fields, get diffs for given indicators.
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum, match_subfields,
                                 ind1, ind2)
    if alldiffs is None:
        return  # fields are the same, nothing to merge

    # Keep only the list of diffs by indicators (without the 'c').
    diffs_by_indicator = alldiffs[1]

    if merging_all_indicators:
        # Combine the diffs for each indicator to one list.
        diff = _combine_diffs(diffs_by_indicator)
    else:
        # Look for the requested indicator pair in the diff result and keep
        # only its list of diffs (without the indicator tuple).
        for indicator_diff in diffs_by_indicator:
            if indicator_diff[0] == (ind1, ind2):
                diff = indicator_diff[1]
                break
        else:
            raise Exception("Indicators not in diff result.")

    # Build the merged field list.
    fields1, fields2 = rec1[fnum], rec2[fnum]
    new_fields = []
    if not merge_conflicting_fields:  # merge non-conflicting fields
        for m in diff:  # for every match of fields in the diff
            if m[0] is not None:  # if rec1 has a field in the diff, keep it
                new_fields.append(deepcopy(fields1[m[0]]))
            else:  # else take the field from rec2
                new_fields.append(deepcopy(fields2[m[1]]))
    else:  # merge all fields
        for m in diff:  # for every match of fields in the diff
            if m[1] is not None:  # if rec2 has a field, add it
                new_fields.append(deepcopy(fields2[m[1]]))
                if m[0] is not None and fields1[m[0]][0] != fields2[m[1]][0]:
                    # if the fields are not the same then add the field of rec1
                    new_fields.append(deepcopy(fields1[m[0]]))
            else:
                new_fields.append(deepcopy(fields1[m[0]]))

    # Delete the existing fields before inserting the merged ones.
    record_delete_field(rec1, fnum, ind1, ind2)

    # Find where the new_fields should be inserted in rec1 (insert_index).
    if merging_all_indicators:
        insert_index = 0
    else:
        ind_pair = (ind1, ind2)
        first_last_dict = _first_and_last_index_for_each_indicator(
            rec1.get(fnum, []))
        # Find the indicator pair which is just before the one which will
        # be inserted. sorted() is used because dict.keys() cannot be
        # sorted in place on Python 3.
        ind_pair_before = None
        for pair in sorted(first_last_dict):
            if pair > ind_pair:
                break
            ind_pair_before = pair
        if ind_pair_before is None:
            # No smaller indicator pair exists: insert at the beginning.
            insert_index = 0
        else:
            # Insert after the last field index of the previous pair.
            insert_index = first_last_dict[ind_pair_before][1] + 1

    # Add the new (merged) fields in correct 'in_field_index' position.
    record_add_fields(rec1, fnum, new_fields, insert_index)
Exemplo n.º 12
0
def merge_field_group(rec1, rec2, fnum, ind1="", ind2="", merge_conflicting_fields=False):
    """Merges fields of a specific tag from 'rec2' into 'rec1'.

    By default only non-conflicting fields are merged: for every match in
    the field diff the field of rec1 is kept when there is one, otherwise
    the field of rec2 is taken. With merge_conflicting_fields set, the
    field of rec2 is taken first and the field of rec1 is added as well
    when its subfields differ.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: whether to merge conflicting fields or not
    @raise Exception: when the requested indicator pair is missing from the
        diff result
    """
    # Both indicators empty means that the merge covers all indicators.
    merging_all_indicators = not ind1 and not ind2

    # Nothing to merge when rec2 has no field with this tag.
    if not record_has_field(rec2, fnum):
        return

    # Get the fields of rec2.
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, "%", "%")
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if not fields2:
        return

    # The tag does not exist in rec1 at all: simply add the fields of rec2.
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    # Compare the fields, get diffs for given indicators.
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum, match_subfields, ind1, ind2)
    if alldiffs is None:
        return  # fields are the same, nothing to merge

    # Keep only the list of diffs by indicators (without the 'c').
    diffs_by_indicator = alldiffs[1]

    if merging_all_indicators:
        # Combine the diffs for each indicator to one list.
        diff = _combine_diffs(diffs_by_indicator)
    else:
        # Look for the requested indicator pair in the diff result and keep
        # only its list of diffs (without the indicator tuple).
        for indicator_diff in diffs_by_indicator:
            if indicator_diff[0] == (ind1, ind2):
                diff = indicator_diff[1]
                break
        else:
            raise Exception("Indicators not in diff result.")

    # Build the merged field list.
    fields1, fields2 = rec1[fnum], rec2[fnum]
    new_fields = []
    if not merge_conflicting_fields:  # merge non-conflicting fields
        for m in diff:  # for every match of fields in the diff
            if m[0] is not None:  # if rec1 has a field in the diff, keep it
                new_fields.append(deepcopy(fields1[m[0]]))
            else:  # else take the field from rec2
                new_fields.append(deepcopy(fields2[m[1]]))
    else:  # merge all fields
        for m in diff:  # for every match of fields in the diff
            if m[1] is not None:  # if rec2 has a field, add it
                new_fields.append(deepcopy(fields2[m[1]]))
                if m[0] is not None and fields1[m[0]][0] != fields2[m[1]][0]:
                    # if the fields are not the same then add the field of rec1
                    new_fields.append(deepcopy(fields1[m[0]]))
            else:
                new_fields.append(deepcopy(fields1[m[0]]))

    # Delete the existing fields before inserting the merged ones.
    record_delete_field(rec1, fnum, ind1, ind2)

    # Find where the new_fields should be inserted in rec1 (insert_index).
    if merging_all_indicators:
        insert_index = 0
    else:
        ind_pair = (ind1, ind2)
        first_last_dict = _first_and_last_index_for_each_indicator(rec1.get(fnum, []))
        # Find the indicator pair which is just before the one which will
        # be inserted. sorted() is used because dict.keys() cannot be
        # sorted in place on Python 3.
        ind_pair_before = None
        for pair in sorted(first_last_dict):
            if pair > ind_pair:
                break
            ind_pair_before = pair
        if ind_pair_before is None:
            # No smaller indicator pair exists: insert at the beginning.
            insert_index = 0
        else:
            # Insert after the last field index of the previous pair.
            insert_index = first_last_dict[ind_pair_before][1] + 1

    # Add the new (merged) fields in correct 'in_field_index' position.
    record_add_fields(rec1, fnum, new_fields, insert_index)
Exemplo n.º 13
0
    def compare_records(self, record1, record2, opt_mode=None):
        """
        Compares two records to identify added/modified/deleted tags.

        The records are either the upload record or existing record or
        record archived.

        @param record1: record whose extra tags are reported as added
        @param record2: record whose extra tags are reported as deleted
        @param opt_mode: deleted tags are only reported when this is
            'replace' or 'delete'

        Returns a dictionary with the non-empty patches stored under the
        keys 'MOD', 'ADD' and 'DEL'.
        """
        # Group the tags; each list keeps the key order of the record it
        # is built from.
        common_tags = [tag for tag in record1 if tag in record2]
        # additional tags in record1
        added_tags = [tag for tag in record1 if tag not in record2]
        # tags in record2 not present in record1
        deleted_tags = [tag for tag in record2 if tag not in record1]

        result = {}

        mod_patch = {}
        if common_tags:
            mod_patch = self.find_modified_tags(common_tags, record1, record2)
        if mod_patch:
            result['MOD'] = mod_patch

        add_patch = {}
        for tag in added_tags:
            add_patch[tag] = record1[tag]
        if add_patch:
            result['ADD'] = add_patch

        # Deleted tags only matter in replace/delete mode. The grouping is
        # explicit here instead of relying on and/or operator precedence.
        del_patch = {}
        if deleted_tags and opt_mode in ('replace', 'delete'):
            for tag in deleted_tags:
                del_patch[tag] = record2[tag]

        if del_patch:
            # for a tag that has been deleted in the upload record in replace
            # mode, loop through all the fields of the tag and add additional
            # subfield with code '0' and value '__DELETE_FIELDS__'
            # NOTE Indicators taken into consideration while deleting fields
            #
            # Snapshots are iterated because del_patch and its field lists
            # are modified by the calls inside the loops.
            for tag in list(del_patch):
                for data_tuple in list(del_patch[tag]):
                    ind1 = data_tuple[1]
                    ind2 = data_tuple[2]
                    record_delete_field(del_patch, tag, ind1, ind2)
                    record_add_field(del_patch, tag, ind1, ind2, "", [(CFG_BIBUPLOAD_DELETE_CODE, CFG_BIBUPLOAD_DELETE_VALUE)])
            result['DEL'] = del_patch

        return result
Exemplo n.º 14
0
def main():
    """Move resource-type values from 980/690 fields into 337 fields.

    Record ids are walked upwards from 1 until get_record returns nothing.
    For each record:
      - every 980 subfield with code 'a' and a value in TYPES becomes a
        337 field; every other 980 subfield becomes its own 980 field;
      - every 690 field starting with ('a', 'ling_resource_type') has its
        second subfield value turned into a 337 'a' field when that value
        is in TYPES (otherwise a warning is printed); all other 690 fields
        are kept, without duplicates.
    When the 337 list changed, the three tags are rewritten, old and new
    values are printed, and the record is uploaded in 'replace' mode after
    interactive confirmation.
    """
    # Imported without using any name from them here (presumably for
    # their import-time side effects -- TODO confirm).
    import invenio.modules.editor.models
    import invenio.modules.editor.views

    from invenio.legacy.search_engine import get_record
    from invenio.legacy.bibrecord import (
        record_delete_field,
        record_add_field,
    )
    from invenio.legacy.bibupload.engine import (
        bibupload, )

    for a in itertools.count(1):
        # old_rec is only read for display further down.
        old_rec = get_record(a)
        rec = get_record(a)

        if not rec:
            break

        print('Processing record: {0}'.format(a))

        old_337 = [f[0] for f in rec.get('337', [])]
        new_337 = old_337[:]
        new_690 = []
        new_980 = []

        # Split the 980 subfields: known types go to 337, the rest stay.
        for f in rec.get('980', []):
            for sf in f[0]:
                is_known_type = sf[0] == 'a' and sf[1] in TYPES
                destination = new_337 if is_known_type else new_980
                if [sf] not in destination:
                    destination.append([sf])

        for f in rec.get('690', []):
            sfs = f[0]
            if not (sfs[0][0] == 'a' and sfs[0][1] == 'ling_resource_type'):
                # Unrelated 690 field: keep it once.
                if sfs not in new_690:
                    new_690.append(sfs)
                continue
            res_type = sfs[1][1]
            if res_type not in TYPES:
                print("Unrecognized 'ling_resource_type' value! '{0}'".
                      format(res_type))
                continue
            type_field = [('a', res_type)]
            if type_field not in new_337:
                new_337.append(type_field)

        if new_337 == old_337:
            continue

        for tag in ('337', '980', '690'):
            record_delete_field(rec, tag)
        for tag, fields in (('337', new_337),
                            ('980', new_980),
                            ('690', new_690)):
            for f in fields:
                record_add_field(rec, tag, subfields=f)

        print('\nOld 337:')
        pprint(old_rec.get('337'))
        print('New 337:')
        pprint(rec.get('337'))

        print('\nOld 690:')
        pprint(old_rec.get('690'))
        print('New 690:')
        pprint(rec.get('690'))

        print('\nOld 980:')
        pprint(old_rec.get('980'))
        print('New 980:')
        pprint(rec.get('980'))
        if raw_input('Bibupload (y/n)? ') == 'y':
            bibupload(rec, 'replace')