def test_check_altered(self):
    """bibmatch - check altered match"""
    from invenio.legacy.bibrecord import record_has_field

    records = create_records(self.recxml4)
    # Precondition: the freshly created input record carries no 001 field.
    self.assertFalse(record_has_field(records[0][0], '001'))
    # Only the second element of the four-element result is inspected here.
    dummy1, matchedrecs, dummy3, dummy4 = match_records(records,
                                                        modify=1,
                                                        verbose=0)
    # With modify=1 the matched record is expected to come back with a
    # 001 field.
    self.assertTrue(record_has_field(matchedrecs[0][0], '001'))
def _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
    """Tell whether the record carries any of the given identifiers.

    Three comparisons are tried in turn and the first hit wins:
      - the value of field "001" against ``str(recid)``;
      - the value located through OAIID_TAG against ``rec_oaiid``;
      - the value located through SYSNO_TAG against ``rec_sysno``.
    A field that is absent from the record is never compared.

    Both *_TAG constants are read as six-character strings: characters
    0-2 are the field tag, characters 3, 4 and 5 are forwarded as the
    remaining lookup arguments (presumably ind1, ind2 and the subfield
    code -- confirm against record_get_field_value).

    @param record: the record to inspect
    @param recid: record ID; converted with str() before comparing
    @param rec_oaiid: OAI identifier to look for
    @param rec_sysno: system number to look for
    @return: True if at least one identifier matches, False otherwise
    """
    if (record_has_field(record, "001")
            and record_get_field_value(record, "001", "%", "%") == str(recid)):
        return True

    oai_field = OAIID_TAG[0:3]
    if record_has_field(record, oai_field):
        oai_value = record_get_field_value(record, oai_field, OAIID_TAG[3],
                                           OAIID_TAG[4], OAIID_TAG[5])
        if oai_value == rec_oaiid:
            return True

    sysno_field = SYSNO_TAG[0:3]
    if record_has_field(record, sysno_field):
        sysno_value = record_get_field_value(record, sysno_field,
                                             SYSNO_TAG[3], SYSNO_TAG[4],
                                             SYSNO_TAG[5])
        if sysno_value == rec_sysno:
            return True

    return False
def _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
    """Return True when the record matches one of the supplied IDs.

    The record ID is looked up in field '001' and compared as a string.
    The OAI identifier and the system number are looked up in the fields
    designated by OAIID_TAG and SYSNO_TAG respectively.  Fields missing
    from the record are skipped rather than compared.

    @param record: the record to inspect
    @param recid: record ID (compared through str())
    @param rec_oaiid: OAI identifier
    @param rec_sysno: system number
    @return: True on the first matching identifier, otherwise False
    """
    def _tagged_value_equals(marc_tag, expected):
        """Compare the value addressed by a six-character tag spec.

        Characters 0-2 of marc_tag select the field; characters 3, 4 and
        5 are passed on unchanged to record_get_field_value.
        """
        field = marc_tag[0:3]
        if not record_has_field(record, field):
            return False
        found = record_get_field_value(record, field, marc_tag[3],
                                       marc_tag[4], marc_tag[5])
        if found == expected:
            return True
        return False

    if record_has_field(record, '001'):
        control_number = record_get_field_value(record, '001', '%', '%')
        if control_number == str(recid):
            return True
    if _tagged_value_equals(OAIID_TAG, rec_oaiid):
        return True
    return _tagged_value_equals(SYSNO_TAG, rec_sysno)
def update_references(recid, overwrite=True):
    """Update references for a record

    First, we extract references from a record.
    Then, we are not updating the record directly but adding a bibupload
    task in -c mode which takes care of updating the record.

    Parameters:
    * recid: the id of the record
    * overwrite: when false, a record that already has a 999 field is
                 left untouched and RecordHasReferences is raised

    Raises RecordHasReferences when overwrite is disabled and the record
    already has a 999 field, or when the record has any 999C59 value
    (reported as "curated").
    """
    if not overwrite:
        # Check for references in record
        record = get_record(recid)
        if record and record_has_field(record, '999'):
            raise RecordHasReferences('Record has references and overwrite '
                                      'mode is disabled: %s' % recid)

    if get_fieldvalues(recid, '999C59'):
        raise RecordHasReferences('Record has been curated: %s' % recid)

    # Parse references
    references_xml = extract_references_from_record_xml(recid)

    # Save new record to file
    (temp_fd, temp_path) = mkstemp(prefix=CFG_REFEXTRACT_FILENAME,
                                   dir=CFG_TMPSHAREDDIR)
    temp_file = os.fdopen(temp_fd, 'w')
    try:
        temp_file.write(references_xml)
    finally:
        # Close the handle even when the write fails so that the
        # descriptor returned by mkstemp is not leaked.
        temp_file.close()

    # Update record
    task_low_level_submission('bibupload', 'refextract', '-P', '4',
                              '-c', temp_path)
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """Complete the record rec with the contents of a template.

    Template fields whose tag is absent from rec are added to it.  For
    tags that rec already has, every existing field of rec receives the
    subfield codes it lacks, using the first value the template field
    holds for that code.  The subfields of rec are reordered at the end.

    @param rec: the record to extend (modified in place)
    @param template_name: name of the template to load
    @param is_hp_record: if true (holding pen record), all subfields of
        the template are made volatile before merging
    @return: rec, or None when no template could be loaded
    """
    template = get_record_template(template_name)
    if not template:
        return
    template_rec = create_record(template)[0]

    # holding pen records get a fully volatile template
    if is_hp_record:
        record_make_all_subfields_volatile(template_rec)

    for tag in template_rec:
        if not record_has_field(rec, tag):
            # tag unknown to the record: copy every template field over
            for template_field in template_rec[tag]:
                record_add_field(rec, tag, template_field[1],
                                 template_field[2],
                                 subfields=template_field[0])
            continue

        # tag already present: only fill in the missing subfield codes
        for template_field in template_rec[tag]:
            template_codes = field_get_subfield_codes(template_field)
            for rec_field in rec[tag]:
                present_codes = field_get_subfield_codes(rec_field)
                for code in template_codes:
                    if code in present_codes:
                        continue
                    default_value = field_get_subfield_values(
                        template_field, code)[0]
                    field_add_subfield(rec_field, code, default_value)

    record_order_subfields(rec)
    return rec
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """Extend the record rec with a template's content and return it.

    Fields of the template whose tag does not occur in rec are added as
    they are.  Where the tag does occur, each field of rec with that tag
    is given the subfield codes it is missing, valued with the template
    field's first value for the code.  Finally the subfields of rec are
    put in order.

    @param rec: record to extend; it is changed in place
    @param template_name: name handed to get_record_template
    @param is_hp_record: for holding pen records, make every template
        subfield volatile first
    @return: the extended rec; None if the template lookup gave nothing
    """
    def _copy_missing_subfields(target_field, source_field, source_codes):
        """Add to target_field each code of source_codes it lacks."""
        existing_codes = field_get_subfield_codes(target_field)
        for code in source_codes:
            if code not in existing_codes:
                field_add_subfield(
                    target_field, code,
                    field_get_subfield_values(source_field, code)[0])

    template = get_record_template(template_name)
    if not template:
        return

    template_bibrec = create_record(template)[0]
    if is_hp_record:
        # holding pen record: the whole template becomes volatile
        record_make_all_subfields_volatile(template_bibrec)

    for field_tag in template_bibrec:
        if record_has_field(rec, field_tag):
            for source_field in template_bibrec[field_tag]:
                source_codes = field_get_subfield_codes(source_field)
                for target_field in rec[field_tag]:
                    _copy_missing_subfields(target_field, source_field,
                                            source_codes)
        else:
            for source_field in template_bibrec[field_tag]:
                record_add_field(rec, field_tag, source_field[1],
                                 source_field[2], subfields=source_field[0])

    record_order_subfields(rec)
    return rec
def merge_record_with_template(rec, template_name):
    """Extend the record rec with the contents of the template.

    For each tag of the template: if rec has no field with that tag, all
    template fields for the tag are added to rec; otherwise every field
    of rec with that tag gets the subfield codes it does not have yet,
    each valued with the template field's first value for that code.

    @param rec: the record to extend (modified in place)
    @param template_name: name of the template to load
    @return: rec, or None when no template could be loaded
    """
    template = get_record_template(template_name)
    if not template:
        return
    template_record = create_record(template)[0]

    for tag in template_record:
        # Decided once per tag, before any field is added for it.
        tag_in_record = record_has_field(rec, tag)
        for template_field in template_record[tag]:
            if not tag_in_record:
                record_add_field(rec, tag, template_field[1],
                                 template_field[2],
                                 subfields=template_field[0])
                continue
            codes_in_template = field_get_subfield_codes(template_field)
            for record_field in rec[tag]:
                codes_in_record = field_get_subfield_codes(record_field)
                for code in codes_in_template:
                    if code in codes_in_record:
                        continue
                    first_value = field_get_subfield_values(template_field,
                                                            code)[0]
                    field_add_subfield(record_field, code, first_value)
    return rec
def add_field(rec1, rec2, fnum, findex1, findex2):
    """Copy one field of rec2 into rec1 at a position derived from the
    diff between the two records.

    @param rec1: First record (a record dictionary structure); modified
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param findex1: position of the rec1 field before which to insert,
        or None to let the position be worked out
    @param findex2: position, within the fnum fields of rec2, of the
        field to copy
    """
    new_field = rec2[fnum][findex2]

    if findex1 is not None:
        # An existing rec1 field was designated: insert right before it.
        record_add_fields(rec1, fnum, [new_field], findex1)
    elif not record_has_field(rec1, fnum):
        # rec1 has no field with this tag yet: add without a position.
        record_add_fields(rec1, fnum, [new_field])
    else:
        # The tag exists in rec1 but no position was given: diff the two
        # field groups over all indicators and keep only the per-indicator
        # diff lists (element 1 of the result), merged into a single list.
        per_indicator_diffs = record_field_diff(rec1[fnum], rec2[fnum],
                                                fnum, match_subfields)[1]
        combined = _combine_diffs(per_indicator_diffs)

        # Walk the diff up to the entry of the rec2 field, remembering
        # the last rec1 position met on the way (-1 if there is none).
        preceding = -1
        for entry in combined:
            if entry[1] == findex2:
                break
            if entry[0] is not None:
                preceding = entry[0]

        # Insert one position after that rec1 field.
        record_add_fields(rec1, fnum, [new_field], preceding + 1)
def add_field(rec1, rec2, fnum, findex1, findex2):
    """Adds a field of rec2 to rec1, positioned according to the diffing
    of rec1 with rec2.

    @param rec1: First record (a record dictionary structure)
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param findex1: the rec1 field position in the group of fields it
        belongs to; None when there is no corresponding rec1 field
    @param findex2: the rec2 field position in the group of fields it
        belongs to
    """
    def _last_rec1_index_before(diff_entries):
        """Return the last rec1 index listed ahead of the rec2 field in
        the diff, or -1 when no rec1 field precedes it."""
        anchor = -1
        for entry in diff_entries:
            if entry[1] == findex2:
                return anchor
            if entry[0] is not None:
                anchor = entry[0]
        return anchor

    incoming = [rec2[fnum][findex2]]

    # a rec1 field was pointed at: the new field goes in front of it
    if findex1 is not None:
        record_add_fields(rec1, fnum, incoming, findex1)
        return

    # rec1 does not have the tag at all: add without an explicit position
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, incoming)
        return

    # otherwise diff both field groups for all indicators; element 1 of
    # the result is the list of diffs by indicators
    diff_result = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                    match_subfields)
    diff_entries = _combine_diffs(diff_result[1])

    # the field is added one position after the preceding rec1 field
    record_add_fields(rec1, fnum, incoming,
                      _last_rec1_index_before(diff_entries) + 1)
def merge_field_group(rec1, rec2, fnum, ind1='', ind2='',
                      merge_conflicting_fields=False):
    """Merges the fields of 'rec2' into 'rec1' for a specific tag.

    The fields of both records are diffed and a new field list is built
    from the diff.  By default, for every diff entry the field of rec1
    is kept when there is one and the field of rec2 is taken otherwise.
    With merge_conflicting_fields the field of rec2 comes first and the
    field of rec1 is added after it when their subfields differ.  The
    resulting list replaces the fields of rec1 for the given tag and
    indicators.

    @param rec1: First record (a record dictionary structure); modified
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: whether to merge conflicting fields
        or not
    @raise Exception: if the pair (ind1, ind2) is not part of the diff
        result
    """
    ### Both indicators empty means the merge covers all indicators
    merging_all_indicators = not ind1 and not ind2

    ### check if there is no field in rec2 to be merged in rec1
    if not record_has_field(rec2, fnum):
        return

    ### get fields of rec2
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, '%', '%')
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if not fields2:
        return

    ### check if field in rec1 doesn't even exist
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    ### compare the fields, get diffs for given indicators
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                 match_subfields, ind1, ind2)

    ### check if fields are the same
    if alldiffs is None:
        return  # nothing to merge

    ### find the diffing for the fields of the given indicators
    alldiffs = alldiffs[1]  # keep only the list of diffs by indicators
    if merging_all_indicators:
        # combine the diffs for each indicator to one list
        diff = _combine_diffs(alldiffs)
    else:
        # look for the requested indicator pair in the diff result
        for diff in alldiffs:
            if diff[0] == (ind1, ind2):
                break
        else:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise Exception("Indicators not in diff result.")
        diff = diff[1]  # keep only the list of diffs (no indicator tuple)

    ### proceed to merging fields in a new field list
    fields1, fields2 = rec1[fnum], rec2[fnum]
    new_fields = []
    # Explicit comparison kept on purpose: a non-bool value such as None
    # still selects the "merge all fields" branch, as it always did.
    if merge_conflicting_fields == False:
        # merge non-conflicting fields
        for m in diff:
            if m[0] is not None:
                # rec1 has a field in this diff entry: keep it
                new_fields.append(deepcopy(fields1[m[0]]))
            else:
                # otherwise take the field from rec2
                new_fields.append(deepcopy(fields2[m[1]]))
    else:
        # merge all fields
        for m in diff:
            if m[1] is not None:
                # rec2 has a field: add it first
                new_fields.append(deepcopy(fields2[m[1]]))
                if m[0] is not None and fields1[m[0]][0] != fields2[m[1]][0]:
                    # subfields differ: also add the field of rec1
                    new_fields.append(deepcopy(fields1[m[0]]))
            else:
                new_fields.append(deepcopy(fields1[m[0]]))

    ### delete existing fields
    record_delete_field(rec1, fnum, ind1, ind2)

    ## find where the new_fields should be inserted in rec1 (insert_index)
    if merging_all_indicators:
        insert_index = 0
    else:
        ind_pair = (ind1, ind2)
        first_last_dict = _first_and_last_index_for_each_indicator(
            rec1.get(fnum, []))
        # find the indicator pair just before the one being inserted;
        # sorted() works whether keys() returns a list or a view
        ind_pair_before = None
        for pair in sorted(first_last_dict.keys()):
            if pair > ind_pair:
                break
            ind_pair_before = pair
        if ind_pair_before is None:
            # no smaller indicator pair exists: insert at the beginning
            insert_index = 0
        else:
            # insert after the last field of the previous indicator pair
            insert_index = first_last_dict[ind_pair_before][1] + 1

    ### add the new (merged) fields in correct 'in_field_index' position
    record_add_fields(rec1, fnum, new_fields, insert_index)
def merge_field_group(rec1, rec2, fnum, ind1="", ind2="",
                      merge_conflicting_fields=False):
    """Merges the fields of 'rec2' into 'rec1' for a specific tag.

    Both field groups are diffed; the diff drives the construction of a
    new list of fields which then replaces, in rec1, the fields of the
    given tag and indicators.  Without merge_conflicting_fields each
    diff entry contributes the field of rec1 if it has one, else the
    field of rec2.  With merge_conflicting_fields each entry contributes
    the field of rec2 followed by the field of rec1 when their subfields
    are not equal.

    @param rec1: First record (a record dictionary structure); modified
    @param rec2: Second record (a record dictionary structure)
    @param fnum: a 3 characters long string indicating field tag number
    @param ind1: a 1 character long string
    @param ind2: a 1 character long string
    @param merge_conflicting_fields: whether to merge conflicting fields
        or not
    @raise Exception: if the pair (ind1, ind2) is not part of the diff
        result
    """
    ### Check if merging goes for all indicators and set a boolean
    merging_all_indicators = not ind1 and not ind2

    ### nothing to do when rec2 has no field with this tag
    if not record_has_field(rec2, fnum):
        return

    ### get fields of rec2
    if merging_all_indicators:
        fields2 = record_get_field_instances(rec2, fnum, "%", "%")
    else:
        fields2 = record_get_field_instances(rec2, fnum, ind1, ind2)
    if not fields2:
        return

    ### rec1 has no such field at all: simply add those of rec2
    if not record_has_field(rec1, fnum):
        record_add_fields(rec1, fnum, fields2)
        return

    ### compare the fields, get diffs for given indicators
    alldiffs = record_field_diff(rec1[fnum], rec2[fnum], fnum,
                                 match_subfields, ind1, ind2)
    if alldiffs is None:
        return  # fields are the same, nothing to merge

    ### find the diffing for the fields of the given indicators
    alldiffs = alldiffs[1]  # keep only the list of diffs by indicators
    if merging_all_indicators:
        # combine the diffs for each indicator to one list
        diff = _combine_diffs(alldiffs)
    else:
        # diffing for one indicator pair: locate it in the diff result
        for diff in alldiffs:
            if diff[0] == (ind1, ind2):
                break
        else:
            # raise with a call expression so the statement parses on
            # Python 3 as well as Python 2
            raise Exception("Indicators not in diff result.")
        diff = diff[1]  # drop the indicator tuple, keep the diff list

    ### proceed to merging fields in a new field list
    fields1, fields2 = rec1[fnum], rec2[fnum]
    new_fields = []
    # The comparison with False is intentional: values like None keep
    # falling through to the "merge all fields" branch.
    if merge_conflicting_fields == False:
        # merge non-conflicting fields
        for m in diff:
            if m[0] is not None:
                # keep the field of rec1 whenever the diff lists one
                new_fields.append(deepcopy(fields1[m[0]]))
            else:
                # else take the field from rec2
                new_fields.append(deepcopy(fields2[m[1]]))
    else:
        # merge all fields
        for m in diff:
            if m[1] is not None:
                # the field of rec2 goes first
                new_fields.append(deepcopy(fields2[m[1]]))
                if m[0] is not None and fields1[m[0]][0] != fields2[m[1]][0]:
                    # followed by the field of rec1 if subfields differ
                    new_fields.append(deepcopy(fields1[m[0]]))
            else:
                new_fields.append(deepcopy(fields1[m[0]]))

    ### delete existing fields
    record_delete_field(rec1, fnum, ind1, ind2)

    ## find where the new_fields should be inserted in rec1 (insert_index)
    if merging_all_indicators:
        insert_index = 0
    else:
        ind_pair = (ind1, ind2)
        first_last_dict = _first_and_last_index_for_each_indicator(
            rec1.get(fnum, []))
        # find the indicator pair which is just before the one which
        # will be inserted (sorted() also accepts Python 3 key views)
        ind_pair_before = None
        for pair in sorted(first_last_dict.keys()):
            if pair > ind_pair:
                break
            ind_pair_before = pair
        if ind_pair_before is None:
            # no smaller indicator pair exists: insert at the beginning
            insert_index = 0
        else:
            # insert after the last field index of the previous pair
            insert_index = first_last_dict[ind_pair_before][1] + 1

    ### add the new (merged) fields in correct 'in_field_index' position
    record_add_fields(rec1, fnum, new_fields, insert_index)