def merge_consistent_groups(group_dict=None):
    """Merge the records of every consistent multi-record group.

    For each group that is not flagged ``inconsistent`` and holds more than
    one record, the remaining records are merged into the first one via
    ``merge_records_by_hypostases(..., save=False)`` and the group's
    ``person`` is set to the first record's person. Changes are collected in
    memory and persisted afterwards with ``bulk_update``; the persons the
    merge reports as unnecessary are deleted last.

    Args:
        group_dict: Mapping of group -> sequence of its records. When
            ``None`` it is built with ``Group.get_dictionary()``.
    """
    print("Starting consistent groups merge")
    if group_dict is None:
        print("Extracting groups")
        group_dict = Group.get_dictionary()

    records_for_update = []
    hypostases_for_update = []
    groups_for_update = []
    persons_to_delete = set()

    print("Iterating")
    for group, records in group_dict.items():
        print(group.id)
        if group.inconsistent or len(records) <= 1:
            # Nothing to merge: conflicting records or a single record.
            continue
        primary = records[0]
        changed_records, changed_hypostases, unnecessary_persons = \
            primary.merge_records_by_hypostases(records[1:], save=False)
        group.person = primary.person
        groups_for_update.append(group)
        hypostases_for_update.extend(changed_hypostases)
        records_for_update.extend(changed_records)
        persons_to_delete.update(unnecessary_persons)

    print("Saving")
    # Empty batches are skipped, mirroring
    # distribute_records_among_existing_groups.
    if records_for_update:
        bulk_update(records_for_update, update_fields=['person'])
    if hypostases_for_update:
        bulk_update(hypostases_for_update, update_fields=['person'])
    # Persist only the groups whose person was actually reassigned above,
    # not every group in the dictionary.
    if groups_for_update:
        bulk_update(groups_for_update, update_fields=['person'])
    # Persons are removed only after everything pointing at them was saved.
    for person in persons_to_delete:
        person.delete()
    print("Consistent groups merge: done")
def distribute_records_among_existing_groups(**kwargs):
    """Attach records that have no group to already existing groups.

    Loads every ``GroupRecord`` whose ``group`` is null, asks each one for a
    matching group with ``record.seek_for_group(group_dict, **kwargs)`` and,
    when one is returned, assigns it. Assigned records are saved with
    ``bulk_update`` and the groups that received records are passed to
    ``mark_inconsistency``. Returns early if there are no groups at all.

    Args:
        **kwargs: Forwarded unchanged to ``record.seek_for_group``.
    """
    print("Starting distribution among existing groups")
    print("Extracting records")
    orphan_records = list(GroupRecord.objects.filter(group__isnull=True))
    print("Making dictionary of groups")
    group_dict = Group.get_dictionary()
    if not group_dict:
        print("No groups found. Finishing")
        return

    matched_records = []
    touched_groups = set()
    print("Handling records")
    total = len(orphan_records)
    batch_started = time()
    for position, record in enumerate(orphan_records, start=1):
        if position % 100 == 0:
            # Progress line with the time spent since the previous report.
            print("{} of {} records handled {}".format(
                position, total, time() - batch_started))
            batch_started = time()
        target_group = record.seek_for_group(group_dict, **kwargs)
        if target_group is None:
            continue
        record.group = target_group
        matched_records.append(record)
        touched_groups.add(target_group)

    print("Have {0} records to update".format(len(matched_records)))
    if matched_records:
        bulk_update(matched_records, update_fields=['group'])
    print("Have {0} groups to update".format(len(touched_groups)))
    if touched_groups:
        mark_inconsistency(groups=list(touched_groups))
    print("Distribution among existing groups: done")
def update_persons_in_groups():
    """Fill in the missing person of groups whose records agree on one.

    Intended for the case where groups were merged but their person was not
    set. For every group from ``Group.get_dictionary()`` whose ``person`` is
    ``None``, the distinct ``person`` values of its records are collected;
    if there is exactly one, it becomes the group's person and the group is
    saved. Progress is printed for every group.
    """
    group_dict = Group.get_dictionary()
    total = len(group_dict)
    for position, (group, records) in enumerate(group_dict.items(), start=1):
        print("{} of {}".format(position, total))
        if group.person is not None:
            continue
        distinct_persons = {record.person for record in records}
        if len(distinct_persons) != 1:
            # Zero or several candidates: leave the group untouched.
            continue
        group.person = distinct_persons.pop()
        group.save()
def drop_from_group():
    """Detach the first record from every group holding more than two.

    Walks the record lists of ``Group.get_dictionary()``; for each list
    longer than two, the first record's ``group`` is set to ``None``. The
    detached records are then saved with ``bulk_update`` in batches of 1000.
    Progress is printed every 100 groups.
    """
    print("Droping one record from big groups")
    group_dict = Group.get_dictionary()
    total = len(group_dict)
    detached = []
    for position, group_records in enumerate(group_dict.values(), start=1):
        if position % 100 == 0:
            print("{} of {}".format(position, total))
        if len(group_records) <= 2:
            continue
        dropped = group_records[0]
        dropped.group = None
        detached.append(dropped)
    print("Saving")
    bulk_update(detached, update_fields=['group'], batch_size=1000)
    print("Done")
def mark_inconsistency(groups=None, group_dict=None):
    """Update the ``inconsistent`` flag of the chosen groups.

    A group is consistent when its first record reports
    ``completely_equal_for_consistency`` with every other record of the
    group. Groups whose flag disagrees with that result are updated in
    memory and then persisted with ``bulk_update``.

    Args:
        groups: Iterable of ``Group`` instances to check. Any iterable is
            accepted, including one-shot iterators. When ``None`` every
            group in ``group_dict`` is checked.
        group_dict: Mapping of group -> sequence of its records. When
            ``None`` it is built with ``Group.get_dictionary()``.

    Raises:
        TypeError: If ``groups`` contains something that is not a ``Group``.
            Raised before any group is modified.
    """

    def check_group_consistency(group_record_list):
        """Return True if all records in the list are fully equal.

        An empty list is treated as consistent: it holds no conflicting
        records, and indexing its first element would raise IndexError.
        """
        if not group_record_list:
            return True
        first = group_record_list[0]
        return all(
            first.completely_equal_for_consistency(another_record=other)
            for other in group_record_list[1:]
        )

    print("Starting procedure of inconsistency marking")
    if group_dict is None:
        print("Making dictionary of groups")
        group_dict = Group.get_dictionary()
    groups_to_update = set()
    print("Iterating through groups")
    if groups is None:
        groups = list(group_dict.keys())
    else:
        # Materialise once: the validation pass below would otherwise
        # exhaust a generator and leave nothing for the processing loop.
        groups = list(groups)
        for group in groups:
            if not isinstance(group, Group):
                raise TypeError("groups must contain Group instances")

    for group in groups:
        records = group_dict[group]
        should_be_inconsistent = not check_group_consistency(
            group_record_list=records)
        # Touch only groups whose stored flag disagrees with the fresh check.
        if bool(group.inconsistent) != should_be_inconsistent:
            group.inconsistent = should_be_inconsistent
            groups_to_update.add(group)

    print("In-memory changes done")
    print("{} groups will be changed".format(len(groups_to_update)))
    # Skip the empty batch, mirroring distribute_records_among_existing_groups.
    if groups_to_update:
        bulk_update(list(groups_to_update), update_fields=['inconsistent'])
    print("Inconsistency marking: done")