Пример #1
0
def merge_consistent_groups(group_dict=None):
    """Merge the records of every consistent group into a single person.

    For each group that is not flagged ``inconsistent`` and holds more than
    one record, the first record absorbs the remaining ones through
    ``merge_records_by_hypostases(..., save=False)`` and the group's
    ``person`` is set to the first record's person. Changes are collected
    in memory and written afterwards with bulk updates; the persons the
    merges report as unnecessary are deleted at the end.

    :param group_dict: mapping of group -> list of its records. When
        ``None`` it is obtained from ``Group.get_dictionary()``.
    """
    print("Starting consistent groups merge")
    if group_dict is None:
        print("Extracting groups")
        group_dict = Group.get_dictionary()
    records_for_update = []
    hypostases_for_update = []
    groups_for_update = []
    persons_to_delete = set()

    print("Iterating")
    for group, records in group_dict.items():
        print(group.id)
        # Inconsistent groups and groups with nothing to merge are left alone.
        if group.inconsistent or len(records) <= 1:
            continue
        primary = records[0]
        changed_records, changed_hypostases, unnecessary_persons = \
            primary.merge_records_by_hypostases(records[1:], save=False)
        group.person = primary.person
        groups_for_update.append(group)
        hypostases_for_update.extend(changed_hypostases)
        records_for_update.extend(changed_records)
        persons_to_delete.update(unnecessary_persons)
    print("Saving")
    bulk_update(records_for_update, update_fields=['person'])
    bulk_update(hypostases_for_update, update_fields=['person'])
    # Write only the groups whose person was reassigned above, instead of
    # every group in the dictionary.
    bulk_update(groups_for_update, update_fields=['person'])
    for person in persons_to_delete:
        person.delete()
    print("Consistent groups merge: done")
Пример #2
0
def distribute_records_among_existing_groups(**kwargs):
    """Attach group-less records to already existing groups.

    Loads every ``GroupRecord`` whose group is unset and asks each one for
    a suitable group via ``record.seek_for_group(group_dict, **kwargs)``.
    Records that receive a group are written with one bulk update of the
    ``group`` field, after which ``mark_inconsistency`` is run for the
    groups that gained records. Returns early when there are no groups.

    :param kwargs: forwarded unchanged to ``seek_for_group``.
    """
    print("Starting distribution among existing groups")
    print("Extracting records")
    pending = list(GroupRecord.objects.filter(group__isnull=True))
    print("Making dictionary of groups")
    group_dict = Group.get_dictionary()
    if not len(group_dict):
        print("No groups found. Finishing")
        return

    assigned_records = []
    touched_groups = set()
    print("Handling records")
    total = len(pending)
    batch_started = time()
    for position, record in enumerate(pending, start=1):
        # Progress report (with the time spent on the batch) every 100 records.
        if not position % 100:
            print("{} of {} records handled {}".format(position, total,
                                                       time() - batch_started))
            batch_started = time()
        match = record.seek_for_group(group_dict, **kwargs)
        if match is None:
            continue
        record.group = match
        assigned_records.append(record)
        touched_groups.add(match)

    print("Have {0} records to update".format(len(assigned_records)))
    if assigned_records:
        bulk_update(assigned_records, update_fields=['group'])
    print("Have {0} groups to update".format(len(touched_groups)))
    if touched_groups:
        mark_inconsistency(groups=list(touched_groups))
    print("Distribution among existing groups: done")
Пример #3
0
def update_persons_in_groups():
    """Use in case some groups were merged, but person was not appropriately set.

    Walks every group from ``Group.get_dictionary()``. For a group whose
    ``person`` is ``None``, collects the distinct ``person`` values of its
    records; when there is exactly one such value and it is not ``None``,
    it is assigned to the group and the group is saved.
    """
    group_dict = Group.get_dictionary()
    ttl = len(group_dict)
    for cntr, (group, records) in enumerate(group_dict.items(), start=1):
        print("{} of {}".format(cntr, ttl))
        if group.person is not None:
            continue
        unique_persons = {r.person for r in records}
        if len(unique_persons) != 1:
            continue
        person = unique_persons.pop()
        # All records lack a person as well: there is nothing to assign, so
        # avoid re-saving the group with the same ``None`` value.
        if person is None:
            continue
        group.person = person
        group.save()
Пример #4
0
def drop_from_group():
    """Detach the first record of every group that has more than two records.

    The group dictionary comes from ``Group.get_dictionary()``. Detached
    records get ``group = None`` in memory and are then written with a
    single ``bulk_update`` of the ``group`` field in batches of 1000.
    """
    print("Droping one record from big groups")
    records_by_group = Group.get_dictionary()
    total = len(records_by_group)
    detached = []
    for position, group_records in enumerate(records_by_group.values(),
                                             start=1):
        # Progress report every 100 groups.
        if not position % 100:
            print("{} of {}".format(position, total))
        if len(group_records) <= 2:
            continue
        head = group_records[0]
        head.group = None
        detached.append(head)
    print("Saving")
    bulk_update(detached, update_fields=['group'], batch_size=1000)
    print("Done")
Пример #5
0
def mark_inconsistency(groups=None, group_dict=None):
    """Update inconsistency flag of chosen groups.

    A group counts as consistent when its first record is
    ``completely_equal_for_consistency`` to each of its other records; a
    group with no records or a single record is therefore consistent.
    Only the groups whose flag actually flips are written, with one
    ``bulk_update`` of the ``inconsistent`` field.

    :param groups: iterable of ``Group`` instances to check. When ``None``
        every group of ``group_dict`` is checked.
    :param group_dict: mapping of group -> list of its records. When
        ``None`` it is obtained from ``Group.get_dictionary()``.
    :raises TypeError: if ``groups`` contains a non-``Group`` item.
    :raises KeyError: if a requested group is missing from ``group_dict``.
    """
    def check_group_consistency(group_record_list):
        """Returns True, if all records in list are fully equal"""
        # An empty list has nothing to disagree about.
        if not group_record_list:
            return True
        first = group_record_list[0]
        return all(
            first.completely_equal_for_consistency(another_record=other)
            for other in group_record_list[1:]
        )

    print("Starting procedure of inconsistency marking")
    if group_dict is None:
        print("Making dictionary of groups")
        group_dict = Group.get_dictionary()
    groups_to_update = set()
    print("Iterating through groups")
    if groups is None:
        groups = group_dict.keys()
    else:
        # The iterable is walked twice (validation, then processing), so a
        # one-shot iterator must be materialised or the second pass would
        # see nothing.
        groups = list(groups)
        for group in groups:
            if not isinstance(group, Group):
                raise TypeError("groups must contain Group instances")
    for group in groups:
        is_consistent = check_group_consistency(
            group_record_list=group_dict[group])
        if is_consistent and group.inconsistent:
            group.inconsistent = False
            groups_to_update.add(group)
        elif not is_consistent and not group.inconsistent:
            group.inconsistent = True
            groups_to_update.add(group)
    print("In-memory changes done")
    print("{} groups will be changed".format(len(groups_to_update)))
    bulk_update(list(groups_to_update), update_fields=['inconsistent'])
    print("Inconsistency marking: done")