示例#1
0
def group_daids_for_indexing_by_name(ibs, daid_list, num_indexers=8,
                                     verbose=True):
    """
    Returns groups with only one annotation per name in each group.

    The annotations are grouped by known name with
    ``ibs.group_annots_by_known_names`` and the name groups are then
    distributed over ``num_bins`` indexer groups with ``ut.sample_zip``
    (``per_bin=1``, ``allow_overflow=True``).

    Args:
        ibs: object providing ``group_annots_by_known_names``,
            ``unflat_map`` and ``get_annot_name_rowids``.
        daid_list (list): annotation ids handed to
            ``ibs.group_annots_by_known_names``.
        num_indexers (int): upper bound on the number of indexer groups.
        verbose (bool): if True, print the chosen sizes.

    Returns:
        tuple: ``(aids_list, overflow_aids, num_bins)`` where ``aids_list``
        and ``overflow_aids`` are the two values returned by
        ``ut.sample_zip`` and ``num_bins`` is
        ``min(largest name group size, num_indexers)``. When there are no
        name groups at all the result is ``([], [], 0)``.

    Note:
        The second value returned by ``group_annots_by_known_names`` is not
        used and is not part of the result.
    """
    aidgroup_list, _invalid_aids = ibs.group_annots_by_known_names(daid_list)
    if len(aidgroup_list) == 0:
        # max() over an empty sequence raises ValueError; without any name
        # group there is nothing to distribute over indexers.
        return [], [], 0
    largest_groupsize = max(map(len, aidgroup_list))
    num_bins = min(largest_groupsize, num_indexers)
    if verbose or ut.VERYVERBOSE:
        print('[mindex] num_indexers = %d ' % (num_indexers,))
        print('[mindex] largest_groupsize = %d ' % (largest_groupsize,))
        print('[mindex] num_bins = %d ' % (num_bins,))
    # Group annotations for indexing according to the split criteria
    aids_list, overflow_aids = ut.sample_zip(
        aidgroup_list, num_bins, allow_overflow=True, per_bin=1)
    if __debug__:
        # Every name group must contain annotations of a single name
        nidgroup_list = ibs.unflat_map(ibs.get_annot_name_rowids, aidgroup_list)
        for nidgroup in nidgroup_list:
            assert ut.allsame(nidgroup), 'bad name grouping'
        # Every subsequent indexer must be a subset (in name/identity space)
        # of the one directly before it.
        nids_list = ibs.unflat_map(ibs.get_annot_name_rowids, aids_list)
        prev_ = None
        for nids in nids_list:
            nidset = set(nids)
            if prev_ is not None:
                assert prev_.issuperset(nidset), 'bad indexer grouping'
            # Advance the reference set so the next indexer is compared with
            # this one and not only with the very first indexer.
            prev_ = nidset
    return aids_list, overflow_aids, num_bins
示例#2
0
def group_daids_for_indexing_by_name(ibs, daid_list, num_indexers=8,
                                     verbose=True):
    """
    Returns groups with only one annotation per name in each group.

    The annotations are grouped by known name with
    ``ibs.group_annots_by_known_names`` and the name groups are then
    distributed over ``num_bins`` indexer groups with ``ut.sample_zip``
    (``per_bin=1``, ``allow_overflow=True``).

    Args:
        ibs: object providing ``group_annots_by_known_names``,
            ``unflat_map`` and ``get_annot_name_rowids``.
        daid_list (list): annotation ids handed to
            ``ibs.group_annots_by_known_names``.
        num_indexers (int): upper bound on the number of indexer groups.
        verbose (bool): if True, print the chosen sizes.

    Returns:
        tuple: ``(aids_list, overflow_aids, num_bins)`` where ``aids_list``
        and ``overflow_aids`` are the two values returned by
        ``ut.sample_zip`` and ``num_bins`` is
        ``min(largest name group size, num_indexers)``. When there are no
        name groups at all the result is ``([], [], 0)``.

    Note:
        The second value returned by ``group_annots_by_known_names`` is not
        used and is not part of the result.
    """
    aidgroup_list, _invalid_aids = ibs.group_annots_by_known_names(daid_list)
    if len(aidgroup_list) == 0:
        # max() over an empty sequence raises ValueError; without any name
        # group there is nothing to distribute over indexers.
        return [], [], 0
    largest_groupsize = max(map(len, aidgroup_list))
    num_bins = min(largest_groupsize, num_indexers)
    if verbose or ut.VERYVERBOSE:
        print('[mindex] num_indexers = %d ' % (num_indexers,))
        print('[mindex] largest_groupsize = %d ' % (largest_groupsize,))
        print('[mindex] num_bins = %d ' % (num_bins,))
    # Group annotations for indexing according to the split criteria
    aids_list, overflow_aids = ut.sample_zip(
        aidgroup_list, num_bins, allow_overflow=True, per_bin=1)
    if __debug__:
        # Every name group must contain annotations of a single name
        nidgroup_list = ibs.unflat_map(ibs.get_annot_name_rowids, aidgroup_list)
        for nidgroup in nidgroup_list:
            assert ut.allsame(nidgroup), 'bad name grouping'
        # Every subsequent indexer must be a subset (in name/identity space)
        # of the one directly before it.
        nids_list = ibs.unflat_map(ibs.get_annot_name_rowids, aids_list)
        prev_ = None
        for nids in nids_list:
            nidset = set(nids)
            if prev_ is not None:
                assert prev_.issuperset(nidset), 'bad indexer grouping'
            # Advance the reference set so the next indexer is compared with
            # this one and not only with the very first indexer.
            prev_ = nidset
    return aids_list, overflow_aids, num_bins
示例#3
0
    def __init__(split_index, ibs, daid_list, num_forests=8):
        """
        Build one HOTSIndex per forest of known-name annotations, plus extra
        indexes for overflow and unknown-name annotations.

        Args:
            ibs: object passed through to
                ``ibsfuncs.group_annots_by_known_names`` and ``HOTSIndex``.
            daid_list (list): annotation ids to index.
            num_forests (int): upper bound on the number of forests.

        Sets:
            split_index.forest_indexes (list): one HOTSIndex per non-empty
                forest, in forest order.
            split_index.extra_indexes (list): the overflow index (if any
                overflow exists) followed by the unknown index (if any
                unknown annotations exist).
        """
        print('[nnsindex] make HOTSMultiIndex over %d annots' % (len(daid_list),))
        # Separate the annotations with known names from the unknown ones
        known_aids_list, unknown_aids = ibsfuncs.group_annots_by_known_names(ibs, daid_list)

        if len(known_aids_list) > 0:
            num_bins = min(max(map(len, known_aids_list)), num_forests)
            # Put one name per forest
            forest_aids, overflow_aids = utool.sample_zip(
                known_aids_list, num_bins, allow_overflow=True, per_bin=1)
        else:
            # max() over an empty sequence raises ValueError. With no known
            # names there are no forests, but the unknown annotations below
            # must still be indexed.
            num_bins = 0
            forest_aids, overflow_aids = [], []

        forest_indexes = []
        extra_indexes = []
        for tx, aids in enumerate(forest_aids):
            print('[nnsindex] building forest %d/%d with %d aids' %
                  (tx + 1, num_bins, len(aids)))
            if len(aids) > 0:
                hsindex = HOTSIndex(ibs, aids)
                forest_indexes.append(hsindex)

        if len(overflow_aids) > 0:
            print('[nnsindex] building overflow forest')
            overflow_index = HOTSIndex(ibs, overflow_aids)
            extra_indexes.append(overflow_index)
        if len(unknown_aids) > 0:
            print('[nnsindex] building unknown forest')
            unknown_index = HOTSIndex(ibs, unknown_aids)
            extra_indexes.append(unknown_index)
        #print('[nnsindex] building normalizer forest')  # TODO

        split_index.forest_indexes = forest_indexes
        split_index.extra_indexes = extra_indexes
示例#4
0
    def __init__(split_index, ibs, daid_list, num_forests=8):
        """
        Build one NNIndex per forest of known-name annotations, plus extra
        indexes for overflow and unknown-name annotations.

        Annotations are grouped by the name id from ``ibs.get_annot_nids``;
        groups flagged by ``ibs.is_nid_unknown`` are flattened into a single
        "unknown" set, the remaining groups are distributed over forests
        with ``utool.sample_zip`` (``per_bin=1``, ``allow_overflow=True``).

        Args:
            ibs: object providing ``get_annot_nids`` and ``is_nid_unknown``;
                also passed through to ``NNIndex``.
            daid_list (list): annotation ids to index.
            num_forests (int): upper bound on the number of forests.

        Sets:
            split_index.forest_indexes (list): one NNIndex per non-empty
                forest, in forest order.
            split_index.extra_indexes (list): the overflow index (if any
                overflow exists) followed by the unknown index (if any
                unknown annotations exist).
        """
        print('[nnsindex] make NNSplitIndex over %d annots' % (len(daid_list),))
        nid_list = ibs.get_annot_nids(daid_list)
        nid2_aids = utool.group_items(daid_list, nid_list)
        # Materialize keys and values as aligned lists (they are lazy views
        # on Python 3).
        key_list = list(nid2_aids.keys())
        aids_list = list(nid2_aids.values())
        isunknown_list = ibs.is_nid_unknown(key_list)

        known_aids = list(utool.filterfalse_items(aids_list, isunknown_list))
        unknown_aids = utool.flatten(utool.filter_items(aids_list, isunknown_list))

        if len(known_aids) > 0:
            # Size the forests from the known groups only: these are the only
            # groups that get distributed, so an unknown group must not
            # inflate the forest count.
            num_forests_ = min(max(map(len, known_aids)), num_forests)
            # Put one name per forest
            forest_aids, overflow_aids = utool.sample_zip(known_aids, num_forests_,
                                                          allow_overflow=True,
                                                          per_bin=1)
        else:
            # max() over an empty sequence raises ValueError; with no known
            # names there are no forests to build.
            num_forests_ = 0
            forest_aids, overflow_aids = [], []

        forest_indexes = []
        extra_indexes = []
        for tx, aids in enumerate(forest_aids):
            print('[nnsindex] building forest %d/%d with %d aids' % (tx + 1, num_forests_, len(aids)))
            if len(aids) > 0:
                nn_index = NNIndex(ibs, aids)
                forest_indexes.append(nn_index)

        if len(overflow_aids) > 0:
            print('[nnsindex] building overflow forest')
            overflow_index = NNIndex(ibs, overflow_aids)
            extra_indexes.append(overflow_index)
        if len(unknown_aids) > 0:
            print('[nnsindex] building unknown forest')
            unknown_index = NNIndex(ibs, unknown_aids)
            extra_indexes.append(unknown_index)
        #print('[nnsindex] building normalizer forest')  # TODO

        split_index.forest_indexes = forest_indexes
        split_index.extra_indexes = extra_indexes