def _make_anygroup_hashes(annots, nids):
    """
    Build a mapping from each unique id in ``nids`` to a hash of its group.

    The visual uuids in ``annots.visual_uuids`` are grouped by ``nids``
    (via ``vt.group_indices`` / ``ut.apply_grouping``), and every group is
    reduced to one SHA1-based hash of its members' uuid bytes.

    Args:
        annots: object exposing a ``visual_uuids`` attribute.
            NOTE(review): presumably aligned item-for-item with ``nids``;
            confirm against callers.
        nids: group label (name id) for each annotation.

    Returns:
        dict: unique nid -> combined hash of that group's visual uuids.

    Ignore:
        import wbia
        qreq_ = wbia.testdata_qreq_(
            defaultdb='PZ_MTEST',
            qaid_override=[1, 2, 3, 4, 5, 6, 10, 11],
            daid_override=[2, 3, 5, 6, 20, 21, 22, 23, 24],
        )

        import wbia
        qreq_ = wbia.testdata_qreq_(defaultdb='PZ_Master1')
        %timeit qreq_._make_namegroup_data_hashes()
        %timeit qreq_._make_namegroup_data_uuids()
    """

    def _digest(member_uuids):
        # Sort the raw uuid bytes so that the same set of members always
        # produces the same hash, whatever order they arrive in.
        payload = [vuuid.bytes for vuuid in member_uuids]
        payload.sort()
        return ut.combine_hashes(payload, hasher=hashlib.sha1())

    name_ids, member_idxs = vt.group_indices(nids)
    uuids_by_name = ut.apply_grouping(annots.visual_uuids, member_idxs)
    digests = list(map(_digest, uuids_by_name))
    return dict(zip(name_ids, digests))
def get_qreq_pcc_hashes(qreq_, aids):
    """
    Yield one hash per annotation in ``aids``.

    Each yielded value combines the annotation's visual uuid bytes with the
    group hash stored for its name id in ``qreq_.dnid_to_grouphash``. Sixteen
    zero bytes are handed to ``ut.dict_take`` as the default for name ids
    that have no entry in that mapping.

    Args:
        qreq_: query request providing ``get_qreq_annot_nids``,
            ``get_qreq_annot_visual_uuids`` and ``dnid_to_grouphash``.
        aids: annotation ids, e.g. ``[1, 2, 3]``.

    Yields:
        the result of ``ut.combine_hashes`` over the pair
        ``(visual uuid bytes, name group hash)``, using a fresh SHA1 hasher
        for every annotation.
    """
    name_ids = qreq_.get_qreq_annot_nids(aids)
    null_hash = ut.util_hash.b('\x00' * 16)
    # Should we just be combining with a hash that represents the entire
    # database PCC state? Maybe.
    # For now, only the grouping of database names is considered.
    name_hashes = ut.dict_take(qreq_.dnid_to_grouphash, name_ids, null_hash)
    visual_uuids = qreq_.get_qreq_annot_visual_uuids(aids)
    for visual_uuid, name_hash in zip(visual_uuids, name_hashes):
        pair = (visual_uuid.bytes, name_hash)
        yield ut.combine_hashes(pair, hasher=hashlib.sha1())
def get_qreq_pcc_hashid(qreq_, aids, prefix='', with_nids=False):
    """
    Build a short hash id string for a group of aids.

    Every aid contributes a hash that represents the annotation in the
    context of the query database; only the grouping of database names is
    taken into account. The per-annotation hashes are requested for the
    sorted aids, combined with SHA1, converted with
    ``ut.convert_bytes_to_bigbase`` and cut to the first 16 characters.

    Args:
        qreq_: query request providing ``get_qreq_pcc_hashes`` and
            ``get_qreq_annot_nids``.
        aids: annotation ids to summarize.
        prefix (str): text placed between ``'_'`` and ``'PCC_UUIDS'`` in
            the label.
        with_nids (bool): when True, also embed the number of unique name
            ids of ``aids`` in the result.

    Returns:
        str: ``'-'``-joined fields in the order label
        (``_<prefix>PCC_UUIDS``), ``a<number of aids>``, optionally
        ``n<number of unique nids>``, and the 16 character hash string.

    CommandLine:
        python -m wbia.algo.hots.query_request --test-get_qreq_pcc_hashid:0

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.hots.query_request import *  # NOQA
        >>> import wbia
        >>> p = ['default:K=2,nameknn=True']
        >>> defaultdb = 'testdb1'
        >>> # Test that UUIDS change when you change the name lookup
        >>> new_ = ut.partial(wbia.testdata_qreq_, defaultdb=defaultdb, p=p,
        >>>                   verbose=False)
        >>> # All diff names
        >>> qreq1 = new_(daid_override=[2, 3, 5, 6],
        >>>              qaid_override=[1, 2, 4],
        >>>              custom_nid_lookup={a: a for a in range(14)})
        >>> # All same names
        >>> qreq2 = new_(daid_override=[2, 3, 5, 6],
        >>>              qaid_override=[1, 2, 4],
        >>>              custom_nid_lookup={a: 1 for a in range(14)})
        >>> # Change the PCC, removing a query (data should NOT change)
        >>> # because the thing being queried against is the same
        >>> qreq3 = new_(daid_override=[2, 3, 5, 6],
        >>>              qaid_override=[1, 2],
        >>>              custom_nid_lookup={a: 1 for a in range(14)})
        >>> # Now remove a database object (query SHOULD change)
        >>> # because the results are different depending on
        >>> # naming of database (maybe they shouldn't change...)
        >>> qreq4 = new_(daid_override=[2, 3, 6],
        >>>              qaid_override=[1, 2, 4],
        >>>              custom_nid_lookup={a: 1 for a in range(14)})
        >>> print(qreq1.get_cfgstr(with_input=True, with_pipe=False))
        >>> print(qreq2.get_cfgstr(with_input=True, with_pipe=False))
        >>> print(qreq3.get_cfgstr(with_input=True, with_pipe=False))
        >>> print(qreq4.get_cfgstr(with_input=True, with_pipe=False))
        >>> assert qreq3.get_data_hashid() == qreq2.get_data_hashid()
        >>> assert qreq1.get_data_hashid() != qreq2.get_data_hashid()
    """
    # TODO: pcc based hashing should only be used if name dependent
    # attributes are used in the pipeline.
    label = '_' + prefix + 'PCC_UUIDS'

    # Hash the aids in sorted order so the id does not depend on the order
    # in which the caller listed them.
    per_annot_hashes = qreq_.get_qreq_pcc_hashes(sorted(aids))
    digest = ut.combine_hashes(per_annot_hashes, hasher=hashlib.sha1())
    hashstr = ut.convert_bytes_to_bigbase(digest)[:16]

    fields = [label, 'a' + str(len(aids))]
    if with_nids:
        num_names = len(set(qreq_.get_qreq_annot_nids(aids)))
        fields.append('n' + str(num_names))
    fields.append(hashstr)
    return '-'.join(fields)