Example #1
def select_row_from_id(view, _id, scroll=False, collapse=True):
    """
    _id is from the iders function (i.e. a wbia rowid).
    Selects the row in that view if it exists.
    """
    with ut.Timer(
            '[api_item_view] select_row_from_id(id=%r, scroll=%r, collapse=%r)'
            % (_id, scroll, collapse)):
        qtindex, row = view.get_row_and_qtindex_from_id(_id)
        if row is not None:
            if isinstance(view, QtWidgets.QTreeView):
                if collapse:
                    view.collapseAll()
                select_model = view.selectionModel()
                select_flag = QtCore.QItemSelectionModel.ClearAndSelect
                # select_flag = QtCore.QItemSelectionModel.Select
                # select_flag = QtCore.QItemSelectionModel.NoUpdate
                with ut.Timer('[api_item_view] selecting name. qtindex=%r' %
                              (qtindex, )):
                    select_model.select(qtindex, select_flag)
                with ut.Timer('[api_item_view] expanding'):
                    view.setExpanded(qtindex, True)
            else:
                # For Table Views
                view.selectRow(row)
            # Scroll to selection
            if scroll:
                with ut.Timer('scrolling'):
                    view.scrollTo(qtindex)
            return row
    return None
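
All of these snippets lean on the same utool Timer pattern: a labeled context manager that reports wall-clock time, with the elapsed seconds exposed on an attribute spelled ellapsed. The following is a minimal, self-contained sketch of that usage, assuming only the behavior visible in the examples on this page:

import utool as ut

# Labeled timer: prints timing information for the block it wraps.
with ut.Timer('summing a million integers'):
    total = sum(range(10**6))

# Silent timer: read the elapsed seconds off the object afterwards.
with ut.Timer(verbose=False) as t:
    total = sum(range(10**6))
print('took %r seconds' % (t.ellapsed, ))
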
Example #2
def load_oxford_wbia():
    import wbia

    ibs = wbia.opendb('Oxford')
    dim_size = None
    _dannots = ibs.annots(ibs.filter_annots_general(has_none='query'),
                          config=dict(dim_size=dim_size))
    _qannots = ibs.annots(ibs.filter_annots_general(has_any='query'),
                          config=dict(dim_size=dim_size))

    with ut.Timer('reading info'):
        vecs_list = _dannots.vecs
        kpts_list = _dannots.kpts
        nfeats_list = np.array(_dannots.num_feats)

    with ut.Timer('stacking info'):
        all_vecs = np.vstack(vecs_list)
        all_kpts = np.vstack(kpts_list)
        offset_list = np.hstack(([0], nfeats_list.cumsum())).astype(np.int64)
        # data_annots = reorder_annots(_dannots, data_uri_order)

    data_uri_order = get_annots_imgid(_dannots)
    query_uri_order = get_annots_imgid(_qannots)
    data = {
        'offset_list': offset_list,
        'all_kpts': all_kpts,
        'all_vecs': all_vecs,
        'data_uri_order': data_uri_order,
        'query_uri_order': query_uri_order,
    }
    return data
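
The offset bookkeeping above (stack the per-annotation arrays, record cumulative feature counts) is what later code on this page uses to slice the stacked arrays back apart. A small sketch of that round trip with toy data, independent of wbia:

import numpy as np

# Toy per-annotation descriptor arrays of different lengths
vecs_list = [np.random.rand(n, 128) for n in (3, 5, 2)]
nfeats_list = np.array([len(v) for v in vecs_list])

# Stack everything and remember where each annotation starts
all_vecs = np.vstack(vecs_list)
offset_list = np.hstack(([0], nfeats_list.cumsum())).astype(np.int64)

# Recover the per-annotation slices from the stacked array
recovered = [all_vecs[l:r] for l, r in zip(offset_list[:-1], offset_list[1:])]
assert all(np.array_equal(a, b) for a, b in zip(vecs_list, recovered))
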
Example #3
def set_part_verts(ibs,
                   part_rowid_list,
                   verts_list,
                   delete_thumbs=True,
                   notify_root=True):
    r"""
    Sets the vertices [(x, y), ...] of a list of part_rowid_list

    RESTful:
        Method: PUT
        URL:    /api/part/vert/
    """
    from vtool import geometry

    nInput = len(part_rowid_list)
    # Compute data to set
    if isinstance(verts_list, np.ndarray):
        verts_list = verts_list.tolist()
    for index, vert_list in enumerate(verts_list):
        if isinstance(vert_list, np.ndarray):
            verts_list[index] = vert_list.tolist()
    num_verts_list = list(map(len, verts_list))
    verts_as_strings = list(map(six.text_type, verts_list))
    id_iter1 = ((part_rowid, ) for part_rowid in part_rowid_list)
    # also need to set the internal number of vertices
    val_iter1 = ((num_verts, verts)
                 for (num_verts,
                      verts) in zip(num_verts_list, verts_as_strings))
    colnames = (
        PART_NUM_VERTS,
        PART_VERTS,
    )
    # SET VERTS in PART_TABLE
    ibs.db.set(const.PART_TABLE, colnames, val_iter1, id_iter1, nInput=nInput)
    # changing the vertices also changes the bounding boxes
    bbox_list = geometry.bboxes_from_vert_list(verts_list)  # new bboxes
    xtl_list, ytl_list, width_list, height_list = list(zip(*bbox_list))
    val_iter2 = zip(xtl_list, ytl_list, width_list, height_list)
    id_iter2 = ((part_rowid, ) for part_rowid in part_rowid_list)
    colnames = (
        'part_xtl',
        'part_ytl',
        'part_width',
        'part_height',
    )
    # SET BBOX in PART_TABLE
    ibs.db.set(const.PART_TABLE, colnames, val_iter2, id_iter2, nInput=nInput)

    with ut.Timer('set_annot_verts...thumbs'):
        if delete_thumbs:
            ibs.delete_part_chips(part_rowid_list)  # INVALIDATE THUMBNAILS

    with ut.Timer('set_annot_verts...roots'):
        if notify_root:
            ibs.depc_part.notify_root_changed(part_rowid_list,
                                              'verts',
                                              force_delete=True)
Example #4
def test_simple_parallel():
    r"""
    CommandLine:
        python -m pyhesaff.tests.test_pyhesaff_simple_parallel --test-test_simple_parallel --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from pyhesaff.tests.test_pyhesaff_simple_parallel import *  # NOQA
        >>> import matplotlib as mpl
        >>> from matplotlib import pyplot as plt
        >>> img_fpaths, kpts_array, desc_array = test_simple_parallel()
        >>> ut.quit_if_noshow()
        >>> # Do not plot by default
        >>> fig = plt.figure()
        >>> for count, (img_fpath, kpts, desc) in enumerate(zip(img_fpaths, kpts_array,
        >>>                                                     desc_array)):
        >>>     if count > 3:
        >>>         break
        >>>     ax = fig.add_subplot(2, 2, count + 1)
        >>>     img = mpl.image.imread(img_fpath)
        >>>     plt.imshow(img)
        >>>     _xs, _ys = kpts.T[0:2]
        >>>     ax.plot(_xs, _ys, 'ro', alpha=.5)
        >>> ut.show_if_requested()
    """
    import pyhesaff
    test_fnames = ['carl.jpg', 'lena.png', 'zebra.png', 'ada.jpg', 'star.png']
    img_fpaths = list(map(ut.grab_test_imgpath, test_fnames)) * 2

    # Time parallel computation
    with ut.Timer('Timing Parallel'):
        kpts_array, desc_array = pyhesaff.detect_feats_list(img_fpaths)

    # Time serial computation
    kpts_list2 = []
    desc_list2 = []
    with ut.Timer('Timing Iterative'):
        for img_fpath in img_fpaths:
            kpts_, desc_ = pyhesaff.detect_feats(img_fpath)
            kpts_list2.append(kpts_)
            desc_list2.append(desc_)

    print('Checking for errors')
    for (kpts_, desc_, kpts, desc) in zip(kpts_list2, desc_list2, kpts_array,
                                          desc_array):
        print('shape(kpts, kpts_, desc, desc_) = %9r, %9r, %11r, %11r' %
              (kpts.shape, kpts_.shape, desc.shape, desc_.shape))
        try:
            assert np.all(kpts_ == kpts), 'parallel computation inconsistent'
            assert np.all(desc_ == desc), 'parallel computation inconsistent'
            assert len(kpts_) > 0, 'no kpts detected'
            #assert False, 'deliberate triggering to see printouts'
        except Exception as ex:
            ut.printex(ex)
            raise
    print('Keypoints seem consistent')
    return img_fpaths, kpts_array, desc_array
Example #5
def OLD_compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_aids, wx2_idf, wx2_dmaws,
                           smk_alpha, smk_thresh, verbose=False):
    """
    """
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)

    with ut.Timer('timer_orig1'):
        wx_sublist = np.array(list(wx2_drvecs.keys()))
        if not ut.QUIET:
            print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        # Get list of aids and rvecs w.r.t. words
        aids_list   = [wx2_aids[wx] for wx in wx_sublist]
        rvecs_list1 = [wx2_drvecs[wx] for wx in wx_sublist]
        maws_list   = [wx2_dmaws[wx] for wx in wx_sublist]
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.assert_single_assigned_maws(maws_list)
        # Group by daids first and then by word index
        daid2_wx2_drvecs = clustertool.double_group(wx_sublist, aids_list, rvecs_list1)

        # For every daid, compute its sccw using pregrouped rvecs
        # Summation over words for each aid
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] SCCW Sum (over daid): ')
        # Get lists w.r.t daids
        aid_list = list(daid2_wx2_drvecs.keys())
        # list of mappings from words to rvecs foreach daid
        # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
        _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
        _aidwxs_iter   = (list(wx2_aidrvecs.keys()) for wx2_aidrvecs in _wx2_aidrvecs_list)
        aidrvecs_list  = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list]
        aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]

    with ut.Timer('timer_orig2'):
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.check_data_smksumm(aididf_list, aidrvecs_list)
        # TODO: implement database side soft-assign
        sccw_list = [smk_scoring.sccw_summation(rvecs_list, None, idf_list, None, smk_alpha, smk_thresh)
                     for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]

        daid2_sccw = dict(zip(aid_list, sccw_list))
    if ut.VERBOSE or verbose:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
Example #6
def TIME_GEN_PREPROC_IMG(ibs):
    from ibeis.algo.preproc.preproc_image import add_images_params_gen
    print('[TIME_GEN_PREPROC_IMG]')
    gid_list = ibs.get_valid_gids()
    gpath_list = ibs.get_image_paths(gid_list)

    # STABILITY

    if not utool.get_argflag('--nostable'):
        # TEST 1
        with utool.Timer('parallel chunksize=1'):
            output1 = list(add_images_params_gen(gpath_list, chunksize=1))
        print(utool.truncate_str(str(output1), 80))
        assert len(output1) == len(gpath_list), 'chunksize changes output'

        # TEST 2
        with utool.Timer('parallel chunksize=2'):
            output2 = list(add_images_params_gen(gpath_list, chunksize=2))
        print(utool.truncate_str(str(output2), 80))
        assert output1 == output2, 'chunksize changes output'

        # TEST N
        with utool.Timer('parallel chunksize=None'):
            outputN = list(add_images_params_gen(gpath_list, chunksize=None))
        print(utool.truncate_str(str(outputN), 80))
        assert outputN == output2, 'chunksize changes output'

    # BENCHMARK

    setup = utool.unindent('''
        from ibeis.algo.preproc.preproc_image import add_images_params_gen
        genkw = dict(prog=False, verbose=True)
        gpath_list = %r
        ''' % (gpath_list, ))

    print(utool.truncate_str(str(gpath_list), 80))
    print('Processing %d images' % (len(gpath_list), ))
    timeit3 = partial(timeit2, setup=setup, number=3)
    timeit3('list(add_images_params_gen(gpath_list, chunksize=None, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=None, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=1, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=2, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=4, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=8, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=16, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=32, **genkw))')

    print('[/TIME_GEN_PREPROC_IMG]')
    return locals()
Example #7
def reindex_step(count, count_list, time_list_reindex):
    daids = all_randomize_daids_[0:count]
    vecs = np.vstack(ibs.get_annot_vecs(daids))
    with ut.Timer(verbose=False) as t:
        flann = make_flann_index(vecs, flann_params)  # NOQA
    count_list.append(count)
    time_list_reindex.append(t.ellapsed)
Example #8
def subindexer_time_experiment():
    """
    builds plot of number of annotations vs indexer build time.

    TODO: time experiment
    """
    import ibeis
    import utool as ut
    import pyflann
    import plottool as pt
    ibs = ibeis.opendb(db='PZ_Master0')
    daid_list = ibs.get_valid_aids()
    count_list = []
    time_list = []
    flann_params = ibs.cfg.query_cfg.flann_cfg.get_flann_params()
    for count in ut.ProgressIter(range(1, 301)):
        daids_ = daid_list[:]
        np.random.shuffle(daids_)
        daids = daids_[0:count]
        vecs = np.vstack(ibs.get_annot_vecs(daids))
        with ut.Timer(verbose=False) as t:
            flann = pyflann.FLANN()
            flann.build_index(vecs, **flann_params)
        count_list.append(count)
        time_list.append(t.ellapsed)
    count_arr = np.array(count_list)
    time_arr = np.array(time_list)
    pt.plot2(count_arr, time_arr, marker='-', equal_aspect=False,
             x_label='num_annotations', y_label='FLANN build time')
Example #9
def get_buildtime_data(**kwargs):
    flann_params = vt.get_flann_params(**kwargs)
    print('flann_params = %r' % (ut.dict_str(flann_params), ))
    data_list = []
    num = 1000
    print('-----')
    for count in ut.ProgressIter(itertools.count(),
                                 nTotal=-1,
                                 freq=1,
                                 autoadjust=False):
        num = int(num * 1.2)
        print('num = %r' % (num, ))
        #if num > 1E6:
        #    break
        data = pool.get_testdata(num)
        print('object size ' + ut.get_object_size_str(data, 'data'))
        flann = pyflann.FLANN(**flann_params)
        with ut.Timer(verbose=False) as t:
            flann.build_index(data)
        print('t.ellapsed = %r' % (t.ellapsed, ))
        if t.ellapsed > 5 or count > 1000:
            break
        data_list.append((count, num, t.ellapsed))
        print('-----')
    return data_list, flann_params
Example #10
def addition_step(count, flann, count_list2, time_list_addition):
    daids = all_randomize_daids_[count:count + 1]
    vecs = np.vstack(ibs.get_annot_vecs(daids))
    with ut.Timer(verbose=False) as t:
        flann.add_points(vecs)
    count_list2.append(count)
    time_list_addition.append(t.ellapsed)
Example #11
def gridsearch_coverage_grid():
    """
    CommandLine:
        python -m vtool.coverage_grid --test-gridsearch_coverage_grid --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.coverage_grid import *  # NOQA
        >>> import plottool as pt
        >>> gridsearch_coverage_grid()
        >>> pt.show_if_requested()
    """
    import plottool as pt
    fname = None  # 'easy1.png'
    kpts, chipsize, weights = coverage_kpts.testdata_coverage(fname)
    if len(kpts) > 100:
        kpts = kpts[::100]
        weights = weights[::100]
    cfgdict_list, cfglbl_list = get_coverage_grid_gridsearch_configs()
    coverage_gridtup_list = [
        sparse_grid_coverage(kpts, chipsize, weights, **cfgdict)
        for cfgdict in ut.ProgressIter(cfgdict_list, lbl='coverage grid')
    ]

    fnum = 1
    with ut.Timer('plotting gridsearch'):
        ut.interact_gridsearch_result_images(
            show_coverage_grid, cfgdict_list, cfglbl_list,
            coverage_gridtup_list, fnum=fnum, figtitle='coverage grid', unpack=True,
            max_plots=25)

    pt.iup()
Example #12
def test_pyflann_io():
    """
    CommandLine:
        python -m vtool.tests.test_pyflann --test-test_pyflann_io

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.test_pyflann import *  # NOQA
        >>> result = test_pyflann_io()
        >>> print(result)
    """
    # Create qpts and database data
    print('Create random qpts and database data')
    num_neighbors = 3
    nPts = 1009
    nQPts = 31
    qpts = testdata_points(nPts=nQPts)
    pts = testdata_points(nPts=nPts)

    # Create flann object
    print('Create flann object')
    flann = pyflann.FLANN()

    # Build kd-tree index over the data
    print('Build the kd tree')
    with utool.Timer('Building the kd-tree with %d pts' % (len(pts), )):
        _build_params = flann.build_index(pts)  # noqa

    # Find the closest few points to num_neighbors
    print('Find nn_index nearest neighbors')
    indices1, dists1 = flann.nn_index(qpts, num_neighbors=num_neighbors)

    # Save the data to disk
    print('Save the data to the disk')
    np.savez('test_pyflann_ptsdata.npz', pts)
    npload_pts = np.load('test_pyflann_ptsdata.npz')
    pts2 = npload_pts['arr_0']

    print('Save and delete the FLANN index')
    flann.save_index('test_pyflann_index.flann')
    flann.delete_index()

    print('Reload the data')
    flann2 = pyflann.FLANN()
    flann2.load_index('test_pyflann_index.flann', pts2)
    indices2, dists2 = flann2.nn_index(qpts, num_neighbors=num_neighbors)
    #print(utool.hz_str('indices2, dists2 = ', indices2,  dists2))

    print('Find the same nearest neighbors?')

    if np.all(indices1 == indices2) and np.all(dists1 == dists2):
        print('...data is the same! SUCCESS!')
    else:
        raise AssertionError('...data is different! FAILURE!')
Example #13
def _test_build_internal_structure(_module, lang):
    import utool as ut

    # Test data
    N = 6

    # N = 2000

    def ider_level0():
        return range(N)

    def ider_level1(input_):
        def _single(x):
            return [y for y in range(x**2, x**2 + max(0, ((N // 1) - x - 1)))]

        if isinstance(input_, list):
            return [_single(x) for x in input_]
        else:
            x = input_
            return _single(x)

    # Build Structure
    ider_list = [ider_level0, ider_level1]
    num_levels = len(ider_list)
    # TEST RECURSIVE
    print('================')
    with ut.Timer(lang + ' recursive:'):
        if num_levels == 0:
            root_id_list = []
        else:
            root_id_list = ider_list[0]()
        root_node1 = _module.TreeNode(-1, None, -1)
        level = 0
        _module._populate_tree_recursive(root_node1, root_id_list, num_levels,
                                         ider_list, level)
    if N < 10:
        print('')
        print(api_tree_node.tree_node_string(root_node1, indent=' *  '))
    print('================')
    # with ut.Timer(lang + ' iterative:'):
    #    # TEST ITERATIVE
    #    # TODO: Vet this code a bit more.
    #    root_node2 = _module.TreeNode(-1, None, -1)
    #    _module._populate_tree_iterative(
    #        root_node2, num_levels, ider_list)
    # if N < 10:
    #    print('')
    #    print(api_tree_node.tree_node_string(root_node2, indent=' *  '))
    print('================')
    print('finished %s test' % lang)
Example #14
def run_parallel_task(num_procs=None):
    print('run_parallel_task. num_procs=%r' % (num_procs, ))
    if num_procs is not None:
        util_parallel.close_pool()
        util_parallel.init_pool(num_procs)
    else:
        num_procs = util_parallel.get_default_numprocs()
    msg = 'processing tasks in %s' % ('serial' if num_procs == 1 else
                                      str(num_procs) + '-parallel')
    with utool.Timer(msg):
        result_list = util_parallel.process(pyhesaff.detect_feats,
                                            args_list, hesaff_kwargs)
    print_test_results(result_list)
    return result_list
Example #15
def TIME_QUERY(ibs):
    print('[TIME_QUERY]')
    #valid_aids = ibs.get_valid_aids()  # [0:20]
    valid_aids = ibs.get_valid_aids()[0:10]  # [0:20]
    qaid_list = valid_aids
    daid_list = valid_aids

    # Query without using the query cache
    querykw = {
        'use_bigcache': False,
        'use_cache': False,
    }
    with utool.Timer('timing all vs all query'):
        qres_list = ibs.query_chips(qaid_list, daid_list, **querykw)

    print('[/TIME_QUERY]')
    return locals()
Example #16
def TEST_PARALLEL():
    gpath_list = grabdata.get_test_gpaths(ndata=10,
                                          names=['zebra', 'lena', 'jeff'])
    args_list = [(gpath, ) for gpath in gpath_list]

    @utool.argv_flag_dec
    def print_test_results(result_list):
        for kpts, desc in result_list:
            print('[test] kpts.shape=(%4d, %d), desc.sum=%8d' %
                  (kpts.shape[0], kpts.shape[1], desc.sum()))

    hesaff_kwargs = {'scale_min': -1, 'scale_max': -1, 'nogravity_hack': False}

    with utool.Timer('c++ parallel'):
        kpts_list, desc_list = pyhesaff.detect_feats_list(
            gpath_list, **hesaff_kwargs)

    # Run parallel tasks
    @utool.indent_func('[test_task]')
    def run_parallel_task(num_procs=None):
        print('run_parallel_task. num_procs=%r' % (num_procs, ))
        if num_procs is not None:
            util_parallel.close_pool()
            util_parallel.init_pool(num_procs)
        else:
            num_procs = util_parallel.get_default_numprocs()
        msg = 'processing tasks in %s' % ('serial' if num_procs == 1 else
                                          str(num_procs) + '-parallel')
        with utool.Timer(msg):
            result_list = util_parallel.process(pyhesaff.detect_feats,
                                                args_list, hesaff_kwargs)
        print_test_results(result_list)
        return result_list

    run_parallel_task()

    # Compare to serial if needed
    @utool.argv_flag_dec
    def compare_serial():
        print('compare_serial')
        run_parallel_task(1)

    compare_serial()
    return locals()
Example #17
def test_kmeans_plus_plus_speed(n_clusters=2000,
                                n_features=128,
                                per_cluster=10,
                                asint=False,
                                fix=True):
    """
    from speedup_kmeans import *
    from sklearn.cluster.k_means_ import *
    """
    rng = np.random.RandomState(42)
    # Make random cluster centers on a ball
    centers = rng.rand(n_clusters, n_features)
    centers /= np.linalg.norm(centers, axis=0)[None, :]
    centers = (centers * 512).astype(np.uint8) / 512
    centers /= np.linalg.norm(centers, axis=0)[None, :]

    n_samples = int(n_clusters * per_cluster)
    n_clusters, n_features = centers.shape
    X, true_labels = make_blobs(n_samples=n_samples,
                                centers=centers,
                                cluster_std=1.,
                                random_state=42)

    if asint:
        X = (X * 512).astype(np.int32)

    x_squared_norms = row_norms(X, squared=True)

    if fix:
        _k_init = sklearn.cluster.k_means_._k_init
    else:
        _k_init = sklearn_master.cluster.k_means_._k_init
    random_state = np.random.RandomState(42)
    n_local_trials = None  # NOQA

    with ut.Timer('testing kmeans init') as t:
        centers = _k_init(X,
                          n_clusters,
                          random_state=random_state,
                          x_squared_norms=x_squared_norms)
    return centers, t.ellapsed
Example #18
def grabcut_from_probchip(chip_img, label_mask):
    rect = (0, 0, w, h)
    bgd_model = np.zeros((1, 13 * 5), np.float64)
    fgd_model = np.zeros((1, 13 * 5), np.float64)
    num_iters = 5
    mode = cv2.GC_INIT_WITH_MASK
    # label_mask is an outvar
    label_mask_ = label_mask.copy()
    print(label_values)
    print(np.unique(label_mask_))
    with ut.Timer('grabcut'):
        cv2.grabCut(chip_img,
                    label_mask_,
                    rect,
                    bgd_model,
                    fgd_model,
                    num_iters,
                    mode=mode)
    #is_foreground = (label_mask == cv2.GC_FGD) + (label_mask == cv2.GC_PR_FGD)
    #is_foreground = (label_mask_ == cv2.GC_FGD)  # + (label_mask == cv2.GC_PR_FGD)
    return label_mask_
Example #19
def segment(img_fpath, bbox_, new_size=None):
    """ Runs grabcut """
    printDBG('[segm] segment(img_fpath=%r, bbox=%r)>' % (img_fpath, bbox_))
    num_iters = 5
    bgd_model = np.zeros((1, 13 * 5), np.float64)
    fgd_model = np.zeros((1, 13 * 5), np.float64)
    mode = cv2.GC_INIT_WITH_MASK
    # Initialize
    # !!! CV2 READS (H,W) !!!
    #  WH Unsafe
    img_resz, bbox_resz = resize_img_and_bbox(img_fpath, bbox_, new_size=new_size)
    # WH Unsafe
    (img_h, img_w) = img_resz.shape[:2]                       # Image Shape
    printDBG(' * img_resz.shape=%r' % ((img_h, img_w),))
    # WH Safe
    tlbr = ut.xywh_to_tlbr(bbox_resz, (img_w, img_h))  # Rectangle ANNOTATION
    (x1, y1, x2, y2) = tlbr
    rect = tuple(bbox_resz)                               # Initialize: rect
    printDBG(' * rect=%r' % (rect,))
    printDBG(' * tlbr=%r' % (tlbr,))
    # WH Unsafe
    _mask = np.zeros((img_h, img_w), dtype=np.uint8)  # Initialize: mask
    _mask[y1:y2, x1:x2] = cv2.GC_PR_FGD             # Set ANNOTATION to cv2.GC_PR_FGD
    # Grab Cut
    tt = ut.Timer(' * cv2.grabCut()', verbose=DEBUG_SEGM)
    cv2.grabCut(img_resz, _mask, rect, bgd_model, fgd_model, num_iters, mode=mode)
    tt.toc()
    img_mask = np.where((_mask == cv2.GC_FGD) + (_mask == cv2.GC_PR_FGD), 255, 0).astype('uint8')
    # Crop
    chip      = img_resz[y1:y2, x1:x2]
    chip_mask = img_mask[y1:y2, x1:x2]
    chip_mask = clean_mask(chip_mask)
    chip_mask = np.array(chip_mask, np.float64) / 255.0
    # Mask the value of HSV
    chip_hsv = cv2.cvtColor(chip, cv2.COLOR_RGB2HSV)
    chip_hsv = np.array(chip_hsv, dtype=np.float64) / 255.0
    chip_hsv[:, :, 2] *= chip_mask
    chip_hsv = np.array(np.round(chip_hsv * 255.0), dtype=np.uint8)
    seg_chip = cv2.cvtColor(chip_hsv, cv2.COLOR_HSV2RGB)
    return seg_chip, img_mask
Example #20
def wait_for_job_result(jobiface, jobid, timeout=10, freq=.1):
    t = ut.Timer(verbose=False)
    t.tic()
    while True:
        reply = jobiface.get_job_status(jobid)
        if reply['jobstatus'] == 'completed':
            return
        elif reply['jobstatus'] == 'exception':
            result = jobiface.get_unpacked_result(jobid)
            #raise Exception(result)
            print('Exception occurred in engine')
            return result
        elif reply['jobstatus'] == 'working':
            pass
        elif reply['jobstatus'] == 'unknown':
            pass
        else:
            raise Exception('Unknown jobstatus=%r' %
                            (reply['jobstatus'], ))
        time.sleep(freq)
        if timeout is not None and t.toc() > timeout:
            raise Exception('Timeout')
Example #21
    def compute_word_weights(inva, method='idf'):
        """
        Compute a per-word weight like idf

        Example:
            >>> # xdoctest: +REQUIRES(--slow)
            >>> # ENABLE_DOCTEST
            >>> from wbia.algo.smk.inverted_index import *  # NOQA
            >>> qreq_, inva = testdata_inva()
            >>> wx_to_weight = inva.compute_word_weights()
            >>> print('wx_to_weight = %r' % (wx_to_weight,))
        """
        wx_list = sorted(inva.wx_to_aids.keys())
        with ut.Timer('Computing %s weights' % (method, )):
            if method == 'idf':
                ndocs_total = len(inva.aids)
                # Unweighted documents
                ndocs_per_word = np.array(
                    [len(set(inva.wx_to_aids[wx])) for wx in wx_list])
                weight_per_word = smk_funcs.inv_doc_freq(
                    ndocs_total, ndocs_per_word)
            elif method == 'idf-maw':
                # idf denom (the num of docs containing a word for each word)
                # The max(maws) denote the prob that this word indexes an annot
                ndocs_total = len(inva.aids)
                # Weighted documents
                wx_to_ndocs = {wx: 0.0 for wx in wx_list}
                for wx, maws in zip(ut.iflatten(inva.wx_lists),
                                    ut.iflatten(inva.maws_lists)):
                    wx_to_ndocs[wx] += min(1.0, max(maws))
                ndocs_per_word = ut.take(wx_to_ndocs, wx_list)
                weight_per_word = smk_funcs.inv_doc_freq(
                    ndocs_total, ndocs_per_word)
            elif method == 'uniform':
                weight_per_word = np.ones(len(wx_list))
            wx_to_weight = dict(zip(wx_list, weight_per_word))
            wx_to_weight = ut.DefaultValueDict(0, wx_to_weight)
        return wx_to_weight
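
For reference, the 'idf' branch above amounts to counting how many annotations (documents) contain each word and down-weighting the common ones. smk_funcs.inv_doc_freq itself is not shown on this page; the sketch below uses the conventional log(N / n_w) form purely for illustration:

import numpy as np

wx_to_aids = {0: [1, 2, 3], 1: [2], 2: [1, 2, 3, 4]}   # toy inverted index
wx_list = sorted(wx_to_aids.keys())
ndocs_total = 4                                        # total number of annots
ndocs_per_word = np.array([len(set(wx_to_aids[wx])) for wx in wx_list])
idf_per_word = np.log(ndocs_total / ndocs_per_word)    # assumed idf form
wx_to_weight = dict(zip(wx_list, idf_per_word))
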
Example #22
def compute_inverted_list(inva):
    with ut.Timer('Building inverted list'):
        wx_to_aids = smk_funcs.invert_lists(inva.aids, inva.wx_lists)
        return wx_to_aids
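
In spirit, the inverted list built above maps each word index to the annotation ids whose word assignments contain it. smk_funcs.invert_lists is not shown here; this is an illustrative pure-Python equivalent of the mapping it returns:

from collections import defaultdict

aids = [10, 11, 12]
wx_lists = [[0, 2], [2], [0, 1, 2]]      # word indices assigned to each annot

wx_to_aids = defaultdict(list)
for aid, wxs in zip(aids, wx_lists):
    for wx in set(wxs):                  # each annot counted once per word
        wx_to_aids[wx].append(aid)
# wx_to_aids -> {0: [10, 12], 2: [10, 11, 12], 1: [12]}
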
Example #23
def run_asmk_script():
    with ut.embed_on_exception_context:  # NOQA
        """
    >>> from wbia.algo.smk.script_smk import *
    """  # NOQA

        # ==============================================
        # PREPROCESSING CONFIGURATION
        # ==============================================
        config = {
            # 'data_year': 2013,
            'data_year': None,
            'dtype': 'float32',
            # 'root_sift': True,
            'root_sift': False,
            # 'centering': True,
            'centering': False,
            'num_words': 2**16,
            # 'num_words': 1E6
            # 'num_words': 8000,
            'kmeans_impl': 'sklearn.mini',
            'extern_words': False,
            'extern_assign': False,
            'assign_algo': 'kdtree',
            'checks': 1024,
            'int_rvec': True,
            'only_xy': False,
        }
        # Define which params are relevant for which operations
        relevance = {}
        relevance['feats'] = ['dtype', 'root_sift', 'centering', 'data_year']
        relevance['words'] = relevance['feats'] + [
            'num_words',
            'extern_words',
            'kmeans_impl',
        ]
        relevance['assign'] = relevance['words'] + [
            'checks',
            'extern_assign',
            'assign_algo',
        ]
        # relevance['ydata'] = relevance['assign'] + ['int_rvec']
        # relevance['xdata'] = relevance['assign'] + ['only_xy', 'int_rvec']

        nAssign = 1

        class SMKCacher(ut.Cacher):
            def __init__(self, fname, ext='.cPkl'):
                relevant_params = relevance[fname]
                relevant_cfg = ut.dict_subset(config, relevant_params)
                cfgstr = ut.get_cfg_lbl(relevant_cfg)
                dbdir = ut.truepath('/raid/work/Oxford/')
                super(SMKCacher, self).__init__(fname,
                                                cfgstr,
                                                cache_dir=dbdir,
                                                ext=ext)

        # ==============================================
        # LOAD DATASET, EXTRACT AND POSTPROCESS FEATURES
        # ==============================================
        if config['data_year'] == 2007:
            data = load_oxford_2007()
        elif config['data_year'] == 2013:
            data = load_oxford_2013()
        elif config['data_year'] is None:
            data = load_oxford_wbia()

        offset_list = data['offset_list']
        all_kpts = data['all_kpts']
        raw_vecs = data['all_vecs']
        query_uri_order = data['query_uri_order']
        data_uri_order = data['data_uri_order']
        # del data

        # ================
        # PRE-PROCESS
        # ================
        import vtool as vt

        # Alias names to avoid errors in interactive sessions
        proc_vecs = raw_vecs
        del raw_vecs

        feats_cacher = SMKCacher('feats', ext='.npy')
        all_vecs = feats_cacher.tryload()
        if all_vecs is None:
            if config['dtype'] == 'float32':
                logger.info('Converting vecs to float32')
                proc_vecs = proc_vecs.astype(np.float32)
            else:
                proc_vecs = proc_vecs
                raise NotImplementedError('other dtype')

            if config['root_sift']:
                with ut.Timer('Apply root sift'):
                    np.sqrt(proc_vecs, out=proc_vecs)
                    vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs)

            if config['centering']:
                with ut.Timer('Apply centering'):
                    mean_vec = np.mean(proc_vecs, axis=0)
                    # Center and then re-normalize
                    np.subtract(proc_vecs, mean_vec[None, :], out=proc_vecs)
                    vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs)

            if config['dtype'] == 'int8':
                smk_funcs

            all_vecs = proc_vecs
            feats_cacher.save(all_vecs)
        del proc_vecs

        # =====================================
        # BUILD VISUAL VOCABULARY
        # =====================================
        if config['extern_words']:
            words = data['words']
            assert config['num_words'] is None or len(
                words) == config['num_words']
        else:
            word_cacher = SMKCacher('words')
            words = word_cacher.tryload()
            if words is None:
                with ut.embed_on_exception_context:
                    if config['kmeans_impl'] == 'sklearn.mini':
                        import sklearn.cluster

                        rng = np.random.RandomState(13421421)
                        # init_size = int(config['num_words'] * 8)
                        init_size = int(config['num_words'] * 4)
                        # converged after 26043 iterations
                        clusterer = sklearn.cluster.MiniBatchKMeans(
                            config['num_words'],
                            init_size=init_size,
                            batch_size=1000,
                            compute_labels=False,
                            max_iter=20,
                            random_state=rng,
                            n_init=1,
                            verbose=1,
                        )
                        clusterer.fit(all_vecs)
                        words = clusterer.cluster_centers_
                    elif config['kmeans_impl'] == 'yael':
                        from yael import ynumpy

                        centroids, qerr, dis, assign, nassign = ynumpy.kmeans(
                            all_vecs,
                            config['num_words'],
                            init='kmeans++',
                            verbose=True,
                            output='all',
                        )
                        words = centroids
                    word_cacher.save(words)

        # =====================================
        # ASSIGN EACH VECTOR TO ITS NEAREST WORD
        # =====================================
        if config['extern_assign']:
            assert config[
                'extern_words'], 'need extern cluster to extern assign'
            idx_to_wxs = vt.atleast_nd(data['idx_to_wx'], 2)
            idx_to_maws = np.ones(idx_to_wxs.shape, dtype=np.float32)
            idx_to_wxs = np.ma.array(idx_to_wxs)
            idx_to_maws = np.ma.array(idx_to_maws)
        else:
            from wbia.algo.smk import vocab_indexer

            vocab = vocab_indexer.VisualVocab(words)
            dassign_cacher = SMKCacher('assign')
            assign_tup = dassign_cacher.tryload()
            if assign_tup is None:
                vocab.flann_params['algorithm'] = config['assign_algo']
                vocab.build()
                # Takes 12 minutes to assign jegous vecs to 2**16 vocab
                with ut.Timer('assign vocab neighbors'):
                    _idx_to_wx, _idx_to_wdist = vocab.nn_index(
                        all_vecs, nAssign, checks=config['checks'])
                    if nAssign > 1:
                        idx_to_wxs, idx_to_maws = smk_funcs.weight_multi_assigns(
                            _idx_to_wx,
                            _idx_to_wdist,
                            massign_alpha=1.2,
                            massign_sigma=80.0,
                            massign_equal_weights=True,
                        )
                    else:
                        idx_to_wxs = np.ma.masked_array(_idx_to_wx,
                                                        fill_value=-1)
                        idx_to_maws = np.ma.ones(idx_to_wxs.shape,
                                                 fill_value=-1,
                                                 dtype=np.float32)
                        idx_to_maws.mask = idx_to_wxs.mask
                assign_tup = (idx_to_wxs, idx_to_maws)
                dassign_cacher.save(assign_tup)

        idx_to_wxs, idx_to_maws = assign_tup

        # Breakup vectors, keypoints, and word assignments by annotation
        wx_lists = [
            idx_to_wxs[left:right] for left, right in ut.itertwo(offset_list)
        ]
        maw_lists = [
            idx_to_maws[left:right] for left, right in ut.itertwo(offset_list)
        ]
        vecs_list = [
            all_vecs[left:right] for left, right in ut.itertwo(offset_list)
        ]
        kpts_list = [
            all_kpts[left:right] for left, right in ut.itertwo(offset_list)
        ]

        # =======================
        # FIND QUERY SUBREGIONS
        # =======================

        ibs, query_annots, data_annots, qx_to_dx = load_ordered_annots(
            data_uri_order, query_uri_order)
        daids = data_annots.aids
        qaids = query_annots.aids

        query_super_kpts = ut.take(kpts_list, qx_to_dx)
        query_super_vecs = ut.take(vecs_list, qx_to_dx)
        query_super_wxs = ut.take(wx_lists, qx_to_dx)
        query_super_maws = ut.take(maw_lists, qx_to_dx)
        # Mark which keypoints are within the bbox of the query
        query_flags_list = []
        only_xy = config['only_xy']
        for kpts_, bbox in zip(query_super_kpts, query_annots.bboxes):
            flags = kpts_inside_bbox(kpts_, bbox, only_xy=only_xy)
            query_flags_list.append(flags)

        logger.info('Queries are crops of existing database images.')
        logger.info('Looking at average percents')
        percent_list = [
            flags_.sum() / flags_.shape[0] for flags_ in query_flags_list
        ]
        percent_stats = ut.get_stats(percent_list)
        logger.info('percent_stats = %s' % (ut.repr4(percent_stats), ))

        import vtool as vt

        query_kpts = vt.zipcompress(query_super_kpts, query_flags_list, axis=0)
        query_vecs = vt.zipcompress(query_super_vecs, query_flags_list, axis=0)
        query_wxs = vt.zipcompress(query_super_wxs, query_flags_list, axis=0)
        query_maws = vt.zipcompress(query_super_maws, query_flags_list, axis=0)

        # =======================
        # CONSTRUCT QUERY / DATABASE REPR
        # =======================

        # int_rvec = not config['dtype'].startswith('float')
        int_rvec = config['int_rvec']

        X_list = []
        _prog = ut.ProgPartial(length=len(qaids),
                               label='new X',
                               bs=True,
                               adjust=True)
        for aid, fx_to_wxs, fx_to_maws in _prog(
                zip(qaids, query_wxs, query_maws)):
            X = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec)
            X_list.append(X)

        # ydata_cacher = SMKCacher('ydata')
        # Y_list = ydata_cacher.tryload()
        # if Y_list is None:
        Y_list = []
        _prog = ut.ProgPartial(length=len(daids),
                               label='new Y',
                               bs=True,
                               adjust=True)
        for aid, fx_to_wxs, fx_to_maws in _prog(zip(daids, wx_lists,
                                                    maw_lists)):
            Y = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec)
            Y_list.append(Y)
        # ydata_cacher.save(Y_list)

        # ======================
        # Add in some groundtruth

        logger.info('Add in some groundtruth')
        for Y, nid in zip(Y_list, ibs.get_annot_nids(daids)):
            Y.nid = nid

        for X, nid in zip(X_list, ibs.get_annot_nids(qaids)):
            X.nid = nid

        for Y, qual in zip(Y_list, ibs.get_annot_quality_texts(daids)):
            Y.qual = qual

        # ======================
        # Add in other properties
        for Y, vecs, kpts in zip(Y_list, vecs_list, kpts_list):
            Y.vecs = vecs
            Y.kpts = kpts

        imgdir = ut.truepath('/raid/work/Oxford/oxbuild_images')
        for Y, imgid in zip(Y_list, data_uri_order):
            gpath = ut.unixjoin(imgdir, imgid + '.jpg')
            Y.gpath = gpath

        for X, vecs, kpts in zip(X_list, query_vecs, query_kpts):
            X.kpts = kpts
            X.vecs = vecs

        # ======================
        logger.info('Building inverted list')
        daids = [Y.aid for Y in Y_list]
        # wx_list = sorted(ut.list_union(*[Y.wx_list for Y in Y_list]))
        wx_list = sorted(set.union(*[Y.wx_set for Y in Y_list]))
        assert daids == data_annots.aids
        assert len(wx_list) <= config['num_words']

        wx_to_aids = smk_funcs.invert_lists(daids, [Y.wx_list for Y in Y_list],
                                            all_wxs=wx_list)

        # Compute IDF weights
        logger.info('Compute IDF weights')
        ndocs_total = len(daids)
        # Use only the unique number of words
        ndocs_per_word = np.array([len(set(wx_to_aids[wx])) for wx in wx_list])
        logger.info('ndocs_perword stats: ' +
                    ut.repr4(ut.get_stats(ndocs_per_word)))
        idf_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word)
        wx_to_weight = dict(zip(wx_list, idf_per_word))
        logger.info('idf stats: ' +
                    ut.repr4(ut.get_stats(wx_to_weight.values())))

        # Filter junk
        Y_list_ = [Y for Y in Y_list if Y.qual != 'junk']

        # =======================
        # CHOOSE QUERY KERNEL
        # =======================
        params = {
            'asmk': dict(alpha=3.0, thresh=0.0),
            'bow': dict(),
            'bow2': dict(),
        }
        # method = 'bow'
        method = 'bow2'
        method = 'asmk'
        smk = SMK(wx_to_weight, method=method, **params[method])

        # Specific info for the type of query
        if method == 'asmk':
            # Make residual vectors
            if True:
                # The stacked way is 50x faster
                # TODO: extend for multi-assignment and record fxs
                flat_query_vecs = np.vstack(query_vecs)
                flat_query_wxs = np.vstack(query_wxs)
                flat_query_offsets = np.array(
                    [0] + ut.cumsum(ut.lmap(len, query_wxs)))

                flat_wxs_assign = flat_query_wxs
                flat_offsets = flat_query_offsets
                flat_vecs = flat_query_vecs
                tup = smk_funcs.compute_stacked_agg_rvecs(
                    words, flat_wxs_assign, flat_vecs, flat_offsets)
                all_agg_vecs, all_error_flags, agg_offset_list = tup
                if int_rvec:
                    all_agg_vecs = smk_funcs.cast_residual_integer(
                        all_agg_vecs)
                agg_rvecs_list = [
                    all_agg_vecs[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]
                agg_flags_list = [
                    all_error_flags[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]

                for X, agg_rvecs, agg_flags in zip(X_list, agg_rvecs_list,
                                                   agg_flags_list):
                    X.agg_rvecs = agg_rvecs
                    X.agg_flags = agg_flags[:, None]

                flat_wxs_assign = idx_to_wxs
                flat_offsets = offset_list
                flat_vecs = all_vecs
                tup = smk_funcs.compute_stacked_agg_rvecs(
                    words, flat_wxs_assign, flat_vecs, flat_offsets)
                all_agg_vecs, all_error_flags, agg_offset_list = tup
                if int_rvec:
                    all_agg_vecs = smk_funcs.cast_residual_integer(
                        all_agg_vecs)

                agg_rvecs_list = [
                    all_agg_vecs[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]
                agg_flags_list = [
                    all_error_flags[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]

                for Y, agg_rvecs, agg_flags in zip(Y_list, agg_rvecs_list,
                                                   agg_flags_list):
                    Y.agg_rvecs = agg_rvecs
                    Y.agg_flags = agg_flags[:, None]
            else:
                # This non-stacked way is about 500x slower
                _prog = ut.ProgPartial(label='agg Y rvecs',
                                       bs=True,
                                       adjust=True)
                for Y in _prog(Y_list_):
                    make_agg_vecs(Y, words, Y.vecs)

                _prog = ut.ProgPartial(label='agg X rvecs',
                                       bs=True,
                                       adjust=True)
                for X in _prog(X_list):
                    make_agg_vecs(X, words, X.vecs)
        elif method == 'bow2':
            # Hack for orig tf-idf bow vector
            nwords = len(words)
            for X in ut.ProgIter(X_list, label='make bow vector'):
                ensure_tf(X)
                bow_vector(X, wx_to_weight, nwords)

            for Y in ut.ProgIter(Y_list_, label='make bow vector'):
                ensure_tf(Y)
                bow_vector(Y, wx_to_weight, nwords)

        if method != 'bow2':
            for X in ut.ProgIter(X_list, 'compute X gamma'):
                X.gamma = smk.gamma(X)
            for Y in ut.ProgIter(Y_list_, 'compute Y gamma'):
                Y.gamma = smk.gamma(Y)

        # Execute matches (could go faster by enumerating candidates)
        scores_list = []
        for X in ut.ProgIter(X_list, label='query %s' % (smk, )):
            scores = [smk.kernel(X, Y) for Y in Y_list_]
            scores = np.array(scores)
            scores = np.nan_to_num(scores)
            scores_list.append(scores)

        import sklearn.metrics

        avep_list = []
        _iter = list(zip(scores_list, X_list))
        _iter = ut.ProgIter(_iter, label='evaluate %s' % (smk, ))
        for scores, X in _iter:
            truth = [X.nid == Y.nid for Y in Y_list_]
            avep = sklearn.metrics.average_precision_score(truth, scores)
            avep_list.append(avep)
        avep_list = np.array(avep_list)
        mAP = np.mean(avep_list)
        logger.info('mAP  = %r' % (mAP, ))
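
The evaluation at the end of this script reduces to per-query average precision followed by a mean over queries. A tiny self-contained version of that computation with toy scores, using the same sklearn.metrics.average_precision_score call as above:

import numpy as np
import sklearn.metrics

# One truth/score vector per query, with one entry per database annot
truth_per_query = [np.array([1, 0, 1, 0]), np.array([0, 1, 0, 0])]
scores_per_query = [np.array([0.9, 0.2, 0.4, 0.1]), np.array([0.3, 0.8, 0.1, 0.2])]

avep_list = [
    sklearn.metrics.average_precision_score(truth, scores)
    for truth, scores in zip(truth_per_query, scores_per_query)
]
mAP = np.mean(avep_list)
print('mAP = %r' % (mAP, ))
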
Example #24
def prometheus_update(ibs, *args, **kwargs):
    try:
        with ut.Timer(verbose=False) as timer:
            if ibs.containerized:
                container_name = const.CONTAINER_NAME
            else:
                container_name = ibs.dbname

            global PROMETHEUS_COUNTER

            PROMETHEUS_COUNTER = PROMETHEUS_COUNTER + 1  # NOQA
            # logger.info('PROMETHEUS LIMIT %d / %d' % (PROMETHEUS_COUNTER, PROMETHEUS_LIMIT, ))

            if PROMETHEUS_COUNTER >= PROMETHEUS_LIMIT:
                PROMETHEUS_COUNTER = 0

                try:
                    PROMETHEUS_DATA['info'].info(
                        {
                            'uuid': str(ibs.get_db_init_uuid()),
                            'dbname': ibs.dbname,
                            'hostname': ut.get_computer_name(),
                            'container': container_name,
                            'version': ibs.db.get_db_version(),
                            'containerized': str(int(ibs.containerized)),
                            'production': str(int(ibs.production)),
                        }
                    )
                except Exception:
                    pass

                try:
                    if ibs.production:
                        num_imageset_rowids = 0
                        num_gids = 0
                        num_aids = 0
                        num_pids = 0
                        num_nids = 0
                        num_species = 0
                    else:
                        num_imageset_rowids = len(ibs._get_all_imageset_rowids())
                        num_gids = len(ibs._get_all_gids())
                        num_aids = len(ibs._get_all_aids())
                        num_pids = len(ibs._get_all_part_rowids())
                        num_nids = len(ibs._get_all_name_rowids())
                        num_species = len(ibs._get_all_species_rowids())

                    PROMETHEUS_DATA['imagesets'].labels(name=container_name).set(
                        num_imageset_rowids
                    )
                    PROMETHEUS_DATA['images'].labels(name=container_name).set(num_gids)
                    PROMETHEUS_DATA['annotations'].labels(name=container_name).set(
                        num_aids
                    )
                    PROMETHEUS_DATA['parts'].labels(name=container_name).set(num_pids)
                    PROMETHEUS_DATA['names'].labels(name=container_name).set(num_nids)
                    PROMETHEUS_DATA['species'].labels(name=container_name).set(
                        num_species
                    )
                except Exception:
                    pass

                try:
                    job_status_dict = ibs.get_job_status()['json_result']
                except Exception:
                    pass

                try:
                    job_uuid_list = list(job_status_dict.keys())
                    status_dict_template = {
                        'received': 0,
                        'accepted': 0,
                        'queued': 0,
                        'working': 0,
                        'publishing': 0,
                        'completed': 0,
                        'exception': 0,
                        'suppressed': 0,
                        'corrupted': 0,
                        '_error': 0,
                    }
                    status_dict = {
                        '*': status_dict_template.copy(),
                        'max': status_dict_template.copy(),
                    }

                    endpoints = set([])
                    working_endpoint = None
                except Exception:
                    pass

                for job_uuid in job_uuid_list:
                    try:
                        job_status = job_status_dict[job_uuid]

                        status = job_status['status']
                        endpoint = job_status['endpoint']
                        jobcounter = job_status['jobcounter']

                        status = '%s' % (status,)
                        endpoint = '%s' % (endpoint,)

                        if status not in status_dict_template.keys():
                            status = '_error'

                        if endpoint not in status_dict:
                            status_dict[endpoint] = status_dict_template.copy()

                        endpoints.add(endpoint)
                    except Exception:
                        pass

                    try:
                        if status in ['working']:
                            from wbia.web.job_engine import (
                                calculate_timedelta,
                                _timestamp,
                            )

                            started = job_status['time_started']
                            now = _timestamp()
                            (
                                hours,
                                minutes,
                                seconds,
                                total_seconds,
                            ) = calculate_timedelta(started, now)
                            logger.info(
                                'ELAPSED (%s): %d seconds...' % (job_uuid, total_seconds)
                            )
                            PROMETHEUS_DATA['elapsed'].labels(
                                name=container_name, endpoint=endpoint
                            ).set(total_seconds)
                            PROMETHEUS_DATA['elapsed'].labels(
                                name=container_name, endpoint='*'
                            ).set(total_seconds)
                            working_endpoint = endpoint
                    except Exception:
                        pass

                    try:
                        if status not in status_dict_template:
                            logger.info('UNRECOGNIZED STATUS %r' % (status,))
                        status_dict[endpoint][status] += 1
                        status_dict['*'][status] += 1

                        current_max = status_dict['max'][status]
                        status_dict['max'][status] = max(current_max, jobcounter)

                        if job_uuid not in PROMETHUS_JOB_CACHE_DICT:
                            PROMETHUS_JOB_CACHE_DICT[job_uuid] = {}
                    except Exception:
                        pass

                    try:
                        runtime_sec = job_status.get('time_runtime_sec', None)
                        if (
                            runtime_sec is not None
                            and 'runtime' not in PROMETHUS_JOB_CACHE_DICT[job_uuid]
                        ):
                            PROMETHUS_JOB_CACHE_DICT[job_uuid]['runtime'] = runtime_sec
                            PROMETHEUS_DATA['runtime'].labels(
                                name=container_name, endpoint=endpoint
                            ).set(runtime_sec)
                            PROMETHEUS_DATA['runtime'].labels(
                                name=container_name, endpoint='*'
                            ).set(runtime_sec)
                    except Exception:
                        pass

                    try:
                        turnaround_sec = job_status.get('time_turnaround_sec', None)
                        if (
                            turnaround_sec is not None
                            and 'turnaround' not in PROMETHUS_JOB_CACHE_DICT[job_uuid]
                        ):
                            PROMETHUS_JOB_CACHE_DICT[job_uuid][
                                'turnaround'
                            ] = turnaround_sec
                            PROMETHEUS_DATA['turnaround'].labels(
                                name=container_name, endpoint=endpoint
                            ).set(turnaround_sec)
                            PROMETHEUS_DATA['turnaround'].labels(
                                name=container_name, endpoint='*'
                            ).set(turnaround_sec)
                    except Exception:
                        pass

                try:
                    if working_endpoint is None:
                        PROMETHEUS_DATA['elapsed'].labels(
                            name=container_name, endpoint='*'
                        ).set(0.0)

                    for endpoint in endpoints:
                        if endpoint == working_endpoint:
                            continue
                        PROMETHEUS_DATA['elapsed'].labels(
                            name=container_name, endpoint=endpoint
                        ).set(0.0)
                except Exception:
                    pass

                try:
                    # logger.info(ut.repr3(status_dict))
                    for endpoint in status_dict:
                        for status in status_dict[endpoint]:
                            number = status_dict[endpoint][status]
                            PROMETHEUS_DATA['engine'].labels(
                                status=status, name=container_name, endpoint=endpoint
                            ).set(number)
                except Exception:
                    pass

                try:
                    # logger.info(ut.repr3(status_dict))
                    process_status_dict = ibs.get_process_alive_status()
                    for process in process_status_dict:
                        number = 0 if process_status_dict.get(process, False) else 1
                        PROMETHEUS_DATA['process'].labels(
                            process=process, name=container_name
                        ).set(number)
                except Exception:
                    pass
        try:
            PROMETHEUS_DATA['update'].labels(name=container_name).set(timer.ellapsed)
        except Exception:
            pass
    except Exception:
        pass
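
PROMETHEUS_DATA is defined elsewhere in the module; the .labels(...).set(...) calls above are consistent with prometheus_client Gauge objects keyed by short names. A hedged sketch of what such a registry could look like (the metric names and label sets here are illustrative assumptions, not the module's actual definitions):

from prometheus_client import Gauge

# Illustrative registry; the real module chooses its own metric names/labels.
PROMETHEUS_DATA = {
    'images': Gauge('wbia_images', 'Number of images in the database', ['name']),
    'update': Gauge('wbia_update_seconds', 'Duration of prometheus_update', ['name']),
}

PROMETHEUS_DATA['images'].labels(name='mydb').set(42)
PROMETHEUS_DATA['update'].labels(name='mydb').set(0.012)
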
Example #25
def load_oxford_2007():
    """
    Loads data from
    http://www.robots.ox.ac.uk:5000/~vgg/publications/2007/Philbin07/philbin07.pdf

    >>> from wbia.algo.smk.script_smk import *  # NOQA
    """
    from os.path import join, basename, splitext
    import pandas as pd
    import vtool as vt

    dbdir = ut.truepath('/raid/work/Oxford/')
    data_fpath0 = join(dbdir, 'data_2007.pkl')

    if ut.checkpath(data_fpath0):
        data = ut.load_data(data_fpath0)
        return data
    else:
        word_dpath = join(dbdir, 'word_oxc1_hesaff_sift_16M_1M')
        _word_fpath_list = ut.ls(word_dpath)
        imgid_to_word_fpath = {
            splitext(basename(word_fpath))[0]: word_fpath
            for word_fpath in _word_fpath_list
        }
        readme_fpath = join(dbdir, 'README2.txt')
        imgid_order = ut.readfrom(readme_fpath).split('\n')[20:-1]

        data_uri_order = [x.replace('oxc1_', '') for x in imgid_order]

        imgid_to_df = {}
        for imgid in ut.ProgIter(imgid_order, label='reading kpts'):
            word_fpath = imgid_to_word_fpath[imgid]
            row_gen = (map(float,
                           line.strip('\n').split(' '))
                       for line in ut.read_lines_from(word_fpath)[2:])
            rows = [(int(word_id), x, y, e11, e12, e22)
                    for (word_id, x, y, e11, e12, e22) in row_gen]
            df = pd.DataFrame(
                rows, columns=['word_id', 'x', 'y', 'e11', 'e12', 'e22'])
            imgid_to_df[imgid] = df

        df_list = ut.take(imgid_to_df, imgid_order)

        nfeat_list = [len(df_) for df_ in df_list]
        offset_list = [0] + ut.cumsum(nfeat_list)
        shape = (offset_list[-1], 128)
        # shape = (16334970, 128)
        sift_fpath = join(dbdir, 'OxfordSIFTDescriptors',
                          'feat_oxc1_hesaff_sift.bin')
        with open(sift_fpath, 'rb') as file_:
            with ut.Timer('Reading SIFT binary file'):
                # each uint8 descriptor component is one byte
                nbytes = int(np.prod(shape))
                # np.fromstring is deprecated; np.frombuffer reads the raw bytes
                all_vecs = np.frombuffer(file_.read(nbytes), dtype=np.uint8)
            all_vecs = all_vecs.reshape(shape)

        kpts_list = [
            df_.loc[:, ('x', 'y', 'e11', 'e12', 'e22')].values
            for df_ in df_list
        ]
        wordid_list = [df_.loc[:, 'word_id'].values for df_ in df_list]
        kpts_Z = np.vstack(kpts_list)
        idx_to_wx = np.hstack(wordid_list)

        # assert len(np.unique(idx_to_wx)) == 1E6

        # Read the standard query order
        query_files = sorted(
            ut.glob(dbdir + '/oxford_groundtruth', '*_query.txt'))
        query_uri_order = []
        for qpath in query_files:
            text = ut.readfrom(qpath, verbose=0)
            query_uri = text.split(' ')[0].replace('oxc1_', '')
            query_uri_order.append(query_uri)

        logger.info('converting to invV')
        all_kpts = vt.convert_kptsZ_to_kpts(kpts_Z)

        data = {
            'offset_list': offset_list,
            'all_kpts': all_kpts,
            'all_vecs': all_vecs,
            'idx_to_wx': idx_to_wx,
            'data_uri_order': data_uri_order,
            'query_uri_order': query_uri_order,
        }
        ut.save_data(data_fpath0, data)
    return data
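Because load_oxford_2007 stacks every image's descriptors and keypoints into flat arrays, downstream code recovers per-image blocks by slicing with offset_list. A small sketch of that slicing (per_image_features is a hypothetical helper, not part of the module):

def per_image_features(data, image_index):
    # offset_list has one more entry than there are images, so the features of
    # image i live in rows offset_list[i]:offset_list[i + 1] of the flat arrays.
    lo = data['offset_list'][image_index]
    hi = data['offset_list'][image_index + 1]
    vecs_i = data['all_vecs'][lo:hi]  # uint8 SIFT descriptors for this image
    kpts_i = data['all_kpts'][lo:hi]  # keypoints (converted to invV format above)
    return kpts_i, vecs_i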
Example No. 26
0
def get_extern_distinctiveness(qreq_, cm, **kwargs):
    r"""
    Uses the distinctiveness normalizer class (which uses pre-downloaded models)
    to normalize the distinctiveness of the query keypoints.


    IDEA:
        Because we have database points as well, we can use the distance between
        the normalizer of the query point and the normalizer of the database point.
        They should have similar normalizers if they are a correct match AND
        non-distinctive.

    Args:
        qreq_ (QueryRequest):  query request object with hyper-parameters
        cm (QueryResult):  object of feature correspondences and scores

    Returns:
        tuple: (new_fsv_list, daid_list)

    CommandLine:
        python -m ibeis.algo.hots.special_query --test-get_extern_distinctiveness

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.special_query import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> daids = ibs.get_valid_aids(species=ibeis.const.TEST_SPECIES.ZEB_PLAIN)
        >>> qaids = daids[0:1]
        >>> cfgdict = dict(codename='vsone_unnorm_dist_ratio_extern_distinctiveness')
        >>> qreq_ = ibs.new_query_request(qaids, daids, cfgdict=cfgdict)
        >>> #qreq_.lazy_load()
        >>> cm = ibs.query_chips(qreq_=qreq_, use_cache=False, save_qcache=False)[0]
        >>> # execute function
        >>> (new_fsv_list, daid_list) = get_extern_distinctiveness(qreq_, cm)
        >>> # verify results
        >>> assert all([fsv.shape[1] == 1 + len(cm.filtkey_list) for fsv in new_fsv_list])
        >>> assert all([np.all(fsv.T[-1] >= 0) for fsv in new_fsv_list])
        >>> assert all([np.all(fsv.T[-1] <= 1) for fsv in new_fsv_list])
    """
    dstcnvs_normer = qreq_.dstcnvs_normer
    assert dstcnvs_normer is not None, 'must have loaded normalizer'
    filtkey = hstypes.FiltKeys.DISTINCTIVENESS
    # make sure the filter does not already exist
    scorex_vsone = ut.listfind(cm.filtkey_list, filtkey)
    assert scorex_vsone is None, 'already applied distinctiveness'
    daid_list = list(six.iterkeys(cm.aid2_fsv))
    # Find subset of features to get distinctivness of
    qfxs_list = [cm.aid2_fm[daid].T[0] for daid in daid_list]
    query_vecs = qreq_.ibs.get_annot_vecs(cm.qaid, config2_=qreq_.qparams)

    # There might be duplicate feature indexes in the list of feature index
    # lists. We can exploit this to perform the neighbor lookup more efficiently
    # by only performing a single query per unique feature index. Utool does the
    # mapping for us.
    def rowid_distinctivness(unique_flat_qfx_list,
                             dstcnvs_normer=None,
                             query_vecs=None,
                             **kwargs):
        # Take only the unique vectors
        unique_flat_subvecs = query_vecs.take(unique_flat_qfx_list, axis=0)
        unique_flat_dstcvns = dstcnvs_normer.get_distinctiveness(
            unique_flat_subvecs, **kwargs)
        return unique_flat_dstcvns[:, None]

    aug_fsv_list = ut.unflat_unique_rowid_map(rowid_distinctivness,
                                              qfxs_list,
                                              dstcnvs_normer=dstcnvs_normer,
                                              query_vecs=query_vecs,
                                              **kwargs)

    if False:
        with ut.Timer('time1'):
            aug_fsv_list = ut.unflat_unique_rowid_map(
                rowid_distinctivness,
                qfxs_list,
                dstcnvs_normer=dstcnvs_normer,
                query_vecs=query_vecs)
        with ut.Timer('time2'):
            # Less efficient way to do this
            _vecs_list = [query_vecs.take(qfxs, axis=0) for qfxs in qfxs_list]
            _aug_fsv_list = [
                dstcnvs_normer.get_distinctiveness(_vecs)[:, None]
                for _vecs in _vecs_list
            ]
        isequal_list = [
            np.all(np.equal(*tup)) for tup in zip(aug_fsv_list, _aug_fsv_list)
        ]
        assert all(isequal_list), 'utool is broken'

    # Compute the distinctiveness as the augmenting score
    # (each augmenting column has shape (X, 1))
    # Stack the old and augmenting scores
    old_fsv_list = [cm.aid2_fsv[daid] for daid in daid_list]
    new_fsv_list = list(map(np.hstack, zip(old_fsv_list, aug_fsv_list)))

    # FURTHER HACKS TO SCORING
    #if 'fg_power' in kwargs:
    for filtkey in hstypes.WEIGHT_FILTERS:
        key = filtkey + '_power'
        if key in kwargs:
            _power = kwargs[key]
            _index = ut.listfind(cm.filtkey_list, filtkey)
            for fsv in new_fsv_list:
                fsv.T[_index] **= _power
    #new_aid2_fsv = dict(zip(daid_list, new_fsv_list))
    return new_fsv_list, daid_list
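The comment before ut.unflat_unique_rowid_map describes the trick it relies on: flatten the nested feature-index lists, evaluate the expensive function once per unique index, then scatter the results back into the original grouping. A plain-NumPy sketch of the same idea (unflat_unique_map and expensive_fn are illustrative names, not the utool implementation):

import numpy as np

def unflat_unique_map(expensive_fn, qfxs_list, query_vecs):
    flat_qfxs = np.hstack(qfxs_list)
    unique_qfxs, inverse = np.unique(flat_qfxs, return_inverse=True)
    # Evaluate the expensive function only once per unique feature index
    unique_vals = expensive_fn(query_vecs.take(unique_qfxs, axis=0))
    # Scatter back to the flat order, then split into the original groups
    flat_vals = unique_vals[inverse]
    sizes = [len(qfxs) for qfxs in qfxs_list]
    return np.split(flat_vals, np.cumsum(sizes)[:-1])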
Example No. 27
0
    def post(self):
        global NETWORK_MODEL_TAG
        global NETWORK
        global NETWORK_VALUES

        response = {'success': False}

        # ut.embed()

        try:
            with ut.Timer('Pre'):
                parser = reqparse.RequestParser()
                parser.add_argument('image', type=str)
                parser.add_argument('config', type=dict)
                args = parser.parse_args()

                image_base64_str = args['image']
                image = get_image_from_base64_str(image_base64_str)

                config = args['config']
                model_tag = config.get('model_tag', None)
                num_returns = config.get('topk', 100)

                model_url = model_url_dict.get(model_tag, None)

            assert model_url is not None, 'Model tag %r is not recognized' % (
                model_tag, )
            if model_tag != NETWORK_MODEL_TAG:
                with ut.Timer('Loading network'):
                    print('Loading network from weights %r' % (model_tag, ))
                    values_url = model_url.replace('.pth', '.values.pth')

                    # Download files
                    model_filepath = ut.grab_file_url(model_url,
                                                      appname='kaggle7',
                                                      check_hash=True)
                    values_filepath = ut.grab_file_url(values_url,
                                                       appname='kaggle7',
                                                       check_hash=True)

                    model_values = torch.load(values_filepath)
                    classes = model_values['classes']
                    num_classes = len(classes)

                    model_weights = torch.load(model_filepath,
                                               map_location=get_device())
                    network_model, multiple = make_new_network(
                        num_classes, RING_HEADS, GEM_CONST, pretrained=False)

                    if multiple:
                        pass

                    if torch.cuda.is_available():
                        network_model = network_model.cuda()

                    # model_weights = model_weights['model']
                    network_model.load_state_dict(model_weights)
                    network_model.eval()

                    NETWORK_MODEL_TAG = model_tag
                    NETWORK = network_model
                    NETWORK_VALUES = model_values

            print('Using network %r' % (NETWORK_MODEL_TAG, ))
            with ut.Timer('Loading input tensor'):
                input_image = image.convert(CMODE).convert('LA').convert(CMODE)
                input_image = TFRM_RESIZE(input_image)
                input_image = pil2tensor(input_image, np.float32)
                input_image = input_image.div_(255)
                input_image = TFRM_WHITEN(input_image)

                size = input_image.size()
                input_tensor = input_image.view(-1, size[0], size[1], size[2])
                input_tensor = input_tensor.to(get_device())

            # Run inference
            with ut.Timer('Inference'):
                print('Running inference on input tensor %r' %
                      (input_tensor.size(), ))
                output = NETWORK(input_tensor)
                print('...done')
                preds_list, feats_list = output

            with ut.Timer('Post1'):
                print('Performing post-processing')
                prediction_raw = preds_list[-1][0]
                features_raw = TFRM_L2NORM(torch.cat(feats_list, dim=1))[0]

            with ut.Timer('Post2'):
                print('...classifier')
                # Post Process classification
                classifier_temp = NETWORK_VALUES['thresholds'][
                    'classifier_softmax_temp']
                classifier_prediction = torch.softmax(prediction_raw /
                                                      classifier_temp,
                                                      dim=0)

            with ut.Timer('Post3'):
                # Post process features
                print('...features')
                train_feats = NETWORK_VALUES['train_feats']
                train_gt = NETWORK_VALUES['train_gt']
                size = features_raw.size()
                features = features_raw.view(-1, size[0])
                distance_matrix_imgs = batched_dmv(features, train_feats)
                distance_matrix_classes = dm2cm(distance_matrix_imgs, train_gt)
                features_sim = (2.0 - distance_matrix_classes) * 0.5
                features_sim = features_sim[0]

                features_temp = NETWORK_VALUES['thresholds'][
                    'feature_softmax_temp']
                features_prediction = torch.softmax(features_sim /
                                                    features_temp,
                                                    dim=0)

            with ut.Timer('Post4'):
                print('...mixing')
                p = NETWORK_VALUES['thresholds']['mixing_value']
                classifier_prediction = classifier_prediction.to('cpu')
                final_prediction = (p * classifier_prediction +
                                    (1.0 - p) * features_prediction)

            with ut.Timer('Collection'):
                print('Collecting prediction')
                top_k_score_list, top_k_index_list = final_prediction.topk(
                    num_returns, 0)
                top_k_score_list = top_k_score_list.detach().tolist()
                classes = NETWORK_VALUES['classes']
                top_k_class_list = ut.take(classes, top_k_index_list)

                response['scores'] = {}
                for top_k_class, top_k_score in zip(top_k_class_list,
                                                    top_k_score_list):
                    response['scores'][top_k_class] = top_k_score
                response['success'] = True

            print('...done')
        except Exception as ex:
            message = str(ex)
            response['message'] = message
            print('!!!ERROR!!!')
            print(response)

        # if torch.cuda.is_available():
        #     torch.cuda.empty_cache()

        return response
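For context, a hypothetical client for the handler above. The request fields ('image', 'config', 'model_tag', 'topk') mirror the reqparse arguments, but the URL and route are assumptions that depend on how this Flask resource is registered:

import base64
import requests

def classify_image(image_path, model_tag, topk=100,
                   url='http://localhost:5000/api/classify'):
    # Encode the image as a base64 string, as expected by the 'image' argument
    with open(image_path, 'rb') as f:
        image_base64_str = base64.b64encode(f.read()).decode('utf-8')
    payload = {
        'image': image_base64_str,
        'config': {'model_tag': model_tag, 'topk': topk},
    }
    # On success the handler returns {'success': True, 'scores': {class: score}}
    return requests.post(url, json=payload).json()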
def dev_train_distinctiveness(species=None):
    r"""
    Args:
        ibs (IBEISController):  wbia controller object
        species (None):

    CommandLine:
        python -m wbia.algo.hots.distinctiveness_normalizer --test-dev_train_distinctiveness

        alias dev_train_distinctiveness='python -m wbia.algo.hots.distinctiveness_normalizer --test-dev_train_distinctiveness'
        # Publishing (uses cached normalizers if available)
        dev_train_distinctiveness --species GZ --publish
        dev_train_distinctiveness --species PZ --publish
        dev_train_distinctiveness --species PZ --retrain

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots.distinctiveness_normalizer import *  # NOQA
        >>> import wbia
        >>> species = ut.get_argval('--species', str, 'zebra_grevys')
        >>> dev_train_distinctiveness(species)
    """
    import wbia

    # if 'species' not in vars() or species is None:
    #    species = 'zebra_grevys'
    if species == 'zebra_grevys':
        dbname = 'GZ_ALL'
    elif species == 'zebra_plains':
        dbname = 'PZ_Master0'
    else:
        raise ValueError('unsupported species = %r' % (species,))
    ibs = wbia.opendb(dbname)
    global_distinctdir = ibs.get_global_distinctiveness_modeldir()
    cachedir = global_distinctdir
    dstcnvs_normer = DistinctivnessNormalizer(species, cachedir=cachedir)
    try:
        if ut.get_argflag('--retrain'):
            raise IOError('force cache miss')
        with ut.Timer('loading distinctiveness'):
            dstcnvs_normer.load(cachedir)
        # Cache hit
        logger.info('distinctiveness model cache hit')
    except IOError:
        logger.info('distinctiveness model cache miss')
        with ut.Timer('training distinctiveness'):
            # Need to train
            # Add one example from each name
            # TODO: add one exemplar per viewpoint for each name
            # max_vecs = 1E6
            # max_annots = 975
            max_annots = 975
            # ibs.fix_and_clean_database()
            nid_list = ibs.get_valid_nids()
            aids_list = ibs.get_name_aids(nid_list)
            # remove junk
            aids_list = ibs.unflat_map(ibs.filter_junk_annotations, aids_list)
            # remove empty
            aids_list = [aids for aids in aids_list if len(aids) > 0]
            num_annots_list = list(map(len, aids_list))
            aids_list = ut.sortedby(aids_list, num_annots_list, reverse=True)
            # take only one annot per name
            aid_list = ut.get_list_column(aids_list, 0)
            # Keep only a certain number of annots for distinctiveness mapping
            aid_list_ = ut.listclip(aid_list, max_annots)
            logger.info('total num named annots = %r' % (sum(num_annots_list)))
            logger.info(
                'training distinctiveness using %d/%d singleton annots' %
                (len(aid_list_), len(aid_list)))
            # vec
            # FIXME: qreq_ params for config rowid
            vecs_list = ibs.get_annot_vecs(aid_list_)
            num_vecs = sum(list(map(len, vecs_list)))
            logger.info('num_vecs = %r' % (num_vecs, ))
            vecs = np.vstack(vecs_list)
            logger.info('vecs size = %r' % (ut.get_object_size_str(vecs), ))
            dstcnvs_normer.init_support(vecs)
            dstcnvs_normer.save(global_distinctdir)

    if ut.get_argflag('--publish'):
        dstcnvs_normer.publish()
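The exemplar selection inside the cache-miss branch (keep one annotation per name, prefer names with many annotations, clip to max_annots) can be summarized with a standalone sketch; name_to_aids is a hypothetical stand-in for the ibs name-to-annotation lookup:

def select_training_exemplars(name_to_aids, max_annots=975):
    # Drop empty names, mirroring the filtering above
    aids_list = [aids for aids in name_to_aids.values() if len(aids) > 0]
    # Largest names first, mirroring ut.sortedby(..., reverse=True)
    aids_list = sorted(aids_list, key=len, reverse=True)
    # One annotation per name, mirroring ut.get_list_column(aids_list, 0)
    aid_list = [aids[0] for aids in aids_list]
    # Clip, mirroring ut.listclip(aid_list, max_annots)
    return aid_list[:max_annots]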
def tst_single_annot_distinctiveness_params(ibs, aid):
    r"""

    CommandLine:
        python -m wbia.algo.hots.distinctiveness_normalizer --test-test_single_annot_distinctiveness_params --show
        python -m wbia.algo.hots.distinctiveness_normalizer --test-test_single_annot_distinctiveness_params --show --db GZ_ALL

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots.distinctiveness_normalizer import *  # NOQA
        >>> import wbia.plottool as pt
        >>> import wbia
        >>> # build test data
        >>> ibs = wbia.opendb(ut.get_argval('--db', type_=str, default='PZ_MTEST'))
        >>> aid = ut.get_argval('--aid', type_=int, default=1)
        >>> # execute function
        >>> tst_single_annot_distinctiveness_params(ibs, aid)
        >>> pt.show_if_requested()
    """
    ####
    # TODO: Also parameterize the downweighting based on the keypoint size
    ####
    # HACK IN ABILITY TO SET CONFIG
    from wbia.init.main_commands import postload_commands
    from wbia.algo import Config

    postload_commands(ibs, None)

    import wbia.plottool as pt

    # cfglbl_list = cfgdict_list
    # ut.all_dict_combinations_lbls(varied_dict)

    # Get the info needed to compute distinctiveness
    species_text = ibs.get_annot_species(aid)
    # FIXME; qreq_ params for config rowid
    vecs = ibs.get_annot_vecs(aid)
    kpts = ibs.get_annot_kpts(aid)
    chip = ibs.get_annot_chips(aid)

    # Parameter space to search
    # TODO: use slicing to control the params being varied
    # Use the GridSearch class to modify parameters as you go.

    varied_dict = Config.DCVS_DEFAULT.get_varydict()

    logger.info('Varied Dict: ')
    logger.info(ut.repr2(varied_dict))

    cfgdict_list, cfglbl_list = ut.make_constrained_cfg_and_lbl_list(
        varied_dict)

    # Get a ground-truth-ish distinctiveness map for the objective function
    # Load the distinctiveness normalizer
    with ut.Timer('Loading Distinctiveness Normalizer for %s' % (species_text,)):
        dstcvnss_normer = request_species_distinctiveness_normalizer(
            species_text)

    # Get distinctiveness over all params
    dstncvs_list = [
        dstcvnss_normer.get_distinctiveness(vecs, **cfgdict)
        for cfgdict in ut.ProgIter(cfgdict_list, lbl='get dstcvns')
    ]

    # fgweights = ibs.get_annot_fgweights([aid])[0]
    # dstncvs_list = [x * fgweights for x in dstncvs_list]
    fnum = 1

    import functools

    show_func = functools.partial(show_chip_distinctiveness_plot, chip, kpts)

    ut.interact_gridsearch_result_images(
        show_func,
        cfgdict_list,
        cfglbl_list,
        dstncvs_list,
        score_list=None,
        fnum=fnum,
        figtitle='dstncvs gridsearch',
    )

    pt.present()
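ut.make_constrained_cfg_and_lbl_list expands the varied dict into concrete config dicts plus human-readable labels for the grid-search interaction. A rough, standard-library-only sketch of the unconstrained expansion (utool's constraint handling is omitted):

import itertools

def make_cfg_and_lbl_list(varied_dict):
    keys = sorted(varied_dict.keys())
    cfgdict_list = []
    cfglbl_list = []
    for values in itertools.product(*(varied_dict[key] for key in keys)):
        cfgdict = dict(zip(keys, values))
        cfgdict_list.append(cfgdict)
        cfglbl_list.append(','.join('%s=%r' % item for item in cfgdict.items()))
    return cfgdict_list, cfglbl_list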
Example No. 30
0
def vsone_(
    qreq_,
    query_aids,
    data_aids,
    qannot_cfg,
    dannot_cfg,
    configured_obj_annots,
    hyper_params,
):
    # Do vectorized preload before constructing lazy dicts
    # Then make sure the lazy dicts point to this subset
    unique_obj_annots = list(configured_obj_annots.values())
    for annots in ut.ProgIter(unique_obj_annots, 'vectorized preload'):
        annots.set_caching(True)
        annots.chip_size
        annots.vecs
        annots.kpts
        annots.yaw
        annots.qual
        annots.gps
        annots.time
        if qreq_.qparams.featweight_enabled:
            annots.fgweights
    # annots._internal_attrs.clear()

    # Make convenient lazy dict representations (after preloading the info above)
    configured_lazy_annots = ut.ddict(dict)
    for config, annots in configured_obj_annots.items():
        annot_dict = configured_lazy_annots[config]
        for _annot in ut.ProgIter(annots.scalars(), label='make lazy dict'):
            annot = _annot._make_lazy_dict()
            annot_dict[_annot.aid] = annot

    unique_lazy_annots = ut.flatten([x.values() for x in configured_lazy_annots.values()])

    flann_params = {'algorithm': 'kdtree', 'trees': 4}
    for annot in ut.ProgIter(unique_lazy_annots, label='lazy flann'):
        vt.matching.ensure_metadata_flann(annot, flann_params)
        vt.matching.ensure_metadata_normxy(annot)

    for annot in ut.ProgIter(unique_lazy_annots, 'preload kpts'):
        annot['kpts']
    for annot in ut.ProgIter(unique_lazy_annots, 'preload normxy'):
        annot['norm_xys']
    for annot in ut.ProgIter(unique_lazy_annots, 'preload vecs'):
        annot['vecs']

    # Extract pairs of annot objects (with shared caches)
    lazy_annots1 = ut.take(configured_lazy_annots[qannot_cfg], query_aids)
    lazy_annots2 = ut.take(configured_lazy_annots[dannot_cfg], data_aids)

    # TODO: param search over grid
    #     'use_sv': [0, 1],
    #     'use_fg': [0, 1],
    #     'use_ratio_test': [0, 1],
    matches_RAT = [
        vt.PairwiseMatch(annot1, annot2)
        for annot1, annot2 in zip(lazy_annots1, lazy_annots2)
    ]

    # Construct global measurements
    global_keys = ['yaw', 'qual', 'gps', 'time']
    for match in ut.ProgIter(matches_RAT, label='setup globals'):
        match.add_global_measures(global_keys)

    # Preload flann for only specific annots
    for match in ut.ProgIter(matches_RAT, label='preload FLANN'):
        match.annot1['flann']

    cfgdict = hyper_params.vsone_assign
    # Find one-vs-one matches
    # cfgdict = {'checks': 20, 'symmetric': False}
    for match in ut.ProgIter(matches_RAT, label='assign vsone'):
        match.assign(cfgdict=cfgdict)

    # gridsearch_ratio_thresh()
    # vt.matching.gridsearch_match_operation(matches_RAT, 'apply_ratio_test', {
    #     'ratio_thresh': np.linspace(.6, .7, 50)
    # })
    for match in ut.ProgIter(matches_RAT, label='apply ratio thresh'):
        match.apply_ratio_test({'ratio_thresh': 0.638}, inplace=True)

    # TODO gridsearch over sv params
    # vt.matching.gridsearch_match_operation(matches_RAT, 'apply_sver', {
    #     'xy_thresh': np.linspace(0, 1, 3)
    # })
    matches_RAT_SV = [
        match.apply_sver(inplace=True) for match in ut.ProgIter(matches_RAT, label='sver')
    ]

    # Add keypoint spatial information to local features
    for match in matches_RAT_SV:
        match.add_local_measures()
        # key_ = 'norm_xys'
        # norm_xy1 = match.annot1[key_].take(match.fm.T[0], axis=1)
        # norm_xy2 = match.annot2[key_].take(match.fm.T[1], axis=1)
        # match.local_measures['norm_x1'] = norm_xy1[0]
        # match.local_measures['norm_y1'] = norm_xy1[1]
        # match.local_measures['norm_x2'] = norm_xy2[0]
        # match.local_measures['norm_y2'] = norm_xy2[1]

        # match.local_measures['scale1'] = vt.get_scales(
        #     match.annot1['kpts'].take(match.fm.T[0], axis=0))
        # match.local_measures['scale2'] = vt.get_scales(
        #     match.annot2['kpts'].take(match.fm.T[1], axis=0))

    # Create another version where we find global normalizers for the data
    # qreq_.load_indexer()
    # matches_SV_LNBNN = batch_apply_lnbnn(matches_RAT_SV, qreq_, inplace=True)

    # if 'weight' in cfgdict:
    #     for match in matches_SV_LNBNN[::-1]:
    #         lnbnn_dist = match.local_measures['lnbnn']
    #         ndist = match.local_measures['lnbnn_norm_dist']
    #         weights = match.local_measures[cfgdict['weight']]
    #         match.local_measures['weighted_lnbnn'] = weights * lnbnn_dist
    #         match.local_measures['weighted_lnbnn_norm_dist'] = weights * ndist
    #         match.fs = match.local_measures['weighted_lnbnn']

    cached_data = {
        # 'RAT': matches_RAT,
        'RAT_SV': matches_RAT_SV,
        # 'SV_LNBNN': matches_SV_LNBNN,
    }
    return cached_data

    # NOTE: everything below is unreachable scratch code (it follows the return
    # statement above); kept for reference.
    from scipy.sparse import coo_matrix

    def quick_cm(y_true, y_pred, labels, sample_weight):
        n_labels = len(labels)
        C = coo_matrix(
            (sample_weight, (y_true, y_pred)), shape=(n_labels, n_labels)
        ).toarray()
        return C

    def quick_mcc(C):
        """ assumes y_true and y_pred are in index/encoded format """
        t_sum = C.sum(axis=1)
        p_sum = C.sum(axis=0)
        n_correct = np.diag(C).sum()
        n_samples = p_sum.sum()
        cov_ytyp = n_correct * n_samples - np.dot(t_sum, p_sum)
        cov_ypyp = n_samples ** 2 - np.dot(p_sum, p_sum)
        cov_ytyt = n_samples ** 2 - np.dot(t_sum, t_sum)
        mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
        return mcc

    def mcc_hack():
        sample_weight = np.ones(len(self.samples), dtype=int)
        task_mccs = ut.ddict(dict)
        # Determine threshold levels per score type
        score_to_order = {}
        for scoretype in score_dict.keys():
            y_score = score_dict[scoretype].values
            sortx = np.argsort(y_score, kind='mergesort')[::-1]
            y_score = y_score[sortx]
            distinct_value_indices = np.where(np.diff(y_score))[0]
            threshold_idxs = np.r_[distinct_value_indices, y_score.size - 1]
            thresh = y_score[threshold_idxs]
            score_to_order[scoretype] = (sortx, y_score, thresh)

        classes_ = np.array([0, 1], dtype=int)
        for task in task_list:
            labels = self.samples.subtasks[task]
            for sublabels in labels.gen_one_vs_rest_labels():
                for scoretype in score_dict.keys():
                    sortx, y_score, thresh = score_to_order[scoretype]
                    y_true = sublabels.y_enc[sortx]
                    mcc = -np.inf
                    for t in thresh:
                        y_pred = (y_score > t).astype(int)
                        C1 = quick_cm(y_true, y_pred, classes_, sample_weight)
                        mcc1 = quick_mcc(C1)
                        if mcc1 < 0:
                            C2 = quick_cm(y_true, 1 - y_pred, classes_, sample_weight)
                            mcc1 = quick_mcc(C2)
                        mcc = max(mcc1, mcc)
                    # logger.info('mcc = %r' % (mcc,))
                    task_mccs[sublabels.task_name][scoretype] = mcc
        return task_mccs

    if 0:
        with ut.Timer('mcc'):
            task_mccs = mcc_hack()
            logger.info('\nMCC of simple scoring measures:')
            df = pd.DataFrame.from_dict(task_mccs, orient='index')
            from utool.experimental.pandas_highlight import to_string_monkey

            logger.info(to_string_monkey(df, highlight_cols=np.arange(len(df.columns))))
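            # Sanity-check sketch (added for illustration, not in the original
            # scratch code): quick_mcc computed from quick_cm should agree with
            # sklearn's reference matthews_corrcoef on a small binary example.
            from sklearn.metrics import matthews_corrcoef

            _y_true = np.array([0, 0, 1, 1, 1, 0])
            _y_pred = np.array([0, 1, 1, 1, 0, 0])
            _w = np.ones(len(_y_true), dtype=int)
            _C = quick_cm(_y_true, _y_pred, np.array([0, 1]), _w)
            assert np.isclose(quick_mcc(_C), matthews_corrcoef(_y_true, _y_pred))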