def test_pyflann_searches():
    """
    CommandLine:
        python -m vtool.tests.test_pyflann --test-test_pyflann_searches

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.test_pyflann import *  # NOQA
        >>> # build test data
        >>> # execute function
        >>> result = test_pyflann_searches()
        >>> # verify results
        >>> print(result)
    """
    try:
        num_neighbors = 3
        pts = testdata_points(nPts=5743, nDims=2)
        qpts = testdata_points(nPts=7, nDims=2)
        import vtool as vt
        # sample a radius
        radius = vt.L2(pts[0:1], qpts[0:1])[0] * 2 + 1
        flann = pyflann.FLANN()
        print('NN_OnTheFly')
        # build nn_index on the fly
        indices1, dists1 = flann.nn(pts, qpts, num_neighbors, algorithm='hierarchical')
        print(utool.hz_str('indices1, dists1 = ', indices1, dists1))
        _build_params = flann.build_index(pts, algorithm='kmeans')
        del _build_params
        print('NN_Index')
        indices2, dists2 = flann.nn_index(qpts, num_neighbors=num_neighbors)
        print(utool.hz_str('indices2, dists2 = ', indices2, dists2))
        # this can only be called on one query point at a time
        # because the output size is unknown
        print('NN_Radius, radius=%r' % (radius,))
        indices3, dists3 = flann.nn_radius(pts[0], radius)
        print('indices3 = %r ' % (indices3,))
        print('dists3 = %r ' % (dists3,))
        assert np.all(dists3 < radius)
    except Exception as ex:
        utool.printex(ex, key_list=[
            'query',
            'query.shape',
            'pts.shape',
        ], pad_stdout=True)
        #utool.embed()
        raise
def check_expr_eq(expr1, expr2, verbose=True):
    """
    Does not work in general. Problem is not decidable. Thanks Richard.

    Args:
        expr1 (?):
        expr2 (?):

    CommandLine:
        python -m vtool.symbolic --test-check_expr_eq

    SeeAlso:
        vt.symbolic_randcheck

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.symbolic import *  # NOQA
        >>> expr1 = sympy.Matrix([ [sx*x + 1.0*tx + w1*y], [sy*y + 1.0*ty + w2*x], [1.0]])
        >>> expr2 = sympy.Matrix([ [sx*x + tx + w1*y], [sy*y + ty + w2*x], [1]])
        >>> result = check_expr_eq(expr1, expr2)
        >>> print(result)
    """
    if isinstance(expr1, six.string_types):
        expr1 = sympy.simplify(expr1)
    if isinstance(expr2, six.string_types):
        expr2 = sympy.simplify(expr2)
    print(ut.hz_str('Checking if ', repr(expr1), ' == ', repr(expr2)))
    random_point_check = expr1.equals(expr2)
    if random_point_check is None:
        failexpr = expr1.equals(expr2, failing_expression=True)
        print('failexpr = %r' % (failexpr,))
        random_point_check = False
    print('... seems %r' % (random_point_check,))
    #return random_point_check
    expr3 = expr1 - expr2
    if not random_point_check and True:
        common_symbols = expr1.free_symbols.intersection(expr2.free_symbols)
        if len(common_symbols):
            y = sympy.symbols('y')  # Hack, should be a new symbol
            symbol = common_symbols.pop()
            soln1 = sympy.solve(sympy.Eq(sympy.simplify(expr1), y), symbol)
            soln2 = sympy.solve(sympy.Eq(sympy.simplify(expr2), y), symbol)
            print('Solving expr1 for common symbol: ' + str(soln1))
            print('Solving expr2 for common symbol: ' + str(soln2))
            if soln1 == soln2:
                print('This seems True')
            else:
                print('This seems False')
            sympy.solve(sympy.Eq(sympy.simplify(expr2), y), 'd')
    print(ut.hz_str('... checking 0 ', repr(expr3)))
    # Does not always work.
    print('(not guaranteed to work) expr3.is_zero = %r' % (expr3.is_zero,))
    return expr3.is_zero
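# --- Added example (not part of the original module) ---
# Minimal sketch of the sympy behavior that check_expr_eq relies on, assuming
# only sympy is installed: Expr.equals() tests equality by evaluating both
# expressions at random points and may return True, False, or None when it
# cannot decide, which is why the function above treats None as a failure.
import sympy

_x, _y = sympy.symbols('x y')
_expr1 = sympy.simplify('x + 2*y')
_expr2 = sympy.simplify('2*y + x')
print(_expr1.equals(_expr2))        # True
print((_expr1 - _expr2).is_zero)    # True for this trivial case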
def print_priors(model, ignore_ttypes=[], title='Priors', color='blue'):
    ut.colorprint('\n --- %s ---' % (title,), color=color)
    for ttype, cpds in model.ttype2_cpds.items():
        if ttype not in ignore_ttypes:
            for fs_ in ut.ichunks(cpds, 4):
                ut.colorprint(ut.hz_str([f._cpdstr('psql') for f in fs_]), color)
def test_pyflann_tune():
    """
    CommandLine:
        python -m vtool.tests.test_pyflann --test-test_pyflann_tune

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.test_pyflann import *  # NOQA
        >>> # build test data
        >>> # execute function
        >>> result = test_pyflann_tune()
        >>> # verify results
        >>> print(result)
    """
    print('Create random qpts and database data')
    pts = testdata_points(nPts=1009)
    qpts = testdata_points(nPts=7)
    num_neighbors = 3
    #num_data = len(data)
    # untuned query
    flann = pyflann.FLANN()
    index_untuned, dist_untuned = flann.nn(pts, qpts, num_neighbors)
    # tuned query
    flannkw = dict(algorithm='autotuned', target_precision=.01,
                   build_weight=0.01, memory_weight=0.0, sample_fraction=0.001)
    flann_tuned = pyflann.FLANN()
    tuned_params = flann_tuned.build_index(pts, **flannkw)
    index_tuned, dist_tuned = flann_tuned.nn_index(qpts, num_neighbors=num_neighbors)
    print(utool.hz_str('index_tuned, dist_tuned = ', index_tuned, dist_tuned))
    print('')
    print(utool.hz_str('index_untuned, dist_untuned = ', index_untuned, dist_untuned))
    print(dist_untuned >= dist_tuned)
    return tuned_params
def test_pyflann_add_point():
    """
    CommandLine:
        python -m vtool.tests.test_pyflann --test-test_pyflann_add_point

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.test_pyflann import *  # NOQA
        >>> # build test data
        >>> # execute function
        >>> result = test_pyflann_add_point()
        >>> # verify results
        >>> print(result)
    """
    # Test parameters
    num_neighbors = 3
    pts = testdata_points(nPts=1009)
    qpts = testdata_points(nPts=7)
    newpts = testdata_points(nPts=1013)
    # build index
    print('Build Index')
    flann = pyflann.FLANN()
    _build_params = flann.build_index(pts)
    print(_build_params)
    print('NN_Index')
    indices1, dists1 = flann.nn_index(qpts, num_neighbors=num_neighbors)
    assert np.all(indices1 < pts.shape[0]), 'indices should be less than num pts'
    print(utool.hz_str('indices1, dists1 = ', indices1, dists1))
    print('Adding points')
    flann.add_points(newpts, rebuild_threshold=2)
    print('NN_Index')
    indices2, dists2 = flann.nn_index(qpts, num_neighbors=num_neighbors)
    print(utool.hz_str('indices2, dists2 = ', indices2, dists2))
    assert np.any(indices2 > pts.shape[0]), 'should be some indexes into new points'
    assert np.all(indices2 < pts.shape[0] + newpts.shape[0]), 'but not more than the points being added'
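# --- Added example (not part of the original tests) ---
# Hedged sketch of the pyflann call pattern exercised by the tests above
# (build_index -> nn_index -> add_points); assumes the pyflann package is
# importable and accepts small random float arrays like the test data.
import numpy as np
import pyflann

_rng = np.random.RandomState(0)
_pts = _rng.rand(1000, 2)
_qpts = _rng.rand(5, 2)

_flann = pyflann.FLANN()
_flann.build_index(_pts)                                   # build a reusable index
_idx1, _dist1 = _flann.nn_index(_qpts, num_neighbors=3)    # query the index
_flann.add_points(_rng.rand(100, 2), rebuild_threshold=2)  # grow the index in place
_idx2, _dist2 = _flann.nn_index(_qpts, num_neighbors=3)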
def print_factors(model, factor_list):
    if hasattr(model, 'var2_cpd'):
        semtypes = [model.var2_cpd[f.variables[0]].ttype for f in factor_list]
    else:
        semtypes = [0] * len(factor_list)
    for type_, factors in ut.group_items(factor_list, semtypes).items():
        logger.info('Result Factors (%r)' % (type_,))
        factors = ut.sortedby(factors, [f.variables[0] for f in factors])
        for fs_ in ut.ichunks(factors, 4):
            ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]), 'yellow')
def print_factors(model, factor_list):
    if hasattr(model, 'var2_cpd'):
        semtypes = [model.var2_cpd[f.variables[0]].ttype for f in factor_list]
    else:
        semtypes = [0] * len(factor_list)
    for type_, factors in ut.group_items(factor_list, semtypes).items():
        print('Result Factors (%r)' % (type_,))
        factors = ut.sortedby(factors, [f.variables[0] for f in factors])
        for fs_ in ut.ichunks(factors, 4):
            ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]), 'yellow')
def evalprint(str_, globals_=None, locals_=None, simplify=False):
    if globals_ is None:
        globals_ = ut.get_parent_globals()
    if locals_ is None:
        locals_ = ut.get_parent_locals()
    if isinstance(str_, six.string_types):
        var = eval(str_, globals_, locals_)
    else:
        var = str_
        str_ = ut.get_varname_from_stack(var, N=1)
    if simplify is True:
        var = sympy.simplify(var)
    print(ut.hz_str(str_ + ' = ', repr(var)))
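# --- Added example (not part of the original module) ---
# Hypothetical usage of evalprint, assuming utool is imported as `ut` and
# sympy is available (as in the module above); the string is evaluated in the
# caller's scope and printed as "name = value".
import sympy

_x, _y = sympy.symbols('x y')
_expr = (_x + _y) ** 2
evalprint('_expr')                               # prints "_expr = (x + y)**2"
evalprint('sympy.expand(_expr)', simplify=True)  # simplifies before printing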
def get_inspect_str(qres, ibs=None, name_scoring=False):
    qres.assert_self()
    #ut.embed()
    top_lbls = [' top aids', ' scores', ' rawscores', ' ranks']

    top_aids = np.array(qres.get_top_aids(num=6, name_scoring=name_scoring, ibs=ibs), dtype=np.int32)
    top_scores = np.array(qres.get_aid_scores(top_aids), dtype=np.float64)
    top_rawscores = np.array(qres.get_aid_scores(top_aids, rawscore=True), dtype=np.float64)
    top_ranks = np.array(qres.get_aid_ranks(top_aids), dtype=np.int32)
    top_list = [top_aids, top_scores, top_rawscores, top_ranks]

    if ibs is not None:
        top_lbls += [' isgt']
        istrue = qres.get_aid_truth(ibs, top_aids)
        top_list.append(np.array(istrue, dtype=np.int32))
    if name_scoring:
        top_lbls = ['top nid'] + top_lbls
        top_list = [ibs.get_annot_name_rowids(top_aids)] + top_list

    top_stack = np.vstack(top_list)
    #top_stack = np.array(top_stack, dtype=object)
    top_stack = np.array(top_stack, dtype=np.float32)  #np.int32)
    top_str = np.array_str(top_stack, precision=3, suppress_small=True, max_line_width=200)

    top_lbl = '\n'.join(top_lbls)
    inspect_list = ['QueryResult', qres.cfgstr, ]
    if ibs is not None:
        gt_ranks = qres.get_gt_ranks(ibs=ibs)
        gt_scores = qres.get_gt_scores(ibs=ibs)
        inspect_list.append('gt_ranks = %r' % gt_ranks)
        inspect_list.append('gt_scores = %r' % gt_scores)

    nFeatMatch_list = get_num_feats_in_matches(qres)
    nFeatMatch_stats_str = ut.get_stats_str(nFeatMatch_list, newlines=True, exclude_keys=('nMin', 'nMax'))

    inspect_list.extend([
        'qaid=%r ' % qres.qaid,
        ut.hz_str(top_lbl, ' ', top_str),
        'num feat matches per annotation stats:',
        #ut.indent(ut.dict_str(nFeatMatch_stats)),
        ut.indent(nFeatMatch_stats_str),
    ])

    inspect_str = '\n'.join(inspect_list)
    #inspect_str = ut.indent(inspect_str, '[INSPECT] ')
    return inspect_str
def pandas_repr(df):
    import utool as ut
    args = [
        df.values,
    ]
    kwargs = [
        ('columns', df.columns.values.tolist()),
        ('index', df.index.values.tolist()),
    ]
    header = 'pd.DataFrame('
    footer = ')'

    arg_parts = [
        ut.hz_str(' ', ut.repr2(arg))
        for arg in args if arg is not None
    ]
    kwarg_parts = [
        ut.hz_str(' {}={}'.format(key, ut.repr2(val)))
        for key, val in kwargs if val is not None
    ]
    body = ',\n'.join(arg_parts + kwarg_parts)
    dfrepr = '\n'.join([header, body, footer])
    print(dfrepr)
    pass
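# --- Added example (not part of the original module) ---
# Hypothetical usage of pandas_repr, assuming pandas, numpy, and utool are
# installed; it prints a `pd.DataFrame(...)` expression that rebuilds the frame.
import numpy as np
import pandas as pd

_df = pd.DataFrame(np.arange(6).reshape(3, 2),
                   columns=['a', 'b'], index=['r1', 'r2', 'r3'])
pandas_repr(_df)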
def make_test_similarity(test_case):
    # toy_params = {
    #     True: {'mu': 0.9, 'sigma': .1},
    #     False: {'mu': 0.1, 'sigma': .4}
    # }
    # tau = np.pi * 2
    from wbia import constants as const

    # view_to_ori = const.VIEWTEXT_TO_YAW_RADIANS
    view_to_ori = ut.map_dict_keys(
        lambda x: const.YAWALIAS[x], const.VIEWTEXT_TO_YAW_RADIANS
    )
    # view_to_ori = {
    #     'F': -1 * tau / 4,
    #     'L': 0 * tau / 4,
    #     'B': 1 * tau / 4,
    #     'R': 2 * tau / 4,
    # }
    import vtool as vt

    nid_list = np.array(ut.dict_take_column(test_case, 'name'))
    yaw_list = np.array(
        ut.dict_take(view_to_ori, ut.dict_take_column(test_case, 'view'))
    )

    rng = np.random.RandomState(0)
    pmat = []
    for idx in range(len(test_case)):
        nid = nid_list[idx]
        yaw = yaw_list[idx]
        p_same = nid == nid_list
        p_comp = 1 - vt.ori_distance(yaw_list, yaw) / np.pi
        # estimate noisy measurements
        p_same_m = np.clip(p_same + rng.normal(0, 0.5, size=len(p_same)), 0, 0.9)
        p_comp_m = np.clip(p_comp + rng.normal(0, 0.5, size=len(p_comp)), 0, 0.9)
        #
        p_same_and_comp = p_same_m * p_comp_m
        pmat.append(p_same_and_comp)
    #
    P = np.array(pmat)
    P[np.diag_indices(len(P))] = 0
    P = P + P.T / 2
    P = np.clip(P, 0.01, 0.99)
    logger.info(ut.hz_str(' P = ', ut.repr2(P, precision=2, max_line_width=140)))
    return P
def make_test_similarity(test_case):
    #toy_params = {
    #    True: {'mu': 0.9, 'sigma': .1},
    #    False: {'mu': 0.1, 'sigma': .4}
    #}
    # tau = np.pi * 2
    from ibeis import constants as const
    # view_to_ori = const.VIEWTEXT_TO_YAW_RADIANS
    view_to_ori = ut.map_dict_keys(lambda x: const.YAWALIAS[x],
                                   const.VIEWTEXT_TO_YAW_RADIANS)
    # view_to_ori = {
    #     'F': -1 * tau / 4,
    #     'L': 0 * tau / 4,
    #     'B': 1 * tau / 4,
    #     'R': 2 * tau / 4,
    # }
    import vtool as vt
    nid_list = np.array(ut.dict_take_column(test_case, 'name'))
    yaw_list = np.array(ut.dict_take(view_to_ori, ut.dict_take_column(test_case, 'view')))

    rng = np.random.RandomState(0)
    pmat = []
    for idx in range(len(test_case)):
        nid = nid_list[idx]
        yaw = yaw_list[idx]
        p_same = nid == nid_list
        p_comp = 1 - vt.ori_distance(yaw_list, yaw) / np.pi
        # estimate noisy measurements
        p_same_m = np.clip(p_same + rng.normal(0, .5, size=len(p_same)), 0, .9)
        p_comp_m = np.clip(p_comp + rng.normal(0, .5, size=len(p_comp)), 0, .9)
        #
        p_same_and_comp = p_same_m * p_comp_m
        pmat.append(p_same_and_comp)
    #
    P = np.array(pmat)
    P[np.diag_indices(len(P))] = 0
    P = P + P.T / 2
    P = np.clip(P, .01, .99)
    print(ut.hz_str(' P = ', ut.array_repr2(P, precision=2, max_line_width=140)))
    return P
def make_graph(infr, show=False):
    import networkx as nx
    import itertools
    cm_list = infr.cm_list
    unique_nids, prob_names = infr.make_prob_names()
    thresh = infr.choose_thresh()

    # Simply cut any edge with a weight less than a threshold
    qaid_list = [cm.qaid for cm in cm_list]
    postcut = prob_names > thresh
    qxs, nxs = np.where(postcut)
    if False:
        kw = dict(precision=2, max_line_width=140, suppress_small=True)
        print(ut.hz_str('prob_names = ', ut.array2string2((prob_names), **kw)))
        print(ut.hz_str('postcut = ', ut.array2string2((postcut).astype(np.int), **kw)))
    matching_qaids = ut.take(qaid_list, qxs)
    matched_nids = ut.take(unique_nids, nxs)

    qreq_ = infr.qreq_

    nodes = ut.unique(qreq_.qaids.tolist() + qreq_.daids.tolist())
    if not hasattr(qreq_, 'dnids'):
        qreq_.dnids = qreq_.ibs.get_annot_nids(qreq_.daids)
        qreq_.qnids = qreq_.ibs.get_annot_nids(qreq_.qaids)
    dnid2_daids = ut.group_items(qreq_.daids, qreq_.dnids)
    grouped_aids = dnid2_daids.values()
    matched_daids = ut.take(dnid2_daids, matched_nids)
    name_cliques = [list(itertools.combinations(aids, 2)) for aids in grouped_aids]
    aid_matches = [list(ut.product([qaid], daids))
                   for qaid, daids in zip(matching_qaids, matched_daids)]

    graph = nx.Graph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(ut.flatten(name_cliques))
    graph.add_edges_from(ut.flatten(aid_matches))

    #matchless_quries = ut.take(qaid_list, ut.index_complement(qxs, len(qaid_list)))
    name_nodes = [('nid', l) for l in qreq_.dnids]
    db_aid_nid_edges = list(zip(qreq_.daids, name_nodes))
    #query_aid_nid_edges = list(zip(matching_qaids, [('nid', l) for l in matched_nids]))
    #G = nx.Graph()
    #G.add_nodes_from(matchless_quries)
    #G.add_edges_from(db_aid_nid_edges)
    #G.add_edges_from(query_aid_nid_edges)

    graph.add_edges_from(db_aid_nid_edges)

    if infr.user_feedback is not None:
        user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
        p_bg = 0.0
        part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
        part2 = p_bg * user_feedback['p_notcomp']
        p_same_list = part1 + part2
        for aid1, aid2, p_same in zip(user_feedback['aid1'],
                                      user_feedback['aid2'], p_same_list):
            if p_same > .5:
                if not graph.has_edge(aid1, aid2):
                    graph.add_edge(aid1, aid2)
            else:
                if graph.has_edge(aid1, aid2):
                    graph.remove_edge(aid1, aid2)

    if show:
        import plottool as pt
        nx.set_node_attributes(graph, 'color', {aid: pt.LIGHT_PINK for aid in qreq_.daids})
        nx.set_node_attributes(graph, 'color', {aid: pt.TRUE_BLUE for aid in qreq_.qaids})
        nx.set_node_attributes(graph, 'color', {
            aid: pt.LIGHT_PURPLE
            for aid in np.intersect1d(qreq_.qaids, qreq_.daids)})
        nx.set_node_attributes(graph, 'label', {node: 'n%r' % (node[1],) for node in name_nodes})
        nx.set_node_attributes(graph, 'color', {node: pt.LIGHT_GREEN for node in name_nodes})

    if show:
        import plottool as pt
        pt.show_nx(graph, layoutkw={'prog': 'neato'}, verbose=False)
    return graph
def draw_em_graph(P, Pn, PL, gam, num_labels):
    """
    python -m ibeis.algo.hots.testem test_em --show --no-cnn
    """
    num_labels = PL.shape[1]
    name_nodes = ['N%d' % x for x in list(range(1, num_labels + 1))]
    annot_nodes = ['X%d' % x for x in list(range(1, len(Pn) + 1))]

    nodes = name_nodes + annot_nodes

    PL2 = gam[:, num_labels:].T
    PL2 += .01
    PL2 = PL2 / PL2.sum(axis=1)[:, None]
    # PL2 = PL2 / np.linalg.norm(PL2, axis=0)
    zero_part = np.zeros((num_labels, len(Pn) + num_labels))
    prob_part = np.hstack([PL2, Pn])
    print(ut.hz_str(' PL2 = ', ut.repr2(PL2, precision=2)))
    # Redo p with posteriors
    if ut.get_argflag('--postem'):
        P = np.vstack([zero_part, prob_part])

    weight_matrix = P  # NOQA
    graph = ut.nx_from_matrix(P, nodes=nodes)
    graph = graph.to_directed()
    # delete graph
    dup_edges = []
    seen_ = set([])
    for u, v in graph.edges():
        if u < v:
            u, v = v, u
        if (u, v) not in seen_:
            seen_.add((u, v))
        else:
            dup_edges.append((u, v))
    graph.remove_edges_from(dup_edges)
    import plottool_ibeis as pt
    import networkx as nx

    if len(name_nodes) == 3 and len(annot_nodes) == 4:
        graph.nodes[annot_nodes[0]]['pos'] = (20., 200.)
        graph.nodes[annot_nodes[1]]['pos'] = (220., 200.)
        graph.nodes[annot_nodes[2]]['pos'] = (20., 100.)
        graph.nodes[annot_nodes[3]]['pos'] = (220., 100.)
        graph.nodes[name_nodes[0]]['pos'] = (10., 300.)
        graph.nodes[name_nodes[1]]['pos'] = (120., 300.)
        graph.nodes[name_nodes[2]]['pos'] = (230., 300.)
        nx.set_node_attributes(graph, name='pin', values='true')

        print('annot_nodes = %r' % (annot_nodes,))
        print('name_nodes = %r' % (name_nodes,))

        for u in annot_nodes:
            for v in name_nodes:
                if graph.has_edge(u, v):
                    print('1) u, v = %r' % ((u, v),))
                    graph.edge[u][v]['taillabel'] = graph.edge[u][v]['label']
                    graph.edge[u][v]['color'] = pt.ORANGE
                    graph.edge[u][v]['labelcolor'] = pt.BLUE
                    del graph.edge[u][v]['label']
                elif graph.has_edge(v, u):
                    print('2) u, v = %r' % ((u, v),))
                    graph.edge[v][u]['headlabel'] = graph.edge[v][u]['label']
                    graph.edge[v][u]['color'] = pt.ORANGE
                    graph.edge[v][u]['labelcolor'] = pt.BLUE
                    del graph.edge[v][u]['label']
                else:
                    print((u, v))
                    print('!!')

    # import itertools
    # name_const_edges = [(u, v, {'style': 'invis'}) for u, v in itertools.combinations(name_nodes, 2)]
    # graph.add_edges_from(name_const_edges)
    # nx.set_edge_attributes(graph, name='constraint', values={edge: False for edge in graph.edges() if edge[0] == 'b' or edge[1] == 'b'})
    # nx.set_edge_attributes(graph, name='constraint', values={edge: False for edge in graph.edges() if edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, name='constraint', values={edge: True for edge in graph.edges() if edge[0] in name_nodes or edge[1] in name_nodes})
    # nx.set_edge_attributes(graph, name='constraint', values={edge: True for edge in graph.edges() if (edge[0] in ['a', 'b'] and edge[1] in ['a', 'b']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, name='constraint', values={edge: True for edge in graph.edges() if (edge[0] in ['c'] or edge[1] in ['c']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, name='constraint', values={edge: True for edge in graph.edges() if (edge[0] in ['a'] or edge[1] in ['a']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, name='constraint', values={edge: True for edge in graph.edges() if (edge[0] in ['b'] or edge[1] in ['b']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # graph.add_edges_from([('root', n) for n in nodes])
    # {node: 'names' for node in name_nodes})
    nx.set_node_attributes(graph, name='color', values={node: pt.RED for node in name_nodes})
    # nx.set_node_attributes(graph, name='width', values={node: 20 for node in nodes})
    # nx.set_node_attributes(graph, name='height', values={node: 20 for node in nodes})
    #nx.set_node_attributes(graph, name='group', values={node: 'names' for node in name_nodes})
    #nx.set_node_attributes(graph, name='group', values={node: 'annots' for node in annot_nodes})
    nx.set_node_attributes(graph, name='groupid', values={node: 'names' for node in name_nodes})
    nx.set_node_attributes(graph, name='groupid', values={node: 'annots' for node in annot_nodes})
    graph.graph['clusterrank'] = 'local'
    # graph.graph['groupattrs'] = {
    #     'names': {'rankdir': 'LR', 'rank': 'source'},
    #     'annots': {'rankdir': 'TB', 'rank': 'source'},
    # }
    ut.nx_delete_edge_attr(graph, 'weight')
    # pt.show_nx(graph, fontsize=10, layoutkw={'splines': 'spline', 'prog': 'dot', 'sep': 2.0}, verbose=1)
    layoutkw = {
        # 'rankdir': 'LR',
        'splines': 'spline',
        # 'splines': 'ortho',
        # 'splines': 'curved',
        # 'compound': 'True',
        # 'prog': 'dot',
        'prog': 'neato',
        # 'packMode': 'clust',
        # 'sep': 4,
        # 'nodesep': 1,
        # 'ranksep': 1,
    }
    #pt.show_nx(graph, fontsize=12, layoutkw=layoutkw, verbose=0, as_directed=False)
    pt.show_nx(graph, fontsize=6, fontname='Ubuntu', layoutkw=layoutkw,
               verbose=0, as_directed=False)
    pt.interactions.zoom_factory()
def try_em2(prob_names, prob_annots=None):
    """
    assert prob_names.shape == (nAnnots, nNames)
    """
    learn_rate = 0.05
    num_iters = 1

    # Matrix of unary probabilities: the probability that each node takes on
    # a given label, independent of its edges.
    num_annots, num_names = prob_names.shape

    # prevent zero probabilities
    prob_names_ = prob_names + 1E-9
    prob_names_ /= prob_names_.sum(axis=1)[:, None]

    if prob_annots is None:
        prob_annots_ = np.full((num_annots, num_annots), 1 / num_annots)
        prob_annots_[np.diag_indices(num_annots)] *= 1.01
        # perturb
        rng = np.random.RandomState(0)
        prob_annots_ += (rng.randn(*prob_annots_.shape)) / 100
        prob_annots_ /= prob_annots_.sum(axis=1)[:, None]
        prob_annots_ = (prob_annots_.T + prob_annots_) / 2
    else:
        prob_annots_ = prob_annots + 1E-9
        prob_annots_ /= prob_annots_.sum(axis=1)[:, None]

    # Stack everything into a single matrix
    prob_part = np.hstack([prob_names_, prob_annots_])
    zero_part = np.zeros((num_names, num_annots + num_names))
    prior = np.vstack([zero_part, prob_part])

    # Gamma will hold a probability distribution over the nodes.
    # The labeled nodes must match themselves.
    # The unlabeled nodes are initialized with a uniform distribution.
    gam = np.hstack([np.eye(num_names),
                     np.ones((num_names, num_annots)) / num_names])

    verbose = 1
    if verbose:
        print('Initialize')
        print('num_names = %r' % (num_names,))
        print(ut.hz_str('prior = ',
                        ut.repr2(prob_part[:, :], precision=2,
                                 max_line_width=140, suppress_small=True)))
        print(ut.hz_str('gamma = ',
                        ut.repr2(gam[:, :], max_line_width=140, precision=2,
                                 suppress_small=True)))
    #print(ut.hz_str(' gamma = ', ut.repr2(gam, max_line_width=140, precision=2)))

    delta_i = np.zeros(num_names)

    def dErr(i, gam, prior, delta_i=delta_i, num_names=num_names):
        # expected likelihood is cross entropy error
        delta_i[:] = 0
        # Compute the gradient of the cross entropy error
        # This is over both names and annotations
        jdxs = [j for j in range(prior.shape[0]) if j != i]
        prior_ij = prior[i, jdxs]
        np.log(prior_ij / (1 - prior_ij))
        gam[:, jdxs]

        for j in range(prior.shape[0]):
            if i != j:
                delta_i += gam[:, j] * np.log(prior[i, j] / (1 - prior[i, j]))
        # compute the projected gradient
        delta_i_hat = delta_i - delta_i.sum() / num_names
        return delta_i_hat

    # Build a node for each annot and each name
    num_nodes = num_annots + num_names

    # Maximizes the expected likelihood of gamma
    dGam = np.zeros(gam.shape)
    # for count in range(num_iters):
    for count in ut.ProgIter(range(num_iters), label='EM', bs=True):
        # Compute error gradient
        for i in range(num_names, num_nodes):
            dGam[:, i] = dErr(i, gam, prior)
        # Make a step in the gradient direction
        # print(ut.hz_str(' dGam = ', ut.repr2(dGam, max_line_width=140, precision=2)))
        gam = gam + learn_rate * dGam
        # Normalize
        gam = np.clip(gam, 0, 1)
        for i in range(num_names, num_nodes):
            gam[:, i] = gam[:, i] / np.sum(gam[:, i])
    # print(ut.hz_str(' gamma = ', ut.repr2(gam, max_line_width=140, precision=2)))
    if verbose:
        print(ut.hz_str(' gamma = ',
                        ut.repr2(gam[:, num_names:], max_line_width=140,
                                 precision=2, suppress_small=True)))
    print('Finished')
    return gam
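# --- Added example (not part of the original module) ---
# Hypothetical driver for try_em2, assuming numpy and utool (`ut`) as above:
# feed it a row-stochastic annot-by-name probability matrix and it returns a
# gamma matrix with shape (num_names, num_names + num_annots).
import numpy as np

_rng = np.random.RandomState(0)
_num_annots, _num_names = 5, 3
_prob_names = _rng.rand(_num_annots, _num_names)
_prob_names /= _prob_names.sum(axis=1)[:, None]   # rows sum to 1
_gam = try_em2(_prob_names)
print(_gam.shape)  # (3, 8)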
def try_em():
    """
    CommandLine:
        python -m ibeis.algo.hots.testem test_em --show
        python -m ibeis.algo.hots.testem test_em --show --no-cnn

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.testem import *  # NOQA
        >>> P, Pn, PL, gam, num_labels = test_em()
        >>> ut.quit_if_noshow()
        >>> import plottool_ibeis as pt
        >>> pt.qt4ensure()
        >>> draw_em_graph(P, Pn, PL, gam, num_labels)
        >>> ut.show_if_requested()
    """
    print('EM')

    # Matrix of unary probabilities: the probability that each node takes on
    # a given label, independent of its edges.

    test_case = [
        {'name': 1, 'view': 'L'},
        {'name': 1, 'view': 'L'},
        {'name': 2, 'view': 'L'},
        {'name': 2, 'view': 'R'},
        {'name': 2, 'view': 'B'},
        {'name': 3, 'view': 'L'},
        #{'name': 3, 'view': 'L'},
        #{'name': 4, 'view': 'L'},
    ]

    def make_test_similarity(test_case):
        #toy_params = {
        #    True: {'mu': 0.9, 'sigma': .1},
        #    False: {'mu': 0.1, 'sigma': .4}
        #}
        # tau = np.pi * 2
        from ibeis import constants as const
        # view_to_ori = const.VIEWTEXT_TO_YAW_RADIANS
        view_to_ori = ut.map_dict_keys(lambda x: const.YAWALIAS[x],
                                       const.VIEWTEXT_TO_YAW_RADIANS)
        # view_to_ori = {
        #     'F': -1 * tau / 4,
        #     'L': 0 * tau / 4,
        #     'B': 1 * tau / 4,
        #     'R': 2 * tau / 4,
        # }
        import vtool_ibeis as vt

        nid_list = np.array(ut.dict_take_column(test_case, 'name'))
        yaw_list = np.array(
            ut.dict_take(view_to_ori, ut.dict_take_column(test_case, 'view')))

        rng = np.random.RandomState(0)
        pmat = []
        for idx in range(len(test_case)):
            nid = nid_list[idx]
            yaw = yaw_list[idx]
            p_same = nid == nid_list
            p_comp = 1 - vt.ori_distance(yaw_list, yaw) / np.pi
            # estimate noisy measurements
            p_same_m = np.clip(p_same + rng.normal(0, .5, size=len(p_same)), 0, .9)
            p_comp_m = np.clip(p_comp + rng.normal(0, .5, size=len(p_comp)), 0, .9)
            #
            p_same_and_comp = p_same_m * p_comp_m
            pmat.append(p_same_and_comp)
        #
        P = np.array(pmat)
        P[np.diag_indices(len(P))] = 0
        P = P + P.T / 2
        P = np.clip(P, .01, .99)
        print(ut.hz_str(' P = ', ut.repr2(P, precision=2, max_line_width=140)))
        return P

    Pn = make_test_similarity(test_case)

    if False:
        Pn = np.array(np.matrix(
            b"""
            .0 .7 .3 .2 .4 .5;
            .7 .0 .4 .4 .3 .5;
            .3 .4 .0 .6 .1 .5;
            .2 .4 .6 .0 .2 .3;
            .4 .3 .1 .2 .0 .8;
            .5 .5 .5 .3 .8 .0
            """))

        PL = np.array(np.matrix(
            b"""
            .7 .5 .5;
            .8 .4 .3;
            .5 .7 .3;
            .5 .8 .4;
            .3 .2 .8;
            .5 .5 .8
            """))

    if True:
        Pn = np.array(np.matrix(
            b"""
            1.0 0.7 0.4 0.2;
            0.7 1.0 0.4 0.4;
            0.4 0.4 1.0 0.6;
            0.2 0.4 0.6 1.0
            """))

        PL = np.array(np.matrix(
            b"""
            0.7 0.5 0.5;
            0.8 0.4 0.3;
            0.5 0.7 0.3;
            0.5 0.8 0.4
            """))

    num_nodes = Pn.shape[0]

    for num_labels in range(1, 2):
        #Pn = np.array(np.matrix(
        #    b"""
        #    .0 .7 .3 .2 .4 .5;
        #    .7 .0 .4 .4 .3 .5;
        #    .3 .4 .0 .6 .1 .5;
        #    .2 .4 .6 .0 .2 .3;
        #    .4 .3 .1 .2 .0 .8;
        #    .5 .5 .5 .3 .8 .0
        #    """))

        # Uniform distribution over labels
        if 0:
            PL = np.ones((num_nodes, num_labels)) / num_labels
            # Give nodes preferences
            PL[np.diag_indices(num_labels)] *= 1.01
            PL /= np.linalg.norm(PL, axis=0)
            # PL[0, :] = .01 / (num_labels - 1)
            # PL[0, 0] = .99
        else:
            PL /= np.linalg.norm(PL, axis=0)

        # Number of nodes
        num_nodes = Pn.shape[0]
        # Number of classes
        num_labels = PL.shape[1]
        #num_labels = num_nodes
        #if 0 or num_labels != 3:
        #    PL = np.ones((num_nodes, num_labels)) / num_labels
        #    # PL[0, :] = .01 / (num_labels - 1)
        #    # PL[0, 0] = .99
        d = num_labels + num_nodes

        # Stack everything into a single matrix
        zero_part = np.zeros((num_labels, num_nodes + num_labels))
        prob_part = np.hstack([PL, Pn])
        #print(ut.hz_str(' prob_part = ', ut.repr2(prob_part[:, :], precision=2)))
        P = np.vstack([zero_part, prob_part])

        # Gamma will hold a probability distribution over the nodes.
        # The labeled nodes must match themselves.
        # The unlabeled nodes are initialized with a uniform distribution.
        gam = np.hstack([np.eye(num_labels),
                         np.ones((num_labels, num_nodes)) / num_labels])

        print('Initialize')
        print('num_labels = %r' % (num_labels,))
        # print(ut.hz_str(' gamma = ', ut.repr2(gam[:, num_labels:], max_line_width=140, precision=2)))
        print(ut.hz_str(' gamma = ', ut.repr2(gam, max_line_width=140, precision=2)))

        delta_i = np.zeros(num_labels)

        def dErr(i, gam, P, delta_i=delta_i):
            # expected likelihood is cross entropy error
            delta_i[:] = 0
            # Compute the gradient of the cross entropy error
            # This is over both names and annotations
            for j in range(d):
                if i != j:
                    delta_i += gam[:, j] * np.log(P[i, j] / (1 - P[i, j]))
            # compute the projected gradient
            delta_i_hat = delta_i - delta_i.sum() / num_labels
            return delta_i_hat

        # Maximizes the expected likelihood of gamma
        learn_rate = 0.05
        num_iters = 1000
        dGam = np.zeros(gam.shape)
        # for count in range(num_iters):
        for count in ut.ProgIter(range(num_iters), label='EM', bs=True):
            # Compute error gradient
            for i in range(num_labels, d):
                dGam[:, i] = dErr(i, gam, P)
            # Make a step in the gradient direction
            # print(ut.hz_str(' dGam = ', ut.repr2(dGam, max_line_width=140, precision=2)))
            gam = gam + learn_rate * dGam
            # Normalize
            gam = np.clip(gam, 0, 1)
            for i in range(num_labels, d):
                gam[:, i] = gam[:, i] / np.sum(gam[:, i])
        # print(ut.hz_str(' gamma = ', ut.repr2(gam, max_line_width=140, precision=2)))
        # print(ut.hz_str(' gamma = ', ut.repr2(gam[:, num_labels:], max_line_width=140, precision=2)))
        print('Finished')
    return P, Pn, PL, gam, num_labels
def test_em2(prob_names, prob_annots=None):
    """
    assert prob_names.shape == (nAnnots, nNames)
    """
    learn_rate = 0.05
    num_iters = 1

    # Matrix of unary probabilities: the probability that each node takes on
    # a given label, independent of its edges.
    num_annots, num_names = prob_names.shape

    # prevent zero probabilities
    prob_names_ = prob_names + 1E-9
    prob_names_ /= prob_names_.sum(axis=1)[:, None]

    if prob_annots is None:
        prob_annots_ = np.full((num_annots, num_annots), 1 / num_annots)
        prob_annots_[np.diag_indices(num_annots)] *= 1.01
        # perturb
        rng = np.random.RandomState(0)
        prob_annots_ += (rng.randn(*prob_annots_.shape)) / 100
        prob_annots_ /= prob_annots_.sum(axis=1)[:, None]
        prob_annots_ = (prob_annots_.T + prob_annots_) / 2
    else:
        prob_annots_ = prob_annots + 1E-9
        prob_annots_ /= prob_annots_.sum(axis=1)[:, None]

    # Stack everything into a single matrix
    prob_part = np.hstack([prob_names_, prob_annots_])
    zero_part = np.zeros((num_names, num_annots + num_names))
    prior = np.vstack([zero_part, prob_part])

    # Gamma will hold a probability distribution over the nodes.
    # The labeled nodes must match themselves.
    # The unlabeled nodes are initialized with a uniform distribution.
    gam = np.hstack([np.eye(num_names),
                     np.ones((num_names, num_annots)) / num_names])

    verbose = 1
    if verbose:
        print('Initialize')
        print('num_names = %r' % (num_names,))
        print(ut.hz_str('prior = ',
                        ut.array2string2(prob_part[:, :], precision=2,
                                         max_line_width=140, suppress_small=True)))
        print(ut.hz_str('gamma = ',
                        ut.array2string2(gam[:, :], max_line_width=140,
                                         precision=2, suppress_small=True)))
    #print(ut.hz_str(' gamma = ', ut.array_repr2(gam, max_line_width=140, precision=2)))

    delta_i = np.zeros(num_names)

    def dErr(i, gam, prior, delta_i=delta_i, num_names=num_names):
        # expected likelihood is cross entropy error
        delta_i[:] = 0
        # Compute the gradient of the cross entropy error
        # This is over both names and annotations
        jdxs = [j for j in range(prior.shape[0]) if j != i]
        prior_ij = prior[i, jdxs]
        np.log(prior_ij / (1 - prior_ij))
        gam[:, jdxs]

        for j in range(prior.shape[0]):
            if i != j:
                delta_i += gam[:, j] * np.log(prior[i, j] / (1 - prior[i, j]))
        # compute the projected gradient
        delta_i_hat = delta_i - delta_i.sum() / num_names
        return delta_i_hat

    # Build a node for each annot and each name
    num_nodes = num_annots + num_names

    # Maximizes the expected likelihood of gamma
    dGam = np.zeros(gam.shape)
    # for count in range(num_iters):
    for count in ut.ProgIter(range(num_iters), label='EM', bs=True):
        # Compute error gradient
        for i in range(num_names, num_nodes):
            dGam[:, i] = dErr(i, gam, prior)
        # Make a step in the gradient direction
        # print(ut.hz_str(' dGam = ', ut.array_repr2(dGam, max_line_width=140, precision=2)))
        gam = gam + learn_rate * dGam
        # Normalize
        gam = np.clip(gam, 0, 1)
        for i in range(num_names, num_nodes):
            gam[:, i] = gam[:, i] / np.sum(gam[:, i])
    # print(ut.hz_str(' gamma = ', ut.array_repr2(gam, max_line_width=140, precision=2)))
    if verbose:
        print(ut.hz_str(' gamma = ',
                        ut.array2string2(gam[:, num_names:], max_line_width=140,
                                         precision=2, suppress_small=True)))
    print('Finished')
    return gam
def draw_em_graph(P, Pn, PL, gam, num_labels):
    """
    python -m ibeis.algo.hots.testem test_em --show --no-cnn
    """
    num_labels = PL.shape[1]
    name_nodes = ['N%d' % x for x in list(range(1, num_labels + 1))]
    #annot_nodes = ut.chr_range(len(Pn), base='A')
    annot_nodes = ['X%d' % x for x in list(range(1, len(Pn) + 1))]
    # name_nodes = ut.chr_range(num_labels, base='A')

    nodes = name_nodes + annot_nodes

    PL2 = gam[:, num_labels:].T
    PL2 += .01
    PL2 = PL2 / PL2.sum(axis=1)[:, None]
    # PL2 = PL2 / np.linalg.norm(PL2, axis=0)
    zero_part = np.zeros((num_labels, len(Pn) + num_labels))
    prob_part = np.hstack([PL2, Pn])
    print(ut.hz_str(' PL2 = ', ut.array_repr2(PL2, precision=2)))
    # Redo p with posteriors
    if ut.get_argflag('--postem'):
        P = np.vstack([zero_part, prob_part])

    weight_matrix = P  # NOQA
    graph = ut.nx_from_matrix(P, nodes=nodes)
    graph = graph.to_directed()
    # delete graph
    dup_edges = []
    seen_ = set([])
    for u, v in graph.edges():
        if u < v:
            u, v = v, u
        if (u, v) not in seen_:
            seen_.add((u, v))
        else:
            dup_edges.append((u, v))
    graph.remove_edges_from(dup_edges)
    import plottool as pt
    import networkx as nx

    if len(name_nodes) == 3 and len(annot_nodes) == 4:
        graph.node[annot_nodes[0]]['pos'] = (20., 200.)
        graph.node[annot_nodes[1]]['pos'] = (220., 200.)
        graph.node[annot_nodes[2]]['pos'] = (20., 100.)
        graph.node[annot_nodes[3]]['pos'] = (220., 100.)
        graph.node[name_nodes[0]]['pos'] = (10., 300.)
        graph.node[name_nodes[1]]['pos'] = (120., 300.)
        graph.node[name_nodes[2]]['pos'] = (230., 300.)
        nx.set_node_attributes(graph, 'pin', 'true')

        print('annot_nodes = %r' % (annot_nodes,))
        print('name_nodes = %r' % (name_nodes,))

        for u in annot_nodes:
            for v in name_nodes:
                if graph.has_edge(u, v):
                    print('1) u, v = %r' % ((u, v),))
                    graph.edge[u][v]['taillabel'] = graph.edge[u][v]['label']
                    graph.edge[u][v]['color'] = pt.ORANGE
                    graph.edge[u][v]['labelcolor'] = pt.BLUE
                    del graph.edge[u][v]['label']
                elif graph.has_edge(v, u):
                    print('2) u, v = %r' % ((u, v),))
                    graph.edge[v][u]['headlabel'] = graph.edge[v][u]['label']
                    graph.edge[v][u]['color'] = pt.ORANGE
                    graph.edge[v][u]['labelcolor'] = pt.BLUE
                    del graph.edge[v][u]['label']
                else:
                    print((u, v))
                    print('!!')

    # import itertools
    # name_const_edges = [(u, v, {'style': 'invis'}) for u, v in itertools.combinations(name_nodes, 2)]
    # graph.add_edges_from(name_const_edges)
    # nx.set_edge_attributes(graph, 'constraint', {edge: False for edge in graph.edges() if edge[0] == 'b' or edge[1] == 'b'})
    # nx.set_edge_attributes(graph, 'constraint', {edge: False for edge in graph.edges() if edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, 'constraint', {edge: True for edge in graph.edges() if edge[0] in name_nodes or edge[1] in name_nodes})
    # nx.set_edge_attributes(graph, 'constraint', {edge: True for edge in graph.edges() if (edge[0] in ['a', 'b'] and edge[1] in ['a', 'b']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, 'constraint', {edge: True for edge in graph.edges() if (edge[0] in ['c'] or edge[1] in ['c']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, 'constraint', {edge: True for edge in graph.edges() if (edge[0] in ['a'] or edge[1] in ['a']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # nx.set_edge_attributes(graph, 'constraint', {edge: True for edge in graph.edges() if (edge[0] in ['b'] or edge[1] in ['b']) and edge[0] in annot_nodes and edge[1] in annot_nodes})
    # graph.add_edges_from([('root', n) for n in nodes])
    # {node: 'names' for node in name_nodes})
    nx.set_node_attributes(graph, 'color', {node: pt.RED for node in name_nodes})
    # nx.set_node_attributes(graph, 'width', {node: 20 for node in nodes})
    # nx.set_node_attributes(graph, 'height', {node: 20 for node in nodes})
    #nx.set_node_attributes(graph, 'group', {node: 'names' for node in name_nodes})
    #nx.set_node_attributes(graph, 'group', {node: 'annots' for node in annot_nodes})
    nx.set_node_attributes(graph, 'groupid', {node: 'names' for node in name_nodes})
    nx.set_node_attributes(graph, 'groupid', {node: 'annots' for node in annot_nodes})
    graph.graph['clusterrank'] = 'local'
    # graph.graph['groupattrs'] = {
    #     'names': {'rankdir': 'LR', 'rank': 'source'},
    #     'annots': {'rankdir': 'TB', 'rank': 'source'},
    # }
    ut.nx_delete_edge_attr(graph, 'weight')
    # pt.show_nx(graph, fontsize=10, layoutkw={'splines': 'spline', 'prog': 'dot', 'sep': 2.0}, verbose=1)
    layoutkw = {
        # 'rankdir': 'LR',
        'splines': 'spline',
        # 'splines': 'ortho',
        # 'splines': 'curved',
        # 'compound': 'True',
        # 'prog': 'dot',
        'prog': 'neato',
        # 'packMode': 'clust',
        # 'sep': 4,
        # 'nodesep': 1,
        # 'ranksep': 1,
    }
    #pt.show_nx(graph, fontsize=12, layoutkw=layoutkw, verbose=0, as_directed=False)
    pt.show_nx(graph, fontsize=6, fontname='Ubuntu', layoutkw=layoutkw,
               verbose=0, as_directed=False)
    pt.interactions.zoom_factory()
def temp_model(num_annots, num_names, score_evidence=[], name_evidence=[],
               other_evidence={}, noquery=False, verbose=None, **kwargs):
    if verbose is None:
        verbose = ut.VERBOSE

    method = kwargs.pop('method', None)
    model = make_name_model(num_annots, num_names, verbose=verbose, **kwargs)

    if verbose:
        model.print_priors(ignore_ttypes=[MATCH_TTYPE, SCORE_TTYPE])

    model, evidence, soft_evidence = update_model_evidence(
        model, name_evidence, score_evidence, other_evidence)

    if verbose and len(soft_evidence) != 0:
        model.print_priors(ignore_ttypes=[MATCH_TTYPE, SCORE_TTYPE],
                           title='Soft Evidence', color='green')

    # if verbose:
    #     ut.colorprint('\n --- Soft Evidence ---', 'white')
    #     for ttype, cpds in model.ttype2_cpds.items():
    #         if ttype != MATCH_TTYPE:
    #             for fs_ in ut.ichunks(cpds, 4):
    #                 ut.colorprint(ut.hz_str([f._cpdstr('psql') for f in fs_]),
    #                               'green')

    if verbose:
        ut.colorprint('\n --- Inference ---', 'red')

    if (len(evidence) > 0 or len(soft_evidence) > 0) and not noquery:
        evidence = model._ensure_internal_evidence(evidence)
        query_vars = []
        query_vars += ut.list_getattr(model.ttype2_cpds[NAME_TTYPE], 'variable')
        # query_vars += ut.list_getattr(model.ttype2_cpds[MATCH_TTYPE], 'variable')
        query_vars = ut.setdiff(query_vars, evidence.keys())
        # query_vars = ut.setdiff(query_vars, soft_evidence.keys())
        query_results = cluster_query(model, query_vars, evidence,
                                      soft_evidence, method)
    else:
        query_results = {}

    factor_list = query_results['factor_list']

    if verbose:
        if verbose:
            logger.info('+--------')
        semtypes = [model.var2_cpd[f.variables[0]].ttype for f in factor_list]
        for type_, factors in ut.group_items(factor_list, semtypes).items():
            logger.info('Result Factors (%r)' % (type_,))
            factors = ut.sortedby(factors, [f.variables[0] for f in factors])
            for fs_ in ut.ichunks(factors, 4):
                ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]),
                              'yellow')
        logger.info('MAP assignments')
        top_assignments = query_results.get('top_assignments', [])
        tmp = []
        for lbl, val in top_assignments:
            tmp.append('%s : %.4f' % (ut.repr2(lbl), val))
        logger.info(ut.align('\n'.join(tmp), ' :'))
        logger.info('L_____\n')

    showkw = dict(evidence=evidence, soft_evidence=soft_evidence, **query_results)

    from wbia.algo.hots import pgm_viz
    pgm_viz.show_model(model, **showkw)
    return (model, evidence, query_results)
def dummy_example_depcacahe():
    r"""
    CommandLine:
        python -m dtool.example_depcache --exec-dummy_example_depcacahe

    Example:
        >>> # ENABLE_DOCTEST
        >>> from dtool.example_depcache import *  # NOQA
        >>> depc = dummy_example_depcacahe()
        >>> ut.show_if_requested()
    """
    fname = None
    # fname = 'dummy_default_depcache'
    fname = ':memory:'

    depc = testdata_depc(fname)

    tablename = 'fgweight'
    # print('[test] fgweight_path =\n%s' % (ut.repr3(depc.get_dependencies(tablename), nl=1),))
    # print('[test] keypoint =\n%s' % (ut.repr3(depc.get_dependencies('keypoint'), nl=1),))
    # print('[test] descriptor =\n%s' % (ut.repr3(depc.get_dependencies('descriptor'), nl=1),))
    # print('[test] spam =\n%s' % (ut.repr3(depc.get_dependencies('spam'), nl=1),))

    root_rowids = [5, 3]
    desc_rowids = depc.get_rowids('descriptor', root_rowids)  # NOQA

    table = depc[tablename]  # NOQA

    #example_getter_methods(depc, 'vsmany', root_rowids)
    # example_getter_methods(depc, 'chipmask', root_rowids)
    # example_getter_methods(depc, 'keypoint', root_rowids)
    # example_getter_methods(depc, 'chip', root_rowids)

    test_getters(depc)

    #import plottool as pt
    # pt.ensure_pylab_qt4()

    graph = depc.make_graph()  # NOQA
    #pt.show_nx(graph)

    print('---------- 111 -----------')

    # Try testing the algorithm
    req = depc.new_request('vsmany', root_rowids, root_rowids, {})
    print('req = %r' % (req,))
    req.execute()

    print('---------- 222 -----------')

    cfgdict = {'sver_on': False}
    req = depc.new_request('vsmany', root_rowids, root_rowids, cfgdict)
    req.execute()

    print('---------- 333 -----------')

    cfgdict = {'sver_on': False, 'adapt_shape': False}
    req = depc.new_request('vsmany', root_rowids, root_rowids, cfgdict)
    req.execute()

    print('---------- 444 -----------')

    req = depc.new_request('vsmany', root_rowids, root_rowids, {})
    req.execute()

    #ut.InstanceList(
    db = list(depc.fname_to_db.values())[0]
    #db_list = ut.InstanceList(depc.fname_to_db.values())
    #db_list.print_table_csv('config', exclude_columns='config_strid')

    print('config table')
    column_list, column_names = db.get_table_column_data(tablename, ['config_strid'])
    print('\n'.join([
        ut.hz_str(*list(ut.interleave((r, [', '] * (len(r) - 1)))))
        for r in list(zip(*[[ut.repr3(r, nl=2) for r in col] for col in column_list]))
    ]))
    return depc
def test_em():
    """
    CommandLine:
        python -m ibeis.algo.hots.testem test_em --show
        python -m ibeis.algo.hots.testem test_em --show --no-cnn

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.testem import *  # NOQA
        >>> P, Pn, PL, gam, num_labels = test_em()
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> pt.qt4ensure()
        >>> draw_em_graph(P, Pn, PL, gam, num_labels)
        >>> ut.show_if_requested()
    """
    print('EM')

    # Matrix of unary probabilities: the probability that each node takes on
    # a given label, independent of its edges.

    test_case = [
        {'name': 1, 'view': 'L'},
        {'name': 1, 'view': 'L'},
        {'name': 2, 'view': 'L'},
        {'name': 2, 'view': 'R'},
        {'name': 2, 'view': 'B'},
        {'name': 3, 'view': 'L'},
        #{'name': 3, 'view': 'L'},
        #{'name': 4, 'view': 'L'},
    ]

    def make_test_similarity(test_case):
        #toy_params = {
        #    True: {'mu': 0.9, 'sigma': .1},
        #    False: {'mu': 0.1, 'sigma': .4}
        #}
        # tau = np.pi * 2
        from ibeis import constants as const
        # view_to_ori = const.VIEWTEXT_TO_YAW_RADIANS
        view_to_ori = ut.map_dict_keys(lambda x: const.YAWALIAS[x],
                                       const.VIEWTEXT_TO_YAW_RADIANS)
        # view_to_ori = {
        #     'F': -1 * tau / 4,
        #     'L': 0 * tau / 4,
        #     'B': 1 * tau / 4,
        #     'R': 2 * tau / 4,
        # }
        import vtool as vt

        nid_list = np.array(ut.dict_take_column(test_case, 'name'))
        yaw_list = np.array(
            ut.dict_take(view_to_ori, ut.dict_take_column(test_case, 'view')))

        rng = np.random.RandomState(0)
        pmat = []
        for idx in range(len(test_case)):
            nid = nid_list[idx]
            yaw = yaw_list[idx]
            p_same = nid == nid_list
            p_comp = 1 - vt.ori_distance(yaw_list, yaw) / np.pi
            # estimate noisy measurements
            p_same_m = np.clip(p_same + rng.normal(0, .5, size=len(p_same)), 0, .9)
            p_comp_m = np.clip(p_comp + rng.normal(0, .5, size=len(p_comp)), 0, .9)
            #
            p_same_and_comp = p_same_m * p_comp_m
            pmat.append(p_same_and_comp)
        #
        P = np.array(pmat)
        P[np.diag_indices(len(P))] = 0
        P = P + P.T / 2
        P = np.clip(P, .01, .99)
        print(ut.hz_str(' P = ', ut.array_repr2(P, precision=2, max_line_width=140)))
        return P

    Pn = make_test_similarity(test_case)

    if False:
        Pn = np.array(np.matrix(
            b"""
            .0 .7 .3 .2 .4 .5;
            .7 .0 .4 .4 .3 .5;
            .3 .4 .0 .6 .1 .5;
            .2 .4 .6 .0 .2 .3;
            .4 .3 .1 .2 .0 .8;
            .5 .5 .5 .3 .8 .0
            """))

        PL = np.array(np.matrix(
            b"""
            .7 .5 .5;
            .8 .4 .3;
            .5 .7 .3;
            .5 .8 .4;
            .3 .2 .8;
            .5 .5 .8
            """))

    if True:
        Pn = np.array(np.matrix(
            b"""
            1.0 0.7 0.4 0.2;
            0.7 1.0 0.4 0.4;
            0.4 0.4 1.0 0.6;
            0.2 0.4 0.6 1.0
            """))

        PL = np.array(np.matrix(
            b"""
            0.7 0.5 0.5;
            0.8 0.4 0.3;
            0.5 0.7 0.3;
            0.5 0.8 0.4
            """))

    num_nodes = Pn.shape[0]

    for num_labels in range(1, 2):
        #Pn = np.array(np.matrix(
        #    b"""
        #    .0 .7 .3 .2 .4 .5;
        #    .7 .0 .4 .4 .3 .5;
        #    .3 .4 .0 .6 .1 .5;
        #    .2 .4 .6 .0 .2 .3;
        #    .4 .3 .1 .2 .0 .8;
        #    .5 .5 .5 .3 .8 .0
        #    """))

        # Uniform distribution over labels
        if 0:
            PL = np.ones((num_nodes, num_labels)) / num_labels
            # Give nodes preferences
            PL[np.diag_indices(num_labels)] *= 1.01
            PL /= np.linalg.norm(PL, axis=0)
            # PL[0, :] = .01 / (num_labels - 1)
            # PL[0, 0] = .99
        else:
            PL /= np.linalg.norm(PL, axis=0)

        # Number of nodes
        num_nodes = Pn.shape[0]
        # Number of classes
        num_labels = PL.shape[1]
        #num_labels = num_nodes
        #if 0 or num_labels != 3:
        #    PL = np.ones((num_nodes, num_labels)) / num_labels
        #    # PL[0, :] = .01 / (num_labels - 1)
        #    # PL[0, 0] = .99
        d = num_labels + num_nodes

        # Stack everything into a single matrix
        zero_part = np.zeros((num_labels, num_nodes + num_labels))
        prob_part = np.hstack([PL, Pn])
        #print(ut.hz_str(' prob_part = ', ut.array_repr2(prob_part[:, :], precision=2)))
        P = np.vstack([zero_part, prob_part])

        # Gamma will hold a probability distribution over the nodes.
        # The labeled nodes must match themselves.
        # The unlabeled nodes are initialized with a uniform distribution.
        gam = np.hstack([np.eye(num_labels),
                         np.ones((num_labels, num_nodes)) / num_labels])

        print('Initialize')
        print('num_labels = %r' % (num_labels,))
        # print(ut.hz_str(' gamma = ', ut.array_repr2(gam[:, num_labels:], max_line_width=140, precision=2)))
        print(ut.hz_str(' gamma = ', ut.array_repr2(gam, max_line_width=140, precision=2)))

        delta_i = np.zeros(num_labels)

        def dErr(i, gam, P, delta_i=delta_i):
            # expected likelihood is cross entropy error
            delta_i[:] = 0
            # Compute the gradient of the cross entropy error
            # This is over both names and annotations
            for j in range(d):
                if i != j:
                    delta_i += gam[:, j] * np.log(P[i, j] / (1 - P[i, j]))
            # compute the projected gradient
            delta_i_hat = delta_i - delta_i.sum() / num_labels
            return delta_i_hat

        # Maximizes the expected likelihood of gamma
        learn_rate = 0.05
        num_iters = 1000
        dGam = np.zeros(gam.shape)
        # for count in range(num_iters):
        for count in ut.ProgIter(range(num_iters), label='EM', bs=True):
            # Compute error gradient
            for i in range(num_labels, d):
                dGam[:, i] = dErr(i, gam, P)
            # Make a step in the gradient direction
            # print(ut.hz_str(' dGam = ', ut.array_repr2(dGam, max_line_width=140, precision=2)))
            gam = gam + learn_rate * dGam
            # Normalize
            gam = np.clip(gam, 0, 1)
            for i in range(num_labels, d):
                gam[:, i] = gam[:, i] / np.sum(gam[:, i])
        # print(ut.hz_str(' gamma = ', ut.array_repr2(gam, max_line_width=140, precision=2)))
        # print(ut.hz_str(' gamma = ', ut.array_repr2(gam[:, num_labels:], max_line_width=140, precision=2)))
        print('Finished')
    return P, Pn, PL, gam, num_labels
def classification_report2(y_true, y_pred, target_names=None, sample_weight=None, verbose=True): """ References: https://csem.flinders.edu.au/research/techreps/SIE07001.pdf https://www.mathworks.com/matlabcentral/fileexchange/5648-bm-cm-?requestedDomain=www.mathworks.com Jurman, Riccadonna, Furlanello, (2012). A Comparison of MCC and CEN Error Measures in MultiClass Prediction Example: >>> from ibeis.algo.verif.sklearn_utils import * # NOQA >>> y_true = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3] >>> y_pred = [1, 2, 1, 3, 1, 2, 2, 3, 2, 2, 3, 3, 2, 3, 3, 3, 1, 3] >>> target_names = None >>> sample_weight = None >>> verbose = True >>> report = classification_report2(y_true, y_pred, verbose=verbose) Ignore: >>> size = 100 >>> rng = np.random.RandomState(0) >>> p_classes = np.array([.90, .05, .05][0:2]) >>> p_classes = p_classes / p_classes.sum() >>> p_wrong = np.array([.03, .01, .02][0:2]) >>> y_true = testdata_ytrue(p_classes, p_wrong, size, rng) >>> rs = [] >>> for x in range(17): >>> p_wrong += .05 >>> y_pred = testdata_ypred(y_true, p_wrong, rng) >>> report = classification_report2(y_true, y_pred, verbose='hack') >>> rs.append(report) >>> import plottool as pt >>> pt.qtensure() >>> df = pd.DataFrame(rs).drop(['raw'], axis=1) >>> delta = df.subtract(df['target'], axis=0) >>> sqrd_error = np.sqrt((delta ** 2).sum(axis=0)) >>> print('Error') >>> print(sqrd_error.sort_values()) >>> ys = df.to_dict(orient='list') >>> pt.multi_plot(ydata_list=ys) """ import sklearn.metrics from sklearn.preprocessing import LabelEncoder if target_names is None: unique_labels = np.unique(np.hstack([y_true, y_pred])) if len(unique_labels) == 1 and (unique_labels[0] == 0 or unique_labels[0] == 1): target_names = np.array([False, True]) y_true_ = y_true y_pred_ = y_pred else: lb = LabelEncoder() lb.fit(unique_labels) y_true_ = lb.transform(y_true) y_pred_ = lb.transform(y_pred) target_names = lb.classes_ else: y_true_ = y_true y_pred_ = y_pred # Real data is on the rows, # Pred data is on the cols. cm = sklearn.metrics.confusion_matrix( y_true_, y_pred_, sample_weight=sample_weight) confusion = cm # NOQA k = len(cm) # number of classes N = cm.sum() # number of examples real_total = cm.sum(axis=1) pred_total = cm.sum(axis=0) # the number of "positive" cases **per class** n_pos = real_total # NOQA # the number of times a class was predicted. 
n_neg = N - n_pos # NOQA # number of true positives per class n_tps = np.diag(cm) # number of true negatives per class n_fps = (cm - np.diagflat(np.diag(cm))).sum(axis=0) tprs = n_tps / real_total # true pos rate (recall) tpas = n_tps / pred_total # true pos accuracy (precision) unused = (real_total + pred_total) == 0 fprs = n_fps / n_neg # false pose rate fprs[unused] = np.nan # tnrs = 1 - fprs rprob = real_total / N pprob = pred_total / N if len(cm) == 2: [[A, B], [C, D]] = cm (A * D - B * C) / np.sqrt((A + C) * (B + D) * (A + B) * (C + D)) # c2 = vt.ConfusionMetrics().fit(scores, y) # bookmaker is analogous to recall, but unbiased by class frequency rprob_mat = np.tile(rprob, [k, 1]).T - (1 - np.eye(k)) bmcm = cm.T / rprob_mat bms = np.sum(bmcm.T, axis=0) / N # markedness is analogous to precision, but unbiased by class frequency pprob_mat = np.tile(pprob, [k, 1]).T - (1 - np.eye(k)) mkcm = cm / pprob_mat mks = np.sum(mkcm.T, axis=0) / N mccs = np.sign(bms) * np.sqrt(np.abs(bms * mks)) perclass_data = ut.odict([ ('precision', tpas), ('recall', tprs), ('fpr', fprs), ('markedness', mks), ('bookmaker', bms), ('mcc', mccs), ('support', real_total), ]) tpa = np.nansum(tpas * rprob) tpr = np.nansum(tprs * rprob) fpr = np.nansum(fprs * rprob) mk = np.nansum(mks * rprob) bm = np.nansum(bms * pprob) # The simple mean seems to do the best mccs_ = mccs[~np.isnan(mccs)] if len(mccs_) == 0: mcc_combo = np.nan else: mcc_combo = np.nanmean(mccs_) combined_data = ut.odict([ ('precision', tpa), ('recall', tpr), ('fpr', fpr), ('markedness', mk), ('bookmaker', bm), # ('mcc', np.sign(bm) * np.sqrt(np.abs(bm * mk))), ('mcc', mcc_combo), # np.sign(bm) * np.sqrt(np.abs(bm * mk))), ('support', real_total.sum()) ]) # Not sure how to compute this. Should it agree with the sklearn impl? 
    if verbose == 'hack':
        verbose = False
        mcc_known = sklearn.metrics.matthews_corrcoef(
            y_true, y_pred, sample_weight=sample_weight)
        mcc_raw = np.sign(bm) * np.sqrt(np.abs(bm * mk))

        import scipy as sp

        def gmean(x, w=None):
            if w is None:
                return sp.stats.gmean(x)
            return np.exp(np.nansum(w * np.log(x)) / np.nansum(w))

        def hmean(x, w=None):
            if w is None:
                return sp.stats.hmean(x)
            return 1 / (np.nansum(w * (1 / x)) / np.nansum(w))

        def amean(x, w=None):
            if w is None:
                return np.mean(x)
            return np.nansum(w * x) / np.nansum(w)

        report = {
            'target': mcc_known,
            'raw': mcc_raw,
        }

        # print('%r <<<' % (mcc_known,))
        means = {
            'a': amean,
            # 'h': hmean,
            'g': gmean,
        }
        weights = {
            'p': pprob,
            'r': rprob,
            '': None,
        }
        for mean_key, mean in means.items():
            for w_key, w in weights.items():
                # Hack of very wrong items
                if mean_key == 'g':
                    if w_key in ['r', 'p', '']:
                        continue
                if mean_key == 'g':
                    if w_key in ['r']:
                        continue
                m = mean(mccs, w)
                r_key = '{} {}'.format(mean_key, w_key)
                report[r_key] = m
                # print(r_key)
                # print(np.abs(m - mcc_known))
        # print(ut.repr4(report, precision=8))
        return report
        # print('mcc_known = %r' % (mcc_known,))
        # print('mcc_combo1 = %r' % (mcc_combo1,))
        # print('mcc_combo2 = %r' % (mcc_combo2,))
        # print('mcc_combo3 = %r' % (mcc_combo3,))

    # if target_names is None:
    #     target_names = list(range(k))
    index = pd.Index(target_names, name='class')

    perclass_df = pd.DataFrame(perclass_data, index=index)
    # combined_df = pd.DataFrame(combined_data, index=['ave/sum'])
    combined_df = pd.DataFrame(combined_data, index=['combined'])

    metric_df = pd.concat([perclass_df, combined_df])
    metric_df.index.name = 'class'
    metric_df.columns.name = 'metric'

    pred_id = ['%s' % m for m in target_names]
    real_id = ['%s' % m for m in target_names]
    confusion_df = pd.DataFrame(confusion, columns=pred_id, index=real_id)

    confusion_df = confusion_df.append(pd.DataFrame(
        [confusion.sum(axis=0)], columns=pred_id, index=['Σp']))
    confusion_df['Σr'] = np.hstack([confusion.sum(axis=1), [0]])
    confusion_df.index.name = 'real'
    confusion_df.columns.name = 'pred'

    if np.all(confusion_df - np.floor(confusion_df) < .000001):
        confusion_df = confusion_df.astype(np.int)
    confusion_df.iloc[(-1, -1)] = N
    if np.all(confusion_df - np.floor(confusion_df) < .000001):
        confusion_df = confusion_df.astype(np.int)
    # np.nan

    if verbose:
        cfsm_str = confusion_df.to_string(float_format=lambda x: '%.1f' % (x,))
        print('Confusion Matrix (real × pred) :')
        print(ut.hz_str('    ', cfsm_str))

        # ut.cprint('\nExtended Report', 'turquoise')
        print('\nEvaluation Metric Report:')
        float_precision = 2
        float_format = '%.' + str(float_precision) + 'f'
        ext_report = metric_df.to_string(float_format=float_format)
        print(ut.hz_str('    ', ext_report))

    report = {
        'metrics': metric_df,
        'confusion': confusion_df,
    }

    # FIXME: What is the difference between sklearn multiclass-MCC
    # and BM * MK MCC?
    def matthews_corrcoef(y_true, y_pred, sample_weight=None):
        from sklearn.metrics.classification import (
            _check_targets, LabelEncoder, confusion_matrix)
        y_type, y_true, y_pred = _check_targets(y_true, y_pred)
        if y_type not in {"binary", "multiclass"}:
            raise ValueError("%s is not supported" % y_type)
        lb = LabelEncoder()
        lb.fit(np.hstack([y_true, y_pred]))
        y_true = lb.transform(y_true)
        y_pred = lb.transform(y_pred)
        C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
        t_sum = C.sum(axis=1)
        p_sum = C.sum(axis=0)
        n_correct = np.trace(C)
        n_samples = p_sum.sum()
        cov_ytyp = n_correct * n_samples - np.dot(t_sum, p_sum)
        cov_ypyp = n_samples ** 2 - np.dot(p_sum, p_sum)
        cov_ytyt = n_samples ** 2 - np.dot(t_sum, t_sum)
        mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
        if np.isnan(mcc):
            return 0.
        else:
            return mcc

    try:
        # mcc = sklearn.metrics.matthews_corrcoef(
        #     y_true, y_pred, sample_weight=sample_weight)
        mcc = matthews_corrcoef(y_true, y_pred, sample_weight=sample_weight)
        # These scales are chosen somewhat arbitrarily in the context of a
        # computer vision application with relatively reasonable quality data
        # https://stats.stackexchange.com/questions/118219/how-to-interpret
        mcc_significance_scales = ut.odict([
            (1.0, 'perfect'),
            (0.9, 'very strong'),
            (0.7, 'strong'),
            (0.5, 'significant'),
            (0.3, 'moderate'),
            (0.2, 'weak'),
            (0.0, 'negligible'),
        ])
        for k, v in mcc_significance_scales.items():
            if np.abs(mcc) >= k:
                if verbose:
                    print('classifier correlation is %s' % (v,))
                break
        if verbose:
            float_precision = 2
            print(('MCC\' = %.' + str(float_precision) + 'f') % (mcc,))
        report['mcc'] = mcc
    except ValueError:
        pass
    return report
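# Illustrative aside (not part of the original module): a quick, self-contained
# check of the identity the metrics above lean on. For a binary problem the
# Matthews correlation coefficient is the signed geometric mean of informedness
# (the "bookmaker" column) and markedness, which is what
# np.sign(bm) * np.sqrt(np.abs(bm * mk)) encodes. The toy data and the helper
# name `_sketch_mcc_identity` are made up for illustration; only numpy and
# sklearn.metrics are assumed.
def _sketch_mcc_identity():
    import numpy as np
    import sklearn.metrics

    y_true = np.array([1, 1, 1, 1, 0, 0, 0, 1, 0, 0])
    y_pred = np.array([1, 1, 0, 1, 0, 0, 1, 1, 0, 1])

    # rows are real classes, cols are predicted classes
    [[tn, fp], [fn, tp]] = sklearn.metrics.confusion_matrix(y_true, y_pred)

    informedness = tp / (tp + fn) + tn / (tn + fp) - 1  # recall-like, prior-free
    markedness = tp / (tp + fp) + tn / (tn + fn) - 1    # precision-like, prior-free

    mcc_bm_mk = np.sign(informedness) * np.sqrt(np.abs(informedness * markedness))
    mcc_sklearn = sklearn.metrics.matthews_corrcoef(y_true, y_pred)

    # both come out to ~0.408 on this toy data
    assert np.isclose(mcc_bm_mk, mcc_sklearn)
    return mcc_bm_mk, mcc_sklearn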
def print_priors(model, ignore_ttypes=[], title='Priors', color='darkblue'):
    ut.colorprint('\n --- %s ---' % (title,), color=color)
    for ttype, cpds in model.ttype2_cpds.items():
        if ttype not in ignore_ttypes:
            for fs_ in ut.ichunks(cpds, 4):
                ut.colorprint(ut.hz_str([f._cpdstr('psql') for f in fs_]), color)
def test_model(num_annots, num_names, score_evidence=[], name_evidence=[],
               other_evidence={}, noquery=False, verbose=None, **kwargs):
    if verbose is None:
        verbose = ut.VERBOSE

    method = kwargs.pop('method', None)
    model = make_name_model(num_annots, num_names, verbose=verbose, **kwargs)

    if verbose:
        model.print_priors(ignore_ttypes=['match', 'score'])

    model, evidence, soft_evidence = update_model_evidence(
        model, name_evidence, score_evidence, other_evidence)

    if verbose and len(soft_evidence) != 0:
        model.print_priors(ignore_ttypes=['match', 'score'],
                           title='Soft Evidence', color='green')

    #if verbose:
    #    ut.colorprint('\n --- Soft Evidence ---', 'white')
    #    for ttype, cpds in model.ttype2_cpds.items():
    #        if ttype != 'match':
    #            for fs_ in ut.ichunks(cpds, 4):
    #                ut.colorprint(ut.hz_str([f._cpdstr('psql') for f in fs_]),
    #                              'green')

    if verbose:
        ut.colorprint('\n --- Inference ---', 'red')

    if (len(evidence) > 0 or len(soft_evidence) > 0) and not noquery:
        evidence = model._ensure_internal_evidence(evidence)
        query_vars = []
        query_vars += ut.list_getattr(model.ttype2_cpds['name'], 'variable')
        #query_vars += ut.list_getattr(model.ttype2_cpds['match'], 'variable')
        query_vars = ut.setdiff(query_vars, evidence.keys())
        #query_vars = ut.setdiff(query_vars, soft_evidence.keys())
        query_results = cluster_query(model, query_vars, evidence,
                                      soft_evidence, method)
    else:
        query_results = {}

    factor_list = query_results['factor_list']

    if verbose:
        print('+--------')
        semtypes = [model.var2_cpd[f.variables[0]].ttype
                    for f in factor_list]
        for type_, factors in ut.group_items(factor_list, semtypes).items():
            print('Result Factors (%r)' % (type_,))
            factors = ut.sortedby(factors, [f.variables[0] for f in factors])
            for fs_ in ut.ichunks(factors, 4):
                ut.colorprint(ut.hz_str([f._str('phi', 'psql') for f in fs_]),
                              'yellow')
        print('MAP assignments')
        top_assignments = query_results.get('top_assignments', [])
        tmp = []
        for lbl, val in top_assignments:
            tmp.append('%s : %.4f' % (ut.repr2(lbl), val))
        print(ut.align('\n'.join(tmp), ' :'))
        print('L_____\n')

    showkw = dict(evidence=evidence,
                  soft_evidence=soft_evidence,
                  **query_results)

    pgm_viz.show_model(model, **showkw)
    return (model, evidence, query_results)
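# Illustrative aside (not part of the original module): test_model builds an
# ibeis name model via make_name_model and answers queries through
# cluster_query, neither of which appears in this corpus. The sketch below
# shows the same shape of computation on a toy two-annotation model using
# pgmpy (which the ibeis bayes code builds on): condition on observed evidence
# and read back posteriors over the latent name variables. The graph
# structure, CPD numbers, and the name `_sketch_name_inference` are
# assumptions made for illustration, assuming a recent pgmpy release.
def _sketch_name_inference():
    from pgmpy.models import BayesianNetwork
    from pgmpy.factors.discrete import TabularCPD
    from pgmpy.inference import VariableElimination

    # Two annotations, each with a latent name in {0, 1}, and one pairwise
    # match score that tends to be high when the names agree.
    model = BayesianNetwork([('N1', 'S12'), ('N2', 'S12')])
    cpd_n1 = TabularCPD('N1', 2, [[0.5], [0.5]])
    cpd_n2 = TabularCPD('N2', 2, [[0.5], [0.5]])
    cpd_s = TabularCPD(
        'S12', 2,
        # columns: (N1, N2) = (0,0), (0,1), (1,0), (1,1); rows: S12 = low, high
        [[0.2, 0.9, 0.9, 0.2],
         [0.8, 0.1, 0.1, 0.8]],
        evidence=['N1', 'N2'], evidence_card=[2, 2])
    model.add_cpds(cpd_n1, cpd_n2, cpd_s)
    assert model.check_model()

    # Hard evidence: a high score was observed between the two annotations.
    infer = VariableElimination(model)
    posterior = infer.query(variables=['N1', 'N2'], evidence={'S12': 1})
    print(posterior)
    return posterior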
def setcover_example():
    """
    CommandLine:
        python -m ibeis.scripts.specialdraw setcover_example --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.specialdraw import *  # NOQA
        >>> result = setcover_example()
        >>> print(result)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()
    """
    import ibeis
    import plottool as pt
    from ibeis.viz import viz_graph
    import networkx as nx
    pt.ensure_pylab_qt4()
    ibs = ibeis.opendb(defaultdb='testdb2')

    if False:
        # Select a good set
        aids = ibs.get_name_aids(ibs.get_valid_nids())
        # ibeis.testdata_aids('testdb2', a='default:mingt=2')
        aids = [a for a in aids if len(a) > 1]
        for a in aids:
            print(ut.repr3(ibs.get_annot_stats_dict(a)))
        print(aids[-2])
    #aids = [78, 79, 80, 81, 88, 91]
    aids = [78, 79, 81, 88, 91]
    qreq_ = ibs.depc.new_request('vsone', aids, aids, cfgdict={})
    cm_list = qreq_.execute()
    from ibeis.algo.hots import graph_iden
    infr = graph_iden.AnnotInference(cm_list)
    unique_aids, prob_annots = infr.make_prob_annots()
    import numpy as np
    print(ut.hz_str('prob_annots = ',
                    ut.array2string2(prob_annots, precision=2,
                                     max_line_width=140,
                                     suppress_small=True)))
    # ut.setcover_greedy(candidate_sets_dict)
    max_weight = 3
    prob_annots[np.diag_indices(len(prob_annots))] = np.inf
    prob_annots = prob_annots
    thresh_points = np.sort(prob_annots[np.isfinite(prob_annots)])

    # probably not the best way to go about searching for these thresholds
    # but when you have a hammer...
    if False:
        quant = sorted(np.diff(thresh_points))[(len(thresh_points) - 1) // 2]
        candset = {point: thresh_points[np.abs(thresh_points - point) < quant]
                   for point in thresh_points}
        check_thresholds = len(aids) * 2
        thresh_points2 = np.array(
            ut.setcover_greedy(candset, max_weight=check_thresholds).keys())
        thresh_points = thresh_points2
    # pt.plot(sorted(thresh_points), 'rx')
    # pt.plot(sorted(thresh_points2), 'o')

    # prob_annots = prob_annots.T

    # thresh_start = np.mean(thresh_points)
    current_idxs = []
    current_covers = []
    current_val = np.inf
    for thresh in thresh_points:
        covering_sets = [np.where(row >= thresh)[0] for row in (prob_annots)]
        candidate_sets_dict = {ax: others
                               for ax, others in enumerate(covering_sets)}
        soln_cover = ut.setcover_ilp(candidate_sets_dict,
                                     max_weight=max_weight)
        exemplar_idxs = list(soln_cover.keys())
        soln_weight = len(exemplar_idxs)
        val = max_weight - soln_weight
        # print('val = %r' % (val,))
        # print('soln_weight = %r' % (soln_weight,))
        if val < current_val:
            current_val = val
            current_covers = covering_sets
            current_idxs = exemplar_idxs
    exemplars = ut.take(aids, current_idxs)
    ensure_edges = [(aids[ax], aids[ax2])
                    for ax, other_xs in enumerate(current_covers)
                    for ax2 in other_xs]
    graph = viz_graph.make_netx_graph_from_aid_groups(
        ibs, [aids], allow_directed=True, ensure_edges=ensure_edges,
        temp_nids=[1] * len(aids))
    viz_graph.ensure_node_images(ibs, graph)

    nx.set_node_attributes(graph, 'framewidth', False)
    nx.set_node_attributes(graph, 'framewidth', {aid: 4.0 for aid in exemplars})
    nx.set_edge_attributes(graph, 'color', pt.ORANGE)
    nx.set_node_attributes(graph, 'color', pt.LIGHT_BLUE)
    nx.set_node_attributes(graph, 'shape', 'rect')

    layoutkw = {
        'sep': 1 / 10,
        'prog': 'neato',
        'overlap': 'false',
        #'splines': 'ortho',
        'splines': 'spline',
    }
    pt.show_nx(graph, layout='agraph', layoutkw=layoutkw)
    pt.zoom_factory()
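# Illustrative aside (not part of the original module): setcover_example leans
# on ut.setcover_greedy / ut.setcover_ilp from utool. The sketch below is a
# plain greedy set-cover heuristic over the same candidate_sets_dict layout
# (exemplar index -> indices it covers): repeatedly take the set that covers
# the most still-uncovered items. The helper name `_sketch_greedy_setcover`
# and the toy data in the usage comment are made up for illustration.
def _sketch_greedy_setcover(candidate_sets_dict, max_weight=None):
    uncovered = set().union(*candidate_sets_dict.values())
    chosen = {}
    while uncovered and (max_weight is None or len(chosen) < max_weight):
        # pick the candidate that covers the most currently uncovered items
        key, items = max(candidate_sets_dict.items(),
                         key=lambda kv: len(uncovered.intersection(kv[1])))
        gained = uncovered.intersection(items)
        if not gained:
            break  # remaining items cannot be covered by any candidate
        chosen[key] = sorted(gained)
        uncovered -= gained
    return chosen


# Usage: two exemplars (0 and 2) cover all five items.
# _sketch_greedy_setcover({0: [0, 1, 2], 1: [1, 3], 2: [3, 4], 3: [0, 4]},
#                         max_weight=3) -> {0: [0, 1, 2], 2: [3, 4]}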
def dummy_example_depcacahe():
    r"""
    CommandLine:
        python -m dtool.example_depcache --exec-dummy_example_depcacahe

    Example:
        >>> # ENABLE_DOCTEST
        >>> from dtool.example_depcache import *  # NOQA
        >>> depc = dummy_example_depcacahe()
        >>> ut.show_if_requested()
    """
    fname = None
    # fname = 'dummy_default_depcache'
    fname = ':memory:'

    depc = testdata_depc(fname)

    tablename = 'fgweight'
    # print('[test] fgweight_path =\n%s' % (ut.repr3(depc.get_dependencies(tablename), nl=1),))
    # print('[test] keypoint =\n%s' % (ut.repr3(depc.get_dependencies('keypoint'), nl=1),))
    # print('[test] descriptor =\n%s' % (ut.repr3(depc.get_dependencies('descriptor'), nl=1),))
    # print('[test] spam =\n%s' % (ut.repr3(depc.get_dependencies('spam'), nl=1),))

    root_rowids = [5, 3]
    desc_rowids = depc.get_rowids('descriptor', root_rowids)  # NOQA

    table = depc[tablename]  # NOQA

    #example_getter_methods(depc, 'vsmany', root_rowids)
    # example_getter_methods(depc, 'chipmask', root_rowids)
    # example_getter_methods(depc, 'keypoint', root_rowids)
    # example_getter_methods(depc, 'chip', root_rowids)

    test_getters(depc)

    #import plottool as pt
    # pt.ensureqt()

    graph = depc.make_graph()  # NOQA
    #pt.show_nx(graph)

    print('---------- 111 -----------')

    # Try testing the algorithm
    req = depc.new_request('vsmany', root_rowids, root_rowids, {})
    print('req = %r' % (req,))
    req.execute()

    print('---------- 222 -----------')

    cfgdict = {'sver_on': False}
    req = depc.new_request('vsmany', root_rowids, [root_rowids], cfgdict)
    req.execute()

    print('---------- 333 -----------')

    cfgdict = {'sver_on': False, 'adapt_shape': False}
    req = depc.new_request('vsmany', root_rowids, root_rowids, cfgdict)
    req.execute()

    print('---------- 444 -----------')

    req = depc.new_request('vsmany', root_rowids, root_rowids, {})
    req.execute()

    #ut.InstanceList(
    db = list(depc.fname_to_db.values())[0]
    #db_list = ut.InstanceList(depc.fname_to_db.values())
    #db_list.print_table_csv('config', exclude_columns='config_strid')

    print('config table')
    tablename = 'config'
    column_list, column_names = db.get_table_column_data(
        tablename, ['config_strid'])
    print('\n'.join([
        ut.hz_str(*list(ut.interleave((r, [', '] * (len(r) - 1)))))
        for r in list(zip(*[[ut.repr3(r, nl=2) for r in col]
                            for col in column_list]))
    ]))
    return depc