def ensure_names_are_connected(graph, aids_list):
    """Ensure all annotations sharing a name are connected in ``graph`` (in place).

    A copy of ``graph`` is augmented with candidate edges between every pair
    of aids in each name group; the minimum-spanning-tree edges of each
    connected component of the augmented graph are then added back to
    ``graph``, preferring edges that already existed.

    Args:
        graph (nx.Graph): graph modified in place.
        aids_list (list): groups of annotation ids; each group shares a name.
    """
    aug_graph = graph.copy().to_undirected()
    orig_edges = aug_graph.edges()
    # All ordered pairs within each name group, minus self-loops
    unflat_edges = [list(itertools.product(aids, aids)) for aids in aids_list]
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    new_edges = ut.setdiff_ordered(aid_pairs, aug_graph.edges())

    preweighted_edges = nx.get_edge_attributes(aug_graph, 'weight')
    if preweighted_edges:
        # Respect weights that were already assigned by the caller
        orig_edges = ut.setdiff(orig_edges, list(preweighted_edges.keys()))

    aug_graph.add_edges_from(new_edges)
    # Ensure the largest possible set of original edges is in the MST:
    # weight pre-existing edges lower than newly added candidate edges.
    nx.set_edge_attributes(aug_graph, name='weight',
                           values={edge: 1.0 for edge in new_edges})
    nx.set_edge_attributes(aug_graph, name='weight',
                           values={edge: 0.1 for edge in orig_edges})
    # nx.connected_component_subgraphs was removed in networkx 2.4; build
    # each component subgraph explicitly instead.
    for component in nx.connected_components(aug_graph):
        mst_sub_graph = nx.minimum_spanning_tree(aug_graph.subgraph(component))
        for edge in mst_sub_graph.edges():
            redge = edge[::-1]
            if not (graph.has_edge(*edge) or graph.has_edge(*redge)):
                # add_edge's attr_dict kwarg was removed in networkx 2.x;
                # the original passed attr_dict={}, i.e. no attributes.
                graph.add_edge(*redge)
def get_dbnames(exclude_list=None):
    """Return the list of candidacy database names, minus exclusions.

    Args:
        exclude_list (list, optional): database names to exclude.
            Defaults to no exclusions. (Previously a mutable ``[]``
            default; ``None`` avoids sharing one list across calls.)

    Returns:
        list: database names.
    """
    from wbia.expt import experiment_configs
    if exclude_list is None:
        exclude_list = []
    dbnames = experiment_configs.get_candidacy_dbnames()
    dbnames = ut.setdiff_ordered(dbnames, exclude_list)
    # HACK: hard-coded override — the candidacy list computed above is
    # currently discarded and only PZ_Master1 is returned.
    dbnames = ['PZ_Master1']
    return dbnames
def print_epoch_info(model, printcol_info, epoch_info):
    """Print one formatted, ANSI-colorized status line for a training epoch.

    Args:
        model: training model; ``model.best_results`` is read to decide
            whether a metric value should be highlighted as best-so-far.
        printcol_info (dict): contains 'requested_headers' (column names)
            and 'data_fmt_list' (one format string per column).
        epoch_info (dict): metric values for the current epoch.
    """
    requested_headers = printcol_info['requested_headers']
    # NOTE(review): `keys` is computed but never used below
    keys = ut.setdiff_ordered(requested_headers, ['epoch_num', 'duration'])
    data_fmt_list = printcol_info['data_fmt_list']
    data_fmtstr = '[info] ' + '|'.join(data_fmt_list)
    import colorama
    ANSI = colorama.Fore

    # Each *_str helper returns a tuple of format arguments for one column;
    # color codes are emitted only when the metric is notable.

    def epoch_num_str():
        return (epoch_info['epoch_num'], )

    def learn_loss_str():
        key = 'learn_loss'
        isbest = epoch_info[key] == model.best_results[key]
        return (
            ANSI.BLUE if isbest else '',
            '%0.6f' % (epoch_info[key], ),
            ANSI.RESET if isbest else '',
        )

    def valid_loss_str():
        key = 'valid_loss'
        isbest = epoch_info[key] == model.best_results[key]
        return (
            ANSI.GREEN if isbest else '',
            epoch_info[key],
            ANSI.RESET if isbest else '',
        )

    def learnval_rat_str():
        # Flag learn/valid loss ratios outside (0.5, 2.0) as unhealthy
        ratio = epoch_info['learnval_rat']
        unhealthy_ratio = ratio <= 0.5 or 2.0 <= ratio
        return (
            ANSI.RED if unhealthy_ratio else '',
            '%0.6f' % (ratio, ),
            ANSI.RESET if unhealthy_ratio else '',
        )

    def valid_acc_str():
        key = 'valid_acc'
        isbest = epoch_info[key] == model.best_results[key]
        # NOTE(review): this formats model.best_results[key] rather than
        # epoch_info[key] — looks like it always prints the best accuracy
        # instead of the current epoch's; confirm intent.
        return (
            ANSI.MAGENTA if isbest else '',
            '{:.2f}%'.format(model.best_results[key] * 100),
            ANSI.RESET if isbest else '',
        )

    def duration_str():
        return (epoch_info['duration'], )

    # Hack to build up the format data: each requested header name is
    # resolved to its `<name>_str` helper via locals() — renaming any of the
    # helpers above would silently break this lookup.
    locals_ = locals()
    func_list = [locals_[prefix + '_str'] for prefix in requested_headers]
    fmttup = tuple()
    for func in func_list:
        fmttup += func()
    epoch_info_str = data_fmtstr.format(*fmttup)
    print(epoch_info_str)
def ensure_names_are_connected(graph, aids_list):
    """Ensure all annotations sharing a name are connected in ``graph`` (in place).

    Augments a copy of ``graph`` with candidate edges between every pair of
    aids in each name group, then adds the minimum-spanning-tree edges of
    each connected component back into ``graph``, preferring edges that
    already existed.

    Args:
        graph (nx.Graph): graph modified in place.
        aids_list (list): groups of annotation ids; each group shares a name.
    """
    aug_graph = graph.copy().to_undirected()
    orig_edges = aug_graph.edges()
    # All ordered pairs within each name group, minus self-loops
    unflat_edges = [list(itertools.product(aids, aids)) for aids in aids_list]
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    new_edges = ut.setdiff_ordered(aid_pairs, aug_graph.edges())

    preweighted_edges = nx.get_edge_attributes(aug_graph, 'weight')
    if preweighted_edges:
        # Respect weights that were already assigned by the caller
        orig_edges = ut.setdiff(orig_edges, list(preweighted_edges.keys()))

    aug_graph.add_edges_from(new_edges)
    # Ensure the largest possible set of original edges is in the MST.
    # The old positional call set_edge_attributes(G, 'weight', dict) used the
    # networkx 1.x signature and binds incorrectly on networkx 2.x; use the
    # keyword form (consistent with the sibling copy of this function).
    nx.set_edge_attributes(aug_graph, name='weight',
                           values={edge: 1.0 for edge in new_edges})
    nx.set_edge_attributes(aug_graph, name='weight',
                           values={edge: 0.1 for edge in orig_edges})
    # nx.connected_component_subgraphs was removed in networkx 2.4; build
    # each component subgraph explicitly instead.
    for component in nx.connected_components(aug_graph):
        mst_sub_graph = nx.minimum_spanning_tree(aug_graph.subgraph(component))
        for edge in mst_sub_graph.edges():
            redge = edge[::-1]
            if not (graph.has_edge(*edge) or graph.has_edge(*redge)):
                # add_edge's attr_dict kwarg was removed in networkx 2.x;
                # the original passed attr_dict={}, i.e. no attributes.
                graph.add_edge(*redge)
def try_query(model, infr, evidence, interest_ttypes=[], verbose=True):
    r"""
    Run name inference on a pgmpy model given evidence.

    NOTE(review): the ``infr`` argument is immediately shadowed below, and the
    mutable default ``interest_ttypes=[]`` is never mutated here; both are
    kept as-is to preserve the signature and behavior.

    CommandLine:
        python -m wbia.algo.hots.bayes --exec-try_query --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots.bayes import *  # NOQA
        >>> verbose = True
        >>> other_evidence = {}
        >>> name_evidence = [1, None, 0, None]
        >>> score_evidence = ['high', 'low', 'low']
        >>> query_vars = None
        >>> model = make_name_model(num_annots=4, num_names=4, verbose=True, mode=1)
        >>> model, evidence, soft_evidence = update_model_evidence(model, name_evidence, score_evidence, other_evidence)
        >>> interest_ttypes = ['name']
        >>> infr = pgmpy.inference.BeliefPropagation(model)
        >>> evidence = infr._ensure_internal_evidence(evidence, model)
        >>> query_results = try_query(model, infr, evidence, interest_ttypes, verbose)
        >>> result = ('query_results = %s' % (str(query_results),))
        >>> ut.quit_if_noshow()
        >>> show_model(model, show_prior=True, **query_results)
        >>> ut.show_if_requested()

    Ignore:
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        probs = infr.query(query_vars, evidence)
        map_assignment = infr.map_query(query_vars, evidence)
    """
    infr = pgmpy.inference.VariableElimination(model)
    # infr = pgmpy.inference.BeliefPropagation(model)
    if True:
        # Short-circuit: always delegate to bruteforce. Everything in the
        # else-branch below is dead legacy code kept for reference.
        return bruteforce(model, query_vars=None, evidence=evidence)
    else:
        import vtool as vt

        # Query everything that is not already evidence
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        # hack
        query_vars = ut.setdiff_ordered(
            query_vars, ut.list_getattr(model.ttype2_cpds['score'], 'variable'))
        if verbose:
            evidence_str = ', '.join(model.pretty_evidence(evidence))
            logger.info('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ') = ')
        # Compute MAP joints
        # There is a bug here.
        # map_assign = infr.map_query(query_vars, evidence)
        # (probably an invalid thing to do)
        # joint_factor = pgmpy.factors.factor_product(*factor_list)
        # Brute force MAP
        name_vars = ut.list_getattr(model.ttype2_cpds['name'], 'variable')
        query_name_vars = ut.setdiff_ordered(name_vars, list(evidence.keys()))
        # TODO: incorporate case where Na is assigned to Fred
        # evidence_h = ut.delete_keys(evidence.copy(), ['Na'])
        joint = model.joint_distribution()
        joint.evidence_based_reduction(query_name_vars, evidence, inplace=True)
        # Find static row labels in the evidence
        given_name_vars = [var for var in name_vars if var in evidence]
        given_name_idx = ut.dict_take(evidence, given_name_vars)
        given_name_val = [
            joint.statename_dict[var][idx]
            for var, idx in zip(given_name_vars, given_name_idx)
        ]
        new_vals = joint.values.ravel()
        # Add static evidence variables to the relabeled name states
        new_vars = given_name_vars + joint.variables
        new_rows = [tuple(given_name_val) + row for row in joint._row_labels()]
        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        temp_basis = [i for i in range(model.num_names)]

        def relabel_names(names, temp_basis=temp_basis):
            # Canonicalize: map each distinct name to an index in
            # first-seen order so permuted labelings compare equal.
            names = list(map(six.text_type, names))
            mapping = {}
            for n in names:
                if n not in mapping:
                    mapping[n] = len(mapping)
            new_names = tuple([temp_basis[mapping[n]] for n in names])
            return new_names

        relabeled_rows = list(map(relabel_names, new_rows))
        # Combine probability of rows with the same (new) label
        data_ids = np.array(vt.other.compute_unique_data_ids_(relabeled_rows))
        unique_ids, groupxs = vt.group_indices(data_ids)
        reduced_row_lbls = ut.take(relabeled_rows, ut.get_list_column(groupxs, 0))
        reduced_row_lbls = list(map(list, reduced_row_lbls))
        reduced_values = np.array(
            [g.sum() for g in vt.apply_grouping(new_vals, groupxs)])
        # Relabel the rows one more time to agree with initial constraints
        used_ = []
        replaced = []
        for colx, (var, val) in enumerate(zip(given_name_vars, given_name_val)):
            # All columns must be the same for this labeling
            alias = reduced_row_lbls[0][colx]
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, alias, val)
            replaced.append(alias)
            used_.append(val)
        basis = model.ttype2_cpds['name'][0]._template_.basis
        find_remain_ = ut.setdiff_ordered(temp_basis, replaced)
        repl_remain_ = ut.setdiff_ordered(basis, used_)
        for find, repl in zip(find_remain_, repl_remain_):
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, find, repl)
        # Now find the most likely state
        sortx = reduced_values.argsort()[::-1]
        sort_reduced_row_lbls = ut.take(reduced_row_lbls, sortx.tolist())
        sort_reduced_values = reduced_values[sortx]
        # Remove evidence based labels
        new_vars_ = new_vars[len(given_name_vars):]
        sort_reduced_row_lbls_ = ut.get_list_column(
            sort_reduced_row_lbls, slice(len(given_name_vars), None))
        sort_reduced_row_lbls_[0]  # NOTE(review): no-op expression statement
        # hack into a new joint factor
        var_states = ut.lmap(ut.unique_ordered, zip(*sort_reduced_row_lbls_))
        statename_dict = dict(zip(new_vars, var_states))
        cardinality = ut.lmap(len, var_states)
        val_lookup = dict(
            zip(ut.lmap(tuple, sort_reduced_row_lbls_),
                sort_reduced_values))
        values = np.zeros(np.prod(cardinality))
        for idx, state in enumerate(ut.iprod(*var_states)):
            if state in val_lookup:
                values[idx] = val_lookup[state]
        joint2 = pgmpy.factors.Factor(new_vars_, cardinality, values,
                                      statename_dict=statename_dict)
        logger.info(joint2)
        max_marginals = {}
        for i, var in enumerate(query_name_vars):
            one_out = query_name_vars[:i] + query_name_vars[i + 1:]
            max_marginals[var] = joint2.marginalize(one_out, inplace=False)
            # max_marginals[var] = joint2.maximize(one_out, inplace=False)
        # NOTE(review): hard-coded variable names 'Nb', 'Nc' below look like
        # leftover debugging for a specific 4-annot model — confirm.
        logger.info(joint2.marginalize(['Nb', 'Nc'], inplace=False))
        factor_list = max_marginals.values()
        # Better map assignment based on knowledge of labels
        map_assign = dict(zip(new_vars_, sort_reduced_row_lbls_[0]))
        sort_reduced_rowstr_lbls = [
            ut.repr2(dict(zip(new_vars, lbls)), explicit=True, nobraces=True,
                     strvals=True)
            for lbls in sort_reduced_row_lbls_
        ]
        # Keep the top 3 assignments; lump the rest into 'other'
        top_assignments = list(
            zip(sort_reduced_rowstr_lbls[:3], sort_reduced_values))
        if len(sort_reduced_values) > 3:
            top_assignments += [('other', 1 - sum(sort_reduced_values[:3]))]
        # import utool
        # utool.embed()
        # Compute all marginals
        # probs = infr.query(query_vars, evidence)
        # probs = infr.query(query_vars, evidence)
        # factor_list = probs.values()
        ## Marginalize over non-query, non-evidence
        # irrelevant_vars = ut.setdiff_ordered(joint.variables, list(evidence.keys()) + query_vars)
        # joint.marginalize(irrelevant_vars)
        # joint.normalize()
        # new_rows = joint._row_labels()
        # new_vals = joint.values.ravel()
        # map_vals = new_rows[new_vals.argmax()]
        # map_assign = dict(zip(joint.variables, map_vals))
        # Compute Marginalized MAP joints
        # marginalized_joints = {}
        # for ttype in interest_ttypes:
        #     other_vars = [v for v in joint_factor.scope()
        #                   if model.var2_cpd[v].ttype != ttype]
        #     marginal = joint_factor.marginalize(other_vars, inplace=False)
        #     marginalized_joints[ttype] = marginal
        query_results = {
            'factor_list': factor_list,
            'top_assignments': top_assignments,
            'map_assign': map_assign,
            'marginalized_joints': None,
        }
        return query_results
def get_test_daids(ibs, default_daids='all', qaid_list=None, return_annot_info=False, aidcfg=None):
    """
    Gets database annot_rowids based on command line arguments

    DEPRICATE

    CommandLine:
        python dev.py --db PZ_MTEST -t best --exclude-query --qaid 72 -r 0 -c 0 --show --va --vf --dump-extra

    Args:
        ibs (IBEISController): ibeis controller object
        default_daids (str): (default = 'all')
        qaid_list (list): list of chosen qaids that may affect daids (default = None)

    Returns:
        list: available_daids

    CommandLine:
        python -m ibeis.init.main_helpers --test-get_test_daids
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --exclude-query
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --daid-exclude 2 3 4
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --species=zebra_grevys
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_Master0 --species=zebra_grevys
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_Master0 --controlled --verbmhelp
        python -m ibeis.init.main_helpers --exec-get_test_daids --controlled --db PZ_Master0 --exec-mode

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.init.main_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> default_daids = 'all'
        >>> qaid_list = [1]
        >>> available_daids = get_test_daids(ibs, default_daids, qaid_list)
        >>> ibeis.other.dbinfo.get_dbinfo(ibs, aid_list=available_daids, with_contrib=False, short=True)
        >>> result = 'available_daids = ' + ut.obj_str(available_daids, truncate=True, nl=False)
        >>> print('len(available_daids) %d' % len(available_daids))
        >>> print(result)
        available_daids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    """
    # Pipeline: include -> default -> dedup -> exclude -> subindex.
    # Behavior is driven by command-line flags read through ut/params.
    daid_request_info = {}

    if VERB_MAIN_HELPERS:
        print('[get_test_daids] + --- GET_TEST_DAIDS ---')
        print('[get_test_daids] * default_daids = %s' % (ut.obj_str(default_daids, truncate=True, nl=False)))
        print('[get_test_daids] * qaid_list = %s' % (ut.obj_str(qaid_list, truncate=True, nl=False)))

    # ---- INCLUDING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * include step')

    available_daids = []

    # Flag-derived configuration
    CONTROLLED_CASES = ut.get_argflag('--controlled') or ut.get_argflag('--controlled_daids')
    DSHUFFLE = ut.get_argval('--dshuffle')
    DINDEX = params.args.dindex
    NO_JUNK = not ut.get_argflag('--junk')
    EXCLUDE_QUERY = ut.get_argflag('--exclude-query')
    #daids_exclude = params.args.daid_exclude
    daids_exclude = None  # NOTE(review): --daid-exclude support is disabled here

    if CONTROLLED_CASES:
        print('[get_test_daids] * Including controlled daids')
        from ibeis.other import ibsfuncs
        controlled_daids = ibsfuncs.get_two_annots_per_name_and_singletons(ibs, onlygt=False)
        available_daids.extend(controlled_daids)
        daid_request_info['controlled'] = True
    else:
        daid_request_info['controlled'] = False

    # ---- CHECK_DEFAULTS DATA
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))

    if len(available_daids) == 0:
        # Nothing requested explicitly: fall back to the default policy
        print('[get_test_daids] * ... defaulting, no available daids on command line.')
        if isinstance(default_daids, six.string_types):
            if default_daids == 'all':
                default_daids = ibs.get_valid_aids()
                daid_request_info['default_daids'] = 'all'
            elif default_daids == 'gt':
                default_daids = ut.flatten(ibs.get_annot_groundtruth(qaid_list))
                daid_request_info['default_daids'] = 'gt'
        #available_qaids = valid_aids[0:1]
        assert not isinstance(available_daids, six.string_types)
        available_daids = default_daids
    else:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * ... not defaulting')

    available_daids = ut.unique_ordered(available_daids)

    # ---- EXCLUSION STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] * exclude step')

    species = ut.get_argval('--species', type_=str, default=None)

    if NO_JUNK:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Filtering junk')
        available_daids = ibs.filter_junk_annotations(available_daids)

    if EXCLUDE_QUERY:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Excluding query qaids')
        assert qaid_list is not None, 'must specify qaids to exclude'
        available_daids = ut.setdiff_ordered(available_daids, qaid_list)

    if daids_exclude is not None:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Excluding specified daids')
        available_daids = ut.setdiff_ordered(available_daids, daids_exclude)

    if species is not None:
        if species == 'primary':
            if VERB_MAIN_HELPERS:
                print('[get_test_qaids] * Finiding primary species')
            #species = ibs.get_primary_database_species(available_daids)
            species = ibs.get_primary_database_species()
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Filtering to species=%r' % (species,))
        import numpy as np
        isvalid_list = np.array(ibs.get_annot_species(available_daids)) == species
        available_daids = ut.compress(available_daids, isvalid_list)

    # ---- SUBINDEXING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] * subindex step')

    #ut.get_argval('--qshuffle')

    if DSHUFFLE:
        # Determenistic shuffling (fixed seed keeps runs reproducible)
        available_daids = ut.take(available_daids, ut.random_indexes(len(available_daids), seed=43))
        daid_request_info['shuffled'] = True

    if DINDEX is not None:
        # Take an explicit subset by index; silently drops out-of-range indexes
        dindexes = ensure_flatlistlike(DINDEX)
        _test_daids = [available_daids[dx] for dx in dindexes if dx < len(available_daids)]
        print('[get_test_daids] Chose subset of size %d/%d' % (len(_test_daids), len(available_daids)))
        available_daids = _test_daids

    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] L ___ GET_TEST_DAIDS ___')

    if return_annot_info:
        return available_daids, daid_request_info
    else:
        return available_daids
def get_dbnames(exclude_list=None):
    """Return the list of candidacy database names, minus exclusions.

    Args:
        exclude_list (list, optional): database names to exclude.
            Defaults to no exclusions. (Previously a mutable ``[]``
            default; ``None`` avoids sharing one list across calls.)

    Returns:
        list: database names.
    """
    from ibeis.expt import experiment_configs
    if exclude_list is None:
        exclude_list = []
    dbnames = experiment_configs.get_candidacy_dbnames()
    dbnames = ut.setdiff_ordered(dbnames, exclude_list)
    # HACK: hard-coded override — the candidacy list computed above is
    # currently discarded and only PZ_Master1 is returned.
    dbnames = ['PZ_Master1']
    return dbnames
def get_test_daids(ibs, default_daids='all', qaid_list=None, return_annot_info=False, aidcfg=None):
    """
    Gets database annot_rowids based on command line arguments

    DEPRICATE

    CommandLine:
        python dev.py --db PZ_MTEST -t best --exclude-query --qaid 72 -r 0 -c 0 --show --va --vf --dump-extra

    Args:
        ibs (IBEISController): ibeis controller object
        default_daids (str): (default = 'all')
        qaid_list (list): list of chosen qaids that may affect daids (default = None)

    Returns:
        list: available_daids

    CommandLine:
        python -m ibeis.init.main_helpers --test-get_test_daids
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --verbmhelp
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --exclude-query
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --daid-exclude 2 3 4
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_MTEST --species=zebra_grevys
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_Master0 --species=zebra_grevys
        python -m ibeis.init.main_helpers --test-get_test_daids --db PZ_Master0 --controlled --verbmhelp
        python -m ibeis.init.main_helpers --exec-get_test_daids --controlled --db PZ_Master0 --exec-mode

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.init.main_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> default_daids = 'all'
        >>> qaid_list = [1]
        >>> available_daids = get_test_daids(ibs, default_daids, qaid_list)
        >>> ibeis.other.dbinfo.get_dbinfo(ibs, aid_list=available_daids, with_contrib=False, short=True)
        >>> result = 'available_daids = ' + ut.obj_str(available_daids, truncate=True, nl=False)
        >>> print('len(available_daids) %d' % len(available_daids))
        >>> print(result)
        available_daids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    """
    # Pipeline: include -> default -> dedup -> exclude -> subindex.
    # NOTE(review): near-duplicate of a sibling copy of this function; this
    # copy imports `ibsfuncs` from the `ibeis` top level and uses
    # ut.unique_keep_order (sibling uses ut.unique_ordered) — presumably
    # aliases; confirm before unifying.
    daid_request_info = {}

    if VERB_MAIN_HELPERS:
        print('[get_test_daids] + --- GET_TEST_DAIDS ---')
        print('[get_test_daids] * default_daids = %s' % (ut.obj_str(default_daids, truncate=True, nl=False)))
        print('[get_test_daids] * qaid_list = %s' % (ut.obj_str(qaid_list, truncate=True, nl=False)))

    # ---- INCLUDING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * include step')

    available_daids = []

    # Flag-derived configuration
    CONTROLLED_CASES = ut.get_argflag('--controlled') or ut.get_argflag('--controlled_daids')
    DSHUFFLE = ut.get_argval('--dshuffle')
    DINDEX = params.args.dindex
    NO_JUNK = not ut.get_argflag('--junk')
    EXCLUDE_QUERY = ut.get_argflag('--exclude-query')
    #daids_exclude = params.args.daid_exclude
    daids_exclude = None  # NOTE(review): --daid-exclude support is disabled here

    if CONTROLLED_CASES:
        print('[get_test_daids] * Including controlled daids')
        from ibeis import ibsfuncs
        controlled_daids = ibsfuncs.get_two_annots_per_name_and_singletons(ibs, onlygt=False)
        available_daids.extend(controlled_daids)
        daid_request_info['controlled'] = True
    else:
        daid_request_info['controlled'] = False

    # ---- CHECK_DEFAULTS DATA
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))

    if len(available_daids) == 0:
        # Nothing requested explicitly: fall back to the default policy
        print('[get_test_daids] * ... defaulting, no available daids on command line.')
        if isinstance(default_daids, six.string_types):
            if default_daids == 'all':
                default_daids = ibs.get_valid_aids()
                daid_request_info['default_daids'] = 'all'
            elif default_daids == 'gt':
                default_daids = ut.flatten(ibs.get_annot_groundtruth(qaid_list))
                daid_request_info['default_daids'] = 'gt'
        #available_qaids = valid_aids[0:1]
        assert not isinstance(available_daids, six.string_types)
        available_daids = default_daids
    else:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * ... not defaulting')

    available_daids = ut.unique_keep_order(available_daids)

    # ---- EXCLUSION STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] * exclude step')

    species = ut.get_argval('--species', type_=str, default=None)

    if NO_JUNK:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Filtering junk')
        available_daids = ibs.filter_junk_annotations(available_daids)

    if EXCLUDE_QUERY:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Excluding query qaids')
        assert qaid_list is not None, 'must specify qaids to exclude'
        available_daids = ut.setdiff_ordered(available_daids, qaid_list)

    if daids_exclude is not None:
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Excluding specified daids')
        available_daids = ut.setdiff_ordered(available_daids, daids_exclude)

    if species is not None:
        if species == 'primary':
            if VERB_MAIN_HELPERS:
                print('[get_test_qaids] * Finiding primary species')
            #species = ibs.get_primary_database_species(available_daids)
            species = ibs.get_primary_database_species()
        if VERB_MAIN_HELPERS:
            print('[get_test_daids] * Filtering to species=%r' % (species,))
        import numpy as np
        isvalid_list = np.array(ibs.get_annot_species(available_daids)) == species
        available_daids = ut.compress(available_daids, isvalid_list)

    # ---- SUBINDEXING STEP
    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] * subindex step')

    #ut.get_argval('--qshuffle')

    if DSHUFFLE:
        # Determenistic shuffling (fixed seed keeps runs reproducible)
        available_daids = ut.take(available_daids, ut.random_indexes(len(available_daids), seed=43))
        daid_request_info['shuffled'] = True

    if DINDEX is not None:
        # Take an explicit subset by index; silently drops out-of-range indexes
        dindexes = ensure_flatlistlike(DINDEX)
        _test_daids = [available_daids[dx] for dx in dindexes if dx < len(available_daids)]
        print('[get_test_daids] Chose subset of size %d/%d' % (len(_test_daids), len(available_daids)))
        available_daids = _test_daids

    if VERB_MAIN_HELPERS:
        print('[get_test_daids] * len(available_daids) = %r' % (len(available_daids)))
        print('[get_test_daids] L ___ GET_TEST_DAIDS ___')

    if return_annot_info:
        return available_daids, daid_request_info
    else:
        return available_daids
def check_database_overlap(ibs1, ibs2):
    """
    Report overlap between two IBEIS/WBIA databases.

    Logs intersection statistics for image uuids and annotation
    uuids/visual-uuids/semantic-uuids, and inspects annotations whose visual
    info matches but whose semantic info (e.g. viewpoint) differs.
    Diagnostic only: results are logged, nothing is returned or modified.

    CommandLine:
        python -m wbia.other.dbinfo --test-get_dbinfo:1 --db PZ_MTEST
        dev.py -t listdbs
        python -m wbia.dbio.export_subset check_database_overlap --db PZ_MTEST --db2 PZ_MOTHERS

    CommandLine:
        python -m wbia.dbio.export_subset check_database_overlap

        python -m wbia.dbio.export_subset check_database_overlap --db1=PZ_MTEST --db2=PZ_Master0  # NOQA
        python -m wbia.dbio.export_subset check_database_overlap --db1=NNP_Master3 --db2=PZ_Master0  # NOQA

        python -m wbia.dbio.export_subset check_database_overlap --db1=GZ_Master0 --db2=GZ_ALL
        python -m wbia.dbio.export_subset check_database_overlap --db1=GZ_ALL --db2=lewa_grevys

        python -m wbia.dbio.export_subset check_database_overlap --db1=PZ_FlankHack --db2=PZ_Master1
        python -m wbia.dbio.export_subset check_database_overlap --db1=PZ_PB_RF_TRAIN --db2=PZ_Master1

    Example:
        >>> # SCRIPT
        >>> from wbia.dbio.export_subset import *  # NOQA
        >>> import wbia
        >>> import utool as ut
        >>> #ibs1 = wbia.opendb(db='PZ_Master0')
        >>> #ibs2 = wbia.opendb(dbdir='/raid/work2/Turk/PZ_Master')
        >>> db1 = ut.get_argval('--db1', str, default='PZ_MTEST')
        >>> db2 = ut.get_argval('--db2', str, default='testdb1')
        >>> dbdir1 = ut.get_argval('--dbdir1', str, default=None)
        >>> dbdir2 = ut.get_argval('--dbdir2', str, default=None)
        >>> ibs1 = wbia.opendb(db=db1, dbdir=dbdir1)
        >>> ibs2 = wbia.opendb(db=db2, dbdir=dbdir2)
        >>> check_database_overlap(ibs1, ibs2)
    """
    import numpy as np

    def print_isect(items1, items2, lbl=''):
        # Log intersection stats and return the indexes of common items in
        # each input (aligned to the same intersection order).
        set1_ = set(items1)
        set2_ = set(items2)
        items_isect = set1_.intersection(set2_)
        fmtkw1 = dict(
            part=1,
            lbl=lbl,
            num=len(set1_),
            num_isect=len(items_isect),
            percent=100 * len(items_isect) / len(set1_),
        )
        fmtkw2 = dict(
            part=2,
            lbl=lbl,
            num=len(set2_),
            num_isect=len(items_isect),
            percent=100 * len(items_isect) / len(set2_),
        )
        fmt_a = ' * Num {lbl} {part}: {num_isect} / {num} = {percent:.2f}%'
        # fmt_b = ' * Num {lbl} isect: {num}'
        logger.info('Checking {lbl} intersection'.format(lbl=lbl))
        logger.info(fmt_a.format(**fmtkw1))
        logger.info(fmt_a.format(**fmtkw2))
        # logger.info(fmt_b.format(lbl=lbl, num=len(items_isect)))
        # items = items_isect
        # list_ = items1
        x_list1 = ut.find_list_indexes(items1, items_isect)
        x_list2 = ut.find_list_indexes(items2, items_isect)
        return x_list1, x_list2

    gids1 = ibs1.images()
    gids2 = ibs2.images()

    # Find common images
    # items1, items2, lbl, = gids1.uuids, gids2.uuids, 'images'
    gx_list1, gx_list2 = print_isect(gids1.uuids, gids2.uuids, 'images')
    gids_isect1 = gids1.take(gx_list1)
    gids_isect2 = gids2.take(gx_list2)
    assert gids_isect2.uuids == gids_isect1.uuids, 'sequence must be aligned'

    SHOW_ISECT_GIDS = False
    if SHOW_ISECT_GIDS:
        if len(gx_list1) > 0:
            logger.info('gids_isect1 = %r' % (gids_isect1, ))
            logger.info('gids_isect2 = %r' % (gids_isect2, ))
            if False:
                # Debug code
                import wbia.viz
                import wbia.plottool as pt

                gid_pairs = list(zip(gids_isect1, gids_isect2))
                pairs_iter = ut.ichunks(gid_pairs, chunksize=8)
                for fnum, pairs in enumerate(pairs_iter, start=1):
                    pnum_ = pt.make_pnum_nextgen(nRows=len(pairs), nCols=2)
                    for gid1, gid2 in pairs:
                        wbia.viz.show_image(ibs1, gid1, pnum=pnum_(), fnum=fnum)
                        wbia.viz.show_image(ibs2, gid2, pnum=pnum_(), fnum=fnum)

    # if False:
    #     aids1 = ibs1.get_valid_aids()
    #     aids2 = ibs2.get_valid_aids()
    #     ibs1.update_annot_visual_uuids(aids1)
    #     ibs2.update_annot_visual_uuids(aids2)
    #     ibs1.update_annot_semantic_uuids(aids1)
    #     ibs2.update_annot_semantic_uuids(aids2)

    # Check to see which intersecting images have different annotations
    image_aids_isect1 = gids_isect1.aids
    image_aids_isect2 = gids_isect2.aids
    image_avuuids_isect1 = np.array(
        ibs1.unflat_map(ibs1.get_annot_visual_uuids, image_aids_isect1))
    image_avuuids_isect2 = np.array(
        ibs2.unflat_map(ibs2.get_annot_visual_uuids, image_aids_isect2))
    changed_image_xs = np.nonzero(
        image_avuuids_isect1 != image_avuuids_isect2)[0]

    if len(changed_image_xs) > 0:
        logger.info(
            'There are %d images with changes in annotation visual information' %
            (len(changed_image_xs), ))
        changed_gids1 = ut.take(gids_isect1, changed_image_xs)
        changed_gids2 = ut.take(gids_isect2, changed_image_xs)

        SHOW_CHANGED_GIDS = False
        if SHOW_CHANGED_GIDS:
            logger.info('gids_isect1 = %r' % (changed_gids2, ))
            logger.info('gids_isect2 = %r' % (changed_gids1, ))
            # if False:
            #     # Debug code
            #     import wbia.viz
            #     import wbia.plottool as pt
            #     gid_pairs = list(zip(changed_gids1, changed_gids2))
            #     pairs_iter = ut.ichunks(gid_pairs, chunksize=8)
            #     for fnum, pairs in enumerate(pairs_iter, start=1):
            #         pnum_ = pt.make_pnum_nextgen(nRows=len(pairs), nCols=2)
            #         for gid1, gid2 in pairs:
            #             wbia.viz.show_image(
            #                 ibs1, gid1, pnum=pnum_(), fnum=fnum)
            #             wbia.viz.show_image(
            #                 ibs2, gid2, pnum=pnum_(), fnum=fnum)

    # Check for overlapping annotations (visual info only) in general
    aids1 = ibs1.annots()
    aids2 = ibs2.annots()

    # Check for overlapping annotations (visual + semantic info) in general
    aux_list1, aux_list2 = print_isect(aids1.uuids, aids2.uuids, 'uuids')
    avx_list1, avx_list2 = print_isect(aids1.visual_uuids, aids2.visual_uuids, 'vuuids')
    asx_list1, asx_list2 = print_isect(aids1.semantic_uuids, aids2.semantic_uuids, 'suuids')

    # Check which images with the same visual uuids have different semantic
    # uuids
    changed_ax_list1 = ut.setdiff_ordered(avx_list1, asx_list1)
    changed_ax_list2 = ut.setdiff_ordered(avx_list2, asx_list2)
    assert len(changed_ax_list1) == len(changed_ax_list2)
    assert ut.take(aids1.visual_uuids, changed_ax_list1) == ut.take(aids2.visual_uuids, changed_ax_list2)

    changed_aids1 = np.array(ut.take(aids1, changed_ax_list1))
    changed_aids2 = np.array(ut.take(aids2, changed_ax_list2))

    changed_sinfo1 = ibs1.get_annot_semantic_uuid_info(changed_aids1)
    changed_sinfo2 = ibs2.get_annot_semantic_uuid_info(changed_aids2)
    sinfo1_arr = np.array(changed_sinfo1)
    sinfo2_arr = np.array(changed_sinfo2)
    is_semantic_diff = sinfo2_arr != sinfo1_arr

    # Inspect semantic differences
    if np.any(is_semantic_diff):
        colxs, rowxs = np.nonzero(is_semantic_diff)
        colx2_rowids = ut.group_items(rowxs, colxs)
        prop2_rowids = ut.map_dict_keys(changed_sinfo1._fields.__getitem__, colx2_rowids)
        logger.info('changed_value_counts = ' + ut.repr2(ut.map_dict_vals(len, prop2_rowids)))
        yawx = changed_sinfo1._fields.index('yaw')

        # Show change in viewpoints
        if len(colx2_rowids[yawx]) > 0:
            # np.float was removed in NumPy 1.24; the builtin float is the
            # documented replacement and is what np.float aliased.
            vp_category_diff = ibsfuncs.viewpoint_diff(
                sinfo1_arr[yawx], sinfo2_arr[yawx]).astype(float)
            # Look for category changes
            # any_diff = np.floor(vp_category_diff) > 0
            # _xs = np.nonzero(any_diff)[0]
            # _aids1 = changed_aids1.take(_xs)
            # _aids2 = changed_aids2.take(_xs)
            # Look for significant changes
            is_significant_diff = np.floor(vp_category_diff) > 1
            significant_xs = np.nonzero(is_significant_diff)[0]
            significant_aids1 = changed_aids1.take(significant_xs)
            significant_aids2 = changed_aids2.take(significant_xs)
            logger.info('There are %d significant viewpoint changes' %
                        (len(significant_aids2), ))
            # vt.ori_distance(sinfo1_arr[yawx], sinfo2_arr[yawx])
            # zip(ibs1.get_annot_viewpoint_code(significant_aids1),
            #     ibs2.get_annot_viewpoint_code(significant_aids2))
            # logger.info('yawdiff = %r' % )
            # if False:
            #     # Hack: Apply fixes
            #     good_yaws = ibs2.get_annot_yaws(significant_aids2)
            #     ibs1.set_annot_yaws(significant_aids1, good_yaws)
            #     pass
            if False:
                # Debug code
                import wbia.viz
                import wbia.plottool as pt

                # aid_pairs = list(zip(_aids1, _aids2))
                aid_pairs = list(zip(significant_aids1, significant_aids2))
                pairs_iter = ut.ichunks(aid_pairs, chunksize=8)
                for fnum, pairs in enumerate(pairs_iter, start=1):
                    pnum_ = pt.make_pnum_nextgen(nRows=len(pairs), nCols=2)
                    for aid1, aid2 in pairs:
                        wbia.viz.show_chip(
                            ibs1, aid1, pnum=pnum_(), fnum=fnum,
                            show_viewcode=True, nokpts=True,
                        )
                        wbia.viz.show_chip(
                            ibs2, aid2, pnum=pnum_(), fnum=fnum,
                            show_viewcode=True, nokpts=True,
                        )

    #
    nAnnots_per_image1 = np.array(ibs1.get_image_num_annotations(gids1))
    nAnnots_per_image2 = np.array(ibs2.get_image_num_annotations(gids2))
    #
    images_without_annots1 = sum(nAnnots_per_image1 == 0)
    images_without_annots2 = sum(nAnnots_per_image2 == 0)
    logger.info('images_without_annots1 = %r' % (images_without_annots1, ))
    logger.info('images_without_annots2 = %r' % (images_without_annots2, ))

    nAnnots_per_image1  # NOTE(review): no-op expression statement
def make_metadata_custom_api(metadata):
    r"""
    Build a guitool table widget for browsing a ResultMetadata shelf.

    Args:
        metadata: ResultMetadata-like object exposing ``get_square_data()``,
            which returns ``(col_name_list, column_list)`` of equal-length
            columns.  # NOTE(review): exact type not visible here — confirm.

    Returns:
        MetadataViewer: a connected guitool.APIItemWidget subclass.

    CommandLine:
        python -m ibeis.expt.experiment_drawing --test-make_metadata_custom_api --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.expt.experiment_drawing import *  # NOQA
        >>> import guitool
        >>> guitool.ensure_qapp()
        >>> metadata_fpath = '/media/raid/work/Elephants_drop1_ears/_ibsdb/figures/result_metadata.shelf'
        >>> metadata = ResultMetadata(metadata_fpath, autoconnect=True)
        >>> wgt = make_metadata_custom_api(metadata)
        >>> ut.quit_if_noshow()
        >>> wgt.show()
        >>> wgt.raise_()
        >>> guitool.qtapp_loop(wgt, frequency=100)
    """
    # Qt imports are deferred so importing this module does not require a GUI.
    import guitool
    from guitool.__PYQT__ import QtCore

    class MetadataViewer(guitool.APIItemWidget):
        # Table widget; double-clicking an *fpath column opens that path.
        def __init__(wgt, parent=None, tblnice='Result Metadata Viewer', **kwargs):
            guitool.APIItemWidget.__init__(wgt, parent=parent, tblnice=tblnice,
                                           **kwargs)
            wgt.connect_signals_and_slots()

        @guitool.slot_(QtCore.QModelIndex)
        def _on_doubleclick(wgt, qtindex):
            print('[wgt] _on_doubleclick: ')
            col = qtindex.column()
            # Editable columns already handle double-click via in-place editing.
            if wgt.api.col_edit_list[col]:
                print('do nothing special for editable columns')
                return
            model = qtindex.model()
            colname = model.get_header_name(col)
            # Columns named '*fpath' hold filesystem paths; open them externally.
            if colname.endswith('fpath'):
                print('showing fpath')
                fpath = model.get_header_data(colname, qtindex)
                ut.startfile(fpath)

        def connect_signals_and_slots(wgt):
            #wgt.view.clicked.connect(wgt._on_click)
            wgt.view.doubleClicked.connect(wgt._on_doubleclick)
            #wgt.view.pressed.connect(wgt._on_pressed)
            #wgt.view.activated.connect(wgt._on_activated)

    guitool.ensure_qapp()
    #cfgstr_list = metadata
    col_name_list, column_list = metadata.get_square_data()
    # Priority of column names: listed names first, the rest alphabetical.
    colname_priority = [
        'qaids', 'qx2_gt_rank', 'qx2_gt_timedelta', 'qx2_gf_timedelta',
        'analysis_fpath', 'qx2_gt_raw_score', 'qx2_gf_raw_score'
    ]
    colname_priority += sorted(
        ut.setdiff_ordered(col_name_list, colname_priority))
    sortx = ut.priority_argsort(col_name_list, colname_priority)
    # Reorder names and data columns in lockstep.
    col_name_list = ut.take(col_name_list, sortx)
    column_list = ut.take(column_list, sortx)
    col_lens = list(map(len, column_list))
    print('col_name_list = %r' % (col_name_list, ))
    print('col_lens = %r' % (col_lens, ))
    # Sanity checks: table must be non-empty and rectangular.
    assert len(col_lens) > 0, 'no columns'
    assert col_lens[0] > 0, 'no rows'
    assert all([len_ == col_lens[0] for len_ in col_lens]), 'inconsistant data'
    col_types_dict = {}
    col_getter_dict = dict(zip(col_name_list, column_list))
    col_bgrole_dict = {}
    col_ider_dict = {}
    col_setter_dict = {}
    # Human-friendly column headers: strip the 'qx2_' prefix, then override
    # a few names with nicer capitalization.
    col_nice_dict = {name: name.replace('qx2_', '') for name in col_name_list}
    col_nice_dict.update({
        'qx2_gt_timedelta': 'GT TimeDelta',
        'qx2_gf_timedelta': 'GF TimeDelta',
        'qx2_gt_rank': 'GT Rank',
    })
    editable_colnames = []
    sortby = 'qaids'

    def get_thumb_size():
        # Fixed thumbnail edge length in pixels for image cells.
        return 128
    col_width_dict = {}
    # NOTE(review): CustomAPI arguments are positional — order matters.
    custom_api = guitool.CustomAPI(col_name_list, col_types_dict,
                                   col_getter_dict, col_bgrole_dict,
                                   col_ider_dict, col_setter_dict,
                                   editable_colnames, sortby, get_thumb_size,
                                   sort_reverse=True,
                                   col_width_dict=col_width_dict,
                                   col_nice_dict=col_nice_dict)
    #headers = custom_api.make_headers(tblnice='results')
    #print(ut.dict_str(headers))
    wgt = MetadataViewer()
    wgt.connect_api(custom_api)
    return wgt
def findcite():
    """
    prints info about used and unused citations

    Cross-references citation keys used in the project's LaTeX sources
    against the entries of ``My_Library_clean.bib`` and reports:
    unknown keys (cited but not in the bib), unused keys (in the bib but
    never cited), an author histogram, and unused entries by important
    authors.  The sub-reports are gated by ``ut.argv_flag_dec``, so each
    one runs only when its command-line flag is passed.
    """
    tex_fpath_list = testdata_fpaths()
    citekey_list = find_used_citations(tex_fpath_list)
    # Find uncited entries
    # NOTE(review): the local bibtexparser import is commented out below;
    # presumably it is imported at module level — confirm, else NameError.
    #bibtexparser = ut.tryimport('bibtexparser')
    bib_fpath = 'My_Library_clean.bib'
    bibtex_str = ut.read_from(bib_fpath)
    bib_database = bibtexparser.loads(bibtex_str)
    bibtex_dict = bib_database.get_entry_dict()
    # Normalize every entry's field names to lowercase unicode so later
    # lookups like entry.get('author') are case-insensitive.
    for key in bibtex_dict.keys():
        entry = bibtex_dict[key]
        entry = ut.map_dict_keys(six.text_type, entry)
        entry = ut.map_dict_keys(six.text_type.lower, entry)
        bibtex_dict[key] = entry
    print('ALL')
    # Keys that are deliberately not real bibliography entries.
    ignore = ['JP', '?']
    citekey_list = ut.setdiff_ordered(sorted(ut.unique(citekey_list)), ignore)
    #print(ut.indentjoin(citekey_list))
    print('len(citekey_list) = %r' % (len(citekey_list), ))
    # Cited keys missing from the bib, and bib keys never cited.
    unknown_keys = list(set(citekey_list) - set(bibtex_dict.keys()))
    unused_keys = list(set(bibtex_dict.keys()) - set(citekey_list))

    # Report unknown keys as a warning instead of aborting the whole run.
    try:
        if len(unknown_keys) != 0:
            print('\nUNKNOWN KEYS:')
            print(ut.list_str(unknown_keys))
            raise AssertionError('unknown keys')
    except AssertionError as ex:
        ut.printex(ex, iswarning=True, keys=['unknown_keys'])

    @ut.argv_flag_dec(indent='    ')
    def close_keys():
        # Suggest near-miss bib keys for each unknown citation key.
        if len(unknown_keys) > 0:
            bibtex_dict.keys()
            print('\nDid you mean:')
            for key in unknown_keys:
                print('---')
                print(key)
                print(ut.closet_words(key, bibtex_dict.keys(), 3))
            print('L___')
        else:
            print('no unkown keys')
    close_keys()

    @ut.argv_flag_dec(indent='    ')
    def print_unused():
        # List uncited bib entries, shortest keys first.
        print(ut.indentjoin(ut.sortedby(unused_keys, map(len, unused_keys))))
        print('len(unused_keys) = %r' % (len(unused_keys), ))
    print_unused()

    # Collect every individual author name across all entries.
    all_authors = []
    for key in bibtex_dict.keys():
        entry = bibtex_dict[key]
        # Strip braces and escape artifacts before splitting author lists.
        toremove = ['author', '{', '}', r'\\textbackslash']
        author = ut.multi_replace(entry.get('author', ''), toremove, '')
        authors = author.split(' and ')
        all_authors.extend(authors)

    @ut.argv_flag_dec(indent='    ')
    def author_hist():
        #print(all_authors)
        hist_ = ut.dict_hist(all_authors, ordered=True)
        # Drop the empty-name bucket (set first so the del cannot KeyError).
        hist_[''] = None
        del hist_['']
        print('Author histogram')
        # Only print the tail of the (ordered) histogram repr.
        print(ut.dict_str(hist_)[-1000:])
    author_hist()

    @ut.argv_flag_dec(indent='    ')
    def unused_important():
        # Surface uncited entries by notable authors (or mentioning 'smk').
        important_authors = [
            'hinton', 'chum', 'Jegou', 'zisserman', 'schmid', 'sivic',
            'matas', 'lowe', 'perronnin', 'douze',
        ]
        for key in unused_keys:
            entry = bibtex_dict[key]
            author = entry.get('author', '')
            #authors = author.split(' and ')
            hasimportant = any(auth in author.lower()
                               for auth in important_authors)
            if hasimportant or 'smk' in str(entry).lower():
                # Trim noisy fields before printing the entry.
                toremove = [
                    'note', 'month', 'type', 'pages', 'urldate', 'language',
                    'volume', 'number', 'publisher'
                ]
                entry = ut.delete_dict_keys(entry, toremove)
                print(
                    ut.dict_str(entry, strvals=True,
                                key_order=['title', 'author', 'id']))
    unused_important()
def make_metadata_custom_api(metadata):
    r"""
    Construct a guitool table widget over a result-metadata shelf.

    The returned widget displays the square data from ``metadata`` with
    priority-ordered, nicely named columns; double-clicking a cell in a
    column whose name ends in ``fpath`` opens that path externally.

    CommandLine:
        python -m ibeis.expt.experiment_drawing --test-make_metadata_custom_api --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.expt.experiment_drawing import *  # NOQA
        >>> import guitool
        >>> guitool.ensure_qapp()
        >>> metadata_fpath = '/media/raid/work/Elephants_drop1_ears/_ibsdb/figures/result_metadata.shelf'
        >>> metadata = test_result.ResultMetadata(metadata_fpath, autoconnect=True)
        >>> wgt = make_metadata_custom_api(metadata)
        >>> ut.quit_if_noshow()
        >>> wgt.show()
        >>> wgt.raise_()
        >>> guitool.qtapp_loop(wgt, frequency=100)
    """
    import guitool
    from guitool.__PYQT__ import QtCore

    class MetadataViewer(guitool.APIItemWidget):
        # APIItemWidget subclass that opens '*fpath' cells on double-click.

        def __init__(self, parent=None, tblnice='Result Metadata Viewer',
                     **kwargs):
            guitool.APIItemWidget.__init__(self, parent=parent,
                                           tblnice=tblnice, **kwargs)
            self.connect_signals_and_slots()

        @guitool.slot_(QtCore.QModelIndex)
        def _on_doubleclick(self, qtindex):
            print('[wgt] _on_doubleclick: ')
            col = qtindex.column()
            if self.api.col_edit_list[col]:
                # Editable cells get the default editing behavior.
                print('do nothing special for editable columns')
                return
            model = qtindex.model()
            colname = model.get_header_name(col)
            if colname.endswith('fpath'):
                # Path-valued cell: launch it with the system opener.
                print('showing fpath')
                fpath = model.get_header_data(colname, qtindex)
                ut.startfile(fpath)

        def connect_signals_and_slots(self):
            self.view.doubleClicked.connect(self._on_doubleclick)

    guitool.ensure_qapp()
    names, columns = metadata.get_square_data()

    # Preferred display order; any remaining columns follow alphabetically.
    priority = [
        'qaids', 'qx2_gt_rank', 'qx2_gt_timedelta', 'qx2_gf_timedelta',
        'analysis_fpath', 'qx2_gt_raw_score', 'qx2_gf_raw_score',
    ]
    priority += sorted(ut.setdiff_ordered(names, priority))
    order = ut.priority_argsort(names, priority)
    names = ut.take(names, order)
    columns = ut.take(columns, order)

    lens = list(map(len, columns))
    print('col_name_list = %r' % (names, ))
    print('col_lens = %r' % (lens, ))
    # The table must be non-empty and rectangular.
    assert len(lens) > 0, 'no columns'
    assert lens[0] > 0, 'no rows'
    assert all(len_ == lens[0] for len_ in lens), 'inconsistant data'

    # Friendly headers: drop the 'qx2_' prefix, then apply fixed overrides.
    nice_names = {name: name.replace('qx2_', '') for name in names}
    nice_names['qx2_gt_timedelta'] = 'GT TimeDelta'
    nice_names['qx2_gf_timedelta'] = 'GF TimeDelta'
    nice_names['qx2_gt_rank'] = 'GT Rank'

    def get_thumb_size():
        # Fixed pixel size for image thumbnails.
        return 128

    # CustomAPI takes its configuration positionally.
    custom_api = guitool.CustomAPI(
        names,
        {},                          # col_types_dict
        dict(zip(names, columns)),   # col_getter_dict
        {},                          # col_bgrole_dict
        {},                          # col_ider_dict
        {},                          # col_setter_dict
        [],                          # editable_colnames
        'qaids',                     # sortby
        get_thumb_size,
        sort_reverse=True,
        col_width_dict={},
        col_nice_dict=nice_names,
    )
    wgt = MetadataViewer()
    wgt.connect_api(custom_api)
    return wgt