def vizualize_all_threshold_experiments():
    """Run viz_threshold_matchings for each database pairing in vsdb_list.

    Takes no arguments; results_root, hs_configstr, vsdb_list,
    count2rr_list, within_lbl, cross_lbl and sansgt are free variables
    resolved outside this function.

    A pairing is visualized when a database is queried against itself, or
    whenever sansgt is false. Cross-database pairings are skipped while
    sansgt is true. Output goes under
    <results_root>/threshold_matches<hs_configstr>/<experiment label>.
    """
    thresh_output_dir = join_mkdir(results_root,
                                   'threshold_matches' + hs_configstr)
    for exptx, (hsA, hsB) in enumerate(vsdb_list):
        # A pairing is "within" when both handles are the same object.
        same_db = hsA is hsB
        expt_lbl = within_lbl if same_db else cross_lbl
        results_name = get_results_name(hsA, hsB)
        print(' --- ---')
        print(' - database =' + results_name + ' ---')
        print(' - expt_lbl =' + expt_lbl + ' ---')
        count2rr_AB = count2rr_list[exptx]
        if sansgt and not same_db:
            # Cross-database pairings are not visualized in sansgt mode.
            continue
        # Visualize chips which have a result with a high score.
        thresh_out_dir = join_mkdir(thresh_output_dir, expt_lbl)
        viz_threshold_matchings(hsA, hsB, count2rr_AB, thresh_out_dir)
def vizualize_all_threshold_experiments():
    """Run viz_threshold_matchings for each database pairing in vsdb_list.

    Takes no arguments; results_root, hs_configstr, vsdb_list,
    count2rr_list, within_lbl, cross_lbl and sansgt are free variables
    resolved outside this function.

    A pairing is visualized when a database is queried against itself, or
    whenever sansgt is false. Cross-database pairings are skipped while
    sansgt is true. Output goes under
    <results_root>/threshold_matches<hs_configstr>/<experiment label>.
    """
    thresh_output_dir = join_mkdir(results_root,
                                   'threshold_matches' + hs_configstr)
    for exptx, (hsA, hsB) in enumerate(vsdb_list):
        # A pairing is "within" when both handles are the same object.
        same_db = hsA is hsB
        expt_lbl = within_lbl if same_db else cross_lbl
        results_name = get_results_name(hsA, hsB)
        print(' --- ---')
        print(' - database =' + results_name + ' ---')
        print(' - expt_lbl =' + expt_lbl + ' ---')
        count2rr_AB = count2rr_list[exptx]
        if sansgt and not same_db:
            # Cross-database pairings are not visualized in sansgt mode.
            continue
        # Visualize chips which have a result with a high score.
        thresh_out_dir = join_mkdir(thresh_output_dir, expt_lbl)
        viz_threshold_matchings(hsA, hsB, count2rr_AB, thresh_out_dir)
def viz_threshold_matchings(hsA, hsB, count2rr_AB, thresh_out_dir):
    """Save one figure per query whose top matches clear the score threshold.

    For every entry of count2rr_AB a QueryResult is built, restricted to at
    most 5 matches scoring above the threshold looked up in
    __method_2_matchthresh__ for __METHOD__ (default 10). Queries with at
    least one such match are visualized and written as a .jpg under
    <thresh_out_dir>/<results name>.

    Args:
        hsA: handle passed to QueryResult as its third argument and to
            get_results_name as the first.
        hsB: handle passed to QueryResult as its first argument and to
            get_results_name as the second.
        count2rr_AB: container indexed by 0..len-1 whose items expose a
            ``qcx`` attribute.
        thresh_out_dir: parent directory for this experiment's figures.

    Returns:
        None. The function only prints progress and writes figures.
    """
    num_matching = 0
    MATCH_THRESHOLD = __method_2_matchthresh__.get(__METHOD__, 10)
    results_name = get_results_name(hsA, hsB)
    print(' * Visualizing threshold matchings ' + results_name +
          ' give it some time to plot...')
    threshdb_out_dir = join_mkdir(thresh_out_dir, results_name)
    # For each query run in hsA vs hsB.  The lookup stays index-based so it
    # works whether count2rr_AB is a sequence or a mapping keyed 0..n-1;
    # range() replaces the Python-2-only xrange().
    for count in range(len(count2rr_AB)):
        rr = count2rr_AB[count]
        qcx = rr.qcx
        res = QueryResult(hsB, rr, hsA)
        qname = res.qhs.cm.cx2_name(qcx)
        # Set matching threshold
        res.top_thresh = MATCH_THRESHOLD
        res.num_top_min = 0
        res.num_top_max = 5
        res.num_extra_return = 0
        # Check to see if any matched over the threshold
        top_cxs = res.top_cx()
        if len(top_cxs) == 0:
            continue
        top_names = res.hs.cm.cx2_name(top_cxs)
        top_scores = res.scores()[top_cxs]
        num_matching += 1
        res.visualize()
        # Create a filename showing dataset, score, cx, match names.
        # Names are de-duplicated, then sorted so the filename does not
        # depend on set iteration order.
        matchname_set = set(name.replace('Lionfish', '')
                            for name in (top_names + [qname]))
        matchnames = '-'.join(sorted(matchname_set))
        scorestr = str(int(round(top_scores[0])))
        fig_fname = '-'.join([results_name,
                              'score' + scorestr,
                              'cx' + str(qcx),
                              'MATCHES' + matchnames]) + '.jpg'
        fig = myfigure(0)
        fig_fpath = join(threshdb_out_dir, fig_fname)
        sys.stdout.write('.')
        safe_savefig(fig, fig_fpath)
    print(' * Visualized %d above thresh: %f from expt: %s ' %
          (num_matching, MATCH_THRESHOLD, results_name))
def viz_threshold_matchings(hsA, hsB, count2rr_AB, thresh_out_dir):
    """Save one figure per query whose top matches clear the score threshold.

    For every entry of count2rr_AB a QueryResult is built, restricted to at
    most 5 matches scoring above the threshold looked up in
    __method_2_matchthresh__ for __METHOD__ (default 10). Queries with at
    least one such match are visualized and written as a .jpg under
    <thresh_out_dir>/<results name>.

    Args:
        hsA: handle passed to QueryResult as its third argument and to
            get_results_name as the first.
        hsB: handle passed to QueryResult as its first argument and to
            get_results_name as the second.
        count2rr_AB: container indexed by 0..len-1 whose items expose a
            ``qcx`` attribute.
        thresh_out_dir: parent directory for this experiment's figures.

    Returns:
        None. The function only prints progress and writes figures.
    """
    num_matching = 0
    MATCH_THRESHOLD = __method_2_matchthresh__.get(__METHOD__, 10)
    results_name = get_results_name(hsA, hsB)
    print(' * Visualizing threshold matchings ' + results_name +
          ' give it some time to plot...')
    threshdb_out_dir = join_mkdir(thresh_out_dir, results_name)
    # For each query run in hsA vs hsB.  The lookup stays index-based so it
    # works whether count2rr_AB is a sequence or a mapping keyed 0..n-1;
    # range() replaces the Python-2-only xrange().
    for count in range(len(count2rr_AB)):
        rr = count2rr_AB[count]
        qcx = rr.qcx
        res = QueryResult(hsB, rr, hsA)
        qname = res.qhs.cm.cx2_name(qcx)
        # Set matching threshold
        res.top_thresh = MATCH_THRESHOLD
        res.num_top_min = 0
        res.num_top_max = 5
        res.num_extra_return = 0
        # Check to see if any matched over the threshold
        top_cxs = res.top_cx()
        if len(top_cxs) == 0:
            continue
        top_names = res.hs.cm.cx2_name(top_cxs)
        top_scores = res.scores()[top_cxs]
        num_matching += 1
        res.visualize()
        # Create a filename showing dataset, score, cx, match names.
        # Names are de-duplicated, then sorted so the filename does not
        # depend on set iteration order.
        matchname_set = set(name.replace('Lionfish', '')
                            for name in (top_names + [qname]))
        matchnames = '-'.join(sorted(matchname_set))
        scorestr = str(int(round(top_scores[0])))
        fig_fname = '-'.join([results_name,
                              'score' + scorestr,
                              'cx' + str(qcx),
                              'MATCHES' + matchnames]) + '.jpg'
        fig = myfigure(0)
        fig_fpath = join(threshdb_out_dir, fig_fname)
        sys.stdout.write('.')
        safe_savefig(fig, fig_fpath)
    print(' * Visualized %d above thresh: %f from expt: %s ' %
          (num_matching, MATCH_THRESHOLD, results_name))
def vizualize_all_chipscores():
    """Plot chip-score frequency figures for every experiment and save them.

    Takes no arguments; results_root, hs_configstr, results_configstr,
    sansgt, total_expts, vsdb_list, count2rr_list, within_lbl, cross_lbl and
    __RESTRICT_TP__ are free variables resolved outside this function.

    For each experiment the chip scores are fetched with get_chipscores and
    dumped to a text file via write_all_chipscore_results. What gets plotted
    depends on the mode:

    * sansgt, database vs itself: scores that are not true positives, drawn
      on both the true-negative figure and the combined figure.
    * sansgt, cross-database: all scores, drawn on the combined figure and
      on the cross-database figure.
    * not sansgt, database vs itself: true-positive scores.

    Figures are saved under <results_root>/chipscore_frequencies<hs_configstr>.
    """
    chipscore_dir = join_mkdir(results_root,
                               'chipscore_frequencies' + hs_configstr)
    if sansgt:
        Rank1TNFig = myfigure(
            200, doclf=True,
            title='Frequency of true negative chip scores')
        CrossAndTNFig = myfigure(
            400, doclf=True,
            title='True negatives and cross database queries')
        CrossFig = myfigure(
            300, doclf=True,
            title='Frequency of all cross-database chip scores')
    else:
        AllTPFig = myfigure(
            100, doclf=True,
            title='Frequency of true positive chip scores')
    cross_db_scores = []
    true_pos_scores = []
    cmap = plt.get_cmap('Set1')
    for exptx in range(total_expts):
        # Database handles.
        hsA, hsB = vsdb_list[exptx]
        is_within = hsA is hsB
        expt_lbl = within_lbl if is_within else cross_lbl
        expt_color = cmap(exptx / float(total_expts))
        results_name = get_results_name(hsA, hsB)
        results_name_abrev = get_results_name(hsA, hsB, abrev=True)
        print(' --- ---')
        print(' - database =' + results_name + ' ---')
        print(' - expt_lbl =' + expt_lbl + ' ---')
        count2rr_AB = count2rr_list[exptx]
        print(' * Visualizing chip score frequencies ' + results_name)
        chipscore_data, ischipscore_TP = get_chipscores(hsA, hsB, count2rr_AB)
        txt_output_fpath = sanatize_fpath(join(
            results_root,
            results_name_abrev + '_allscores_' + results_configstr + '.txt'))
        write_all_chipscore_results(hsA, hsB, count2rr_AB, txt_output_fpath)
        if sansgt and is_within:
            # First true negative - within db (sansgt)
            print(' * rank1 not TP')
            for tn_fig in (Rank1TNFig, CrossAndTNFig):
                # np.logical_not replaces `True - mask`: NumPy no longer
                # supports the `-` operator on boolean arrays.
                viz_chipscores(chipscore_data,
                               chipscore_mask=np.logical_not(ischipscore_TP),
                               fig=tn_fig,
                               holdon=True,
                               color=expt_color,
                               labelaug=results_name,
                               conditions='Rank=1, not TP')
        elif sansgt:
            # Cross-database pairing: record the first score column once and
            # draw the same data on both figures.
            print(' * Rank <=__RESTRICT_TP__')
            top_scores = chipscore_data[:, 0]
            cross_db_scores.append(top_scores)
            viz_chipscores(chipscore_data,
                           fig=CrossAndTNFig,
                           color=expt_color,
                           holdon=True,
                           labelaug=results_name,
                           conditions='Rank<=%d' % __RESTRICT_TP__)
            # Highest Interdatabase matches for each combination of db
            print(' * Top __RESTRICT_TP__ cross database chipscores')
            viz_chipscores(chipscore_data,
                           fig=CrossFig,
                           color=expt_color,
                           holdon=True,
                           labelaug=results_name,
                           conditions='Rank<=%d' % __RESTRICT_TP__)
        elif is_within:
            # ALL TRUE POSITIVES - within a database
            print(' * Top __RESTRICT_TP__ true positive chipscores')
            top_scores = chipscore_data[:, 0:__RESTRICT_TP__]
            top_mask = ischipscore_TP[:, 0:__RESTRICT_TP__]
            true_pos_scores.append(top_scores[top_mask])
            viz_chipscores(chipscore_data,
                           chipscore_mask=ischipscore_TP,
                           fig=AllTPFig,
                           holdon=True,
                           color=expt_color,
                           labelaug=results_name,
                           conditions='Rank<=%d, TP' % __RESTRICT_TP__)
    if sansgt:
        print(' * Saving sansgt')
        figfpath = join(chipscore_dir, within_lbl + '-rank1-chipscore')
        safe_savefig(Rank1TNFig, figfpath, adjust_axes=True, trunc_max=100)
        figfpath = join(chipscore_dir, within_lbl + '-and-cross-chipscore')
        safe_savefig(CrossAndTNFig, figfpath,
                     adjust_axes=True, trunc_max=100)
        safe_savefig(CrossFig,
                     join(chipscore_dir, 'crossdb-all-chipscores'),
                     adjust_axes=True, trunc_max=100)
    else:
        print(' * Saving gt')
        # NOTE(review): cross_db_scores is only appended to when sansgt is
        # true, so within this function the report below never fires.
        if len(cross_db_scores) > 0:
            highest_cdscores = np.sort(
                np.hstack(cross_db_scores))[::-1][0:20]
            all_tp_scores = np.hstack(true_pos_scores)
            for c in highest_cdscores:
                print('There are %d/%d TPs with scores less than %d' %
                      (np.sum(all_tp_scores < c), all_tp_scores.size, c))
        # Finalize Plots and save
        safe_savefig(AllTPFig,
                     join(chipscore_dir,
                          within_lbl + '-top%dtp-chipscore' % __RESTRICT_TP__),
                     adjust_axes=True, trunc_max=1000)
if dbtup[0] is dbtup[1]: print(' %d --- sx%d - %s vs self' % (dbx, symx, dbtup[0].get_dbid())) else: print(' %d --- sx%d - %s vs %s' % (dbx, symx, dbtup[0].get_dbid(), dbtup[1].get_dbid())) print_vsdb(vsdb_list, sym_list) if __SANSGT__: print( '--- Database versus list __SANSGT__ ---\n DBX --- SYMX - hsA vs hsB ' ) print_vsdb(vsdb_sansgt_list, sym_sansgt_list) # Dependents of parameters results_root = join_mkdir('Results') if not __cmd_mode__: # Compute / Load all query results. Then visualize print('Viz GT') visualize_all_results(vsdb_list, count2rr_list, sym_list, results_root, sansgt=False) if __SANSGT__: print('Viz No GT') visualize_all_results(vsdb_sansgt_list, count2rr_list_sansgt, sym_sansgt_list, results_root,
def vizualize_all_chipscores():
    """Plot chip-score frequency figures for every experiment and save them.

    Takes no arguments; results_root, hs_configstr, results_configstr,
    sansgt, total_expts, vsdb_list, count2rr_list, within_lbl, cross_lbl and
    __RESTRICT_TP__ are free variables resolved outside this function.

    For each experiment the chip scores are fetched with get_chipscores and
    dumped to a text file via write_all_chipscore_results. What gets plotted
    depends on the mode:

    * sansgt, database vs itself: scores that are not true positives, drawn
      on both the true-negative figure and the combined figure.
    * sansgt, cross-database: all scores, drawn on the combined figure and
      on the cross-database figure.
    * not sansgt, database vs itself: true-positive scores.

    Figures are saved under <results_root>/chipscore_frequencies<hs_configstr>.
    """
    chipscore_dir = join_mkdir(results_root,
                               'chipscore_frequencies' + hs_configstr)
    if sansgt:
        Rank1TNFig = myfigure(
            200, doclf=True,
            title='Frequency of true negative chip scores')
        CrossAndTNFig = myfigure(
            400, doclf=True,
            title='True negatives and cross database queries')
        CrossFig = myfigure(
            300, doclf=True,
            title='Frequency of all cross-database chip scores')
    else:
        AllTPFig = myfigure(
            100, doclf=True,
            title='Frequency of true positive chip scores')
    cross_db_scores = []
    true_pos_scores = []
    cmap = plt.get_cmap('Set1')
    for exptx in range(total_expts):
        # Database handles.
        hsA, hsB = vsdb_list[exptx]
        is_within = hsA is hsB
        expt_lbl = within_lbl if is_within else cross_lbl
        expt_color = cmap(exptx / float(total_expts))
        results_name = get_results_name(hsA, hsB)
        results_name_abrev = get_results_name(hsA, hsB, abrev=True)
        print(' --- ---')
        print(' - database =' + results_name + ' ---')
        print(' - expt_lbl =' + expt_lbl + ' ---')
        count2rr_AB = count2rr_list[exptx]
        print(' * Visualizing chip score frequencies ' + results_name)
        chipscore_data, ischipscore_TP = get_chipscores(hsA, hsB, count2rr_AB)
        txt_output_fpath = sanatize_fpath(join(
            results_root,
            results_name_abrev + '_allscores_' + results_configstr + '.txt'))
        write_all_chipscore_results(hsA, hsB, count2rr_AB, txt_output_fpath)
        if sansgt and is_within:
            # First true negative - within db (sansgt)
            print(' * rank1 not TP')
            for tn_fig in (Rank1TNFig, CrossAndTNFig):
                # np.logical_not replaces `True - mask`: NumPy no longer
                # supports the `-` operator on boolean arrays.
                viz_chipscores(chipscore_data,
                               chipscore_mask=np.logical_not(ischipscore_TP),
                               fig=tn_fig,
                               holdon=True,
                               color=expt_color,
                               labelaug=results_name,
                               conditions='Rank=1, not TP')
        elif sansgt:
            # Cross-database pairing: record the first score column once and
            # draw the same data on both figures.
            print(' * Rank <=__RESTRICT_TP__')
            top_scores = chipscore_data[:, 0]
            cross_db_scores.append(top_scores)
            viz_chipscores(chipscore_data,
                           fig=CrossAndTNFig,
                           color=expt_color,
                           holdon=True,
                           labelaug=results_name,
                           conditions='Rank<=%d' % __RESTRICT_TP__)
            # Highest Interdatabase matches for each combination of db
            print(' * Top __RESTRICT_TP__ cross database chipscores')
            viz_chipscores(chipscore_data,
                           fig=CrossFig,
                           color=expt_color,
                           holdon=True,
                           labelaug=results_name,
                           conditions='Rank<=%d' % __RESTRICT_TP__)
        elif is_within:
            # ALL TRUE POSITIVES - within a database
            print(' * Top __RESTRICT_TP__ true positive chipscores')
            top_scores = chipscore_data[:, 0:__RESTRICT_TP__]
            top_mask = ischipscore_TP[:, 0:__RESTRICT_TP__]
            true_pos_scores.append(top_scores[top_mask])
            viz_chipscores(chipscore_data,
                           chipscore_mask=ischipscore_TP,
                           fig=AllTPFig,
                           holdon=True,
                           color=expt_color,
                           labelaug=results_name,
                           conditions='Rank<=%d, TP' % __RESTRICT_TP__)
    if sansgt:
        print(' * Saving sansgt')
        figfpath = join(chipscore_dir, within_lbl + '-rank1-chipscore')
        safe_savefig(Rank1TNFig, figfpath, adjust_axes=True, trunc_max=100)
        figfpath = join(chipscore_dir, within_lbl + '-and-cross-chipscore')
        safe_savefig(CrossAndTNFig, figfpath,
                     adjust_axes=True, trunc_max=100)
        safe_savefig(CrossFig,
                     join(chipscore_dir, 'crossdb-all-chipscores'),
                     adjust_axes=True, trunc_max=100)
    else:
        print(' * Saving gt')
        # NOTE(review): cross_db_scores is only appended to when sansgt is
        # true, so within this function the report below never fires.
        if len(cross_db_scores) > 0:
            highest_cdscores = np.sort(
                np.hstack(cross_db_scores))[::-1][0:20]
            all_tp_scores = np.hstack(true_pos_scores)
            for c in highest_cdscores:
                print('There are %d/%d TPs with scores less than %d' %
                      (np.sum(all_tp_scores < c), all_tp_scores.size, c))
        # Finalize Plots and save
        safe_savefig(AllTPFig,
                     join(chipscore_dir,
                          within_lbl + '-top%dtp-chipscore' % __RESTRICT_TP__),
                     adjust_axes=True, trunc_max=1000)
# List the database list we are running on def print_vsdb(vsdb_list, sym_list): print('--- Database versus list GT ---\n DBX --- SYMX - hsA vs hsB ') for dbx, (dbtup, symx) in enumerate(zip(vsdb_list, sym_list)): if dbtup[0] is dbtup[1]: print(' %d --- sx%d - %s vs self' % (dbx, symx, dbtup[0].get_dbid()) ) else: print(' %d --- sx%d - %s vs %s' % (dbx, symx, dbtup[0].get_dbid(), dbtup[1].get_dbid()) ) print_vsdb(vsdb_list, sym_list) if __SANSGT__: print('--- Database versus list __SANSGT__ ---\n DBX --- SYMX - hsA vs hsB ') print_vsdb(vsdb_sansgt_list, sym_sansgt_list) # Dependents of parameters results_root = join_mkdir('Results') if not __cmd_mode__: # Compute / Load all query results. Then visualize print('Viz GT') visualize_all_results(vsdb_list, count2rr_list, sym_list, results_root, sansgt=False) if __SANSGT__: print('Viz No GT') visualize_all_results(vsdb_sansgt_list, count2rr_list_sansgt, sym_sansgt_list, results_root,