示例#1
0
 def vizualize_all_threshold_experiments():
     """Run viz_threshold_matchings for each experiment in ``vsdb_list``.

     Relies on names defined outside this function: ``results_root``,
     ``hs_configstr``, ``vsdb_list``, ``count2rr_list``, ``within_lbl``,
     ``cross_lbl`` and ``sansgt``.  Output directories are obtained from
     ``join_mkdir``; one sub-directory per experiment label is requested
     below the ``'threshold_matches' + hs_configstr`` directory.
     """
     thresh_output_dir = join_mkdir(results_root,
                                    'threshold_matches' + hs_configstr)
     for exptx, (hsA, hsB) in enumerate(vsdb_list):
         # The same handle on both sides marks a within-database experiment.
         expt_lbl = cross_lbl if hsA is not hsB else within_lbl
         results_name = get_results_name(hsA, hsB)
         print('    --- ---')
         print('      - database  =' + results_name + ' ---')
         print('      - expt_lbl  =' + expt_lbl + ' ---')
         count2rr_AB = count2rr_list[exptx]
         # Cross-database experiments are skipped when sansgt is truthy.
         if hsA is hsB or not sansgt:
             # Visualize chips which have a result with a high score
             thresh_out_dir = join_mkdir(thresh_output_dir, expt_lbl)
             viz_threshold_matchings(hsA, hsB, count2rr_AB, thresh_out_dir)
示例#2
0
 def vizualize_all_threshold_experiments():
     """Run viz_threshold_matchings for each experiment in ``vsdb_list``.

     Relies on names defined outside this function: ``results_root``,
     ``hs_configstr``, ``vsdb_list``, ``count2rr_list``, ``within_lbl``,
     ``cross_lbl`` and ``sansgt``.  Output directories are obtained from
     ``join_mkdir``; one sub-directory per experiment label is requested
     below the ``'threshold_matches' + hs_configstr`` directory.
     """
     thresh_output_dir = join_mkdir(results_root,
                                    'threshold_matches' + hs_configstr)
     for exptx, (hsA, hsB) in enumerate(vsdb_list):
         # The same handle on both sides marks a within-database experiment.
         expt_lbl = cross_lbl if hsA is not hsB else within_lbl
         results_name = get_results_name(hsA, hsB)
         print('    --- ---')
         print('      - database  =' + results_name + ' ---')
         print('      - expt_lbl  =' + expt_lbl + ' ---')
         count2rr_AB = count2rr_list[exptx]
         # Cross-database experiments are skipped when sansgt is truthy.
         if hsA is hsB or not sansgt:
             # Visualize chips which have a result with a high score
             thresh_out_dir = join_mkdir(thresh_output_dir, expt_lbl)
             viz_threshold_matchings(hsA, hsB, count2rr_AB, thresh_out_dir)
示例#3
0
def viz_threshold_matchings(hsA, hsB, count2rr_AB, thresh_out_dir):
    """Plot and save every query result that has at least one top match.

    A ``QueryResult`` is built for each raw result in ``count2rr_AB`` and
    configured with ``top_thresh = MATCH_THRESHOLD`` and at most 5 top
    matches.  When ``top_cx()`` returns anything, the result is visualized
    and figure 0 is saved as a ``.jpg`` whose name encodes the experiment
    name, the best score, the query cx and the matched names.

    Args:
        hsA: handle passed as the third argument to ``QueryResult``
            (presumably the query database -- confirm against QueryResult).
        hsB: handle passed as the first argument to ``QueryResult``.
        count2rr_AB: raw results; only ``len()`` and integer indexing are
            used, and each item must expose ``qcx``.
        thresh_out_dir: parent directory for this experiment's figures.

    Returns:
        None.  Figures are written to disk and progress is printed.
    """
    MATCH_THRESHOLD = __method_2_matchthresh__.get(__METHOD__, 10)
    results_name = get_results_name(hsA, hsB)
    print('  * Visualizing threshold matchings ' + results_name +
          ' give it some time to plot...')
    threshdb_out_dir = join_mkdir(thresh_out_dir, results_name)
    num_matching = 0
    # For each query run in hsA vs hsB.  Indexed access is kept on purpose:
    # nothing here shows that iterating count2rr_AB directly yields results.
    for count in range(len(count2rr_AB)):
        rr = count2rr_AB[count]
        qcx = rr.qcx
        res = QueryResult(hsB, rr, hsA)
        qname = res.qhs.cm.cx2_name(qcx)
        # Set matching threshold
        res.top_thresh = MATCH_THRESHOLD
        res.num_top_min = 0
        res.num_top_max = 5
        res.num_extra_return = 0
        # Check to see if any matched over the threshold
        top_cxs = res.top_cx()
        if len(top_cxs) == 0:
            # Nothing to plot; skip the name and score lookups entirely.
            continue
        top_names = res.hs.cm.cx2_name(top_cxs)
        top_scores = res.scores()[top_cxs]
        # Visualize the result
        num_matching += 1
        res.visualize()
        # Create a filename showing dataset, score, cx, match names
        matchname_set = set(name.replace('Lionfish', '')
                            for name in (top_names + [qname]))
        # Sorted so that the same matches always give the same filename.
        matchnames = '-'.join(sorted(matchname_set))
        scorestr = str(int(round(top_scores[0])))

        fig_fname = '-'.join([
            results_name, 'score' + scorestr, 'cx' + str(qcx),
            'MATCHES' + matchnames
        ]) + '.jpg'

        fig = myfigure(0)
        fig_fpath = join(threshdb_out_dir, fig_fname)
        sys.stdout.write('.')
        safe_savefig(fig, fig_fpath)
    print('  * Visualized %d above thresh: %f from expt: %s ' %
          (num_matching, MATCH_THRESHOLD, results_name))
示例#4
0
def viz_threshold_matchings(hsA, hsB, count2rr_AB, thresh_out_dir):
    """Plot and save every query result that has at least one top match.

    A ``QueryResult`` is built for each raw result in ``count2rr_AB`` and
    configured with ``top_thresh = MATCH_THRESHOLD`` and at most 5 top
    matches.  When ``top_cx()`` returns anything, the result is visualized
    and figure 0 is saved as a ``.jpg`` whose name encodes the experiment
    name, the best score, the query cx and the matched names.

    Args:
        hsA: handle passed as the third argument to ``QueryResult``
            (presumably the query database -- confirm against QueryResult).
        hsB: handle passed as the first argument to ``QueryResult``.
        count2rr_AB: raw results; only ``len()`` and integer indexing are
            used, and each item must expose ``qcx``.
        thresh_out_dir: parent directory for this experiment's figures.

    Returns:
        None.  Figures are written to disk and progress is printed.
    """
    MATCH_THRESHOLD = __method_2_matchthresh__.get(__METHOD__, 10)
    results_name = get_results_name(hsA, hsB)
    print('  * Visualizing threshold matchings ' + results_name +
          ' give it some time to plot...')
    threshdb_out_dir = join_mkdir(thresh_out_dir, results_name)
    num_matching = 0
    # For each query run in hsA vs hsB.  Indexed access is kept on purpose:
    # nothing here shows that iterating count2rr_AB directly yields results.
    for count in range(len(count2rr_AB)):
        rr = count2rr_AB[count]
        qcx = rr.qcx
        res = QueryResult(hsB, rr, hsA)
        qname = res.qhs.cm.cx2_name(qcx)
        # Set matching threshold
        res.top_thresh = MATCH_THRESHOLD
        res.num_top_min = 0
        res.num_top_max = 5
        res.num_extra_return = 0
        # Check to see if any matched over the threshold
        top_cxs = res.top_cx()
        if len(top_cxs) == 0:
            # Nothing to plot; skip the name and score lookups entirely.
            continue
        top_names = res.hs.cm.cx2_name(top_cxs)
        top_scores = res.scores()[top_cxs]
        # Visualize the result
        num_matching += 1
        res.visualize()
        # Create a filename showing dataset, score, cx, match names
        matchname_set = set(name.replace('Lionfish', '')
                            for name in (top_names + [qname]))
        # Sorted so that the same matches always give the same filename.
        matchnames = '-'.join(sorted(matchname_set))
        scorestr = str(int(round(top_scores[0])))

        fig_fname = '-'.join([
            results_name, 'score' + scorestr, 'cx' + str(qcx),
            'MATCHES' + matchnames
        ]) + '.jpg'

        fig = myfigure(0)
        fig_fpath = join(threshdb_out_dir, fig_fname)
        sys.stdout.write('.')
        safe_savefig(fig, fig_fpath)
    print('  * Visualized %d above thresh: %f from expt: %s ' %
          (num_matching, MATCH_THRESHOLD, results_name))
示例#5
0
    def vizualize_all_chipscores():
        """Plot chip-score frequencies for every experiment and save them.

        Relies on names defined outside this function (``results_root``,
        ``hs_configstr``, ``results_configstr``, ``vsdb_list``,
        ``count2rr_list``, ``total_expts``, ``sansgt``, ``within_lbl``,
        ``cross_lbl``, ``__RESTRICT_TP__``).

        With ``sansgt`` truthy three figures are drawn (200: rank-1 non-TP
        scores, 400: non-TP plus cross-database scores, 300: cross-database
        scores).  Otherwise a single figure (100) of true-positive scores is
        drawn for within-database experiments.  For every experiment
        ``write_all_chipscore_results`` is also called with a ``.txt`` path
        under ``results_root``.
        """
        chipscore_dir = join_mkdir(results_root,
                                   'chipscore_frequencies' + hs_configstr)
        if sansgt:
            Rank1TNFig = myfigure(
                200,
                doclf=True,
                title='Frequency of true negative chip scores')
            CrossAndTNFig = myfigure(
                400,
                doclf=True,
                title='True negatives and cross database queries')
            CrossFig = myfigure(
                300,
                doclf=True,
                title='Frequency of all cross-database chip scores')
        else:
            AllTPFig = myfigure(100,
                                doclf=True,
                                title='Frequency of true positive chip scores')
        cross_db_scores = []
        true_pos_scores = []
        cmap = plt.get_cmap('Set1')
        for exptx in range(total_expts):
            # Database handles.
            hsA, hsB = vsdb_list[exptx]
            is_within = hsA is hsB
            expt_lbl = within_lbl if is_within else cross_lbl
            expt_color = cmap(exptx / float(total_expts))

            results_name = get_results_name(hsA, hsB)
            results_name_abrev = get_results_name(hsA, hsB, abrev=True)
            print('    --- ---')
            print('      - database  =' + results_name + ' ---')
            print('      - expt_lbl  =' + expt_lbl + ' ---')

            count2rr_AB = count2rr_list[exptx]
            print('    * Visualizing chip score frequencies ' + results_name)
            chipscore_data, ischipscore_TP = get_chipscores(
                hsA, hsB, count2rr_AB)

            txt_fname = (results_name_abrev + '_allscores_' +
                         results_configstr + '.txt')
            txt_output_fpath = sanatize_fpath(join(results_root, txt_fname))
            write_all_chipscore_results(hsA, hsB, count2rr_AB,
                                        txt_output_fpath)

            if sansgt and is_within:
                print('      * rank1 not TP')
                # `True - mask` is boolean subtraction, which NumPy >= 1.13
                # rejects with a TypeError; logical_not gives the inverse mask.
                not_tp_mask = np.logical_not(ischipscore_TP)
                # First true negative - within db (sansgt)
                viz_chipscores(chipscore_data,
                               chipscore_mask=not_tp_mask,
                               fig=Rank1TNFig,
                               holdon=True,
                               color=expt_color,
                               labelaug=results_name,
                               conditions='Rank=1, not TP')

                viz_chipscores(chipscore_data,
                               chipscore_mask=not_tp_mask,
                               fig=CrossAndTNFig,
                               holdon=True,
                               color=expt_color,
                               labelaug=results_name,
                               conditions='Rank=1, not TP')

            elif sansgt:
                # Cross-database experiment without ground truth: the scores
                # go onto both the combined figure and the cross-only figure.
                print('      * Rank <=__RESTRICT_TP__')
                top_scores = chipscore_data[:, 0]
                # Recorded once (the original appended the same array twice;
                # the list is only read when sansgt is falsy).
                cross_db_scores.append(top_scores)
                viz_chipscores(chipscore_data,
                               fig=CrossAndTNFig,
                               color=expt_color,
                               holdon=True,
                               labelaug=results_name,
                               conditions='Rank<=%d' % __RESTRICT_TP__)
                # Highest Interdatabase matches for each combination of db
                print('      * Top __RESTRICT_TP__ cross database chipscores')
                viz_chipscores(chipscore_data,
                               fig=CrossFig,
                               color=expt_color,
                               holdon=True,
                               labelaug=results_name,
                               conditions='Rank<=%d' % __RESTRICT_TP__)

            elif is_within:
                # ALL TRUE POSITIVES - within a database
                print('      * Top __RESTRICT_TP__ true positive chipscores')
                top_scores = chipscore_data[:, 0:__RESTRICT_TP__]
                top_mask = ischipscore_TP[:, 0:__RESTRICT_TP__]
                true_pos_scores.append(top_scores[top_mask])
                viz_chipscores(chipscore_data,
                               chipscore_mask=ischipscore_TP,
                               fig=AllTPFig,
                               holdon=True,
                               color=expt_color,
                               labelaug=results_name,
                               conditions='Rank<=%d, TP' % __RESTRICT_TP__)

        if sansgt:
            print('  * Saving sansgt')
            figfpath = join(chipscore_dir, within_lbl + '-rank1-chipscore')
            safe_savefig(Rank1TNFig, figfpath, adjust_axes=True, trunc_max=100)

            figfpath = join(chipscore_dir, within_lbl + '-and-cross-chipscore')
            safe_savefig(CrossAndTNFig,
                         figfpath,
                         adjust_axes=True,
                         trunc_max=100)
            safe_savefig(CrossFig,
                         join(chipscore_dir, 'crossdb-all-chipscores'),
                         adjust_axes=True,
                         trunc_max=100)

        else:
            print('  * Saving gt')
            # NOTE(review): cross_db_scores is only filled when sansgt is
            # truthy, so this report never runs in this branch -- confirm
            # whether it was meant to combine results from both modes.
            if len(cross_db_scores) > 0:
                highest_cdscores = np.sort(
                    np.hstack(cross_db_scores))[::-1][0:20]
                for c in highest_cdscores:
                    print('There are %d/%d TPs with scores less than %d' %
                          (np.sum(np.hstack(true_pos_scores) < c),
                           np.hstack(true_pos_scores).size, c))

            # Finalize Plots and save
            safe_savefig(
                AllTPFig,
                join(chipscore_dir,
                     within_lbl + '-top%dtp-chipscore' % __RESTRICT_TP__),
                adjust_axes=True,
                trunc_max=1000)
示例#6
0
            if dbtup[0] is dbtup[1]:
                print('     %d --- sx%d - %s vs self' %
                      (dbx, symx, dbtup[0].get_dbid()))
            else:
                print('     %d --- sx%d - %s vs %s' %
                      (dbx, symx, dbtup[0].get_dbid(), dbtup[1].get_dbid()))

    # Report the (hsA, hsB) pairings that will be run.
    print_vsdb(vsdb_list, sym_list)
    if __SANSGT__:
        # Extra listing for the sansgt pairings.
        # NOTE(review): print_vsdb presumably prints its own header as well,
        # so this one may be followed by a second header line -- confirm.
        print(
            '--- Database versus list __SANSGT__ ---\n   DBX --- SYMX - hsA vs hsB '
        )
        print_vsdb(vsdb_sansgt_list, sym_sansgt_list)

    # Dependents of parameters
    # Root directory handed to visualize_all_results below.
    results_root = join_mkdir('Results')

    if not __cmd_mode__:
        # Compute / Load all query results. Then visualize
        print('Viz GT')
        visualize_all_results(vsdb_list,
                              count2rr_list,
                              sym_list,
                              results_root,
                              sansgt=False)
        if __SANSGT__:
            print('Viz No GT')
            visualize_all_results(vsdb_sansgt_list,
                                  count2rr_list_sansgt,
                                  sym_sansgt_list,
                                  results_root,
示例#7
0
    def vizualize_all_chipscores():
        """Plot chip-score frequencies for every experiment and save them.

        Relies on names defined outside this function (``results_root``,
        ``hs_configstr``, ``results_configstr``, ``vsdb_list``,
        ``count2rr_list``, ``total_expts``, ``sansgt``, ``within_lbl``,
        ``cross_lbl``, ``__RESTRICT_TP__``).

        With ``sansgt`` truthy three figures are drawn (200: rank-1 non-TP
        scores, 400: non-TP plus cross-database scores, 300: cross-database
        scores).  Otherwise a single figure (100) of true-positive scores is
        drawn for within-database experiments.  For every experiment
        ``write_all_chipscore_results`` is also called with a ``.txt`` path
        under ``results_root``.
        """
        chipscore_dir = join_mkdir(results_root,
                                   'chipscore_frequencies' + hs_configstr)
        if sansgt:
            Rank1TNFig = myfigure(
                200,
                doclf=True,
                title='Frequency of true negative chip scores')
            CrossAndTNFig = myfigure(
                400,
                doclf=True,
                title='True negatives and cross database queries')
            CrossFig = myfigure(
                300,
                doclf=True,
                title='Frequency of all cross-database chip scores')
        else:
            AllTPFig = myfigure(100,
                                doclf=True,
                                title='Frequency of true positive chip scores')
        cross_db_scores = []
        true_pos_scores = []
        cmap = plt.get_cmap('Set1')
        for exptx in range(total_expts):
            # Database handles.
            hsA, hsB = vsdb_list[exptx]
            is_within = hsA is hsB
            expt_lbl = within_lbl if is_within else cross_lbl
            expt_color = cmap(exptx / float(total_expts))

            results_name = get_results_name(hsA, hsB)
            results_name_abrev = get_results_name(hsA, hsB, abrev=True)
            print('    --- ---')
            print('      - database  =' + results_name + ' ---')
            print('      - expt_lbl  =' + expt_lbl + ' ---')

            count2rr_AB = count2rr_list[exptx]
            print('    * Visualizing chip score frequencies ' + results_name)
            chipscore_data, ischipscore_TP = get_chipscores(
                hsA, hsB, count2rr_AB)

            txt_fname = (results_name_abrev + '_allscores_' +
                         results_configstr + '.txt')
            txt_output_fpath = sanatize_fpath(join(results_root, txt_fname))
            write_all_chipscore_results(hsA, hsB, count2rr_AB,
                                        txt_output_fpath)

            if sansgt and is_within:
                print('      * rank1 not TP')
                # `True - mask` is boolean subtraction, which NumPy >= 1.13
                # rejects with a TypeError; logical_not gives the inverse mask.
                not_tp_mask = np.logical_not(ischipscore_TP)
                # First true negative - within db (sansgt)
                viz_chipscores(chipscore_data,
                               chipscore_mask=not_tp_mask,
                               fig=Rank1TNFig,
                               holdon=True,
                               color=expt_color,
                               labelaug=results_name,
                               conditions='Rank=1, not TP')

                viz_chipscores(chipscore_data,
                               chipscore_mask=not_tp_mask,
                               fig=CrossAndTNFig,
                               holdon=True,
                               color=expt_color,
                               labelaug=results_name,
                               conditions='Rank=1, not TP')

            elif sansgt:
                # Cross-database experiment without ground truth: the scores
                # go onto both the combined figure and the cross-only figure.
                print('      * Rank <=__RESTRICT_TP__')
                top_scores = chipscore_data[:, 0]
                # Recorded once (the original appended the same array twice;
                # the list is only read when sansgt is falsy).
                cross_db_scores.append(top_scores)
                viz_chipscores(chipscore_data,
                               fig=CrossAndTNFig,
                               color=expt_color,
                               holdon=True,
                               labelaug=results_name,
                               conditions='Rank<=%d' % __RESTRICT_TP__)
                # Highest Interdatabase matches for each combination of db
                print('      * Top __RESTRICT_TP__ cross database chipscores')
                viz_chipscores(chipscore_data,
                               fig=CrossFig,
                               color=expt_color,
                               holdon=True,
                               labelaug=results_name,
                               conditions='Rank<=%d' % __RESTRICT_TP__)

            elif is_within:
                # ALL TRUE POSITIVES - within a database
                print('      * Top __RESTRICT_TP__ true positive chipscores')
                top_scores = chipscore_data[:, 0:__RESTRICT_TP__]
                top_mask = ischipscore_TP[:, 0:__RESTRICT_TP__]
                true_pos_scores.append(top_scores[top_mask])
                viz_chipscores(chipscore_data,
                               chipscore_mask=ischipscore_TP,
                               fig=AllTPFig,
                               holdon=True,
                               color=expt_color,
                               labelaug=results_name,
                               conditions='Rank<=%d, TP' % __RESTRICT_TP__)

        if sansgt:
            print('  * Saving sansgt')
            figfpath = join(chipscore_dir, within_lbl + '-rank1-chipscore')
            safe_savefig(Rank1TNFig, figfpath, adjust_axes=True, trunc_max=100)

            figfpath = join(chipscore_dir, within_lbl + '-and-cross-chipscore')
            safe_savefig(CrossAndTNFig,
                         figfpath,
                         adjust_axes=True,
                         trunc_max=100)
            safe_savefig(CrossFig,
                         join(chipscore_dir, 'crossdb-all-chipscores'),
                         adjust_axes=True,
                         trunc_max=100)

        else:
            print('  * Saving gt')
            # NOTE(review): cross_db_scores is only filled when sansgt is
            # truthy, so this report never runs in this branch -- confirm
            # whether it was meant to combine results from both modes.
            if len(cross_db_scores) > 0:
                highest_cdscores = np.sort(
                    np.hstack(cross_db_scores))[::-1][0:20]
                for c in highest_cdscores:
                    print('There are %d/%d TPs with scores less than %d' %
                          (np.sum(np.hstack(true_pos_scores) < c),
                           np.hstack(true_pos_scores).size, c))

            # Finalize Plots and save
            safe_savefig(
                AllTPFig,
                join(chipscore_dir,
                     within_lbl + '-top%dtp-chipscore' % __RESTRICT_TP__),
                adjust_axes=True,
                trunc_max=1000)
示例#8
0
    # List the database list we are running on
    def print_vsdb(vsdb_list, sym_list):
        """Print a header and one numbered line per database pairing.

        ``vsdb_list`` holds pairs of database handles and ``sym_list`` the
        matching symbol indices; the two are walked in lockstep.  A pairing
        whose two handles are the same object is reported as "vs self".
        """
        print('--- Database versus list GT ---\n   DBX --- SYMX - hsA vs hsB ')
        for dbx, (pair, symx) in enumerate(zip(vsdb_list, sym_list)):
            hs_query = pair[0]
            hs_target = pair[1]
            if hs_query is hs_target:
                line = '     %d --- sx%d - %s vs self' % (
                    dbx, symx, hs_query.get_dbid())
            else:
                line = '     %d --- sx%d - %s vs %s' % (
                    dbx, symx, hs_query.get_dbid(), hs_target.get_dbid())
            print(line)
    # Report the (hsA, hsB) pairings that will be run.
    print_vsdb(vsdb_list, sym_list)
    if __SANSGT__:
        # Extra listing for the sansgt pairings.
        # NOTE(review): print_vsdb prints its own "GT" header first, so this
        # header is immediately followed by the GT one in the output.
        print('--- Database versus list __SANSGT__ ---\n   DBX --- SYMX - hsA vs hsB ')
        print_vsdb(vsdb_sansgt_list, sym_sansgt_list)

    # Dependents of parameters
    # Root directory handed to visualize_all_results below.
    results_root = join_mkdir('Results')
    
    if not __cmd_mode__:
        # Compute / Load all query results. Then visualize
        print('Viz GT')
        visualize_all_results(vsdb_list,
                              count2rr_list,
                              sym_list,
                              results_root,
                              sansgt=False)
        if __SANSGT__:
            print('Viz No GT')
            visualize_all_results(vsdb_sansgt_list,
                                count2rr_list_sansgt,
                                sym_sansgt_list, 
                                results_root,