def get_dist(mtx, biom_data):
    """Compute a distance matrix for ``mtx`` or write split-metric tables.

    Behaviour is driven by the module-level ``args`` object
    (``create_splits``, ``metric``, ``basedir`` and ``prefix`` are read).

    When ``args.create_splits`` is truthy, five distance matrices (Jaccard,
    Bray-Curtis, Morisita-Horn, Canberra and Kulczynski) are computed and
    combined pairwise: the lower triangle of the first-named metric plus
    the upper triangle of the second.  Each of the ten combinations is
    written as a tab-separated file named
    ``<prefix>_distance_<pair>.tsv`` inside ``args.basedir``, labelled on
    both axes with the ``id`` of every entry in ``biom_data['columns']``.
    An empty dict is returned in this mode.

    Otherwise the metric named by ``args.metric`` is computed (Bray-Curtis
    for any unrecognised name) and the result of ``distance.squareform``
    applied to that matrix is returned.
    """
    column_ids = [column['id'] for column in biom_data['columns']]

    if not args.create_splits:
        # Single-metric mode: look up the ``dt`` function by metric name,
        # falling back to Bray-Curtis when the name is not recognised.
        metric_funcs = {
            'bray_curtis': 'dist_bray_curtis',
            'morisita_horn': 'dist_morisita_horn',
            'canberra': 'dist_canberra',
            'jaccard': 'binary_dist_jaccard',
            'kulczynski': 'dist_kulczynski',
        }
        func_name = metric_funcs.get(args.metric, 'dist_bray_curtis')
        single = getattr(dt, func_name)(mtx, strict=False)
        return distance.squareform(single)

    jaccard = dt.binary_dist_jaccard(mtx, strict=False)
    bray_curtis = dt.dist_bray_curtis(mtx, strict=False)
    morisita_horn = dt.dist_morisita_horn(mtx, strict=False)
    canberra = dt.dist_canberra(mtx, strict=False)
    kulczynski = dt.dist_kulczynski(mtx, strict=False)

    jc_lower = np.tril(jaccard)
    bc_upper = np.triu(bray_curtis)
    mh_lower, mh_upper = np.tril(morisita_horn), np.triu(morisita_horn)
    cb_lower, cb_upper = np.tril(canberra), np.triu(canberra)
    kz_lower, kz_upper = np.tril(kulczynski), np.triu(kulczynski)

    # There are 10 combinations.  Convention: the first (left) metric in
    # the label is the lower-left half on the heatmap.
    halves = (
        ('jc_kz', jc_lower, kz_upper),
        ('jc_cb', jc_lower, cb_upper),
        ('jc_mh', jc_lower, mh_upper),
        ('jc_bc', jc_lower, bc_upper),
        ('kz_cb', kz_lower, cb_upper),
        ('kz_mh', kz_lower, mh_upper),
        ('kz_bc', kz_lower, bc_upper),
        ('cb_mh', cb_lower, mh_upper),
        ('cb_bc', cb_lower, bc_upper),
        ('mh_bc', mh_lower, bc_upper),
    )
    combined = {label: lower + upper for label, lower, upper in halves}

    for label, matrix in combined.items():
        frame = pd.DataFrame(data=matrix, index=column_ids,
                             columns=column_ids)
        file_name = args.prefix + '_distance_' + label + '.tsv'
        out_file = os.path.join(args.basedir, file_name)
        frame.to_csv(out_file, sep='\t', encoding='utf-8')

    return {}
Пример #2
0
 def test_dist_canberra_bug(self):
     """Check dist_canberra on two rows that share a zero column.

     Column 0 is zero in both rows; the expected distance is half the sum
     of the |a - b| / (a + b) terms of the remaining two columns.
     """
     rows = array([[0, 0, 1], [0, 1, 1]])
     terms = [
         abs(0.0 - 1.0) / (0.0 + 1.0),
         abs(1.0 - 1.0) / (1.0 + 1.0),
     ]
     off_diag = (1.0 / 2.0) * sum(terms)
     want = array([[0.0, off_diag], [off_diag, 0.0]])
     got = dist_canberra(rows)
     assert_allclose(want, got)
Пример #3
0
    def test_dist_canberra(self):
        """Check dist_canberra against results calculated by hand.

        Exercises the ``zeromtx``, ``mtx1`` and ``sparse1`` fixtures.
        """
        # ``zeromtx`` is expected to yield a 4x4 matrix of zeros.
        zero_want = zeros((4, 4), "d")
        self.assertFloatEqual(dist_canberra(self.zeromtx), zero_want)

        mtx1_off_diag = 46.2 / 52.2
        mtx1_want = array(
            [[0.0, mtx1_off_diag], [mtx1_off_diag, 0.0]], "d")
        self.assertFloatEqual(dist_canberra(self.mtx1), mtx1_want)

        n_rows = self.sparse1.shape[0]
        sparse1_want = ones((n_rows, n_rows))
        # Remove the diagonal (first four entries).
        for k in range(4):
            sparse1_want[k, k] = 0.0
        # Every off-diagonal entry stays 1.0 except the (0, 1) pair.
        pair_01 = (5.33 - 0.4) / (5.33 + 0.4)
        sparse1_want[0, 1] = pair_01
        sparse1_want[1, 0] = pair_01
        self.assertFloatEqual(dist_canberra(self.sparse1), sparse1_want)
Пример #4
0
def get_dist(metric, mtx):
    """Return ``distance.squareform`` of the chosen distance matrix.

    ``metric`` selects which ``dt`` function is applied to ``mtx`` (always
    with ``strict=False``): ``'bray_curtis'``, ``'morisita_horn'``,
    ``'canberra'``, ``'jaccard'`` or ``'kulczynski'``.  Any other value
    falls back to Bray-Curtis.
    """
    metric_funcs = {
        'bray_curtis': 'dist_bray_curtis',
        'morisita_horn': 'dist_morisita_horn',
        'canberra': 'dist_canberra',
        'jaccard': 'binary_dist_jaccard',
        'kulczynski': 'dist_kulczynski',
    }
    # Unknown names use the Bray-Curtis default.
    func_name = metric_funcs.get(metric, 'dist_bray_curtis')
    matrix = getattr(dt, func_name)(mtx, strict=False)
    return distance.squareform(matrix)
Пример #5
0
def get_dist(metric, mtx):
    """Compute the requested distance on ``mtx`` and pass it to squareform.

    Recognised ``metric`` names are ``'bray_curtis'``, ``'morisita_horn'``,
    ``'canberra'``, ``'jaccard'`` and ``'kulczynski'``; anything else is
    treated as Bray-Curtis.  The matching ``dt`` function is called with
    ``strict=False`` and its result is handed to ``distance.squareform``,
    whose output is returned.
    """
    dt_attr_by_metric = {
        'bray_curtis': 'dist_bray_curtis',
        'morisita_horn': 'dist_morisita_horn',
        'canberra': 'dist_canberra',
        'jaccard': 'binary_dist_jaccard',
        'kulczynski': 'dist_kulczynski',
    }
    try:
        attr = dt_attr_by_metric[metric]
    except KeyError:
        # default
        attr = 'dist_bray_curtis'

    compute = getattr(dt, attr)
    return distance.squareform(compute(mtx, strict=False))
def get_dist(mtx, biom_data):
    """Compute a distance matrix for ``mtx`` or write split-metric tables.

    The module-level ``args`` object controls the mode (``create_splits``,
    ``metric``, ``basedir`` and ``prefix`` are read).

    Split mode (``args.create_splits`` truthy): Jaccard, Bray-Curtis,
    Morisita-Horn, Canberra and Kulczynski matrices are computed, and for
    each of the ten metric pairs the lower triangle of the first metric is
    added to the upper triangle of the second.  Every combined matrix is
    written to ``<basedir>/tmp/<prefix>_distance_<pair>.tsv`` as a
    tab-separated table whose row and column labels are the ``id`` values
    from ``biom_data['columns']``.  Returns an empty dict.

    Single-metric mode: the metric named by ``args.metric`` is computed
    (Bray-Curtis when the name is not recognised) and the result of
    ``distance.squareform`` on it is returned.
    """
    labels = [entry['id'] for entry in biom_data['columns']]

    if not args.create_splits:
        attr_by_metric = {
            'bray_curtis': 'dist_bray_curtis',
            'morisita_horn': 'dist_morisita_horn',
            'canberra': 'dist_canberra',
            'jaccard': 'binary_dist_jaccard',
            'kulczynski': 'dist_kulczynski',
        }
        # default is Bray-Curtis
        attr = attr_by_metric.get(args.metric, 'dist_bray_curtis')
        return distance.squareform(getattr(dt, attr)(mtx, strict=False))

    jc = dt.binary_dist_jaccard(mtx, strict=False)
    bc = dt.dist_bray_curtis(mtx, strict=False)
    mh = dt.dist_morisita_horn(mtx, strict=False)
    cb = dt.dist_canberra(mtx, strict=False)
    kz = dt.dist_kulczynski(mtx, strict=False)

    jc_low = np.tril(jc)
    kz_low = np.tril(kz)
    cb_low = np.tril(cb)
    mh_low = np.tril(mh)
    kz_up = np.triu(kz)
    cb_up = np.triu(cb)
    mh_up = np.triu(mh)
    bc_up = np.triu(bc)

    # There are 10 combos.
    # Convention: first (left) is lower left on the heatmap.
    splits = [
        ('jc_kz', jc_low + kz_up),
        ('jc_cb', jc_low + cb_up),
        ('jc_mh', jc_low + mh_up),
        ('jc_bc', jc_low + bc_up),
        ('kz_cb', kz_low + cb_up),
        ('kz_mh', kz_low + mh_up),
        ('kz_bc', kz_low + bc_up),
        ('cb_mh', cb_low + mh_up),
        ('cb_bc', cb_low + bc_up),
        ('mh_bc', mh_low + bc_up),
    ]

    for label, matrix in splits:
        table = pd.DataFrame(data=matrix, index=labels, columns=labels)
        file_name = args.prefix + '_distance_' + label + '.tsv'
        out_file = os.path.join(args.basedir, 'tmp', file_name)
        table.to_csv(out_file, sep='\t', encoding='utf-8')

    return {}