def get_dist(mtx, biom_data):
    """Return a distance result for ``mtx`` or write split-metric tables.

    Behaviour is selected by the module-level ``args``:

    * ``args.create_splits`` truthy: the Jaccard, Bray-Curtis, Morisita-Horn,
      Canberra and Kulczynski matrices are computed and combined pairwise
      (lower triangle of the first metric plus upper triangle of the
      second).  Every combination is written as a tab-separated table to
      ``<args.basedir>/<args.prefix>_distance_<pair>.tsv`` and ``{}`` is
      returned.
    * otherwise: the metric named by ``args.metric`` is computed
      (Bray-Curtis when the name is not recognised) and the result of
      ``distance.squareform`` on it is returned.

    Args:
        mtx: Data matrix handed unchanged to the ``dt`` distance functions.
        biom_data: Mapping with a ``'columns'`` sequence whose items each
            carry an ``'id'`` key; the ids label the rows and columns of
            the written tables.
    """
    # Read the ids up front in both modes, exactly as before, so a malformed
    # ``biom_data`` fails before any distance is computed.
    sample_ids = [column['id'] for column in biom_data['columns']]

    if not args.create_splits:
        by_name = {
            'bray_curtis': dt.dist_bray_curtis,
            'morisita_horn': dt.dist_morisita_horn,
            'canberra': dt.dist_canberra,
            'jaccard': dt.binary_dist_jaccard,
            'kulczynski': dt.dist_kulczynski,
        }
        # Unknown metric names fall back to Bray-Curtis.
        chosen = by_name.get(args.metric, dt.dist_bray_curtis)
        return distance.squareform(chosen(mtx, strict=False))

    jaccard = dt.binary_dist_jaccard(mtx, strict=False)
    bray_curtis = dt.dist_bray_curtis(mtx, strict=False)
    morisita_horn = dt.dist_morisita_horn(mtx, strict=False)
    canberra = dt.dist_canberra(mtx, strict=False)
    kulczynski = dt.dist_kulczynski(mtx, strict=False)

    # np.tril / np.triu both keep the main diagonal, so each diagonal cell of
    # a combined matrix is the sum of the two metrics' diagonal cells.
    lower = {
        'jc': np.tril(jaccard),
        'mh': np.tril(morisita_horn),
        'cb': np.tril(canberra),
        'kz': np.tril(kulczynski),
    }
    upper = {
        'bc': np.triu(bray_curtis),
        'mh': np.triu(morisita_horn),
        'cb': np.triu(canberra),
        'kz': np.triu(kulczynski),
    }

    # The 10 metric pairs.  Convention: the first (left) abbreviation is the
    # lower-left half of the heatmap, the second is the upper-right half.
    pairings = (
        ('jc', 'kz'), ('jc', 'cb'), ('jc', 'mh'), ('jc', 'bc'),
        ('kz', 'cb'), ('kz', 'mh'), ('kz', 'bc'),
        ('cb', 'mh'), ('cb', 'bc'),
        ('mh', 'bc'),
    )
    # Build every combination before writing anything to disk.
    combined = {}
    for low_key, up_key in pairings:
        combined[low_key + '_' + up_key] = lower[low_key] + upper[up_key]

    for pair_name, table in combined.items():
        frame = pd.DataFrame(data=table, index=sample_ids, columns=sample_ids)
        target = os.path.join(
            args.basedir,
            args.prefix + '_distance_' + pair_name + '.tsv',
        )
        frame.to_csv(target, sep='\t', encoding='utf-8')

    return {}
Пример #2
0
    def test_dist_kulczynski(self):
        """Check dist_kulczynski against known results.

        Covers the all-zero fixture (expects a 4x4 matrix of zeros) and the
        ``mtx1`` fixture, whose expected 2x2 symmetric result is spelled out
        as the hand calculation.
        """
        zero_result = dist_kulczynski(self.zeromtx)
        assert_allclose(zero_result, zeros((4, 4), "d"))

        # Hand-calculated distance between the two entries of mtx1; it fills
        # both off-diagonal cells while the diagonal stays 0.
        off_diagonal = 1.0 - 1.0 / 2.0 * (3.0 / 4.0 + 3.0 / 23.1)
        expected = array([[0, off_diagonal], [off_diagonal, 0]], "d")

        assert_allclose(dist_kulczynski(self.mtx1), expected)
Пример #3
0
def get_dist(metric, mtx):
    """Compute the distance named by ``metric`` for ``mtx``.

    Args:
        metric: One of ``'bray_curtis'``, ``'morisita_horn'``, ``'canberra'``,
            ``'jaccard'`` or ``'kulczynski'``.  Any other value silently
            selects Bray-Curtis.
        mtx: Data matrix handed unchanged to the selected ``dt`` function
            (called with ``strict=False``).

    Returns:
        The result of ``distance.squareform`` applied to the computed
        distances.
    """
    calculators = {
        'bray_curtis': dt.dist_bray_curtis,
        'morisita_horn': dt.dist_morisita_horn,
        'canberra': dt.dist_canberra,
        'jaccard': dt.binary_dist_jaccard,
        'kulczynski': dt.dist_kulczynski,
    }
    # Unknown names fall back to the default metric, Bray-Curtis.
    calculate = calculators.get(metric, dt.dist_bray_curtis)
    return distance.squareform(calculate(mtx, strict=False))
Пример #4
0
def get_dist(metric, mtx):
    """Compute the distance selected by ``metric`` and reshape it.

    Recognised metric names are ``'bray_curtis'``, ``'morisita_horn'``,
    ``'canberra'``, ``'jaccard'`` and ``'kulczynski'``; anything else is
    treated as ``'bray_curtis'``.  The chosen ``dt`` function is called with
    ``strict=False`` and its result is returned after
    ``distance.squareform``.
    """
    # NOTE(review): another top-level ``get_dist(mtx, biom_data)`` is defined
    # right after this one; if both live in the same module the later
    # definition replaces this one -- confirm which is intended.

    # Start from the default and override only for the other known names.
    compute = dt.dist_bray_curtis
    if metric == 'morisita_horn':
        compute = dt.dist_morisita_horn
    elif metric == 'canberra':
        compute = dt.dist_canberra
    elif metric == 'jaccard':
        compute = dt.binary_dist_jaccard
    elif metric == 'kulczynski':
        compute = dt.dist_kulczynski

    pairwise = compute(mtx, strict=False)
    return distance.squareform(pairwise)
def get_dist(mtx, biom_data):
    """Compute one distance result, or write all split-metric tables.

    With ``args.create_splits`` set, five distance matrices (Jaccard,
    Bray-Curtis, Morisita-Horn, Canberra, Kulczynski) are computed and merged
    pairwise: the first metric of a pair supplies the lower triangle, the
    second the upper triangle.  Each merged matrix is saved as a
    tab-separated file ``<args.basedir>/tmp/<args.prefix>_distance_<pair>.tsv``
    whose rows and columns are labelled with the ``'id'`` of every entry in
    ``biom_data['columns']``.  An empty dict is returned in this mode.

    Without ``args.create_splits``, the metric named by ``args.metric`` is
    computed (Bray-Curtis for unrecognised names) and the result of
    ``distance.squareform`` on it is returned.

    Args:
        mtx: Data matrix passed unchanged to the ``dt`` distance functions.
        biom_data: Mapping providing a ``'columns'`` sequence of items with
            an ``'id'`` key.
    """
    labels = [entry['id'] for entry in biom_data['columns']]

    if args.create_splits:
        jc_full = dt.binary_dist_jaccard(mtx, strict=False)
        bc_full = dt.dist_bray_curtis(mtx, strict=False)
        mh_full = dt.dist_morisita_horn(mtx, strict=False)
        cb_full = dt.dist_canberra(mtx, strict=False)
        kz_full = dt.dist_kulczynski(mtx, strict=False)

        # Lower halves are only needed for metrics that appear first in a
        # pair, upper halves for those that appear second.
        lower_half = {
            'jc': np.tril(jc_full),
            'mh': np.tril(mh_full),
            'cb': np.tril(cb_full),
            'kz': np.tril(kz_full),
        }
        upper_half = {
            'bc': np.triu(bc_full),
            'mh': np.triu(mh_full),
            'cb': np.triu(cb_full),
            'kz': np.triu(kz_full),
        }

        # There are 10 combos.  Convention: first (left) is lower left on the
        # heatmap.
        combos = [
            ('jc', 'kz'), ('jc', 'cb'), ('jc', 'mh'), ('jc', 'bc'),
            ('kz', 'cb'), ('kz', 'mh'), ('kz', 'bc'),
            ('cb', 'mh'), ('cb', 'bc'),
            ('mh', 'bc'),
        ]
        # All merged matrices are built before the first file is written.
        merged = {
            first + '_' + second: lower_half[first] + upper_half[second]
            for first, second in combos
        }

        for combo_name, values in merged.items():
            table = pd.DataFrame(data=values, index=labels, columns=labels)
            destination = os.path.join(
                args.basedir,
                'tmp',
                args.prefix + '_distance_' + combo_name + '.tsv',
            )
            table.to_csv(destination, sep='\t', encoding='utf-8')

        return {}

    # Single-metric mode: default to Bray-Curtis, override for other names.
    measure = dt.dist_bray_curtis
    if args.metric == 'morisita_horn':
        measure = dt.dist_morisita_horn
    elif args.metric == 'canberra':
        measure = dt.dist_canberra
    elif args.metric == 'jaccard':
        measure = dt.binary_dist_jaccard
    elif args.metric == 'kulczynski':
        measure = dt.dist_kulczynski

    return distance.squareform(measure(mtx, strict=False))