def main(args=None):
    """List the IDs held in a KDE object, count them, or summarize them.

    Values read from ``args`` (a mapping of command-line options):
        '<kde>' -- passed to ``Utils.load_kde`` to load the KDE object.
        '-n'    -- if truthy, print the number of KDEs and exit.
        '-s'    -- if truthy, print a header row and call
                   ``KDE_dataset_stats`` for each KDE instead of printing
                   its ID.

    KDE object layouts handled (selected by ``Utils.KDE_type``):
        1 -- sequence of items indexed as (ID, kde)
        2 -- {ID: kde}
        3 -- {libID: {ID: kde}}
        4 -- {libID: filename}; each file is loaded with ``Utils.load_kde``

    For the per-library layouts (3 and 4), '-n' prints a two-column
    'library'/'N' table with one row per library, and '-s' passes the
    library ID on to ``KDE_dataset_stats``.

    Raises:
        TypeError: if the KDE object type is not one of 1-4.
    """
    # load KDEs object
    sys.stderr.write('Loading KDEs...\n')
    KDEs = Utils.load_kde(args['<kde>'])

    # header (printed before the '-n' check, so '-s -n' emits both)
    if args['-s']:
        print('\t'.join(['lib_ID', 'taxon_ID', 'KDE_ID', 'min',
                         'percentile_5', 'percentile_25', 'mean', 'median',
                         'percentile_75', 'percentile_95', 'max', 'stdev']))

    # KDE type
    kde_type = Utils.KDE_type(KDEs)

    # parsing KDE
    if kde_type == 1:
        if args['-n']:
            print(len(KDEs))
            sys.exit()
        for x in KDEs:
            if args['-s']:
                KDE_dataset_stats(x[1], x[0])
            else:
                print(x[0])
    elif kde_type == 2:
        if args['-n']:
            print(len(KDEs.keys()))
            sys.exit()
        for x, y in KDEs.items():
            if args['-s']:
                KDE_dataset_stats(y, x)
            else:
                print(x)
    elif kde_type == 3:
        if args['-n']:
            print('\t'.join(['library', 'N']))
            for libID, v in KDEs.items():
                print('\t'.join([str(x) for x in [libID, len(v.keys())]]))
            sys.exit()
        for x, y in KDEs.items():
            for xx, yy in y.items():
                if args['-s']:
                    KDE_dataset_stats(yy, xx, libID=x)
                else:
                    print('\t'.join([x, xx]))
    elif kde_type == 4:
        if args['-n']:
            # Report every library; exiting inside the loop would only
            # ever count whichever library happened to be visited first.
            print('\t'.join(['library', 'N']))
            for libID, filename in KDEs.items():
                KDE_bylib = Utils.load_kde(filename)
                print('\t'.join([str(x) for x in [libID, len(KDE_bylib)]]))
            sys.exit()
        for libID, filename in KDEs.items():
            KDE_bylib = Utils.load_kde(filename)
            for x, y in KDE_bylib.items():
                if args['-s']:
                    # Pass the library ID so rows from different
                    # libraries stay distinguishable (as for type 3).
                    KDE_dataset_stats(y, x, libID=libID)
                else:
                    # NOTE(review): unlike type 3 this prints the ID
                    # without its library; kept as-is for compatibility.
                    print(x)
    else:
        raise TypeError('KDE object type not recognized')
def main(args=None):
    """Keep only the KDEs of the selected taxa and dump them to STDOUT.

    Values read from ``args`` (a mapping of command-line options):
        '<taxa>' -- passed to ``load_taxa``; the result is used for
                    ``in`` membership tests on taxon names.
        '<kde>'  -- passed to ``Utils.load_kde`` to load the KDE object.

    The filtered object keeps the layout of the input for types 1-3
    (list of [taxon, kde] pairs, {taxon: kde}, {libID: {taxon: kde}}).
    For type 4 ({libID: filename}) each file is loaded and the result is
    written as {libID: {taxon: kde}}.

    Raises:
        TypeError: if the KDE object type is not one of 1-4.
    """
    # loading taxa names
    taxa = load_taxa(args['<taxa>'])

    # loading KDEs
    KDEs = Utils.load_kde(args['<kde>'])
    kde_type = Utils.KDE_type(KDEs)

    # parsing KDE
    if kde_type == 1:
        KDEs_p = [[t, k] for t, k in KDEs if t in taxa]
    elif kde_type == 2:
        KDEs_p = {t: k for t, k in KDEs.items() if t in taxa}
    elif kde_type == 3:
        # Iterate the loaded KDEs (not the empty output dict), filtering
        # each library's taxa separately.
        KDEs_p = {}
        for libID, v in KDEs.items():
            KDEs_p[libID] = {t: k for t, k in v.items() if t in taxa}
    elif kde_type == 4:
        KDEs_p = {}
        for libID, filename in KDEs.items():
            KDE_bylib = Utils.load_kde(filename)
            KDEs_p[libID] = {t: k for t, k in KDE_bylib.items()
                             if t in taxa}
            # drop the reference to the unfiltered library before
            # loading the next one
            KDE_bylib = None
    else:
        raise TypeError('KDE object type not recognized')

    # writing
    dill.dump(KDEs_p, sys.stdout)
def main(args=None):
    """Print a tab-delimited libID / taxon / bandwidth table for a KDE object.

    The KDE object is loaded from ``args['<kde>']`` with
    ``Utils.load_kde``. One row is printed per KDE; the third column is
    whatever ``kde_factor`` returns for that KDE. KDE objects without
    per-library structure (types 1 and 2) are reported under library '1'.

    Raises:
        TypeError: if the KDE object type is not one of 1-4 (raised
            after the header row has been printed).
    """
    kde_obj = Utils.load_kde(args['<kde>'])
    obj_type = Utils.KDE_type(kde_obj)

    def iter_records():
        # Lazily yield (library, taxon, kde) for each supported layout.
        if obj_type == 1:
            for taxon, kde in kde_obj:
                yield '1', taxon, kde
        elif obj_type == 2:
            for taxon, kde in kde_obj.items():
                yield '1', taxon, kde
        elif obj_type == 3:
            for lib, by_taxon in kde_obj.items():
                for taxon, kde in by_taxon.items():
                    yield lib, taxon, kde
        elif obj_type == 4:
            # values are file names; load one library at a time
            for lib, path in kde_obj.items():
                lib_kdes = Utils.load_kde(path)
                for taxon, kde in lib_kdes.items():
                    yield lib, taxon, kde
        else:
            raise TypeError('KDE object type not recognized')

    header = ['libID', 'taxon', 'bandwidth']
    print('\t'.join(header))
    for lib, taxon, kde in iter_records():
        row = [lib, taxon, kde_factor(kde)]
        print('\t'.join(row))
def main(args=None):
    """Draw values from every KDE and write them as a TSV table to STDOUT.

    Values read from ``args`` (a mapping of command-line options):
        '<kde>' -- passed to ``Utils.load_kde`` to load the KDE object.
        '-n'    -- number of values to draw per KDE (converted with int).

    Entries whose KDE is None are skipped. KDE objects without
    per-library structure (types 1 and 2) are stored under library '1'.
    Each library's samples are converted with ``to_df`` and the frames
    are concatenated before writing.

    Raises:
        TypeError: if the KDE object type is not one of 1-4.
    """
    kde_obj = Utils.load_kde(args['<kde>'])
    n = int(args['-n'])
    obj_type = Utils.KDE_type(kde_obj)

    def draw(pairs):
        # {taxon: first row of kde.resample(n)} for every non-None KDE
        return {taxon: kde.resample(n)[0, ]
                for taxon, kde in pairs
                if kde is not None}

    samples = {}
    if obj_type == 1:
        samples['1'] = draw(kde_obj)
    elif obj_type == 2:
        samples['1'] = draw(kde_obj.items())
    elif obj_type == 3:
        for lib, by_taxon in kde_obj.items():
            samples[lib] = draw(by_taxon.items())
    elif obj_type == 4:
        # values are file names; load one library at a time
        for lib, path in kde_obj.items():
            lib_kdes = Utils.load_kde(path)
            samples[lib] = draw(lib_kdes.items())
    else:
        raise TypeError('KDE object type not recognized')

    # writing out results
    frames = [to_df(by_taxon, lib) for lib, by_taxon in samples.items()]
    table = pd.concat(frames)
    table.to_csv(sys.stdout, sep='\t', index=False)
def get_taxa(KDEs, kde_type, all_taxa=None):
    """Return the taxon names held in a KDE object.

    Args:
        KDEs: the KDE object; its layout depends on ``kde_type``:
            1 -- sequence of items indexed as (taxon, kde)
            2 -- {taxon: kde}
            3 -- {libID: {taxon: kde}}
            4 -- {libID: filename}; each file is loaded with
                 ``Utils.load_kde`` and treated as {taxon: kde}
        kde_type: integer 1-4 selecting the layout above.
        all_taxa: if not None, taxa whose KDE is None are included too;
            if None (default), they are left out.
            NOTE(review): any non-None value (including False) turns
            this on -- confirm callers pass None to disable it.

    Returns:
        A list of taxon names. For the per-library layouts (3 and 4)
        each name appears once, in no particular order.

    Raises:
        TypeError: if ``kde_type`` is not one of 1-4.
    """
    include_null = all_taxa is not None

    def names(pairs):
        # taxon names of (taxon, kde) pairs, honoring the None filter
        return [taxon for taxon, kde in pairs
                if include_null or kde is not None]

    if kde_type == 1:
        taxa = [k[0] for k in KDEs if include_null or k[1] is not None]
    elif kde_type == 2:
        taxa = names(KDEs.items())
    elif kde_type == 3:
        unique = set()
        for by_taxon in KDEs.values():
            # filter on each taxon's KDE, not on the library dict
            unique.update(names(by_taxon.items()))
        taxa = list(unique)
    elif kde_type == 4:
        unique = set()
        for filename in KDEs.values():
            KDE_bylib = Utils.load_kde(filename)
            unique.update(names(KDE_bylib.items()))
        # de-duplicate across libraries, as for type 3
        taxa = list(unique)
    else:
        raise TypeError('KDE object type not recognized')

    return taxa
def test_kde():
    """The bundled KDE pickle loads as a dict and is classified as type 2."""
    kde_path = os.path.join(data_dir, 'ampFrag_skewN90-25-n5-nS_kde.pkl')

    # loading
    loaded = Utils.load_kde(kde_path)
    assert isinstance(loaded, dict)

    # type detection
    assert Utils.KDE_type(loaded) == 2
def main(args=None):
    """Print the taxon names in a KDE object, optionally subsampled.

    Values read from ``args`` (a mapping of command-line options):
        '<kde>' -- passed to ``Utils.load_kde`` to load the KDE object.
        '-a'    -- forwarded to ``get_taxa`` as its ``all_taxa`` argument.
        '-s'    -- number of taxa to subsample (takes precedence).
        '-f'    -- fraction of the taxa to subsample (used if '-s' is None).
        '-p'    -- percentage of the taxa to subsample (used if '-s' and
                   '-f' are None).
        '-r'    -- passed as ``replace`` to ``np.random.choice``; it is
                   overwritten with True when more taxa are requested
                   than are available.

    If none of '-s', '-f', '-p' is given, all taxa are printed. Progress
    and warning messages go to STDERR; taxon names go to STDOUT, one per
    line.
    """
    sys.stderr.write('Loading KDEs...\n')
    kde_obj = Utils.load_kde(args['<kde>'])
    obj_type = Utils.KDE_type(kde_obj)

    # all taxon names in the KDE object
    taxa = get_taxa(kde_obj, obj_type, args['-a'])
    ntaxa = len(taxa)

    # how many taxa to draw (None => no subsampling)
    if args['-s'] is not None:
        nsub = int(args['-s'])
    elif args['-f'] is not None:
        nsub = int(float(args['-f']) * ntaxa)
    elif args['-p'] is not None:
        nsub = int(float(args['-p']) / 100 * ntaxa)
    else:
        nsub = None

    if nsub is not None:
        if nsub > ntaxa:
            # cannot draw more unique taxa than exist
            args['-r'] = True
            warning = 'WARNING: nsub > ntaxa, sub-sampling with replacement!'
            sys.stderr.write(warning + '\n')
        taxa = np.random.choice(taxa, size=nsub, replace=args['-r'])
        report = 'Subsampled {} taxa'
        sys.stderr.write(report.format(nsub) + '\n')

    # writing to STDOUT
    for name in taxa:
        print(name)