def main():
    """Smooth paired DNA/RNA tables, write them, then write DNA-normalized RNA.

    Steps, in order:
      1. Load both tables and select the smoothing function named by
         ``args.method`` ("laplace" or "witten_bell").
      2. Apply it to each table over the sorted union of both tables' row
         heads. In stratified mode, ``remove_totals`` runs on each table
         first, only row heads containing ``util.c_strat_delim`` are passed
         to the smoother, and ``hsum`` runs on each table afterwards.
      3. Write the two smoothed tables.
      4. Rescale RNA by the per-column DNA/RNA column-sum ratio, divide by
         DNA element-wise, optionally log-transform with base
         ``args.log_base``, and write the normalized RNA table.

    Raises:
        SystemExit: if exactly one of the two tables is stratified.
        KeyError: if ``args.method`` is not one of the known method names.
    """
    args = get_args()
    dna = util.Table(args.input_dna)
    rna = util.Table(args.input_rna)
    method = {"laplace": laplace, "witten_bell": witten_bell}[args.method]

    # Explicit check instead of `assert`: assertions are removed under
    # `python -O`, which would let mismatched tables through silently.
    if dna.is_stratified != rna.is_stratified:
        raise SystemExit("FAILED: Tables have nonequal stratification status.")
    strat_mode = dna.is_stratified

    all_features = sorted(set(dna.rowheads) | set(rna.rowheads))
    if strat_mode:
        # The filter does not depend on the table being processed, so it is
        # applied once here rather than once per table.
        all_features = [k for k in all_features if util.c_strat_delim in k]

    for t in (dna, rna):
        if strat_mode:
            remove_totals(t)
        method(t, all_features)
        if strat_mode:
            hsum(t)

    # write out dna/rna
    dna.write(args.output_basename + c_new_dna_extension, unfloat=True)
    rna.write(args.output_basename + c_new_rna_extension, unfloat=True)

    # normalize rna by dna (account for seq depth [scale]), then write
    scale = [d / r for r, d in zip(rna.colsums, dna.colsums)]
    # The change-of-base divisor is the same for every row; compute it once.
    log_divisor = log(args.log_base) if args.log_transform else None
    for i, dna_row in enumerate(dna.data):
        normed = [s * r / d for s, r, d in zip(scale, rna.data[i], dna_row)]
        if log_divisor is not None:
            normed = [log(x) / log_divisor for x in normed]
        rna.data[i] = normed
    rna.write(args.output_basename + c_norm_rna_extension, unfloat=True)
def main():
    """Regroup the rows of the input table and write the result.

    The grouping file is either the user's ``--custom`` file or one of the
    built-in entries of ``c_default_groups`` selected by ``--groups``; the
    script exits if neither is given. The loaded group -> features mapping is
    inverted to feature -> groups unless ``args.reversed`` is set, optionally
    extended with self-mappings for every entry of ``c_protected``, and then
    handed to ``regroup``.
    """
    args = get_args()

    # Load the table and reduce every row head to its bare feature id: cut at
    # the stratification delimiter first, then at the name delimiter.
    table = util.Table(args.input)
    features = {
        rowhead.split(util.c_strat_delim)[0].split(util.c_name_delim)[0]
        for rowhead in table.rowheads
    }

    # Choose the grouping file and how to read it; a custom file wins.
    if args.custom is not None:
        print("Loading custom groups file: {}".format(args.custom), file=sys.stderr)
        p_groups, start, skip = args.custom, 0, []
    elif args.groups is not None:
        p_groups, start, skip = c_default_groups[args.groups]
    else:
        sys.exit("Must specify either 1) built-in groups option [--groups] or 2) custom groups file [--custom]")

    # Load the grouping file, passing the table's feature ids as allowed values.
    map_group_features = util.load_polymap(
        p_groups,
        start=start,
        skip=skip,
        allowed_values=features,
    )

    # Downstream code wants feature -> {group: 1}; flip the loaded mapping
    # unless the user said the file is already in that orientation.
    if args.reversed:
        map_feature_groups = map_group_features
    else:
        map_feature_groups = {}
        for group, members in map_group_features.items():
            for feature in members:
                map_feature_groups.setdefault(feature, {})[group] = 1

    # Optionally map each protected feature onto itself.
    if args.protected == "Y":
        for feature in c_protected:
            map_feature_groups.setdefault(feature, {})[feature] = 1

    # Regroup and write out.
    keep_ungrouped = args.ungrouped == "Y"
    regroup(
        table,
        map_feature_groups,
        args.function,
        args.precision,
        ungrouped=keep_ungrouped,
    )
    table.write(args.output)
def main():
    """Partition the input table and write every partition that qualifies.

    A partition is written when it has at least ``args.critical_samples``
    columns, at least one row, and (if ``args.limit`` is set) a name that
    contains ``args.limit``. The output name is the partition name plus
    ``c_strain_profile_extension``.
    """
    args = get_args()
    table = util.Table(args.input)
    partitions = partition_table(
        table,
        args.critical_mean,
        args.critical_count,
        args.pinterval,
    )
    for name, partition in partitions.items():
        # Guard clauses, checked in the same order as the original
        # short-circuit condition.
        if len(partition.get_cols()) < args.critical_samples:
            continue
        if len(partition.get_rows()) < 1:
            continue
        if args.limit is not None and args.limit not in name:
            continue
        write_partition(table, partition, name + c_strain_profile_extension)
def main():
    """Normalize the input table and optionally relabel its column heads.

    ``normalize`` is called with three flags derived from the arguments:
    ``cpm`` (units == "cpm"), ``levelwise`` (mode == "levelwise") and
    ``special`` (special == "y"). When ``args.update_snames`` is set, each
    column head gets "-<UNITS>" as its suffix: a trailing match of
    ``c_default_suffix`` is replaced if present, otherwise the new suffix is
    appended.
    """
    args = get_args()
    table = util.Table(args.input)

    use_cpm = args.units == "cpm"
    use_levelwise = args.mode == "levelwise"
    use_special = args.special == "y"
    normalize(table, cpm=use_cpm, levelwise=use_levelwise, special=use_special)

    if args.update_snames:
        old_suffix_pattern = c_default_suffix + "$"
        new_suffix = "-" + args.units.upper()
        for i, colhead in enumerate(table.colheads):
            # subn reports whether the old suffix was found, so one call
            # covers both the "replace" and the "append" case.
            relabeled, n_replaced = re.subn(old_suffix_pattern, new_suffix, colhead)
            if n_replaced:
                table.colheads[i] = relabeled
            else:
                table.colheads[i] = colhead + new_suffix

    table.write(args.output)
def main():
    """Attach names to the features of the input table and write it out.

    The names come from a polymap loaded from either the ``--custom`` file or
    the path of the built-in ``c_default_names`` entry chosen by ``--names``;
    the script exits if neither is supplied. With ``args.simplify`` set, every
    run of non-alphanumeric characters in a name is collapsed to "_" before
    the table is renamed.
    """
    args = get_args()
    table = util.Table(args.input)

    # Keys the polymap loader may keep: each row head up to the
    # stratification delimiter.
    allowed_keys = dict.fromkeys(
        (rowhead.split(util.c_strat_delim)[0] for rowhead in table.rowheads),
        1,
    )

    # Resolve which names file to read; a custom file takes precedence.
    if args.custom is not None:
        p_names = args.custom
    elif args.names is not None:
        p_names = c_default_names[args.names].path
    else:
        sys.exit("Must (i) choose names option or (ii) provide names file")
    polymap = util.load_polymap(p_names, allowed_keys=allowed_keys)

    if args.simplify:
        # Only values of existing keys are replaced, so the mapping keeps its
        # size while it is being iterated.
        for key, names in polymap.items():
            polymap[key] = {
                re.sub("[^A-Za-z0-9]+", "_", name): 1 for name in names
            }

    rename(table, polymap)
    table.write(args.output)
def main():
    """Re-stratify a table by inferred taxonomy and report the mapping rate.

    Builds a uniref -> taxon map for the table's row heads, drops map entries
    whose taxon accounts for less than ``args.threshold`` of all mapped
    entries, re-indexes the rows according to ``args.mode``, sums the source
    rows behind each new row head, writes the table, and prints a one-line
    summary to stderr.

    Exits via ``sys.exit`` when neither ``args.devdb`` nor a known
    ``args.database`` is supplied.
    """
    args = get_args()
    table = util.Table(args.input)

    # build the taxmap (uniref -> taxon mapping)
    print("Building taxonomic map for input table", file=sys.stderr)
    if args.devdb is not None:
        p_datafile = args.devdb
    elif args.database in databases:
        p_datafile = databases[args.database]
    else:
        sys.exit(
            "Must specify a valid database (from utility mapping or --devdb)")
    taxmap = build_taxmap(table.rowheads, args.level, args.lca_choice, p_datafile)

    # refine the taxmap (remove rare taxa): keep an entry only if its taxon's
    # share of all mapped entries reaches the threshold
    counts = Counter(taxmap.values())
    n_mapped = float(sum(counts.values()))
    fractions = {taxon: n / n_mapped for taxon, n in counts.items()}
    taxmap = {
        old: new
        for old, new in taxmap.items()
        if fractions[new] >= args.threshold
    }

    # reindex the table (which rows to keep, which rows to pseudo-stratify);
    # index maps each output row head to the input row indices it sums over
    print("Reindexing the input table", file=sys.stderr)
    ticker = util.Ticker(table.rowheads)
    index = {}
    for i, rowhead in enumerate(table.rowheads):
        ticker.tick()
        feature, name, stratum = util.fsplit(rowhead)
        new_rowhead = tax_connect(rowhead, taxmap)
        # unmapped is never stratified
        if feature == util.c_unmapped:
            index.setdefault(rowhead, []).append(i)
        # outside of unclassified mode, keep totals
        elif stratum is None and args.mode != c_umode:
            index.setdefault(rowhead, []).append(i)
            # in totals mode, guess at taxonomy from uniref name
            if args.mode == c_tmode:
                index.setdefault(new_rowhead, []).append(i)
        # in unclassified mode, make a new row for the total...
        elif stratum == c_unclassified and args.mode == c_umode:
            index.setdefault(util.fjoin(feature, name, None), []).append(i)
            # ...then replace "unclassified" with inferred taxonomy
            index.setdefault(new_rowhead, []).append(i)
        # update strata in stratified mode
        elif stratum is not None and args.mode == c_smode:
            index.setdefault(new_rowhead, []).append(i)

    # rebuild the table: each new row is the element-wise sum of its sources
    print("Rebuilding the input table", file=sys.stderr)
    rowheads2, data2 = [], []
    ticker = util.Ticker(index)
    for rowhead in util.fsort(index):
        ticker.tick()
        rowheads2.append(rowhead)
        newrow = [0] * len(table.colheads)
        for i in index[rowhead]:
            oldrow = map(float, table.data[i])
            newrow = [a + b for a, b in zip(newrow, oldrow)]
        data2.append(newrow)
    table.rowheads = rowheads2
    table.data = data2

    print("Writing new table", file=sys.stderr)
    table.write(args.output, unfloat=True)

    # report on performance
    success, total = 0, 0
    for rowhead in table.rowheads:
        feature, name, stratum = util.fsplit(rowhead)
        if stratum is not None:
            total += 1
            if stratum != c_unclassified:
                success += 1
    # A table with no stratified rows would otherwise crash here with
    # ZeroDivisionError after the output has already been written.
    percent = round(100 * success / float(total), 1) if total else 0.0
    print(
        "Summary: Of {TOTAL} stratifications, {SUCCESS} mapped at {TARGET} level ({PERCENT}%)"
        .format(
            TOTAL=total,
            SUCCESS=success,
            TARGET=args.level,
            PERCENT=percent,
        ),
        file=sys.stderr,
    )
def main():
    """Re-stratify a table by inferred taxonomy and report the mapping rate.

    Builds a taxonomic map for the table's row heads from ``args.dev`` (if
    given) or ``databases[args.resolution]``, drops map entries whose taxon
    accounts for less than ``args.threshold`` of all mapped entries,
    re-indexes the rows according to ``args.mode``, sums the source rows
    behind each new row head, writes the table, and prints a one-line summary
    to stderr.

    Raises:
        KeyError: if ``args.dev`` is None and ``args.resolution`` is not a key
            of ``databases``.
    """
    args = get_args()
    tbl = util.Table(args.input)

    # build the taxmap
    print("Building taxonomic map for input table", file=sys.stderr)
    if args.dev is not None:
        p_datafile = args.dev
    else:
        p_datafile = databases[args.resolution]
    taxmap = build_taxmap(tbl.rowheads, args.level, p_datafile)

    # refine the taxmap: keep an entry only if its taxon's share of all
    # mapped entries reaches the threshold
    counts = {}
    for new in taxmap.values():
        counts[new] = counts.get(new, 0) + 1
    n_mapped = float(sum(counts.values()))
    fractions = {taxon: n / n_mapped for taxon, n in counts.items()}
    taxmap = {
        old: new
        for old, new in taxmap.items()
        if fractions[new] >= args.threshold
    }

    # reindex the table; index maps each output row head to the input row
    # indices it sums over
    print("Reindexing the input table", file=sys.stderr)
    ticker = util.Ticker(tbl.rowheads)
    index = {}
    for i, rowhead in enumerate(tbl.rowheads):
        ticker.tick()
        feature, name, stratum = util.fsplit(rowhead)
        new_rowhead = tax_connect(rowhead, taxmap)
        # unmapped is never stratified
        if feature == util.c_unmapped:
            index.setdefault(rowhead, []).append(i)
        # outside of unclassified mode, keep totals
        elif stratum is None and args.mode != c_umode:
            index.setdefault(rowhead, []).append(i)
            # in totals mode, guess at taxonomy from uniref name
            if args.mode == c_tmode:
                index.setdefault(new_rowhead, []).append(i)
        elif stratum == c_unclassified and args.mode == c_umode:
            # in unclassified mode, make a new row for the total...
            index.setdefault(util.fjoin(feature, name, None), []).append(i)
            # ...then replace "unclassified" with inferred taxonomy
            index.setdefault(new_rowhead, []).append(i)
        elif stratum is not None and args.mode == c_smode:
            index.setdefault(new_rowhead, []).append(i)

    # rebuild the table: each new row is the element-wise sum of its sources
    print("Rebuilding the input table", file=sys.stderr)
    rowheads2, data2 = [], []
    ticker = util.Ticker(index)
    for rowhead in util.fsort(index):
        ticker.tick()
        rowheads2.append(rowhead)
        newrow = [0] * len(tbl.colheads)
        for i in index[rowhead]:
            oldrow = map(float, tbl.data[i])
            newrow = [a + b for a, b in zip(newrow, oldrow)]
        data2.append(newrow)
    tbl.rowheads = rowheads2
    tbl.data = data2

    # output
    print("Writing new table", file=sys.stderr)
    tbl.write(args.output, unfloat=True)

    # report on performance
    success, total = 0, 0
    for rowhead in tbl.rowheads:
        feature, name, stratum = util.fsplit(rowhead)
        if stratum is not None:
            total += 1
            if stratum != c_unclassified:
                success += 1
    # A table with no stratified rows would otherwise crash here with
    # ZeroDivisionError after the output has already been written.
    percent = round(100 * success / float(total), 1) if total else 0.0
    print(
        "Summary: Of {TOTAL} stratifications, {SUCCESS} mapped at {TARGET} level ({PERCENT}%)"
        .format(
            TOTAL=total,
            SUCCESS=success,
            TARGET=args.level,
            PERCENT=percent,
        ),
        file=sys.stderr,
    )