def anova2(values, factor1, factor2, factor1name="factor1", factor2name="factor2", interaction=True):
    """
    two-way anova, computed in R through the module-level `r` object

    values      : observations, sent to R as df$feature
    factor1     : first grouping, sent to R as df$factor1 and cast to a factor
    factor2     : second grouping, sent to R as df$factor2 and cast to a factor
    factor1name : row label used for the first factor in the returned table
    factor2name : row label used for the second factor in the returned table
    interaction : fit factor1 * factor2 when true ( default ), factor1 + factor2 otherwise

    returns whatever nesteddict2table( ) builds from the anova results
    """
    # ship the observations and both groupings to R as "df"
    r.assign("df", {"feature": values, "factor1": factor1, "factor2": factor2})
    r("df$factor1 <- factor( df$factor1 )")
    r("df$factor2 <- factor( df$factor2 )")

    # fit the linear model; "*" adds the interaction term, "+" leaves it out
    operator = "*" if interaction else "+"
    results = r("anova( lm( df$feature ~ df$factor1 %s df$factor2 ) )" % operator)

    # wipe the R workspace so "df" does not leak into later calls
    r("rm( list=ls() )")

    # labels for the output table
    # NOTE(review): R itself names the last column "Pr(>F)" ( no spaces );
    # confirm nesteddict2table still finds it under the label used here
    colheads = ["Df", "Sum Sq", "Mean Sq", "F value", "Pr( >F )"]
    rowheads = [factor1name, factor2name]
    if interaction:
        rowheads.extend(["int term", "error"])
    else:
        rowheads.append("error")

    # results is keyed by statistic, each holding one value per model term;
    # flip it into [term][statistic]
    table_rows = {}
    for stat_name in results.keys():
        stat_values = results[stat_name]
        for position, term in enumerate(rowheads):
            table_rows.setdefault(term, {})[stat_name] = stat_values[position]

    # return as zopy table
    return nesteddict2table(table_rows, rowheads, colheads)
def write_abunds(values, depths, metadata, path, groups=False):
    """
    turn relative abundances into multinomial-sampled counts and dump them to path

    values   : nested dict handed to nesteddict2table( )
    depths   : per-column draw count, looked up by column name
    metadata : stored as the "Phenotype" entry of the merged metadata table
    path     : destination passed to the table's dump( )
    groups   : when true, rows are first summed by the second "_"-separated
               field of their ( renamed ) row head

    returns None
    """

    def second_field(rowhead):
        return rowhead.split("_")[1]

    abunds = nesteddict2table(values)
    abunds.apply_rowheads(rename)
    if groups:
        abunds.groupby(second_field, sum)
    abunds.rowsort()
    abunds.data[0][0] = "#"
    abunds.normalize_columns()

    # colname -> sample
    for colname, col in abunds.iter_cols():
        # force the column to sum to exactly 1: multinomial( ) is sensitive
        # to sum( col ) rounding to 1 + eps
        col[-1] = 1 - sum(col[:-1])
        counts = multinomial(depths[colname], col)
        # table rows are addressed starting at 1
        for row_index, count in enumerate(counts, 1):
            abunds.set(row_index, colname, int(count))

    # attach phenotype and sequencing depth as metadata, then write out
    meta = nesteddict2table({"Phenotype": metadata, "SeqDepth": depths})
    abunds.metamerge(meta)
    abunds.dump(path)
def main():
    """
    read data from stdin, compute row_stats( ) for each stratum, and emit
    the resulting table ( via excel( ) when args.excel is set, else dump( ) )
    """
    args = get_args()
    data = read_stream(sys.stdin, stratified=args.stratified)
    for stratum in sorted(data):
        data[stratum] = row_stats(data[stratum], engin=args.engin)
    # the backslash is escaped explicitly: "\ " is an invalid escape sequence
    # that newer interpreters warn about; the resulting string is unchanged
    tdata = nesteddict2table(data, aColheads=c_props, origin="STAT \\ LEVEL")
    # counter intuitive, but "transposed" is opposite expectation here:
    # the table is flipped unless --transpose was requested
    if not args.transpose:
        tdata.transpose()
    if args.excel:
        excel(tdata.data)
    else:
        tdata.dump()
def anova(values, factor1, factor1name="factor1"):
    """
    one-way anova, computed in R through the module-level `r` object

    values      : observations, sent to R as df$feature
    factor1     : grouping, sent to R as df$factor1 and cast to a factor
    factor1name : row label used for the factor in the returned table

    returns whatever nesteddict2table( ) builds from the anova results
    """
    # ship the observations and the grouping to R as "df"
    r.assign("df", {"feature": values, "factor1": factor1})
    r("df$factor1 <- factor( df$factor1 )")

    # fit the linear model, then wipe the R workspace so "df" does not linger
    results = r("anova( lm( df$feature ~ df$factor1 ) )")
    r("rm( list=ls() )")

    # labels for the output table
    # NOTE(review): R itself names the last column "Pr(>F)" ( no spaces );
    # confirm nesteddict2table still finds it under the label used here
    colheads = ["Df", "Sum Sq", "Mean Sq", "F value", "Pr( >F )"]
    rowheads = [factor1name, "error"]

    # results is keyed by statistic, each holding one value per model term;
    # flip it into [term][statistic]
    table_rows = {}
    for stat_name in results.keys():
        stat_values = results[stat_name]
        for position, term in enumerate(rowheads):
            table_rows.setdefault(term, {})[stat_name] = stat_values[position]

    # return as zopy table
    return nesteddict2table(table_rows, rowheads, colheads)
# ---------------------------------------------------------------
# collect file sizes from `ls -l`, overall and per extension group
# ---------------------------------------------------------------

sizes = {}
# NOTE(review): the arguments are joined into a single shell string, so
# paths containing spaces or shell metacharacters are re-parsed by the shell
command = "ls -l " + " ".join( sys.argv[1:] )
cmd = subprocess.Popen( command, shell=True, stdout=subprocess.PIPE )
for line in cmd.stdout:
    items = line.split( )
    # items[4] is read as the size and items[8] as the path, so at least
    # nine fields are required; shorter lines are reported, not indexed
    if len( items ) >= 9:
        size = int( items[4] )
        # everything after the first "." of the file name is the group
        items = os.path.split( items[8] )[1].split( "." )
        group = "--"
        if len( items ) > 1:
            group = "." + ".".join( items[1:] )
        sizes.setdefault( "[ALL]", [] ).append( size )
        sizes.setdefault( group, [] ).append( size )
    else:
        print >>sys.stderr, "Bad line:", line
# reap the child process once its output has been consumed
cmd.stdout.close( )
cmd.wait( )

def prettysize( size ):
    """ format a size with one decimal and a T/G/M/K/B suffix ( powers of 10 ) """
    for val, txt in zip( [12, 9, 6, 3, 0], "TGMKB" ):
        # float division keeps the decimal even for integer sizes
        scaled = size / float( 10**val )
        if scaled >= 1:
            return "%.1f%s" % ( round( scaled, 1 ), txt )
    # sizes below one byte ( e.g. empty files ) still get a string, not None
    return "%.1fB" % size

# ---------------------------------------------------------------
# summarize each group and dump as a table
# ---------------------------------------------------------------

data = {}
for group, values in sizes.items( ):
    inner = data.setdefault( group, {} )
    inner["count"] = len( values )
    inner["total"] = prettysize( sum( values ) )
    inner["mean"] = prettysize( mean( values ) )
T = nesteddict2table( data )
T.data[0][0] = "#"
T.dump( )
if re.search(args.key_pattern, astrRow[0]) ] for strFeature, strValue in aastrData: if strFeature not in dictFeatureIndex: dictFeatureIndex[strFeature] = len(dictFeatureIndex) + 1 dictTableData[strColhead] = { strKey: strValue for [strKey, strValue] in aastrData } # --------------------------------------------------------------- # coerce to table # --------------------------------------------------------------- # not ideal kwargs = {"empty": args.fill_empty} if args.fill_empty is not None else {} # try to maintain original ordering (modified 4/2/2015) astrFeatures = sorted(dictFeatureIndex.keys(), key=lambda x: dictFeatureIndex[x]) tableData = nesteddict2table(dictTableData, aColheads=astrFeatures, **kwargs) tableData.rowsort() tableData.transpose() # not ideal if args.output is not None: tableData.dump(args.output) else: tableData.dump()
# ---------------------------------------------------------------
# build the merged table
# ---------------------------------------------------------------

if len( args.tables ) == 1 or args.legacy:
    # single table, or legacy mode: start from the first table and merge
    # any remaining ones into it one at a time
    t = table( p )
    for p2 in args.tables[1:]:
        t.merge( table( p2 ) )
else:
    # default mode: pool every table into one [row][col] nested dict;
    # later tables win on conflict, with a warning when the value changes
    data = {}
    for p in args.tables:
        loaded = table( p ).table2nesteddict( )
        for rowhead in loaded:
            source_row = loaded[rowhead]
            merged_row = data.setdefault( rowhead, {} )
            for colhead in source_row:
                value = source_row[colhead]
                if colhead in merged_row and merged_row[colhead] != value:
                    warn( p, "overwrites", rowhead, colhead, merged_row[colhead], "with", value )
                merged_row[colhead] = value
    t = nesteddict2table( data, empty=c_strNA )

# optionally attach metadata
if args.metatable is not None:
    t.metamerge( table( args.metatable ) )

# optionally replace the missing-value marker with the user's fill value
if args.fill_empty is not None:
    def fill_missing( entry ):
        if entry != c_strNA:
            return entry
        return args.fill_empty
    t.apply_entries( fill_missing )

# ---------------------------------------------------------------
# dump table
# ---------------------------------------------------------------

t.dump( )
# check for homogeneity stats = {} stats["N"] = len(x) if len(set(x)) < 2 or len(set(y)) < 2: for s in order[1:]: stats[s] = "#N/A" else: stats["r"], stats["r_p"] = pearsonr(x, y) stats["r2"] = stats["r"]**2 stats["rho"], stats["rho_p"] = spearmanr(x, y) stats["rho2"] = stats["rho"]**2 stats["NMI"] = mutinfo(quantform(x), quantform(y), normalized=True) stats["BC"] = bc(x, y) stats["BC_norm"] = bc(x, y, norm=True) slope, y_int = polyfit(x, y, 1) stats["slope"] = slope stats["y-int"] = y_int stats["1/slope"] = 1 / slope stats["x-int"] = -y_int / slope stats = {k: pretty(v) for k, v in stats.items()} return stats for stratum in sorted(data): data[stratum] = row_stats(data[stratum]) tdata = nesteddict2table(data, aColheads=list(order), origin="STAT \ LEVEL") tdata.transpose() tdata.dump() if bad > 0: print >> sys.stderr, "%% bad rows: %.2f" % (100 * bad / float(total))
# command-line options
parser.add_argument(
    '-i', '--input',
    nargs="+",
    help='One or more MetaPhlAn clade profiles',
)
parser.add_argument(
    '-o', '--output',
    help='Marker PCL file',
)
parser.add_argument(
    '-e', '--headers',
    action="store_true",
    help='File has headers',
)
parser.add_argument(
    '-g', '--grep',
    default=None,
    help='grep on clades',
)
parser.add_argument(
    '-x', '--extension_groups',
    default=1,
    type=int,
    help='.txt is 1, .cp.txt is 2, etc.',
)
args = parser.parse_args()

# load every profile into a nested dict: [sample][marker] = value
nesteddictData = {}
total = len(args.input)
for position, path in enumerate(args.input, 1):
    # progress goes to stderr
    print >> sys.stderr, "loading", position, "of", total
    sample = path2name(path, args.extension_groups)
    nesteddictData[sample] = funcLoadCladeProfile(path, grep=args.grep, headers=args.headers)

# build the table ( missing values become 0 ), flip it so markers are on
# the rows, unfloat, and save as pcl
tableData = nesteddict2table(nesteddictData, empty=0)
tableData.transpose()
tableData.unfloat()
tableData.dump(args.output)