Exemplo n.º 1
0
def anova2(values,
           factor1,
           factor2,
           factor1name="factor1",
           factor2name="factor2",
           interaction=True):
    """Run a two-way ANOVA in R and return the result as a table.

    The inputs are assigned to an R variable ``df`` (columns ``feature``,
    ``factor1`` and ``factor2``); both factor columns are passed through R's
    ``factor()`` and the model ``anova( lm( ... ) )`` is evaluated. The two
    factors are joined with ``*`` when ``interaction`` is true (the default)
    and with ``+`` otherwise. The R workspace is cleared afterwards.

    ``factor1name`` and ``factor2name`` label the first two result rows;
    they are followed by ``"int term"`` (interaction only) and ``"error"``.
    The return value is whatever ``nesteddict2table`` builds from the
    collected statistics.
    """
    # hand the data to R as "df" and mark both grouping columns as factors
    r.assign("df", {"feature": values, "factor1": factor1, "factor2": factor2})
    r("df$factor1 <- factor( df$factor1 )")
    r("df$factor2 <- factor( df$factor2 )")

    # fit the model, then wipe the R workspace
    joiner = "*" if interaction else "+"
    results = r("anova( lm( df$feature ~ df$factor1 %s df$factor2 ) )" % joiner)
    r("rm( list=ls() )")

    # row / column labels of the output table
    colheads = ["Df", "Sum Sq", "Mean Sq", "F value", "Pr( >F )"]
    if interaction:
        rowheads = [factor1name, factor2name, "int term", "error"]
    else:
        rowheads = [factor1name, factor2name, "error"]

    # regroup R's per-statistic sequences into {row label: {statistic: value}}
    ndictData = {}
    for stat in results.keys():
        per_row = results[stat]
        for position, label in enumerate(rowheads):
            ndictData.setdefault(label, {})[stat] = per_row[position]

    return nesteddict2table(ndictData, rowheads, colheads)
Exemplo n.º 2
0
def write_abunds(values, depths, metadata, path, groups=False):
    """Turn abundance values into per-sample counts and dump them to ``path``.

    ``values`` is converted to a table, its row heads are passed through
    ``rename`` and, when ``groups`` is true, rows are grouped (with ``sum``)
    by the second "_"-separated field of their row head. Columns are then
    normalized and each column is replaced by ``multinomial`` counts drawn
    with ``depths[colname]`` trials. ``metadata`` and ``depths`` are merged
    in as "Phenotype" and "SeqDepth" before the table is dumped.

    Returns None.
    """

    def second_field(rowhead):
        return rowhead.split("_")[1]

    T = nesteddict2table(values)
    T.apply_rowheads(rename)
    if groups:
        T.groupby(second_field, sum)
    T.rowsort()
    T.data[0][0] = "#"
    T.normalize_columns()

    # each column name is a sample
    for sample, fractions in T.iter_cols():
        # force the last entry so the column sums to exactly 1;
        # multinomial( ) is sensitive to sum( col ) rounding to 1 + eps
        fractions[-1] = 1 - sum(fractions[:-1])
        draws = multinomial(depths[sample], fractions)
        # data rows start at index 1 (row 0 is addressed separately above)
        for row, draw in enumerate(draws, 1):
            T.set(row, sample, int(draw))

    T.metamerge(nesteddict2table({"Phenotype": metadata, "SeqDepth": depths}))
    T.dump(path)
Exemplo n.º 3
0
def main():
    """Compute row statistics per stratum for data read from stdin and emit them.

    Arguments come from ``get_args()``. Each stratum returned by
    ``read_stream`` is replaced by the result of ``row_stats`` (visited in
    sorted key order), the nested result is converted to a table with
    ``c_props`` as column heads, and the table is either passed to ``excel``
    (``args.excel``) or dumped.
    """
    args = get_args()
    data = read_stream(sys.stdin, stratified=args.stratified)
    for stratum in sorted(data):
        data[stratum] = row_stats(data[stratum], engin=args.engin)
    # Raw string: "\ " is not a valid escape sequence and triggers a
    # DeprecationWarning/SyntaxWarning on Python 3; the value is unchanged.
    tdata = nesteddict2table(data, aColheads=c_props, origin=r"STAT \ LEVEL")
    # counter intuitive, but "transposed" is opposite expectation here
    if not args.transpose:
        tdata.transpose()
    if args.excel:
        excel(tdata.data)
    else:
        tdata.dump()
Exemplo n.º 4
0
def anova(values, factor1, factor1name="factor1"):
    """Run a one-way ANOVA in R and return the result as a table.

    The inputs are assigned to an R variable ``df`` (columns ``feature`` and
    ``factor1``); the factor column is passed through R's ``factor()`` and
    ``anova( lm( df$feature ~ df$factor1 ) )`` is evaluated. The R workspace
    is cleared afterwards.

    ``factor1name`` labels the first result row; the second row is
    ``"error"``. The return value is whatever ``nesteddict2table`` builds
    from the collected statistics.
    """
    # hand the data to R as "df" and mark the grouping column as a factor
    r.assign("df", {"feature": values, "factor1": factor1})
    r("df$factor1 <- factor( df$factor1 )")

    # fit the model, then wipe the R workspace
    results = r("anova( lm( df$feature ~ df$factor1 ) )")
    r("rm( list=ls() )")

    # row / column labels of the output table
    colheads = ["Df", "Sum Sq", "Mean Sq", "F value", "Pr( >F )"]
    rowheads = [factor1name, "error"]

    # regroup R's per-statistic sequences into {row label: {statistic: value}}
    ndictData = {}
    for stat in results.keys():
        per_row = results[stat]
        for position, label in enumerate(rowheads):
            ndictData.setdefault(label, {})[stat] = per_row[position]

    return nesteddict2table(ndictData, rowheads, colheads)
Exemplo n.º 5
0
# Collect file sizes from `ls -l`, grouped by extension: the group is
# everything after the first "." of the base name (or "--" when there is no
# "."), and every file is also counted under "[ALL]".
sizes = {}
# NOTE(review): the arguments are joined into a shell command unquoted, so
# names containing spaces or shell metacharacters are interpreted by the shell.
command = "ls -l " + " ".join( sys.argv[1:] )
cmd = subprocess.Popen( command, shell=True, stdout=subprocess.PIPE )
for line in cmd.stdout:
    items = line.split( )
    # items[4] (size) and items[8] (name) are read below, so a usable line
    # needs at least 9 fields; shorter lines are reported instead of
    # raising IndexError
    if len( items ) >= 9:
        size = int( items[4] )
        items = os.path.split( items[8] )[1].split( "." )
        group = "--"
        if len( items ) > 1:
            group = "." + ".".join( items[1:] )
        sizes.setdefault( "[ALL]", [] ).append( size )
        sizes.setdefault( group, [] ).append( size )
    else:
        print >>sys.stderr, "Bad line:", line

def prettysize( size ):
    """Format a byte count with one decimal and a decimal (power-of-1000) unit.

    The largest unit among T, G, M, K, B for which the scaled value is at
    least 1 is used, e.g. 1500 -> "1.5K". Values below 1 (including 0) are
    reported in bytes instead of falling through and returning None.
    """
    for val, txt in zip( [12, 9, 6, 3, 0], "TGMKB" ):
        # float division so integer inputs are not truncated on Python 2
        scaled = size / float( 10**val )
        # ">=" so exact unit boundaries (1000 -> "1.0K") use the larger unit
        if scaled >= 1:
            return "%.1f%s" % ( round( scaled, 1 ), txt )
    # smaller than one byte (or zero): still return a string
    return "%.1fB" % size

# Summarize every group: number of files, pretty-printed total and mean size
data = {}
for group, values in sizes.items( ):
    data[group] = {
        "count": len( values ),
        "total": prettysize( sum( values ) ),
        "mean": prettysize( mean( values ) ),
    }

# Convert to a table, put "#" in the top-left cell and dump it
T = nesteddict2table( data )
T.data[0][0] = "#"
T.dump( )
            if re.search(args.key_pattern, astrRow[0])
        ]
    for strFeature, strValue in aastrData:
        if strFeature not in dictFeatureIndex:
            dictFeatureIndex[strFeature] = len(dictFeatureIndex) + 1
    dictTableData[strColhead] = {
        strKey: strValue
        for [strKey, strValue] in aastrData
    }

# ---------------------------------------------------------------
# coerce to table
# ---------------------------------------------------------------

# not ideal: only forward an "empty" value when one was given
kwargs = {}
if args.fill_empty is not None:
    kwargs["empty"] = args.fill_empty

# try to maintain original ordering (modified 4/2/2015):
# features are sorted by the index stored for them in dictFeatureIndex
astrFeatures = sorted(dictFeatureIndex, key=dictFeatureIndex.get)

tableData = nesteddict2table(dictTableData, aColheads=astrFeatures, **kwargs)
tableData.rowsort()
tableData.transpose()

# not ideal: pass the output path only when one was given
dump_args = [] if args.output is None else [args.output]
tableData.dump(*dump_args)
Exemplo n.º 7
0
# Single table or legacy mode: start from table( p ) and merge any further
# tables into it (with exactly one table there is nothing left to merge).
if len( args.tables ) == 1 or args.legacy:
    t = table( p )
    for p2 in args.tables[1:]:
        t.merge( table( p2 ) )
else:
    # Default mode: pool all tables into one nested dict, later tables
    # overwriting earlier ones (with a warning when the value changes).
    data = {}
    for p in args.tables:
        d = table( p ).table2nesteddict( )
        for r in d:
            inner = data.setdefault( r, {} )
            for c in d[r]:
                value = d[r][c]
                if c in inner and inner[c] != value:
                    warn( p, "overwrites", r, c, inner[c], "with", value )
                inner[c] = value
    t = nesteddict2table( data, empty=c_strNA )

# Optional post-processing: merge the metadata table, then replace the
# c_strNA placeholder with the requested fill value
if args.metatable is not None:
    t.metamerge( table( args.metatable ) )
if args.fill_empty is not None:
    t.apply_entries( lambda x: x if x != c_strNA else args.fill_empty )

# ---------------------------------------------------------------
# dump table
# ---------------------------------------------------------------

t.dump( )
Exemplo n.º 8
0
    # check for homogeneity
    stats = {}
    stats["N"] = len(x)
    if len(set(x)) < 2 or len(set(y)) < 2:
        for s in order[1:]:
            stats[s] = "#N/A"
    else:
        stats["r"], stats["r_p"] = pearsonr(x, y)
        stats["r2"] = stats["r"]**2
        stats["rho"], stats["rho_p"] = spearmanr(x, y)
        stats["rho2"] = stats["rho"]**2
        stats["NMI"] = mutinfo(quantform(x), quantform(y), normalized=True)
        stats["BC"] = bc(x, y)
        stats["BC_norm"] = bc(x, y, norm=True)
        slope, y_int = polyfit(x, y, 1)
        stats["slope"] = slope
        stats["y-int"] = y_int
        stats["1/slope"] = 1 / slope
        stats["x-int"] = -y_int / slope
    stats = {k: pretty(v) for k, v in stats.items()}
    return stats


# Replace each stratum's data with its row statistics (sorted key order)
for stratum in sorted(data):
    data[stratum] = row_stats(data[stratum])
# Raw string: "\ " is not a valid escape sequence and triggers a
# DeprecationWarning/SyntaxWarning on Python 3; the value is unchanged.
tdata = nesteddict2table(data, aColheads=list(order), origin=r"STAT \ LEVEL")
tdata.transpose()
tdata.dump()
if bad > 0:
    # sys.stderr.write emits the same text as the Python 2 print statement
    # and also works on Python 3
    sys.stderr.write("%% bad rows: %.2f\n" % (100 * bad / float(total)))
Exemplo n.º 9
0
    parser.add_argument('-i',
                        '--input',
                        nargs="+",
                        help='One or more MetaPhlAn clade profiles')
    parser.add_argument('-o', '--output', help='Marker PCL file')
    parser.add_argument('-e',
                        '--headers',
                        action="store_true",
                        help='File has headers')
    parser.add_argument('-g', '--grep', default=None, help='grep on clades')
    parser.add_argument('-x',
                        '--extension_groups',
                        default=1,
                        type=int,
                        help='.txt is 1, .cp.txt is 2, etc.')
    args = parser.parse_args()
    # load everything as nested dict [sample][marker]=value
    nesteddictData = {}
    for i, path in enumerate(args.input):
        print >> sys.stderr, "loading", i + 1, "of", len(args.input)
        name = path2name(path, args.extension_groups)
        nesteddictData[name] = funcLoadCladeProfile(path,
                                                    grep=args.grep,
                                                    headers=args.headers)
    # convert to a table, substituting 0 for missing values
    tableData = nesteddict2table(nesteddictData, empty=0)
    # transpose to get markers on the rows, unfloat, save as pcl
    tableData.transpose()
    tableData.unfloat()
    tableData.dump(args.output)