# Load the flux and abundance tables and build the capacity-usage object.
# `DataFrame.from_csv` was removed in pandas 1.0; `read_csv` with
# `index_col=0, parse_dates=True` reproduces its defaults.
flux = pd.read_csv("../data/mmol_gCDW_h.csv", index_col=0, parse_dates=True)
abundance = pd.read_csv("../data/g_gCDW.csv", index_col=0, parse_dates=True)
cu = CAPACITY_USAGE(flux, abundance, shared_reactions=False)

# Accumulators filled once per entry of `cu.cs`:
#   ccm_mean / biosyn_mean -- median of the "central metabolism" and
#       "biosynthesis" values (NOTE: despite the names these hold medians).
#   dists -- the selected value Series, appended in the order
#       central metabolism, biosynthesis for each entry.
#   ranksum_pvalues -- rank-sum test p-value comparing the two groups.
biosyn_mean = []
ccm_mean = []
ranksum_pvalues = {}
dists = []
for c in cu.cs:
    x = cu.CU[c]
    # Drop infinite and missing values before computing any statistics.
    # NOTE(review): these in-place calls may or may not write through to
    # `cu.CU` depending on the pandas version; kept as in the original so
    # that any later code relying on the cleaned data is unaffected.
    x.replace(np.inf, np.nan, inplace=True)
    x.dropna(inplace=True)

    # Label every remaining row with the subsystem of its reaction, then
    # collapse each subsystem into the master group that lists it.
    subsystems = pd.Series(index=x.index, data=[cu.rxns[r].subsystem for r in x.index])
    # `.items()` works on both Python 2 and 3 (`iteritems` is Python 2 only).
    # The replacements are applied group by group, in the mapping's order.
    for k, v in cu.get_master_groups().items():
        subsystems.replace({i: k for i in v}, inplace=True)

    ccm = x[subsystems[subsystems == "central metabolism"].index]
    biosyn = x[subsystems[subsystems == "biosynthesis"].index]

    ccm_mean.append(ccm.median())
    biosyn_mean.append(biosyn.median())
    dists.extend([ccm, biosyn])

    # ranksums returns (statistic, p-value); only the p-value is stored.
    ranksum_pvalues[c] = ranksums(ccm, biosyn)[1]