import sys


# CompositionalLotkaVolterra and estimate_relative_abundances come from the cLV codebase.
def fit_clv(observations, time_points, held_out_rel_abun, held_out_time_points, denom, method):
    """Fit cLV with the requested regularizer and forecast the held-out trajectories."""
    print("Estimating cLV parameters using", method)
    rel_abun = estimate_relative_abundances(observations)
    clv = CompositionalLotkaVolterra(rel_abun, time_points, denom=denom)

    if method == "Elastic Net":
        clv.train()
    elif method == "Ridge":
        clv.train_ridge()
    else:
        print("bad optimization method for cLV", file=sys.stderr)
        return

    A, g, B = clv.get_params()
    # Forecast each held-out trajectory from its initial composition.
    predictions = [
        clv.predict(o[0], tpts)
        for (o, tpts) in zip(held_out_rel_abun, held_out_time_points)
    ]
    return A, g, predictions
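# Usage sketch (illustrative assumption, not part of the original script): how the
# fit_clv above might be driven in a leave-one-out prediction experiment. The
# synthetic counts only show the expected layout -- one (time points x taxa) array
# per subject plus a matching time-point vector -- and denom=0 assumes the ALR
# denominator is given as a taxon index. Requires the repo's
# CompositionalLotkaVolterra and estimate_relative_abundances to be importable.
import numpy as np

rng = np.random.default_rng(0)
observations = [rng.poisson(50, size=(10, 5)).astype(float) for _ in range(4)]
time_points = [np.arange(10, dtype=float) for _ in range(4)]

for i in range(len(observations)):
    # Hold out subject i, train on the remaining subjects.
    train_obs = [o for j, o in enumerate(observations) if j != i]
    train_tpts = [t for j, t in enumerate(time_points) if j != i]
    held_out_p = [observations[i] / observations[i].sum(axis=1, keepdims=True)]
    held_out_tpts = [time_points[i]]
    A, g, predictions = fit_clv(train_obs, train_tpts, held_out_p, held_out_tpts,
                                denom=0, method="Ridge")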
def fit_clv(observations, time_points, effects,
            held_out_observations, held_out_time_points, held_out_effects,
            using_rel_abun=False, ret_params=False, folds=None):
    """Fit cLV with external effects and forecast the held-out trajectories."""
    # ret_params is accepted here but unused in this snippet.
    rel_abun = []
    held_out_rel_abun = []

    if folds is None:
        folds = len(observations)

    # If the observations are concentrations, normalize each time point to relative abundances.
    if not using_rel_abun:
        for obs in observations:
            rel_abun.append(obs / obs.sum(axis=1, keepdims=True))
        for obs in held_out_observations:
            held_out_rel_abun.append(obs / obs.sum(axis=1, keepdims=True))
    else:
        rel_abun = observations
        held_out_rel_abun = held_out_observations

    clv = CompositionalLotkaVolterra(rel_abun, time_points, effects)
    clv.train(folds=folds)

    # Forecast each held-out trajectory from its initial composition and its effects.
    predictions = [
        clv.predict(o[0], tpts, e)
        for (o, tpts, e) in zip(held_out_rel_abun, held_out_time_points, held_out_effects)
    ]
    return predictions
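# Evaluation sketch (assumption, not from the original script): once fit_clv
# returns one predicted trajectory per held-out subject, a simple score is the
# mean squared error between predicted and observed relative abundances at the
# shared time points. prediction_mse is a hypothetical helper, not a repo function.
import numpy as np

def prediction_mse(predictions, held_out_observations, using_rel_abun=False):
    """Mean squared error per held-out trajectory, on the relative-abundance scale."""
    errors = []
    for pred, obs in zip(predictions, held_out_observations):
        truth = obs if using_rel_abun else obs / obs.sum(axis=1, keepdims=True)
        errors.append(np.mean((np.asarray(pred) - truth) ** 2))
    return errors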
import numpy as np

# Ridge penalties estimated previously.
r_A = 1
r_g = 4
r_B = 0.5

# Y: per-subject trajectories; T: time points; U: external effects.
P = []       # relative abundances (pseudo-counted)
Y_pc = []    # pseudo-counted absolute abundances
log_Y = []   # log absolute abundances for the gLV ridge regression
for y in Y:
    mass = y.sum(axis=1)
    p = y / y.sum(axis=1, keepdims=True)
    p = (p + 1e-5) / (p + 1e-5).sum(axis=1, keepdims=True)
    P.append(p)
    Y_pc.append((mass.T * p.T).T)
    log_Y.append(np.log((mass.T * p.T).T))

clv = CompositionalLotkaVolterra(P, T, U, pseudo_count=1e-5)
clv.r_A = r_A
clv.r_g = r_g
clv.r_B = r_B
clv.train_ridge()
A_clv, g_clv, B_clv = clv.get_params()

# Fit the absolute-abundance gLV model with the same ridge penalties, then map its
# parameters to the relative (additive log-ratio) scale for comparison against cLV.
A_glv, g_glv, B_glv = ridge_regression_glv(log_Y, U, T, clv.r_A, clv.r_g, clv.r_B)
A_rel, g_rel, B_rel = compute_relative_parameters(A_glv, g_glv, B_glv, clv.denom)
plot_corr(A_rel, g_rel, B_rel, A_clv, g_clv, B_clv, "plots/stein_correlation.pdf")
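# Reference sketch (assumption): under the additive log-ratio transform
# x_i = log(y_i / y_denom), gLV parameters map to relative parameters by
# subtracting the denominator taxon's row and dropping that row. This is a
# minimal re-derivation for comparison purposes, not necessarily what the
# repo's compute_relative_parameters implements.
import numpy as np

def relative_parameters_sketch(A, g, B, denom):
    keep = np.array([i for i in range(A.shape[0]) if i != denom])
    A_rel = A[keep] - A[denom]   # (ntaxa-1, ntaxa)
    g_rel = g[keep] - g[denom]   # (ntaxa-1,)
    B_rel = B[keep] - B[denom]   # (ntaxa-1, n_effects)
    return A_rel, g_rel, B_rel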
import pickle as pkl
import numpy as np

# Stein dataset: external effects U and time points T.
U = pkl.load(open("data/stein/U.pkl", "rb"))
T = pkl.load(open("data/stein/T.pkl", "rb"))
col_names = np.array(['und. Enterobacteriaceae', 'Blautia', 'Barnesiella',
                      'und. uncl. Mollicutes', 'und. Lachnospiraceae', 'Akkermansia',
                      'C. difficile', 'uncl. Lachnospiraceae', 'Coprobacillus',
                      'Enterococcus', 'Other'])

# Y (the per-subject observation matrices) is assumed to be loaded earlier in the script.
P = []
held_out_rel_abun = []
for y in Y:
    P.append(y / y.sum(axis=1, keepdims=True))

# Published cLV parameters.
clv = CompositionalLotkaVolterra(P, T, U)
A = np.loadtxt("pub-results/stein_A")
g = np.loadtxt("pub-results/stein_g")
B = np.loadtxt("pub-results/stein_B")
B = np.expand_dims(B, axis=1)

# Published gLV parameters.
glv = GeneralizedLotkaVolterra(P, T, U)
A_glv = np.loadtxt("pub-results/stein_A_glv")
g_glv = np.loadtxt("pub-results/stein_g_glv")
B_glv = np.loadtxt("pub-results/stein_B_glv")
B_glv = np.expand_dims(B_glv, axis=1)

ntaxa = Y[0].shape[1]
old_denom = clv.denom
# Row labels for the cLV parameters: every taxon except the ALR denominator.
taxon_row_names = col_names[np.array([i for i in range(ntaxa) if i != old_denom])]
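# Display sketch (assumption, not part of the original script): the published cLV
# interaction matrix A is assumed to have one row per non-denominator taxon and one
# column per taxon, and g one entry per non-denominator taxon, so they can be
# labeled with taxon_row_names / col_names for inspection.
import pandas as pd

A_table = pd.DataFrame(A, index=taxon_row_names, columns=col_names)
g_table = pd.Series(g, index=taxon_row_names, name="relative growth rate")
print(A_table.round(3))
print(g_table.round(3))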