def clustermodel(fcovs, fmeth, model,
                 # clustering args
                 max_dist=200, linkage='complete', rho_min=0.32,
                 min_clust_size=1,
                 merge_linkage=None, max_merge_dist=0,
                 counts=False,
                 sep="\t",
                 X=None, X_locs=None, X_dist=None,
                 weights=None, outlier_sds=None,
                 combine=False, bumping=False, betareg=False,
                 gee_args=(), skat=False, png_path=None):
    """Cluster the features read from `fmeth` and yield one result per cluster.

    The work is delegated to three helpers defined elsewhere:

    * ``feature_gen(fmeth, rho_min=..., weights=...)`` supplies the features,
    * ``mclust(...)`` groups them using `max_dist`, `linkage`,
      `merge_linkage` and `max_merge_dist`,
    * ``clustermodelgen(fcovs, clusters, model, ...)`` produces the results;
      it receives `sep`, `X`, `X_locs`, `X_dist`, `outlier_sds`, `combine`,
      `bumping`, `betareg`, `gee_args`, `skat`, `counts` and `png_path`
      unchanged.

    Clusters with fewer than `min_clust_size` members are dropped before
    modelling.  `min_clust_size` must be >= 1 (checked with ``assert``).

    Yields:
        Every item produced by ``clustermodelgen``, in order.
    """
    # An iterable of feature objects; from here on the weights travel with
    # each feature, which is why `weights` is not forwarded any further.
    features = feature_gen(fmeth, rho_min=rho_min, weights=weights)
    assert min_clust_size >= 1

    all_clusters = mclust(features,
                          max_dist=max_dist,
                          linkage=linkage,
                          merge_linkage=merge_linkage,
                          max_merge_dist=max_merge_dist)
    # Lazily discard clusters that are too small.
    kept_clusters = (clust for clust in all_clusters
                     if min_clust_size <= len(clust))

    model_options = dict(sep=sep,
                         X=X, X_locs=X_locs, X_dist=X_dist,
                         outlier_sds=outlier_sds,
                         combine=combine, bumping=bumping,
                         betareg=betareg, gee_args=gee_args,
                         skat=skat, counts=counts,
                         png_path=png_path)

    for result in clustermodelgen(fcovs, kept_clusters, model,
                                  **model_options):
        yield result
# NOTE(review): `covariates_file` is read below but is not assigned in this
# excerpt; presumably it is defined earlier in the script -- confirm.
methylation_file = '../../../crystal/tests/meth.txt.gz'

formula = 'methylation ~ age + gender'
coef = 'gender'

covs = pd.read_csv(covariates_file)


def feature_gen(fname):
    """Yield one ``crystal.Feature`` per row of `fname`, skipping the first row.

    The first column of each row is split on ":" into chromosome and
    position; the remaining columns are converted to floats and passed
    through ``crystal.logit`` before being stored on the feature.
    """
    for i, d in enumerate(ts.reader(fname, header=False)):
        if i == 0:
            # The first row is not data (it is skipped unconditionally).
            continue
        chrom, pos = d[0].split(":")
        # Build a real list before handing it to numpy: under Python 3
        # `map` returns an iterator and np.array(<iterator>) gives a 0-d
        # object array instead of a float vector.  Under Python 2 this is
        # exactly what `map(float, ...)` produced.
        values = np.array([float(v) for v in d[1:]])
        yield crystal.Feature(chrom, int(pos), crystal.logit(values))


cluster_iter = mclust(feature_gen(methylation_file),
                      max_dist=100, max_skip=0)

# Output columns; the header line is derived from the same format string.
fmt = "{chrom}\t{start}\t{end}\t{p:.4g}\t{coef:.3f}\t{n_sites:d}"
print(ts.fmt2header(fmt))

for i, c in enumerate(
        crystal.model_clusters(cluster_iter, covs, formula, coef,
                               model_fn=crystal.zscore_cluster, n_cpu=1)):
    print(fmt.format(**c))
    # Plot the first cluster that passes all three thresholds, then stop.
    if c['p'] < 1e-3 and abs(c['coef']) > 0.2 and c['n_sites'] > 3:
        crystal.plot.spaghetti_plot(c, covs)
        plt.savefig('/tmp/figure-1.eps')
        break
return r if __name__ == "__main__": def feature_gen(fname): for i, toks in enumerate(ts.reader(fname, header=False)): if i == 0: continue chrom, pos = toks[0].split(":") yield Feature(chrom, int(pos), map(float, toks[1:])) # fmt = "{chrom}\t{start}\t{end}\t{n_probes}\t{p:5g}\t{t:.4f}\t{coef:.4f}\t{var}" # print ts.fmt2header(fmt) clust_iter = (c for c in mclust(feature_gen(sys.argv[1]), max_dist=400, max_skip=1) if len(c) > 2) df = pd.read_table("meth.clin.txt") df["id"] = df["StudyID"] formula = "methylation ~ asthma + age + gender" np.random.seed(10) ntrue, nfalse = 10, 10 results = [] for fn in (bump_cluster, liptak_cluster, zscore_cluster): results.append(evaluate_method(clust_iter, df, formula, "asthma", fn, ntrue, nfalse)) formula = "methylation ~ asthma + age + gender" for fn in (gee_cluster, mixed_model_cluster): results.append(evaluate_method(clust_iter, df, formula, "asthma", fn, ntrue, nfalse))
return r if __name__ == "__main__": def feature_gen(fname): for i, toks in enumerate(ts.reader(fname, header=False)): if i == 0: continue chrom, pos = toks[0].split(":") yield Feature(chrom, int(pos), map(float, toks[1:])) #fmt = "{chrom}\t{start}\t{end}\t{n_probes}\t{p:5g}\t{t:.4f}\t{coef:.4f}\t{var}" #print ts.fmt2header(fmt) clust_iter = ( c for c in mclust(feature_gen(sys.argv[1]), max_dist=400, max_skip=1) if len(c) > 2) df = pd.read_table('meth.clin.txt') df['id'] = df['StudyID'] formula = "methylation ~ asthma + age + gender" np.random.seed(10) ntrue, nfalse = 10, 10 results = [] for fn in (bump_cluster, liptak_cluster, zscore_cluster): results.append( evaluate_method(clust_iter, df, formula, 'asthma', fn, ntrue, nfalse))
covariates_file = "../../../crystal/tests/covs.csv"
methylation_file = "../../../crystal/tests/meth.txt.gz"

formula = "methylation ~ age + gender"
coef = "gender"

covs = pd.read_csv(covariates_file)


def feature_gen(fname):
    """Yield one ``crystal.Feature`` per row of `fname`, skipping the first row.

    The first column of each row is split on ":" into chromosome and
    position; the remaining columns are converted to floats and passed
    through ``crystal.logit`` before being stored on the feature.
    """
    for i, d in enumerate(ts.reader(fname, header=False)):
        if i == 0:
            # The first row is not data (it is skipped unconditionally).
            continue
        chrom, pos = d[0].split(":")
        # Build a real list before handing it to numpy: under Python 3
        # `map` returns an iterator and np.array(<iterator>) gives a 0-d
        # object array instead of a float vector.  Under Python 2 this is
        # exactly what `map(float, ...)` produced.
        values = np.array([float(v) for v in d[1:]])
        yield crystal.Feature(chrom, int(pos), crystal.logit(values))


cluster_iter = mclust(feature_gen(methylation_file),
                      max_dist=100, max_skip=0)

# Output columns; the header line is derived from the same format string.
fmt = "{chrom}\t{start}\t{end}\t{p:.4g}\t{coef:.3f}\t{n_sites:d}"
print(ts.fmt2header(fmt))

for i, c in enumerate(
    crystal.model_clusters(cluster_iter, covs, formula, coef,
                           model_fn=crystal.zscore_cluster, n_cpu=1)
):
    print(fmt.format(**c))
    # Plot the first cluster that passes all three thresholds, then stop.
    if c["p"] < 1e-3 and abs(c["coef"]) > 0.2 and c["n_sites"] > 3:
        crystal.plot.spaghetti_plot(c, covs)
        plt.savefig("/tmp/figure-1.eps")
        break