def run_edgeR(gene_expression, bio_assignment, gene_names, batch_info=None, batch=True):
    """Run an edgeR likelihood-ratio test through the embedded R session.

    The expression data is converted with ``conversion_pydataframe``,
    transposed, and wrapped in a ``DGEList``.  The biological grouping (and
    optionally the batch grouping) is attached to ``dge$samples`` as an R
    factor.  The function then runs ``calcNormFactors``, builds a design
    matrix, and calls ``estimateDisp``, ``glmFit`` and ``glmLRT`` in R.

    Parameters
    ----------
    gene_expression
        Expression data passed to ``conversion_pydataframe``; the converted
        object is transposed before being used as ``counts``.
    bio_assignment
        Grouping of interest; converted and turned into an R factor.
    gene_names
        Forwarded unchanged as the ``genes`` argument of ``DGEList``.
    batch_info
        Optional batch grouping.  When it is ``None`` the batch term is
        dropped regardless of the value of ``batch``.
    batch
        Whether to add ``batch_group`` to the design and test the ``"bio"``
        coefficient explicitly.

    Returns
    -------
    Whatever evaluating ``lrt$table$PValue`` in R yields.

    Notes
    -----
    Intermediate results are stored as R variables (``dge``, ``bio_group``,
    ``batch_group``, ``design``, ``fit``, ``lrt``) and overwritten on every
    call.
    NOTE(review): the fixed design column names suggest that both factors
    are expected to have exactly two levels -- confirm with callers.
    """
    # Without batch labels there is nothing to correct for.
    if batch_info is None:
        batch = False

    counts_r = conversion_pydataframe(gene_expression)
    bio_r = conversion_pydataframe(bio_assignment)

    r.assign("dge", r.DGEList(counts=r.t(counts_r), genes=gene_names))
    r.assign("bio_group", r.factor(bio_r))
    r("dge$samples$bio_group <- bio_group")

    # Pick the branch-specific R statements once; they are executed below in
    # the same order as the rest of the pipeline.
    if batch:
        batch_r = conversion_pydataframe(batch_info)
        r.assign("batch_group", r.factor(batch_r))
        r("dge$samples$batch_group <- batch_group")
        design_stmt = (
            "design <- model.matrix(~bio_group+batch_group, data = dge$samples)"
        )
        colnames_stmt = 'colnames(design) <- c("Intercept", "bio", "batch")'
        lrt_stmt = 'lrt <- glmLRT(fit, coef="bio")'
    else:
        design_stmt = "design <- model.matrix(~bio_group, data = dge$samples)"
        colnames_stmt = 'colnames(design) <- c("Intercept", "bio")'
        lrt_stmt = "lrt <- glmLRT(fit)"

    r_statements = (
        "dge <- suppressWarnings(edgeR::calcNormFactors(dge))",
        design_stmt,
        colnames_stmt,
        "dge <- estimateDisp(dge, design)",
        "fit <- glmFit(dge, design)",
        lrt_stmt,
    )
    for statement in r_statements:
        r(statement)

    return r("lrt$table$PValue")
def ancova(lm1, lm2, names=('lm1', 'lm2')):
    """
    Test whether two single-variable linear fits differ in slope or intercept.

    The comparison is deliberately narrow: `lm1` and `lm2` only need to
    expose `x` and `y` attributes, and exactly one predictor is supported.
    `names` supplies the two group labels used for the R factor.

    Returns a tuple `(slope-difference p-value, intercept-difference
    p-value)`.

    Recall that if the slope difference is significant, you can't really say
    anything about the intercept.
    """
    # The approach follows this R recipe, from the blog post at
    # http://r-eco-evo.blogspot.com/2011/08/comparing-two-regression-slopes-by.html
    #
    #   model1 = aov(y~x*factor, data=df)
    #   (the interaction term is the 3rd table line of summary(model1))
    #
    #   model2 = aov(y~x+factor, data=df)
    #   (the 2nd table line, "factor", of summary(model2) gives the
    #   significance of the intercept difference)
    #
    #   anova(model1, model2)
    #   (does removing the interaction term affect the model fit?)

    # Stack both data sets and tag every observation with its group name.
    group_tags = [names[0]] * len(lm1.x) + [names[1]] * len(lm2.x)
    labels = r.factor(np.array(group_tags))
    xi = np.concatenate((lm1.x, lm2.x))
    yi = np.concatenate((lm1.y, lm2.y))

    def _aov_pvalues(formula_text):
        # Populate the formula's environment with the variables it refers
        # to, fit the model with aov(), and return the 'Pr(>F)' column of
        # the first summary table.
        fmla = robjects.Formula(formula_text)
        for var_name, value in (('xi', xi), ('yi', yi), ('labels', labels)):
            fmla.environment[var_name] = value
        fitted = r('aov(%s)' % fmla.r_repr())
        return r.summary(fitted)[0].rx2('Pr(>F)')

    # With the interaction term: the third row is the slope difference.
    interaction_pval = _aov_pvalues('yi~xi*labels')[2]

    # Without the interaction term: the second row is the intercept
    # difference.
    intercept_pval = _aov_pvalues('yi~xi+labels')[1]

    # TODO: anova(result1, result2)?
    return interaction_pval, intercept_pval
rownames=['true'], colnames=['predicted']) # <headingcell level=4> # Using non-base packages in Rpy2 # <codecell> import rpy2.robjects as robjects from rpy2.robjects.packages import importr r = robjects.r e1071 = importr('e1071') Yr = np2r(iris['Type']) Yr = r.factor(Yr) svm = e1071.svm(Xr, Yr) yhat = r.predict(svm, Xr) print r.table(yhat, Yr) # <headingcell level=4> # ggplot2 in python with Rpy2 # <markdowncell> # Thanks to [Fei Yu](http://www.thefeiyu.com/) for this vignette. # <codecell> import rpy2.robjects as robjects
# Cross-tabulate the true iris types against the labels in yhat_hclust
# (both defined outside this excerpt).  The parenthesised single-argument
# print behaves the same under Python 2 and is valid Python 3.
print(pd.crosstab(iris['Type'], yhat_hclust,
                  rownames=['true'], colnames=['predicted']))

# <headingcell level=4>

# Using non-base packages in Rpy2

# <codecell>

import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
r = robjects.r
# importr loads the R package 'e1071' and exposes its functions as attributes.
e1071 = importr('e1071')
# np2r and Xr come from outside this excerpt; the labels are converted and
# then turned into an R factor before being passed to svm.
Yr = np2r(iris['Type'])
Yr = r.factor(Yr)
svm = e1071.svm(Xr, Yr)
# Predict on the same data the model was fitted on and tabulate the
# predictions against the labels.
yhat = r.predict(svm, Xr)
print(r.table(yhat, Yr))

# <headingcell level=4>

# ggplot2 in python with Rpy2

# <markdowncell>

# Thanks to [Fei Yu](http://www.thefeiyu.com/) for this vignette.

# <codecell>

import rpy2.robjects as robjects