def __init__(self, x, y, **kwargs):
    """Fit an R ``loess`` smoother of ``y`` on one or more predictors.

    Args:
        x: Predictor array, either 1-D (a single predictor) or 2-D with one
            row per observation and one column per predictor.
        y: Response values; flattened before being passed to R. Must hold
            exactly one value per row of ``x``.
        **kwargs: Forwarded unchanged to R's ``loess``.

    Raises:
        AssertionError: If ``x`` and ``y`` disagree on the number of rows,
            or if more than four predictors are supplied.
    """
    self.nvar = 1 if x.ndim == 1 else x.shape[1]
    # Kept as asserts so callers that rely on AssertionError still work.
    assert (x.ndim == 1 and x.size == y.size) or \
        (x.ndim == 2 and x.shape[0] == y.size), \
        "X and Y inputs must have same number of rows"
    assert (self.nvar < 5), "Maximum number of predictors is 4"

    # NO_CONVERSION leaves the data frame as an R object so it can be
    # passed straight back into R.
    df = with_mode(NO_CONVERSION, r.data_frame)(x=x, y=y.flatten())
    if x.ndim == 1:
        model = r("y ~ x")
    else:
        # One formula term per column actually present in ``x``. The former
        # hard-coded range(4) referenced x.3 / x.4 even when ``x`` had fewer
        # columns.
        # NOTE(review): assumes R names the matrix columns x.1, x.2, ...;
        # a 2-D ``x`` with a single column may be named plain "x" -- confirm.
        terms = ' + '.join('x.%d' % (i + 1) for i in range(self.nvar))
        model = r("y ~ " + terms)
    self.smoother = with_mode(NO_CONVERSION, r.loess)(model, data=df, **kwargs)
def __init__(self, x, y, **kwargs):
    """Fit an R ``loess`` smoother of ``y`` on one or more predictors.

    Args:
        x: Predictor array, either 1-D (a single predictor) or 2-D with one
            row per observation and one column per predictor.
        y: Response values; flattened before being passed to R. Must hold
            exactly one value per row of ``x``.
        **kwargs: Forwarded unchanged to R's ``loess``.

    Raises:
        AssertionError: If ``x`` and ``y`` disagree on the number of rows,
            or if more than four predictors are supplied.
    """
    self.nvar = 1 if x.ndim == 1 else x.shape[1]
    # Kept as asserts so callers that rely on AssertionError still work.
    assert (x.ndim == 1 and x.size == y.size) or \
        (x.ndim == 2 and x.shape[0] == y.size), \
        "X and Y inputs must have same number of rows"
    assert (self.nvar < 5), "Maximum number of predictors is 4"

    # NO_CONVERSION leaves the data frame as an R object so it can be
    # passed straight back into R.
    df = with_mode(NO_CONVERSION, r.data_frame)(x=x, y=y.flatten())
    if x.ndim == 1:
        model = r("y ~ x")
    else:
        # One formula term per column actually present in ``x``. The former
        # hard-coded range(4) referenced x.3 / x.4 even when ``x`` had fewer
        # columns.
        # NOTE(review): assumes R names the matrix columns x.1, x.2, ...;
        # a 2-D ``x`` with a single column may be named plain "x" -- confirm.
        terms = ' + '.join('x.%d' % (i + 1) for i in range(self.nvar))
        model = r("y ~ " + terms)
    self.smoother = with_mode(NO_CONVERSION, r.loess)(model, data=df, **kwargs)
def wilcox_test(x, y=None, **kwargs):
    """Run R's paired or unpaired Wilcoxon rank sum (signed rank) test.

    Args:
        x: First sample.
        y: Optional second sample; when omitted only ``x`` is passed to R.
        **kwargs: Forwarded to R's ``wilcox_test``. Commonly used:
            paired - set to true for a paired test
            mu - value for the null hypothesis (default 0.0)
            alternative - 'two.sided' (default), 'less', 'greater'

    Returns:
        A ``(p value, V/W statistic)`` tuple extracted from the R result.
    """
    run_test = with_mode(NO_CONVERSION, r.wilcox_test)
    # Identity check on purpose: ``y == None`` is evaluated element-wise when
    # ``y`` is an array, which makes the ``if`` ambiguous or wrong.
    if y is None:
        wc = run_test(x, **kwargs)
    else:
        wc = run_test(x, y, **kwargs)
    # r['$'] is R's list-element accessor; pull the two fields of interest.
    return r['$'](wc, 'p.value'), r['$'](wc, 'statistic')
def wilcox_test(x, y=None, **kwargs):
    """Run R's paired or unpaired Wilcoxon rank sum (signed rank) test.

    Args:
        x: First sample.
        y: Optional second sample; when omitted only ``x`` is passed to R.
        **kwargs: Forwarded to R's ``wilcox_test``. Commonly used:
            paired - set to true for a paired test
            mu - value for the null hypothesis (default 0.0)
            alternative - 'two.sided' (default), 'less', 'greater'

    Returns:
        A ``(p value, V/W statistic)`` tuple extracted from the R result.
    """
    run_test = with_mode(NO_CONVERSION, r.wilcox_test)
    # Identity check on purpose: ``y == None`` is evaluated element-wise when
    # ``y`` is an array, which makes the ``if`` ambiguous or wrong.
    if y is None:
        wc = run_test(x, **kwargs)
    else:
        wc = run_test(x, y, **kwargs)
    # r['$'] is R's list-element accessor; pull the two fields of interest.
    return r['$'](wc, 'p.value'), r['$'](wc, 'statistic')
def research_pair(analysis, categs, pair, max_separation=45):
    """Look for information about a particular pair.

    Prints the genes in which the pair was found and runs the R helper
    ``analyseGoAnnotations`` on them.

    analysis: The analysis file or object
    categs: GO categories R object
    pair: The definition of the pair
    max_separation: Passed through to ``find_pair_in_analysis``
    """
    import rpy

    matches = find_pair_in_analysis(analysis, pair, max_separation)

    # The gene identifier is the first whitespace-separated token of each entry.
    genes = []
    for entry in matches:
        genes.append(entry.split()[0])

    # A single parenthesised argument prints the same under the Python 2
    # print statement and the print() function.
    print('%s is in:\n%s' % (str(pair), "\n".join(genes)))

    annotate = rpy.with_mode(rpy.NO_CONVERSION, rpy.r.analyseGoAnnotations)
    # NOTE(review): the result is computed but not returned in the visible code.
    result = annotate(categs, genes)
def mask_array_construct(self):
    """Load the dataset table through R and build the masked data array.

    Reads ``self.dataset_source`` with ``rpy.r.read_table`` (``row_names=1``)
    and sets ``self.mask_array``, ``self.genelabels``, ``self.no_of_genes``,
    ``self.no_of_cols`` and ``self.mask_matrix``.
    """
    # Mode 0 is handed to with_mode as-is (presumably NO_CONVERSION -- confirm).
    read_table = rpy.with_mode(0, rpy.r.read_table)
    table = read_table(self.dataset_source, row_names=1)

    # !Important!
    # If dataset_source holds too little data, the R -> Python conversion
    # becomes a problem: the whole data matrix is converted to a Python
    # string matrix and R's NA is not converted to nan.
    # Cause: r.as_matrix converts a small dataset to character type.
    # r.matrix leaves the class type alone but breaks the structure.
    # The only fix is to pass a colClasses vector to r.read_table, e.g.
    #     colClasses=c('character',rep('double',11))
    # which requires knowing no_of_cols in advance. The real dataset is big
    # enough that this has not shown up.

    in_range = ma.masked_inside(rpy.r.as_matrix(table), -1.0e20, 1.0e20)
    # masked_inside masks everything within the range, i.e. all values except
    # nan, so the mask is inverted before building the final array.
    self.mask_array = ma.array(in_range, mask=ma.logical_not(ma.getmask(in_range)))

    self.genelabels = rpy.r.rownames(table)
    self.no_of_genes = len(self.genelabels)
    self.no_of_cols = len(in_range[0])
    self.mask_matrix = ma.identity(self.no_of_cols)
    del in_range, table
def categorise_genes(mart, genes):
    """Annotate/categorise genes according to GO.

    Calls the R function ``categoriseGenes`` with ``genes`` and ``mart`` (in
    that order) and returns its result as an unconverted R object.
    """
    import rpy

    categorise = rpy.with_mode(rpy.NO_CONVERSION, rpy.r.categoriseGenes)
    return categorise(genes, mart)
def funcion(dato, variable, caso, opciones):
    """Convert the input data into the results.

    Queries ``dato`` for the values of ``variable`` and of the column named
    by ``opciones["Factor"]`` (both restricted to ``caso``), fits R's ``aov``
    on the formula ``Variable ~ Factor`` and returns ``rpy.r.summary`` of the
    fit.
    """
    import rpy #pylint: disable=import-error

    # Responses go to R as floats, factor levels as their repr() strings.
    r_data = {
        "Variable": [float(valor) for valor in dato.query(variable, caso = caso)],
        "Factor": [repr(nivel) for nivel in dato.query(opciones["Factor"], caso = caso)],
    }

    build_frame = rpy.with_mode(rpy.NO_CONVERSION, rpy.r.data_frame)
    fit_aov = rpy.with_mode(rpy.NO_CONVERSION, rpy.r.aov)

    tabla = build_frame(r_data)
    formula = rpy.r("Variable ~ Factor")
    ajuste = fit_aov(formula, tabla)
    return rpy.r.summary(ajuste)
def get_mart(dataset="mmusculus_gene_ensembl"):
    """Get the ensembl mart for ``dataset``.

    The mart is created through R's ``useMart`` the first time a dataset is
    requested and stored in the module-level ``_marts`` mapping; later calls
    return the stored object.
    """
    if dataset not in _marts:
        use_mart = rpy.with_mode(rpy.NO_CONVERSION, rpy.r.useMart)
        mart = use_mart("ensembl", dataset=dataset)
        _marts[dataset] = mart
    return _marts[dataset]
def r_from_str(s):
    """Load R object(s) from the saved representation ``s``.

    ``s`` is written to a temporary file, which is then read with R's
    ``load``; the temporary file is removed afterwards.

    NOTE(review): the original summary reads "Returns an R object in a
    representation as a list of strings", but the visible code does not
    return the value produced by ``load`` -- confirm against the full file.
    """
    from rpy import r, with_mode, NO_CONVERSION
    from tempfile import mkstemp

    # mkstemp creates the file atomically, avoiding the race in mktemp.
    fd, tmpfile = mkstemp()
    #logging.info('Tmpfile: %s' % tmpfile)
    try:
        handle = os.fdopen(fd, 'w')
        try:
            handle.write(s)
        finally:
            # Close explicitly so the data is flushed before R reads the file.
            handle.close()
        names = with_mode(NO_CONVERSION, lambda: r.load(file=tmpfile))()
    finally:
        if os.access(tmpfile, os.R_OK):
            os.remove(tmpfile)
def r_from_str(s):
    """Load R object(s) from the saved representation ``s``.

    ``s`` is written to a temporary file, which is then read with R's
    ``load``; the temporary file is removed afterwards.

    NOTE(review): the original summary reads "Returns an R object in a
    representation as a list of strings", but the visible code does not
    return the value produced by ``load`` -- confirm against the full file.
    """
    from rpy import r, with_mode, NO_CONVERSION
    from tempfile import mkstemp

    # mkstemp creates the file atomically, avoiding the race in mktemp.
    fd, tmpfile = mkstemp()
    #logging.info('Tmpfile: %s' % tmpfile)
    try:
        handle = os.fdopen(fd, 'w')
        try:
            handle.write(s)
        finally:
            # Close explicitly so the data is flushed before R reads the file.
            handle.close()
        names = with_mode(NO_CONVERSION, lambda: r.load(file=tmpfile))()
    finally:
        if os.access(tmpfile, os.R_OK):
            os.remove(tmpfile)
def research_pair(analysis, categs, pair, max_separation=45):
    """Look for information about a particular pair.

    Prints the genes in which the pair was found and runs the R helper
    ``analyseGoAnnotations`` on them.

    analysis: The analysis file or object
    categs: GO categories R object
    pair: The definition of the pair
    max_separation: Passed through to ``find_pair_in_analysis``
    """
    import rpy

    matches = find_pair_in_analysis(analysis, pair, max_separation)

    # The gene identifier is the first whitespace-separated token of each entry.
    genes = []
    for entry in matches:
        genes.append(entry.split()[0])

    # A single parenthesised argument prints the same under the Python 2
    # print statement and the print() function.
    print('%s is in:\n%s' % (str(pair), "\n".join(genes)))

    annotate = rpy.with_mode(rpy.NO_CONVERSION, rpy.r.analyseGoAnnotations)
    # NOTE(review): the result is computed but not returned in the visible code.
    result = annotate(categs, genes)
def print_pairs_go_analysis(pairs, gene_universe, analysis):
    """Print the GO analysis for the given pairs.

    For every element of ``pairs`` the ``binder_pair`` attribute is looked up
    in ``analysis`` (max separation 45); the matching genes are printed and
    passed to the R helpers ``analyseGoAnnotations`` and
    ``printAnnotationResult``.
    """
    import rpy
    import r_go

    mart = r_go.get_mart("mmusculus_gene_ensembl")
    categs = r_go.categorise_genes(mart, gene_universe)

    # Example of a pair (presumably): ( 'M00349', 'M00350', True, False )
    for item in pairs:
        pair = item.binder_pair
        # Single parenthesised arguments print identically under the Python 2
        # print statement and the print() function.
        print(pair)

        in_analysis = find_pair_in_analysis(analysis, pair, max_separation=45)
        # The gene identifier is the text before the first space of each key.
        genes = []
        for seq, _hits in in_analysis.iteritems():
            genes.append(seq.split(' ')[0])
        print(len(genes))
        print("\n".join(genes))

        annotate = rpy.with_mode(rpy.NO_CONVERSION, rpy.r.analyseGoAnnotations)
        result = annotate(categs, genes)
        rpy.r.printAnnotationResult(result, 10)
def print_pairs_go_analysis(pairs, gene_universe, analysis):
    """Print the GO analysis for the given pairs.

    For every element of ``pairs`` the ``binder_pair`` attribute is looked up
    in ``analysis`` (max separation 45); the matching genes are printed and
    passed to the R helpers ``analyseGoAnnotations`` and
    ``printAnnotationResult``.
    """
    import rpy
    import r_go

    mart = r_go.get_mart("mmusculus_gene_ensembl")
    categs = r_go.categorise_genes(mart, gene_universe)

    # Example of a pair (presumably): ( 'M00349', 'M00350', True, False )
    for item in pairs:
        pair = item.binder_pair
        # Single parenthesised arguments print identically under the Python 2
        # print statement and the print() function.
        print(pair)

        in_analysis = find_pair_in_analysis(analysis, pair, max_separation=45)
        # The gene identifier is the text before the first space of each key.
        genes = []
        for seq, _hits in in_analysis.iteritems():
            genes.append(seq.split(' ')[0])
        print(len(genes))
        print("\n".join(genes))

        annotate = rpy.with_mode(rpy.NO_CONVERSION, rpy.r.analyseGoAnnotations)
        result = annotate(categs, genes)
        rpy.r.printAnnotationResult(result, 10)