def _remember_dict(r, robj=None):
    """Convert the mapping *r* into an R ListVector via Remember._remember_item.

    When *robj* is given, its entries are merged with the freshly converted
    ones (new entries win on key collision) and the merged list is returned.
    """
    converted = ro.ListVector(
        {key: Remember._remember_item(value) for key, value in r.items()}
    )
    if robj is None:
        return converted
    merged = dict(robj.items())
    merged.update(dict(converted.items()))
    return ro.ListVector(merged)
def testNewListVector(self):
    """ListVector should accept a dict, a sequence of pairs, and an iterator of pairs."""
    vec = robjects.ListVector({'a': 1, 'b': 2})
    self._testNewListVector(vec)
    s = (('a', 1), ('b', 2))
    vec = robjects.ListVector(s)
    self._testNewListVector(vec)
    it = iter(s)
    # Bug fix: this case previously passed `s` again instead of `it`,
    # so the iterator code path was never actually exercised.
    vec = robjects.ListVector(it)
    self._testNewListVector(vec)
def viper(self, data_mx, subunit_set, subunit_tfms):
    """Run a VIPER protein-activity analysis through R.

    Builds one R regulon network per transcription-factor matrix in
    `subunit_tfms`, converts `data_mx` (pandas DataFrame, genes x samples)
    to an R matrix, and runs viper::viper on it.

    Returns a pandas DataFrame of VIPER scores with a 'query_id' column
    holding the R row names.
    """
    from rpy2 import robjects
    from rpy2.robjects import r, pandas2ri
    from rpy2.robjects.conversion import localconverter
    from rpy2.robjects.packages import importr
    base = importr('base')
    try:
        vp = importr("viper")
    # Fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
    except Exception:
        # viper not installed: bootstrap it via the (legacy) biocLite installer.
        base.source("http://www.bioconductor.org/biocLite.R")
        biocinstaller = importr("BiocInstaller")
        biocinstaller.biocLite("viper")
        vp = importr("viper")
    # Conduct VIPER analysis: one named regulon list per subunit TFM.
    r_networks = robjects.ListVector.from_length(len(subunit_tfms))
    for i, subunit_tfm in enumerate(subunit_tfms):
        regulons = []
        for subunit in subunit_set:
            tfmode = robjects.FloatVector(
                np.asarray(subunit_tfm[subunit]).astype(float))
            tfmode.names = robjects.StrVector(subunit_set[subunit])
            # Uniform likelihood of 1.0 for every target gene.
            likelihood = robjects.FloatVector(
                np.repeat(1.0, len(subunit_set[subunit])))
            regulon = robjects.ListVector({
                'tfmode': tfmode,
                'likelihood': likelihood
            })
            regulons.append(regulon)
        # Generate R regulon network for this TFM.
        r_networks[i] = robjects.ListVector(
            zip(subunit_set.keys(), regulons))
    # Generate R matrix (column-major, so transpose before flattening).
    mx_nr, mx_nc = data_mx.shape
    mx_vec = robjects.FloatVector(
        data_mx.values.transpose().reshape((data_mx.size)))
    r_mx = robjects.r.matrix(mx_vec, nrow=mx_nr, ncol=mx_nc)
    r_mx.rownames = robjects.StrVector(data_mx.index)
    r_mx.colnames = robjects.StrVector(data_mx.columns)
    # Compute VIPER profile.
    vpres = vp.viper(r_mx, r_networks, verbose=False, minsize=1,
                     cores=self.threads)
    with localconverter(robjects.default_converter + pandas2ri.converter):
        vpres_df = robjects.conversion.rpy2py(vpres)
    pd_mx = pd.DataFrame(vpres_df, columns=vpres.colnames)
    pd_mx['query_id'] = vpres.rownames
    return pd_mx
def call_function(id, function, add, *args, **kwargs):
    """Call dynwrap::<function> in the R session, forwarding args/kwargs.

    Positional args are stored in R as an unnamed list, keyword args as a
    named list.  When `add` is truthy the object bound to `id` is prepended
    to the call and reassigned in place.  Returns `id`.
    """
    ro.globalenv["args"] = ro.ListVector([["wazzup", value] for value in args])
    ro.globalenv["kwargs"] = ro.ListVector(
        [[key, value] for key, value in kwargs.items()])
    # Strip the placeholder names so positional args stay positional.
    ro.r("names(args) <- NULL")
    if add:
        r_call = f"{id} = do.call(dynwrap::{function}, c(list({id}), args, kwargs));NULL"
    else:
        r_call = f"{id} = do.call(dynwrap::{function}, c(args, kwargs))"
    ro.r(r_call)
    return id
def sarima_test(steps, path):
    """Backtest a SARIMA(1,1,1)(0,1,0)[52] forecast over the last `steps` points.

    Fits R's arima on the series (excluding the final `steps` observations),
    forecasts that holdout window, plots actual vs. predicted, and returns
    the MAPE of the forecast.
    """
    index_name, my_trend = parse_csv(path)
    dta = pd.DataFrame(my_trend)
    dta.index = index_name
    dta = dta.rename(columns={0: 'search'})
    r_df = com.convert_to_r_dataframe(dta)
    y = stats.ts(r_df)
    order = R.IntVector((1, 1, 1))
    season = R.ListVector({'order': R.IntVector((0, 1, 0)), 'period': 52})
    # Train on the last five years minus the holdout window.
    model = stats.arima(y[-5 * 52:-steps], order=order, seasonal=season)
    f = forecast.forecast(model, h=steps)
    future = list(f[3])
    y_pred = np.array(future)
    y_true = np.array(my_trend[-steps:])
    metrics_result = {
        'sarima_MAE': metrics.mean_absolute_error(y_true, y_pred),
        'sarima_MSE': metrics.mean_squared_error(y_true, y_pred),
        'sarima_MAPE': np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    }
    actual_line = plt.plot(my_trend[-steps:], '*-')
    predicted_line = plt.plot(future)
    plt.ylabel('Search Intensity')
    plt.xlabel('Year')
    plt.title('Search Prediction of ' + path.split('/')[-1][:-4])
    plt.legend((actual_line[0], predicted_line[0]), ["Actual", "Predicted"], loc=1)
    plt.grid(True)
    return metrics_result['sarima_MAPE']
def get_gcindices_r(countsGeneLabels, correctBackground=False, remove_empty=True):
    """Map MSigDB gene-set collections to limma index lists for `countsGeneLabels`.

    Returns a dict of collection name -> limma ids2indices result.

    >>> indices_r = get_gcindices_r(counts.index,correctBackground=False) # doctest: +SKIP

    TODO overlapping with genesets2indices_r => refactor code
    """
    limma = importr('limma')
    gc = LSD.get_msigdb6()
    if correctBackground:
        # Fix: build the background set once — `g in countsGeneLabels` was an
        # O(n) list/Index scan executed for every gene of every gene set.
        background = set(countsGeneLabels)
        gc = {
            gsc: {
                gs: [g for g in gc[gsc][gs] if g in background]
                for gs in gc[gsc]
            }
            for gsc in gc
        }
    countsGeneLabels_r = ro.StrVector(countsGeneLabels)
    gc_indices_r = {
        gsc: limma.ids2indices(ro.ListVector(gc[gsc]),
                               countsGeneLabels_r,
                               remove_empty=remove_empty)
        for gsc in gc
    }
    return gc_indices_r
def identify(self, polarity, in_file, databases, non_empty, mzmatch_params, mzmatch_outputs):
    """Run the R 'Pimp.identify.metabolites' step and return its raw output.

    `non_empty` is an iterable of (group_label, index, description, files,
    abspath) tuples; each group's file list is handed to R as a StrVector.
    """
    groups = robjects.ListVector({
        label: robjects.StrVector(files)
        for label, _index, _description, files, _abspath in non_empty
    })
    # turns out that 'stds.xml.db' always has a value, e.g. /home/pimp/media/projects/10/analysis_38/stds_db.xml
    # regardless of whether the file exists or not.
    # this behaviour is different from the old pipeline? So here we set it to R NULL if the file doesn't exist
    stds_xml_file = os.path.abspath(
        self.get_value(mzmatch_outputs, 'stds.xml.db')[0])
    if not os.path.isfile(stds_xml_file):
        logger.info('%s is not found, setting stds.xml.db to NULL', stds_xml_file)
        self.set_value(mzmatch_outputs, 'stds.xml.db', robjects.r("NULL"))
    args = {
        'in_file': in_file,
        'databases': databases,
        'groups': groups,
        'mzmatch.outputs': mzmatch_outputs,
        'mzmatch.params': mzmatch_params,
        'polarity': polarity
    }
    return run_r('Pimp.identify.metabolites', **args)
def setREnv(self, call, tsname='r_timeseries', inline=False, **kwargs):
    """Build an R call string '<call>(<tsname>, key=..., ...)'.

    For each keyword argument, either bind the value into R's global
    environment (inline=False; the call then references it by name) or
    embed an R literal directly in the call string (inline=True).

    Returns the assembled command string.  NOTE(review): inline rendering of
    float lists and dicts is incomplete (emits the Python repr / None) —
    preserved from the original, marked below.
    """
    command = '{}({}'.format(call, tsname)
    for key, item in kwargs.items():
        # Fix: reset per iteration so a stale value from a previous key can
        # never leak into this key's inline rendering.
        rinline = None
        if isinstance(item, bool):
            if inline:
                rinline = "TRUE" if item else "FALSE"
            else:
                ro.globalenv[
                    key] = ro.rinterface.TRUE if item else ro.rinterface.FALSE
        elif isinstance(item, list) and all(
                isinstance(x, float) for x in item):
            if inline:
                rinline = item  # This is not yet correct!
            else:
                ro.globalenv[key] = pandas2ri.FloatSexpVector(item)
        elif isinstance(item, list) and all(
                isinstance(x, int) for x in item):
            ro.globalenv[key] = pandas2ri.IntSexpVector(item)
        elif isinstance(item, dict):
            ro.globalenv[key] = ro.ListVector(item)
        else:
            try:
                if inline is False:
                    ro.globalenv[key] = item
                else:
                    if isinstance(item, str):
                        rinline = '\"' + item + '\"'
                    else:
                        rinline = item
            # Fix: narrowed from a bare `except:`.
            except Exception:
                logging.error('Variable {} - Traceback - {}'.format(
                    key, self.rtracebackerror()))
        if inline:
            command += ", {}={}".format(key, rinline)
        else:
            command += ", {}={}".format(key, key)
    command += ")"
    return command
def get_deseq_result(self, contrast=None, **kwargs):
    """Run DESeq2 results(dds[, contrast]) and store a pandas dataframe.

    A length-3 contrast is passed as a character vector, a length-2 contrast
    as an unnamed R list; any other length raises ValueError.  The result is
    kept on `self.result` (R object) and `self.deseq_result` (pandas frame
    annotated with the gene id column).
    """
    if contrast:
        n = len(contrast)
        if n == 3:
            R_contrast = robjects.vectors.StrVector(np.array(contrast))
        elif n == 2:
            R_contrast = robjects.ListVector({None: con for con in contrast})
        else:
            raise ValueError('Contrast must be length of 3 or 2')
        logger.info('Using contrast: %s' % contrast)
        self.result = deseq.results(self.dds, contrast=R_contrast, **kwargs)
    else:
        self.result = deseq.results(self.dds, **kwargs)
    self.deseq_result = to_dataframe(self.result)
    with localconverter(robjects.default_converter + pandas2ri.converter):
        # Back to a pandas dataframe.
        self.deseq_result = robjects.conversion.rpy2py(self.deseq_result)
    self.deseq_result[self.gene_column] = self.gene_id.values
def eval(self, x):
    """Evaluate the fitted mgcv GAM spline at `x`.

    `x` may be a scalar or array-like; log10 transforms are applied on input
    and/or output according to `self.log10`.  A single prediction is returned
    as a scalar, otherwise as an array.
    """
    x = np.array(x, ndmin=1)
    xs = np.log10(x) if "X" in self.log10 else x
    newdata = ro.ListVector({"x": ro.FloatVector(xs)})
    raw = np.array(self.mgcv.predict_gam(self.spline, newdata=newdata))
    y_pred = 10 ** raw if "Y" in self.log10 else raw
    return y_pred[0] if len(y_pred) == 1 else y_pred
def testReprNonVectorInList(self):
    """repr() of a ListVector must show each element's mapped class."""
    elements = OrderedDict()
    elements['a'] = 1
    elements['b'] = robjects.Formula('y ~ x')
    vec = robjects.ListVector(elements)
    lines = repr(vec).split('\n')
    self.assertEqual('[IntVector, Formula]', lines[2].strip())
def sarima(steps, path):
    """Fit SARIMA(1,1,1)(0,1,0)[52] on the full weekly series and forecast.

    Parses the CSV at `path`, fits R's arima, forecasts `steps` periods and
    returns (index_name, forecast_dates, observed_trend, forecast_values).
    """
    # NOTE: the exploratory stationarity/seasonality checks (adf.test,
    # seasonal differencing, ACF/PACF plots) that used to live here as
    # commented-out code were removed; see version control history.
    index_name, my_trend = parse_csv(path)
    dta = pd.DataFrame(my_trend)
    dta.index = index_name
    dta = dta.rename(columns={0: 'search'})
    r_df = com.convert_to_r_dataframe(dta)
    y = stats.ts(r_df)
    order = R.IntVector((1, 1, 1))
    season = R.ListVector({'order': R.IntVector((0, 1, 0)), 'period': 52})
    a = time.time()
    model = stats.arima(y, order=order, seasonal=season)
    # Bug fix: was a Python 2 `print` statement (a syntax error on Python 3).
    print(time.time() - a)
    f = forecast.forecast(model, h=steps)
    future = [var for var in f[3]]
    # Weekly date index covering the forecast horizon (skip the last
    # observed date itself).
    dt = date_range(dta.index[-1], periods=len(future) + 1, freq='W')[1:]
    return index_name, dt, my_trend, future
def _run_gsea(df, genesets, method='ssgsea', verbose=False, **kwargs):
    """Run GSVA/ssGSEA over `df` (features x samples) for the given gene sets.

    Returns a pandas DataFrame of enrichment scores indexed by gene-set name,
    with the same columns as `df`.
    """
    r_matrix = r('as.matrix')(df)
    r_genesets = robjects.ListVector(genesets)
    r_result = r('gsva')(r_matrix, r_genesets,
                         method=method, verbose=verbose, **kwargs)
    scores = pandas2ri.ri2py_dataframe(r_result)
    scores.index = r('rownames')(r_result)
    # Column order is preserved by gsva, so reuse the input's columns.
    scores.columns = df.columns
    return scores
def getKruskal(wt_rankpt_dist, mut_rankpt_dist, mut_wt_conn_dist):
    """Kruskal-Wallis rank-sum test across the three distributions.

    Returns the test's p-value (the third slot of R's htest result).
    """
    samples = robjects.ListVector({
        'a': robjects.FloatVector(wt_rankpt_dist),
        'b': robjects.FloatVector(mut_rankpt_dist),
        'c': robjects.FloatVector(mut_wt_conn_dist),
    })
    result = robjects.r["kruskal.test"](samples)
    # htest slot [2] is p.value; [0] unwraps the one-element vector.
    return result[2][0]
def test_repr_nonvectorinlist():
    """repr() must name the SEXP class and the mapped element classes."""
    elements = OrderedDict()
    elements['a'] = 1
    elements['b'] = robjects.Formula('y ~ x')
    vec = robjects.ListVector(elements)
    expected_prefix = ("R object with classes: ('RTYPES.VECSXP',) "
                       "mapped to:\n[IntVector, Formula]")
    assert repr(vec).startswith(expected_prefix)
def test_repr_nonvectorinlist():
    """repr() must name the R class and the mapped element classes."""
    elements = OrderedDict()
    elements['a'] = 1
    elements['b'] = robjects.Formula('y ~ x')
    vec = robjects.ListVector(elements)
    expected_prefix = (
        "R object with classes: ('list',) mapped to:%s"
        "[IntSexpVector, LangSexpVector]" % os.linesep)
    assert repr(vec).startswith(expected_prefix)
def predict_unstructured(self, data, **kwargs):
    """Invoke the R unstructured-predict hook and convert its result to Python.

    `data` may be str (auto-converted to an R character) or bytes (wrapped
    as an R raw vector).  The hook must return an R list of (payload,
    kwargs-list); the pair is returned as (python value, dict or None).
    Raises DrumCommonException on unconvertible values or a wrong return type.
    """
    r_is_character = ro.r("is.character")
    r_is_raw = ro.r("is.raw")

    def _is_null(value):
        return value == ro.rinterface.NULL

    def _to_python(value):
        # TODO: consider checking type against rpy2 proxy object like: isinstance(list_data_kwargs, ro.vectors.ListVector)
        # instead of calling R interpreter
        if _is_null(value):
            return None
        if bool(r_is_raw(value)):
            return bytes(value)
        if bool(r_is_character(value)):
            # Any scalar comes back from R as a one-element vector; unwrap it.
            return str(value[0])
        raise DrumCommonException(
            "Can not convert R value {} type {}".format(
                value, type(value)))

    def _to_dict(rlist):
        if _is_null(rlist):
            return None
        return {str(k): _to_python(v) for k, v in rlist.items()}

    if UnstructuredDtoKeys.QUERY in kwargs:
        kwargs[UnstructuredDtoKeys.QUERY] = ro.ListVector(
            kwargs[UnstructuredDtoKeys.QUERY])

    # str is auto-converted into an R character type; bytes must be
    # wrapped manually as a raw (byte) vector.
    payload = data
    if isinstance(payload, bytes):
        payload = ro.vectors.ByteVector(payload)

    forwarded = {k: v for k, v in kwargs.items() if v is not None}
    result = r_handler.predict_unstructured(
        model=self._model, data=payload, **forwarded)
    if not isinstance(result, ro.vectors.ListVector):
        raise DrumCommonException(
            "Wrong type returned in unstructured mode: {}".format(
                type(result)))
    return _to_python(result[0]), _to_dict(result[1])
def test_repr_nonvectorinlist():
    """The second and third repr() lines carry the class summaries."""
    vec = robjects.ListVector(OrderedDict([
        ('a', 1),
        ('b', robjects.Formula('y ~ x')),
    ]))
    lines = repr(vec).split(os.linesep)
    assert lines[1].startswith("R classes: ('list',)")
    assert lines[2].startswith("[IntSexpVector, LangSexpVector]")
def toR(something):
    """Best-effort conversion of a Python value to an rpy2 object.

    Lists are dispatched on their element types: all-float -> FloatSexpVector,
    all-int -> IntSexpVector, anything else (including empty) -> StrSexpVector.
    Dicts become a ListVector; any other value is returned unchanged for
    rpy2's own conversion machinery.
    """
    if isinstance(something, list):
        # Bug fix: the original tested isinstance(something, float/int) on the
        # *list* itself — always False — so every list, numeric or not, was
        # coerced to a StrSexpVector. Dispatch on the elements instead.
        if something and all(isinstance(x, float) for x in something):
            return ri.FloatSexpVector(something)
        elif something and all(isinstance(x, int) for x in something):
            return ri.IntSexpVector(something)
        else:
            return ri.StrSexpVector(something)
    elif isinstance(something, dict):
        return ro.ListVector(something)
    return something
def heatmap_annotation_key(self, name, colors):
    '''
    generates data frame for color key for the annotation from a dict
    '''
    # Named colour vector: levels (dict keys) name the colour strings.
    palette = ro.StrVector(list(colors.values()))
    palette.names = colors.keys()
    key = OrderedDict([(name, palette)])
    return ro.ListVector(key)
def convert_dict(obj):
    """Convert a homogeneous Python sequence to the matching R vector.

    Dispatch order: str -> StrVector, bool -> BoolVector, int -> IntVector,
    mixed int/float -> FloatVector; anything else falls back to ListVector.
    """
    # Bug fix: the original checked `int | float` before bool and float.
    # Because bool is a subclass of int, all-bool input hit the IntVector
    # branch and all-float input was truncated through IntVector; the
    # BoolVector and FloatVector branches were unreachable. Test the most
    # specific types first.
    if all([isinstance(x, str) for x in obj]):
        return ro.StrVector(obj)
    elif all([isinstance(x, bool) for x in obj]):
        return ro.BoolVector(obj)
    elif all([isinstance(x, int) for x in obj]):
        return ro.IntVector(obj)
    elif all([isinstance(x, int) | isinstance(x, float) for x in obj]):
        return ro.FloatVector(obj)
    return ro.ListVector(obj)
def on_pbExecute_clicked(self):
    # Qt slot: run the R rasterisation script over the drivers configured in
    # the table widget and display the statistical summary in the text pane.
    R.r(''' source('Rasterise_dev_61.R') ''')
    for i in range(0,self.ui.tableWidget.rowCount()):
        # Driver name typed into column 0 of row i; map it to the file path
        # captured earlier for that row.
        self.Drivername[i] = self.ui.tableWidget.cellWidget(i,0).text()
        self.DictList[str(self.Drivername[i])] = self.__filename[i]
    self.DictLen = len(self.DictList)
    # Hand the driver-name -> file mapping to R as a named list.
    drvs = R.ListVector(self.DictList)
    genSummary = R.r['genrateStatisticalSummary']
    # NOTE(review): leNAValue is presumably the NA sentinel entered in the UI;
    # int() will raise on non-numeric input — confirm upstream validation.
    self.res = genSummary(str(self.DriverType),self.__T0File,self.__T1File,drvs,int(self.ui.leNAValue.text()))
    self.ui.teSummary.setPlainText(str(self.res))
def d_bse_(d, N, type="quant"):
    """Package a summary-stats frame into an R list (coloc-style dataset).

    `d` must carry 'se', 'frequency' and 'beta' columns; the standard errors
    are squared into varbeta.  Sample size comes from the _s helper.
    """
    variances = robjects.FloatVector(numpy.array(d["se"]) ** 2)
    frequencies = robjects.FloatVector(d["frequency"].values)
    betas = robjects.FloatVector(d["beta"].values)
    sample_size = _s(d, N)
    return robjects.ListVector({
        "beta": betas,
        "varbeta": variances,
        "MAF": frequencies,
        "N": sample_size,
        "type": type
    })
def train_elastic_net_wrapper(features_data_, features_, d_, data_annotation_, x_w=None, prune=True, nested_folds=10):
    """Fit an elastic net for one gene and return its (weights, summary) frames.

    Builds an individuals x features R matrix from `features_data_`, takes the
    response from `d_` for the annotated gene, and delegates to the R
    train_elastic_net routine.
    """
    predictor_rows = numpy.array([features_data_[v] for v in features_.id.values])
    dimnames = robjects.ListVector(
        [(1, robjects.StrVector(d_["individual"])),
         (2, robjects.StrVector(features_.id.values))])
    x = robjects.r["matrix"](robjects.FloatVector(predictor_rows.flatten()),
                             ncol=features_.shape[0],
                             dimnames=dimnames)
    y = robjects.FloatVector(d_[data_annotation_.gene_id])
    folds = robjects.FloatVector([nested_folds])
    # py2ri chokes on None, so only forward the penalty factor when present.
    if x_w is None:
        res = train_elastic_net(y, x, n_train_test_folds=folds)
    else:
        # x_w is an explanatory-variable penalty factor, not an observation
        # weight (x_weight).
        res = train_elastic_net(y, x,
                                penalty_factor=x_w,
                                n_train_test_folds=folds)
    return pandas2ri.ri2py(res[0]), pandas2ri.ri2py(res[1])
def _filter_and_values_to_RList(d):
    """`d` is a dictionary of filters: values.
    Returns a StrVector and a ListVector of StrVectors"""
    # ListVector(d) would also work, but TaggedList guarantees the
    # positional correspondence between the filter names and their values.
    names = list(d.keys())
    filters_r = robjects.StrVector(names)
    values_r = robjects.ListVector(
        rpy2.rlike.container.TaggedList(d.values(), tags=names))
    return filters_r, values_r
def pynlsfit(valuelis, formulastr='', startvalues=(), filename='nlsfit.txt', gformat='pdf'):
    """Nonlinear fit of a function to data via R's nls.

    Fits `formulastr` to `valuelis` starting from `startvalues` (pairs
    convertible to a dict), writing the diagnostic plot to a graphics file
    derived from `filename`, and returns the fit result as a dict.
    """
    rconsole = rpystatinit()
    rnonlinfit = rconsole("rnonlinfit")
    dataframe = pyobj2dataframe(valuelis)
    from rpy2.robjects.packages import importr
    grdevices = importr('grDevices')
    graphplotfile = fhutils.renamefilename(filename, suffix=gformat)
    grdevices.pdf(file=graphplotfile)
    try:
        nlfit = rnonlinfit(data=dataframe,
                           formulastr=formulastr,
                           startvalues=robjects.ListVector(dict(startvalues)))
    finally:
        # Fix: close the R PDF device even if the fit raises; previously a
        # failed fit leaked the open graphics device (and an empty file).
        grdevices.dev_off()
    return rlisttodic(nlfit)
def get_deseq_result(self, contrast=None, **kwargs):
    """Run DESeq2 results() (optionally with a contrast) and store the outcome.

    A length-3 contrast is passed as a character vector, a length-2 contrast
    as an unnamed R list; other lengths raise ValueError.  The converted
    pandas frame, annotated with the gene id column, lands on
    `self.deseq_result`.
    """
    self.comparison = deseq.resultsNames(self.dds)
    if contrast:
        if len(contrast) == 3:
            contrast = robjects.numpy2ri.numpy2ri(np.array(contrast))
        elif len(contrast) == 2:
            contrast = robjects.ListVector({None: con for con in contrast})
        else:
            # Fix: was `assert`, which is silently stripped under `python -O`;
            # validate input explicitly instead.
            raise ValueError('Contrast must be length of 3 or 2')
        print('Using contrast: ', contrast)
        self.deseq_result = deseq.results(self.dds, contrast=contrast, **kwargs)
    else:
        self.deseq_result = deseq.results(self.dds, **kwargs)
    self.deseq_result = to_dataframe(self.deseq_result)
    # Back to a pandas dataframe.
    self.deseq_result = pandas2ri.ri2py_dataframe(self.deseq_result)
    self.deseq_result[self.gene_column] = self.gene_id.values
def get_deseq_result(self, contrast=None, **kwargs):
    """Run DESeq2 results() (optionally with a contrast) and return it.

    A length-3 contrast is passed as a character vector, a length-2 contrast
    as an unnamed R list; other lengths raise ValueError.  Returns (and also
    stores on self) the result converted to a pandas dataframe.
    """
    self.comparison = deseq.resultsNames(self.dds)
    if contrast:
        if len(contrast) == 3:
            contrast = robjects.numpy2ri.numpy2ri(np.array(contrast))
        elif len(contrast) == 2:
            contrast = robjects.ListVector({None: con for con in contrast})
        else:
            # Fix: was `assert`, which is silently stripped under `python -O`;
            # validate input explicitly instead.
            raise ValueError('Contrast must be length of 3 or 2')
        print('Using contrast: ', contrast)
        self.deseq_result = deseq.results(self.dds, contrast=contrast, **kwargs)
    else:
        self.deseq_result = deseq.results(self.dds, **kwargs)
    self.deseq_result = to_dataframe(self.deseq_result)
    self.deseq_result = conversion.rpy2py(self.deseq_result)
    return (self.deseq_result)
def get_predictions(self, train_period_begin, train_period_end, prediction_count):
    """Fit ARIMA(1,0,1)(1,0,1)[24] on the training window and forecast.

    Stores the training slice and forecast horizon on self, then returns the
    list of point forecasts (the 'mean' component of R's forecast result).
    """
    self.training_data = self.full_data[train_period_begin:train_period_end]
    self.forecast_period = prediction_count
    window = DataFrame(self.dataset.iloc[train_period_begin:train_period_end])
    r_df = com.convert_to_r_dataframe(window)
    y = PricePredictor.stats.ts(r_df)
    orderR = R.IntVector((1, 0, 1))
    season = R.ListVector({'order': R.IntVector((1, 0, 1)), 'period': 24})
    model = PricePredictor.stats.arima(y, order=orderR, seasonal=season, method="ML")
    f = PricePredictor.forecast.forecast(model, h=self.forecast_period)
    predValues = []
    for name, component in f.items():
        # Only the 'mean' slot carries the point forecasts.
        if name == 'mean':
            predValues.extend(entry[1] for entry in component.items())
    return predValues
def genesets2indices_r(genesets, geneLabels, remove_empty=True):
    """ genesets should be a dictionary of geneset lists, and geneLabels a list of gene labels

    >>> from .tests.test_retro import testGenesets, testCountsGenelabels
    >>> print(genesets2indices_r(testGenesets,testCountsGenelabels))
    [[1]]
    [1] 1 2
    <BLANKLINE>
    [[2]]
    [1] 2 3
    <BLANKLINE>
    <BLANKLINE>

    TODO test function for gene level conversion between python and R
    """
    limma = importr('limma')
    # Named R list of StrVectors, one entry per gene set.
    genesets_r = ro.ListVector(
        {name: ro.StrVector(members) for name, members in genesets.items()})
    return limma.ids2indices(genesets_r, geneLabels, remove_empty=remove_empty)