示例#1
0
    def _remember_dict(r, robj=None):
        """Convert a mapping into an R list, optionally layered over ``robj``.

        Every value of ``r`` is passed through ``Remember._remember_item`` and
        the results are wrapped in an ``ro.ListVector`` under their original
        names.  When ``robj`` is given, its items are combined with the new
        ones and a fresh ``ro.ListVector`` is returned; on a name clash the
        newly converted entry replaces the one from ``robj``.
        """
        converted = {}
        for name, x in r.items():
            converted[name] = Remember._remember_item(x)
        new = ro.ListVector(converted)

        if robj is None:
            return new

        # Start from the existing entries, then let the new ones override.
        merged = dict(robj.items())
        merged.update(dict(new.items()))
        return ro.ListVector(merged)
示例#2
0
 def testNewListVector(self):
     """Build a ListVector from a dict, a tuple of pairs and an iterator.

     Each construction is checked with ``self._testNewListVector``.
     """
     vec = robjects.ListVector({'a': 1, 'b': 2})
     self._testNewListVector(vec)
     s = (('a', 1), ('b', 2))
     vec = robjects.ListVector(s)
     self._testNewListVector(vec)
     # Pass the iterator itself: previously `s` was passed a second time,
     # so construction from an iterator was never exercised.
     it = iter(s)
     vec = robjects.ListVector(it)
     self._testNewListVector(vec)
示例#3
0
    def viper(self, data_mx, subunit_set, subunit_tfms):
        """Run the R ``viper`` function on ``data_mx`` and return a DataFrame.

        Args:
            data_mx: pandas DataFrame; its values are sent to R as a numeric
                matrix with the frame's index as row names and its columns
                as column names.
            subunit_set: mapping from a regulator name to the names of its
                targets (used as the names of the ``tfmode`` vector).
            subunit_tfms: sequence of mappings, one per network, from a
                regulator name to the values stored as ``tfmode``.

        Returns:
            A DataFrame built from the R result, with the R column names as
            columns and an extra ``query_id`` column holding the R row names.
        """
        from rpy2 import robjects
        from rpy2.robjects import pandas2ri
        from rpy2.robjects.conversion import localconverter
        from rpy2.robjects.packages import importr

        base = importr('base')
        try:
            vp = importr("viper")
        except Exception:
            # Package could not be loaded: install it, then retry.  A bare
            # ``except`` here would also swallow KeyboardInterrupt/SystemExit.
            # NOTE(review): biocLite has been superseded by BiocManager in
            # recent Bioconductor releases -- confirm this still works.
            base.source("http://www.bioconductor.org/biocLite.R")
            biocinstaller = importr("BiocInstaller")
            biocinstaller.biocLite("viper")
            vp = importr("viper")

        # One R list per network, each holding one regulon per regulator.
        r_networks = robjects.ListVector.from_length(len(subunit_tfms))

        for i, subunit_tfm in enumerate(subunit_tfms):
            regulons = []
            for subunit in subunit_set:
                targets = subunit_set[subunit]
                tfmode = robjects.FloatVector(
                    np.asarray(subunit_tfm[subunit]).astype(float))
                tfmode.names = robjects.StrVector(targets)
                # Every target gets the same likelihood of 1.0.
                likelihood = robjects.FloatVector(
                    np.repeat(1.0, len(targets)))

                regulons.append(robjects.ListVector({
                    'tfmode': tfmode,
                    'likelihood': likelihood,
                }))

            # Name each regulon after its regulator.
            r_networks[i] = robjects.ListVector(
                zip(subunit_set.keys(), regulons))

        # R fills matrices column by column, hence the transpose before
        # flattening.
        mx_nr, mx_nc = data_mx.shape
        mx_vec = robjects.FloatVector(data_mx.values.transpose().reshape(
            (data_mx.size)))
        r_mx = robjects.r.matrix(mx_vec, nrow=mx_nr, ncol=mx_nc)
        r_mx.rownames = robjects.StrVector(data_mx.index)
        r_mx.colnames = robjects.StrVector(data_mx.columns)

        # Compute VIPER profile
        vpres = vp.viper(r_mx,
                         r_networks,
                         verbose=False,
                         minsize=1,
                         cores=self.threads)

        with localconverter(robjects.default_converter + pandas2ri.converter):
            vpres_df = robjects.conversion.rpy2py(vpres)
        pd_mx = pd.DataFrame(vpres_df, columns=vpres.colnames)
        pd_mx['query_id'] = vpres.rownames
        return (pd_mx)
示例#4
0
def call_function(id, function, add, *args, **kwargs):
    """Invoke a ``dynwrap`` R function and bind its result to ``id`` in R.

    The positional and keyword arguments are published in the R global
    environment as the lists ``args`` and ``kwargs``; the call itself is
    issued as R source text through ``ro.r``.  When ``add`` is true, the
    current R value named ``id`` is passed as the first argument.

    Returns the R variable name ``id``.
    """
    positional = [["wazzup", x] for x in args]
    named = [[name, x] for name, x in kwargs.items()]
    ro.globalenv["args"] = ro.ListVector(positional)
    ro.globalenv["kwargs"] = ro.ListVector(named)

    # The placeholder names on the positional list are stripped on the R
    # side so the values are passed unnamed.
    ro.r("names(args) <- NULL")

    if not add:
        fun = f"{id} = do.call(dynwrap::{function}, c(args, kwargs))"
    else:
        fun = f"{id} = do.call(dynwrap::{function}, c(list({id}), args, kwargs));NULL"
    ro.r(fun)

    return id
示例#5
0
def sarima_test(steps, path):
    """Fit a seasonal ARIMA on the series in ``path`` and return its MAPE.

    The model is fitted on the slice ``[-5 * 52:-steps]`` of the R time
    series and asked to forecast ``steps`` points, which are compared with
    the last ``steps`` observed values.  Actual and predicted values are
    also drawn on the current matplotlib figure.

    Returns:
        The mean absolute percentage error of the forecast, in percent.
    """
    index_name, my_trend = parse_csv(path)
    frame = pd.DataFrame(my_trend)
    frame.index = index_name
    frame = frame.rename(columns={0: 'search'})

    series = stats.ts(com.convert_to_r_dataframe(frame))
    arima_order = R.IntVector((1, 1, 1))
    seasonal_spec = R.ListVector({'order': R.IntVector((0, 1, 0)), 'period': 52})
    fitted = stats.arima(series[-5 * 52:-steps], order=arima_order,
                         seasonal=seasonal_spec)
    f = forecast.forecast(fitted, h=steps)
    # presumably the fourth component of the forecast object holds the point
    # forecasts -- TODO confirm against the R `forecast` package
    future = list(f[3])

    y_pred = np.array(future)
    y_true = np.array(my_trend[-steps:])
    mae = metrics.mean_absolute_error(y_true, y_pred)
    mse = metrics.mean_squared_error(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    metrics_result = {
        'sarima_MAE': mae,
        'sarima_MSE': mse,
        'sarima_MAPE': mape,
    }

    actual_line = plt.plot(my_trend[-steps:], '*-')
    predicted_line = plt.plot(future)
    plt.ylabel('Search Intensity')
    plt.xlabel('Year')
    plt.title('Search Prediction of ' + path.split('/')[-1][:-4])
    plt.legend((actual_line[0], predicted_line[0]), ["Actual", "Predicted"], loc=1)
    plt.grid(True)
    return metrics_result['sarima_MAPE']
示例#6
0
def get_gcindices_r(countsGeneLabels,
                    correctBackground=False,
                    remove_empty=True):
    """
    Map every gene set of each collection from ``LSD.get_msigdb6()`` to
    indices into ``countsGeneLabels`` using limma's ``ids2indices``.

    With ``correctBackground`` each gene set is first reduced to the genes
    that occur in ``countsGeneLabels``.  Returns a dict keyed by collection
    name holding the R result for that collection.

    >>> indices_r = get_gcindices_r(counts.index,correctBackground=False) # doctest: +SKIP

    TODO overlapping with genesets2indices_r => refactor code
    """
    limma = importr('limma')
    gc = LSD.get_msigdb6()

    if correctBackground:
        restricted = {}
        for gsc in gc:
            kept = {}
            for gs in gc[gsc]:
                kept[gs] = [g for g in gc[gsc][gs] if g in countsGeneLabels]
            restricted[gsc] = kept
        gc = restricted

    countsGeneLabels_r = ro.StrVector(countsGeneLabels)
    gc_indices_r = {}
    for gsc in gc:
        gc_indices_r[gsc] = limma.ids2indices(ro.ListVector(gc[gsc]),
                                              countsGeneLabels_r,
                                              remove_empty=remove_empty)

    return gc_indices_r
示例#7
0
    def identify(self, polarity, in_file, databases, non_empty, mzmatch_params,
                 mzmatch_outputs):
        """Call the R function ``Pimp.identify.metabolites`` and return its result.

        ``non_empty`` is iterated as 5-tuples
        ``(group_label, index, description, files, abspath)``; only the label
        and the files are used, to build an R list of string vectors.
        """
        groups = robjects.ListVector({
            group_label: robjects.StrVector(files)
            for group_label, _index, _description, files, _abspath in non_empty
        })

        # 'stds.xml.db' always carries a path (e.g.
        # /home/pimp/media/projects/10/analysis_38/stds_db.xml) whether or not
        # that file exists, which differs from the old pipeline.  When the
        # file is missing the entry is therefore replaced by R NULL.
        stds_xml_file = os.path.abspath(
            self.get_value(mzmatch_outputs, 'stds.xml.db')[0])
        if not os.path.isfile(stds_xml_file):
            logger.info('%s is not found, setting stds.xml.db to NULL',
                        stds_xml_file)
            self.set_value(mzmatch_outputs, 'stds.xml.db', robjects.r("NULL"))

        # Built as a dict because several R argument names contain dots.
        args = {
            'in_file': in_file,
            'databases': databases,
            'groups': groups,
            'mzmatch.outputs': mzmatch_outputs,
            'mzmatch.params': mzmatch_params,
            'polarity': polarity
        }

        return run_r('Pimp.identify.metabolites', **args)
示例#8
0
    def setREnv(self, call, tsname='r_timeseries', inline=False, **kwargs):
        """Build the R source text for ``call(tsname, key=value, ...)``.

        Args:
            call: name of the R function to call.
            tsname: R variable name passed as the first argument.
            inline: when true, booleans, strings, float lists and other
                plain values are written literally into the command; when
                false every value is stored in the R global environment
                under its keyword name and referenced by that name.
            **kwargs: the keyword arguments of the R call.

        Integer lists and dicts are always stored in the R global
        environment and referenced by name, in both modes.

        Returns:
            The command string, e.g. ``'acf(r_timeseries, plot=FALSE)'``.
        """
        def _r_number(value):
            # repr() of a finite float is valid R; the special values are not.
            if value != value:
                return "NaN"
            if value in (float("inf"), float("-inf")):
                return "Inf" if value > 0 else "-Inf"
            return repr(value)

        pieces = [tsname]
        for key, item in kwargs.items():
            # Default right-hand side: the global variable of the same name.
            rhs = key
            if isinstance(item, bool):
                if inline:
                    rhs = "TRUE" if item else "FALSE"
                else:
                    ro.globalenv[
                        key] = ro.rinterface.TRUE if item else ro.rinterface.FALSE
            elif isinstance(item, list) and all(
                    isinstance(x, float) for x in item):
                if inline:
                    # Emit an R vector literal rather than Python's list repr.
                    rhs = "c({})".format(
                        ", ".join(_r_number(x) for x in item))
                else:
                    ro.globalenv[key] = pandas2ri.FloatSexpVector(item)
            elif isinstance(item, list) and all(
                    isinstance(x, int) for x in item):
                # No inline form: keep referencing the global by name
                # (previously inline mode emitted ``key=None``).
                ro.globalenv[key] = pandas2ri.IntSexpVector(item)
            elif isinstance(item, dict):
                ro.globalenv[key] = ro.ListVector(item)
            elif inline:
                rhs = '\"' + item + '\"' if isinstance(item, str) else item
            else:
                try:
                    ro.globalenv[key] = item
                except Exception:
                    # Best effort: log and still reference the name.
                    logging.error('Variable {} - Traceback - {}'.format(
                        key, self.rtracebackerror()))

            pieces.append("{}={}".format(key, rhs))
        return "{}({})".format(call, ", ".join(pieces))
示例#9
0
    def get_deseq_result(self, contrast=None, **kwargs):
        '''
        DESeq2: result(dds, contrast)
        making a dds.deseq_result pandas dataframe

        contrast may be omitted, have three elements (sent to R as one
        character vector) or two elements (sent to R as an unnamed list of
        two character vectors).  Any other length raises ValueError.
        Extra keyword arguments are forwarded to ``deseq.results``.

        The R result is kept in ``self.result``; the converted pandas
        dataframe, with ``self.gene_column`` filled from ``self.gene_id``,
        is stored in ``self.deseq_result``.
        '''

        if contrast:
            if len(contrast) == 3:
                R_contrast = robjects.vectors.StrVector(np.array(contrast))
            elif len(contrast) == 2:
                # A dict comprehension keyed on None keeps only the last
                # element, so build the unnamed two-element R list directly.
                R_contrast = robjects.r['list'](*[
                    robjects.StrVector(
                        [con] if isinstance(con, str) else list(con))
                    for con in contrast
                ])
            else:
                raise ValueError('Contrast must be length of 3 or 2')
            # Lazy %-args: '%s' % contrast raises TypeError for a tuple.
            logger.info('Using contrast: %s', contrast)
            self.result = deseq.results(self.dds,
                                        contrast=R_contrast,
                                        **kwargs)  # Robject
        else:
            self.result = deseq.results(self.dds, **kwargs)  # R object
        self.deseq_result = to_dataframe(self.result)  # R dataframe
        with localconverter(robjects.default_converter + pandas2ri.converter):
            self.deseq_result = robjects.conversion.rpy2py(
                self.deseq_result)  ## back to pandas dataframe
        self.deseq_result[self.gene_column] = self.gene_id.values
示例#10
0
 def eval(self, x):
     """Evaluate the fitted spline at ``x``.

     ``x`` may be a scalar or a sequence.  If ``"X"`` is in ``self.log10``
     the inputs are log10-transformed before prediction; if ``"Y"`` is in
     ``self.log10`` the predictions are raised as powers of ten afterwards.
     A single prediction is returned as a scalar, otherwise an array.
     """
     x = np.array(x, ndmin=1)
     inputs = np.log10(x) if "X" in self.log10 else x
     x_new = ro.ListVector({"x": ro.FloatVector(inputs)})

     y_pred = np.array(self.mgcv.predict_gam(self.spline, newdata=x_new))
     if "Y" in self.log10:
         y_pred = 10 ** y_pred

     return y_pred[0] if len(y_pred) == 1 else y_pred
示例#11
0
 def testReprNonVectorInList(self):
     """The third line of the repr names the types of the list's elements."""
     items = OrderedDict()
     items['a'] = 1
     items['b'] = robjects.Formula('y ~ x')
     vec = robjects.ListVector(items)

     lines = repr(vec).split('\n')
     self.assertEqual('[IntVector, Formula]', lines[2].strip())
示例#12
0
def sarima(steps, path):
    """Fit a seasonal ARIMA to the series in ``path`` and forecast ahead.

    The series is fitted with order (1, 1, 1) and a seasonal component of
    order (0, 1, 0) with period 52.  The time spent fitting is printed.

    Args:
        steps: number of points to forecast.
        path: file handed to ``parse_csv``.

    Returns:
        ``(index_name, dt, my_trend, future)``: the observed index, the
        weekly date index generated for the forecast, the observed values,
        and the forecast values as a list.
    """
    index_name, my_trend = parse_csv(path)
    dta = pd.DataFrame(my_trend)
    dta.index = index_name
    dta = dta.rename(columns={0: 'search'})

    # NOTE: exploratory diagnostics that used to live here as commented-out
    # code (ADF stationarity tests, first and 52-period differencing,
    # ACF/PACF plots) have been removed.

    r_df = com.convert_to_r_dataframe(dta)
    y = stats.ts(r_df)
    order = R.IntVector((1, 1, 1))
    season = R.ListVector({'order': R.IntVector((0, 1, 0)), 'period': 52})
    started = time.time()
    model = stats.arima(y, order=order, seasonal=season)
    # A parenthesised single-argument print behaves the same on Python 2
    # and Python 3.
    print(time.time() - started)
    f = forecast.forecast(model, h=steps)
    # presumably the fourth component of the forecast object holds the point
    # forecasts -- TODO confirm against the R `forecast` package
    future = list(f[3])
    # Build the date index: start at the last observed date, then drop it.
    dt = date_range(dta.index[-1], periods=len(future) + 1,
                    freq='W')[1:]
    return index_name, dt, my_trend, future
示例#13
0
 def _run_gsea(df, genesets, method='ssgsea', verbose=False, **kwargs):
     """Run the R ``gsva`` function on ``df`` and return a pandas DataFrame.

     ``df`` is converted with R's ``as.matrix`` and ``genesets`` is wrapped
     in an R list.  The result is indexed by the R row names of the output
     and takes its column labels from ``df``.
     """
     rdata = r('as.matrix')(df)
     rgenesets = robjects.ListVector(genesets)
     gsva = r('gsva')
     res = gsva(rdata, rgenesets, method=method, verbose=verbose, **kwargs)

     py_res = pandas2ri.ri2py_dataframe(res)
     py_res.index = r('rownames')(res)
     # Column labels come from the input frame, not from R's colnames.
     py_res.columns = df.columns
     return py_res
示例#14
0
def getKruskal(wt_rankpt_dist, mut_rankpt_dist, mut_wt_conn_dist):
    """Run R's ``kruskal.test`` on the three samples.

    Returns the first value of the third component of the test result
    (presumably the p-value -- confirm against R's ``htest`` layout).
    """
    samples = robjects.ListVector({
        'a': robjects.FloatVector(wt_rankpt_dist),
        'b': robjects.FloatVector(mut_rankpt_dist),
        'c': robjects.FloatVector(mut_wt_conn_dist),
    })
    kruskal_test = robjects.r["kruskal.test"]
    outcome = kruskal_test(samples)
    return outcome[2][0]
示例#15
0
def test_repr_nonvectorinlist():
    """The repr of a list holding a non-vector starts with the expected header."""
    items = OrderedDict([
        ('a', 1),
        ('b', robjects.Formula('y ~ x')),
    ])
    vec = robjects.ListVector(items)
    expected_prefix = ("R object with classes: (\'RTYPES.VECSXP',) "
                       "mapped to:\n[IntVector, Formula]")
    assert repr(vec).startswith(expected_prefix)
示例#16
0
def test_repr_nonvectorinlist():
    """The repr of a list holding a non-vector starts with the expected header."""
    items = OrderedDict([
        ('a', 1),
        ('b', robjects.Formula('y ~ x')),
    ])
    vec = robjects.ListVector(items)
    expected_prefix = ("R object with classes: ('list',) mapped to:%s"
                       "[IntSexpVector, LangSexpVector]" % os.linesep)
    assert repr(vec).startswith(expected_prefix)
    def predict_unstructured(self, data, **kwargs):
        """Call the R ``predict_unstructured`` handler and convert its result.

        ``data`` is passed through unchanged unless it is ``bytes``, in which
        case it is wrapped in an R byte vector.  The query entry of
        ``kwargs``, if present, is turned into an R list, and keyword
        arguments whose value is ``None`` are not forwarded.

        Returns:
            A 2-tuple: the first element of the R result converted to
            ``None``/``bytes``/``str``, and the second element converted to
            a dict (or ``None`` when it is R NULL).

        Raises:
            DrumCommonException: if the handler does not return an R list,
                or a returned value is neither NULL, raw nor character.
        """
        def _r_is_character(r_val):
            return bool(ro.r("is.character")(r_val))

        def _r_is_raw(r_val):
            return bool(ro.r("is.raw")(r_val))

        def _r_is_null(r_val):
            return r_val == ro.rinterface.NULL

        def _cast_r_to_py(r_val):
            # TODO: consider checking the rpy2 proxy type with isinstance()
            # instead of asking the R interpreter.
            if _r_is_null(r_val):
                return None
            if _r_is_raw(r_val):
                return bytes(r_val)
            if _r_is_character(r_val):
                # R hands scalars back as one-element vectors; unwrap it.
                return str(r_val[0])
            raise DrumCommonException(
                "Can not convert R value {} type {}".format(
                    r_val, type(r_val)))

        def _rlist_to_dict(rlist):
            if _r_is_null(rlist):
                return None
            converted = {}
            for k, v in rlist.items():
                converted[str(k)] = _cast_r_to_py(v)
            return converted

        query_key = UnstructuredDtoKeys.QUERY
        if query_key in kwargs:
            kwargs[query_key] = ro.ListVector(kwargs[query_key])

        # A str is converted to an R character value automatically; bytes
        # must be wrapped explicitly to arrive in R as a raw vector.
        if isinstance(data, bytes):
            r_payload = ro.vectors.ByteVector(data)
        else:
            r_payload = data

        forwarded = {}
        for k, v in kwargs.items():
            if v is not None:
                forwarded[k] = v

        r_result = r_handler.predict_unstructured(
            model=self._model, data=r_payload, **forwarded)
        if not isinstance(r_result, ro.vectors.ListVector):
            raise DrumCommonException(
                "Wrong type returned in unstructured mode: {}".format(
                    type(r_result)))

        return _cast_r_to_py(r_result[0]), _rlist_to_dict(r_result[1])
示例#18
0
def test_repr_nonvectorinlist():
    """Lines two and three of the repr give the R classes and element types."""
    items = OrderedDict([
        ('a', 1),
        ('b', robjects.Formula('y ~ x')),
    ])
    vec = robjects.ListVector(items)
    lines = repr(vec).split(os.linesep)
    classes_line, types_line = lines[1], lines[2]
    assert classes_line.startswith("R classes: ('list',)")
    assert types_line.startswith("[IntSexpVector, LangSexpVector]")
示例#19
0
def toR(something):
    """Convert a Python list or dict to an R vector; pass other values through.

    * dict -> ``ro.ListVector``
    * non-empty list of bools -> ``ri.BoolSexpVector``
    * non-empty list of ints -> ``ri.IntSexpVector``
    * non-empty list of ints and floats -> ``ri.FloatSexpVector``
    * any other list (including an empty one) -> ``ri.StrSexpVector``
    * anything else is returned unchanged
    """
    if isinstance(something, dict):
        return ro.ListVector(something)
    if not isinstance(something, list):
        return something

    # The element types decide the vector type.  Testing the list itself
    # against float/int can never match, which sent every list to the
    # string branch.
    if something and all(isinstance(x, bool) for x in something):
        return ri.BoolSexpVector(something)
    # bool is a subclass of int, so it is excluded from the numeric test.
    is_numeric = bool(something) and all(
        isinstance(x, (int, float)) and not isinstance(x, bool)
        for x in something)
    if is_numeric:
        if all(isinstance(x, int) for x in something):
            return ri.IntSexpVector(something)
        return ri.FloatSexpVector(something)
    return ri.StrSexpVector(something)
示例#20
0
    def heatmap_annotation_key(self, name, colors):
        '''
        generates the color key for the annotation from a dict

        ``colors`` maps a label to a color.  Returns an R list with a single
        entry called ``name`` whose value is a string vector of the colors,
        named by their labels.
        '''
        keyColors = ro.StrVector(list(colors.values()))
        # Assign a real R string vector: a dict keys() view is not a
        # sequence type the R conversion layer is guaranteed to accept.
        keyColors.names = ro.StrVector([str(label) for label in colors])
        key = OrderedDict()
        key[name] = keyColors

        return ro.ListVector(key)
示例#21
0
def convert_dict(obj):
    """Convert ``obj`` to the narrowest matching R vector.

    The elements obtained by iterating ``obj`` decide the result:

    * all ``str`` (or no elements at all) -> ``ro.StrVector``
    * all ``bool`` -> ``ro.BoolVector``
    * all ``int`` -> ``ro.IntVector``
    * ``int``/``float`` mix or all ``float`` -> ``ro.FloatVector``
    * anything else -> ``ro.ListVector``
    """
    if all(isinstance(x, str) for x in obj):
        return ro.StrVector(obj)
    # bool is a subclass of int, so it has to be tested before int;
    # otherwise this branch can never be reached.
    if all(isinstance(x, bool) for x in obj):
        return ro.BoolVector(obj)
    if all(isinstance(x, int) for x in obj):
        return ro.IntVector(obj)
    # Any float present makes the whole vector floating point instead of
    # forcing the values into an integer vector.
    if all(isinstance(x, (int, float)) for x in obj):
        return ro.FloatVector(obj)

    return ro.ListVector(obj)
示例#22
0
 def on_pbExecute_clicked(self):
   """Run the R statistical summary and show it in the summary text box.

   Sources ``Rasterise_dev_61.R``, records the text of column 0 of every
   table row as a driver name mapped to the matching file name, and passes
   that mapping to the R function together with the driver type, the two
   files and the NA value taken from the form.
   """
   R.r('''  source('Rasterise_dev_61.R')  ''')
   row_count = self.ui.tableWidget.rowCount()
   for row in range(row_count):
     self.Drivername[row] = self.ui.tableWidget.cellWidget(row, 0).text()
     driver_key = str(self.Drivername[row])
     self.DictList[driver_key] = self.__filename[row]
   self.DictLen = len(self.DictList)

   drvs = R.ListVector(self.DictList)
   genSummary = R.r['genrateStatisticalSummary']
   na_value = int(self.ui.leNAValue.text())
   self.res = genSummary(str(self.DriverType), self.__T0File, self.__T1File, drvs, na_value)
   self.ui.teSummary.setPlainText(str(self.res))
示例#23
0
def d_bse_(d, N, type="quant"):
    """Assemble an R list with beta, variance, frequency, N and type.

    ``varbeta`` is the square of ``d["se"]``; ``MAF`` and ``beta`` come from
    the ``frequency`` and ``beta`` entries of ``d``; ``N`` is whatever
    ``_s(d, N)`` returns; ``type`` is passed through unchanged.
    """
    squared_se = numpy.array(d["se"])**2
    varbeta = robjects.FloatVector(squared_se)
    maf = robjects.FloatVector(d["frequency"].values)
    beta = robjects.FloatVector(d["beta"].values)
    sample_size = _s(d, N)

    fields = {}
    fields["beta"] = beta
    fields["varbeta"] = varbeta
    fields["MAF"] = maf
    fields["N"] = sample_size
    fields["type"] = type
    return robjects.ListVector(fields)
def train_elastic_net_wrapper(features_data_, features_, d_, data_annotation_, x_w=None, prune=True, nested_folds=10):
    """Build the R inputs and call ``train_elastic_net``.

    The feature matrix has one column per id in ``features_.id`` and its
    dimension names come from ``d_["individual"]`` and the feature ids.
    The response is the column of ``d_`` named by
    ``data_annotation_.gene_id``.  ``x_w``, when given, is forwarded as
    ``penalty_factor``.  ``prune`` is accepted but not used here.

    Returns:
        The first two elements of the R result, each converted with
        ``pandas2ri.ri2py``.
    """
    feature_ids = features_.id.values
    values = numpy.array([features_data_[v] for v in feature_ids])
    dimnames = robjects.ListVector([
        (1, robjects.StrVector(d_["individual"])),
        (2, robjects.StrVector(feature_ids)),
    ])
    x = robjects.r["matrix"](robjects.FloatVector(values.flatten()),
                             ncol=features_.shape[0], dimnames=dimnames)
    y = robjects.FloatVector(d_[data_annotation_.gene_id])
    nested_folds = robjects.FloatVector([nested_folds])

    # py2ri chokes on None, so the penalty factor is only passed when set.
    options = {}
    if x_w is not None:
        options["penalty_factor"] = x_w
    options["n_train_test_folds"] = nested_folds
    res = train_elastic_net(y, x, **options)

    return pandas2ri.ri2py(res[0]), pandas2ri.ri2py(res[1])
示例#25
0
 def _filter_and_values_to_RList(d):
     """Split the filter dictionary ``d`` into two R objects.

     Returns ``(f, v)``: a StrVector of the keys of ``d`` and a ListVector
     of its values tagged with those same keys.
     """
     names = list(d.keys())
     f = robjects.StrVector(names)
     # A TaggedList is used rather than handing the dict to ListVector so
     # the positional order of filters and values is guaranteed.
     tagged = rpy2.rlike.container.TaggedList(d.values(), tags=list(names))
     v = robjects.ListVector(tagged)
     return f, v
示例#26
0
def pynlsfit(valuelis, formulastr='', startvalues=list(), filename='nlsfit.txt', gformat='pdf'):
    """
    nonlinear fit of function to data

    ``valuelis`` is converted to an R data frame and fitted with the R
    function ``rnonlinfit`` using ``formulastr`` and ``startvalues``
    (anything ``dict()`` accepts).  Plots produced during the fit go to a
    file named after ``filename`` with its suffix replaced by ``gformat``.

    Returns the fit result converted by ``rlisttodic``.
    """
    rconsole = rpystatinit()
    rnonlinfit = rconsole("rnonlinfit")
    dataframe = pyobj2dataframe(valuelis)

    from rpy2.robjects.packages import importr

    grdevices = importr('grDevices')
    graphplotfile = fhutils.renamefilename(filename, suffix=gformat)
    # NOTE(review): the device is always PDF, whatever `gformat` says --
    # confirm whether other formats are meant to be supported.
    grdevices.pdf(file=graphplotfile)
    try:
        nlfit = rnonlinfit(data=dataframe, formulastr=formulastr, startvalues=robjects.ListVector(dict(startvalues)))
    finally:
        # Close the device even when the fit fails, so it is not left open.
        grdevices.dev_off()

    nlfit = rlisttodic(nlfit)
    return nlfit
示例#27
0
    def get_deseq_result(self, contrast=None, **kwargs):
        """Run DESeq2 ``results`` and store it as a pandas dataframe.

        ``contrast`` may be omitted, have three elements (converted with
        numpy2ri) or two elements (sent to R as an unnamed list of two
        character vectors).  Extra keyword arguments are forwarded to
        ``deseq.results``.

        Sets ``self.comparison`` to the result names of ``self.dds`` and
        ``self.deseq_result`` to the converted dataframe, with
        ``self.gene_column`` filled from ``self.gene_id``.
        """

        self.comparison = deseq.resultsNames(self.dds)
        if contrast:
            if len(contrast) == 3:
                contrast = robjects.numpy2ri.numpy2ri(np.array(contrast))
            else:
                assert len(contrast) == 2, 'Contrast must be length of 3 or 2'
                # A dict comprehension keyed on None keeps only the last
                # element, so build the unnamed two-element R list directly.
                contrast = robjects.r['list'](*[
                    robjects.StrVector(
                        [con] if isinstance(con, str) else list(con))
                    for con in contrast
                ])
            print('Using contrast: ', contrast)
            self.deseq_result = deseq.results(self.dds,
                                              contrast=contrast,
                                              **kwargs)
        else:
            self.deseq_result = deseq.results(self.dds, **kwargs)
        self.deseq_result = to_dataframe(self.deseq_result)
        self.deseq_result = pandas2ri.ri2py_dataframe(
            self.deseq_result)  ## back to pandas dataframe
        self.deseq_result[self.gene_column] = self.gene_id.values
示例#28
0
    def get_deseq_result(self, contrast=None, **kwargs):
        """Run DESeq2 ``results`` and return the converted result.

        ``contrast`` may be omitted, have three elements (converted with
        numpy2ri) or two elements (sent to R as an unnamed list of two
        character vectors).  Extra keyword arguments are forwarded to
        ``deseq.results``.

        Sets ``self.comparison`` to the result names of ``self.dds``, and
        both stores the converted result in ``self.deseq_result`` and
        returns it.
        """

        self.comparison = deseq.resultsNames(self.dds)
        if contrast:
            if len(contrast) == 3:
                contrast = robjects.numpy2ri.numpy2ri(np.array(contrast))
            else:
                assert len(contrast) == 2, 'Contrast must be length of 3 or 2'
                # A dict comprehension keyed on None keeps only the last
                # element, so build the unnamed two-element R list directly.
                contrast = robjects.r['list'](*[
                    robjects.StrVector(
                        [con] if isinstance(con, str) else list(con))
                    for con in contrast
                ])
            print('Using contrast: ', contrast)
            self.deseq_result = deseq.results(self.dds,
                                              contrast=contrast,
                                              **kwargs)
        else:
            self.deseq_result = deseq.results(self.dds, **kwargs)
        self.deseq_result = to_dataframe(self.deseq_result)
        self.deseq_result = conversion.rpy2py(self.deseq_result)

        return (self.deseq_result)
 def get_predictions(self, train_period_begin, train_period_end, prediction_count):
     """Fit a seasonal ARIMA on a slice of the dataset and forecast ahead.

     Rows ``train_period_begin:train_period_end`` of ``self.dataset`` are
     fitted with order (1, 0, 1), a seasonal order of (1, 0, 1) with period
     24 and method "ML".  The same slice of ``self.full_data`` is kept in
     ``self.training_data`` and ``prediction_count`` in
     ``self.forecast_period``.

     Returns:
         A list with the values of the forecast's ``mean`` component.
     """
     self.training_data = self.full_data[train_period_begin: train_period_end]
     self.forecast_period = prediction_count

     window = self.dataset.iloc[train_period_begin:train_period_end]
     series = PricePredictor.stats.ts(com.convert_to_r_dataframe(DataFrame(window)))
     orderR = R.IntVector((1,0,1))
     season = R.ListVector({'order': R.IntVector((1,0,1)), 'period' : 24})
     model = PricePredictor.stats.arima(series, order = orderR, seasonal=season,method="ML")
     f = PricePredictor.forecast.forecast(model, h=self.forecast_period)

     # Collect the values of every component named 'mean'.
     return [
         value
         for component_name, component in f.items()
         if component_name == 'mean'
         for _label, value in component.items()
     ]
示例#30
0
def genesets2indices_r(genesets, geneLabels, remove_empty=True):
    """
    Convert gene sets to index vectors with limma's ``ids2indices``.

    genesets should be a dictionary of geneset lists, and geneLabels a list of gene labels
    >>> from .tests.test_retro import testGenesets, testCountsGenelabels
    >>> print(genesets2indices_r(testGenesets,testCountsGenelabels))
    [[1]]
    [1] 1 2
    <BLANKLINE>
    [[2]]
    [1] 2 3
    <BLANKLINE>
    <BLANKLINE>

    TODO test function for gene level conversion between python and R
    """
    limma = importr('limma')
    as_str_vectors = {}
    for name, members in genesets.items():
        as_str_vectors[name] = ro.StrVector(members)
    genesets_r = ro.ListVector(as_str_vectors)
    return limma.ids2indices(genesets_r, geneLabels, remove_empty=remove_empty)