Пример #1
0
    def __init__(self, count_matrix, design_matrix, design_formula, gene_column):
        """Validate the inputs, store them and convert them with rpy2.

        The attribute names (``dds``, ``deseq_result``) suggest this wraps a
        DESeq2 analysis -- TODO confirm against the rest of the class.

        Args:
            count_matrix: Object exposing ``columns``, ``drop`` and ``shape``
                (presumably a pandas DataFrame -- confirm); it must contain
                the ``gene_column`` column.
            design_matrix: Object exposing ``shape``; converted with
                ``conversion.py2rpy``.
            design_formula: Value wrapped in an rpy2 ``Formula``.
            gene_column: Name of the gene identifier column of
                ``count_matrix``.

        Raises:
            AssertionError: If ``gene_column`` is not in
                ``count_matrix.columns``.
            Exception: If the membership check raises ``AttributeError``.
        """
        try:
            # NOTE(review): ``assert`` is stripped when Python runs with -O,
            # so this validation silently disappears in optimized mode.
            assert gene_column in count_matrix.columns, 'Wrong gene id column name'

        except AttributeError:
            raise Exception('Wrong Pandas dataframe?')

        # rpy2 is imported lazily, inside the constructor.
        import rpy2
        from rpy2.robjects import pandas2ri, Formula, conversion

        # Activates the pandas conversion before any py2rpy call below.
        pandas2ri.activate()

        # Placeholders; nothing in this method fills them in.
        self.dds = None
        self.deseq_result = None
        self.comparison = None
        self.normalized_count_matrix = None
        self.gene_column = gene_column
        # Keep the gene identifiers before the column is dropped below.
        self.gene_id = count_matrix[self.gene_column]

        # Rebinds the local name only; the result of drop() is a new object.
        count_matrix = count_matrix.drop(gene_column, axis=1)

        logging.info(
            f'Number of columns in counts data {count_matrix.shape[1]} | '
            f'Number of rows in design matrix {design_matrix.shape[0]}'
        )

        # Load dataframe into R environment
        # Important: Change to r.data() if you use numpys and rpy2 latest versions
        self.count_matrix = conversion.py2rpy(count_matrix)

        # Assign columns to NULL
        self.count_matrix.names = rpy2.rinterface.NULL
        # NOTE(review): this rebinds self.count_matrix to the Python-side
        # object, discarding the converted R object and the names reset from
        # the two statements above -- confirm this is intended.
        self.count_matrix = count_matrix
        self.design_matrix = conversion.py2rpy(design_matrix)
        self.design_formula = Formula(design_formula)
Пример #2
0
 def __init__(self, *args, **kwargs):
     """Call the class-level R constructor with converted arguments.

     Every positional and keyword argument goes through
     ``conversion.py2rpy``; the constructor is then invoked through
     ``rcall`` in ``robjects.globalenv`` and this instance is initialised
     from the ``__sexp__`` of the result.
     """
     r_arguments = OrdDict()
     for positional in args:
         # Positional arguments are stored under the key None (presumably
         # how an unnamed R argument is expressed -- confirm).
         r_arguments[None] = conversion.py2rpy(positional)
     for name, value in kwargs.items():
         r_arguments[name] = conversion.py2rpy(value)
     call_items = tuple(r_arguments.items())
     constructed = self._constructor.rcall(call_items, robjects.globalenv)
     super().__init__(constructed.__sexp__)
Пример #3
0
    def test_scalar(self):
        """Check that int32 and int64 scalars survive py2rpy and back."""
        for scalar_type in (numpy.int32, numpy.int64):
            original = scalar_type(100)
            as_r = conversion.py2rpy(original)
            # Read the single element back through a numpy array.
            round_tripped = numpy.array(as_r)[0]
            assert original == round_tripped
Пример #4
0
 def __call__(self, *args, **kwargs):
     """Call the parent implementation with converted arguments.

     Positional arguments are always passed through ``conversion.py2rpy``.
     Keyword values that already are ``rinterface.Sexp`` instances are
     forwarded untouched; the others are converted. The result is passed
     through ``conversion.rpy2py`` before being returned.
     """
     converted_args = [conversion.py2rpy(arg) for arg in args]
     # TODO: shouldn't this be handled by the conversion itself ?
     converted_kwargs = {
         name: (value if isinstance(value, rinterface.Sexp)
                else conversion.py2rpy(value))
         for name, value in kwargs.items()
     }
     raw_result = super(Function, self).__call__(*converted_args,
                                                 **converted_kwargs)
     return conversion.rpy2py(raw_result)
Пример #5
0
    def test_array(self):
        """Check storage mode and dimensions of converted 2-D and 3-D arrays."""
        int_matrix = numpy.array([[1, 2, 3], [4, 5, 6]], dtype='i')
        int_matrix_r = conversion.py2rpy(int_matrix)
        assert r['storage.mode'](int_matrix_r)[0] == 'integer'
        assert tuple(r['dim'](int_matrix_r)) == (2, 3)

        # Make sure we got the row/column swap right:
        # R index (1, 2) must match the 0-based numpy element [0, 1].
        assert r['['](int_matrix_r, 1, 2)[0] == int_matrix[0, 1]

        float_cube = numpy.arange(24, dtype='f').reshape(2, 3, 4)
        float_cube_r = conversion.py2rpy(float_cube)

        assert r['storage.mode'](float_cube_r)[0] == 'double'
        assert tuple(r['dim'](float_cube_r)) == (2, 3, 4)
Пример #6
0
 def check_homogeneous(self, obj, mode, storage_mode):
     """Convert ``obj`` and assert on the properties of the result.

     Asserts that the R ``mode`` and ``storage.mode`` match the expected
     values, that the elements equal those of ``obj``, and that R reports
     the result as an array. Returns the converted object.
     """
     r_obj = conversion.py2rpy(obj)
     expectations = (("mode", mode), ("storage.mode", storage_mode))
     for r_function, expected in expectations:
         assert r[r_function](r_obj)[0] == expected
     assert list(obj) == list(r_obj)
     assert r["is.array"](r_obj)[0] is True
     return r_obj
Пример #7
0
 def test_vector_bytes(self):
     """Check that a one-byte string array converts to an R raw vector."""
     byte_items = [b'a', b'b', b'c']
     np_bytes = numpy.array(byte_items, dtype='|S1')
     r_raw = conversion.py2rpy(np_bytes)
     for r_function in ("mode", "storage.mode"):
         assert r[r_function](r_raw)[0] == 'raw'
     # The raw vector must hold the concatenated bytes, in order.
     assert bytearray(b''.join(byte_items)) == bytearray(r_raw)
Пример #8
0
def numpy2rpy(o):
    """Convert a numpy array into an rpy2.rinterface-level R structure.

    Dispatch is on ``o.dtype.kind``: kinds listed in ``_kinds`` go through
    ``_numpyarray_to_r``, unsigned integers through
    ``unsignednumpyint_to_rint``, object arrays through ``numpy_O_py2rpy``
    and record arrays become an R ``data.frame``.

    Raises:
        ValueError: For non-native byte orders, for kind ``'V'`` arrays
            without field names, and for any other dtype kind.
    """
    dtype = o.dtype
    if not dtype.isnative:
        raise ValueError('Cannot pass numpy arrays with non-native '
                         'byte orders at the moment.')

    kind = dtype.kind
    # Most types map onto R arrays:
    if kind in _kinds:
        return _numpyarray_to_r(o, _kinds[kind])
    # R does not support unsigned types:
    if kind == 'u':
        return unsignednumpyint_to_rint(o)
    # Array-of-PyObject is treated like a Python list:
    if kind == 'O':
        return numpy_O_py2rpy(o)
    # It should be impossible to get here:
    if kind != 'V':
        raise ValueError('Unknown numpy array type "%s".' % str(dtype))

    # Record arrays map onto R data frames:
    if dtype.names is None:
        raise ValueError('Nothing can be done for this numpy array '
                         'type "%s" at the moment.' % (dtype,))
    columns = tuple(
        (field_name, conversion.py2rpy(o[field_name]))
        for field_name in dtype.names
    )
    return ro.baseenv["data.frame"].rcall(columns, ro.globalenv)
Пример #9
0
def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an R vector and attach its index.

    The conversion is chosen from the series dtype, tested in this order:
    dtype name ``'O'``, ``'category'``, any datetime64 dtype, the object
    dtype ``dt_O_type``, and finally the numpy converter for everything
    else.

    Raises:
        ValueError: If an object-dtype series mixes element types
            (``None`` entries are skipped by the check).
        KeyError: If the single element type found in an object-dtype
            series has no entry in the type-to-constructor mapping below.
    """
    # NOTE(review): numpy names the object dtype 'object' rather than 'O',
    # so confirm whether this first branch is ever taken.
    if obj.dtype.name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted '
                      'to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif obj.dtype.name == 'category':
        res = py2rpy_categoryseries(obj)
        res = FactorVector(res)
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        # Empty string when the series carries no timezone.
        tzname = obj.dt.tz.zone if obj.dt.tz else ''
        # One R vector per date/time component; the seconds vector carries
        # the microseconds as a fractional part.
        d = [
            IntVector([x.year for x in obj]),
            IntVector([x.month for x in obj]),
            IntVector([x.day for x in obj]),
            IntVector([x.hour for x in obj]),
            IntVector([x.minute for x in obj]),
            FloatSexpVector([x.second + x.microsecond * 1e-6 for x in obj])
        ]
        res = ISOdatetime(*d, tz=StrSexpVector([tzname]))
        # TODO: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(res)
    elif (obj.dtype == dt_O_type):
        # Find the single Python type shared by all non-None elements.
        homogeneous_type = None
        for x in obj.values:
            if x is None:
                continue
            if homogeneous_type is None:
                homogeneous_type = type(x)
                continue
            if type(x) is not homogeneous_type:
                raise ValueError('Series can only be of one type, or None.')
        # TODO: Could this be merged with obj.type.name == 'O' case above ?
        # The None key covers a series with no non-None element at all.
        res = {
            int: IntVector,
            bool: BoolVector,
            None: BoolVector,
            str: StrVector,
            bytes: numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        }[homogeneous_type](obj)
    else:
        # converted as a numpy array
        func = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        # current conversion as performed by numpy

        res = func(obj)
        if len(obj.shape) == 1:
            if (obj.dtype != dt_O_type):
                # force into an R vector
                res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(x) for x in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
Пример #10
0
 def test_object_array(self):
     """Check that an object array converts to an R list, element by element."""
     values = [1, "a", 3.2]
     obj_array = numpy.array(values, dtype=numpy.object_)
     obj_array_r = conversion.py2rpy(obj_array)
     assert r['mode'](obj_array_r)[0] == 'list'
     # R's `[[` is 1-based.
     for position, expected in enumerate(values, start=1):
         assert r['[['](obj_array_r, position)[0] == expected
Пример #11
0
def numpy_O_py2rpy(o):
    """Convert an iterable of Python objects according to its contents.

    All-``str`` content gives a ``StrSexpVector``, all-``bytes`` content a
    ``ByteSexpVector``; anything else is converted as a Python list. An
    empty input satisfies the first test and yields a ``StrSexpVector``.
    """
    if all(isinstance(item, str) for item in o):
        return StrSexpVector(o)
    if all(isinstance(item, bytes) for item in o):
        return ByteSexpVector(o)
    return conversion.py2rpy(list(o))
Пример #12
0
def py2rpy_dict(obj: Mapping) -> ListVector:
    """Convert each value of a mapping and wrap the result in a ListVector.

    Keys are stringified. A value whose conversion raises
    ``NotImplementedError`` is left out and reported through a
    ``NotConvertedWarning``. Nested dicts need this function itself to be
    registered as a converter.
    """
    r_items = {}
    for key, value in obj.items():
        try:
            r_value = conversion.py2rpy(value)
            r_items[str(key)] = r_value
        except NotImplementedError as err:
            warn(str(err), NotConvertedWarning)
    # ListVector tries to convert everything again; this works because
    # py2rpy(Sexp) is the identity function.
    return ListVector(r_items)
Пример #13
0
 def test_record_array(self):
     """Check that a record array converts to a data.frame with typed columns."""
     records = numpy.array(
         [(1, 2.3), (2, -0.7), (3, 12.1)],
         dtype=[("count", "i"), ("value", numpy.double)],
     )
     records_r = conversion.py2rpy(records)
     assert r["is.data.frame"](records_r)[0] is True
     assert tuple(r["names"](records_r)) == ("count", "value")
     # Columns are looked up by their position in the names vector.
     count_column = records_r[records_r.names.index('count')]
     value_column = records_r[records_r.names.index('value')]
     assert r["storage.mode"](count_column)[0] == 'integer'
     assert r["storage.mode"](value_column)[0] == 'double'
     assert count_column[1] == 2
     assert value_column[2] == 12.1
Пример #14
0
def py2rpy_pandasdataframe(obj):
    """Convert a pandas DataFrame into an R ``DataFrame``, column by column.

    Each column is converted with ``conversion.py2rpy``. When that raises,
    a warning is emitted and the column is converted with ``StrVector``
    instead, so one problematic column does not abort the whole conversion.

    Args:
        obj: The pandas DataFrame to convert.

    Returns:
        A ``DataFrame`` built from the converted columns, in column order.
    """
    od = OrderedDict()
    # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
    for name, values in obj.items():
        try:
            od[name] = conversion.py2rpy(values)
        except Exception as e:
            # Deliberate best-effort: report the problem, then fall back.
            warnings.warn('Error while trying to convert '
                          'the column "%s". Fall back to string conversion. '
                          'The error is: %s' % (name, str(e)))
            od[name] = StrVector(values)

    return DataFrame(od)
Пример #15
0
def dict_to_named_list(dct):
    """Turn a dict, ``Parameter`` or pandas Series into an R ``ListVector``.

    Any other input is returned unchanged. The input itself is never
    modified: its items are copied into a fresh dict first.
    """
    if not isinstance(dct, (dict, Parameter, pd.core.series.Series)):
        return dct

    snapshot = dict(dct.items())
    # convert numbers, numpy arrays and pandas dataframes to builtin
    # types before conversion (see rpy2 #548)
    combined_converter = (default_converter +
                          pandas2ri.converter +
                          numpy2ri.converter)
    with conversion.localconverter(combined_converter):
        converted = {key: conversion.py2rpy(val)
                     for key, val in snapshot.items()}
    return ListVector(converted)
Пример #16
0
def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an R vector and attach its index.

    The conversion is chosen from the series dtype, tested in this order:
    dtype name ``'O'`` (strings, with a warning), ``'category'`` (factor),
    any datetime64 dtype (POSIXct), and the numpy converter for everything
    else. Datetime seconds are passed as integers, so sub-second precision
    is not carried over.

    Args:
        obj: The pandas Series to convert.

    Returns:
        The converted R object with ``names`` (1-D input) or ``dimnames``
        set from ``obj.index``.
    """
    # Test the dtype of the series itself. The previous code compared
    # ``numpy.dtype.name``, a descriptor on the dtype class, which can never
    # equal 'O'.
    # NOTE(review): numpy names the object dtype 'object' rather than 'O',
    # so confirm whether this branch should test ``dt_O_type`` instead.
    if obj.dtype.name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif obj.dtype.name == 'category':
        res = py2rpy_categoryseries(obj)
        res = FactorVector(res)
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        # Empty string when the series carries no timezone.
        tzname = obj.dt.tz.zone if obj.dt.tz else ''
        d = [IntVector([x.year for x in obj]),
             IntVector([x.month for x in obj]),
             IntVector([x.day for x in obj]),
             IntVector([x.hour for x in obj]),
             IntVector([x.minute for x in obj]),
             IntVector([x.second for x in obj])]
        res = ISOdatetime(*d, tz=StrSexpVector([tzname]))
        # FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(res)
    else:
        # converted as a numpy array
        func = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        # current conversion as performed by numpy
        res = func(obj)
        if len(obj.shape) == 1:
            if (obj.dtype != dt_O_type):
                # force into an R vector
                res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(x) for x in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
Пример #17
0
def numpy2rpy(o):
    """Convert a numpy array into an rpy2.rinterface-level R structure.

    Dispatch is on ``o.dtype.kind``: kinds listed in ``_kinds`` become an R
    array built in column-major order, object arrays go through
    ``numpy_O_py2rpy`` and record arrays become an R ``data.frame``.

    Raises:
        ValueError: For non-native byte orders, unsigned integer arrays,
            kind ``'V'`` arrays without field names, and any other dtype
            kind.
    """
    dtype = o.dtype
    if not dtype.isnative:
        raise ValueError('Cannot pass numpy arrays with non-native '
                         'byte orders at the moment.')

    kind = dtype.kind
    # Most types map onto R arrays:
    if kind in _kinds:
        # "F" means "use column-major order"
        flattened = _kinds[kind](o.ravel('F'))
        shape = ro.vectors.IntVector(o.shape)
        # TODO: no dimnames ?
        # TODO: optimize what is below needed/possible ?
        #  (other ways to create R arrays ?)
        return rinterface.baseenv['array'](flattened, dim=shape)
    # R does not support unsigned types:
    if kind == 'u':
        raise ValueError('Cannot convert numpy array of unsigned values '
                         '-- R does not have unsigned integers.')
    # Array-of-PyObject is treated like a Python list:
    if kind == 'O':
        return numpy_O_py2rpy(o)
    # It should be impossible to get here:
    if kind != 'V':
        raise ValueError('Unknown numpy array type "%s".' % str(dtype))

    # Record arrays map onto R data frames:
    if dtype.names is None:
        raise ValueError('Nothing can be done for this numpy array '
                         'type "%s" at the moment.' % (dtype,))
    columns = tuple(
        (field_name, conversion.py2rpy(o[field_name]))
        for field_name in dtype.names
    )
    return ro.baseenv["data.frame"].rcall(columns, ro.globalenv)
Пример #18
0
 def new(cls, data):
     """ Constructor for the class GGplot.

     ``data`` is converted with ``conversion.py2rpy`` and handed to the
     class-level ``_constructor``; the result is wrapped in ``cls``.
     """
     r_data = conversion.py2rpy(data)
     plot = cls._constructor(r_data)
     return cls(plot)
Пример #19
0
 def new(cls, data, mapping=_AES_RLANG, **kwargs):
     """ Constructor for the class GGplot.

     ``data`` is converted with ``conversion.py2rpy``; ``mapping`` and any
     extra keyword arguments are forwarded unchanged to the class-level
     ``_constructor``, whose result is wrapped in ``cls``.
     """
     r_data = conversion.py2rpy(data)
     plot = cls._constructor(r_data, mapping=mapping, **kwargs)
     return cls(plot)
Пример #20
0
 def validobject(self, test = False, complete = False):
     """ Return whether the instance is 'valid' for its class.

     Both flags are converted with ``conversion.py2rpy`` and passed by name
     to ``validObject`` from ``methods_env``; the first element of the
     result is returned.
     """
     r_flags = {
         'test': conversion.py2rpy(test),
         'complete': conversion.py2rpy(complete),
     }
     outcome = methods_env['validObject'](self, **r_flags)
     return outcome[0]
Пример #21
0
 def isclass(name):
     """ Return whether the given name is a defined class.

     The name is converted with ``conversion.py2rpy`` and passed to
     ``isClass`` from ``methods_env``; the first element of the result is
     returned.
     """
     r_name = conversion.py2rpy(name)
     answer = methods_env['isClass'](r_name)
     return answer[0]
Пример #22
0
def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an R vector and attach its index.

    The conversion is chosen from the series dtype, tested in this order:
    dtype name ``'O'``, ``'category'``, any datetime64 dtype, a dtype whose
    scalar type is ``str``, a dtype name listed in ``integer_array_types``,
    the object dtype ``dt_O_type``, and finally the numpy converter applied
    to ``obj.values``.

    Raises:
        ValueError: If an object-dtype series mixes element types; ``None``,
            float NaN and values for which ``pandas.isna`` is true are
            tolerated.
    """
    # NOTE(review): numpy names the object dtype 'object' rather than 'O',
    # so confirm whether this first branch is ever taken.
    if obj.dtype.name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted '
                      'to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif obj.dtype.name == 'category':
        res = py2rpy_categoryseries(obj)
        res = FactorVector(res)
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        # Empty string when the series carries no timezone.
        tzname = obj.dt.tz.zone if obj.dt.tz else ''
        # One R vector per date/time component; the seconds vector carries
        # the microseconds as a fractional part.
        d = [IntVector([x.year for x in obj]),
             IntVector([x.month for x in obj]),
             IntVector([x.day for x in obj]),
             IntVector([x.hour for x in obj]),
             IntVector([x.minute for x in obj]),
             FloatSexpVector([x.second + x.microsecond * 1e-6 for x in obj])]
        res = ISOdatetime(*d, tz=StrSexpVector([tzname]))
        # TODO: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(res)
    elif obj.dtype.type == str:
        res = _PANDASTYPE2RPY2[str](obj)
    elif obj.dtype.name in integer_array_types:
        res = _PANDASTYPE2RPY2[int](obj)
        if len(obj.shape) == 1:
            if obj.dtype != dt_O_type:
                # force into an R vector
                res = as_vector(res)
    elif (obj.dtype == dt_O_type):
        # Find the single Python type shared by the elements; the first
        # non-None element fixes the expected type.
        homogeneous_type = None
        for x in obj.values:
            if x is None:
                continue
            if homogeneous_type is None:
                homogeneous_type = type(x)
                continue
            # A differing type is only an error when the value is not a
            # missing-value marker (float NaN or anything pandas.isna flags).
            if ((type(x) is not homogeneous_type)
                and not
                ((isinstance(x, float) and math.isnan(x))
                 or pandas.isna(x))):
                raise ValueError(
                    'Series can only be of one type, or None '
                    '(and here we have %s and %s). If happening with '
                    'a pandas DataFrame the method infer_objects() '
                    'will normalize data types before conversion.' %
                    (homogeneous_type, type(x)))
        # TODO: Could this be merged with obj.type.name == 'O' case above ?
        # NOTE(review): homogeneous_type is still None when every element is
        # None -- confirm _PANDASTYPE2RPY2 has an entry for that key.
        res = _PANDASTYPE2RPY2[homogeneous_type](obj)
    else:
        # converted as a numpy array
        func = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        # current conversion as performed by numpy

        res = func(obj.values)
        if len(obj.shape) == 1:
            if (obj.dtype != dt_O_type):
                # force into an R vector
                res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(x) for x in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
Пример #23
0
from rpy2.robjects import conversion, pandas2ri, default_converter
from rpy2.robjects.conversion import localconverter
from rpy2.robjects.vectors import ListVector
from rpy2.robjects.methods import RS4

from . import conv_name
from .conv import converter, mat_converter, full_converter
from .rpy2_ext import importr


class NotConvertedWarning(Warning):
    """Warning category for a value that could not be converted."""


# Converter that turns numpy scalars into the matching builtin Python value
# and then hands that value to the regular rpy2 conversion.
dict_converter = conversion.Converter("Converter handling dicts")
dict_converter.py2rpy.register(np.bool_, lambda x: conversion.py2rpy(bool(x)))
dict_converter.py2rpy.register(np.int_, lambda x: conversion.py2rpy(int(x)))
# np.float64 is the type the np.float_ alias referred to; the alias itself
# was removed in NumPy 2.0, so the concrete type is registered instead.
dict_converter.py2rpy.register(np.float64, lambda x: conversion.py2rpy(float(x)))
dict_converter.py2rpy.register(np.bytes_, lambda x: conversion.py2rpy(bytes(x)))
dict_converter.py2rpy.register(np.str_, lambda x: conversion.py2rpy(str(x)))


@dict_converter.py2rpy.register(Mapping)
def py2rpy_dict(obj: Mapping) -> ListVector:
    """Convert each value of a mapping and wrap the result in a ListVector.

    For nested dicts, this needs itself to be registered.

    Args:
        obj: The mapping to convert; keys are stringified.

    Returns:
        A ``ListVector`` holding the values that could be converted. Values
        whose conversion raises ``NotImplementedError`` are left out and
        reported through a ``NotConvertedWarning``.
    """
    converted = {}
    for k, v in obj.items():
        try:
            converted[str(k)] = conversion.py2rpy(v)
        except NotImplementedError as e:
            warn(str(e), NotConvertedWarning)
    # Without this return the function yields None despite its annotation.
    return ListVector(converted)
Пример #24
0
 def test_bad_array(self):
     """Check that converting an unsigned integer array raises ValueError."""
     unsigned = numpy.array([1, 2, 3], dtype=numpy.uint32)
     with pytest.raises(ValueError):
         conversion.py2rpy(unsigned)
Пример #25
0
 def __setitem__(self, key, value):
     """Convert ``value`` and assign it to slot ``key`` of ``self._robj``."""
     converted = conversion.py2rpy(value)
     assign_slot = self._robj.do_slot_assign
     assign_slot(key, converted)
Пример #26
0
 def test_scalar_int(self, constructor):
     """Check that the scalar built by ``constructor`` survives py2rpy and back."""
     original = constructor(100)
     as_r = conversion.py2rpy(original)
     # Read the single element back through a numpy array.
     round_tripped = numpy.array(as_r)[0]
     assert original == round_tripped
Пример #27
0
 def test_scalar_f128(self):
     """Check that a float128 scalar survives py2rpy and back."""
     original = numpy.float128(100.000000003)
     as_r = conversion.py2rpy(original)
     # Read the single element back through a numpy array.
     round_tripped = numpy.array(as_r)[0]
     assert original == round_tripped