def __init__(self, count_matrix, design_matrix, design_formula, gene_column):
    """Validate the inputs, convert them for R and reset result placeholders.

    Args:
        count_matrix: table exposing ``.columns``, item access and ``.drop``
            (a pandas DataFrame is expected) that contains ``gene_column``.
        design_matrix: table converted with ``conversion.py2rpy``; its first
            dimension is reported in the log message.
        design_formula: value wrapped in an rpy2 ``Formula``.
        gene_column: name of the gene identifier column in ``count_matrix``.

    Raises:
        AssertionError: if ``gene_column`` is not among the columns.
        Exception: if ``count_matrix`` has no ``columns`` attribute.
    """
    try:
        has_gene_column = gene_column in count_matrix.columns
    except AttributeError as err:
        raise Exception('Wrong Pandas dataframe?') from err
    if not has_gene_column:
        # Raised explicitly instead of via ``assert`` so that the check is
        # not stripped when Python runs with -O.
        raise AssertionError('Wrong gene id column name')

    import rpy2
    from rpy2.robjects import pandas2ri, Formula, conversion
    pandas2ri.activate()

    # Placeholders; nothing in this method fills them in.
    self.dds = None
    self.deseq_result = None
    self.comparison = None
    self.normalized_count_matrix = None

    self.gene_column = gene_column
    self.gene_id = count_matrix[self.gene_column]
    count_matrix = count_matrix.drop(gene_column, axis=1)
    logging.info(
        f'Number of columns in counts data {count_matrix.shape[1]} | '
        f'Number of rows in design matrix {design_matrix.shape[0]}'
    )

    # Load dataframe into R environment
    # Important: Change to r.data() if you use numpys and rpy2 latest versions
    self.count_matrix = conversion.py2rpy(count_matrix)
    # Assign columns to NULL
    self.count_matrix.names = rpy2.rinterface.NULL
    # NOTE(review): this rebinding discards the converted object built just
    # above and stores the original table instead -- confirm it is intended.
    self.count_matrix = count_matrix
    self.design_matrix = conversion.py2rpy(design_matrix)
    self.design_formula = Formula(design_formula)
def __init__(self, *args, **kwargs):
    """Create the underlying R object by calling ``self._constructor``.

    Every positional and keyword argument is converted with
    ``conversion.py2rpy`` before the R call, which is evaluated in
    ``robjects.globalenv``.
    """
    r_args = OrdDict()
    # Positional arguments are stored under the key None (unnamed in R).
    for positional in args:
        r_args[None] = conversion.py2rpy(positional)
    for name, value in kwargs.items():
        r_args[name] = conversion.py2rpy(value)
    call_args = tuple(r_args.items())
    r_obj = self._constructor.rcall(call_args, robjects.globalenv)
    super().__init__(r_obj.__sexp__)
def test_scalar(self):
    """Round-trip numpy int32 and int64 scalars through ``py2rpy``."""
    for scalar_type in (numpy.int32, numpy.int64):
        expected = scalar_type(100)
        r_value = conversion.py2rpy(expected)
        round_tripped = numpy.array(r_value)[0]
        assert expected == round_tripped
def __call__(self, *args, **kwargs):
    """Call the wrapped R function with converted arguments.

    Positional arguments always go through ``conversion.py2rpy``; keyword
    values that are already ``rinterface.Sexp`` instances are passed
    unchanged. The result is converted back with ``conversion.rpy2py``.
    """
    converted_args = [conversion.py2rpy(arg) for arg in args]
    # TODO: shouldn't this be handled by the conversion itself ?
    converted_kwargs = {
        name: (value if isinstance(value, rinterface.Sexp)
               else conversion.py2rpy(value))
        for name, value in kwargs.items()
    }
    r_result = super(Function, self).__call__(*converted_args,
                                              **converted_kwargs)
    return conversion.rpy2py(r_result)
def test_array(self):
    """Check storage mode and dimensions of converted 2-D and 3-D arrays."""
    int_matrix = numpy.array([[1, 2, 3], [4, 5, 6]], dtype='i')
    int_matrix_r = conversion.py2rpy(int_matrix)
    assert r['storage.mode'](int_matrix_r)[0] == 'integer'
    assert tuple(r['dim'](int_matrix_r)) == (2, 3)

    # Make sure we got the row/column swap right:
    assert r['['](int_matrix_r, 1, 2)[0] == int_matrix[0, 1]

    float_cube = numpy.arange(24, dtype='f').reshape((2, 3, 4))
    float_cube_r = conversion.py2rpy(float_cube)
    assert r['storage.mode'](float_cube_r)[0] == 'double'
    assert tuple(r['dim'](float_cube_r)) == (2, 3, 4)
def check_homogeneous(self, obj, mode, storage_mode):
    """Convert ``obj`` and assert the R-side mode, storage mode and content.

    Returns the converted object so callers can run further checks on it.
    """
    r_obj = conversion.py2rpy(obj)
    for r_function, expected in (("mode", mode),
                                 ("storage.mode", storage_mode)):
        assert r[r_function](r_obj)[0] == expected
    assert list(obj) == list(r_obj)
    assert r["is.array"](r_obj)[0] is True
    return r_obj
def test_vector_bytes(self):
    """A numpy array of single bytes must convert to an R raw vector."""
    chunks = [b'a', b'b', b'c']
    byte_array = numpy.array(chunks, dtype='|S1')
    r_raw = conversion.py2rpy(byte_array)
    assert r["mode"](r_raw)[0] == 'raw'
    assert r["storage.mode"](r_raw)[0] == 'raw'
    assert bytearray(b''.join(chunks)) == bytearray(r_raw)
def numpy2rpy(o):
    """Convert a numpy array into an rpy2.rinterface-level R structure.

    Dispatch is on ``o.dtype.kind``: kinds listed in ``_kinds`` become R
    arrays, unsigned integers go through ``unsignednumpyint_to_rint``,
    object arrays through ``numpy_O_py2rpy`` and record arrays become R
    data frames.

    Raises:
        ValueError: for non-native byte order, for a void dtype without
            field names, or for an unknown dtype kind.
    """
    if not o.dtype.isnative:
        raise ValueError('Cannot pass numpy arrays with non-native '
                         'byte orders at the moment.')

    kind = o.dtype.kind

    # Most types map onto R arrays:
    if kind in _kinds:
        return _numpyarray_to_r(o, _kinds[kind])
    # R does not support unsigned types:
    if kind == 'u':
        return unsignednumpyint_to_rint(o)
    # Array-of-PyObject is treated like a Python list:
    if kind == 'O':
        return numpy_O_py2rpy(o)
    # It should be impossible to get here:
    if kind != 'V':
        raise ValueError('Unknown numpy array type "%s".' % str(o.dtype))

    # Record arrays map onto R data frames:
    field_names = o.dtype.names
    if field_names is None:
        raise ValueError('Nothing can be done for this numpy array '
                         'type "%s" at the moment.' % (o.dtype,))
    columns = tuple((field_name, conversion.py2rpy(o[field_name]))
                    for field_name in field_names)
    return ro.baseenv["data.frame"].rcall(columns, ro.globalenv)
def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an R vector carrying the index as names.

    The branch is chosen from the Series dtype: dtype name ``'O'``,
    category, datetime64, ``dt_O_type`` objects of a single Python type,
    or anything else via the numpy converter.

    Raises:
        ValueError: if a ``dt_O_type`` Series mixes several non-None types.
    """
    dtype_name = obj.dtype.name
    if dtype_name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted '
                      'to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif dtype_name == 'category':
        res = FactorVector(py2rpy_categoryseries(obj))
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        tzname = obj.dt.tz.zone if obj.dt.tz else ''
        parts = [
            IntVector([getattr(x, field) for x in obj])
            for field in ('year', 'month', 'day', 'hour', 'minute')
        ]
        # Seconds carry the microseconds as a fractional part.
        parts.append(
            FloatSexpVector([x.second + x.microsecond * 1e-6 for x in obj])
        )
        res = ISOdatetime(*parts, tz=StrSexpVector([tzname]))
        # TODO: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(res)
    elif obj.dtype == dt_O_type:
        homogeneous_type = None
        for x in obj.values:
            if x is None:
                continue
            if homogeneous_type is None:
                # The first non-None element fixes the expected type.
                homogeneous_type = type(x)
            elif type(x) is not homogeneous_type:
                raise ValueError('Series can only be of one type, or None.')
        # TODO: Could this be merged with obj.type.name == 'O' case above ?
        constructors = {
            int: IntVector,
            bool: BoolVector,
            None: BoolVector,
            str: StrVector,
            bytes: numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        }
        res = constructors[homogeneous_type](obj)
    else:
        # converted as a numpy array
        # current conversion as performed by numpy
        res = numpy2ri.converter.py2rpy.registry[numpy.ndarray](obj)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # force into an R vector
            res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(x) for x in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
def test_object_array(self):
    """An object array must become an R list with elements kept as-is."""
    mixed = numpy.array([1, "a", 3.2], dtype=numpy.object_)
    mixed_r = conversion.py2rpy(mixed)
    assert r['mode'](mixed_r)[0] == 'list'
    # R's `[[` is 1-based.
    assert r['[['](mixed_r, 1)[0] == 1
    assert r['[['](mixed_r, 2)[0] == 'a'
    assert r['[['](mixed_r, 3)[0] == 3.2
def numpy_O_py2rpy(o):
    """Convert an object array to an R vector.

    All-``str`` content yields a ``StrSexpVector``, all-``bytes`` content a
    ``ByteSexpVector``; anything else is converted as a Python list.
    """
    if all(isinstance(element, str) for element in o):
        return StrSexpVector(o)
    if all(isinstance(element, bytes) for element in o):
        return ByteSexpVector(o)
    return conversion.py2rpy(list(o))
def py2rpy_dict(obj: Mapping) -> ListVector:
    """Convert each value of a mapping and wrap the result in a ListVector.

    Keys are stringified. A value whose conversion raises
    ``NotImplementedError`` is left out and reported through a
    ``NotConvertedWarning``. For nested dicts, this function needs itself
    to be registered as a converter.
    """
    r_items = {}
    for key, value in obj.items():
        try:
            r_items[str(key)] = conversion.py2rpy(value)
        except NotImplementedError as err:
            warn(str(err), NotConvertedWarning)
    # This tries to convert everything again.
    # This works because py2rpy(Sexp) is the identity function
    return ListVector(r_items)
def test_record_array(self):
    """A record array must convert to an R data frame with typed columns."""
    records = numpy.array([(1, 2.3), (2, -0.7), (3, 12.1)],
                          dtype=[("count", "i"), ("value", numpy.double)])
    frame_r = conversion.py2rpy(records)
    assert r["is.data.frame"](frame_r)[0] is True
    assert tuple(r["names"](frame_r)) == ("count", "value")

    count_column = frame_r[frame_r.names.index('count')]
    value_column = frame_r[frame_r.names.index('value')]
    assert r["storage.mode"](count_column)[0] == 'integer'
    assert r["storage.mode"](value_column)[0] == 'double'
    assert count_column[1] == 2
    assert value_column[2] == 12.1
def py2rpy_pandasdataframe(obj):
    """Convert a pandas DataFrame into an R ``DataFrame``, column by column.

    Each column is converted with ``conversion.py2rpy``. If that fails for
    any reason, a warning is issued and the column is converted to a
    ``StrVector`` instead (deliberate best-effort fallback).

    Args:
        obj: object exposing ``items()`` that yields ``(name, column)``
            pairs, as a pandas DataFrame does.

    Returns:
        ``DataFrame`` built from the converted columns, in column order.
    """
    od = OrderedDict()
    # ``items()`` is used because ``iteritems()`` was removed in pandas 2.0;
    # both yield (column name, column) pairs.
    for name, values in obj.items():
        try:
            od[name] = conversion.py2rpy(values)
        except Exception as e:
            warnings.warn('Error while trying to convert '
                          'the column "%s". Fall back to string conversion. '
                          'The error is: %s'
                          % (name, str(e)))
            od[name] = StrVector(values)
    return DataFrame(od)
def dict_to_named_list(dct):
    """Turn a dict, ``Parameter`` or pandas Series into an R ``ListVector``.

    Any other object is returned unchanged. The input itself is not
    modified; conversion works on a copy of its items.
    """
    if not isinstance(dct, (dict, Parameter, pd.core.series.Series)):
        return dct

    items = dict(dct.items())
    # convert numbers, numpy arrays and pandas dataframes to builtin
    # types before conversion (see rpy2 #548)
    combined = default_converter + pandas2ri.converter + numpy2ri.converter
    with conversion.localconverter(combined):
        r_values = {key: conversion.py2rpy(val) for key, val in items.items()}
        return ListVector(r_values)
def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an R vector carrying the index as names.

    The branch is chosen from the Series dtype: dtype name ``'O'``,
    category, datetime64, or anything else via the numpy converter.

    Args:
        obj: the Series to convert.

    Returns:
        The converted R object with its ``names`` (1-D input) or
        ``dimnames`` slot set from ``obj.index``.
    """
    # Test the dtype of this Series. The previous code read
    # ``numpy.dtype.name``, an attribute of the dtype class itself, which
    # says nothing about ``obj``.
    if obj.dtype.name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif obj.dtype.name == 'category':
        res = py2rpy_categoryseries(obj)
        res = FactorVector(res)
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        tzname = obj.dt.tz.zone if obj.dt.tz else ''
        d = [IntVector([x.year for x in obj]),
             IntVector([x.month for x in obj]),
             IntVector([x.day for x in obj]),
             IntVector([x.hour for x in obj]),
             IntVector([x.minute for x in obj]),
             IntVector([x.second for x in obj])]
        res = ISOdatetime(*d, tz=StrSexpVector([tzname]))
        # FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(res)
    else:
        # converted as a numpy array
        func = numpy2ri.converter.py2rpy.registry[numpy.ndarray]
        # current conversion as performed by numpy
        res = func(obj)
        if len(obj.shape) == 1:
            if obj.dtype != dt_O_type:
                # force into an R vector
                res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(x) for x in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
def numpy2rpy(o):
    """Convert a numpy array into an rpy2.rinterface-level R structure.

    Dispatch is on ``o.dtype.kind``: kinds listed in ``_kinds`` become R
    arrays, object arrays go through ``numpy_O_py2rpy`` and record arrays
    become R data frames. Unsigned integer arrays are rejected.

    Raises:
        ValueError: for non-native byte order, unsigned integers, a void
            dtype without field names, or an unknown dtype kind.
    """
    if not o.dtype.isnative:
        raise ValueError('Cannot pass numpy arrays with non-native '
                         'byte orders at the moment.')

    kind = o.dtype.kind

    # Most types map onto R arrays:
    if kind in _kinds:
        # "F" means "use column-major order"
        flat = _kinds[kind](o.ravel('F'))
        shape = ro.vectors.IntVector(o.shape)
        # TODO: no dimnames ?
        # TODO: optimize what is below needed/possible ?
        #  (other ways to create R arrays ?)
        return rinterface.baseenv['array'](flat, dim=shape)
    # R does not support unsigned types:
    if kind == 'u':
        raise ValueError('Cannot convert numpy array of unsigned values '
                         '-- R does not have unsigned integers.')
    # Array-of-PyObject is treated like a Python list:
    if kind == 'O':
        return numpy_O_py2rpy(o)
    # It should be impossible to get here:
    if kind != 'V':
        raise ValueError('Unknown numpy array type "%s".' % str(o.dtype))

    # Record arrays map onto R data frames:
    field_names = o.dtype.names
    if field_names is None:
        raise ValueError('Nothing can be done for this numpy array '
                         'type "%s" at the moment.' % (o.dtype,))
    columns = tuple((field_name, conversion.py2rpy(o[field_name]))
                    for field_name in field_names)
    return ro.baseenv["data.frame"].rcall(columns, ro.globalenv)
def new(cls, data):
    """Constructor for the class GGplot.

    ``data`` is converted with ``conversion.py2rpy`` and handed to
    ``cls._constructor``; the result is wrapped in ``cls``.
    """
    r_data = conversion.py2rpy(data)
    return cls(cls._constructor(r_data))
def new(cls, data, mapping=_AES_RLANG, **kwargs):
    """Constructor for the class GGplot.

    ``data`` is converted with ``conversion.py2rpy``; ``mapping`` and any
    extra keyword arguments are forwarded to ``cls._constructor`` as given.
    """
    r_data = conversion.py2rpy(data)
    r_plot = cls._constructor(r_data, mapping=mapping, **kwargs)
    return cls(r_plot)
def validobject(self, test=False, complete=False):
    """Return whether the instance is 'valid' for its class.

    Both flags are converted with ``conversion.py2rpy`` and passed to the
    ``validObject`` entry of ``methods_env``; the first element of the
    result is returned.
    """
    r_test = conversion.py2rpy(test)
    r_complete = conversion.py2rpy(complete)
    verdict = methods_env['validObject'](self, test=r_test,
                                         complete=r_complete)
    return verdict[0]
def isclass(name):
    """Return whether the given name is a defined class.

    The name is converted with ``conversion.py2rpy`` and passed to the
    ``isClass`` entry of ``methods_env``.
    """
    r_name = conversion.py2rpy(name)
    answer = methods_env['isClass'](r_name)
    return answer[0]
def py2rpy_pandasseries(obj):
    """Convert a pandas Series into an R vector carrying the index as names.

    The branch is chosen from the Series dtype: dtype name ``'O'``,
    category, datetime64, ``str``, the names in ``integer_array_types``,
    ``dt_O_type`` objects of a single Python type (missing values are
    tolerated), or anything else via the numpy converter.

    Raises:
        ValueError: if a ``dt_O_type`` Series mixes several types.
    """
    dtype_name = obj.dtype.name
    if dtype_name == 'O':
        warnings.warn('Element "%s" is of dtype "O" and converted '
                      'to R vector of strings.' % obj.name)
        res = StrVector(obj)
    elif dtype_name == 'category':
        res = FactorVector(py2rpy_categoryseries(obj))
    elif is_datetime64_any_dtype(obj.dtype):
        # time series
        tzname = obj.dt.tz.zone if obj.dt.tz else ''
        parts = [
            IntVector([getattr(x, field) for x in obj])
            for field in ('year', 'month', 'day', 'hour', 'minute')
        ]
        # Seconds carry the microseconds as a fractional part.
        parts.append(
            FloatSexpVector([x.second + x.microsecond * 1e-6 for x in obj])
        )
        res = ISOdatetime(*parts, tz=StrSexpVector([tzname]))
        # TODO: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(res)
    elif obj.dtype.type == str:
        res = _PANDASTYPE2RPY2[str](obj)
    elif dtype_name in integer_array_types:
        res = _PANDASTYPE2RPY2[int](obj)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # force into an R vector
            res = as_vector(res)
    elif obj.dtype == dt_O_type:
        homogeneous_type = None
        for x in obj.values:
            if x is None:
                continue
            if homogeneous_type is None:
                # The first non-None element fixes the expected type.
                homogeneous_type = type(x)
            elif type(x) is not homogeneous_type:
                # A differing type is only accepted for missing values.
                is_nan_float = isinstance(x, float) and math.isnan(x)
                if not (is_nan_float or pandas.isna(x)):
                    raise ValueError(
                        'Series can only be of one type, or None '
                        '(and here we have %s and %s). If happening with '
                        'a pandas DataFrame the method infer_objects() '
                        'will normalize data types before conversion.' %
                        (homogeneous_type, type(x)))
        # TODO: Could this be merged with obj.type.name == 'O' case above ?
        res = _PANDASTYPE2RPY2[homogeneous_type](obj)
    else:
        # converted as a numpy array
        # current conversion as performed by numpy
        res = numpy2ri.converter.py2rpy.registry[numpy.ndarray](obj.values)
        if len(obj.shape) == 1 and obj.dtype != dt_O_type:
            # force into an R vector
            res = as_vector(res)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        res.do_slot_assign('names',
                           StrVector(tuple(str(x) for x in obj.index)))
    else:
        res.do_slot_assign('dimnames',
                           SexpVector(conversion.py2rpy(obj.index)))
    return res
from rpy2.robjects import conversion, pandas2ri, default_converter
from rpy2.robjects.conversion import localconverter
from rpy2.robjects.vectors import ListVector
from rpy2.robjects.methods import RS4

from . import conv_name
from .conv import converter, mat_converter, full_converter
from .rpy2_ext import importr


class NotConvertedWarning(Warning):
    """Warning issued when a mapping value could not be converted to R."""


# Converter that unwraps numpy scalars into the matching Python builtin
# before delegating to the active conversion, and that handles mappings.
dict_converter = conversion.Converter("Converter handling dicts")
dict_converter.py2rpy.register(np.bool_, lambda x: conversion.py2rpy(bool(x)))
dict_converter.py2rpy.register(np.int_, lambda x: conversion.py2rpy(int(x)))
# ``np.float64`` is what the ``np.float_`` alias pointed to; the alias was
# removed in NumPy 2.0, so the concrete name is used to work on both.
dict_converter.py2rpy.register(np.float64, lambda x: conversion.py2rpy(float(x)))
dict_converter.py2rpy.register(np.bytes_, lambda x: conversion.py2rpy(bytes(x)))
dict_converter.py2rpy.register(np.str_, lambda x: conversion.py2rpy(str(x)))


@dict_converter.py2rpy.register(Mapping)
def py2rpy_dict(obj: Mapping) -> ListVector:
    """Try converting everything.

    For nested dicts, this needs itself to be registered.

    Args:
        obj: mapping whose keys are stringified and whose values are
            converted with ``conversion.py2rpy``.

    Returns:
        ``ListVector`` of the entries that could be converted. A value whose
        conversion raises ``NotImplementedError`` is left out and reported
        through a ``NotConvertedWarning``.
    """
    converted = {}
    for k, v in obj.items():
        try:
            converted[str(k)] = conversion.py2rpy(v)
        except NotImplementedError as e:
            warn(str(e), NotConvertedWarning)
    # Without this return the function yielded None despite its
    # ``-> ListVector`` annotation.
    return ListVector(converted)
def test_bad_array(self):
    """Converting an unsigned 32-bit array must raise ``ValueError``."""
    unsigned = numpy.array([1, 2, 3], dtype=numpy.uint32)
    with pytest.raises(ValueError):
        conversion.py2rpy(unsigned)
def __setitem__(self, key, value):
    """Assign ``value``, converted with ``py2rpy``, to slot ``key`` of ``self._robj``."""
    converted = conversion.py2rpy(value)
    self._robj.do_slot_assign(key, converted)
def test_scalar_int(self, constructor):
    """Round-trip the scalar built by ``constructor(100)`` through ``py2rpy``."""
    expected = constructor(100)
    r_vector = conversion.py2rpy(expected)
    round_tripped = numpy.array(r_vector)[0]
    assert expected == round_tripped
def test_scalar_f128(self):
    """Round-trip a ``numpy.float128`` scalar through ``py2rpy``."""
    expected = numpy.float128(100.000000003)
    r_vector = conversion.py2rpy(expected)
    round_tripped = numpy.array(r_vector)[0]
    assert expected == round_tripped