Example #1
def R_rdc(X, y):
    numpy2ri.activate()
    rstring = """
        library(foreach)
        library(doParallel)

        rdc <- function(x,y,k=20,s=1/6,f=sin) {
            x <- cbind(apply(as.matrix(x),2,function(u)rank(u)/length(u)),1)
            y <- cbind(apply(as.matrix(y),2,function(u)rank(u)/length(u)),1)
            x <- s/ncol(x)*x%*%matrix(rnorm(ncol(x)*k),ncol(x))
            y <- s/ncol(y)*y%*%matrix(rnorm(ncol(y)*k),ncol(y))
            tryCatch(cancor(cbind(f(x),1),cbind(f(y),1))$cor[1], error = function(e){0})
        }

        rdcs_for_all <- function(X, y) {
            cl<-makeCluster(40)
            clusterExport(cl, c('rdc'), envir=environment())
            registerDoParallel(cl)
            res <- foreach (c_ = 1:ncol(X)) %dopar% {
                rdc(y, X[, c_])
            }
            stopCluster(cl)
            return(res)
        }
    """ 
    
    rfunc=robjects.r(rstring)
    res = rfunc(X, y)
    return np.array([x[0] for x in res])
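A minimal driver for the snippet above, written as a sketch assuming its implied module-level imports (numpy as np, rpy2.robjects as robjects, numpy2ri) and an R installation with the foreach and doParallel packages; note the hard-coded 40-worker cluster in rdcs_for_all.

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 5))        # 100 samples, 5 candidate features
y = X[:, 0] + 0.1 * rng.standard_normal(100)

scores = R_rdc(X, y)                     # one RDC score per column of X
print(scores.shape)                      # (5,)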
Example #2
def make_plot(pre, post):
    """
    Plot the pre return values and post return values
    """
    data = make_returns_data(pre, post)
    numpy2ri.activate()
    column = robjects.r["c"]
    sequence = robjects.r["seq"]
    robjects.r["plot"](
        x=2, 
        y=2, 
        xlim=column(0, len(data["pre"]) - 1), 
        ylim=column(data["min"] - .1, data["max"] + .1),
        xlab="Event number", 
        ylab="Returns"
    )
    robjects.r["points"](
        x=sequence(0, len(data["pre"]) - 1), y=data["pre"], col="red", pch=19
    )
    robjects.r["points"](
        x=sequence(0, len(data["pre"]) - 1), y=data["post"], col="blue", pch=19
    )
    robjects.r["abline"](h=0)
    signal.signal(signal.SIGINT, lambda a, b: sys.exit(0))
    signal.pause()
Example #3
    def extractTableFactor(self, tableFactor):
        formulaModel = []
        formulaErrorTerm = []

        numpy2ri.activate()
        for t in self.tableFactor:
            factorName = t[0]
            factorType = t[1]
            factorData = t[2]
            # sending Data to global variable in R (Factor definition for
            # Subject, Within or Between Type and FloatVector for Covariate)
            if factorType == 'Covariate':
                tmp = robjects.FloatVector(factorData)
                robjects.globalenv[factorName] = tmp
            else:
                tmp = robjects.r.factor(factorData)
                robjects.globalenv[factorName] = tmp
            # Creating formula for R - different treatment for within- and
            # between-subject factors
            if factorType == 'Subject':
                subjectName = factorName
                self.FactorSubject = factorData
            elif factorType == 'Within':
                formulaModel.append(factorName)
                formulaErrorTerm.append(factorName)
            else:
                formulaModel.append(factorName)

        return formulaModel, formulaErrorTerm, subjectName
Example #4
def activate():
    global original_converter
    # If module is already activated, there is nothing to do
    if original_converter is not None: 
        return

    original_converter = conversion.make_converter('snapshot before pandas conversion',
                                                   template=conversion.converter)
    numpy2ri.activate()
    new_converter = conversion.make_converter('snapshot before pandas conversion',
                                              template=conversion.converter)
    numpy2ri.deactivate()

    for k,v in py2ri.registry.items():
        if k is object:
            continue
        new_converter.py2ri.register(k, v)

    for k,v in ri2ro.registry.items():
        if k is object:
            continue
        new_converter.ri2ro.register(k, v)
    
    for k,v in py2ro.registry.items():
        if k is object:
            continue
        new_converter.py2ro.register(k, v)

    for k,v in ri2py.registry.items():
        if k is object:
            continue
        new_converter.ri2py.register(k, v)

    conversion.converter = new_converter
    name, conversion.ri2ro, conversion.py2ri, conversion.py2ro, conversion.ri2py, lineage = new_converter
Example #5
 def __init__(self, data, nclusters=2, eigengap=False):
     ro.r("source('Rcode/kNNutils.R')")
     numpy2ri.activate()
     self.data = data
     self.eigengap = eigengap
     self.dataDim = data.shape[1]
     self.dataSize = data.shape[0]
     self.nclusters = nclusters
     self.kNN = ro.r['getKNearestNeighbors']
Example #6
    def OnHeatmap(self,event):
        number = self.notebook.GetListTabId().index(self.notebook.GetCurrentTabId())
        heatmapDlg = DialogHeatmap(title=u"Input for heatmap")
        pars = heatmapDlg.GetValue()
        
        print(pars)
        
        matStart = pars[0]-1     
        data = self.data[number]
        numData = numpy.array(data)[1:,matStart: ].astype(float)
        
        # get numpy data column items
        items = numpy.array(data)[0,matStart: ].astype(str)          
                   
        import rpy2.robjects as robjects
        from rpy2.robjects.packages import importr
        base = importr("base")
        from rpy2.robjects import numpy2ri        
        numpy2ri.activate()   # enable automatic numpy-array-to-R-matrix conversion
        
        # transfer numpy data to r matrix
        numDataR = transposeNumpyMat2R(numData)
        numDataR.rownames = robjects.StrVector(items)  # column items of numData become the R matrix row names, the format heatmap3 expects
        
        # get column side annotation colors
        # get color list for legend
        annoCols = [ x-1 for x in pars[1]]         
        #annoColDicList =[]
        for n, annoCol in enumerate(annoCols):            
            anno = numpy.array(data)[1:, int(annoCol)]
            annoColDic = getCategoryColorDic (list(set(anno)), colsDic)
            cols = getMemberColor(anno, annoColDic)
            if n == 0:
                annoColor1 = robjects.StrVector(cols)
                annoColDicList = [annoColDic]
                ColSideColors = base.cbind(annoColor1)  # should use an R matrix instead of a dataframe
                print(annoColDicList)
            if n == 1:
                annoColor2 = robjects.StrVector(cols)
                ColSideColors = base.cbind(annoColor1, annoColor2)
                annoColDicList = annoColDicList + [annoColDic]
                print(annoColDicList)
            if n >= 2:
                annoColorX = robjects.StrVector(cols)
                ColSideColors = base.cbind(ColSideColors, annoColorX)
                annoColDicList = annoColDicList + [annoColDic]  # keep the legend list flat
        print(base.dim(ColSideColors))
        annoName = robjects.StrVector(numpy.array(data)[0, annoCols])
        ColSideColors.colnames = annoName

        outputDlg = OutputDialog()
        outPath = outputDlg.GetPath()
        print(outPath)
        fileName = outPath + "/heatmap.pdf"          
       
        heatmap3py(numDataR, ColSideColors, annoColDicList, fileName=fileName, outPath=outPath)
        heatmap3py(numDataR, ColSideColors, annoColDicList)
Example #7
 def testActivateTwice(self):
     # setUp method has already activated numpy converter
     self.assertEqual(rpyn.numpy2ri, robjects.conversion.py2ri)
     rpyn.activate()
     self.assertEqual(rpyn.numpy2ri, robjects.conversion.py2ri)
     rpyn.deactivate()
     self.assertNotEqual(rpyn.numpy2ri, robjects.conversion.py2ri)
     rpyn.deactivate()
     self.assertNotEqual(rpyn.numpy2ri, robjects.conversion.py2ri)
Example #8
 def testActivate(self):
     rpyn.deactivate()
     #FIXME: is the following still making sense ?
     self.assertNotEqual(rpyn.py2ri, conversion.py2ri)
     l = len(conversion.py2ri.registry)
     k = set(conversion.py2ri.registry.keys())
     rpyn.activate()
     self.assertTrue(len(conversion.py2ri.registry) > l)
     rpyn.deactivate()
     self.assertEqual(l, len(conversion.py2ri.registry))
     self.assertEqual(k, set(conversion.py2ri.registry.keys()))
Example #9
def load_ipython_extension(ip):
    """Load the extension in IPython."""
       
    if pandas2ri:
        pandas2ri.activate()
    else:
        numpy2ri.activate()

    ip.register_magics(RMagics)
    # Initialising rpy2 interferes with readline. Since, at this point, we've
    # probably just loaded rpy2, we reset the delimiters. See issue gh-2759.
    if ip.has_readline:
        ip.readline.set_completer_delims(ip.readline_delims)
Example #10
def wilcoxon_signed_rank_test(x, conflevel=0.95):
    """Wilcoxon signed rank test, with confidence interval. Requires rpy2."""
    from rpy2.robjects import numpy2ri
    from rpy2.robjects.packages import importr
    numpy2ri.activate()
    r_stats = importr('stats')
    d = {'conf.int': True, 'conf.level': conflevel}
    r = r_stats.wilcox_test(np.array(x), **d)
    r = dict(
        statistic=np.asarray(r.rx('statistic')).item(),
        pvalue=np.asarray(r.rx('p.value')).item(),
        confint=[float(v) for v in np.asarray(r.rx('conf.int')).flat],
        estimate=np.asarray(r.rx('estimate')).item(),
        )
    return r
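A usage sketch, assuming numpy is imported as np at module level, as the snippet implies:

import numpy as np

# paired differences; R's wilcox.test returns the signed-rank statistic,
# p-value, confidence interval and pseudomedian estimate
x = np.array([1.8, -0.5, 2.1, 0.7, 1.2, -0.3, 0.9, 1.5])
res = wilcoxon_signed_rank_test(x, conflevel=0.95)
print(res['pvalue'], res['confint'])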
Example #11
def activate():
    global original_py2ri, original_ri2ro

    # If module is already activated, there is nothing to do
    if original_py2ri: 
        return

    #FIXME: shouldn't the use of numpy conversion be made
    #       explicit in the pandas conversion ?
    #       (and this remove the need to activate it ?)
    numpy2ri.activate()
    original_py2ri = conversion.py2ri
    original_ri2ro = conversion.ri2ro
    conversion.py2ri = pandas2ri
    conversion.ri2ro = ri2pandas 
Example #12
def _init_r():
    """Private function to initialise R, only executed when needed.

    """

    global _r_initialised
    global rpy2
    global ro
    global r

    if not _r_initialised:
        import rpy2  # noqa
        import rpy2.robjects as ro  # noqa
        from rpy2.robjects import r  # noqa
        import rpy2.robjects.numpy2ri as numpy2ri
        numpy2ri.activate()
        _r_initialised = True
Example #13
 def OnPCA(self, event):   # this function relies on the vegan package
     number = self.notebook.GetCurrentDataId()
     pcaDlg = DialogPCA(title=u"Input for PCA")   
     startColNum, groupColNum = pcaDlg.GetValue()
     import rpy2.robjects as robjects
     from rpy2.robjects.packages import importr
     base = importr("base")
     vegan = importr("vegan")
     graphics = importr("graphics")
     stats = importr("stats") 
            
     data = self.notebook.data[number]
     
     # need to transpose if samples are arranged along columns:
     # the Python array is row-ordered and the PCA here is computed per row
     numData = numpy.array(data)[1:,startColNum: ].astype(float)
     grp = numpy.array(data)[1:, groupColNum]
     # colsDic = {1:"red", 2:"orange", 3:"blue", 4:"forestgreen"}
     groupColDic = getCategoryColorDic (list(set(grp)), colsDic)
     cols = getMemberColor(grp, groupColDic)        
     # col = [colsDic[x] for x in grp]
     # print col
     from rpy2.robjects import numpy2ri
     numpy2ri.activate()   # enable automatic numpy-array-to-R-matrix conversion
     pca = vegan.rda(numData, scale = True)

     col4R = robjects.StrVector(cols)  # vectors need explicit conversion; numpy2ri does not cover this case
     scl = 1 ## scaling
     
     graphics.plot(pca, display = "sites", scaling = scl , type = "n")
     # stats.biplot(pca, main = "biplot")
     graphics.points(pca, display = "sites", scaling = scl, col = col4R, pch = 16)  # color map to the group
     # lev = base.levels(grp)
     lev = list(set(grp))
     # print lev
     # note: the ordering of the group set differs between Python and R; safer to stick to a single method
     for i in range(len(lev)):
         ## draw ellipse per group
         vegan.ordiellipse(pca, display = "sites", kind = "se", scaling = scl, groups =  robjects.StrVector(grp), col = groupColDic[lev[i]], show_groups = lev[i])
     ## centroids
     scrs = base.as_data_frame(vegan.scores(pca, display = "sites", scaling = scl, choices = robjects.IntVector([1,2])))
     cent = base.do_call(base.rbind, base.lapply(base.split(scrs, robjects.StrVector(grp)), base.colMeans))  # split map scores to group
     centRowname = base.row_names(cent)
     centCols = [groupColDic[x] for x in centRowname]
     graphics.points(cent, col = robjects.StrVector(centCols), pch = 3, cex = 1.1)
Example #14
 def run(self, x, init=None, kRange = None):
     self.shape = x.shape
     numpy2ri.activate()
     r = robjects.r
     r.options(warn=-1)
     r.library('mclust')
     
     if kRange is None:
         kRange = arange(1, 10)
     
     if init is None:
         mcr = r.Mclust(x)
     else:
         subset = random.sample(np.arange(self.shape[0]),init)
         subsetR = r['list'](subset=subset)
         mcr = r.Mclust(x,initialization=subsetR)
     self.mclustRes = dict([(i[0], i[1]) for i in mcr.items()])
     return self
Example #15
def init_rpy():
    global _rpy_initialized
    if _rpy_initialized:
        return
    _rpy_initialized = True

    from rpy2 import robjects
    from rpy2.robjects import numpy2ri
    import os
    path = os.path.dirname(__file__)

    robjects.r("options(warn=-1)")

    with open(path + "/rdc.R", "r") as rfile:
        code = rfile.read()
        robjects.r(code)

    numpy2ri.activate()
Example #16
def cqn(matrix, gc_content, lengths):
    """
    Conditional quantile normalization (CQN) with the ``cqn`` R library.
    It uses GC content and length of regulatory elements as covariates.

    Requires the R package "cqn" to be installed:

    .. highlight:: R
    .. code-block:: R

        if (!requireNamespace("BiocManager", quietly = TRUE))
            install.packages("BiocManager")
        BiocManager::install("cqn")

    Parameters
    ----------
    matrix : :class:`pandas.DataFrame`
        DataFrame to normalize.
    gc_content : :class:`pandas.Series`
        Series with GC content of each feature in ``matrix``.
    lengths : :class:`pandas.Series`
        Series with length of each feature in ``matrix``.

    Returns
    -------
    :class:`pandas.DataFrame`
        Normalized DataFrame
    """
    from rpy2.robjects import numpy2ri, pandas2ri, r
    from rpy2.robjects.packages import importr

    numpy2ri.activate()
    pandas2ri.activate()

    importr("cqn")

    cqn_out = r.cqn(matrix, x=gc_content, lengths=lengths)

    y_r = cqn_out[list(cqn_out.names).index("y")]
    y = pd.DataFrame(np.array(y_r), index=matrix.index, columns=matrix.columns)
    offset_r = cqn_out[list(cqn_out.names).index("offset")]
    offset = pd.DataFrame(np.array(offset_r), index=matrix.index, columns=matrix.columns)

    return y + offset
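A hedged usage sketch with synthetic inputs shaped as the docstring describes (features in rows, samples in columns); the index and column names below are made up for illustration:

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
mat = pd.DataFrame(rng.poisson(50, size=(200, 4)),
                   index=["region_%d" % i for i in range(200)],
                   columns=["sample_%d" % j for j in range(4)])
gc = pd.Series(rng.uniform(0.3, 0.7, size=200), index=mat.index)
lengths = pd.Series(rng.integers(200, 2000, size=200), index=mat.index)

normalized = cqn(mat, gc, lengths)   # y + offset, same shape as mat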
Example #17
    def __init__(self, *args, **kwargs):

        # Task specific arguments.
        self.snp_set = kwargs.pop("snp_set_file", None)
        if self.snp_set:
            filename = self.snp_set
            self.snp_set = self._parse_snp_set(self.snp_set)

            m = ("Using SNP sets from '{}'. Found a total of {} variants in {}"
                 " different SNP sets.")
            m = m.format(filename, self.snp_set.shape[0],
                         self.snp_set["set"].nunique())
            logger.info(m)

        self.skat_o = kwargs.pop("SKAT-O", False)
        if self.skat_o:
            logger.info("Using the SKAT-O test.")

        # Task initialization using the abstract implementation.
        super(SKATTest, self).__init__(*args, **kwargs)

        # Check installation.
        SKATTest.check_skat()

        # Import rpy2.
        from rpy2.robjects import numpy2ri
        numpy2ri.activate()  # Support for numpy arrays.

        import rpy2.robjects
        self.robjects = rpy2.robjects
        self.r = rpy2.robjects.r

        from rpy2.robjects.packages import importr

        # Load the SKAT package.
        try:
            self.skat = importr("SKAT")
        except Exception:
            raise EnvironmentError(
                1,
                "SKAT needs to be installed in your R environment to use "
                "SKATTest."
            )
Example #18
    def algorithm(X, y):

        numpy2ri.activate()
        rpy.r.assign('X', X)
        rpy.r.assign('y', y)
        rpy.r('''
y = as.matrix(y)
D = data.frame(X, y)
M = glm(y ~ ., family=binomial(link='probit'), data=D)
M0 = glm(y ~ 1, family=binomial(link='probit'), data=D)
Mselect = step(M, direction='both', scope=list(upper=M, lower=M0), trace=FALSE)
selected_vars = names(coef(Mselect))
''')
        selected_vars = ' + '.join(sorted(list(rpy.r('selected_vars'))))
        selected_vars = selected_vars.replace('(Intercept)', '1')
        numpy2ri.deactivate()

        return tuple(selected_vars.split(' + '))
Example #19
def cqn(matrix, gc_content, lengths):
    from rpy2.robjects import numpy2ri, pandas2ri, r
    from rpy2.robjects.packages import importr

    numpy2ri.activate()
    pandas2ri.activate()

    importr("cqn")

    cqn_out = r.cqn(matrix, x=gc_content, lengths=lengths)

    y_r = cqn_out[list(cqn_out.names).index("y")]
    y = pd.DataFrame(np.array(y_r), index=matrix.index, columns=matrix.columns)
    offset_r = cqn_out[list(cqn_out.names).index("offset")]
    offset = pd.DataFrame(np.array(offset_r),
                          index=matrix.index,
                          columns=matrix.columns)

    return y + offset
Example #20
def gaussian_setup(X, Y, run_CV=True):
    """

    Some calculations that can be reused by methods:
    
    lambda.min, lambda.1se, lambda.theory and Reid et al. estimate of noise

    """
    n, p = X.shape

    Xn = X / np.sqrt((X**2).sum(0))[None, :]

    numpy2ri.activate()
    rpy.r.assign('X', X)
    rpy.r.assign('Y', Y)
    rpy.r('X=as.matrix(X)')
    rpy.r('Y=as.numeric(Y)')
    rpy.r('sigma_ds=estimate_sigma_data_splitting(X,Y)')
    sigma_ds = rpy.r('sigma_ds')

    l_theory = np.fabs(Xn.T.dot(np.random.standard_normal(
        (n, 500)))).max(1).mean() * np.ones(p) * sigma_ds

    if run_CV:
        numpy2ri.activate()
        rpy.r.assign('X', X)
        rpy.r.assign('Y', Y)
        rpy.r('X=as.matrix(X)')
        rpy.r('Y=as.numeric(Y)')
        rpy.r('G = cv.glmnet(X, Y, intercept=FALSE, standardize=FALSE)')
        rpy.r(
            'sigma_reid = selectiveInference:::estimate_sigma(X, Y, coef(G, s="lambda.min")[-1]) # sigma via Reid et al.'
        )
        rpy.r("L = G[['lambda.min']]")
        rpy.r("L1 = G[['lambda.1se']]")
        L = rpy.r('L')
        L1 = rpy.r('L1')
        sigma_reid = rpy.r('sigma_reid')[0]
        numpy2ri.deactivate()
        return L * np.sqrt(X.shape[0]), L1 * np.sqrt(
            X.shape[0]), l_theory, sigma_reid
    else:
        return None, None, l_theory, None
Example #21
    def python_to_r_object(cls, item):
        """ 把python对象转化为R对象,类方法

        :param item: Python对象,可以转换的类型有:list,tuple,pd.Series, np.ndarray, pd.DataFrame
        :return: R对象
        """
        numpy2ri.activate()
        if isinstance(item,(list, tuple, pd.Series)):
            return np.array(item)
        elif isinstance(item, pd.DataFrame):
            data_dict = {col_names: np.array(item[col_names]) for col_names in item.columns}
            rdataframe = DataFrame(data_dict)
            rdataframe.rownames = np.array(item.index)
            return rdataframe
        elif isinstance(item, (np.ndarray,bool,int,float,str)):
            return item
        else:
            raise TypeError('Unsupported type: %s' % type(item))
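A round-trip sketch; MyConverter stands in for whatever class hosts this method (a hypothetical name), with pandas and numpy imported as in the snippet:

import pandas as pd

df = pd.DataFrame({'a': [1.0, 2.0], 'b': [3.0, 4.0]}, index=['r1', 'r2'])
rdf = MyConverter.python_to_r_object(df)         # R data.frame with rownames r1, r2
vec = MyConverter.python_to_r_object((1, 2, 3))  # tuple becomes an np.ndarray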
Example #22
def convert_to_r_data(data):
    # Input is sumu.Data

    init_r()

    numpy2ri.activate()
    datar = r.matrix(data.all().flatten(),
                     nrow=data.N,
                     ncol=data.n,
                     byrow=True)
    numpy2ri.deactivate()

    discrete = data.discrete
    arities = data.arities is not False

    datar = r['datapath_or_matrix_to_numeric_dataframe'](datar,
                                                         discrete=discrete,
                                                         arities=arities)
    return datar
Example #23
def zero_inflate(file):
    utils = importr("utils")
    numpy2ri.activate()
    nr, nc = file.shape
    Br = ro.r.matrix(file, nrow=nr, ncol=nc)
    ro.r.assign("tab", Br)
    zr = ro.r('''
    set.seed(1)
    dat <- tab
    fo <- as.matrix((dat != 0))
    mode(fo) <- "integer"
    m <- matrix(sample(0:1,nrow(dat)*ncol(dat), replace=TRUE, prob=c(1,3)),nrow(dat),ncol(dat))
    dat <- dat*m
    zr <- fo*(1-m)
    ''')
    zeros = ro.r('fo')
    data = ro.r('dat')
    numpy2ri.deactivate()
    return data, zr, zeros
Example #24
def scran_normalize(adata):
    import numpy as np
    import rpy2.robjects as ro
    from rpy2.robjects import numpy2ri
    from rpy2.robjects.packages import importr
    importr('scran')
    numpy2ri.activate()
    ro.r.assign('mat', adata.X.T)
    qclust_params = 'mat'
    # qclust_params = f'mat, min.size={min_size}, max.size={max_size}'
    ro.reval(f'cl <- quickCluster({qclust_params})')
    csf_params = f'mat, clusters=cl'
    # csf_params = f'mat, clusters=cl, min.mean={min_mean}'
    sf = np.asarray(ro.reval(f'computeSumFactors({csf_params})'))
    adata.obs['sf'] = sf
    adata.layers['counts'] = adata.X.copy()
    adata.X /= adata.obs['sf'].values[:, None]
    numpy2ri.deactivate()
    return adata
Example #25
def is_Ckmeans_installed():
    try:
        import rpy2
        import rpy2.robjects.numpy2ri as numpy2ri
        try:
            from importlib import reload
            reload(rpy2.robjects.numpy2ri)
        except Exception:
            pass
        import rpy2.robjects as ro
        ro.conversion.py2ri = numpy2ri
        numpy2ri.activate()
        from rpy2.robjects.packages import importr
        importr('Ckmeans.1d.dp')
        median_seg_func = ro.r('Ckmedian.1d.dp')
        mean_seg_func = ro.r('Ckmeans.1d.dp')
    except Exception:
        return False
    return True
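Guarded use of the optional dependency, mirroring the check above; the fallback branch is purely illustrative:

import numpy as np
import rpy2.robjects as ro

if is_Ckmeans_installed():
    x = ro.FloatVector(np.sort(np.random.rand(50)))
    result = ro.r('Ckmeans.1d.dp')(x, 3)   # optimal 1-D segmentation into 3 clusters
else:
    print("Ckmeans.1d.dp not available; fall back to a pure-Python method")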
Example #26
def find_modules(data, sym=False):
    """
    Take an adjacency matrix, and return the modules that are found and the
    reordering of the matrix and the corresponding strings for vis.js.
    """
    # If it's a bipartite graph we need to use a different modularity finding
    # algorithm: http://rsos.royalsocietypublishing.org/content/3/1/140536
    if not sym:
        numpy2ri.activate()
        res = r_code.run_bivar_modules(np.abs(data.values))
        rix = np.array(res[0]) - 1
        cix = np.array(res[1]) - 1
        rl = np.array(res[2])
        cl = np.array(res[3])
        numpy2ri.deactivate()

    # Else we can use the highly popular: https://arxiv.org/abs/0803.0476
    else:
        graph = create_network(r=data, p=None, sym=True, modules=True)
        part = community.best_partition(graph)

        # Not all indices make it into part; those that don't are assigned
        # to a dummy module with k = max(modules) + 1.
        part_complete = []
        part_non_complete = []
        max_module_n = max(part.values())
        for i in data.index:
            if i in part:
                part_complete.append(part[i])
                part_non_complete.append(part[i])
            else:
                part_complete.append(max_module_n + 1)

        # reorder data matrix so modules end up next to each other
        cix = rix = np.argsort(part_complete)
        # we only want to keep the true module labels not the dummy ones
        part_non_complete = np.array(part_non_complete)
        cl = rl = part_non_complete[np.argsort(part_non_complete)]

    # transform the selected modules into strings that are understood by vis.js
    modules = get_mod_strings(rl, cl, sym)
    return list(rix), list(cix), modules
Example #27
def map_loinc_system():
    if config.print_status == 'Y':
        print('Mapping LOINC System')
    if os.path.exists(config.out_dir + "LOINC_System_to_Long.csv"):
        system_map = pd.read_csv(config.out_dir + "LOINC_System_to_Long.csv",
                                 sep="|")
    else:
        numpy2ri.activate()
        stringdist = importr('stringdist', lib_loc=config.lib_loc)
        loinc_syst = parsed_loinc_fields[['System', 'LongName']]
        loinc_syst = loinc_syst[(~pd.isnull(loinc_syst.System))
                                & (loinc_syst.System != '')].reset_index(
                                    drop=True)
        loinc_syst.System = loinc_syst.System.str.split(" ")
        loinc_syst.LongName = loinc_syst.LongName.str.split(" ")
        system_tokens = pd.Series([y for x in loinc_syst.System
                                   for y in x]).unique()
        longname_tokens = pd.Series(
            [y for x in loinc_syst.LongName for y in x]).unique()
        system_df = pd.DataFrame(0,
                                 index=system_tokens,
                                 columns=longname_tokens)
        n_rows = loinc_syst.shape[0]
        for i in range(n_rows):
            for j, term in enumerate(loinc_syst.System[i]):
                dists = stringdist.stringdist(term,
                                              loinc_syst.LongName[i],
                                              method='jw',
                                              p=0)
                bestMatch = loinc_syst.LongName[i][np.argmin(dists)]
                system_df.loc[term,
                              bestMatch] = system_df.loc[term, bestMatch] + 1
        high_count = system_df.idxmax(axis=1).values
        system_map = pd.DataFrame({
            'SystemToken': system_tokens,
            'SystemMap': high_count
        })
        if config.write_file_loinc_parsed:
            system_map.to_csv(config.out_dir + "LOINC_System_to_Long.csv",
                              sep="|",
                              index=False)
    return system_map
Example #28
def mtcorrect(p_value_dict, **kwargs):
    """ Apply MT correction.  This is a wrapper for R's p.adjust function.

    Arguments:
     p_value_dict: a dict with keys = probe names, and values of p-values

    kwargs:
     method: MT correction method, from R.  See mtcorrect_methods.  Default is 'none'

    Returns:
     adjusted_p
    """
    continue_flag = True

    method = test_kwarg('method', kwargs, mtcorrect_methods)
    
    try:
        from rpy2.robjects import r
        from rpy2 import robjects
        from rpy2.robjects import numpy2ri
        numpy2ri.activate()
    except ImportError:
        print "ImportError: networkstatistics.mtcorrect() requires a functional rpy2 and R, exiting..."
        continue_flag = False

    if continue_flag:
        row_names = [x for x in p_value_dict.keys()]
        p_values_list = [p_value_dict[id] for id in row_names]
        # need to create an r object first
        p_values_list_r = robjects.FloatVector(p_values_list)
        # need to assign the r object into the r namespace
        r.assign('p_values_list_r', p_values_list_r)
        method_r = mtcorrect_py_2_r_names[method]
        r('corrected_data = p.adjust(p_values_list_r, method = ' + str(method_r) + ')')
        adjusted_p = robjects.numpy2ri.ri2numpy(r('corrected_data'))
        adjusted_p = {id: adjusted_p[i] for i, id in enumerate(row_names)}
        return adjusted_p
    else:
        return {}
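A call sketch, assuming mtcorrect_methods includes 'BH' and mtcorrect_py_2_r_names maps it to a quoted R string (both are defined elsewhere in the module):

pvals = {'probe_%d' % i: p
         for i, p in enumerate([0.001, 0.02, 0.3, 0.04, 0.5])}
adjusted = mtcorrect(pvals, method='BH')   # Benjamini-Hochberg adjusted p-values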
Example #29
    def differential_gene_expression(self, epoch):

        generated_raw, generated_labels = self.load_samples(epoch)
        true_raw, true_labels = self.input_data.get_raw_data()

        import rpy2.robjects as ro
        from rpy2.robjects.packages import importr
        from rpy2.robjects import numpy2ri
        numpy2ri.activate()
        utils = importr('utils')
        utils.install_packages('ROTS', repos='http://cran.us.r-project.org')

        diff_expression = ro.r['source'](
            "libraries/differential_gene_expression.R")[0]

        result = diff_expression(true_raw, true_labels, generated_raw,
                                 generated_labels)
        numpy2ri.deactivate()

        return np.asarray(result)
Example #30
def estimateFullCovMatrix_mvnmle(data):
    import rpy2.robjects as robjects
    from rpy2.robjects.packages import importr
    from rpy2.robjects import numpy2ri
    numpy2ri.activate()
    
    importr("mvnmle")
    r = robjects.r
    
    # r('data(apple)')
    robjects.globalenv["data"] = data
    # robjects.globalenv["data"] = r('apple')
    # print("my data = ")
    # print(robjects.globalenv["data"])
    
    covarianceMatrixEstimate = r('''mlest(data)$sigmahat''')
    
    # print("covarianceMatrixEstimate = ")
    # print(covarianceMatrixEstimate)
    return covarianceMatrixEstimate
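A minimal call, assuming an R installation with the mvnmle package; mlest estimates the covariance by maximum likelihood in the presence of the injected NaN (NA in R) entries:

import numpy

data = numpy.random.standard_normal((100, 3))
data[::10, 1] = numpy.nan                   # inject some missingness
sigma_hat = estimateFullCovMatrix_mvnmle(data)
print(numpy.asarray(sigma_hat))             # 3x3 covariance estimate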
Example #31
def isotonic_unimodal_regression_R(x, y, normalize_data=True):
    """
    Perform unimodal isotonic regression via the Iso package in R
    """

    numpy2ri.activate()
    # n_instances = x.shape[0]
    # assert y.shape[0] == n_instances

    importr('Iso')
    z = robjects.r["ufit"](y, x=x, type='b')
    iso_x, iso_y = numpy.array(z.rx2('x')), numpy.array(z.rx2('y'))

    if normalize_data:
        auc = numpy.trapz(iso_y, iso_x)
        iso_y = iso_y / auc

    assert is_piecewice_linear_pdf(iso_x, iso_y), numpy.trapz(iso_y, iso_x)

    return iso_x, iso_y
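A usage sketch on noisy unimodal data, assuming R's Iso package plus the module-level numpy, robjects, importr, and is_piecewice_linear_pdf names the snippet relies on:

import numpy

x = numpy.linspace(0.0, 1.0, 50)
y = numpy.exp(-((x - 0.4) ** 2) / 0.02) + 0.05 * numpy.random.rand(50)
iso_x, iso_y = isotonic_unimodal_regression_R(x, y)   # normalized to unit area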
Example #32
    def select(self):
        active_set = self._method.generate_pvalues()[
            0]  # gives us selected variables at 1SE
        if len(active_set) > 0:
            numpy2ri.activate()
            rpy.r.assign("X", self.X[:, active_set])
            rpy.r.assign("Y", self.Y)
            rpy.r.assign("K", self.POSI_K)
            rpy.r('M = lm(Y ~ X - 1)')
            rpy.r('L = coef(M) - K * sqrt(diag(vcov(M)))')
            rpy.r('U = coef(M) + K * sqrt(diag(vcov(M)))')
            L = rpy.r('L')
            U = rpy.r('U')
            numpy2ri.deactivate()

            pre_select = np.nonzero((L > 0) + (U < 0))[0]
            selected = [active_set[i] for i in pre_select]
            return selected, active_set
        else:
            return [], []
Example #33
def install_cl():
    """Load the `causalLearning` R package and activate necessary conversion

    :return: The robject for `causalLearning`
    """

    # robjects.r is a singleton
    robjects.r.options(download_file_method="curl")
    numpy2ri.activate()
    package_names = ["devtools"]
    utils = rpackages.importr("utils")
    utils.chooseCRANmirror(ind=0)

    names_to_install = [
        x for x in package_names if not rpackages.isinstalled(x)
    ]
    if len(names_to_install) > 0:
        utils.install_packages(StrVector(names_to_install))

    return importr("causalLearning")
Example #34
    def fit(self, X, Y):
        numpy2ri.activate()
        rPMA = importr('PMA')
        typex, typez = _check_penalty_type(self.penalty)
        X, x_mean, x_std = _center_data(X)
        Y, y_mean, y_std = _center_data(Y)
        if self.n_component is None:
            self.n_component = np.min([X.shape[1], Y.shape[1]])
        out = rPMA.CCA(x=X, z=Y, K=self.n_component, \
                niter=self.n_iter, standardize=False, \
                typex=typex, typez=typez, \
                penaltyx=self.C[0], penaltyz=self.C[1], \
                trace=False)

        self.u = numpy2ri.ri2py(out[0])
        self.v = numpy2ri.ri2py(out[1])
        self._x_score, self._y_score = self.transform(X, Y)
        self._cancorr = _cancorr(X, Y, self.u, self.v)
        numpy2ri.deactivate()
        return self
Example #35
    def generate_pvalues(self):
        try:
            numpy2ri.activate()
            rpy.r.assign('X', self.X)
            rpy.r.assign('y', self.Y)
            rpy.r.assign('sigma_reid', self.sigma_reid)
            rpy.r('y = as.numeric(y)')

            rpy.r.assign('lam', self.lagrange[0])
            rpy.r('''
        p = ncol(X);
        n = nrow(X);

        sigma_est = 1.
        if (p >= n) { 
            sigma_est = sigma_reid
        } else {
            sigma_est = sigma(lm(y ~ X - 1))
        }

        penalty_factor = rep(1, p);
        lam = lam / sqrt(n);  # lambdas are passed a sqrt(n) free from python code
        soln = selectiveInference:::solve_problem_glmnet(X, y, lam, penalty_factor=penalty_factor, loss="ls")
        PVS = selectiveInference:::inference_group_lasso(X, y, 
                                                         soln, groups=1:ncol(X), 
                                                         lambda=lam, penalty_factor=penalty_factor, 
                                                         sigma_est, loss="ls", algo="Q", 
                                                         construct_ci=FALSE)
        active_vars=PVS$active_vars - 1 # for 0-based
        pvalues = PVS$pvalues
        ''')

            pvalues = np.asarray(rpy.r('pvalues'))
            active_set = np.asarray(rpy.r('active_vars'))
            numpy2ri.deactivate()
            if len(active_set) > 0:
                return active_set, pvalues
            else:
                return [], []
        except Exception:
            return [np.nan], [np.nan]  # some R failure occurred
Example #36
    def __init__(self, *args, **kwargs):

        # Task specific arguments.
        self.snp_set = kwargs.pop("snp_set_file", None)
        if self.snp_set:
            filename = self.snp_set
            self.snp_set = self._parse_snp_set(self.snp_set)

            m = ("Using SNP sets from '{}'. Found a total of {} variants in {}"
                 " different SNP sets.")
            m = m.format(filename, self.snp_set.shape[0],
                         self.snp_set["set"].nunique())
            logger.info(m)

        self.skat_o = kwargs.pop("SKAT-O", False)
        if self.skat_o:
            logger.info("Using the SKAT-O test.")

        # Task initialization using the abstract implementation.
        super(SKATTest, self).__init__(*args, **kwargs)

        # Check installation.
        SKATTest.check_skat()

        # Import rpy2.
        from rpy2.robjects import numpy2ri
        numpy2ri.activate()  # Support for numpy arrays.

        import rpy2.robjects
        self.robjects = rpy2.robjects
        self.r = rpy2.robjects.r

        from rpy2.robjects.packages import importr

        # Load the SKAT package.
        try:
            self.skat = importr("SKAT")
        except Exception:
            raise EnvironmentError(
                1, "SKAT needs to be installed in your R environment to use "
                "SKATTest.")
Example #37
def calculatingAovR(tableFactor, Data, Formula):
    """Computes and fits an Analysis of Variance Model"""
    numpy2ri.activate()
    for t in tableFactor:
        factorName = t[0]
        factorType = t[1]
        factorData = t[2]
        # sending Data to global variable in R (Factor definition for
        # Subject, Within or Between Type and FloatVector for Covariate)
        if factorType == 'Covariate':
            tmp = robjects.FloatVector(factorData)
            robjects.globalenv[factorName] = tmp
        else:
            tmp = robjects.r.factor(factorData)
            robjects.globalenv[factorName] = tmp

    DataR = robjects.Matrix(Data.T)
    robjects.globalenv["DataR"] = DataR
    TextR = 'aov(%s)' % Formula
    express = robjects.r.parse(text=TextR)
    Fit = robjects.r.eval(express)
    robjects.globalenv["Fit"] = Fit
    raw = robjects.r.summary(Fit)
    df = []
    for r in raw:
        for d in r[0][0][:-1]:
            df.append([int(d), int(r[0][0][-1])])
    pValue = np.hstack([np.array([c[4][:-1] for c in r]) for r in raw])
    FValue = np.hstack([np.array([c[3][:-1] for c in r]) for r in raw])
    terms = []
    if len(raw) == 1:
        for r in raw[0]:
            for t in r.rownames[0:-1]:
                terms.append(t.replace(' ', ''))
    else:
        for i in raw:
            for r in i:
                for t in r.rownames[0:-1]:
                    terms.append(t.replace(' ', ''))

    return pValue, FValue, terms, df
Example #39
def activate():
    """
    Activate conversion between sparse matrices from Scipy and R’s Matrix package.

    Does nothing if this is the active conversion.
    """
    global original_converter

    if original_converter is not None:
        return

    original_converter = conversion.converter

    numpy2ri.activate()
    new_converter = conversion.Converter("scipy conversion",
                                         template=conversion.converter)
    numpy2ri.deactivate()

    overlay_converter(converter, new_converter)

    conversion.set_conversion(new_converter)
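A hedged sketch of what activation enables, assuming this module's converter registers scipy sparse types (as the docstring suggests) and R's Matrix package is installed:

import scipy.sparse as sp
import rpy2.robjects as ro

activate()
m = sp.random(5, 4, density=0.3, format='csc')
ro.globalenv['m'] = m              # converted through the overlaid rules
print(ro.r('class(m)'))            # a Matrix-package class, e.g. dgCMatrix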
Example #40
    def select(self, X, topic_range):
        if len(topic_range) == 1:
            if topic_range[0] == 2:
                topic_range_ = np.array([2, 3])
            else:
                topic_range_ = np.array([2, topic_range[0]])
        else:
            topic_range_ = topic_range
        numpy2ri.activate()
        X_r = np.array(X)
        lda = maptpx.topics(X_r, K=np.array(topic_range_), verb=self.verbose)
        criteria = np.array(dollar(lda, "BF"))
        numpy2ri.deactivate()

        if len(topic_range) == 1:
            if topic_range[0] == 2:
                return np.array([criteria[0]])
            else:
                return np.array([criteria[1]])
        else:
            return np.array(criteria)
Example #41
def calculate_preprocessing(spc_df):

    numpy2ri.activate()
    base = importr('base')
    utils = importr('utils')
    prospectr = importr('prospectr')

    # Ignore first 200 * 0.5 = 100 nm, pick every 20 * 0.5 = 10 nm
    subsample = list(range(200, spc_df.shape[1], 20))
    # This can be used only if the original spectral wavelengths are retained,
    # i.e. not for SG*-based spectra

    # SG0
    sg0 = np.array(prospectr.savitzkyGolay(spc_df.to_numpy(), m=0, w=101, p=3))

    # SG1
    sg1 = np.array(prospectr.savitzkyGolay(spc_df.to_numpy(), m=1, w=101, p=3))

    # Common for both SG filters because they use the same width
    sg_subsample = list(range(150, sg1.shape[1], 20))

    return {
        "Absorbances":
        spc_df.iloc[:, subsample].to_numpy(),
        "Absorbances-SG0-SNV":
        np.array(prospectr.standardNormalVariate(sg0))[:, sg_subsample],
        "Absorbances-SG1":
        sg1[:, sg_subsample],
        "Absorbances-SG1-SNV":
        np.array(prospectr.standardNormalVariate(sg1))[:, sg_subsample],
        "CR":
        np.array(prospectr.continuumRemoval(spc_df.to_numpy(),
                                            type="A"))[:, subsample],
        "Absorbances-SNV-DT":
        np.array(
            prospectr.detrend(
                spc_df.to_numpy(),
                wav=rpy2.robjects.FloatVector(
                    spc_df.columns.astype('float').to_numpy())))[:, subsample]
    }
Example #42
def probit_MLE(X, y, formula_terms, truth=None, alpha=0.1):

    numpy2ri.activate()
    rpy.r.assign('X', X)
    rpy.r.assign('y', y)
    rpy.r('D = data.frame(X, y)')
    rpy.r('M = glm(y ~ %s, family=binomial(link="probit"), data=D)' %
          ' + '.join(formula_terms))
    beta_hat = rpy.r('coef(M)')
    target_cov = rpy.r('vcov(M)')

    if truth is None:
        truth = np.zeros_like(beta_hat)
    SE = np.sqrt(np.diag(target_cov))

    Z = (beta_hat - truth) / SE
    Z0 = beta_hat / SE

    pvalues = normal_dbn.cdf(Z0)
    pvalues = 2 * np.minimum(pvalues, 1 - pvalues)

    pivots = normal_dbn.cdf(Z)
    pivots = 2 * np.minimum(pivots, 1 - pivots)

    upper = beta_hat + normal_dbn.ppf(1 - 0.5 * alpha) * SE
    lower = beta_hat - normal_dbn.ppf(1 - 0.5 * alpha) * SE

    covered = (upper > truth) * (lower < truth)

    results_df = pd.DataFrame({
        'naive_pivot': pivots,
        'naive_pvalue': pvalues,
        'naive_coverage': covered,
        'naive_length': upper - lower,
        'naive_upper': upper,
        'naive_lower': lower,
        'variable': formula_terms,
    })

    return beta_hat, target_cov, results_df
Example #43
    def select(self):

        numpy2ri.activate()
        rpy.r.assign('chol_k', self.knockoff_chol)
        rpy.r('''
        knockoffs = function(X) {
           mu = rep(0, ncol(X))
           mu_k = X # sweep(X, 2, mu, "-") %*% SigmaInv_s
           X_k = mu_k + matrix(rnorm(ncol(X) * nrow(X)), nrow(X)) %*% chol_k
           return(X_k)
        }
            ''')
        numpy2ri.deactivate()

        if True:
            numpy2ri.activate()
            rpy.r.assign('X', self.X)
            rpy.r.assign('Y', self.Y)
            rpy.r.assign('q', self.q)
            if self.forward_step:
                rpy.r(
                    'V = knockoff.filter(X, Y, fdr=q, knockoffs=knockoffs, stat=stat.forward_selection)$selected'
                )
            elif self.sqrt_lasso:
                rinterface.set_writeconsole_regular(null_print)
                rpy.r(
                    'V = knockoff.filter(X, Y, fdr=q, knockoffs=knockoffs, stat=stat.sqrt_lasso)$selected'
                )
                rinterface.set_writeconsole_regular(rinterface.consolePrint)
            else:
                rpy.r(
                    'V = knockoff.filter(X, Y, fdr=q, knockoffs=knockoffs)$selected'
                )
            rpy.r('if (length(V) > 0) {V = V-1}')
            V = rpy.r('V')
            numpy2ri.deactivate()
            return np.asarray(V, int), np.asarray(V, int)
        else:  # except:
            return [], []
Example #44
def cv_glmnet_lam(X, Y, seed=0):
    """

    Some calculations that can be reused by methods:
    
    lambda.min, lambda.1se, lambda.theory and Reid et al. estimate of noise

    """
    numpy2ri.activate()
    rpy.r('set.seed(%d)' % seed)
    rpy.r.assign('X', X.copy())
    rpy.r.assign('Y', Y.copy())
    rpy.r('X=as.matrix(X)')
    rpy.r('Y=as.numeric(Y)')
    rpy.r('set.seed(1)')
    rpy.r('G = cv.glmnet(X, Y, intercept=FALSE, standardize=FALSE)')
    rpy.r("L = G[['lambda.min']]")
    rpy.r("L1 = G[['lambda.1se']]")
    L = rpy.r('L')
    L1 = rpy.r('L1')
    numpy2ri.deactivate()
    return float(1.00001 * L[0]), float(1.00001 * L1[0]),
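A call sketch, assuming rpy is rpy2.robjects as the snippet implies and that the glmnet package is installed in R (it must be attached before cv.glmnet is visible):

import numpy as np

rpy.r('library(glmnet)')                 # make cv.glmnet available
X = np.random.standard_normal((100, 10))
Y = X[:, 0] + np.random.standard_normal(100)
lam_min, lam_1se = cv_glmnet_lam(X, Y, seed=0)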
Example #45
def _get_tree_onepiece_R(points, n_nodes=25):
    """
    Get a tree from a set of points.

    Wrapping around ElPiGraph.R computeElasticPrincipalTree.
    """
    from rpy2.robjects.packages import importr
    from rpy2.robjects import numpy2ri
    elpi = importr("ElPiGraph.R")

    numpy2ri.activate()
    tmp = elpi.computeElasticPrincipalTree(X=points, NumNodes=n_nodes,
                                           drawAccuracyComplexity=False,
                                           drawEnergy=False,
                                           drawPCAView=False,
                                           verbose=False
                                           )
    numpy2ri.deactivate()
    nodes = np.array(tmp[0][0])
    edges = np.array(tmp[0][1][0]) - 1

    return nodes, edges
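Fitting a small tree to random points, a sketch assuming the ElPiGraph.R package is installed in the R library path:

import numpy as np

pts = np.random.standard_normal((300, 2))     # noisy 2-D point cloud
nodes, edges = _get_tree_onepiece_R(pts, n_nodes=15)
print(nodes.shape, edges.shape)               # (15, 2) node coordinates, 0-based edge pairs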
Example #46
def run_minet(filename, algo):

    rn.activate()

    code = """library(minet)
    filename <- '""" + filename + """'
    first <- readLines(filename, n=1)
    names <- strsplit(first, '\t')
    names <- unlist(names, use.names=FALSE)
    d <- read.table(filename, skip=1, col.names = names)

    mim <- build.mim(d, estimator = "mi.empirical", disc = "equalfreq")

    weight_adjacency_matrix <- minet(mim, method='""" + algo + """', estimator="mi.empirical", disc="equalfreq");

    weight_adjacency_matrix;
    """

    f = ro.r(code)

    weight_adjacency_matrix = np.array(f)
    return weight_adjacency_matrix
Example #47
def multipleImputationMethod(data, nrImputedDataSets=5):
    import rpy2.robjects as robjects
    from rpy2.robjects.packages import importr
    from rpy2.robjects import numpy2ri
    numpy2ri.activate()

    importr("mice")
    r = robjects.r

    robjects.globalenv["data"] = data
    r('''myImputerForData <- mice(data, m = 5, method = 'pmm', seed = 101)''')

    allDataImputed = []
    for i in range(1, nrImputedDataSets + 1):
        imputedData = r('data.matrix(complete(myImputerForData,' + str(i) +
                        '))')
        # for the remaining NAN use simple mean imputation
        imputedData = meanImputation(imputedData)
        assert (not numpy.any(numpy.isnan(imputedData)))
        allDataImputed.append(imputedData)

    return allDataImputed
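A sketch, assuming an R installation with the mice package and the meanImputation helper referenced above:

import numpy

data = numpy.random.standard_normal((50, 4))
data[5, 2] = numpy.nan                          # a missing value to impute
imputed_sets = multipleImputationMethod(data, nrImputedDataSets=5)
print(len(imputed_sets))                        # 5 completed datasets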
Example #48
    def naive_estimator(self, active_set):
        """
        selected model
        """

        numpy2ri.activate()
        if self.model_target == 'selected':
            rpy.r.assign("X", self.X[:, active_set])
        else:
            n, p = self.X.shape
            if n > p:
                rpy.r.assign("X", self.X)
            else:
                return (active_set, np.ones(p) * np.nan)
        rpy.r.assign("Y", self.Y)
        rpy.r('beta_hat = coef(lm(Y ~ X - 1))')
        beta_hat = np.asarray(rpy.r('beta_hat'))
        n, p = self.X.shape
        beta_full = np.zeros(p)
        beta_full[active_set] = beta_hat

        return active_set, beta_full
Example #49
    def __init__(self,step_pattern="symmetric2",window_type=None,window_size=10000,
        distance_only=False,open_end=False,open_begin=False,rdtw=None):
        self.step_pattern = step_pattern
        self.window_type = window_type
        self.window_size = window_size
        self.distance_only = distance_only
        self.open_end = open_end
        self.open_begin = open_begin
        # # parameter check
        # if self.window_type is not None and window_size is None:
        #     raise ValueError("must specify window_size if window_type is not None.")
        """"""
        if rdtw is None:
            # rdtw package object
            self._rdtw = importr("dtw")
        else:
            self._rdtw = rdtw
        # array conversion activation
        numpy2ri.activate()
        pandas2ri.activate()

        # set window type if it's none
        if self.window_type is None: self.window_type = "none"
Example #50
def compute_results(y, X, sigma, active,
                    full_results={},
                    do_knockoff=False,
                    do_AIC=True,
                    do_BIC=True,
                    do_glmnet=True,
                    alpha=0.05,
                    maxstep=np.inf,
                    compute_maxT_identify=True,
                    burnin=2000,
                    ndraw=8000):

    n, p = X.shape

    results, FS = compute_pvalues(y, X, active, sigma, maxstep=maxstep,
                                  compute_maxT_identify=compute_maxT_identify,
                                  burnin=burnin,
                                  ndraw=ndraw)
    completion_idx = completion_index(results['variable_selected'], active)
    full_results.setdefault('completion_idx', []).append(completion_idx)

    for column in results.columns:
        for i in range(results.shape[0]):
            full_results.setdefault('%s_%d' % (str(column), i+1), []).append(results[column][i])

    for i in range(len(active)):
        full_results.setdefault('active_%d' % (i+1,), []).append(active[i])

    full_results.setdefault('alpha', []).append(alpha)

    if do_knockoff:

        # this will probably not work on miller
        import rpy2.robjects as rpy
        from rpy2.robjects import numpy2ri
        rpy.conversion.py2ri = numpy2ri.numpy2ri

        numpy2ri.activate()
        rpy.r.assign('X', X)
        rpy.r.assign('y', y)

        # knockoff

        rpy.r.assign('alpha', alpha)

        knockoff = np.array(rpy.r("""
        library(knockoff)
        knockoff.filter(X = X, y = y, fdr=alpha, knockoffs=create.fixed, offset=0)$selected
    """)) - 1
        knockoff_R = knockoff.shape[0]
        knockoff_V = knockoff_R - len(set(active).intersection(knockoff))
        knockoff_screen = set(knockoff).issuperset(active)

        knockoff_plus = np.array(rpy.r("""
        knockoff.filter(X = X, y = y, fdr=alpha, knockoffs=create.fixed, offset=1)$selected
    """)) - 1
        knockoff_plus_R = knockoff_plus.shape[0]
        knockoff_plus_V = knockoff_plus_R - len(set(active).intersection(knockoff_plus))
        knockoff_plus_screen = set(knockoff_plus).issuperset(active)

        full_results.setdefault('knockoff_R', []).append(knockoff_R)
        full_results.setdefault('knockoff_V', []).append(knockoff_V)
        full_results.setdefault('knockoff_screen', []).append(knockoff_screen)

        full_results.setdefault('knockoff_plus_R', []).append(knockoff_plus_R)
        full_results.setdefault('knockoff_plus_V', []).append(knockoff_plus_V)
        full_results.setdefault('knockoff_plus_screen', []).append(knockoff_plus_screen)

        numpy2ri.deactivate()

    if do_AIC:

        # this will probably not work on miller
        import rpy2.robjects as rpy
        from rpy2.robjects import numpy2ri
        rpy.conversion.py2ri = numpy2ri.numpy2ri

        numpy2ri.activate()
        rpy.r.assign('X', X)
        rpy.r.assign('y', y)
        rpy.r('''M = step(lm(y ~ 1, 
                             data=data.frame(X, y)), 
                             scope=list(upper="~ %s"), 
                             direction="forward", 
                             trace=FALSE)''' % ' + '.join(['X%d' % i for i in range(1, p+1)]))
        AIC = np.asarray([int(v[1:]) for v in rpy.r("all.vars(M$call$formula[[3]])")]) - 1 # subtract 1 for 0-based indexing

        AIC_R = AIC.shape[0]
        AIC_V = AIC_R - len(set(active).intersection(AIC))
        AIC_screen = set(AIC).issuperset(active)

        full_results.setdefault('AIC_R', []).append(AIC_R)
        full_results.setdefault('AIC_V', []).append(AIC_V)
        full_results.setdefault('AIC_screen', []).append(AIC_screen)

        numpy2ri.deactivate()

    if do_BIC:
        import rpy2.robjects as rpy
        from rpy2.robjects import numpy2ri
        rpy.conversion.py2ri = numpy2ri.numpy2ri

        numpy2ri.activate()
        rpy.r.assign('X', X)
        rpy.r.assign('y', y)
        rpy.r('''M = step(lm(y ~ 1, 
                             data=data.frame(X, y)), 
                             scope=list(upper="~ %s"), 
                             direction="forward", 
                             k=log(nrow(X)),
                             trace=FALSE)''' % ' + '.join(['X%d' % i for i in range(1, p+1)]))
        BIC = np.asarray([int(v[1:]) for v in rpy.r("all.vars(M$call$formula[[3]])")]) - 1 # subtract 1 for 0-based indexing

        BIC_R = BIC.shape[0]
        BIC_V = BIC_R - len(set(active).intersection(BIC))
        BIC_screen = set(BIC).issuperset(active)

        full_results.setdefault('BIC_R', []).append(BIC_R)
        full_results.setdefault('BIC_V', []).append(BIC_V)
        full_results.setdefault('BIC_screen', []).append(BIC_screen)

        numpy2ri.deactivate()

    if do_glmnet:

        import rpy2.robjects as rpy
        from rpy2.robjects import numpy2ri
        rpy.conversion.py2ri = numpy2ri.numpy2ri

        numpy2ri.activate()
        rpy.r.assign('X', X)
        rpy.r.assign('y', y)
        rpy.r('''library(glmnet);
                 y = as.matrix(y);
                 X = as.matrix(X); 
                 CVG = cv.glmnet(X, y);
                 G = glmnet(X, y);
                 B = coef(G, s=CVG$lambda.min, exact=TRUE);
                 selected = which(B[2:length(B)] != 0);
                 B2 = coef(G, s=CVG$lambda.1se, exact=TRUE);
                 selected2 = which(B2[2:length(B2)] != 0);
                 ''')

        GLMnet = np.asarray(rpy.r("selected")) - 1 # subtract 1 for 0-based indexing
        GLMnet_R = GLMnet.shape[0]
        GLMnet_V = GLMnet_R - len(set(active).intersection(GLMnet))
        GLMnet_screen = set(GLMnet).issuperset(active)

        full_results.setdefault('GLMnet_R', []).append(GLMnet_R)
        full_results.setdefault('GLMnet_V', []).append(GLMnet_V)
        full_results.setdefault('GLMnet_screen', []).append(GLMnet_screen)

        GLMnet1se = np.asarray(rpy.r("selected2")) - 1 # subtract 1 for 0-based indexing
        GLMnet1se_R = GLMnet1se.shape[0]
        GLMnet1se_V = GLMnet1se_R - len(set(active).intersection(GLMnet1se))
        GLMnet1se_screen = set(GLMnet1se).issuperset(active)

        full_results.setdefault('GLMnet1se_R', []).append(GLMnet1se_R)
        full_results.setdefault('GLMnet1se_V', []).append(GLMnet1se_V)
        full_results.setdefault('GLMnet1se_screen', []).append(GLMnet1se_screen)

        numpy2ri.deactivate()

    for pval, rule_ in product(['maxT_identify_pvalue',
                                'maxT_identify_unknown_pvalue',
                                'maxT_unknown_pvalue',
                                'saturated_pvalue',
                                'nominal_pvalue',
                                'nominalT_pvalue',
                                'maxT_pvalue'],
                               zip([simple_stop, 
                                    strong_stop,
                                    forward_stop],
                                   ['simple',
                                    'strong',
                                    'forward'])):
        rule, rule_name = rule_
        (R, 
         V_var, 
         V_model, 
         screen,
         FWER_model,
         FDP_model,
         FDP_var,
         S_var) = summary(np.asarray(results['variable_selected']),
                          results[pval],
                          active,
                          rule, 
                          alpha)

        pval_name = '_'.join(pval.split('_')[:-1])
        for (n, value) in zip(['R', 'V_var', 'V_model', 'FDP_model', 'FDP_var', 'S_var', 'FWER_model', 'screen'],
                              [R, V_var, V_model, FDP_model, FDP_var, S_var, FWER_model, screen]):
            full_results.setdefault('%s_%s_%s' % (pval_name, rule_name, n), []).append(value)
        
    return full_results, FS
Example #51
## This file contains the methods needed to perform the factor analysis
## to derive the competencies of the informants.

import pandas
from sklearn import preprocessing
import numpy
import rpy2.robjects.packages as rpackages
import rpy2.robjects.numpy2ri as np2ri
import rpy2.robjects.pandas2ri as pandas2ri
import rpy2.robjects as ro
np2ri.activate()
psych = rpackages.importr('psych')

# Convert the string response of each informant into a matrix
# with a column for each word, where each cell[i,j] holds
# informant i's rank of word j.
def buildMatrix(responses):
    results = {}
    for i, l in responses.str.split(",").items():
        row = {}
        for pos,j in enumerate(l):
            row[j] = pos + 1
        results[i] = row
    matrix =  pandas.DataFrame(results)
    matrix = matrix.T
    return matrix.reset_index(drop=True)

# Build a response matrix for each scale, excluding informants who did not
# perform the task for that scale
def buildMatrices(dataFrame):
    matrices = {}
Example #52
Requires: R (3.1+)      
"""
__author__ = "Zhou Fang"
__email__ = "*****@*****.**"

from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np
from sklearn.metrics import r2_score
from sklearn.utils.validation import NotFittedError, check_is_fitted
from sklearn.utils import check_array, check_X_y
from rpy2.robjects import r
import rpy2.robjects as robjects
import rpy2.robjects.packages as rpackages
import rpy2.robjects.numpy2ri as rpyn

rpyn.activate()

# import R's utility package
utils = rpackages.importr("utils")
# select a mirror for R packages
utils.chooseCRANmirror(ind=1)  # select the first mirror in the list

# R package names we need
packnames = ("mgcv",)  # must be an iterable of names; a bare string would be iterated character by character

# Install necessary packages that haven't been installed yet
packnames_to_install = [x for x in packnames if not rpackages.isinstalled(x)]
if len(packnames_to_install) > 0:
    utils.install_packages(robjects.StrVector(packnames_to_install))

Example #53
    def _exec_r_module(self):
        try:
            import rpy2.robjects
            from rpy2.robjects import numpy2ri
            from rpy2.robjects import pandas2ri
            from rpy2.robjects.packages import importr
        except ImportError:
            raise ImportError(
                'R module cannot be run, because '
                '"rpy2" package is not installed.'
            )
        module_name = os.path.splitext(os.path.basename(self.source_file))[0]
        logger.debug(
            'import module "%s" from source file: %s',
            module_name, self.source_file
        )
        rpy2.robjects.r('source("{0}")'.format(self.source_file))
        module = rpy2.robjects.r[module_name]
        version = module.get('VERSION')[0]
        if version != self.handles.version:
            raise PipelineRunError(
                'Version of source and handles is not the same.'
            )
        func = module.get('main')
        numpy2ri.activate()   # enables use of numpy arrays
        pandas2ri.activate()  # enable use of pandas data frames
        kwargs = self.keyword_arguments
        logger.debug(
            'evaluate main() function with INPUTS: "%s"',
            '", "'.join(kwargs.keys())
        )
        # R doesn't have unsigned integer types
        for k, v in kwargs.items():  # dict.iteritems() is Python 2 only
            if isinstance(v, np.ndarray):
                if v.dtype == np.uint16 or v.dtype == np.uint8:
                    logger.debug(
                        'module "%s" input argument "%s": '
                        'convert unsigned integer data type to integer',
                        self.name, k
                    )
                    kwargs[k] = v.astype(int)
            elif isinstance(v, pd.DataFrame):
                # TODO: We may have to translate pandas data frames explicitly
                # into the R equivalent.
                # pandas2ri.py2ri(v)
                kwargs[k] = v
        args = rpy2.robjects.ListVector({k: v for k, v in kwargs.items()})
        base = importr('base')
        r_out = base.do_call(func, args)

        for handle in self.handles.output:
            # NOTE: R functions are supposed to return a list. Therefore
            # we can extract the output argument using rx2().
            # The R equivalent would be indexing the list with "[[]]".
            if isinstance(r_out.rx2(handle.name), rpy2.robjects.vectors.DataFrame):
                handle.value = pandas2ri.ri2py(r_out.rx2(handle.name))
                # handle.value = pd.DataFrame(r_out.rx2(handle.name))
            else:
                # NOTE: R doesn't have an unsigned integer data type.
                # So we cast to uint16.
                handle.value = numpy2ri.ri2py(r_out.rx2(handle.name)).astype(
                    np.uint16
                )
                # handle.value = np.array(r_out.rx2(handle.name), np.uint16)

        return self.handles.output
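# The global activate() calls above work, but the rpy2 3.x documentation
# recommends scoped converters so conversions don't leak process-wide. A
# minimal sketch of the scoped equivalent for the do_call step (an
# alternative, not the module's original code):
#
# from rpy2.robjects import default_converter, numpy2ri, pandas2ri
# from rpy2.robjects.conversion import localconverter
#
# with localconverter(default_converter + numpy2ri.converter + pandas2ri.converter):
#     r_out = base.do_call(func, args)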
Example #54
def infer_pseudotime(data, output_directory, tag = '', pcv_method = 'Rprincurve',
                     anchor_gene = None, markers = None):

    assert pcv_method in {'Rprincurve'} # taking into account the possibility of adding
                                        # in future versions other methods 
                                        # for principal curve analysis
    
    N_dim = 3
    model = TSNE(n_components = N_dim)
    TSNE_data = model.fit_transform(data)
    
    if pcv_method == 'Rprincurve':
        with open(path.join(output_directory, "{0}_TSNE_d{1}.tsv".format(tag, N_dim)),
                  'w') as f:
            f.write('\t'.join(['T{0}'.format(k) for k in range(1, N_dim + 1)]))  # xrange is Python 2 only
            f.write('\n')
            np.savetxt(f, TSNE_data, fmt = '%.6f', delimiter = '\t')
        
        numpy2ri.activate()
        princurve = importr('princurve')
        
        procedure = princurve.principal_curve
        # NULL and TRUE are R names, not Python ones; pass keyword arguments
        # instead (the plotting flag was dropped in princurve >= 2.0)
        fitpc = procedure(TSNE_data, thresh=0.001, maxit=200,
                          stretch=2, smoother='lowess')
        curve_projections_matrix = np.array(fitpc.rx('s')[0])
        pseudotime_series = np.array(fitpc.rx('lambda')[0])
        
        with open(path.join(output_directory, "{0}_TSNE_d{1}_pcv.tsv".format(tag,
                  N_dim)), 'w') as f:
            np.savetxt(f, curve_projections_matrix, fmt = '%.6f', delimiter = '\t')
            
        with open(path.join(output_directory, "{0}_TSNE_d{1}_lambda.tsv".format(tag,
                  N_dim)), 'w') as f:
            np.savetxt(f, pseudotime_series, fmt = '%.6f', delimiter = '\t')
         
    else:
        print("ERROR: PySCUBA: Preprocessing: infer_pseudotime:\n"
              "your choice of method for principal curve analysis is not supported "
              "by the present version of PySCUBA.")
        exit(1)
        
    if anchor_gene:
        assert isinstance(anchor_gene, str)
        assert markers is not None
        
        N_cells_anchor = 1000
        
        gene_idx = np.where(markers == anchor_gene)[0]
        pseudotime_idx = np.argsort(pseudotime_series)
        
        anchor_gene_avg_beg = np.mean(data[pseudotime_idx[:N_cells_anchor], gene_idx])
        # mirror the "beg" average by taking the last N_cells_anchor cells
        anchor_gene_avg_end = np.mean(data[pseudotime_idx[-N_cells_anchor:], gene_idx])
        
        if anchor_gene_avg_end > anchor_gene_avg_beg:
            pseudotime_series = np.max(pseudotime_series) - pseudotime_series
        
    t_min = np.min(pseudotime_series)
    t_max = np.max(pseudotime_series)
    t_bins = 8
    
    cell_stages = t_bins * (pseudotime_series - t_min + 0.0001) / (t_max - t_min + 0.0002)
    cell_stages = np.ceil(cell_stages).astype(int).astype('str')
    
    return cell_stages
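# A typical call, with hypothetical inputs: `expression_matrix` is a
# cells-by-genes numpy array, `marker_names` the matching gene-name array, and
# 'Gata1' a made-up anchor gene:
#
#   cell_stages = infer_pseudotime(expression_matrix, './results', tag='run1',
#                                  anchor_gene='Gata1', markers=marker_names)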
Example #55
def get_pvalue_from_scores(result_dict, permutation_dict, **kwargs):
    """ Uses network scores to calculate p-values.

    Arguments:
     result_dict: a dict of results from the network propagation.  e.g. {node id : score value}
     permutation_dict: a dict of permutation results, e.g. {node id: [list of score values]}

    kwargs:
     verbose

    returns:
     ttp_dict: dictionary of p-values from a two-tailed test of significant
     side_dict: a dict to indicate if the result lies in the upper or lower tail ('+' or '-')
 
    """
    continue_flag = True

    verbose = test_kwarg('verbose', kwargs, [False, True])

    
    try:
        from rpy2.robjects import r
        from rpy2 import robjects
        from rpy2.robjects import numpy2ri
        numpy2ri.activate()
    except ImportError:
        print "networkstatistics.get_pvalue_from_scores requires a functional rpy2 and R, exiting..."
        continue_flag = False

    if continue_flag:
        from time import time
        start_time = time()
    
        ttp_dict = {}
        side_dict = {}
    
        # Also require the pareto extrapolation code
        the_file = 'ParetoExtrapolation.R'
        import nampy
        import os.path
        # os.path.join picks the correct separator on every platform
        pareto_file_path = os.path.join(nampy.__path__[0], 'rfiles')
        r('source("%s")' % os.path.join(pareto_file_path, the_file))
        test_ids = result_dict.keys()
        n_tests = len(test_ids)
        start_time = time()
        for i, the_id in enumerate(test_ids):
            test_statistic = result_dict[the_id]
            null_distribution = permutation_dict[the_id]
            n_permutations = len(null_distribution)
            # This won't work by definition if we have less than 20 permutations
            if n_permutations < 20:
                if verbose:
                    print('Warning: the null distribution is too small; run more permutations. Exiting...')
                return (None, None)
            # count permutations in each tail; the generator form works for
            # plain lists as well as numpy arrays
            M1 = sum(v > test_statistic for v in null_distribution)
            M2 = sum(v < test_statistic for v in null_distribution)
            # Want to know which end the test statistics lies towards
            if M2 < M1:
                side = '-'
                M = M2
            else:
                side = '+'
                M = M1
            if (M >= 10):
                estimated_p = 2. * M / n_permutations
            else:
                # need to create an r object first
                null_distribution_r = robjects.FloatVector(null_distribution)
                # need to assign the r object into the r namespace
                r.assign('null_distribution_r', null_distribution_r)
                r.assign('test_statistic', test_statistic)
                # now run our r scripts
                r('fit = getParetoFit(null_distribution_r, side="two-sided")') 
                r('distCDF = paretoExtrapolate(test_statistic, fit)')
                estimated_p = robjects.numpy2ri.ri2numpy(r('distCDF'))[0]
            ttp_dict[the_id] = estimated_p
            side_dict[the_id] = side
            if verbose:
                if (i+1) % 1000 == 0:
                    print('Test %i of %i, elapsed: %f hr' % ((i + 1), n_tests, (time() - start_time) / 3600.))
        return ttp_dict, side_dict
    else:
        return {}, {}
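# For the common case (M >= 10) the p-value above is a plain empirical tail
# count. A standalone numpy sketch of that branch, independent of R (a
# hypothetical helper, not in the original module):
#
# import numpy as np
#
# def empirical_two_tailed_p(test_statistic, null_scores):
#     """Two-tailed empirical p-value plus the tail the statistic falls in."""
#     null_scores = np.asarray(null_scores)
#     m_upper = (null_scores > test_statistic).sum()
#     m_lower = (null_scores < test_statistic).sum()
#     side = '+' if m_upper <= m_lower else '-'
#     return 2.0 * min(m_upper, m_lower) / null_scores.size, side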
Example #56
from __future__ import division

import numpy as np

try:
    from itertools import izip                  # Python 2
except ImportError:
    izip = zip                                  # Python 3: the built-in zip is lazy
from csv import DictReader, writer
from sys import argv, stdout, stderr
from getopt import getopt, GetoptError
from rpy2.robjects import packages, numpy2ri    # R in Python


## activate R functionality

stats = packages.importr('stats')               # R stats
numpy2ri.activate()                             # automatic numpy-to-R

## globals

DEFAULT_GROUP_COLUMN = ''
DEFAULT_OUTPUT_FID = stdout
DEFAULT_P_THRESHOLD = .2
MAX_DROP_PCT = .75
USAGE = """
USAGE: {} -a GRP1 -b GRP2 [-d] -m FEATS [-g GCOL] [-o OUTPUT] [-p P] INPUT
""".format(__file__)


## helper functions

def colmean(x):
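    # (snippet truncated here; a plausible minimal completion, assuming the
    #  helper returns per-column means of a 2-D numpy array)
    return np.mean(np.asarray(x), axis=0)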
Example #57
    def numpy2ri_close(cls):
        """Disable automatic conversion between R objects and numpy objects.

        :return: None
        """
        numpy2ri.deactivate()
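    # A class like this presumably pairs the closer with an opener; a minimal
    # sketch of the counterpart (hypothetical name, not in the original snippet):
    def numpy2ri_open(cls):
        """Enable automatic conversion between R objects and numpy objects.

        :return: None
        """
        numpy2ri.activate()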
Example #58
    def setUp(self):
        # self._py2ri = robjects.conversion.py2ri
        # self._ri2py = robjects.conversion.ri2py
        rpyn.activate()
Example #59
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import numpy2ri
numpy2ri.activate()
from sklearn.base import BaseEstimator, ClassifierMixin
import warnings
import numpy as np
R = robjects.r

class LogisticRegressionCV(BaseEstimator, ClassifierMixin):
    def __init__(self, binary_classification=True, n_jobs=1, random_state=None, glmnet_params=None):
        """
        binary_classification: if True use family = binomial, else multinomial
        random_state: int or None, set.seed
        n_jobs: number of workers
        glmnet_params: params for glmnet
        """
        self.random_state = random_state
        # a None default avoids the shared-mutable-default pitfall
        self.glmnet_params = glmnet_params if glmnet_params is not None else {}
        self.binary_classification = binary_classification
        warnings.warn('No validity check for glmnet_params')
        self.n_jobs = n_jobs
    def __del__(self):
        if hasattr(self, 'cluster_'):
            R['stopCluster'](self.cluster_)
    def fit(self, X, y):
        importr('glmnet')
        family = 'binomial' if self.binary_classification else 'multinomial'
        if self.random_state is not None:
            R['set.seed'](self.random_state)
        if self.n_jobs > 1:
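            # (snippet truncated here; a plausible continuation, inferred from
            #  the stopCluster call in __del__ -- an assumption, not the
            #  original code: build a doParallel cluster so cv.glmnet can be
            #  called with parallel=True)
            importr('doParallel')
            self.cluster_ = R['makeCluster'](self.n_jobs)
            R['registerDoParallel'](self.cluster_)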
Example #60
def _init_r():
    """Private function to initialise R, only executed when needed.

    """

    global _r_initialised
    global r
    global ro
    global grdevices
    global ape

    if not _r_initialised:

        import rpy2.robjects as ro  # noqa
        from rpy2.robjects import r
        from rpy2.robjects.packages import importr
        import rpy2.robjects.numpy2ri as numpy2ri
        numpy2ri.activate()
        grdevices = importr('grDevices')
        ape = importr(
            'ape',
            robject_translations={
                'delta.plot': 'delta_dot_plot',
                'dist.dna': 'dist_dot_dna',
                'dist.nodes': 'dist_dot_nodes',
                'node.depth': 'node_dot_depth',
                'node.depth.edgelength': 'node_dot_depth_dot_edgelength',
                'node.height': 'node_dot_height',
                'node.height.clado': 'node_dot_height_dot_clado',
                'prop.part': 'prop_dot_part',
            }
        )

        # Define custom R functions to help with coloring tree edges by
        # population. These functions were written by Jacob Almagro-Garcia
        # <*****@*****.**> at the Wellcome Trust Sanger Institute.
        r("""
library(ape)


######################################################################################################################
#' Computes the number of leaves of each group that hang from each branch.
#' @param phylotree A tree of class phylo.
#' @param labelgroups A vector with the group of the tip labels (named with the labels).
#' @return A named matrix with the membership counts for each interior edge of the tree.
######################################################################################################################

computeEdgeGroupCounts <- function(phylotree, labelgroups) {

  labels <- phylotree$tip.label
  num_tips <- length(labels)
  edge_names <- unique(sort(c(phylotree$edge)))

  # This matrix will keep track of the group counts for each edge.
  edge_group_counts <- matrix(0, nrow=length(edge_names), ncol=length(unique(sort(labelgroups))))
  rownames(edge_group_counts) <- edge_names
  colnames(edge_group_counts) <- unique(labelgroups)

  # Init the leaf branches.
  sapply(1:num_tips, function(l) {
    edge_group_counts[as.character(l), as.character(labelgroups[phylotree$tip.label[l]])] <<- 1
  })

  # Sort edges by the value of the descendent
  # The first segment will contain the leaves whereas the second the branches (closer to leaves first).
  # We need to do this because leaves are numbered 1:num_tips and the branches CLOSER to the leaves
  # with higher numbers.
  edges <- phylotree$edge[order(phylotree$edge[,2]),]
  branches <- edges[num_tips:nrow(edges),]
  edges[num_tips:nrow(edges),] <- branches[order(branches[,1],decreasing=T),]
  invisible(apply(edges, 1, function(edge) {
    # Check if we are connecting a leaf.
    if(edge[2] <= num_tips) {
      e <- as.character(edge[1])
      g <- as.character(labelgroups[phylotree$tip.label[edge[2]]])
      edge_group_counts[e,g] <<- edge_group_counts[e,g] + 1
    }
    else {
      e1 <- as.character(edge[1])
      e2 <- as.character(edge[2])
      edge_group_counts[e1,] <<- edge_group_counts[e1,] + edge_group_counts[e2,]
    }
  }))
  return(edge_group_counts)
}


######################################################################################################################
#' Assigns the color of the majority group (hanging from) each branch.
#' @param phylotree A tree of class phylo.
#' @param edge_group_counts A named matrix with the group counts for each branch.
#' @param groupcolors A named vector with the color of each group.
#' @param equality_color The color to be used if there is no majority group.
#' @return A vector with the colors to be used with the tree branches.
######################################################################################################################

assignMajorityGroupColorToEdges <- function(phylotree, edge_group_counts, groupcolors, equality_color="gray") {
  edge_colors <- apply(phylotree$edge, 1, function(branch) {
    e <- as.character(branch[2])
    major_group_index <- which.max(edge_group_counts[e,])
    if(all(edge_group_counts[e,] == edge_group_counts[e,major_group_index]))
      return(equality_color)
    else
      return(groupcolors[colnames(edge_group_counts)[major_group_index]])
  })
  return(edge_colors)
}
""")  # noqa

        _r_initialised = True
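# Once _init_r() has run, the R helpers defined above are reachable through
# the robjects namespace. A hedged usage sketch; `tree`, `label_groups` and
# `group_colors` are hypothetical R objects (an ape phylo tree, a named group
# vector and a named color vector) built elsewhere:
#
#   _init_r()
#   counts = ro.r['computeEdgeGroupCounts'](tree, label_groups)
#   edge_colors = ro.r['assignMajorityGroupColorToEdges'](tree, counts, group_colors)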