Example #1
 def write_raov(self, fname, aov_name, stat_name='NULL'):
     """Save ANOVA and descriptive data R objects as a csv file
           using a custom R function
     """
     robjects.r("""source('~/R_Functions/WriteAOV.R')""")
     toeval = "WriteAOV({0}, {1}, data={2})".format("'" + fname + "'", aov_name, stat_name)
     robjects.r(toeval)
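
A hedged usage sketch (hypothetical names throughout: `analysis` is an instance of the enclosing class; `my_aov` and `my_stats` are R objects created earlier in the session, e.g. via robjects.r):

# Assumes ~/R_Functions/WriteAOV.R defines WriteAOV(file, aov_obj, data=...)
analysis.write_raov('results.csv', 'my_aov', stat_name='my_stats')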
Example #2
def get_correlations_for_tickers(tickers, show_exception=False):
  corrs = []
  start_time = datetime.datetime.now()
  first = True
  for ticker in tickers:
    if not first:
      time_left = get_time_left(
                       start_time, 
                       len(corrs), 
                       scipy.special.comb(len(tickers), 2)
                     )
      print 'Finding Correlations for %s. Time remaining: %.1f minutes' % (ticker, time_left.seconds / 60.0)
    first = False
    try:
      t_data = get_t_data(ticker)
    except Exception as e:
      if show_exception: print "throwing exception", e
      continue
    for ticker_2 in tickers:
      if ticker_2 == ticker: continue
      try:
        tdata_2 = get_t_data(ticker_2)
      except Exception as e:
        if show_exception: print "throwing exception", e, ticker_2
        continue
      if len(t_data) != len(tdata_2):
        t_data, tdata_2 = du.remap_data(t_data, tdata_2)  
      corr = get_correlation(t_data, tdata_2)[0]
      ident = '%s/%s' % (ticker, ticker_2)
      corrs.append((ident, corr)) 
      r('gc()')
      gc.collect()
    gc.collect()
  return corrs
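
The get_time_left helper is not shown in this example; a plausible sketch of what it computes, inferred from the call above (signature and internals are assumptions, not the original code):

import datetime

def get_time_left(start_time, done, total):
    """Estimate time remaining from elapsed time and progress (assumed helper)."""
    elapsed = datetime.datetime.now() - start_time
    if done == 0:
        return datetime.timedelta(seconds=0)
    # Scale elapsed time by the ratio of remaining work to completed work.
    return datetime.timedelta(seconds=elapsed.total_seconds() * (total - done) / done)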
Example #3
    def learnModel(self, X, Y):
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkArray(X)
        Parameter.checkArray(Y)

        if numpy.unique(Y).shape[0] < 2:
            raise ValueError("Vector of labels must be binary, currently numpy.unique(Y) = " + str(numpy.unique(Y)))

        #If Y is 1D make it 2D
        if Y.ndim == 1:
            Y = numpy.array([Y]).T
        
        XY = self._getDataFrame(X, Y)
        formula = robjects.Formula('class ~ .')
        self.learnModelDataFrame(formula, XY)

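        # Collect garbage on both sides of the R bridge: drop Python-side
        # rpy2 proxies first so that R's gc() can actually reclaim memory.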
        gc.collect()
        robjects.r('gc(verbose=TRUE)')
        robjects.r('memory.profile()')
        gc.collect()

        if self.printMemStats:
            logging.debug(self.getLsos()())
            logging.debug(ProfileUtils.memDisplay(locals()))
Example #4
def rocbees(ARGVS):
   '''
   ROC curves and the beeswarm plot 
   from beeswarm R package
   '''
   beeswarm = importr('beeswarm')
   Cairo = importr('Cairo')
   ROC   = importr('ROC')
   filename = ARGVS['file']
   data     = ARGVS['data']
   title    = ARGVS['title']
   category = ARGVS['opts']
   filewrite = ROOT_PATH + '/media/tmp/' + filename 
   resp    = []
   expr    = []
   names = data.keys()
   for name in names:
      resp.append(data[name]['resp'])
      expr.append(data[name]['expression'])
   robjects.r('''
    approx3 <- function(x, y = NULL, theta = 0.001) {
     xy <- xy.coords(x, y)
     dx <- diff(xy$x)/(max(xy$x) - min(xy$x))
     dy <- diff(xy$y)/(max(xy$y) - min(xy$y))
     angle <- atan2(dy, dx)
     diff.angle <- diff(angle)%%pi
     abs.diff.angle <- pmin(diff.angle, pi - diff.angle)
     keep <- c(TRUE, abs.diff.angle > theta, TRUE)
     xy$x <- xy$x[keep]
     xy$y <- xy$y[keep]
     xy
    }
    aronroc <- function(x, truth, type = "l", xlab = expression(1 -
     specificity), ylab = "Sensitivity", ...) {
     require(ROC)
     r <- rocdemo.sca(truth, x)
     xy <- list(x = 1 - r@spec, y = r@sens)
     xy.trimmed <- approx3(xy)
     plot(xy.trimmed, type = type, xlab = xlab, ylab = ylab, ...)
     invisible(xy.trimmed)
    }
    plotResps <- function (filename,expr,resp,category, main='') {
     expr  = as.numeric(expr)
     resp  = as.character(resp)
     CairoPNG(filename=filename,width = 800, height = 400)
     par(oma = c(0,0,1,0))
     layout(matrix(1:2, nrow = 1), widths = c(1,1))
     beeswarm(expr ~ resp,col=c(1:length(unique(resp))),
      pch=16,xlab='Response Categories', ylab='Expression')
     par(xpd = NA)
     aronroc (expr, resp == category)
     title(main,outer=TRUE)
     dev.off()
    }    
   ''')
   try:
      robjects.r['plotResps'](filename = filewrite, expr = expr, resp = resp, category = category, main = title)
      return filename
   except Exception:
      return 'Error'
Example #5
File: init.py Project: cjh1/VisTrails
 def run_code(self, code_str,
              use_input=False,
              use_output=False,
              excluded_inputs=set(),
              excluded_outputs=set()):
     """run_code runs a piece of code as a VisTrails module.
     use_input and use_output control whether to use the inputport
     and output port dictionary as local variables inside the
     execution."""
     import vistrails.core.packagemanager
     def fail(msg):
         raise ModuleError(self, msg)
     def cache_this():
         self.is_cacheable = lambda *args, **kwargs: True
     if use_input:
         inputDict = dict([(k, self.getInputFromPort(k))
                           for k in self.inputPorts
                           if k not in excluded_inputs])
         for k,v in inputDict.iteritems():
             robjects.globalEnv[k] = v
     robjects.r(code_str)
     if use_output:
         for k in self.outputPorts:
             if k not in excluded_outputs and k in robjects.globalEnv:
                 self.setResult(k, robjects.globalEnv[k])
Example #6
 def ilmoitus_tilastot(self, vaalipiiri=False):
     valinta = ["puolue_lyh",]
     if vaalipiiri:
         valinta.append("vaalipiiri")
     summat = robjects.r('''function(df)summarise(df, 
                              ilmoittaneita = length(df$etunimi),
                              rahoitus_tot = sum(df$rahoitus_kaikki),
                              kulut_tot = sum(df$kulut_kaikki),
                              omat_varat = sum(df$omat_varat),
                              lainat = sum(df$lainat),
                              yksityinen_tuki = sum(df$yksityinen_tuki),
                              yritys_tuki = sum(df$yritys_tuki),
                              puolue_tuki = sum(df$puolue_tuki),
                              puolueyhdistys_tuki = sum(df$puolueyhdistys_tuki),
                              valitettu_tuki = sum(df$valitetty_tuki),
                              muu_tuki = sum(df$muu_tuki))
                              ''')
     
     data_puolueet = plyr.ddply(self._ilmoitukset,  robjects.StrVector(valinta), summat)
     data_puolueet = base.merge(data_puolueet, 
                                self.ehdokas_tilastot(vaalipiiri=vaalipiiri))
     data_puolueet.colnames[-1] = "ehdokkaita_tot"
     
     osuudet = robjects.r('function(a, b)return(a / b)')
     
     data_puolueet = data_puolueet.cbind(data_puolueet, 
                                 base.round(osuudet(data_puolueet.rx("ilmoittaneita"), 
                                 data_puolueet.rx("ehdokkaita_tot")), 2), 
                                 base.round(osuudet(data_puolueet.rx("rahoitus_tot"), 
                                 data_puolueet.rx("ilmoittaneita")), 2),
                                 robjects.IntVector([2011, ]))
     data_puolueet.colnames[-3] = "ilmoittaneita_pros"
     data_puolueet.colnames[-2] = "rahoitus_suht"
     data_puolueet.colnames[-1] = "vuosi"
     return data_puolueet
Example #7
def Rconverter(Robj, dataframe=False):
    """
    Convert an object in R's namespace to one suitable
    for ipython's namespace.

    For a data.frame, it tries to return a structured array.
    It first checks for colnames, then names.
    If all are NULL, it returns np.asarray(Robj), else
    it tries to construct a recarray

    Parameters
    ----------

    Robj: an R object returned from rpy2
    """
    is_data_frame = ro.r('is.data.frame')
    colnames = ro.r('colnames')
    rownames = ro.r('rownames') # with pandas, these could be used for the index
    names = ro.r('names')

    if dataframe:
        as_data_frame = ro.r('as.data.frame')
        cols = colnames(Robj)
        _names = names(Robj)
        if cols != ri.NULL:
            Robj = as_data_frame(Robj)
            names = tuple(np.array(cols))
        elif _names != ri.NULL:
            names = tuple(np.array(_names))
        else: # failed to find names
            return np.asarray(Robj)
        Robj = np.rec.fromarrays(Robj, names = names)
    return np.asarray(Robj)
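
A hedged usage sketch for Rconverter (the data frame is built inline for illustration; exact recarray behaviour depends on the rpy2 version's numpy conversion):

import numpy as np
import rpy2.robjects as ro
import rpy2.rinterface as ri  # the function compares names against ri.NULL

df = ro.r('data.frame(x=1:3, y=c("a", "b", "c"))')
rec = Rconverter(df, dataframe=True)  # structured array with fields 'x' and 'y'
print(rec.dtype.names)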
Example #8
    def geno_to_rqtl_function(self):  # TODO: Need to figure out why some genofiles have the wrong format and don't convert properly
        print("Adding some custom helper functions to the R environment")
        ro.r(
            """
           trim <- function( x ) { gsub("(^[[:space:]]+|[[:space:]]+$)", "", x) }

           getGenoCode <- function(header, name = 'unk'){
             mat = which(unlist(lapply(header,function(x){ length(grep(paste('@',name,sep=''), x)) })) == 1)
             return(trim(strsplit(header[mat],':')[[1]][2]))
           }

           GENOtoCSVR <- function(genotypes = 'BXD.geno', out = 'cross.csvr', phenotype = NULL, sex = NULL, verbose = FALSE){
             header = readLines(genotypes, 40)                                                                                 # Assume a geno header is not longer than 40 lines
             toskip = which(unlist(lapply(header, function(x){ length(grep("Chr\t", x)) })) == 1)-1                            # Major hack to skip the geno headers
             
             genocodes <- c(getGenoCode(header, 'mat'), getGenoCode(header, 'het'), getGenoCode(header, 'pat'))                # Get the genotype codes 
             type <- getGenoCode(header, 'type')
             genodata <- read.csv(genotypes, sep='\t', skip=toskip, header=TRUE, na.strings=getGenoCode(header,'unk'), colClasses='character', comment.char = '#')
             cat('Genodata:', toskip, " ", dim(genodata), genocodes, '\n')
             if(is.null(phenotype)) phenotype <- runif((ncol(genodata)-4))                                                     # If there isn't a phenotype, generate a random one
             if(is.null(sex)) sex <- rep('m', (ncol(genodata)-4))                                                              # If there isn't a sex phenotype, treat all as males
             outCSVR <- rbind(c('Pheno', '', '', phenotype),                                                                   # Phenotype
                              c('sex', '', '', sex),                                                                           # Sex phenotype for the mice
                              cbind(genodata[,c('Locus','Chr', 'cM')], genodata[, 5:ncol(genodata)]))                          # Genotypes
             write.table(outCSVR, file = out, row.names=FALSE, col.names=FALSE,quote=FALSE, sep=',')                           # Save it to a file
             require(qtl)
             cross = read.cross(file=out, 'csvr', genotypes=genocodes)                                                         # Load the created cross file using R/qtl read.cross  
             if(type == 'riset') cross <- convert2riself(cross)                                                                # If its a RIL, convert to a RIL in R/qtl
             return(cross)
          }
        """
        )
Example #9
 def _normalize_tabbed(self, rfile):
     """input: raw tabbed matrix file (with column and row headers)
     return: normalized tabbed matrix file (with column and row headers)"""
     nfile = Ipy.TMP_DIR+'/norm.'+random_str()+'.tab'
     rcmd = 'source("%s")\nMGRAST_preprocessing(file_in="%s", file_out="%s", produce_fig="FALSE")\n'%(Ipy.LIB_DIR+'/preprocessing.r', rfile, nfile)
     ro.r(rcmd)
     return nfile
Example #10
def annotated_refs(in_file, ref, config, out_file):
    """Use BioMart at Ensembl to add descriptions to each row.
    """
    rpy2.r.assign('in.file', in_file)
    rpy2.r.assign('out.file', out_file)
    rpy2.r.assign("org", ref.get("ensembl_name", ""))
    rpy2.r('''
    library(biomaRt)
    options(stringsAsFactors=FALSE)
    in.tbl <- read.csv(in.file, header=TRUE)
    in.tbl$pctsimilar <- in.tbl$hitidentities / in.tbl$hitlength
    print(summary(in.tbl))
    nohits <- sum(in.tbl$hit == "")
    total <- length(in.tbl$hit)
    print(c("No hits", nohits, "Percent hit", (total - nohits) / total))

    if (org != "") {
        txs <- unique(in.tbl$hit)
        mart <- useMart("ensembl", dataset=org)
        attrs <- c("ensembl_transcript_id", "embl", "description")
        filters <- c("ensembl_transcript_id")
        mart.result <- getBM(attributes=attrs, filters=filters, values=txs, mart=mart)
        names(mart.result) <- c("hit", "genbank.id", "description")
        final <- merge(in.tbl, mart.result, by="hit", all.x=TRUE)
        final.sort <- final[order(final$query),]
        print(head(final.sort))
        write.csv(final.sort, out.file, row.names=FALSE, na="")
    }
    ''')
Example #11
    def save_model(self, file_name="stm_model.RData"):
        '''
        Save the fitted model as an R object.

        Parameters:
          file_name - string
             The name of the file where the data will be saved
        '''
        if not self.trained:
            print "The model has not been fitted yet."

        else:
            modsave = {"mu": robjects.ListVector(self.mu), 
                "sigma": self.sigma, 
                "beta": robjects.ListVector({"beta": self.logbeta, "logbeta": self.logbeta}),
                "settings": robjects.ListVector(self.settings),
                "vocab": self.vocab,
                "convergence": self.convergence,
                "theta": self.theta,
                "eta": self.eta,
                "invsigma": self.invsigma}

            robjects.globalenv["modsave"] = robjects.ListVector(modsave)
            robjects.r("save(modsave, file='" + filename + "')" )
            print "STM model saved as R object."
Example #12
 def start(self):
     filePath = QFileInfo(self.outName).absoluteFilePath()
     filePath.replace("\\", "/")
     file_name = QFileInfo(self.outName).baseName()
     driver_list = self.driver.split("(")
     self.driver = driver_list[0]
     self.driver.chop(1)
     extension = driver_list[1].right(5)
     extension.chop(1)
     if self.driver == "GeoTIFF": self.driver = "GTiff"
     elif self.driver == "Erdas Imagine Images": self.driver = "HFA"
     elif self.driver == "Arc/Info ASCII Grid": self.driver = "AAIGrid"
     elif self.driver == "ENVI Header Labelled": self.driver = "ENVI"
     elif self.driver == "JPEG-2000 part 1": self.driver = "JPEG2000"
     elif self.driver == "Portable Network Graphics": self.driver = "PNG"
     elif self.driver == "USGS Optional ASCII DEM": self.driver = "USGSDEM"
     if not filePath.endsWith(extension, Qt.CaseInsensitive) and self.driver != "ENVI":
         filePath = filePath.append(extension)
     if not filePath.isEmpty():
         if self.driver in ("AAIGrid", "JPEG2000", "PNG", "USGSDEM"):
             r_code = ("saveDataset(dataset=copyDataset(create2GDAL(dataset=%s, "
                       "type='Float32'), driver='%s'), filename='%s')"
                       % (unicode(self.layerName), unicode(self.driver), unicode(filePath)))
         else:
             r_code = ("writeGDAL(dataset=%s, fname='%s', drivername='%s', type='Float32')"
                       % (unicode(self.layerName), unicode(filePath), unicode(self.driver)))
         # Build first, then execute once: previously the unconditional call at
         # the end of the method could reference r_code before assignment.
         robjects.r(r_code)
     rlayer = QgsRasterLayer(unicode(filePath), unicode(file_name))
     return rlayer
Example #13
def test_anova():
    "Test ANOVA"
    from rpy2.robjects import r
    r_require('car')

    ds = datasets.get_uv()
    ds.to_r('ds')

    # fixed effects
    aov = test.anova('fltvar', 'A*B', ds=ds)
    print aov
    fs = run_on_lm_fitter('fltvar', 'A*B', ds)
    r_res = r("Anova(lm(fltvar ~ A * B, ds, type=2))")
    assert_f_tests_equal(aov.f_tests, r_res, fs, 'Anova')

    # random effects
    aov = test.anova('fltvar', 'A*B*rm', ds=ds)
    print aov
    fs = run_on_lm_fitter('fltvar', 'A*B*rm', ds)
    r('test.aov <- aov(fltvar ~ A * B + Error(rm / (A * B)), ds)')
    print r('test.summary <- summary(test.aov)')
    r_res = r['test.summary'][1:]
    assert_f_tests_equal(aov.f_tests, r_res, fs, 'rmaov')

    # not fully specified model with random effects
    assert_raises(NotImplementedError, test.anova, 'fltvar', 'A*rm', ds=ds)

    # empty cells
    dss = ds.sub("A%B != ('a2', 'b2')")
    assert_raises(NotImplementedError, test.anova, 'fltvar', 'A*B', ds=dss)
    assert_raises(NotImplementedError, run_on_lm_fitter, 'fltvar', 'A*B', ds=dss)
    dss = ds.sub("A%B != ('a1', 'b1')")
    assert_raises(NotImplementedError, test.anova, 'fltvar', 'A*B', ds=dss)
    assert_raises(NotImplementedError, run_on_lm_fitter, 'fltvar', 'A*B', ds=dss)
Example #14
def draw_quality_plot(db_file, plot_file, position_select, title):
    """Draw a plot of remapped qualities using ggplot2.

    Remapping information is pulled from the sqlite3 database using sqldf
    according to the position select attribute, which is a selection phrase like
    '> 50' or '=28'.

    plyr is used to summarize data by the original and remapped score for all
    selected positions.

    ggplot2 plots a heatmap of remapped counts at each (original, remap)
    coordinate, with a x=y line added for reference.
    """
    robjects.r.assign('db.file', db_file)
    robjects.r.assign('plot.file', plot_file)
    robjects.r.assign('position.select', position_select)
    robjects.r.assign('title', title)
    robjects.r('''
      library(sqldf)
      library(plyr)
      library(ggplot2)
      sql <- paste("select * from data WHERE position", position.select, sep=" ")
      exp.data <- sqldf(sql, dbname=db.file)
      remap.data <- ddply(exp.data, c("orig", "remap"), transform, count=sum(count))
      p <- ggplot(remap.data, aes(orig, remap)) +
           geom_tile(aes(fill = count)) +
           scale_fill_gradient(low = "white", high = "steelblue", trans="log") +
           opts(panel.background = theme_rect(fill = "white"),
                title=title) +
           geom_abline(intercept=0, slope=1)
      ggsave(plot.file, p, width=6, height=6)
    ''')
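
A hedged invocation sketch (paths and filter are hypothetical; the sqlite3 database is expected to hold a `data` table with position, orig, remap and count columns, per the SQL above):

draw_quality_plot(
    db_file='remap_quality.db',       # sqlite3 file queried via sqldf
    plot_file='quality_heatmap.png',  # ggsave output target
    position_select='> 50',           # phrase appended to "WHERE position"
    title='Remapped quality scores',
)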
Example #15
    def _plot_stats(self, bam_name):
        robjects.r.assign('rep_cnt',numpy2ri.numpy2ri(self.frag_rep.keys()))
        robjects.r.assign('rep_freq',numpy2ri.numpy2ri(self.frag_rep.values()))
        robjects.r.assign('size_distr',numpy2ri.numpy2ri(self.frag_size.keys()))
        robjects.r.assign('size_freq',numpy2ri.numpy2ri(self.frag_size.values()))
        robjects.r.assign('nb_frag',self.nb_frag)
        robjects.r.assign('main',bam_name)
        robjects.r("""
rep_cnt = as.integer(rep_cnt)
Od = order(rep_cnt)
rep_freq = as.integer(rep_freq)[Od]*1e-6
rep_cnt = rep_cnt[Od]
I100 = rep_cnt<100
rep_cnt = c(rep_cnt[I100],100)
rep_freq = c(rep_freq[I100],sum(rep_freq[!I100]))
size_distr = as.integer(size_distr)
Od = order(size_distr)
size_freq = as.integer(size_freq)[Od]/nb_frag
size_distr = size_distr[Od]
par(mfrow=c(2,1),lwd=2,cex=1.1,cex.main=1.3,cex.lab=1.1,cex.axis=.8,oma=c(0,0,3,0),mar=c(5,5,1,1),las=1,pch=20)
plot(rep_cnt,rep_freq,type='s',main='Fragment redundancy',xlab='Nb of copies',ylab='Frequency (millions)',
     log='y',xlim=c(1,100),xaxt='n',ylim=c(1e-6,nb_frag*1e-6))
abline(h=nb_frag*1e-6,col='red')
text(50,nb_frag*1e-6,nb_frag,col='red',pos=1)
axis(side=1,at=seq(10,100,by=10),labels=c(seq(10,90,by=10),">100"))
plot(size_distr,size_freq,type='s',main='Fragment size distribution',xlab='Size',ylab='Density')
title(main=main,outer=T)
""")
Example #16
File: radapter.py Project: ecotox/balder
 def transformDFByRFunction(self, df, function, library="", *args):
     r_df = pandas_df_to_r_df(df)
     if library != "":
         import_ = r.r("library(%s)" % library)
     function_ = r.r(function)
     r_updated_df = function_(r_df, *args)
     return r_matrix_to_dataframe(r_updated_df)
Example #17
	def openRecodificar(self):
		# Open the dialog
		self.dialogUi = self.d_recodificar
		self.dialogUi.setWindowTitle("Recodificar")
		self.dialogUi.show()

		# Initialize the combo boxes
		def f(x):
			print x
		rinterface.set_writeconsole(f)
		n_items = "length(names(datos))"
		n_items = robjects.r(n_items)
		n_items = n_items[0]
		self.dialogUi.cb_variable_reco.clear()
		self.dialogUi.cb_variable_reco.addItem("**Variable no seleccionada**")
		for i in range(n_items):
			item_factor = "names(datos)[" + str(i+1) + "]"
			item_factor = robjects.r(item_factor)
			self.dialogUi.cb_variable_reco.addItem(str(item_factor[0]))

		self.dialogUi.cb_variable_reco.currentIndexChanged.connect(self.changeComboBox)

		rinterface.set_writeconsole(rinterface.consolePrint)

		# Signals
		QtCore.QObject.connect(self.dialogUi.buttonBox, QtCore.SIGNAL("accepted()"), self.accept, QtCore.Qt.UniqueConnection)
		QtCore.QObject.connect(self.dialogUi.buttonBox, QtCore.SIGNAL("rejected()"), self.cancel, QtCore.Qt.UniqueConnection)
Example #18
 def estimate_clusters_numbers(self, X):
     nr,nc = X.shape
     X_trainr = ro.r.matrix(X, nrow=nr, ncol=nc)
     ro.r.assign("matrix_val", X_trainr)
     ro.r("NUMC = 2:20")
     ro.r("out <- SIMLR_Estimate_Number_of_Clusters(t(log(1+matrix_val)), NUMC = NUMC, cores.ratio = 0)")
     self.estimated_clusters = ro.r("out$K1")
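
The call above assumes the SIMLR package is already attached in the embedded R session; a minimal setup sketch (package name inferred from the function being called):

import rpy2.robjects as ro
ro.r('suppressMessages(library(SIMLR))')  # provides SIMLR_Estimate_Number_of_Clusters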
Example #19
	def read_spss_to_df(self):
		"""Use R functions to read SPSS files

		Input ->
		NULL
		====================================================================================================
		Output ->
		Return a tuple of a python DataFrame and an np array of descriptions of column names (i.e. features descriptions)
		"""
		from rpy2.robjects import r
		from string import Template
		from rpy2.robjects import pandas2ri
		import unicodedata
		file_location = self._file_path # or "./1 - 110778/110778.sav"
		file_location_csv = file_location[:-4] + ".csv"
		r_code = Template('''
		library(foreign)
		library(plyr)

		df <- read.spss ("$origin_file", to.data.frame=TRUE)
		desc <- attr(df,"variable.labels")
		write.csv(df, file="$output_file", na="")
		''')
		r_code = r_code.substitute(origin_file=file_location, output_file=file_location_csv) # Substitute input and output file with variables presented in python
		r(r_code) # Run the above r code in r global environment

		df = pandas2ri.ri2py(r('df')) # convert from r data frame into pandas data frame
		df = df.applymap(lambda x: unicodedata.normalize('NFKD', x).encode('ascii','ignore') if type(x) == unicode else x) # Translate unicode encoding into ascii encoding

		desc = pandas2ri.ri2py(r('desc')) # convert into python variable
		for j, ele in enumerate(desc):
			if type(desc[j]) == np.unicode_:
				desc[j] = str(unicodedata.normalize('NFKD', desc[j]).encode('ascii','ignore')) # http://stackoverflow.com/questions/1207457/convert-a-unicode-string-to-a-string-in-python-containing-extra-symbols
		desc = desc.astype(np.string_)
		return df, desc
Example #20
	def openPercentiles(self):
		self.dialogUi = self.d_percentiles
		self.dialogUi.setWindowTitle("Percentiles")
		self.dialogUi.show()

		# Initialize the combo boxes
		def f(x):
			print x
		rinterface.set_writeconsole(f)
		n_items = "length(names(datos))"
		n_items = robjects.r(n_items)
		n_items = n_items[0]
		self.dialogUi.cb_variable.clear()
		self.dialogUi.cb_factor.clear()
		self.dialogUi.cb_factor.addItem(str("Sin factor"))
		for i in range(n_items):
			item_factor = "names(datos)[" + str(i+1) + "]"
			item_factor = robjects.r(item_factor)
			self.dialogUi.cb_variable.addItem(str(item_factor[0]))
			self.dialogUi.cb_factor.addItem(str(item_factor[0]))

		rinterface.set_writeconsole(rinterface.consolePrint)

		# Signals
		QtCore.QObject.connect(self.dialogUi.buttonBox, QtCore.SIGNAL("accepted()"), self.acceptPercentiles, QtCore.Qt.UniqueConnection)
		QtCore.QObject.connect(self.dialogUi.buttonBox, QtCore.SIGNAL("rejected()"), self.cancel, QtCore.Qt.UniqueConnection)
Example #21
	def openHistograma(self):
		self.dialogUi = self.d_histograma
		self.dialogUi.setWindowTitle("Histograma")
		self.dialogUi.show()

		# Open the multi-selector
		self.listMulti = []
		self.dialogUi.var_select.clear()
		QtCore.QObject.connect(self.dialogUi.pushMultiSelector, QtCore.SIGNAL("clicked()"), self.openMultiSelector, QtCore.Qt.UniqueConnection)

		# Initialize the combo boxes
		def f(x):
			print x
		rinterface.set_writeconsole(f)
		n_items = "length(names(datos))"
		n_items = robjects.r(n_items)
		n_items = n_items[0]
		self.dialogUi.cb_variable.clear()
		for i in range(n_items):
			item_factor = "names(datos)[" + str(i+1) + "]"
			item_factor = robjects.r(item_factor)
			self.dialogUi.cb_variable.addItem(str(item_factor[0]))

		rinterface.set_writeconsole(rinterface.consolePrint)

		# Signals
		QtCore.QObject.connect(self.dialogUi.buttonBox, QtCore.SIGNAL("accepted()"), self.acceptHistograma, QtCore.Qt.UniqueConnection)
		QtCore.QObject.connect(self.dialogUi.buttonBox, QtCore.SIGNAL("rejected()"), self.cancel, QtCore.Qt.UniqueConnection)
Example #22
File: data.py Project: silpol/tryton-bef
 def create_dataframe(self):
     """create & save a R dataframe in dataframe field"""
     if self.model is not None:
         ModelField = Pool().get('ir.model.field')
         model_fields = ModelField.search([('model', '=', self.model)])
         model = Pool().get(self.model.model)
         records = model.search([])
         fields_info = [FieldInfo(field.name, field.ttype)
                        for field in model_fields]
         df = dataframe(records, fields_info)
         self.data = buffer(pickle.dumps(df))
         self.save()
     elif self.script is not None:
         # clean R workspace
         # robjects.r['source'] could be used instead of robjects.r
         robjects.r("""rm(list = ls(all.names=TRUE))""")
         try:
             # run code uploaded by users
             try:
                 robjects.r(self.script.code)
             except RRuntimeError, err:
                 self.raise_user_error('r_error', (err,))
             globalenv = robjects.r["globalenv"]()
             try:
                 obj = globalenv['out']
             except LookupError:
                 obj = None
             if isinstance(obj, robjects.DataFrame):
                 self.data = buffer(pickle.dumps(obj))
             else:
                 self.data = None
         finally:
Example #23
    def read(self, filename):
        """Parse content and metadata of markdown files"""
        QUIET = self.settings.get('RMD_READER_KNITR_QUIET', True)
        ENCODING = self.settings.get('RMD_READER_KNITR_ENCODING', 'UTF-8')
        CLEANUP = self.settings.get('RMD_READER_CLEANUP', True)
        RENAME_PLOT = self.settings.get('RMD_READER_RENAME_PLOT', True)
        logger.debug("RMD_READER_KNITR_QUIET = %s", QUIET)
        logger.debug("RMD_READER_KNITR_ENCODING = %s", ENCODING)
        logger.debug("RMD_READER_CLEANUP = %s", CLEANUP)
        logger.debug("RMD_READER_RENAME_PLOT = %s", RENAME_PLOT)
        # replace single backslashes with double backslashes
        filename = filename.replace('\\', '\\\\')
        # parse Rmd file - generate md file
        md_filename = filename.replace('.Rmd', '.aux').replace('.rmd', '.aux')
        if RENAME_PLOT:
            chunk_label = os.path.splitext(os.path.basename(filename))[0]
            logger.debug('Chunk label: %s', chunk_label)
            robjects.r('''
opts_knit$set(unnamed.chunk.label="{unnamed_chunk_label}")
render_markdown()
hook_plot <- knit_hooks$get('plot')
knit_hooks$set(plot=function(x, options) hook_plot(paste0("{{filename}}/", x), options))
            '''.format(unnamed_chunk_label=chunk_label))
        knitr.knit(filename, md_filename, quiet=QUIET, encoding=ENCODING)
        # read md file - create a MarkdownReader
        md_reader = readers.MarkdownReader(self.settings)
        content, metadata = md_reader.read(md_filename)
        # remove md file
        if CLEANUP:
            os.remove(md_filename)
        return content, metadata
Example #24
def adjust_pvalue(input_path,method):
    ''' Uses R to adjust the pvalues
    '''
    try:
        if method == "None": return
        saved_stdout, saved_stderr = sys.stdout, sys.stderr
        sys.stdout = sys.stderr = open(os.devnull, "w")
        r_script = """
            t5 = as.matrix(read.table(\""""+input_path+"""\",sep="\t", header=T, row.names=1))

            rptemp <- t5

            rp1 <- apply(abs(t5), 2, function(x) {p.adjust(x, \""""+method+"""\")})

            for (i in 1:nrow(t5)){
              for (j in (1:ncol(t5))) {
                if (rptemp[i,j] < 0) { rp1[i,j] <- -1*rp1[i,j]}
              }
            }
            write.table(rp1,\""""+input_path+"""\",sep="\t")
        """
        robjects.r(r_script)
        sys.stdout, sys.stderr = saved_stdout, saved_stderr
        
    except Exception, e:
        logger.error("R CRASHED")
        logger.error(traceback.print_exc())
        logger.error(str(e))
Example #25
def adjust_detailed_pvalue(input_path,method):   
    try:
        if method == "None": return
        saved_stdout, saved_stderr = sys.stdout, sys.stderr
        sys.stdout = sys.stderr = open(os.devnull, "w")
        r_script = """
            t5 <- as.data.frame(read.table(\""""+input_path+"""\", sep="\t", header=TRUE))

            rp1 <- p.adjust(abs(t5$P.value))
            for (i in 1:length(t5$P.value)) {
              if(t5$P.value[i] < 0) {
                rp1[i] <- -1*rp1[i]
              } 
            }

            t5 <- cbind(t5, P.adj=rp1)

            write.table(t5, \""""+input_path+"""\", sep="\t", row.names=F,quote=F)

        """
        robjects.r(r_script)
        sys.stdout, sys.stderr = saved_stdout, saved_stderr
        
    except Exception, e:
        logger.error("R CRASHED")
        logger.error(traceback.print_exc())        
        logger.error(str(e))
Example #26
    def testRS4Auto_Type(self):
        robjects.r("library(stats4)")

        class MLE(robjects.methods.RS4):
            __metaclass__ = robjects.methods.RS4Auto_Type
            __rname__ = "mle"
            __rpackagename__ = "stats4"
Example #27
def save_rdata(ids, model_name, filename):
    """save data from model and one level of joined data (one2one, one2many, many2many and many2one)"""
    
    list_map = {}
    add_to_map( set(ids), model_name, list_map ) 

    df = {}

    tmpdir = os.path.dirname(filename)
    imagedir = tmpdir + "/images"
    os.mkdir(imagedir)

    for mod_name, id_list in list_map.iteritems():
        model = Pool().get(mod_name)
        records = model.search([('id', 'in', list(id_list))])
        fields_info = [FieldInfo(name, ttype._type)
                           for name,ttype in model._fields.iteritems()
                           if  ttype._type in py2r]
        df[mod_name] = dataframe(records, fields_info)
        print "saving in Rdata: ", mod_name, list(id_list)
        """ save images """        
        for name,ttype in model._fields.iteritems():
            if ttype._type == "binary" and name[-4:] == "_map":
                for record in records:
                    value = getattr(record, name)
                    imgpath = os.path.join(imagedir, (str(record)+'_'+name).replace(',','_').replace('.','_')+'.png')
                    print "SAVING ", imgpath
                    imgfile = open(imgpath, 'wb')
                    imgfile.write(value)
                    imgfile.close()

    for mod_name, dfr in df.iteritems(): 
        robjects.r.assign(mod_name, dfr)
    robjects.r("save(list=c("+','.join(["'"+mod_name+"'" for mod_name, dfr in df.iteritems() ])+
            "), file='"+filename+"')")
Example #28
    def testRS4_TypeAccessors(self):
        robjects.r['setClass']("R_A", robjects.r('list(foo="numeric")'))
        robjects.r['setMethod']("length", signature="R_A",
                                definition = robjects.r("function(x) 123"))

        
        class R_A(methods.RS4):
            __metaclass__ = methods.RS4_Type
            __slots__ = ('get_length', 'length')
            __accessors__ = (('length', None,
                              'get_length', False, 'get the length'),
                             ('length', None,
                              None, True, 'length'))
            def __init__(self):
                obj = robjects.r['new']('R_A')
                self.__sexp__ = obj.__sexp__

        class A(R_A):
            __rname__ = 'R_A'


        ra = R_A()
        self.assertEquals(123, ra.get_length()[0])
        self.assertEquals(123, ra.length[0])

        a = A()
        self.assertEquals(123, a.get_length()[0])
        self.assertEquals(123, a.length[0])
Example #29
    def testRS4_TypeAccessors(self):
        robjects.r["setClass"]("R_A", robjects.r('list(foo="numeric")'))
        robjects.r["setMethod"]("length", signature="R_A", definition=robjects.r("function(x) 123"))

        class R_A(methods.RS4):
            __metaclass__ = methods.RS4_Type
            __slots__ = ("get_length", "length")
            __accessors__ = (
                ("length", None, "get_length", False, "get the length"),
                ("length", None, None, True, "length"),
            )

            def __init__(self):
                obj = robjects.r["new"]("R_A")
                self.__sexp__ = obj.__sexp__

        class A(R_A):
            __rname__ = "R_A"

        ra = R_A()
        self.assertEqual(123, ra.get_length()[0])
        self.assertEqual(123, ra.length[0])

        a = A()
        self.assertEqual(123, a.get_length()[0])
        self.assertEqual(123, a.length[0])
Example #30
 def updateRObjects(self):
     splayers = currentRObjects()
     for widget in self.widgets:
         if isinstance(widget, SpComboBox) \
         or isinstance(widget, SpListWidget):
             sptypes = widget.spTypes()
             for sptype in sptypes:
                 for layer in splayers.keys():
                     if splayers[layer] == sptype.strip() \
                     or sptype.strip() == "all":
                         value = layer
                         widget.addItem(value)
                     if splayers[layer] in VECTORTYPES \
                     and (sptype.strip() == "data.frame" \
                     or sptype.strip() == "all"):
                         value = layer+"@data"
                         widget.addItem(value)
                     if splayers[layer] in VECTORTYPES \
                     or splayers[layer] == "data.frame":
                         for item in list(robjects.r('names(%s)' % (layer))):
                             if splayers[layer] == "data.frame":
                                 value = layer+"$"+item
                             else:
                                 value = layer+"@data$"+item
                             if str(robjects.r('class(%s)' % (value))[0]) == sptype.strip() \
                             or sptype.strip() == "all":
                                 widget.addItem(value)
Example #31
import os
from IPython.display import Image
import rpy2.robjects as robjects
import pandas as pd
from rpy2.robjects import pandas2ri
from rpy2.robjects import default_converter
from rpy2.robjects.conversion import localconverter

read_delim = robjects.r('read.delim')
seq_data = read_delim('sequence.index', header=True, stringsAsFactors=False)
# In R:
#   seq.data <- read.delim('sequence.index', header=TRUE,
#                          stringsAsFactors=FALSE)
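
The pandas and converter imports above suggest the natural follow-up: pulling the R data frame into pandas. A hedged sketch assuming the rpy2 3.x conversion API:

# Convert the R data.frame into a pandas DataFrame inside a local conversion
# context, leaving the global converter untouched.
with localconverter(default_converter + pandas2ri.converter):
    seq_df = robjects.conversion.rpy2py(seq_data)
print(seq_df.shape)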
Example #32
def init_r_system():
    r('require("missForest")')
    r('require("MICE")')
    r('require("EMB")')
    r('require("Amelia")')
    r('require("matrix_completion")')
    r('require("softImpute")')
Example #33
def pca_outliers(adata, min_genes_per_cell=5, verbose=True):
    """
    Function to filter outliers using scater PCA on quality measures
    """
    import numpy as np
    import rpy2.robjects as ro
    import anndata2ri
    import scanpy as sc
    from rpy2.robjects import pandas2ri
    from scipy.sparse import issparse
    import rpy2.rinterface_lib.callbacks
    import logging
    if not verbose:
        rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

    ro.r('library(scater)')

    pandas2ri.activate()
    anndata2ri.activate()

    print("Loading objects into R")
    if issparse(adata.X):
        ro.globalenv['rawMatrix'] = adata.X.T.todense()
    else:
        ro.globalenv['rawMatrix'] = adata.X.T
    ro.globalenv['variables'] = adata.var_names.copy()
    ro.globalenv['observations'] = adata.obs[['total_counts']]

    print('Calculate PCA outliers')

    ro.r('pd <- DataFrame(data = observations)')
    ro.r('colnames(rawMatrix) <- rownames(pd)')
    ro.r('rownames(rawMatrix) <- variables')
    ro.r(
        'sce <- SingleCellExperiment(assays = list(counts = as.matrix(rawMatrix) ), colData = pd)'
    )
    ro.r('sce <- calculateQCMetrics(sce)')
    ro.r('sce <- runPCA(sce, use_coldata = TRUE, detect_outliers = TRUE)')
    ro.r('cat("Nr of outliers detected:", sum(sce$outlier), sep=" ")')
    ro.r('outlier2 = sce@colData@rownames[sce$outlier]')
    ro.r(
        'plotReducedDim(sce, use_dimred="PCA", shape_by = "outlier", size_by = "total_counts", colour_by = "total_features_by_counts")'
    )

    outlier2 = ro.r('outlier2')
    adata = adata[np.invert(np.in1d(adata.obs_names, outlier2))].copy()
    sc.pp.filter_genes(adata, min_cells=min_genes_per_cell)

    return adata
Example #34
def pca_covariates(adata, covariates=['total_counts'], verbose=False):
    """
    Function to output R^2 of covariates against PCA projection
    """
    import numpy as np
    import pandas as pd
    import rpy2.robjects as ro
    import anndata2ri
    import scanpy as sc
    from rpy2.robjects import pandas2ri
    from scipy.sparse import issparse
    import rpy2.rinterface_lib.callbacks
    import logging
    if not verbose:
        rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
    import seaborn as sns
    import matplotlib.pyplot as plt

    ro.r('library(scater)')

    pandas2ri.activate()
    anndata2ri.activate()

    print("Loading objects into R")
    if issparse(adata.X):
        ro.globalenv['rawMatrix'] = np.log1p(adata.X.T.todense())
    else:
        ro.globalenv['rawMatrix'] = np.log1p(adata.X.T)
    ro.globalenv['observations'] = adata.obs[covariates]

    print('Calculate PCA covariates')

    ro.r('pd <- DataFrame(data = observations)')
    #ro.r('print(pd[1:5,])')
    ro.r('colnames(rawMatrix) <- rownames(pd)')
    ro.r(
        'sce <- SingleCellExperiment(assays = list(counts = as.matrix(rawMatrix) ), colData = pd)'
    )
    commandString = 'getVarianceExplained(sce, exprs_values = "counts", variables = c('
    variables = ['"data.' + i + '"' for i in covariates]
    commandString = commandString + ','.join(variables) + ') )'
    print("using the R command")
    print(commandString)
    vals = ro.r(commandString)
    medians = np.argsort(-np.median(vals, 0))
    medianVals = -np.sort(-np.median(vals, 0))
    vals = pd.DataFrame(vals[:, medians])
    #print(covariates)
    #print(medians)
    vals.columns = np.asarray(covariates)[medians]
    plt.rcParams['figure.figsize'] = (8, 8)
    f, ax = plt.subplots(1)
    for nn, mm in zip(vals.columns, medianVals):
        sns.kdeplot(vals[nn], ax=ax, label=nn, clip=(mm, 97), gridsize=100)
    ax.set_xscale("symlog")
    #plt.xlim(0,100)
    ax.legend(title="Covariates", loc='best')

    adata.uns['pca_covariates'] = vals

    return adata
Example #35
# %%
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
import sys

sys.path.append("../")
from fumipo_stat.util import py2r

base = importr('base')

ro.r('print("Hello R")')

ro.r("x <- c(0, 1, 2, 3, 4)")
ro.r("y <- c(0, 1, 0, 2, 3)")
result = ro.r("lm(y~x)")
print(result)
print(base.summary(result))
assert list(base.summary(result).rx(4)[0].names.rx2(1)) == ["(Intercept)", "x"]
assert list(base.summary(result).rx(4)[0].names.rx2(2)) == [
    "Estimate", "Std. Error", "t value", "Pr(>|t|)"
]
assert list(base.summary(result).rx(4)[0].rownames) == ["(Intercept)", "x"]

# %%
import numpy as np
import pandas as pd
from rpy2.robjects import pandas2ri, numpy2ri
from rpy2.robjects.conversion import localconverter

ro.r("x <- c(0, 1, 2, 3, 4)")
ro.r("y <- c(0, 1, 0, 2, 3)")
Example #36
import rpy2.robjects as robj

def load_edgeR():
    """
    Load the edgeR library into R.
    """
    robj.r("library(edgeR)")
Example #37
from collections import OrderedDict
import re
import subprocess
import sys

import pandas as pd
import rpy2.robjects as robjects

output = sys.argv[1]

softwares = OrderedDict()

# TODO: need to replace hard coded software with retrieving ones in the toolsinfo

softwares["Pipeline"] = 'r20160208'
skewer_ver =  subprocess.check_output('echo `/mnt/software/skewer/skewer -v`', shell=True).decode("utf-8")
softwares["skewer"]=re.split("[: ]+", skewer_ver)[2] #filter(None, re.split("[: ]+", skewer_ver))[2]
star_ver = subprocess.check_output("/mnt/software/STAR-dir/STAR-2.4.2a/STAR --version", stderr=subprocess.STDOUT, shell=True)
softwares["STAR"] = star_ver.split("_")[1]
rsem_ver = subprocess.check_output("/mnt/software/rsem-dir/rsem-1.2.22/rsem-calculate-expression -version", shell=True)
softwares["RSEM"] = rsem_ver.split()[-1]

version = robjects.r("""
function (p) {
     paste(packageVersion(p),collapse=".")
}""")
softwares["R"] = list(version("base"))[0]
softwares["EdgeR"] = list(version("edgeR"))[0]
softwares["EBSeq"] = list(version("EBSeq"))[0]

softwares_df = pd.DataFrame(softwares, index = [0]).T
softwares_df.columns=['Version']
softwares_html = softwares_df.to_html(classes="table table-bordered table-hover", escape=False)
with open(output, 'w') as f_out:
    f_out.write(softwares_html)
Example #38
def gaussian_setup(X, Y, run_CV=True):
    """

    Some calculations that can be reused by methods:
    
    lambda.min, lambda.1se, lambda.theory and Reid et al. estimate of noise

    """
    n, p = X.shape

    Xn = X / np.sqrt((X**2).sum(0))[None, :]

    numpy2ri.activate()
    rpy.r.assign('X', X)
    rpy.r.assign('Y', Y)
    numpy2ri.deactivate()
    rpy.r('X=as.matrix(X)')
    rpy.r('Y=as.numeric(Y)')

    l_theory = np.fabs(Xn.T.dot(np.random.standard_normal(
        (n, 500)))).max(1).mean() * np.ones(p)

    if run_CV:
        numpy2ri.activate()
        rpy.r.assign('X', X)
        rpy.r.assign('Y', Y)
        rpy.r('X=as.matrix(X)')
        rpy.r('Y=as.numeric(Y)')
        rpy.r('G = cv.glmnet(X, Y, intercept=FALSE, standardize=FALSE)')
        rpy.r(
            'sigma_reid = selectiveInference:::estimate_sigma(X, Y, coef(G, s="lambda.min")[-1]) # sigma via Reid et al.'
        )
        rpy.r("L = G[['lambda.min']]")
        rpy.r("L1 = G[['lambda.1se']]")
        L = rpy.r('L')
        L1 = rpy.r('L1')
        sigma_reid = rpy.r('sigma_reid')[0]
        numpy2ri.deactivate()
        return L * np.sqrt(X.shape[0]) * 1.0001, L1 * np.sqrt(
            X.shape[0]) * 1.0001, l_theory, sigma_reid
    else:
        return None, None, l_theory, None
Example #39
def model_fit(model_options, X, y, *args, **kwargs):
    if model_options.model_name == '1':
        clf = sl.Lasso(alpha=model_options.lambda_value)
        clf.fit(X, y)
        return clf
    elif model_options.model_name == '2':
        clf = sl.ElasticNet(alpha=model_options.lambda_value,
                            l1_ratio=model_options.ratio_value)
        clf.fit(X, y)
        return clf
    elif model_options.model_name == '3':
        clf = sl.Lars(copy_X=True,
                      eps=model_options.lambda_value,
                      fit_intercept=True,
                      fit_path=True,
                      normalize=True,
                      positive=False,
                      precompute='auto',
                      verbose=False)
        clf.fit(X, y)
        return clf
    elif model_options.model_name == '4':
        from sklearn.gaussian_process import GaussianProcessRegressor
        clf = GaussianProcessRegressor(kernel=model_options.kernel,
                                       n_restarts_optimizer=3,
                                       random_state=2018)
        clf.fit(X, y)
        return clf
    elif model_options.model_name == '5':
        from sklearn.gaussian_process import GaussianProcessRegressor
        clf = GaussianProcessRegressor(kernel=model_options.kernel,
                                       normalize_y=True,
                                       n_restarts_optimizer=3,
                                       random_state=2018)
        clf.fit(X, y[:, 0])
        return clf
    elif model_options.model_name == '6':
        clf = sl.LogisticRegression(penalty=model_options.normSelection,
                                    C=1 / model_options.lambda_value)
        clf.fit(X, y)
        return clf
    elif model_options.model_name == '7':
        clf = sl.MultiTaskLasso(alpha=model_options.lambda_value)
        clf.fit(X, y)
        return clf
    elif model_options.model_name == '8':
        X_input = Input(model_options.input_shape)

        X_ = ZeroPadding2D((3, 3))(X_input)
        X_ = Conv2D(32, (7, 7), strides=(1, 1), name='conv0')(X_)
        X_ = BatchNormalization(axis=3, name='bn0')(X_)
        X_ = Activation('relu')(X_)
        X_ = MaxPooling2D((2, 2), name='max_pool')(X_)
        X_ = Flatten()(X_)
        X_ = Dense(1, activation='sigmoid', name='fc')(X_)

        clf = Model(inputs=X_input, outputs=X_)

        clf.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
        clf.fit(X,
                y,
                epochs=20,
                batch_size=50,
                verbose=1,
                validation_data=(model_options.X_train_valid,
                                 model_options.y_train_valid))
        return clf
    elif model_options.model_name == '9':
        from statsmodels.tsa.ar_model import AR
        clf = AR(X).fit(maxlag=int(model_options.lambda_value))
        return clf
    elif model_options.model_name == '10':
        clf = robjects.r(
            '''mod1 = cubist(x = trainx, y = trainy, committees = 10)''')
        return clf
    elif model_options.model_name == '11':
        # Create a tensor Regressor estimator
        if model_options.tensorReg_type == '1':
            clf = KruskalRegressor(weight_rank=model_options.rank + 1,
                                   tol=10e-7,
                                   n_iter_max=100,
                                   reg_W=1,
                                   verbose=0)
        elif model_options.tensorReg_type == '2':
            clf = TuckerRegressor(
                weight_ranks=[model_options.rank + 1, model_options.rank + 1],
                tol=10e-7,
                n_iter_max=100,
                reg_W=1,
                verbose=0)
        # Fit the estimator to the data
        clf.fit(X, y)
        return clf
    elif model_options.model_name == '12':
        Z = kwargs.get('Z', None)
        clf = InsituEnsemble(model_options.lambda_value, X, y, Z)
        return clf
    elif model_options.model_name == '13':
        from sklearn import svm
        clf = svm.SVC(C=model_options.lambda_value)
        clf.fit(X, y)
        return clf
Example #40
## A script for extracting info about the patients used in the analysis

## Load necessary modules

from rpy2 import robjects as ro
import numpy as np
import os
ro.r('library(survival)')
import re

##This call will only work if you are running python from the command line.
##If you are not running from the command line manually type in your paths.
BASE_DIR = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.dirname(
        os.path.abspath(__file__)))))

## There were three clinical files with nonredundant data.  V4.0 is in general the most uptodate, but it is possible
## for data in the other files to be more uptodate.  As a result, clinical data will be merged.

f = open(
    os.path.join(BASE_DIR, 'tcga_data', 'UCEC', 'clinical',
                 'nationwidechildrens.org_clinical_follow_up_v4.0_ucec.txt'))
##get the column indexes needed
columns = f.readline().split('\t')
patient_column = columns.index('bcr_patient_barcode')
alive_column = columns.index('last_contact_days_to')
death_column = columns.index('death_days_to')
f.readline()
f.readline()
data = [i.split('\t') for i in f]
## A patient can be listed multiple times in the file. The most recent listing (furthest down in the file), contains the most recent
Example #41
    def run(self):
        """
        Run the regression using R
        """
        # Source R script to define the function
        import rpy2.robjects as ro
        from rpy2.robjects import pandas2ri
        from .r_code.r_utilities import ewasresult2py, df_pandas2r

        r_code_folder = Path(__file__).parent / "r_code"
        filename = str(r_code_folder / "ewas_r.R")
        ro.r.source(filename)

        # Print warnings as they occur
        ro.r("options(warn=1)")

        # Lists of regression variables (NULL if empty)
        bin_vars = ro.StrVector(self.regression_variables["binary"])
        cat_vars = ro.StrVector(self.regression_variables["categorical"])
        cont_vars = ro.StrVector(self.regression_variables["continuous"])
        if len(bin_vars) == 0:
            bin_vars = ro.NULL
        if len(cat_vars) == 0:
            cat_vars = ro.NULL
        if len(cont_vars) == 0:
            cont_vars = ro.NULL

        # Lists of covariates (NULL if empty)
        dtypes = _get_dtypes(self.data)
        bin_covars = ro.StrVector(
            [v for v in self.covariates if (dtypes.loc[v] == "binary")]
        )
        cat_covars = ro.StrVector(
            [v for v in self.covariates if (dtypes.loc[v] == "categorical")]
        )
        cont_covars = ro.StrVector(
            [v for v in self.covariates if dtypes.loc[v] == "continuous"]
        )
        if len(bin_covars) == 0:
            bin_covars = ro.NULL
        if len(cat_covars) == 0:
            cat_covars = ro.NULL
        if len(cont_covars) == 0:
            cont_covars = ro.NULL

        # Allow nonvarying covariates by default to match python ewas (warn instead of error)
        allowed_nonvarying = ro.StrVector(self.covariates)

        # Run with or without survey design info
        if self.survey_design_spec is None:
            # Reset the index on data so that the first column is "ID" (note 'data' becomes a local variable)
            data = self.data.reset_index(drop=False)
            data = data[
                [
                    "ID",
                ]
                + [c for c in data.columns if c != "ID"]
            ]

            with ro.conversion.localconverter(
                ro.default_converter + pandas2ri.converter
            ):
                data_r = df_pandas2r(data)
                result = ro.r.ewas(
                    d=data_r,
                    bin_vars=bin_vars,
                    cat_vars=cat_vars,
                    cont_vars=cont_vars,
                    y=self.outcome_variable,
                    bin_covars=bin_covars,
                    cat_covars=cat_covars,
                    cont_covars=cont_covars,
                    regression_family=self.family,
                    allowed_nonvarying=allowed_nonvarying,
                    min_n=self.min_n,
                )
        else:
            # Merge weights into data and get weight name(s) (Note 'data' becomes a local variable)
            if self.survey_design_spec.single_weight:
                weights = self.survey_design_spec.weight_name
                data = pd.merge(
                    self.data,
                    self.survey_design_spec.weight_values,
                    left_index=True,
                    right_index=True,
                    how="left",
                )
            elif self.survey_design_spec.multi_weight:
                weights = self.survey_design_spec.weight_names
                data = pd.merge(
                    self.data,
                    pd.DataFrame(self.survey_design_spec.weight_values),
                    left_index=True,
                    right_index=True,
                    how="left",
                )
            else:
                raise ValueError("Weights must be provided")
            # Gather optional parts of survey parameters
            kwargs = dict()
            # Cluster IDs
            if self.survey_design_spec.has_cluster:
                kwargs["ids"] = f"{self.survey_design_spec.cluster_name}"
                data[
                    self.survey_design_spec.cluster_name
                ] = self.survey_design_spec.cluster_values
            else:
                kwargs["ids"] = ro.NULL
            # Strata
            if self.survey_design_spec.has_strata:
                kwargs["strata"] = f"{self.survey_design_spec.strata_name}"
                data[
                    self.survey_design_spec.strata_name
                ] = self.survey_design_spec.strata_values
            # fpc
            if self.survey_design_spec.has_fpc:
                kwargs["fpc"] = f"{self.survey_design_spec.fpc_name}"
                data[
                    self.survey_design_spec.fpc_name
                ] = self.survey_design_spec.fpc_values_original

            # Single cluster setting
            ro.r(
                f'options("survey.lonely.psu"="{self.survey_design_spec.single_cluster}")'
            )

            # Reset the index on data so that the first column is "ID"
            data = data.reset_index(drop=False)
            data = data[
                [
                    "ID",
                ]
                + [c for c in data.columns if c != "ID"]
            ]

            with ro.conversion.localconverter(
                ro.default_converter + pandas2ri.converter
            ):
                data_r = df_pandas2r(data)

                if self.survey_design_spec.multi_weight:
                    # Must convert python dict of var:weight name to a named list in R
                    weights = ro.ListVector(weights)

                result = ro.r.ewas(
                    d=data_r,
                    bin_vars=bin_vars,
                    cat_vars=cat_vars,
                    cont_vars=cont_vars,
                    y=self.outcome_variable,
                    bin_covars=bin_covars,
                    cat_covars=cat_covars,
                    cont_covars=cont_covars,
                    regression_family=self.family,
                    allowed_nonvarying=allowed_nonvarying,
                    min_n=self.min_n,
                    weights=weights,
                    subset=self.survey_design_spec.subset_array,
                    drop_unweighted=self.survey_design_spec.drop_unweighted,
                    **kwargs,
                )

        result = ewasresult2py(result)

        # Ensure correct dtypes (float columns may be objects if they are all NaN)
        float_cols = [
            "Beta",
            "SE",
            "Variable_pvalue",
            "LRT_pvalue",
            "Diff_AIC",
            "pvalue",
        ]
        result[float_cols] = result[float_cols].astype("float64")

        self.result = result.reset_index(drop=False)
        self.run_complete = True
Example #42
import numpy as np
import pandas as pd

# Rpy

import rpy2.robjects as rpy
from rpy2.robjects import numpy2ri

rpy.r(
    'suppressMessages(library(selectiveInference)); suppressMessages(library(knockoff))'
)  # R libraries we will use

rpy.r("""
estimate_sigma_data_splitting  = function(X,y, verbose=FALSE){
  nrep = 10
  sigma_est = 0
  nest = 0
  for (i in 1:nrep){
    n=nrow(X)
    m=floor(n/2)
    subsample = sample(1:n, m, replace=FALSE)
    leftover = setdiff(1:n, subsample)
    CV = cv.glmnet(X[subsample,], y[subsample], standardize=FALSE, intercept=FALSE, family="gaussian")
    beta_hat = coef(CV, s="lambda.min")[-1]
    selected = which(beta_hat!=0)
    if (verbose){
      print(c("nselected", length(selected)))
    }
    if (length(selected)>0){
      LM = lm(y[leftover]~X[leftover,][,selected])
      sigma_est = sigma_est+sigma(LM)
Example #43
    def __call__(self, **kw):
        if kw.get('input_type') == 'Table':
            filename = kw.get('table')
            assert os.path.exists(
                str(filename)), "File not found: '%s'" % filename
            robjects.r("""
Mdata = read.delim('%s',row.names=1)
conds = sapply(strsplit(colnames(Mdata),".",fixed=T),"[[",1)
""" % filename)
            conds = robjects.r("conds").rx()
        else:
            from QuantifyTable import QuantifyTablePlugin
            assembly = genrep.Assembly(kw.get('assembly'))
            chrmeta = assembly.chrmeta or "guess"
            kw['score_op'] = 'sum'
            signals1 = kw['Group1']['signals1']
            signals2 = kw['Group2']['signals2']
            if not isinstance(signals1, (list, tuple)): signals1 = [signals1]
            if not isinstance(signals2, (list, tuple)): signals2 = [signals2]
            signals = signals1 + signals2
            kw['SigMulti'] = {
                'signals': signals
            }  # to pass it to QuantifyTable plugin
            table = QuantifyTablePlugin().quantify(**kw)
            stracks = []
            norm_factors = []
            for sig in signals:
                assert os.path.exists(
                    str(sig)), "Signal file not found: '%s'." % sig
                _t = track(sig, chrmeta=chrmeta)
                if 'normalization' in _t.info:
                    _nf = float(_t.info['normalization'])
                elif 'nreads' in _t.info:
                    _nf = float(_t.info['nreads']) * 1e-7 / float(
                        _t.info.get('read_extension', 1))
                else:
                    _nf = 1
                stracks.append(_t)
                norm_factors.append(_nf)
            t = track(table,chrmeta=chrmeta,fields=['chr','start','end','name']+ \
                                                   ['score%d'%x for x in range(len(signals))])
            _f = [f for f in t.fields if f.startswith('score')]
            de_list = list(t.read(fields=['name'] + _f))
            t.close()
            os.remove(table)
            # Round all normalized scores to integers (matrix itself stays float)
            de_matrix = numpy.asarray([[
                int(float(s) * norm_factors[k] + .5)
                for k, s in enumerate(x[1:])
            ] for x in de_list],
                                      dtype=float)  # numpy.float was removed in modern NumPy
            rownames = numpy.asarray([x[0] for x in de_list])
            colnames = numpy.asarray([s.name for s in stracks])
            # if all prefixes are identical within a group, keep this prefix as group identifier.
            if len(list(set( [x.split('.')[0] for x in colnames[:len(signals1)]] ))) == 1 \
                    and len(list(set( [x.split('.')[0] for x in colnames[len(signals1):]] ))) == 1:
                group1 = colnames[0].split('.')[0]
                group2 = colnames[-1].split('.')[0]
            else:
                group1 = "Group1"
                group2 = "Group2"
            conds = [group1] * len(signals1) + [group2] * len(signals2)
            robjects.r.assign('Mdata', numpy2ri(de_matrix))
            robjects.r.assign('row_names', robjects.StrVector(rownames))
            robjects.r.assign('col_names', robjects.StrVector(colnames))
            robjects.r.assign('conds', robjects.StrVector(conds))
            robjects.r("""
Mdata = as.data.frame(Mdata,row.names=row_names)
colnames(Mdata) = col_names
""")

        robjects.r("""
library(DESeq)
if (all(table(conds)>=3)){        # if >=3 replicates in all conditions
    method = 'per-condition'        # for each group estimate the variance from its replicates
    sharingMode = 'gene-est-only'   # use the per-gene variance estimates only
} else if (any(table(conds)>1)){ # if at least one condition has replicates
    method = 'pooled'               # use all groups with replicates to estimate the variance
    sharingMode = 'maximum'         # use the max of the GLM fit and the estimated variance
} else {                         # if no replicates
    method = 'blind'                # pools all groups together to estimate the variance
    sharingMode='fit-only'          # use only the GLM fit across the pooled variance
}
cds = newCountDataSet(Mdata, conds)
cds = estimateSizeFactors(cds)
test = try({
    cds = estimateDispersions(cds, method=method, fitType='parametric', sharingMode=sharingMode)
})
if(class(test) == "try-error") {
    cds = estimateDispersions(cds, method=method, fitType='local', sharingMode=sharingMode)
}
""")

        groups = list(set(conds))
        couples = itertools.combinations(groups, 2)
        output = self.temporary_path(fname='DE')
        for c in couples:
            out = "%s_%s-%s.txt" % ((output, ) + tuple(c))
            robjects.r("""
res = nbinomTest(cds, '%s', '%s')
write.table(res[order(res[,8]),], '%s', row.names=F, quote=F, sep='\t')
            """ % (c[0], c[1], out))
            if kw.get('complete') is None:
                clean = self.clean_deseq_output(out, c)
                shutil.move(clean, out)
            self.new_file(out, 'differential_expression')
        return self.display_time()
Example #44
0
                                           initial_size=N_Xpreb,
                                           population_id=0)
    ]
    if debug:
        dd = msprime.DemographyDebugger(
            population_configurations=population_configurations,
            migration_matrix=migration_matrix,
            demographic_events=demographic_events)
        dd.print_history()
        return
    return population_configurations, migration_matrix, demographic_events


#Calculate fROH from empirical data
rread_command = "read.table(file = \"" + eroh_file + "\", header = T)"
r_df = robjects.r(rread_command)
pd_df = pd.DataFrame.from_dict(
    {key: np.asarray(r_df.rx2(key))
     for key in r_df.names})
#calculate croh for each individual
ecroh_pd_df = pd_df.groupby("IID").sum()
#calc avg fROH for the empirical data
eFROH = mean(ecroh_pd_df['KB'] * 1000 / len_chr)
print("Empirical FROH is " + str(eFROH))

#Find the number of variants in empirical data
for line in open(sroh_file):
    if "out of" in line:
        evariants = int(line.split(" ", 1)[0])
        print("%d variants in empirical dataset" % (evariants))
Example #45
0
    from rpy2.robjects.packages import importr
    from rpy2.robjects import Formula, Environment
    import rpy2.robjects as ro
    from rpy2.robjects import FloatVector, ListVector, IntVector, StrVector, NULL
    stats = importr('stats')
    base = importr('base')

    # Create matrix in R
    v = ro.FloatVector([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])
    m = ro.r.matrix(v, nrow=2)
    m = ro.r['matrix'](v, nrow=2)

    ctl = FloatVector(
        [4.17, 5.58, 5.18, 6.11, 4.50, 4.61, 5.17, 4.53, 5.33, 5.14])
    trt = FloatVector(
        [4.81, 4.17, 4.41, 3.59, 5.87, 3.83, 6.03, 4.89, 4.32, 4.69])
    group = base.gl(2, 10, 20, labels=["Ctl", "Trt"])
    weight = ctl + trt
    ro.globalenv["weight"] = weight
    ro.globalenv["group"] = group
    lm_D9 = stats.lm("weight ~ group")
    print(stats.anova(lm_D9))
    lm_D90 = stats.lm("weight ~ group - 1")
    print(base.summary(lm_D90))
    res = ro.StrVector(['abc', 'def'])
    v = ro.FloatVector([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])
    m = ro.r['matrix'](v, nrow=2)
    letters = ro.r['letters']
    rcode = 'paste(%s, collapse="-")' % (letters.r_repr())
    res = ro.r(rcode)
Example #46
0
def setup():
    r("install.packages('batchmeans', repos='http://cran.us.r-project.org')")
    r.require('batchmeans')
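
# An alternative sketch using rpy2's package utilities instead of inline R;
# assumes network access to CRAN ('batchmeans' as above).
from rpy2.robjects.packages import importr, isinstalled
from rpy2.robjects.vectors import StrVector

def setup_importr():
    if not isinstalled('batchmeans'):
        utils = importr('utils')
        utils.chooseCRANmirror(ind=1)  # select the first mirror in the CRAN list
        utils.install_packages(StrVector(['batchmeans']))
    return importr('batchmeans')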
Example #47
0
Python binding for the copula function from R using Rpy.
"""

from __future__ import division

import numpy as np
import statistics as st
from scipy.interpolate import interp1d
from scipy.stats import kendalltau, pearsonr, spearmanr
from stats import scoreatpercentile

from rpy2.robjects import r
import rpy2.robjects.numpy2ri
rpy2.robjects.numpy2ri.activate()
r("library('copula')")

class Copula():
    """
    This class estimates the parameters of a copula and
    generates joint random variables from those parameters.
    It supports the following three copulas:
        Clayton
        Frank
        Gumbel
        
    Example:
        x = np.random.normal(size=20)
        y = np.random.normal(size=20)
        foo = Copula(x, y, 'frank')
        u,v = foo.generate_uv(1000)
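
# For Clayton and Gumbel the copula parameter has a closed-form inversion of
# Kendall's tau (standard results), handy as a sanity check on the estimate
# returned by R's copula package; Frank requires numerical inversion instead.
from scipy.stats import kendalltau

def tau_to_theta(x, y, family):
    tau, _ = kendalltau(x, y)
    if family == 'clayton':
        return 2.0 * tau / (1.0 - tau)  # from tau = theta / (theta + 2)
    if family == 'gumbel':
        return 1.0 / (1.0 - tau)        # from tau = 1 - 1 / theta
    raise ValueError("no closed form for family: %s" % family)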
Example #48
0
ts_log_decompose.dropna(inplace=True)
test_stationarity(ts_log_decompose)

#Let's start some modelling! First, use the ACF and PACF
#plots below to figure out the p and q values for our
#ARIMA model
lag_acf = acf(ts_log_diff, nlags=20)
lag_pacf = pacf(ts_log_diff, nlags=20, method='ols')
plt.subplot(121) 
plt.plot(lag_acf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')
plt.title('Autocorrelation Function')
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()
plt.show()

#Run auto.arima from R's forecast package on the differenced series
pandas2ri.activate()
ro.r('install.packages("forecast")')
ro.r('library(forecast)')
rdf = pandas2ri.py2ri(ts_log_diff)
ro.globalenv['r_timeseries'] = rdf
pred = ro.r('as.data.frame(forecast(auto.arima(r_timeseries),h=5))')
pred
Example #49
0
def execute(snplist, chr):
    out = open(options.o + ".dist", "w")
    count1, count2, count = 0, 0, 1
    rhash, alist, blist = [], [], []
    ## loop through first SNP
    for i in snplist:
        if count % (float(options.r) / 10) == 0:
            print count, "positions processed, now at position:", i.rstrip()
        count += 1
        pos = int(i.split()[1])
        ## loop through second SNP
        for j in snplist[count1:]:
            pos1 = int(j.split()[1])
            ## get allele frequencies
            if pos1 <= pos:
                continue
            afhash = doubleallelecodes(i, j, individual)
            if afhash == "NA":
                print "afhash", i, j
                continue
            ## calculate r2 from Allele frequencies
            fi, fj, ftotal, allele1, allele2 = afhash
            rsq = rsquared(fi, fj, ftotal)
            if rsq == "NA":
                print "rsquared", fi, fj
                continue
            rhash.append(rsq)
            ##store positions of SNP1
            alist.append(pos)
            ##store positions of SNP2
            blist.append(pos1)
            #print inversion,rsq,pos,pos1
            out.write(
                "\t".join(map(str, [chr, pos, pos1, rsq, allele1, allele2])) +
                "\n")
        #print count1,i.rstrip()
        count1 += 1

    #### make x-axis labels; plot coordinates run from 0 to 1, so genome positions must be scaled into that interval
    binlist, labellist = [], []
    ##last SNP pos
    upper = max(alist + blist)
    ## first SNP pos
    lower = min(alist + blist)
    ## make sure that there is at least ONE SNP
    if upper - lower != 0:
        ## here calculate the relative step of one basepair
        step = 1 / float(upper - lower)
        invcoo = inversionrect(chr, step)
        ## set step size to 2mb
        co = 2000000
        ## this is the stepsize between the ticks
        stepsize = co * step
        ## this is the start position
        start = (co - lower) * step
        ## bin the steps in a list
        binlist.append(start)
        labellist.append(str(co / 1000000) + "mb")
        co += 2000000
        start += stepsize
        ## append ticks until the step is larger than the position of the last SNP
        while (co < upper):
            labellist.append(str(co / 1000000) + "mb")
            binlist.append(start)
            co += 2000000
            start += stepsize

        ## convert python to R
        cp = robjects.vectors.FloatVector(rhash)
        al = robjects.vectors.IntVector(alist)
        bl = robjects.vectors.IntVector(blist)
        bins = robjects.vectors.FloatVector(binlist)
        labels = robjects.vectors.StrVector(labellist)
        r.assign('values', cp)
        r.assign('al', al)
        r.assign('bl', bl)
        r.assign('bins', bins)
        r.assign('labels', labels)
        ## open graphics device and load libraries
        r('library("LDheatmap")')
        r('png("' + options.o + "_" + chr + '.png",width=5000,height=5000)')
        ##convert distance list to distance matrix
        r('x.names <- sort(unique(c(al, bl)))')
        r('x.dist <- matrix(0, length(x.names), length(x.names))')
        r('dimnames(x.dist) <- list(x.names, x.names)')
        r('x.ind <- rbind(cbind(match(al, x.names), match(bl, x.names)),cbind(match(bl, x.names), match(al, x.names)))'
          )
        r('x.dist[x.ind] <- rep(values, 2)')
        #print r('t(arev(x.dist))')
        ## make LDheatmap grid object based on the r2 values. Use the topo color palette and put the chromosome and inversion in the title. Also print the number of SNPs used. Rotate the whole heatmap by 270 degrees.
        r('M<-LDheatmap(x.dist,sort(unique(c(al, bl)),decreasing=F),color=topo.colors(20),flip=T,geneMapLabelX=10000,title="")'
          )
        ## add an X-Axis above heatmap and use the labels generated above
        r('la<-LDheatmap.addGrob(M, grid.xaxis(label=labels,at=bins,main=F,gp=gpar(cex=10),name="axis"),height=0)'
          )
        ## add inversion breakpoints
        if invcoo != "NA":
            invcount = 0
            alphabet = ["a", "b", "c", "d", "e", "f", "g", "h"]
            for coord in invcoo:
                #print coord
                ## add red line for the inversion boundaries
                r('l' + alphabet[invcount + 1] + '<-LDheatmap.addGrob(l' +
                  alphabet[invcount] + ', grid.lines(x=c(' + str(coord[0]) +
                  ',' + str(coord[1]) + '),y=' + str(1.1 +
                                                     (invcount / float(5))) +
                  ',gp=gpar( lwd=8,col="red")),height=' +
                  str(0.1 + (invcount / float(500))) + ')')
                ## add label for the inversion
                r('l' + alphabet[invcount + 2] + '<-LDheatmap.addGrob(l' +
                  alphabet[invcount + 1] + ', grid.text("' + str(coord[2]) +
                  '",x=' + str(coord[0]) + ',y=' + str(1.3 +
                                                       (invcount / float(5))) +
                  ',gp = gpar(cex = 5)),height=' +
                  str(0.1 + (invcount / float(500))) + ')')
                invcount += 2
        ## make everything white.
        r('grid.edit("axis", gp = gpar(col = "white"))')
        ## and then just make the ticks and the labels black
        r('grid.edit(gPath("axis", "labels"), gp = gpar(col = "black"))')
        r('grid.edit(gPath("axis", "ticks"), gp = gpar(col = "black",lwd=4))')
        ## resize the linewidth of the segments
        r('grid.edit(gPath("geneMap", "segments"), gp = gpar(lwd = 0.2))')
        ## increase the size of the color key labels
        r('grid.edit("Key",  gp = gpar(cex = 8))')
        ## increase the size of the title
        #r('grid.edit(gPath("heatMap", "title"), gp = gpar(cex=0))')

        r('dev.off()')
Example #50
0
def test_anova_r_sleep():
    "Test ANOVA accuracy by comparing with R (sleep dataset)"
    from rpy2.robjects import r

    # "sleep" dataset
    print r('data(sleep)')
    ds = Dataset.from_r('sleep')
    ds['ID'].random = True

    # independent measures
    aov = test.ANOVA('extra', 'group', ds=ds)
    fs = run_on_lm_fitter('extra', 'group', ds)
    print r('sleep.aov <- aov(extra ~ group, sleep)')
    print r('sleep.summary <- summary(sleep.aov)')
    r_res = r['sleep.summary'][0]
    assert_f_test_equal(aov.f_tests[0], r_res, 0, fs[0])

    # repeated measures
    aov = test.ANOVA('extra', 'group * ID', ds=ds)
    fs = run_on_lm_fitter('extra', 'group * ID', ds)
    print r('sleep.aov <- aov(extra ~ group + Error(ID / group), sleep)')
    print r('sleep.summary <- summary(sleep.aov)')
    r_res = r['sleep.summary'][1][0]
    assert_f_test_equal(aov.f_tests[0], r_res, 0, fs[0])

    # unbalanced (independent measures)
    ds2 = ds[1:]
    print r('sleep2 <- subset(sleep, (group == 2) | (ID != 1))')
    aov = test.ANOVA('extra', 'group', ds=ds2)
    fs = run_on_lm_fitter('extra', 'group', ds2)
    print r('sleep2.aov <- aov(extra ~ group, sleep2)')
    print r('sleep2.summary <- summary(sleep2.aov)')
    r_res = r['sleep2.summary'][0]
    assert_f_test_equal(aov.f_tests[0], r_res, 0, fs[0])
Example #51
0
import os
import sys
import getopt
import rpy2.robjects.packages as rpackages
import rpy2.robjects as robjects
from itertools import combinations

## GENERATE R markdown and md according to the config file:
#1.  dego 1v1
#2. dego stages vs
#3. index.md

DATADIR = "../"

robjects.r['source'](os.path.join(DATADIR, "conf/config.R"))  ### load config
names = robjects.r("names(data_src)")
lst_1v1 = list(combinations(names, 2))
stages = robjects.r("stage_lst")
project_name = robjects.r("PROJECT")

seen = set()
u_stages = [x for x in stages
            if x not in seen and not seen.add(x)]  ## remove duplicates, preserving order
lst_stages = list(combinations(u_stages, 2))

cluster_use = "seurat_clusters"

try:
    options, args = getopt.getopt(sys.argv[1:], "c:")
except getopt.GetoptError:
    print("Erorr Parametes")
Example #52
0
 def __call__(self, *args, **kwargs):
     return robjects.r(*args, **kwargs)
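
# In context this is presumably a method of a thin wrapper class; a
# self-contained sketch of the same pattern (the class name is hypothetical):
import rpy2.robjects as robjects

class RSession:
    def __call__(self, *args, **kwargs):
        return robjects.r(*args, **kwargs)

r = RSession()
print(r('sum(1:10)')[0])  # 55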
Example #53
0
def analyze2(lo3):
    value = {
        "중이염": 24,
        "급성심근경색": 1,
        "골수이식": 10,
        "위암": 11,
        "간암": 12,
        "제왕절개": 13,
        "관상동맥우회술": 14,
        "뇌졸중": 2,
        "요양병원": 20,
        "당뇨병": 22,
        "대장암": 23,
        "유방암": 25,
        "폐암": 26,
        "천식": 27,
        "폐질환": 28,
        "폐렴": 29,
        "고혈압": 3,
        "중환자실": 30,
        "혈액투석": 4,
        "정신과": 5,
        "고관절치환술": 7,
        "췌장암": 8,
        "식도암": 9
    }
    num = str(value.get(lo3))

    if lo3 not in value:
        print("평가결과가 없음")  # "No evaluation result available"
    elif num == "11":  # num is a string, so compare with string literals
        robj.r('table_result<<-data.frame()\n'
               'table_region<<-table_region[order(table_region$item.asmGrd' +
               num + '),]\n'
               'for(i in 1:nrow(table_region)) {\n'
               'if((as.character(table_region$item.asmGrd' + num +
               '[i])=="등급제외" || as.character(table_region$item.asmGrd' +
               num + '[i])=="평가제외") &&'
               '(as.character(table_region$item.asmGrd16[i])=="등급제외" || as.character(table_region$item.asmGrd16[i])=="평가제외"))'
               'next\n'
               'table_result<<-rbind.fill(table_result, table_region[i,])}\n'
               'table_result<<-table_result[order(table_result$item.asmGrd' +
               num + '),]\n'
               'print(head(table_result$item.yadmNm.x, 10))\n'
               'write.csv(table_result, file="result.csv", row.names=FALSE)')
    elif num == "12":
        robj.r('table_result<<-data.frame()\n'
               'table_region<<-table_region[order(table_region$item.asmGrd' +
               num + '),]\n'
               'for(i in 1:nrow(table_region)) {\n'
               'if((as.character(table_region$item.asmGrd' + num +
               '[i])=="등급제외" || as.character(table_region$item.asmGrd' +
               num + '[i])=="평가제외") &&'
               '(as.character(table_region$item.asmGrd15[i])=="등급제외" || as.character(table_region$item.asmGrd15[i])=="평가제외"))'
               'next\n'
               'table_result<<-rbind.fill(table_result, table_region[i,])}\n'
               'table_result<<-table_result[order(table_result$item.asmGrd' +
               num + '),]\n'
               'print(head(table_result$item.yadmNm.x, 10))\n'
               'write.csv(table_result, file="result.csv", row.names=FALSE)')
    else:
        robj.r('table_result<<-data.frame()\n'
               'table_region<<-table_region[order(table_region$item.asmGrd' +
               num + '),]\n'
               'for(i in 1:nrow(table_region)) {\n'
               'if(as.character(table_region$item.asmGrd' + num +
               '[i])=="등급제외" || as.character(table_region$item.asmGrd' +
               num + '[i])=="평가제외")\n'
               'next\n'
               'table_result<<-rbind.fill(table_result, table_region[i,])}\n'
               'table_result<<-table_result[order(table_result$item.asmGrd' +
               num + '),]\n'
               'print(head(table_result$item.yadmNm.x, 10))\n'
               'write.csv(table_result, file="result.csv", row.names=FALSE)')
Example #54
0
import pandas as pd  # needed for pd.read_csv below

from libmetgem import msp
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem.rdMolDescriptors import CalcExactMolWt, CalcMolFormula
from rdkit.Chem import AllChem
from DeepFrag.utils import load_model, ms_correlation
from DeepFrag.utils import read_ms, morgan_fp, ms2vec, model_predict, plot_compare_ms
from DeepFrag.loss import pearson, loss
from DeepFrag.annotate import annotate_ms

from pycdk.pycdk import add_formula, subtract_formula, check_formula, getFormulaExactMass
import rpy2.robjects as robjects
import rpy2.robjects.numpy2ri as numpy2ri
numpy2ri.activate()
robjects.r('''source('DeepFrag/metfrag.R')''')
generateFragments = robjects.globalenv['generateFragments']

msp_file = 'RIKEN_PlaSMA/RIKEN_PlaSMA_Pos.msp'
model = load_model('RIKEN_PlaSMA_Pos_10')
pretrain = load_model('simulated_Pos_10V')
result = pd.read_csv('Result/RIKEN_PlaSMA_Pos_10.csv')

# parser dataset
ms = []
smiles = []
energies = []
modes = []
parser = msp.read(msp_file)
for i, (params, data) in enumerate(parser):
    if 'collisionenergy' in params:
Example #55
0
    def instance_methods(self):
        rstring = '''
            function(X, ran_gf) {
                for (gf in ran_gf) {
                    X[[gf]] <- as.factor(X[[gf]])
                }
                return(X)
            }
        '''
        process_ran_gf = robjects.r(rstring)

        rstring = '''
            function(X) {
                for (c in names(X)) {
                    if (is.numeric(X[[c]])) {
                        X[[paste0('z_',c)]] <- scale(X[[c]])
                    }
                }
                return(X)
            }
        '''
        add_z = robjects.r(rstring)

        rstring = '''
            function(bform, df) {
                return(bam(as.formula(bform), data=df, drop.unused.levels=FALSE, nthreads=10))
            }
        '''

        fit = robjects.r(rstring)

        rstring = '''
            function(model) {
                return(summary(model))
            }
        '''

        summary = robjects.r(rstring)

        rstring = '''
            function(model, bform, df, subjects=NULL, words=NULL) {
                for (c in names(df)) {
                    if (is.numeric(df[[c]])) {
                        df[[paste0('z_',c)]] <- scale(df[[c]])
                    }
                }
                select = logical(nrow(df))
                select = !select
                if (grepl('subject', bform) & !is.null(subjects)) {
                    select = select & df$subject %in% subjects
                }
                if (grepl('word', bform) & !is.null(words)) {
                    select = select & (df$word %in% words)
                }
                preds = predict(model, df[select,])
                df$preds = NA
                df[select,]$preds = preds
                return(df$preds)
            }
        '''

        predict = robjects.r(rstring)

        rstring = '''
            function(df, col) {
                return(unique(df[[col]]))
            }
        '''

        unique = robjects.r(rstring)

        return process_ran_gf, add_z, fit, summary, predict, unique
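
# Hedged usage sketch for the R closures above; two of them are re-created
# standalone so the example is self-contained. Assumes the mgcv package is
# installed in R (it provides bam()); data and formula are illustrative.
import numpy as np
import pandas as pd
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri

pandas2ri.activate()         # legacy-style conversion, as elsewhere in this file
robjects.r('library(mgcv)')  # provides bam()

add_z = robjects.r('''
    function(X) {
        for (c in names(X)) {
            if (is.numeric(X[[c]])) {
                X[[paste0('z_',c)]] <- scale(X[[c]])
            }
        }
        return(X)
    }
''')
fit = robjects.r('''
    function(bform, df) {
        return(bam(as.formula(bform), data=df, drop.unused.levels=FALSE, nthreads=10))
    }
''')

rng = np.random.default_rng(1)
df = pd.DataFrame({'time': np.tile(np.arange(50.0), 4)})
df['rt'] = 300.0 + 0.5 * df['time'] + 10.0 * rng.standard_normal(len(df))

model = fit('rt ~ s(time)', add_z(df))
print(robjects.r['summary'](model))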
Example #56
0
def deseq2(
    pseudobulk: UnimodalData,
    design: str,
    contrast: Tuple[str, str, str],
    de_key: str = "deseq2",
    replaceOutliers: bool = True,
) -> None:
    """Perform Differential Expression (DE) Analysis using DESeq2 on pseduobulk data. This function calls R package DESeq2, requiring DESeq2 in R installed.

    DE analysis will be performed on all pseudo-bulk matrices in pseudobulk.

    Parameters
    ----------
    pseudobulk: ``UnimodalData``
        Pseudobulk data with rows for samples and columns for genes. If pseudobulk contains multiple matrices, DESeq2 will be applied to each of them.

    design: ``str``
        Design formula that will be passed to DESeq2

    contrast: ``Tuple[str, str, str]``
        A tuple of three elements passed to DESeq2: a factor in the design formula, the factor level to use as the numerator of the fold change, and the level to use as the denominator.
    
    de_key: ``str``, optional, default: ``"deseq2"``
        Key name under which the DE analysis results are stored. For a matrix named cluster.X, the stored key will be cluster.de_key.

    replaceOutliers: ``bool``, optional, default: ``True``
        Whether to execute DESeq2's replaceOutliers step. If set to ``False``, minReplicatesForReplace=Inf is passed to the ``DESeq`` function and cooksCutoff=False to the ``results`` function.

    Returns
    -------
    ``None``

    Update ``pseudobulk.varm``:
        ``pseudobulk.varm[de_key]``: DE analysis result for pseudo-bulk count matrix.
        ``pseudobulk.varm[cluster.de_key]``: DE results for cluster-specific pseudo-bulk count matrices.

    Examples
    --------
    >>> pg.deseq2(pseudobulk, '~gender', ('gender', 'female', 'male'))
    """
    try:
        import rpy2.robjects as ro
        from rpy2.robjects import pandas2ri, numpy2ri, Formula
        from rpy2.robjects.packages import importr
        from rpy2.robjects.conversion import localconverter
    except ModuleNotFoundError as e:
        import sys
        logger.error(f"{e}\nNeed rpy2! Try 'pip install rpy2'.")
        sys.exit(-1)

    try:
        deseq2 = importr('DESeq2')
    except Exception:  # importr raises PackageNotInstalledError, not ModuleNotFoundError
        import sys
        text = """Please install DESeq2 in order to run this function.\n
                To install this package, start R and enter:\n
                if (!require("BiocManager", quietly = TRUE))
                    install.packages("BiocManager")
                BiocManager::install("DESeq2")"""

        logger.error(text)
        sys.exit(-1)

    import math
    to_dataframe = ro.r('function(x) data.frame(x)')

    for mat_key in pseudobulk.list_keys():
        with localconverter(ro.default_converter + numpy2ri.converter +
                            pandas2ri.converter):
            dds = deseq2.DESeqDataSetFromMatrix(
                countData=pseudobulk.get_matrix(mat_key).T,
                colData=pseudobulk.obs,
                design=Formula(design))

        if replaceOutliers:
            dds = deseq2.DESeq(dds)
            res = deseq2.results(dds, contrast=ro.StrVector(contrast))
        else:
            dds = deseq2.DESeq(dds, minReplicatesForReplace=math.inf)
            res = deseq2.results(dds,
                                 contrast=ro.StrVector(contrast),
                                 cooksCutoff=False)
        with localconverter(ro.default_converter + pandas2ri.converter):
            res_df = ro.conversion.rpy2py(to_dataframe(res))
            res_df.fillna(
                {
                    'log2FoldChange': 0.0,
                    'lfcSE': 0.0,
                    'stat': 0.0,
                    'pvalue': 1.0,
                    'padj': 1.0
                },
                inplace=True)

        de_res_key = de_key if mat_key.find(
            '.') < 0 else f"{mat_key.partition('.')[0]}.{de_key}"
        pseudobulk.varm[de_res_key] = res_df.to_records(index=False)
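
# For readers without the pegasus UnimodalData wrapper, the same rpy2/DESeq2
# round trip can be sketched with plain numpy/pandas (synthetic counts;
# assumes DESeq2 is installed in R):
import numpy as np
import pandas as pd
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri, numpy2ri, Formula
from rpy2.robjects.packages import importr
from rpy2.robjects.conversion import localconverter

deseq2 = importr('DESeq2')
to_dataframe = ro.r('function(x) data.frame(x)')

rng = np.random.default_rng(0)
counts = rng.poisson(10.0, size=(6, 100))  # 6 samples x 100 genes
coldata = pd.DataFrame({'gender': ['female'] * 3 + ['male'] * 3},
                       index=['sample%d' % i for i in range(6)])

with localconverter(ro.default_converter + numpy2ri.converter + pandas2ri.converter):
    dds = deseq2.DESeqDataSetFromMatrix(countData=counts.T,  # genes x samples
                                        colData=coldata,
                                        design=Formula('~gender'))

dds = deseq2.DESeq(dds)
res = deseq2.results(dds, contrast=ro.StrVector(['gender', 'female', 'male']))
with localconverter(ro.default_converter + pandas2ri.converter):
    res_df = ro.conversion.rpy2py(to_dataframe(res))
print(res_df.head())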
Example #57
0
 def testGetclassdef(self):
     robjects.r('library(stats4)')
     cr = methods.getclassdef('mle', 'stats4')
     self.assertFalse(cr.virtual)
Example #58
0
from rpy2.robjects import r
import os
import rpy2.robjects.packages as rpackages
import rpy2.robjects as robj

utils = rpackages.importr('utils')  # load R's base "utils" package

robj.r('setwd("~/Desktop")')
r.library('plyr')

robj.r('load("~/Desktop/MediWeb-master/R_file/main.RData")')
robj.r('table_region<-data.frame()')

##### Category by Region #####

# start
robj.r('start<-function(sido_s, sggu_s){\n'
       'for(i in 1:nrow(table_united)){\n'
       'if((sido_s==as.character(table_united$item.sidoCdNm[i])) && '
       '(sggu_s==as.character(table_united$item.sgguCdNm[i]))) {\n'
       'start_num=i\n'
       'return(start_num)}}}')

# end
robj.r('end<-function(sido_e, sggu_e, start_num){\n'
       'for(i in start_num:nrow(table_united)){\n'
       'if(!((sido_e==as.character(table_united$item.sidoCdNm[i])) && '
       '(sggu_e==as.character(table_united$item.sgguCdNm[i])))) {\n'
       'end_num=i-1\n'
       'return(end_num)}}}')
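
# Hypothetical usage: locate the row range of one district and slice it out of
# table_united (the region names below are illustrative only).
start_num = int(robj.r('start("서울특별시", "강남구")')[0])
end_num = int(robj.r('end("서울특별시", "강남구", %d)' % start_num)[0])
robj.r('table_region<-table_united[%d:%d,]' % (start_num, end_num))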
Example #59
0
    def testRS4Auto_Type_nopackname(self):
        robjects.r('library(stats4)')

        class MLE(robjects.methods.RS4):
            __metaclass__ = robjects.methods.RS4Auto_Type
            __rname__ = 'mle'
Example #60
0
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri

pandas2ri.activate()
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

rstring = '''
     function(x) {
         return(scale(x, scale=FALSE))
     }
'''

center = robjects.r(rstring)
robjects.globalenv["c."] = center

rstring = '''
     function(x) {
         return(scale(x, scale=TRUE))
     }
'''

z_score = robjects.r(rstring)
robjects.globalenv["z."] = z_score

rstring = '''
     function(x) {
         return(x/sd(x))
     }