Example #1
    def _start_r_interpreter(self):
        '''Find and start an R interpreter.

        On Windows systems use the packaged R env.
        On POSIX systems use the system-installed R env.
        May raise RNotFoundException.
        '''
        if sys.platform.startswith('win32'):
            # An MS Windows kind of system
            logger.info("Running on an MS Windows system")
            Rwbin = op.join(self.r_origo, 'R-3.3.1', 'bin', 'R.exe')
            Rlib = op.join(self.r_origo, 'R-3.3.1', 'library')
            logger.info("Try Windows R path: {0}".format(Rwbin))
            if op.exists(Rwbin):
                logger.info("R.exe found")
                self.r = pyper.R(RCMD=Rwbin, use_pandas=True)
                self.r('.libPaths("{0}")'.format(Rlib))
            else:
                # Fall back to an R on the system path;
                # report if R is not found.
                self.r = pyper.R(use_pandas=True)
                # raise RNotFoundException()
        else:
            # Not Windows, assumed to be a POSIX system
            # (OS X/Darwin or a Linux flavor).
            logger.info("Assumed to run on a POSIX system")
            Rxbin = self._find_posix_system_R()
            logger.info("System R found at path: {0}".format(Rxbin))
            self.r = pyper.R(RCMD=Rxbin, use_pandas=True)
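The `_find_posix_system_R` helper referenced above is not shown; a minimal sketch of what it might look like, assuming R is discoverable on the PATH (the body below is hypothetical, and RNotFoundException is the exception named in the docstring):

import shutil

def _find_posix_system_R(self):
    '''Locate the system R binary on a POSIX system via the PATH.'''
    r_bin = shutil.which('R')  # e.g. /usr/bin/R or /usr/local/bin/R
    if r_bin is None:
        raise RNotFoundException('No R interpreter found on the PATH')
    return r_bin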
Example #2
def surv_ci(data, pred_col, duration_col, event_col):
    """Concordance Index

    Parameters
    ----------
    data : pandas.DataFrame
        Full survival data.
    pred_col : str
        Name of column indicating log hazard ratio.
    duration_col : str
        Name of column indicating time.
    event_col : str
        Name of column indicating event.

    Returns
    -------
    `dict`
        Dict with details of the concordance index, as computed by
        Hmisc::rcorr.cens.

    Examples
    --------
    >>> surv_ci(data, 'Pred', 'T', 'E')
    """
    X = data[pred_col].values
    T = data[duration_col].values
    E = data[event_col].values
    r = pr.R(use_pandas=True)
    r("library('survival')")
    r("library('Hmisc')")
    r.assign("t", T)
    r.assign("e", E)
    r.assign("x", X)
    r("src <- rcorr.cens(-x, Surv(t, e))")
    return r.src
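A hedged usage sketch, assuming `data` is a survival DataFrame with columns 'Pred', 'T' and 'E'. Hmisc::rcorr.cens returns a named vector whose first element is the concordance index, so the returned dict should expose it under a key like 'C Index' (the exact key depends on how pyper maps the R names):

result = surv_ci(data, 'Pred', 'T', 'E')
print(result)  # inspect the returned names, e.g. 'C Index', 'Dxy', 'S.D.', 'n'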
Example #3
def get_dict_correction(set_XC, designed_barcode,
                        metric="seqlev", distance=2):

    if not isinstance(distance, int):
        raise InvalidArgumentError("distance must be an integer")

    if distance not in (0, 1, 2):
        raise InvalidArgumentError("distance must be 0, 1, or 2")

    r = pr.R()
    r("library(DNABarcodes)")
    r.assign("list_XC", list(set_XC))
    r.assign("designed_barcode", designed_barcode)

    if metric == "seqlev":
        r("demultiplexed <- demultiplex(list_XC, designed_barcode, metric='seqlev')")
    elif metric == "hamming":
        r("demultiplexed <- demultiplex(list_XC, designed_barcode, metric='hamming')")
    else:
        raise InvalidArgumentError("metric must be 'seqlev' or 'hamming'")

    df_correction = r.get("demultiplexed")

    df_correction.columns = [x.replace(" ", "") for x in df_correction.columns]
    df_correction_filt = (df_correction[df_correction.distance <= distance]
                          [['read', 'barcode']])
    dict_correct = df_correction_filt.set_index('read').to_dict()['barcode']

    return dict_correct
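Hypothetical usage, assuming `observed` is a set of sequenced cell barcodes and `whitelist` is the list of designed barcodes (the values below are placeholders; DNABarcodes must be installed in R):

observed = {"ACGT", "ACGA", "TTGC"}   # placeholder barcodes
whitelist = ["ACGT", "TTGC"]          # placeholder whitelist
correction = get_dict_correction(observed, whitelist, metric="seqlev", distance=2)
fixed = [correction.get(bc, bc) for bc in observed]  # fall back to the raw barcode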
Example #4
def loess_res(ts, bass=0):
    '''
    Fit a smooth curve using Friedman's Super Smoother and return residuals

    Parameters
    ----------
    ts : pandas.Series
        Time series of values.
    bass : int
        Smoothing parameter of the curve; values up to 10 give more smoothness.

    Returns
    -------
    list
        Series of residuals from the smoothed curve fit.
    '''

    ts = ts.tolist()

    # create an R instance
    r = pr.R(use_pandas=True)
    # pass ts from Python to R as Y, along with the bass parameter
    r.assign("Y", ts)
    r.assign("bass", bass)

    # fit Friedman's super smoother on ts and extract the fitted values
    r("fit = supsmu(x=1:length(Y), y=Y, bass=bass)$y")
    # pass fitted values from R back to Python
    fit = r.get("fit")
    # residuals from the smoothed fit
    residuals = [y - f for y, f in zip(ts, fit)]
    return residuals
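A short usage sketch with a synthetic series (R must be available, since `supsmu` runs inside the R instance):

import numpy as np
import pandas as pd

ts = pd.Series(np.sin(np.linspace(0, 6, 120)) + np.random.normal(0, 0.1, 120))
res = loess_res(ts, bass=5)
print(res[:5])  # residuals should hover around zero if the trend was captured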
Example #5
def icar(file_path, algo, n_components, wl_range_start, wl_range_end):
    data = pd.read_csv(file_path, index_col=0).T
    data = data.loc[:, wl_range_start:wl_range_end].T
    wl = data.index.values.astype('float32')

    # create an R instance
    r = pyper.R(use_pandas=True)

    # pass the Python objects to R
    r.assign('data', data)
    r.assign('wl', wl)
    r.assign('n_components', n_components)

    # run the R code
    r("library(ica)")
    r("X  <- data")

    if algo == 'FastICA':
        r("a <- icafast(X, n_components)")
    elif algo == 'InfoMax':
        r("a <- icaimax(X, n_components)")
    else:
        r("a <- icajade(X, n_components)")

    r("ics <- cbind(wl, a$S)")

    # read the R object back into Python
    ics = r.get("ics")
    ics_df = pd.DataFrame(ics)

    return ics_df
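Hypothetical usage; the file name, algorithm choice, and wavelength range below are placeholders:

ics_df = icar('spectra.csv', 'FastICA', n_components=3,
              wl_range_start=400, wl_range_end=800)
ics_df.to_csv('independent_components.csv', index=False)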
Example #6
def readModelFile(modelFileName, jobName):
    if os.path.isfile(modelFileName):
        r = pr.R()
        r(IMPORT_R_WRAPPER)
        r.assign('params', json.dumps({'rdsFile': modelFileName}))
        rOutput = r('model <- readFromRDS(params)')
        log.info(rOutput)
        results = r.get('model')
        del r
        rOutput = None
        if results:
            model = json.loads(results)
            maxTimePoint = model['maxTimePoint'][0]
            covariates = model['covariates']
            if len(model['jobName']):
                jobName = model['jobName'][0]
            return {
                'jobName': jobName,
                'maxTimePoint': maxTimePoint,
                'interceptOnly': len(covariates) == 0
            }
        else:
            message = "Couldn't read Time Points from RDS file!"
            log.error(message)
            return message
    else:
        message = "Model file does not exist!"
        log.error(message)
        return message
Example #7
    def __init__(self,
                 training_marginal_dist_matrix: np.matrix,
                 family: str,
                 param=None,
                 dim=None):
        if family not in ['gumbel', 'clayton', 'frank', 'normal', 'indep']:
            raise ValueError('Copula family "' + family + '" is not supported.')
        if training_marginal_dist_matrix.size == 0:
            if dim is None:
                raise ValueError
            self._dimension = dim
        else:
            self._dimension = training_marginal_dist_matrix.shape[1]
        self._r_engine = pyper.R()
        self._r_engine.assign("py.training.marginal.dist.matrix",
                              training_marginal_dist_matrix)
        self._r_engine.assign("py.cop.name", family)
        self._r_engine.assign("py.param", param)
        self._r_engine.assign("py.dim", self._dimension)
        self._r_engine('source("copula/copula.R")')
        trained_param = self._r_engine.get("trained.param")
        self.trained_param = trained_param  # asahi
        if trained_param is None:
            self._r_engine('trained <- indepCopula(dim=%d)' % self._dimension)
            print('indep')
        else:
            print(trained_param)
Example #8
def arimaTemp(filename):

    processTemp()
    predictions = []

    r = pr.R(RCMD="C:\\Program Files\\R\\R-3.1.2\\bin\\R", use_numpy=True, use_pandas=True)

    datetimes = np.arange('2008-07-01 00:00:00', '2008-07-08 00:00:00', dtype='datetime64[h]')

    for j in range(1, 12):

        data = pd.read_csv('../data/outputs/temp_history_processed_station_%s.csv' % j,
                           parse_dates=['datetime'])

        subts = data["value"]
        print('Predictions for zone %s' % j)
        results = arima(subts, r)

        results = pd.DataFrame(results, columns=['value'])

        results['datetime'] = datetimes
        results['station_id'] = j
        predictions.append(results)

    concatPredictions = pd.concat(predictions)

    concatPredictions.to_csv(filename, index=False, date_format='%Y-%m-%d %H:%M:%S', mode='a')
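The `arima` helper called above is not shown; a minimal sketch of what it might look like using R's `forecast` package through the shared pyper instance (the function body, the package choice, and the 168-hour horizon, one week of hourly data, are all assumptions):

def arima(subts, r):
    # push the series into R and fit an ARIMA model automatically
    r.assign('y', subts.values)
    r('library(forecast)')
    r('fit <- auto.arima(ts(y))')
    r('pred <- as.numeric(forecast(fit, h=168)$mean)')  # one week of hourly steps
    return r.get('pred')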
Example #9
def plot_scatter_polya(in_fisher, out_png):
    """plot a scatter plot of the expression of polya sites"""
    lhs, rhs = in_fisher.split('vs.')
    lhs, rhs = short_name(lhs), short_name(rhs)
    try:
        compare_type = in_fisher.replace('.fisher_test', '').split('polya.')[1]
    except IndexError:
        compare_type = 'ALL'
    R_script = r"""
library(lattice)
d<-read.table(file="%(in_fisher)s", header=TRUE, sep="\t")
png('%(out_png)s')

exp1 <- c(d$exp1_upstream_count, d$exp1_downstream_count)
exp2 <- c(d$exp2_upstream_count, d$exp2_downstream_count)

sig_sites <- d$fisher_p_two_sided < .05

exp1_sig = c(d$exp1_upstream_count[sig_sites], d$exp1_downstream_count[sig_sites])
exp2_sig = c(d$exp2_upstream_count[sig_sites], d$exp2_downstream_count[sig_sites])

plot(log2(exp1), log2(exp2), cex=.8, col='lightgray', pch=20, xlab="%(xlab)s", ylab="%(ylab)s", main="All sites for %(lhs)s vs. %(rhs)s in %(compare_type)s", sub=paste("R^2 is ", cor(exp1, exp2)))

dev.off()
""" % dict(plot_label=r'Poly-A for\n%s' % in_fisher,
           xlab="log2(%s)" % (lhs),
           ylab="log2(%s)" % (rhs),
           in_fisher=in_fisher, out_png=out_png, lhs=lhs, rhs=rhs,
           compare_type=compare_type)
    # print R_script
    r = pyper.R()
    r(R_script)
Example #10
def plot_differential_polya(in_fisher, out_pattern, out_template):
    """plot a scatter plot of the log-expression difference of polya sites"""
    lhs, rhs = in_fisher.split('vs.')
    lhs, rhs = short_name(lhs), short_name(rhs)
    try:
        compare_type = in_fisher.replace('.fisher_test', '').split('polya.')[1]
    except IndexError:
        compare_type = 'ALL'
    for max_pval in [.05, .01, .001]:
        out_png = out_template % ('pval_%s' % max_pval)
        R_script = r"""
library(lattice)
d<-read.table(file="%(in_fisher)s", header=TRUE, sep="\t")
png('%(out_png)s')
sig_sites <- d$fisher_p_two_sided < %(max_pval)s

exp1_proximal = d$exp1_upstream_count[sig_sites]
exp1_distal = d$exp1_downstream_count[sig_sites]
exp2_proximal = d$exp2_upstream_count[sig_sites]
exp2_distal = d$exp2_downstream_count[sig_sites]

plot(log2(d$exp1_upstream_count/d$exp2_upstream_count), log2(d$exp1_downstream_count/d$exp2_downstream_count), cex=.8, col='lightgray', pch=20, xlab="%(xlab)s", ylab="%(ylab)s", main="Significant sites for %(lhs)s vs. %(rhs)s in %(compare_type)s", sub=paste("Significant sites:", sum(sig_sites), "/", dim(d)[1]))
points(log2(exp1_proximal/exp2_proximal), log2(exp1_distal/exp2_distal), col='red', cex=.8, pch=20)

dev.off()
""" % dict(plot_label=r'Differential Poly-A for\n%s' % in_fisher,
               xlab="log2(%s/%s)-proximal" % (lhs, rhs),
               ylab="log2(%s/%s)-distal" % (lhs, rhs),
               in_fisher=in_fisher, out_png=out_png, lhs=lhs, rhs=rhs,
               compare_type=compare_type, max_pval=max_pval)
        # print R_script
        r = pyper.R()
        r(R_script)
Example #11
    def on_get(self, req, resp):
        r = pr.R()
        # https://www.r-tutor.com/elementary-statistics/quantitative-data/frequency-distribution-quantitative-data
        r("duration = faithful$eruptions")
        r("breaks = seq(1.5, 5.5, by=0.5)")
        r("duration.cut = cut(duration, breaks, right=FALSE)")
        r("duration.freq = table(duration.cut)")
        resp.body = r("duration.freq")
Example #12
def pc_rlib(d_dt, threshold, skel_method, verbose):
    import pandas
    import pyper
    import numpy as np
    import networkx as nx

    if skel_method == "default":
        method = "original"
    else:
        method = skel_method

    input_data = d_dt
    #input_data = {}
    #for nid, ns in nsdict.iteritems():
    #    input_data[nid] = ns.get_values()

    r = pyper.R(use_pandas=True)
    r("library(pcalg)")
    r("library(graph)")

    df = pandas.DataFrame(input_data)
    r.assign("input.df", df)
    r.assign("method", method)
    r("evts = as.matrix(input.df)")
    #print(r("evts"))
    #r("t(evts)")

    #r("save(evts, file='rtemp')")

    r.assign("event.num", len(input_data))
    r.assign("threshold", threshold)
    r.assign("verbose.flag", verbose)

    print(r("""
        pc.result <- pc(suffStat = list(dm = evts, adaptDF = FALSE),
            indepTest = binCItest, alpha = threshold, skel.method = method,
            labels = as.character(seq(event.num)-1), verbose = verbose.flag)
    """))
    #print r("""
    #    pc.result <- pc(suffStat = list(dm = evts, adaptDF = FALSE),
    #        indepTest = binCItest, alpha = threshold,
    #        labels = as.character(seq(event.num)-1), verbose = TRUE)
    #""")

    r("node.num <- length(nodes(pc.result@graph))")

    g = nx.DiGraph()
    for i in range(r.get("node.num")):
        r.assign("i", i)
        edges = r.get("pc.result@graph@edgeL[[as.character(i)]]$edges")
        if edges is None:
            pass
        elif isinstance(edges, int):
            g.add_edge(i, edges - 1)
        elif isinstance(edges, np.ndarray):
            for edge in edges:
                g.add_edge(i, edge - 1)
        else:
            raise ValueError("edges is unknown type {0}".format(type(edges)))
    return g
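Hypothetical usage on a tiny binary event matrix (the column keys and values are placeholders; the pcalg and graph packages must be installed in R):

d_dt = {0: [0, 1, 1, 0, 1], 1: [0, 1, 1, 1, 0], 2: [1, 0, 0, 1, 1]}
g = pc_rlib(d_dt, threshold=0.05, skel_method="default", verbose=False)
print(list(g.edges()))  # directed edges recovered by the PC algorithm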
Example #13
def ping():
    try:
        r = pr.R()
        return r['"true"']
    except Exception as e:
        log.exception("Exception occurred")
        return buildFailure({
            "status": False,
            "statusMessage": "Call R failed!"
        })
Example #14
def RdataToHDF5(fileName, variableName, path=None):
    r = pyper.R()
    if path is not None:
        r["setwd('%s')" % path]
    r['load("%s")' % fileName]
    r['library(rhdf5)']
    try:
        r['h5createFile("%s.h5")' % fileName]
    except pyper.RError:
        pass  # typically this is because the file already exists
        # TODO: determine if something else went wrong
    r['h5write(%s, "%s.h5","%s")' % (variableName, fileName, variableName)]
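Reading the converted file back on the Python side; a sketch assuming the h5py package, with placeholder file and variable names matching the arguments above:

import h5py

with h5py.File('mydata.RData.h5', 'r') as f:  # fileName + '.h5'
    arr = f['myvariable'][...]                # the variableName written above
print(arr.shape)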
Example #15
def make_basic_colormap(todir: str,
                        width: int,
                        height: int,
                        encoding="CP932") -> None:
    '''Create a colormap of administrative districts from a shapefile

    Each administrative district is drawn in a different color.
    Note that the colors only distinguish districts; no mapping is
    made between a district and its name or code.

    Parameters
    ----------
    todir : str
        Directory containing the shapefile
    width : int
        Width of the output SVG file
    height : int
        Height of the output SVG file
    encoding : str
        Character encoding used in the input shapefile

    '''

    # create R object
    r = pyper.R()

    r("library(sf)")
    r("library(ggplot2)")

    # input shapefile
    shapefile = (glob.glob(os.path.join(todir, "*.shp")))[0]
    r.assign('shapefile', shapefile)

    # make temporary directory
    os.makedirs('./tmp', exist_ok=True)

    # set output svg file name
    svgfile = './tmp/tmp.svg'
    r.assign('svgfile', svgfile)

    # output the Osaka map;
    # each area is colored according to its municipality code
    r.assign('param1', width)
    r.assign('param2', height)

    encoding_to_r = "ENCODING=" + encoding  # e.g. "ENCODING=CP932"
    r.assign('option', encoding_to_r)

    r('shp <- sf::st_read(shapefile,options=option)')
    r('svg(svgfile, width=param1, height=param2)')
    r('ggplot()+geom_sf(data=shp,aes(fill=N03_007))')
    r('dev.off()')
Example #16
File: cpa.py Project: adam-paul/ONC
    def __pipeData(self):
        '''
        This function pipes data to the R environment and declares R variables

        NOTE: R must be installed in this script's environment

        '''

        try:
            self.r = pyper.R(use_numpy=True)  # for data input as a numpy array
            self.r("chooseCRANmirror(ind=10)")  # choose Canadian host as R mirror (for package downloads)
            self.r.assign('data', self.data)
        except Exception:
            print('There was an error piping data to the R environment. '
                  'Please ensure that R is properly installed and all PATH '
                  'variables are correct.')
Example #17
def plot_balls(balls, depth):
    global save_dir
    r = pyper.R()
    r_code = '''
    par(pty="s")
    png("{0}/depth_{1}.png")
    data <- read.csv('{0}/depth_{1}.csv', header = FALSE)
    p = data$V1
    q = data$V2
    col1 <- densCols(p, q, colramp = colorRampPalette(c("white", "orange", "red")))
    plot(0, 0, type = "n", xlim = c(0, 100), ylim = c(0, 100),xlab = "x", ylab = "y")
    points(p, q, col = col1, pch = 3)
    dev.off()
    '''.format(save_dir, depth)
    r(r_code)
Example #18
def draw_expression_correlation(in_data, out_png):
    """Correlation test to see whether expression values correlate with
    peak quality (the score column from the peak file).
    """

    R_script = r"""
png('%(out_png)s')
d<-read.table(file="%(in_data)s", header=TRUE, sep="\t");
library(lattice);
r <- cor.test(d$expression_val, d$peak_score)
plot(d$expression_val, d$peak_score, xlab="expression value", ylab="peak score")
title(paste("R^2 = ", r$estimate, ", p-value = ", r$p.value));
dev.off()
""" % dict(in_data=in_data, out_png=out_png)
    #print R_script
    r = pyper.R()
    r(R_script)
Example #19
def youden_onecut(data, pred_col, duration_col, event_col, pt=None):
    """Cutoff maximize Youden Index.

    Parameters
    ----------
    data : pandas.DataFrame
        Full survival data.
    pred_col : str
        Name of column to reference for dividing groups.
    duration_col : str
        Name of column indicating time.
    event_col : str
        Name of column indicating event.
    pt : int, default None
        Predicted time.

    Returns
    -------
    float
        Value indicating cutoff for pred_col of data.

    Examples
    --------
    >>> youden_onecut(data, 'X', 'T', 'E')
    """
    X = data[pred_col].values
    T = data[duration_col].values
    E = data[event_col].values
    if pt is None:
        pt = T.max()
    r = pr.R(use_pandas=True)
    r.assign("t", T)
    r.assign("e", E)
    r.assign("mkr", np.reshape(X, E.shape))
    r.assign("pt", pt)
    r.assign("mtd", "KM")
    r.assign("nobs", X.shape[0])
    r("library(survivalROC)")
    r("src <- survivalROC(Stime = t, status = e, marker = mkr, predict.time = pt, span = 0.25*nobs^(-0.20))")
    r("Youden <- src$TP-src$FP")
    r("cutoff <- src$cut.values[which(Youden == max(Youden), arr.ind = T)]")
    # r("abline(0,1)")  # stray call: no plotting device is open at this point
    return r.cutoff
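Hypothetical usage: split a cohort at the Youden-optimal cutoff, assuming `data` has columns 'X' (marker), 'T' (time) and 'E' (event):

cutoff = youden_onecut(data, 'X', 'T', 'E')
high_risk = data[data['X'] > cutoff]
low_risk = data[data['X'] <= cutoff]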
Example #20
    def exec_r(self):
        import pyper
        r = pyper.R(use_pandas=True)
        r("suppressWarnings(require(tseries,warn.conflicts = FALSE,quietly=TRUE))")
        #r("source(file='mswm.R')")#, encoding='utf-8'
        #r("result<-m.lm$coefficients[2]")
        #r("result1<-y1")
        #r("result2<-y2")
        #print(r.get("result"))
        #print("completed")

        for i in range(self._term, self._return_df.shape[0]):
            self._return_df.iloc[i - self._term:i].to_csv('run.csv')
            r("df <- read.csv('run.csv', header = T)")
            r("d <- diff(df$Last)")
            r("x <- factor(sign(d[-which(d %in% 0)]))")
            r("run_result <- runs.test(x)")
            r("p_value <- run_result$p.value")
            print(r.get('p_value'))
Example #21
    def fit_predict(self, X_train, y_train, X_test, y_test):

        X = np.vstack((X_train, X_test))
        df = DataFrame(X)

        r = pr.R(use_pandas=True)
        r.assign("X", df)
        # r('print(X)')  # if this line is removed, pyper gets stuck...

        install_dir = os.path.dirname(
            os.path.abspath(inspect.getfile(
                inspect.currentframe())))  # script directory
        r('source("' + install_dir + '/RF.R")')
        r('set.seed(0)')
        r('no.forests=' + str(int(self.nforests)))
        r('no.trees=' + str(int(self.ntree)))
        r('rfdist <- RFdist(X, mtry1=3, no.trees, no.forests, '
          'addcl1=T, addcl2=F, imp=T, oob.prox1=T)')
        r('labelRF=outlier(rfdist$cl1)')
        return -np.array(r.get('labelRF'))[X_train.shape[0]:]
Example #22
def surv_roc(data, pred_col, duration_col, event_col, pt=None):
    """Get survival ROC at predicted time.

    Parameters
    ----------
    data : pandas.DataFrame
        Full survival data.
    pred_col : str
        Name of column to reference for dividing groups.
    duration_col : str
        Name of column indicating time.
    event_col : str
        Name of column indicating event.
    pt : int, default None
        Predicted time.

    Returns
    -------
    `dict`
        Dict with the "FP", "TP" and "AUC" components of the ROC.

    Examples
    --------
    >>> surv_roc(data, 'X', 'T', 'E', pt=5)
    """
    X = data[pred_col].values
    T = data[duration_col].values
    E = data[event_col].values
    if pt is None:
        pt = T.max()
    r = pr.R(use_pandas=True)
    r.assign("t", T)
    r.assign("e", E)
    r.assign("mkr", np.reshape(X, E.shape))
    r.assign("pt", pt)
    r.assign("mtd", "KM")
    r.assign("nobs", X.shape[0])
    # different predict.time may plot 1, 5, or 10 year ROC
    r("src<-survivalROC::survivalROC(Stime = t, status = e, marker = mkr, predict.time = pt, span = 0.25*nobs^(-0.20))")
    # r.src['AUC'] r.src['FP'], r.src['TP']
    return r.src
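A sketch of plotting the returned ROC with matplotlib, assuming the dict exposes survivalROC's 'FP', 'TP' and 'AUC' components as the docstring states:

import matplotlib.pyplot as plt

roc = surv_roc(data, 'X', 'T', 'E', pt=5)
plt.plot(roc['FP'], roc['TP'], label='AUC = %.3f' % roc['AUC'])
plt.plot([0, 1], [0, 1], linestyle='--')  # chance line
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend()
plt.show()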
Example #23
def get_quantile_errors(data, quantiles):
    """
    Gets arbitrary quantile values for given data

    This function uses the PypeR library (download from 
    http://sourceforge.net/projects/rinpy/) to call commands from the R
    language used to calculate an arbitrary number of quantile intervals of
    a numpy array. In the case of bi-dimensional numpy arrays, it will
    calculate these statistics along the first axis, meaning that each line
    corresponds to an independent data set.


    Input:
     - data        numpy.ndarray : data input, each line being a data set
                                   (ndim=2,dtype=float)
     - quantiles   numpy.ndarray : percentages (ndim=1,dtype=float)
                                   Ex: quartiles : array([0.25,0.5,0.75])

    Output:
     - qtl_errors  numpy.ndarray : quantile error values (ndim=2,dtype=float) 
     
    ---
    """

    myR = pyper.R()

    myR['data'] = np.transpose(data)

    myR['quantiles'] = quantiles

    # Calculate the quantiles for each data line

    myR("""qtls <- t(sapply(as.data.frame(data),function(x) quantile(x,
                     quantiles,names=FALSE)))""")

    qtl_errors = myR['qtls']

    del myR

    return qtl_errors
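Usage sketch: quartiles for three independent data sets, one per line, matching the Input/Output contract described above:

import numpy as np

data = np.random.normal(size=(3, 1000))      # ndim=2, one data set per line
quartiles = np.array([0.25, 0.5, 0.75])
print(get_quantile_errors(data, quartiles))  # shape (3, 3)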
Example #24
def plot_nearest_features(in_distances, out_png, test_out, window_size=20):
    """Plot a density of the distance to the nearest features"""
    print(out_png)
    print(test_out)
    R_script = r"""
png('%(out_png)s')
d<-read.table(file="%(in_data)s", header=TRUE, sep="\t");
d = d / 1000;
library(lattice);
plot(density(unlist(d[1])[unlist(d[1]) < %(window_size)s & unlist(d[1]) > -%(window_size)s], na.rm=TRUE), main="Feature densities around peaks", xlab="Distance (kb)", ylab="Density", xlim=c(-%(window_size)s,%(window_size)s))
index = 1
r = rainbow(length(d))
for (i in d) {
    i = i[i < %(window_size)s & i > -%(window_size)s]
    lines(density(i, from=-%(window_size)s, to=%(window_size)s, na.rm=TRUE), col=r[index])
    index = index + 1
}
legend("topleft", legend=names(d), col=r, lty=1)
dev.off()
""" % dict(in_data=in_distances, out_png=out_png, window_size=window_size)
    print(R_script)
    r = pyper.R()
    r(R_script)
Example #25
def plot_ttest_polya(in_ttest, out_png):
    """plot the t-test averages used as a scatter plot of the expression of polya sites"""
    lhs, rhs = in_ttest.split('vs.')
    lhs, rhs = short_name(lhs), short_name(rhs)
    try:
        compare_type = in_ttest.replace('.t_test', '').split('polya.')[1]
    except IndexError:
        compare_type = 'ALL'
    R_script = r"""
library(lattice)
d<-read.table(file="%(in_ttest)s", header=TRUE, sep="\t")
png('%(out_png)s')

exp1 <- unlist(lapply(lapply(strsplit(gsub("]", "", gsub("[", "", d$exp1_count, fixed=TRUE), fixed=TRUE), ", ", fixed=TRUE), as.numeric), mean))
exp2 <- unlist(lapply(lapply(strsplit(gsub("]", "", gsub("[", "", d$exp2_count, fixed=TRUE), fixed=TRUE), ", ", fixed=TRUE), as.numeric), mean))


sig_sites <- d$ttest_pvalue < .05
# upregulated means t-statistic is positive => exp1 < exp2
exp1_bigger <- d$ttest_pvalue < .05 & d$ttest_stat > 0
exp2_bigger <- d$ttest_pvalue < .05 & d$ttest_stat < 0

exp1_sig = exp1[sig_sites]
exp2_sig = exp2[sig_sites]

plot(log2(exp1), log2(exp2), cex=.8, col='lightgray', pch=20, xlab="%(xlab)s", ylab="%(ylab)s", main="All sites for %(lhs)s vs. %(rhs)s in %(compare_type)s", sub=paste("R^2 is ", cor(exp1, exp2),"\nSig sites x > y: ", sum(exp1_bigger), "\nSig sites x < y: ", sum(exp2_bigger)))
points(log2(exp1_sig), log2(exp2_sig), col='red', cex=.8, pch=20)

dev.off()
""" % dict(plot_label=r'Poly-A for\n%s' % in_ttest,
           xlab="log2(%s)" % (lhs),
           ylab="log2(%s)" % (rhs),
           in_ttest=in_ttest, out_png=out_png, lhs=lhs, rhs=rhs,
           compare_type=compare_type)
    # print R_script
    r = pyper.R()
    r(R_script)
Example #26
    def post(self):
        # database
        db = pd.read_csv("data.csv")
        params = pd.read_csv("params.csv")
        # user input (each value is on a 6-point scale from 0 to 5)
        answers = [int(self.get_argument("q%d" % i)) for i in range(1, 17)]
        user_data = pd.DataFrame([answers + ["偏差値"]],  # placeholder; filled in after scoring
                                 columns=["V%d" % i for i in range(1, 17)] + ["偏差値"])

        data = pd.concat([db, user_data], ignore_index=True)
        # analysis in R
        r = pyper.R(use_pandas=True)
        r.assign("data", data)
        r.assign("params", params)

        r("source('myfunc/myfunc.R')")
        r("library(psych)")
        r("library(irtoys)")
        r("library(ltm)")
        # drop the 17th column (偏差値) from the analysis data and analyze only the latest row
        r("data<-data[nrow(data),1:16]")
        # estimate the ability parameter
        r("a <- grm.theta(data,a=params[,6]/1.7,bc=params[,c(1,2,3,4,5)],D=1.7,method ='ML')")
        r("偏差値<-round(a[,1],4)*10+50")

        # the user's standard score (偏差値)
        value = r.get("偏差値")
        data.iat[len(data)-1, 16] = value
        print(data)
        ranking = data.rank(ascending=False, method='max')
        print(ranking)
        # rank
        rank = ranking.iat[len(data)-1, 16].astype(int)
        # number of examinees
        every = len(data)
        self.render("result.html", value=value, rank=rank, every=every)
Example #27
@author: narrowly
"""
import numpy as np
import pandas as pd
import scipy.stats as sct
import random
import pyper
import matplotlib.pyplot as plt
plt.style.use('ggplot')

# preprocessing
np.random.seed(4521)
N = 10000  # N_particle

r = pyper.R(use_numpy=True, use_pandas=True)
r("load('data/ArtifitialLocalLevelModel.RData')")
y = r.get('y').astype(np.float64)
t_max = r.get('t_max')
mod = r.get('mod')

# %% Code 11-1
# reshape the data
y = np.r_[np.nan, y]

# index array for resampling
k = np.repeat(np.arange(N)[:, np.newaxis], t_max + 1, axis=1)

# set up the prior distribution
# particles (realized values)
x = np.zeros(shape=(t_max + 1, N))
Example #28
    async def plottemp(self, ctx):
        r = pyper.R()
        r("source(file='/home/rito/Programming/Python/discord_bot/cogs/plot.R')")
        await ctx.send("Here are the room's temperature, humidity, and brightness.")
        await ctx.send(file=discord.File('/home/rito/image/plot.png'))
Example #29
        x[i] = 0.4 * x[i - 1] + 0.8 * x[i - 1] * z[i - 1] + z[i]

    halfWindowSize = 20
    ret = {}
    ret[0] = pyHampel.hampel(x, halfWindowSize, method="center")
    # set the center of the window at the target value
    ret[1] = ret[0]  # just a copy
    ret[2] = pyHampel.hampel(x, halfWindowSize, method="same")    # same window
    ret[3] = pyHampel.hampel(x, halfWindowSize, method="ignore")  # ignore at the ends
    ret[4] = pyHampel.hampel(x, halfWindowSize, method="nan")     # set NaN at the ends
    print(ret[0][0])  # filtered data
    print(ret[0][1])  # indices of outliers

    # compare pyHampel with R's pracma::hampel
    r = pyper.R()
    r("library(pracma);")
    r.assign("x", x)
    r("omad <- hampel(x, k=20);")
    retR = r.get("omad")

    fig, ax = plt.subplots(5, 1, figsize=(8, 8))
    ax[0].plot(t, x, "b", label="original data")
    for i in range(5):
        ax[i].plot(t, retR["y"], "orange", label="R")
        ax[i].plot(t, ret[i][0], "r.", label="python")
        ax[i].legend(loc="lower center")
    ax[0].set_title("overall")
    ax[1].set_title("center")
    ax[2].set_title("same")
    ax[3].set_title("ignore")
Example #30
File: MS.py Project: wangbin750/FOF
# Windows path:
data = pd.read_excel(r"F:\GitHub\FOF\Global Allocation\SBG_US_M.xlsx")

# macOS path (overrides the line above; keep whichever applies):
data = pd.read_excel("/Users/WangBin-Mac/FOF/Global Allocation/SBG_US_M.xlsx")

data = data.interpolate()
data = data.dropna().pct_change().dropna()

data

#data_W = data.pct_change().dropna()*100

rp_result_list = []
mu_result_list = []
index_list = []
r = pr.R(use_pandas=True)
r("library(MSwM)")
for each in range(119, len(data) - 1):
    #each = 95
    #data_M.index[each]

    #data_frame = data[:data.index[each]]
    data_frame = data[data.index[each - 119]:data.index[each]]

    #data_frame = data_frame[['SP500', 'Barclays_US_bond']]
    '''
    mu_wgt = Ms_MU(data_frame, {'SP500':True, 'Barclays_US_bond':False}, 2)
    print each
    print mu_wgt

    rp_wgt = Ms_RP(data_frame, {'SP500':True, 'Barclays_US_bond':False})