Пример #1
0
def generate_images(data,template):
    """Run Sunniva's R script and let it write the plots for runs 1 to 4.

    :param data: list of tuples (dose,response,experiment)
    :param template: output path prefix handed to R as ``outfilename``; for
        each run R derives ``<template>_<run>`` and ``<template>_<run>.png``.
    """
    ro.r("""
    source("graph/R/4in1skript_ankervariation_backup.R")
    library("gplots")
    library("bmd")
    library("splines")
    library("plyr")
    library("emdbook")
    """)
    # zip() is lazy on Python 3; materialise it so asarray() builds one row
    # per column of ``data`` on both Python 2 and Python 3.
    data_array = asarray(list(zip(*data)))
    dose, response, experiment = data_array[0], data_array[1], data_array[2]
    ro.r.assign('dose',numpy2ri(dose))
    ro.r.assign('response',numpy2ri(response))
    ro.r.assign('experiment',numpy2ri(experiment))
    ro.r.assign('outfilename',template)  # sunny/media/images/fit_images_1
    ro.r("""
    mydata = data.frame(dose=dose,response=response,experiment=experiment)
    for (run in c(1,2,3,4)){
        outname = paste(outfilename,'_',run, sep="")
        plotname = paste(outfilename,'_',run,'.png', sep="")
        processData(mydata, outname=outname, xlab="Concentration [AU]", plotname=plotname, run=run)
    }
    """)
Пример #2
0
    def _plot_stats(self, bam_name):
        robjects.r.assign('rep_cnt', numpy2ri.numpy2ri(self.frag_rep.keys()))
        robjects.r.assign('rep_freq',
                          numpy2ri.numpy2ri(self.frag_rep.values()))
        robjects.r.assign('size_distr',
                          numpy2ri.numpy2ri(self.frag_size.keys()))
        robjects.r.assign('size_freq',
                          numpy2ri.numpy2ri(self.frag_size.values()))
        robjects.r.assign('nb_frag', self.nb_frag)
        robjects.r.assign('main', bam_name)
        robjects.r("""
rep_cnt = as.integer(rep_cnt)
Od = order(rep_cnt)
rep_freq = as.integer(rep_freq)[Od]*1e-6
rep_cnt = rep_cnt[Od]
I100 = rep_cnt<100
rep_cnt = c(rep_cnt[I100],100)
rep_freq = c(rep_freq[I100],sum(rep_freq[!I100]))
size_distr = as.integer(size_distr)
Od = order(size_distr)
size_freq = as.integer(size_freq)[Od]/nb_frag
size_distr = size_distr[Od]
par(mfrow=c(2,1),lwd=2,cex=1.1,cex.main=1.3,cex.lab=1.1,cex.axis=.8,oma=c(0,0,3,0),mar=c(5,5,1,1),las=1,pch=20)
plot(rep_cnt,rep_freq,type='s',main='Fragment redundancy',xlab='Nb of copies',ylab='Frequency (millions)',
     log='y',xlim=c(1,100),xaxt='n',ylim=c(1e-6,nb_frag*1e-6))
abline(h=nb_frag*1e-6,col='red')
text(50,nb_frag*1e-6,nb_frag,col='red',pos=1)
axis(side=1,at=seq(10,100,by=10),labels=c(seq(10,90,by=10),">100"))
plot(size_distr,size_freq,type='s',main='Fragment size distribution',xlab='Size',ylab='Density')
title(main=main,outer=T)
""")
Пример #3
0
 def fit(self, X, y):
     """Fit R's ``randomForest`` on *X* with class labels *y*; return self.

     The unique labels are stored in ``self.classes_`` and the fitted R
     model in ``self.model_``.
     """
     labels = np.unique(y)
     self.classes_ = labels
     # 1-based position of every label within the unique labels
     codes = np.searchsorted(labels, y) + 1
     r_features = numpy2ri(X)
     r_classes = ro.FactorVector(numpy2ri(codes))
     self.model_ = rf.randomForest(r_features, r_classes, **self.params)
     return self
Пример #4
0
def wilcox_test_R(x, y, alternative='less'):
    """
    Call R's ``stats::wilcox.test`` on the paired samples @x and @y.

    The test is run with ``paired=True, exact=False, correct=False``; with
    paired samples R performs the Wilcoxon signed rank test.

    :param x: first sample (converted with ``numpy2ri``)
    :param y: second sample, paired with @x
    :param alternative: 'two.sided', 'less' (default) or 'greater'
    :return: the p-value reported by R
    :raises ValueError: if @alternative is not one of the accepted values
    :raises KeyError: if the R result carries no 'p.value' entry

    NOTE: Calling R many times is slow! rather use python function if possible
    """
    if alternative not in ('two.sided', 'less', 'greater'):
        raise ValueError("Alternative hypothesis should be either 'two.sided', 'less' or 'greater'")

    from rpy2.robjects.numpy2ri import numpy2ri
    from rpy2.robjects.packages import importr
    from rpy2.robjects import pandas2ri
    pandas2ri.activate()

    statspackage = importr('stats', robject_translations={'format_perc': '_format_perc'})
    result = statspackage.wilcox_test(numpy2ri(x), numpy2ri(y), alternative=alternative,
                                      paired=True, exact=False, correct=False)

    pyresultdict = pandas2ri.ri2py(result)
    for k, v in pyresultdict.items():
        if k == 'p.value':
            return v[0]
    # Previously this fell through to an UnboundLocalError on ``pval``.
    raise KeyError("R wilcox.test result has no 'p.value' entry")
Пример #5
0
    def _plot_stats(self, bam_name):
        robjects.r.assign('rep_cnt',numpy2ri.numpy2ri(self.frag_rep.keys()))
        robjects.r.assign('rep_freq',numpy2ri.numpy2ri(self.frag_rep.values()))
        robjects.r.assign('size_distr',numpy2ri.numpy2ri(self.frag_size.keys()))
        robjects.r.assign('size_freq',numpy2ri.numpy2ri(self.frag_size.values()))
        robjects.r.assign('nb_frag',self.nb_frag)
        robjects.r.assign('main',bam_name)
        robjects.r("""
rep_cnt = as.integer(rep_cnt)
Od = order(rep_cnt)
rep_freq = as.integer(rep_freq)[Od]*1e-6
rep_cnt = rep_cnt[Od]
I100 = rep_cnt<100
rep_cnt = c(rep_cnt[I100],100)
rep_freq = c(rep_freq[I100],sum(rep_freq[!I100]))
size_distr = as.integer(size_distr)
Od = order(size_distr)
size_freq = as.integer(size_freq)[Od]/nb_frag
size_distr = size_distr[Od]
par(mfrow=c(2,1),lwd=2,cex=1.1,cex.main=1.3,cex.lab=1.1,cex.axis=.8,oma=c(0,0,3,0),mar=c(5,5,1,1),las=1,pch=20)
plot(rep_cnt,rep_freq,type='s',main='Fragment redundancy',xlab='Nb of copies',ylab='Frequency (millions)',
     log='y',xlim=c(1,100),xaxt='n',ylim=c(1e-6,nb_frag*1e-6))
abline(h=nb_frag*1e-6,col='red')
text(50,nb_frag*1e-6,nb_frag,col='red',pos=1)
axis(side=1,at=seq(10,100,by=10),labels=c(seq(10,90,by=10),">100"))
plot(size_distr,size_freq,type='s',main='Fragment size distribution',xlab='Size',ylab='Density')
title(main=main,outer=T)
""")
def voom(counts, library_size):
    """Run edgeR normalisation followed by limma ``voom`` in R.

    :param counts: object exposing ``.values`` and ``.coords``
        (presumably an xarray DataArray -- confirm against callers).
    :param library_size: object exposing ``.values``.
    :return: ``VoomResult(gexp, weights)``; both are ``xr.DataArray`` built
        from the transposed ``E`` and ``weights`` components of the voom fit
        and reuse ``counts.coords``.
    """
    from rpy2.robjects.packages import importr
    from rpy2.robjects.numpy2ri import numpy2ri

    logger.info("Running limma voom in R")

    limma = importr('limma')
    edger = importr('edgeR')
    base_r = importr('base')
    extract = getattr(base_r, '$')

    def component(field, long_name):
        # One named component of the voom fit as a DataArray on counts' coords.
        return xr.DataArray(np.array(extract(fit, field)).T,
                            coords=counts.coords,
                            attrs={'units': 'lb(re 1)',
                                   'long_name': long_name})

    r_lib_size = base_r.c(numpy2ri(library_size.values))
    # counts are transposed on the way into R and back again on the way out
    dge = edger.DGEList(counts=numpy2ri(counts.values.T),
                        lib_size=r_lib_size)
    fit = limma.voom(edger.calcNormFactors(dge), plot=False)

    gexp = component('E', "Gene expression in log2 range")
    weights = component('weights', "Limma voom weights")
    return VoomResult(gexp, weights)
Пример #7
0
def generate_images(data, template):
    """Run Sunniva's R script and let it write the plots for runs 1 to 4.

    :param data: list of tuples (dose,response,experiment)
    :param template: output path prefix handed to R as ``outfilename``; for
        each run R derives ``<template>_<run>`` and ``<template>_<run>.png``.
    """
    ro.r("""
    source("graph/R/4in1skript_ankervariation_backup.R")
    library("gplots")
    library("bmd")
    library("splines")
    library("plyr")
    library("emdbook")
    """)
    # zip() is lazy on Python 3; materialise it so asarray() builds one row
    # per column of ``data`` on both Python 2 and Python 3.
    data_array = asarray(list(zip(*data)))
    dose, response, experiment = data_array[0], data_array[1], data_array[2]
    ro.r.assign('dose', numpy2ri(dose))
    ro.r.assign('response', numpy2ri(response))
    ro.r.assign('experiment', numpy2ri(experiment))
    ro.r.assign('outfilename', template)  # sunny/media/images/fit_images_1
    ro.r("""
    mydata = data.frame(dose=dose,response=response,experiment=experiment)
    for (run in c(1,2,3,4)){
        outname = paste(outfilename,'_',run, sep="")
        plotname = paste(outfilename,'_',run,'.png', sep="")
        processData(mydata, outname=outname, xlab="Concentration [AU]", plotname=plotname, run=run)
    }
    """)
Пример #8
0
def cluster_by_grs(target, source, env):
    """Cluster verbs by how often they occur with each "gr" value.

    Instances are loaded from ``source[0]``; a verb x gr count matrix is
    built and row-normalised, the number of clusters is picked with R's
    ``clusGap``/``maxSE`` (k-means, ``K_max=30``, ``B=500``) and the final
    k-means assignment is written to ``target[0]``, one line of
    space-separated verbs per cluster.

    :param target: sequence whose first item exposes ``rstr()`` (output path)
    :param source: sequence whose first item exposes ``rstr()`` (instances
        file) and whose last item exposes ``read()``
    :param env: unused here
    :return: None
    """
    import graphmod as gm
    # NOTE(review): the value is never used; the call is kept in case reading
    # the node matters to the build system -- confirm and drop if not.
    args = source[-1].read()
    verb_map = {}
    gr_map = {}
    instances = gm.Instances()
    gm.load_instances(source[0].rstr(), instances)

    # Single pass: number verbs/grs in first-seen order and remember each
    # instance's ids so the instances need not be decoded a second time.
    observations = []
    for ii in range(len(instances)):
        instance = instances.at(ii)
        verb = instances.get_name("verb_lemma", instance["verb_lemma"][0])
        verb_id = verb_map.setdefault(verb, len(verb_map))
        gr_ids = [gr_map.setdefault(instances.get_name("gr", x), len(gr_map))
                  for x in instance["gr"]]
        observations.append((verb_id, gr_ids))

    data = numpy.zeros(shape=(len(verb_map), len(gr_map)))
    for verb_id, gr_ids in observations:
        for gr in gr_ids:
            data[verb_id, gr] += 1
    # divide every row by its row sum
    data = numpy.transpose(data.T / data.sum(1))

    tres = numpy.asarray(rcluster.clusGap(numpy2ri(data), FUN=stats.kmeans, K_max=30, B=500).rx2("Tab"))
    gaps = tres[:, 2]
    err = tres[:, 3]
    best = rcluster.maxSE(numpy2ri(gaps), numpy2ri(err), method="globalmax")
    res = stats.kmeans(numpy2ri(data), centers=best)

    verbs = {v: k for k, v in verb_map.items()}
    # fetch the assignment vector from R once instead of once per cluster
    assignments = list(res.rx2("cluster"))
    ofd = meta_open(target[0].rstr(), "w")
    try:
        for c in set(assignments):
            ofd.write(" ".join([verbs[i] for i, a in enumerate(assignments) if a == c]) + "\n")
    finally:
        # the original never closed the output file
        ofd.close()
    return None
Пример #9
0
    def test1(self):
        """Run R's ``rkt::rkt`` on synthetic seasonal data and keep the result.

        Builds ``nyear * nseas`` samples (random noise plus a linear trend),
        calls ``rkt`` and stores the raw R result in ``self.res`` and a
        one-row DataFrame with the columns sl, S, B, varS and tau in
        ``self.df``.
        """
        rkt = rpackages.importr('rkt')

        nyear = 4
        nseas = 5
        year = np.repeat(np.arange(2000, 2000 + nyear), nseas)
        dekad = np.tile(1 + np.arange(nseas), nyear)
        data = np.random.rand(nseas * nyear) + np.arange(nseas * nyear) * 0.1

        # explicit R vectors (the unreachable numpy2ri alternative was removed)
        year = robjects.IntVector(year)
        dekad = robjects.IntVector(dekad)
        data = robjects.FloatVector(data)

        print(year)
        print(dekad)
        print(data)

        self.res = rkt.rkt(year, data, dekad)
        print(self.res)

        # was ``rw.res``: convert the result just computed, not a stray name
        df = pandas2ri.ri2py_dataframe(self.res).transpose()
        df.columns = self.res.names
        df = df[['sl', 'S', 'B', 'varS', 'tau']]

        print(pd.concat([df, df, df]))
        self.df = df
Пример #10
0
 def _from_python(obj):
     if isinstance(obj, DataFrame):
         obj = convert_to_r_dataframe(obj)
     elif isinstance(obj, Series):
         obj = numpy2ri(obj.values)
     elif isinstance(obj, np.ndarray):
         obj = numpy2ri(obj)
     return obj
Пример #11
0
 def _from_python(obj):
     if isinstance(obj, DataFrame):
         obj = py2ri(obj)
     elif isinstance(obj, Series):
         obj = numpy2ri(obj.values)
     elif isinstance(obj, np.ndarray):
         obj = numpy2ri(obj)
     return obj
Пример #12
0
def identity_heatmap_plot(numpy_matrix, labels,
                          header="",
                          xlab="",
                          ylab="",
                          reverse=False,
                          output_path="~/test.svg",
                          table_path="/home/trestan/identity.tab"):
        """Draw a labelled ``heatmap.2`` of *numpy_matrix* to an SVG file via R.

        :param numpy_matrix: matrix sent to R as ``Mdata``; its values are
            also printed in the cells (``cellnote``).
        :param labels: row and column labels (converted to a string array).
        :param header: currently unused.
        :param xlab: currently unused.
        :param ylab: currently unused.
        :param reverse: if true, plot ``100 - Mdata`` with the reversed
            "Blues" palette instead of ``Mdata`` with the plain palette.
        :param output_path: path of the SVG file opened with R's ``svg()``.
        :param table_path: where the labelled matrix is dumped with
            ``write.table``. Previously hard-coded; the default keeps the old
            location.

        NOTE: the paths are interpolated verbatim into R source, so they must
        not contain quote characters.
        """

        import rpy2.robjects as robjects
        import rpy2.robjects.numpy2ri as numpy2ri
        from rpy2.robjects import pandas2ri
        # global conversion hooks, activated exactly as before
        numpy2ri.activate()
        pandas2ri.activate()

        robjects.r.assign('Mdata',numpy2ri.numpy2ri(numpy_matrix))
        b = np.asarray(labels, dtype='str')
        robjects.r.assign('labels',numpy2ri.numpy2ri(b))

        if reverse:
            plot = '''
                cols <- rev(brewer.pal(9, "Blues"))
                heatmap.2(100-as.matrix(Mdata), trace="none", key="True",col=cols,
                na.rm=TRUE, density.info='none', cellnote=as.matrix(Mdata), notecol="black", labRow=labels, labCol=labels)
            '''
        else:
            plot = '''
                cols <- brewer.pal(9, "Blues")
                h <- heatmap.2(as.matrix(Mdata), trace="none", key="True",col=cols,
                na.rm=TRUE, density.info='none', cellnote=as.matrix(Mdata), notecol="black", labRow=labels, labCol=labels)
            '''


        robjects.r('''
            library(Cairo)
            library(ggplot2)
            library(gplots)
            library(RColorBrewer)

            rownames(Mdata) <- labels
            colnames(Mdata) <- labels


            h <- length(Mdata[,1])/4+8
            w <- length(Mdata[,1])/2+8
            print(h)
            print(w)
            svg('%s',height=h,width=w)
                par(oma = c(5, 0, 0, 8), xpd=TRUE)
                par(mar = c(5,1,1,8))
                par(cex.main=1,oma=c(22,0,0,20), xpd=TRUE, new=TRUE)
                %s
                #print(h)
                write.table(Mdata, "%s", sep="\t",col.names = NA)
            dev.off()

        ''' % (output_path, plot, table_path))
Пример #13
0
def plot_dnds(wt, ga):
    """Assign *wt* and *ga* in R as ``wt``/``ga`` and source figure_dnds.R."""
    # convert both inputs before touching the R workspace
    converted = [(label, numpy2ri(values))
                 for label, values in (('wt', wt), ('ga', ga))]
    for label, r_values in converted:
        r.assign(label, r_values)

    r(' source("src/R/figure_dnds.R") ')
Пример #14
0
def plot_stabhyddiff(stab, hyd):
    """Assign *stab* and *hyd* in R and source figure_sites_stab_nothyd.R."""
    for r_name, values in (('stab', stab), ('hyd', hyd)):
        r.assign(r_name, numpy2ri(values))

    r(' source("src/R/figure_sites_stab_nothyd.R") ')
Пример #15
0
def plot_entropy(wt, ga):
    """Assign *wt* and *ga* in R as ``wt``/``ga`` and source figure_entropy.R."""
    r_wt, r_ga = numpy2ri(wt), numpy2ri(ga)

    r.assign('wt', r_wt)
    r.assign('ga', r_ga)

    r(' source("src/R/figure_entropy.R") ')
Пример #16
0
def plot_diversity(wt, ga):
    """Assign *wt* and *ga* in R as ``wt``/``ga`` and source figure_diversity.R."""
    r_inputs = [numpy2ri(values) for values in (wt, ga)]
    for r_name, r_values in zip(('wt', 'ga'), r_inputs):
        r.assign(r_name, r_values)

    r(' source("src/R/figure_diversity.R") ')
Пример #17
0
def boxplot(values,labels,output=None,format='pdf',new=True,last=True,**kwargs):
    """Draw a box-and-whiskers plot of *values* grouped by *labels*.

    Inputs that are not already ndarrays are passed through ``asarray``.
    Returns the ``output`` value handed back by ``_begin``.
    """
    values = values if isinstance(values,ndarray) else asarray(values)
    labels = labels if isinstance(labels,ndarray) else asarray(labels)
    # the plot-option string returned by _begin is not used by this plot
    _unused_opts,output = _begin(output=output,format=format,new=new,**kwargs)
    to_r = numpy2ri.numpy2ri
    robjects.r.assign('values',to_r(values))
    robjects.r.assign('labels',to_r(labels))
    robjects.r("boxplot(values ~ labels,lty=1,varwidth=T)")
    _end("",last,**kwargs)
    return output
Пример #18
0
def basic_plot(values_x, values_y=False, header="", xlab="", ylab="", output_path="~/test.svg", type="hist"):
        """Write a quick SVG plot of *values_x* (optionally against *values_y*).

        With *values_y* an x/y scatter plot is drawn; without it a ggplot2 bar
        chart of the frequency table of *values_x* is drawn.

        :param values_x: values converted to a float array.
        :param values_y: optional second series; any falsy value selects the
            bar chart.
        :param header: currently unused.
        :param xlab: currently unused.
        :param ylab: currently unused.
        :param output_path: SVG path, interpolated verbatim into the R
            ``svg()`` call (must not contain a single quote).
        :param type: currently unused.
        """

        import rpy2.robjects as robjects
        import rpy2.robjects.numpy2ri as numpy2ri
        from rpy2.robjects import pandas2ri
        # global conversion hooks, activated exactly as before
        numpy2ri.activate()
        pandas2ri.activate()

        try:
            has_y = bool(values_y)
        except ValueError:
            # A multi-element numpy array (or pandas Series) has no single
            # truth value; it is by definition non-empty, so plot it.
            has_y = True

        a = np.asarray(values_x, dtype='float')
        if has_y:
            b = np.asarray(values_y, dtype='float')

            robjects.r.assign('values_x', numpy2ri.numpy2ri(a))
            robjects.r.assign('values_y', numpy2ri.numpy2ri(b))

            robjects.r('''
                #library(genoPlotR)
                library(Cairo)
                library(ggplot2)


                library(plyr)
                #mu <- ddply(plot_data, "comp", summarise, identity.mean=median(identity))
                #print (mu)
                #plot_data$identity <- as.numeric(plot_data$identity)

                svg('%s',height=6,width=14)
                plot(values_x, values_y, pch=20) # , ylim=c(0,100)
                dev.off()


            ''' % (output_path))
        else:
            robjects.r.assign('values_x', numpy2ri.numpy2ri(a))

            robjects.r('''
                #library(genoPlotR)
                library(Cairo)
                library(ggplot2)

                svg('%s',height=7,width=7)
                #barplot(table(values_x), main="Conservation of predicted effectors in other genomes")

                  library(ggplot2)

                  mytable <- as.data.frame(table(values_x))
                  #print(mytable)
                  p <- ggplot(mytable, aes(x = reorder(values_x, -order(values_x)), y = Freq)) + geom_bar(stat = "identity")
                  p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1))+ coord_flip()
                  print(p)
                dev.off()

            ''' % (output_path))
Пример #19
0
 def model_drm(fit_name,dose,response,fixed=''):
     """Fit ``response ~ dose`` with drc's ``drm`` using the named fit function.

     :param fit_name: name of the R function that builds the ``fct`` argument;
         it is evaluated as R source, so it must come from trusted code.
     :param dose: dose values, assigned to R's ``dose``.
     :param response: response values, assigned to R's ``response``.
     :param fixed: optional iterable rendered with ``list2r`` and passed to
         the fit function as ``fixed=...``.
     :return: the fitted model, or the string ``"R: <message>"`` if R raised.
     """
     ro.r.assign('dose',numpy2ri(dose))
     ro.r.assign('response',numpy2ri(response))
     if fixed:
         fixed = 'fixed='+list2r(list(fixed))
     fit_fct = ro.r(fit_name+'('+fixed+')')
     try:
         return drc.drm(ro.Formula('response~dose'),fct=fit_fct)
     except RRuntimeError as err:
         # "as" form is valid on Python 2.6+ and 3; the old name shadowed ``re``
         return "R: "+str(err)
Пример #20
0
 def build_vine(self):
     """Build the R ``RVineMatrix`` from the stored structure and parameters.

     The family and both parameter matrices are first passed through
     ``permute_params`` together with the structure; the result is stored in
     ``self._rvine`` and the rebuild flag is cleared.
     """
     structure = self._structure
     r_args = [numpy2ri(structure)]
     for matrix in (self._family_changed, self._param1, self._param2):
         r_args.append(numpy2ri(permute_params(matrix, structure)))
     # positional order: structure, family, par, par2
     self._rvine = R_VINECOPULA.RVineMatrix(*r_args)
     self._to_rebuild = False
Пример #21
0
def plot_tediff_supplement(r1_hela, r1_human, r2_hela, r2_human):
    """Assign the four inputs in R and source figure_supplement_nopt.R."""
    r_inputs = (
        ('r1hela', r1_hela),
        ('r1human', r1_human),
        ('r2hela', r2_hela),
        ('r2human', r2_human),
    )
    for r_name, values in r_inputs:
        r.assign(r_name, numpy2ri(values))

    r(' source("src/R/figure_supplement_nopt.R") ')
Пример #22
0
def model_selection(data):
    """Ask R's ``bestModel`` for the best model on the given data.

    :param data: list of tuples (dose,response,experiment)
    :return: the first element of what ``bestModel`` returns, or None when R
        returns NULL (no model found).
    """
    # zip() is lazy on Python 3; materialise it so asarray() yields three rows
    dose,response,experiment = asarray(list(zip(*data)))
    ro.r.assign('dose',numpy2ri(dose))
    ro.r.assign('response',numpy2ri(response))
    ro.r.assign('experiment',numpy2ri(experiment))
    bmdrcdata = ro.r('data.frame(dose=dose,response=response,experiment=experiment)')
    selected_models = ro.r('bestModel')(bmdrcdata)
    if selected_models == ro.rinterface.NULL: # No model found
        return None
    return selected_models[0]
Пример #23
0
def scatterplot(X,Y,output=None,format='pdf',new=True,last=True,ratio=1.375,**kwargs):
    """Draw a scatter plot of X against Y.

    Y may be a single array or a list/tuple of arrays; the first series is
    drawn with ``plot`` and each further one is added with ``points`` in R
    colours 2, 3, ...  Returns the ``output`` value handed back by ``_begin``.
    """
    plotopt,output = _begin(output=output,format=format,new=new,ratio=ratio,**kwargs)
    to_r = numpy2ri.numpy2ri
    robjects.r.assign('xdata',to_r(X))
    series = Y if isinstance(Y,(list,tuple)) else [Y]
    robjects.r.assign('ydata',to_r(series[0]))
    robjects.r("plot(xdata,ydata%s)" %plotopt)
    for colour,extra in enumerate(series[1:],2):
        robjects.r.assign('ydata',to_r(extra))
        robjects.r("points(xdata,ydata,col=%i)" %colour)
    _end(",pch=20",last,**kwargs)
    return output
Пример #24
0
 def model_drm(fit_name,_data,fixed=''):
     """Fit ``response ~ dose`` with drc's ``drm`` using the named fit function.

     :param fit_name: name of the R function that builds the ``fct`` argument;
         it is evaluated as R source, so it must come from trusted code.
     :param _data: iterable of tuples whose first two fields are dose and
         response.
     :param fixed: optional iterable rendered with ``list2r`` and passed to
         the fit function as ``fixed=...``.
     :return: the fitted model, or the string ``"R: <message>"`` if R raised.
     """
     # zip() is lazy on Python 3; materialise it before building the array
     data_array = asarray(list(zip(*_data)))
     dose, response = data_array[0], data_array[1]
     ro.r.assign('dose',numpy2ri(dose))
     ro.r.assign('response',numpy2ri(response))
     if fixed:
         fixed = 'fixed='+list2r(list(fixed))
     fit_fct = ro.r(fit_name+'('+fixed+')')
     try:
         return drc.drm(ro.Formula('response~dose'),fct=fit_fct)
     except RRuntimeError as err:
         # "as" form is valid on Python 2.6+ and 3; the old name shadowed ``re``
         return "R: "+str(err)
Пример #25
0
def plot_sites_supplement(data_stab, data_agg, data_hyd):
    """Assign the three inputs in R and source the four supplement scripts."""
    for r_name, values in (('stab', data_stab),
                           ('agg', data_agg),
                           ('hyd', data_hyd)):
        r.assign(r_name, numpy2ri(values))

    commands = (
        ' source("src/R/figure_supplement_sites.R") ',
        ' source("src/R/figure_supplement_P1_stab.R") ',
        ' source("src/R/figure_supplement_P1_agg.R") ',
        ' source("src/R/figure_supplement_P1_hyd.R") ',
    )
    for command in commands:
        r(command)
Пример #26
0
def plot_entropy_supplement(ent_wt, ent_ga, div_wt, div_ga):
    """Assign the four inputs in R and source figure_supplement_entropy.R."""
    r_names = ('entwt', 'entga', 'divwt', 'divga')
    # convert everything first, then publish to the R workspace
    r_values = [numpy2ri(values) for values in (ent_wt, ent_ga, div_wt, div_ga)]

    for r_name, r_value in zip(r_names, r_values):
        r.assign(r_name, r_value)

    r(' source("src/R/figure_supplement_entropy.R") ')
Пример #27
0
 def __call__(self, casecon, genotype, **kwargs):
     """Call the wrapped R function and return its key results as a dict.

     :param casecon: values converted to an R integer vector.
     :param genotype: array converted with ``numpy2ri``.
     :param kwargs: forwarded unchanged to the R function.
     :return: dict with ``stat`` (first value of the entry whose name ends in
         ``.stat``), ``perm_p``, ``asym_p`` (None when R reports no
         ``asym.pval``) and ``function`` (``self.function_name``).
     """
     # Each argument is converted once; the original converted both twice.
     res = self.fn(robjects.IntVector(casecon), nri.numpy2ri(genotype),
                   **kwargs)
     # convert from rpy2 stuffs to python dict.
     res = dict(res.iteritems())
     skey = [k for k in res if k.endswith('.stat')][0]
     perm_p = res['perm.pval'][0]
     asym_p = res.get('asym.pval', [None])[0]
     return dict(stat=res[skey][0],
                 perm_p=perm_p,
                 asym_p=asym_p,
                 function=self.function_name)
Пример #28
0
    def _plot_pdf(self,filename,stats,title=""):
        import rpy2.robjects as robjects
        import rpy2.robjects.numpy2ri as numpy2ri
        robjects.r('pdf("%s",paper="a4",height=8,width=8)' %filename)
        for chrom,st in stats.iteritems():
            if chrom:
                _title = title+":"+chrom
            else:
                _title = title
            if 'feat_stats' in st:
                fst = st['feat_stats']
                robjects.r.assign('len',numpy2ri.numpy2ri(fst[1].keys()))
                robjects.r.assign('num',numpy2ri.numpy2ri(fst[1].values()))
                robjects.r.assign('ylim',max(10,fst[0]))
                robjects.r.assign('med',fst[2][5])
                robjects.r.assign('men',fst[2][3])
                robjects.r.assign('sdv',fst[2][4])
                robjects.r("""
ypos=1
len=as.numeric(len)
num=as.numeric(num)
par(lwd=2,cex=1.1,cex.main=1.5,cex.lab=1.3,cex.axis=.8,mar=c(5,5,1,1),las=1,pch=20)
plot(len,num,type='h',main='%s',xlab='Feature Length',ylab='Frequency',ylim=c(1,ylim),log='y')
abline(v=med,col='red')
text(med,ylim,paste("median",med,sep="="),col='red',pos=4)
abline(h=ylim[1],col='green')
mtext(paste(ylim[1],"features"),side=2,at=10,col='green',las=1)
arrows(men-sdv,ypos,men+sdv,ypos,angle=90,code=3,length=.15,col='blue')
points(men,ypos,pch=19,col='blue')
"""%_title)
            if 'score_stats' in st:
                sst = st['score_stats']
                robjects.r.assign('score',numpy2ri.numpy2ri(sst[0].keys()))
                robjects.r.assign('num',numpy2ri.numpy2ri(sst[0].values()))
                robjects.r.assign('med',sst[1][5])
                robjects.r.assign('men',sst[1][3])
                robjects.r.assign('sdv',sst[1][4])
                robjects.r("""
ypos=1
score=as.numeric(score)
num=as.numeric(num)
par(lwd=2,cex=1.1,cex.main=1.5,cex.lab=1.3,cex.axis=0.8,mar=c(5,5,1,1),las=1,pch=20)
plot(score,num,type='h',main='%s',xlab='Score',ylab='Frequency',log='y')
abline(v=med,col='red')
text(med,ylim[1],paste("median",med,sep="="),col='red',pos=4)
arrows(men-sdv,ypos,men+sdv,ypos,angle=90,code=3,length=.15,col='blue')
points(men,ypos,pch=19,col='blue')
"""%_title)
        robjects.r("dev.off()")
        return None
Пример #29
0
    def _plot_pdf(self,filename,stats,title=""):
        import rpy2.robjects as robjects
        import rpy2.robjects.numpy2ri as numpy2ri
        robjects.r('pdf("%s",paper="a4",height=8,width=8)' %filename)
        for chrom,st in stats.iteritems():
            if chrom:
                _title = title+":"+chrom
            else:
                _title = title
            if 'feat_stats' in st:
                fst = st['feat_stats']
                robjects.r.assign('len',numpy2ri.numpy2ri(fst[1].keys()))
                robjects.r.assign('num',numpy2ri.numpy2ri(fst[1].values()))
                robjects.r.assign('ylim',max(10,fst[0]))
                robjects.r.assign('med',fst[2][5])
                robjects.r.assign('men',fst[2][3])
                robjects.r.assign('sdv',fst[2][4])
                robjects.r("""
ypos=1
len=as.numeric(len)
num=as.numeric(num)
par(lwd=2,cex=1.1,cex.main=1.5,cex.lab=1.3,cex.axis=.8,mar=c(5,5,1,1),las=1,pch=20)
plot(len,num,type='h',main='%s',xlab='Feature Length',ylab='Frequency',ylim=c(1,ylim),log='y')
abline(v=med,col='red')
text(med,ylim,paste("median",med,sep="="),col='red',pos=4)
abline(h=ylim[1],col='green')
mtext(paste(ylim[1],"features"),side=2,at=10,col='green',las=1)
arrows(men-sdv,ypos,men+sdv,ypos,angle=90,code=3,length=.15,col='blue')
points(men,ypos,pch=19,col='blue')
"""%_title)
            if 'score_stats' in st:
                sst = st['score_stats']
                robjects.r.assign('score',numpy2ri.numpy2ri(sst[0].keys()))
                robjects.r.assign('num',numpy2ri.numpy2ri(sst[0].values()))
                robjects.r.assign('med',sst[1][5])
                robjects.r.assign('men',sst[1][3])
                robjects.r.assign('sdv',sst[1][4])
                robjects.r("""
ypos=1
score=as.numeric(score)
num=as.numeric(num)
par(lwd=2,cex=1.1,cex.main=1.5,cex.lab=1.3,cex.axis=0.8,mar=c(5,5,1,1),las=1,pch=20)
plot(score,num,type='h',main='%s',xlab='Score',ylab='Frequency',log='y')
abline(v=med,col='red')
text(med,ylim[1],paste("median",med,sep="="),col='red',pos=4)
arrows(men-sdv,ypos,men+sdv,ypos,angle=90,code=3,length=.15,col='blue')
points(men,ypos,pch=19,col='blue')
"""%_title)
        robjects.r("dev.off()")
        return None
Пример #30
0
def model_selection(data):
    """Ask R's ``bestModel`` for the best model on the given data.

    :param data: list of tuples (dose,response,experiment)
    :return: the first element of what ``bestModel`` returns, or None when R
        returns NULL (no model found).
    """
    # zip() is lazy on Python 3; materialise it so asarray() yields three rows
    dose, response, experiment = asarray(list(zip(*data)))
    ro.r.assign('dose', numpy2ri(dose))
    ro.r.assign('response', numpy2ri(response))
    ro.r.assign('experiment', numpy2ri(experiment))
    bmdrcdata = ro.r(
        'data.frame(dose=dose,response=response,experiment=experiment)')
    selected_models = ro.r('bestModel')(bmdrcdata)
    if selected_models == ro.rinterface.NULL:  # No model found
        return None
    return selected_models[0]
Пример #31
0
 def model_drm(fit_name, _data, fixed=''):
     """Fit ``response ~ dose`` with drc's ``drm`` using the named fit function.

     :param fit_name: name of the R function that builds the ``fct`` argument;
         it is evaluated as R source, so it must come from trusted code.
     :param _data: iterable of tuples whose first two fields are dose and
         response.
     :param fixed: optional iterable rendered with ``list2r`` and passed to
         the fit function as ``fixed=...``.
     :return: the fitted model, or the string ``"R: <message>"`` if R raised.
     """
     # zip() is lazy on Python 3; materialise it before building the array
     data_array = asarray(list(zip(*_data)))
     dose, response = data_array[0], data_array[1]
     ro.r.assign('dose', numpy2ri(dose))
     ro.r.assign('response', numpy2ri(response))
     if fixed:
         fixed = 'fixed=' + list2r(list(fixed))
     fit_fct = ro.r(fit_name + '(' + fixed + ')')
     try:
         return drc.drm(ro.Formula('response~dose'), fct=fit_fct)
     except RRuntimeError as err:
         # "as" form is valid on Python 2.6+ and 3; the old name shadowed ``re``
         return "R: " + str(err)
Пример #32
0
def plot_average_coverage(wt, ga):
    """Sum *wt* and *ga* along axis 1, send them to R and source figure_coverage.R."""
    totals = [np.sum(matrix, 1) for matrix in (wt, ga)]
    r_totals = [numpy2ri(total) for total in totals]

    for r_name, r_total in zip(('wt', 'ga'), r_totals):
        r.assign(r_name, r_total)

    # the R script is given matrices rather than plain vectors
    for command in (' wt <- as.matrix(wt) ',
                    ' ga <- as.matrix(ga) ',
                    ' source("src/R/figure_coverage.R") '):
        r(command)
Пример #33
0
def RCopula():
    """Demo: evaluate R's empirical copula ``C_n`` on random data and print it.

    Also builds and prints the result of ``buildEmpiricalCopula`` on the same
    random sample.
    """
    from rpy2.robjects.numpy2ri import numpy2ri
    from rpy2.robjects.packages import importr
    copula = importr('copula')

    n_rv, n_dim = 6, 2
    data = np.random.rand(n_rv, n_dim)
    # floor division: array shapes must be integers (``/`` is float on Python 3)
    data2 = np.random.rand(n_rv // 2, n_dim)
    # single-argument print calls give the same output on Python 2 and 3
    print("data:\n%s" % (data,))
    print("data2:\n%s" % (data2,))

    print(copula.C_n(numpy2ri(data), numpy2ri(data2)))
    mycopula = buildEmpiricalCopula(data)
    print(mycopula)
Пример #34
0
def RCopula():
    """Demo: evaluate R's empirical copula ``C_n`` on random data and print it.

    Also builds and prints the result of ``buildEmpiricalCopula`` on the same
    random sample.
    """
    from rpy2.robjects.numpy2ri import numpy2ri
    from rpy2.robjects.packages import importr
    copula = importr('copula')

    n_rv, n_dim = 6, 2
    data = np.random.rand(n_rv, n_dim)
    # floor division: array shapes must be integers (``/`` is float on Python 3)
    data2 = np.random.rand(n_rv // 2, n_dim)
    # single-argument print calls give the same output on Python 2 and 3
    print("data:\n%s" % (data,))
    print("data2:\n%s" % (data2,))

    print(copula.C_n(numpy2ri(data), numpy2ri(data2)))
    mycopula = buildEmpiricalCopula(data)
    print(mycopula)
Пример #35
0
def py2ri_pandasseries(obj):
    """Convert a pandas Series-like *obj* into an R vector that keeps its index.

    ``'<M8[ns]'`` data is rebuilt field by field through ``ISOdatetime`` and
    wrapped as ``POSIXct``; everything else is converted from ``obj.values``
    with ``numpy2ri``. The index becomes the R ``names`` slot for 1-D input
    and the ``dimnames`` slot otherwise.
    """
    if obj.dtype == '<M8[ns]':
        # time series: one integer vector per calendar/clock field
        fields = ('year', 'month', 'day', 'hour', 'minute', 'second')
        parts = [IntVector([getattr(stamp, field) for stamp in obj])
                 for field in fields]
        #FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(ISOdatetime(*parts))
    else:
        # converted as a numpy array
        res = numpy2ri.numpy2ri(obj.values)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        slot = 'names'
        value = StrVector(tuple(str(x) for x in obj.index))
    else:
        slot = 'dimnames'
        value = SexpVector(conversion.py2ri(obj.index))
    res.do_slot_assign(slot, value)
    return res
Пример #36
0
def fuzzyCMeans(data, k):
    """Run e1071's ``cmeans`` on *data* with *k* centers.

    :param data: array converted with ``numpy2ri``.
    :param k: passed to ``cmeans`` as its second argument.
    :return: the ``withinerror`` component of the result as a numpy array.
    """
    results = e1071.cmeans(numpy2ri(data), k)
    # 'centers' and 'membership' were also fetched before but never used
    return np.array(results.rx2('withinerror'))
Пример #37
0
def R_reconstruction(series, tau, m):
    '''
    Print the embedding of *series* computed by R's tseriesChaos ``embedd``.

    http://cran.r-project.org/web/packages/tseriesChaos/tseriesChaos.pdf
    embedd(x, m, d, lags)

    *m* and *tau* are passed as the second and third positional arguments.
    The result is only printed (its type, then its matrix form); nothing is
    returned.
    '''
    res = R_tseriesChaos.embedd(numpy2ri(series), m, tau)
    # single-argument print call: identical output on Python 2 and 3
    print("%s %s" % (type(res), np.asmatrix(res)))
Пример #38
0
def plot_hyddiff(data):
    """Assign *data* in R as ``data`` and source the two hyd figure scripts."""
    r.assign('data', numpy2ri(data))

    for command in (' source("src/R/figure_sites_hyd.R") ',
                    ' source("src/R/figure_P1_hyd.R") '):
        r(command)
Пример #39
0
def dv2_manova(DV1, DV2, IV):
    '''
    Fit ``cbind(factor0, factor1) ~ IV`` in R and print the MANOVA reports.

    DV1 and DV2 are bound to ``factor0``/``factor1`` and IV to ``IV`` in the
    formula's environment. Prints the Wilks statistics of the MANOVA summary
    and the summary of ``aov`` on the same linear model; returns nothing.
    '''
    stats = importr('stats')

    formula = R.formula("cbind(factor0, factor1) ~ IV")
    env = formula.environment
    for r_name, values in (("factor0", DV1), ("factor1", DV2), ("IV", IV)):
        env[r_name] = numpy2ri(values)

    linear_fit = stats.lm(formula)
    manova_fit = stats.manova(linear_fit)

    #report manova test
    wilks = R.summary(manova_fit, test='Wilks').rx('stats')
    print(wilks)
    print(R.summary(R.aov(linear_fit)))
Пример #40
0
def generate_images(data,template):
    """Run the R ``processData`` script (run 3) on the given data.

    :param data: list of tuples (dose,response,experiment)
    :param template: value handed to R as ``outfilename``.
    """
    ro.r("""
    source("graph/R/machPlots.R")
    source("graph/R/processData.R")
    """)
    # zip() is lazy on Python 3; materialise it so asarray() builds one row
    # per column of ``data`` on both Python 2 and Python 3.
    data_array = asarray(list(zip(*data)))
    dose, response, experiment = data_array[0], data_array[1], data_array[2]
    ro.r.assign('dose',numpy2ri(dose))
    ro.r.assign('response',numpy2ri(response))
    ro.r.assign('experiment',numpy2ri(experiment))
    ro.r.assign('outfilename',template)
    ro.r("""
    mydata = data.frame(dose=dose,response=response,experiment=experiment)
    processData(mydata, title="DRM", xlab="Dose", outfilename=outfilename, cooksfilename='', run=3)
    """)
Пример #41
0
def multiple_aa_composition_pca(numpy_matrix, path):
    '''
        PCA of multiple datasets, written to a 600x600 PNG via R.

        The first column of the matrix is the colour factor (``habillage``);
        the remaining columns are the PCA input. NA values are replaced by 0.

        :param numpy_matrix: matrix sent to R as ``Mdata``
        :param path: PNG output path, interpolated verbatim into the R
            ``png()`` call (must not contain a double quote)
        :return: None
        '''

    import rpy2.robjects as robjects
    import rpy2.robjects.numpy2ri as numpy2ri
    # The aliased imports above do not bind the name ``rpy2`` in this scope,
    # so activate through the alias instead of ``rpy2.robjects.numpy2ri``.
    numpy2ri.activate()

    robjects.r.assign('Mdata', numpy2ri.numpy2ri(numpy_matrix))

    robjects.r('''
                library("FactoMineR")
                library("factoextra")
                print(class(Mdata))
                Mdata[is.na(Mdata)] <- 0
                mat <- as.data.frame(Mdata[,2:length(Mdata[1,])])
                print(head(data.matrix(mat)))
                aa.pca <- PCA(data.matrix(mat), graph = FALSE)
                png("%s", height=600, width=600)
                print(fviz_pca_ind(aa.pca,  label="none", habillage=as.factor(Mdata[,1]))) # ,  label="none", habillage=Mdata[,1]
                dev.off()
        ''' % (path))
def save_matrix_R(filename, matrix):
    """Store *matrix* in R under the name ``data`` and ``save`` it to *filename*."""
    r.assign('data', npr.numpy2ri(matrix))
    r.save('data', file=filename)
Пример #43
0
def cube_to_r(incube, reverse_dims=True):
    """Convert a cube or numpy array to a data struct recognised by R.

    Arguments:
      incube: single data cube (its ``.data`` is used), or anything else,
              which is handled as a numpy array
      reverse_dims: transpose the data before conversion? (default True)
    Returns: data structure that can be passed to an R function
    """

    from rpy2.robjects.numpy2ri import numpy2ri

    # Cubes contribute their payload; everything else is used as-is.
    payload = incube.data if isinstance(incube, iris.cube.Cube) else incube

    if reverse_dims:
        payload = payload.transpose()  # numpy method

    # numpy2ri only recognises numpy arrays
    return numpy2ri(payload)
Пример #44
0
def save_matrix_R(filename, matrix):
    """Save *matrix* into the R data file *filename* as the object ``data``."""
    # Convert first, then bind the converted object to the R name that
    # ``save`` is asked to write out.
    converted = npr.numpy2ri(matrix)
    r.assign('data', converted)
    r.save('data', file=filename)
Пример #45
0
def process_ccs(cc_in):
    """Bicluster the top-left 300x300 corner of *cc_in* with R's ``biclust``.

    The diagonal of the working matrix is set to 0 before it is handed to
    R. The matrix is binarised with ``biclust::binarize`` (threshold 0.90)
    and, independently, biclustered with the ``BCPlaid`` method.

    :param cc_in: array indexed as ``cc_in[:300, :300]``; it is not modified.
    :return: tuple ``(rows, cols, m, mb)`` where ``rows`` is the
        ``RowxNumber`` slot, ``cols`` the transposed ``NumberxCol`` slot of
        the biclust result, ``m`` the zero-diagonal working matrix and
        ``mb`` its binarised version, all converted with ``array``.
    """
    from rpy2.robjects.numpy2ri import numpy2ri
    from rpy2.robjects.packages import importr

    # Work on a copy: a plain slice of a numpy array is a view, so zeroing
    # the diagonal in place would silently overwrite the caller's data.
    cc = array(cc_in[:300, :300])
    for i, row in enumerate(cc):
        row[i] = 0

    m = numpy2ri(cc)

    biclust = importr("biclust")
    mb = biclust.binarize(m, 0.90)

    out = biclust.biclust(m, method=biclust.BCPlaid())
    rows = array(out.do_slot("RowxNumber"))
    cols = array(out.do_slot("NumberxCol")).T

    return rows, cols, array(m), array(mb)
Пример #46
0
def aa_composition_pca(numpy_matrix, target_psoition, path):
    """Plot the first three principal components of a matrix to a PNG.

    The matrix is handed to R as ``Mdata`` and decomposed with ``princomp``.
    The score pairs (1,2), (1,3) and (2,3) are drawn side by side, and the
    row(s) selected by the target are over-plotted in red.

    :param numpy_matrix: matrix passed to R's ``princomp``.
    :param target_psoition: interpolated verbatim into the R source as the
        row selector ``target`` used in ``scores[target, ...]``.
    :param path: file name of the 1300x500 PNG written by R; interpolated
        verbatim into the R source.
    :return: None
    """
    import rpy2.robjects as robjects
    import rpy2.robjects.numpy2ri as numpy2ri
    # ``import a.b as c`` binds only ``c``: the bare name ``rpy2`` is not
    # defined by the imports above, so activate through the alias.
    numpy2ri.activate()

    robjects.r.assign('Mdata', numpy2ri.numpy2ri(numpy_matrix))

    robjects.r('''
            print(head(Mdata))

             plot_scores<-function(scores,x,y, target){
                 plot(scores[,x],scores[,y],xlab=paste("comp.",as.character(x)),ylab=paste("comp.",as.character(y)), xlim=range(scores[,c(x,y)]),ylim=range(scores[,c(x,y)]),cex=1.5,pch=20)
                     points(scores[target,x],scores[target,y],pch=18, col="red")
                     #text(scores[25,x],scores[25,y],labels="test",col="red",cex=0.9)
                     abline(h=0,col=2)
                     abline(v=0,col=2)
             }

             visual<-function(groups,clustertable, target){
                 pca2 <- princomp(clustertable)

                 par(mfrow=c(1,3),pty="s")
                 plot_scores(pca2$scores,1,2, target)
                 plot_scores(pca2$scores,1,3,target)
                 plot_scores(pca2$scores,2,3, target)
             }
             png("%s", height=500, width=1300)
             visual(c(1,2,3), Mdata, %s)
             dev.off()
        ''' % (path, target_psoition))
Пример #47
0
def dataframe(d):
    """Build an R ``data.frame`` from a dict mapping column name to numbers.

    ``None`` is translated to the result of R's ``as.null()``.
    """
    if d is None:
        return robjects.r('as.null()')

    # One R vector per dict entry, keyed by the column name.
    columns = dict((name, numpy2ri(numpy.array(values)))
                   for name, values in d.iteritems())
    return robjects.r['data.frame'](**columns)
Пример #48
0
def py2ri_pandasindex(obj):
    """Convert a pandas index: object dtype -> ``StrVector``, else numpy2ri."""
    if obj.dtype.kind != 'O':
        # pandas2ri builds on the conversion rules defined by numpy2ri, so
        # every non-object dtype is simply deferred to numpy2ri rather than
        # re-implementing those rules here.
        return numpy2ri.numpy2ri(obj)
    return StrVector(obj)
Пример #49
0
def cluster_verbs(target, source, env):
    """Cluster verbs with R k-means and write one cluster per output line.

    NOTE(review): this function is currently a no-op. The unconditional
    ``return None`` directly after the arguments are read makes everything
    below it unreachable. Confirm whether it was disabled on purpose before
    removing either the early return or the dead body.

    :param target: sequence; ``target[0].rstr()`` names the output file
        (only used by the unreachable part).
    :param source: sequence; ``source[-1].read()`` yields the argument
        mapping and ``source[0].rstr()`` names a pickle file (the latter
        only used by the unreachable part).
    :param env: not used by this function.
    :return: None
    """
    args = source[-1].read()
    # Early exit: nothing after this line is executed.
    return None
    datas, verbs = pickle.load(open(source[0].rstr(), "rb"))
    data = datas[("verb", "verb_class")]
    # Divide every row by its row sum.
    data = numpy.transpose(data.T / data.sum(1))
    if "clusters" in args:
        res = stats.kmeans(numpy2ri(data), centers=args["clusters"])
    else:
        # No cluster count given: pick one from the clusGap table via maxSE.
        tres = numpy.asarray(rcluster.clusGap(numpy2ri(data), FUN=stats.kmeans, K_max=30, B=500).rx2("Tab"))
        gaps = tres[:, 2]
        err = tres[:, 3]    
        best = rcluster.maxSE(numpy2ri(gaps), numpy2ri(err), method="globalmax")
        res = stats.kmeans(numpy2ri(data), centers=best)
    ofd = meta_open(target[0].rstr(), "w")
    # One line per cluster id: the space-separated verbs assigned to it.
    for c in set(res.rx2("cluster")):
        ofd.write(" ".join([verbs[i] for i, a in enumerate(res.rx2("cluster")) if a == c]) + "\n")

    return None
Пример #50
0
def smoothScatter(X,Y,output=None,format='png',new=True,last=True,**kwargs):
    """Plot *Y* against *X* with R's ``smoothScatter``.

    Recognised keyword arguments (besides those consumed by ``_begin`` and
    ``_end``): ``nbin`` and ``bandwidth`` (pairs forwarded to R) and
    ``color`` (list of colours for the ramp; defaults to lightgrey, blue,
    red). Returns the ``output`` value handed back by ``_begin``.
    """
    opts, output = _begin(output=output, format=format, new=new, **kwargs)
    if 'nbin' in kwargs:
        opts += ',nbin=c(%i,%i)' % tuple(kwargs['nbin'])
    if 'bandwidth' in kwargs:
        opts += ',bandwidth=c(%f,%f)' % tuple(kwargs['bandwidth'])

    robjects.r.assign('xdata', numpy2ri.numpy2ri(X))
    robjects.r.assign('ydata', numpy2ri.numpy2ri(Y))
    palette = kwargs.get("color", ["lightgrey", "blue", "red"])
    robjects.r.assign('colrs', robjects.StrVector(palette))

    r_code = """
library(graphics)
colramp = colorRampPalette(colrs,interpolate="spline")
smoothScatter(xdata,ydata,colramp=colramp%s)
""" % opts
    robjects.r(r_code)
    # Alternative rendering, kept for reference:
    #       library(RColorBrewer)
    #       allcols = densCols(xdata,ydata,colramp=colramp)
    #       plot(xdata,ydata,pch='.',col=allcols, cex=4%s)""" %plotopt)
    _end("", last, **kwargs)
    return output
Пример #51
0
def cluster_verbs(target, source, env):
    """Cluster verbs with R k-means and write one cluster per output line.

    The pickle named by ``source[0]`` yields ``(verbs, samples)``.
    ``samples`` is summed over axis 2, every row is divided by its row sum,
    and the result is clustered with ``stats.kmeans``.

    :param target: sequence; ``target[0].rstr()`` names the output file.
    :param source: sequence; ``source[0].rstr()`` names the pickle file and
        ``source[-1].read()`` yields the argument mapping (optional key
        ``"clusters"``, default 20).
    :param env: not used by this function.
    :return: None
    """
    args = source[-1].read()

    # Close the input handle explicitly instead of leaking it.
    # NOTE(review): assumes objects returned by meta_open have close().
    ifd = meta_open(source[0].rstr())
    try:
        verbs, samples = pickle.load(ifd)
    finally:
        ifd.close()

    samples = samples.sum(2)
    data = numpy.transpose(samples.T / samples.sum(1))
    res = stats.kmeans(numpy2ri(data), centers=args.get("clusters", 20))

    # Fetch the assignment vector once rather than once per cluster.
    assignments = list(res.rx2("cluster"))

    ofd = meta_open(target[0].rstr(), "w")
    try:
        for c in set(assignments):
            members = [verbs[i] for i, a in enumerate(assignments) if a == c]
            ofd.write(" ".join(members) + "\n")
    finally:
        # Ensure the output is flushed and released even if a write fails.
        ofd.close()
    return None
Пример #52
0
def hist(X,options={},output=None,format='pdf',new=True,last=True,**kwargs):
    """Create a histogram of the values in vector *X*.

    Each ``name: value`` pair in *options* is rendered with ``list2r`` and
    appended as an extra argument to the R ``hist`` call. Returns the
    ``output`` value handed back by ``_begin``.
    """
    _plotopt, output = _begin(output=output, format=format, new=new, **kwargs)
    rargs = "".join(", %s=%s" % (name, list2r(value))
                    for name, value in options.iteritems())
    robjects.r.assign('X', numpy2ri.numpy2ri(X))
    robjects.r("hist(X %s)" % rargs)
    _end("", last, **kwargs)
    return output
Пример #53
0
def screw_around():
    """Scratch experiment with rpy2; prints intermediate results.

    Prints R's ``pi`` in a few forms, builds a 5x10 float array holding
    0..49, wraps it in an R data.frame and calls two R functions defined
    through ``SignatureTranslatedAnonymousPackage``.
    """
    r_pi = robj.r['pi']
    print(r_pi)
    print(r_pi + 2)
    print(r_pi[0])
    print(r_pi[0] + 2)

    # Fake binned array: row-major counter 0, 1, 2, ...
    nrow, ncol = 5, 10
    binned = np.arange(nrow * ncol, dtype="float64").reshape((nrow, ncol))

    # Get the binned array into an R data.frame.
    print(numpy2ri(binned))
    rdf = robj.r['data.frame'](numpy2ri(binned), code="ID1000")

    # Now see if we can get R to use this dataframe.
    myRcode = """
    square <- function(rdf) {
        myv = rdf$X2 + rdf$X3
        return(myv)
    }
    doit <- function() {
        source("/srv/scratch/carolyn/Dengue_code/Rtest_rpy.R") 
        run_test_wrap(3)
    }
    """
    print("wwwwah")
    powerpack = SignatureTranslatedAnonymousPackage(myRcode, "powerpack")
    print(powerpack._rpy2r.keys())  # reveals the functions within powerpack
    print(powerpack.square(rdf))  # runs the function "square" found in powerpack
    print(powerpack.doit())
Пример #54
0
def _r_character_vector(labels):
    """Render *labels* as the source text of an R character vector."""
    quoted = []
    for label in labels:
        text = "%s" % (label,)
        # Escape backslashes first, then double quotes, for an R "..." literal.
        text = text.replace("\\", "\\\\").replace('"', '\\"')
        quoted.append('"%s"' % text)
    return "c(%s)" % ", ".join(quoted)


def save_simmat_R(filename, simmat):
    """Save a similarity matrix with row/column names to an R data file.

    ``simmat.matrix`` is converted with ``npr.numpy2ri`` and bound to the R
    name ``data``; ``simmat.labels`` become both its row and column names;
    the object is then written to *filename* with R's ``save``.

    The names are emitted as an explicit R character vector instead of the
    repr of a Python tuple. The tuple repr breaks for a single label
    (``c('a',)`` is rejected by R) and for Python 2 unicode labels
    (``u'a'``).

    :param filename: path of the R data file to write.
    :param simmat: object exposing ``matrix`` and ``labels``.
    """
    r.assign('data', npr.numpy2ri(simmat.matrix))

    names = _r_character_vector(simmat.labels)
    r("rownames(%s) <- %s" % ('data', names))
    r("colnames(%s) <- %s" % ('data', names))

    r.save('data', file=filename)
Пример #55
0
def pandas2ri(obj):
    """Convert a pandas DataFrame, Index or Series to an R object.

    Anything that is none of those is handed to ``original_py2ri``.
    """
    if isinstance(obj, PandasDataFrame):
        columns = OrderedDict()
        for name, values in obj.iteritems():
            # Object-typed columns become string vectors; the rest recurse.
            if values.dtype.kind == 'O':
                columns[name] = StrVector(values)
            else:
                columns[name] = pandas2ri(values)
        return DataFrame(columns)

    if isinstance(obj, PandasIndex):
        if obj.dtype.kind != 'O':
            # only other alternative to 'O' is integer, I think,
            # which goes straight to the numpy converter.
            return numpy2ri.numpy2ri(obj)
        return StrVector(obj)

    if not isinstance(obj, PandasSeries):
        return original_py2ri(obj)

    if obj.dtype == '<M8[ns]':
        # time series: one integer vector per date/time component
        components = ('year', 'month', 'day', 'hour', 'minute', 'second')
        parts = [IntVector([getattr(stamp, part) for stamp in obj])
                 for part in components]
        #FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(ISOdatetime(*parts))
    else:
        # converted as a numpy array
        res = numpy2ri.numpy2ri(obj.values)

    # "index" is equivalent to "names" in R
    if obj.ndim == 1:
        slot_name = 'names'
        slot_value = ListVector({'x': pandas2ri(obj.index)})
    else:
        slot_name = 'dimnames'
        slot_value = ListVector(pandas2ri(obj.index))
    res.do_slot_assign(slot_name, slot_value)
    return res
Пример #56
0
def old_cluster_verbs(target, source, env):
    """Cluster verbs from a whitespace-separated feature file with k-means.

    Each input line is ``<verb> <feature-name> <values...>``. Lines whose
    first token starts with ``_`` are skipped, as are blank lines. Values
    are floats, optionally decorated with ``[``, ``]`` or ``,``. Only rows
    with a positive value sum are kept. The vectors stored under
    ``"_<feat>"`` form the data matrix; every row is divided by its row
    sum and clustered with ``stats.kmeans``. Without a ``"clusters"``
    argument the cluster count is chosen from ``clusGap`` via ``maxSE``.

    :param target: sequence; ``target[0].rstr()`` names the output file.
    :param source: sequence; ``source[0].rstr()`` names the input file and
        ``source[-1].read()`` yields the argument mapping (optional keys
        ``"feat"``, default ``"class"``, and ``"clusters"``).
    :param env: not used by this function.
    :return: None
    """
    args = source[-1].read()
    feat_key = "_%s" % args.get("feat", "class")

    all_data = {}
    # ``with`` closes the input even if a line fails to parse.
    with open(source[0].rstr()) as ifd:
        for line in ifd:
            toks = line.strip().split()
            # Blank lines used to crash on toks[0]; skip them.
            if not toks or toks[0].startswith("_"):
                continue
            verb = toks[0]
            other = toks[1]
            vals = [float(x.strip("[],")) for x in toks[2:]]
            if sum(vals) > 0:
                all_data.setdefault(verb, {})[other] = vals

    # list(...) keeps this working where dict.values() is a view.
    width = len(list(all_data.values())[0][feat_key])
    data = numpy.zeros(shape=(len(all_data), width))
    verbs = sorted(all_data.keys())
    for i, verb in enumerate(verbs):
        data[i, :] = all_data[verb][feat_key]

    data = numpy.transpose(data.T / data.sum(1))
    if "clusters" in args:
        res = stats.kmeans(numpy2ri(data), centers=args["clusters"])
    else:
        tres = numpy.asarray(rcluster.clusGap(numpy2ri(data), FUN=stats.kmeans, K_max=30, B=500).rx2("Tab"))
        gaps = tres[:, 2]
        err = tres[:, 3]
        best = rcluster.maxSE(numpy2ri(gaps), numpy2ri(err), method="globalmax")
        res = stats.kmeans(numpy2ri(data), centers=best)

    # Fetch the assignment vector once rather than once per cluster.
    assignments = list(res.rx2("cluster"))

    # NOTE(review): assumes objects returned by meta_open have close().
    ofd = meta_open(target[0].rstr(), "w")
    try:
        for c in set(assignments):
            members = [verbs[i] for i, a in enumerate(assignments) if a == c]
            ofd.write(" ".join(members) + "\n")
    finally:
        ofd.close()
    return None
Пример #57
0
def cluster_by_valex(target, source, env):
    """Cluster verbs by the frame counts found in a VALEX lexicon.

    The target verbs are the ``verb_lemma`` names in the instances file.
    For each lexicon file whose base name (up to the first ``.``) is a
    target verb, every ``:CLASSES (...)`` / ``FREQCNT n`` pair contributes
    a count for the first class id. The verb-by-class count matrix has
    every row divided by its row sum and is clustered with ``stats.kmeans``,
    with the cluster count chosen from ``clusGap`` via ``maxSE``.

    :param target: sequence; ``target[0].rstr()`` names the output file.
    :param source: sequence; ``source[0].rstr()`` names the instances file
        and ``source[-1].read()`` yields the argument mapping (key
        ``"lexicon"``).
    :param env: mapping providing ``"VALEX_LEXICON"``.
    :return: None
    """
    import graphmod as gm
    args = source[-1].read()

    target_verbs = set()
    instances = gm.Instances()
    gm.load_instances(source[0].rstr(), instances)
    for vid in range(instances.get_size("verb_lemma")):
        target_verbs.add(instances.get_name("verb_lemma", vid))

    data = {}
    scfs = {}
    verbs = {}
    lexicon_dir = "%s/lex-%s" % (env["VALEX_LEXICON"], args["lexicon"])
    for fname in sorted(glob(os.path.join(lexicon_dir, "*"))):
        verb = os.path.basename(fname).split(".")[0]
        if verb not in target_verbs:
            continue
        data[verb] = {}
        # Read and close each lexicon file instead of leaking one handle
        # per verb.
        # NOTE(review): assumes objects returned by meta_open have close().
        fd = meta_open(fname)
        try:
            text = fd.read()
        finally:
            fd.close()
        for m in re.finditer(r":CLASSES \((.*?)\).*\n.*FREQCNT (\d+)", text):
            scf = int(m.group(1).split()[0])
            count = int(m.group(2))
            scfs[scf] = scfs.get(scf, 0) + count
            verbs[verb] = verbs.get(verb, 0) + count
            data[verb][scf] = count

    ddata = numpy.zeros(shape=(len(verbs), len(scfs)))
    verbs = sorted(verbs)
    scfs = sorted(scfs)
    for row, verb in enumerate(verbs):
        for col, scf in enumerate(scfs):
            ddata[row, col] = data[verb].get(scf, 0)

    data = numpy.transpose(ddata.T / ddata.sum(1))
    tres = numpy.asarray(rcluster.clusGap(numpy2ri(data), FUN=stats.kmeans, K_max=30, B=500).rx2("Tab"))
    gaps = tres[:, 2]
    err = tres[:, 3]
    best = rcluster.maxSE(numpy2ri(gaps), numpy2ri(err), method="globalmax")
    res = stats.kmeans(numpy2ri(data), centers=best)

    # Fetch the assignment vector once rather than once per cluster.
    assignments = list(res.rx2("cluster"))

    ofd = meta_open(target[0].rstr(), "w")
    try:
        for c in set(assignments):
            members = [verbs[i] for i, a in enumerate(assignments) if a == c]
            ofd.write(" ".join(members) + "\n")
    finally:
        ofd.close()
    return None
Пример #58
0
def R_correlationIntegral(series, tau, m, t, r):
    '''
    Evaluate ``C2`` from the R package tseriesChaos and return its value.

    http://cran.r-project.org/web/packages/tseriesChaos/tseriesChaos.pdf

    R signature: C2(series, m, d, t, eps)

    :param series: time series (converted with ``numpy2ri``)
    :param tau: time delay (R's ``d``)
    :param m: embedding dimension
    :param t: Theiler window
    :param r: length scale (R's ``eps``)
    :return: first element of the R result. It is still printed as before,
        but is now also returned instead of being discarded.
    '''
    res = R_tseriesChaos.C2(numpy2ri(series), m, tau, t, r)
    value = res[0]
    print(value)
    return value
Пример #59
0
    def fit(self, X, y):
        """Fit the R random forest on ``X`` (samples x features) and ``y``.

        ``max_features`` may be ``"auto"``/``"sqrt"`` (square root of the
        feature count), ``"log2"``, ``None`` (all features), an integer
        (used as-is) or a float (fraction of the feature count). Any other
        string raises ``ValueError``. Sets ``n_features_``, ``classes_``
        and ``model_`` and returns ``self``.
        """
        n_features = X.shape[1]
        self.n_features_ = n_features

        # Resolve the number of features tried per split.
        requested = self.max_features
        if isinstance(requested, str):
            if requested in ("auto", "sqrt"):
                mtry = max(1, int(np.sqrt(n_features)))
            elif requested == "log2":
                mtry = max(1, int(np.log2(n_features)))
            else:
                raise ValueError(
                    'Invalid value for max_features. Allowed string '
                    'values are "auto", "sqrt" or "log2".')
        elif requested is None:
            mtry = n_features
        elif isinstance(requested, (numbers.Integral, np.integer)):
            mtry = requested
        else:  # float
            mtry = int(requested * n_features)

        # NOTE(review): R's randomForest::randomForest names its tree-count
        # argument "ntree"; confirm "ntrees" is honoured by ``rf`` and not
        # silently absorbed by "...".
        params = {
            "mtry": mtry,
            "ntrees": self.n_estimators,
            "nodesize": self.min_samples_leaf,
        }

        # Encode labels as 1-based positions in the sorted unique classes.
        self.classes_ = np.unique(y)
        codes = np.searchsorted(self.classes_, y) + 1
        r_X = numpy2ri(X)
        r_y = ro.FactorVector(numpy2ri(codes))

        self.model_ = rf.randomForest(r_X, r_y, **params)

        return self
def numpy2ri_avoiding_zerodim(x):
    """Convert *x* with ``numpy2ri``, unwrapping zero-dimensional values first.

    A value whose ``shape`` is ``()`` is cast to the Python scalar type
    matching its dtype kind (bool, int, float or complex). Other dtype
    kinds, and anything that is not zero-dimensional, are passed through
    unchanged.
    """
    if not (hasattr(x, 'shape') and x.shape == ()):
        return numpy2ri(x)

    # dtype kind -> plain Python scalar constructor
    to_scalar = {'b': bool, 'u': int, 'i': int, 'f': float, 'c': complex}
    try:
        x = to_scalar[x.dtype.kind](x)
    except KeyError:
        # No scalar mapping for this kind: hand x over as it is.
        pass
    return numpy2ri(x)