Example #1
def fit_nbinom(samples):
    from rpy import r
    r.library('MASS')
    f = r.fitdistr(samples,'negative binomial')
    s,m = f['estimate']['size'],f['estimate']['mu']
    qp = r.qnbinom(r.ppoints(samples),size=s,mu=m)
    return qp,s,m
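# A minimal usage sketch (made-up counts; assumes rpy and R's MASS package
# are installed): fit_nbinom pairs ppoints() probability levels with qnbinom,
# so plotting sorted(samples) against qp gives a negative-binomial Q-Q plot.
counts = [0, 1, 1, 2, 3, 3, 4, 7, 9, 12]
qp, size, mu = fit_nbinom(counts)
qq_pairs = zip(sorted(counts), qp)  # (observed, theoretical) points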
Example #2
def get_quadprog(lc, trend_set):
    '''
    Return the de-trended lc by quadratic programming.
    It constrains the free parameters to be greater than 0.
    See Kim et al. 2008 for more details.

    lc :
        Original light curve of flux.

    trend_set :
        Set of trend light curves constructed by the create_trend routine.

    return :
        De-trended light curve.
    '''

    r.library('quadprog')

    X = transpose(trend_set)
    dmat = r.crossprod(X, X)
    dvec = r.crossprod(lc, X)

    results = r.solve_QP(dmat, dvec, r.diag(len(trend_set)))
    #print results['solution'], results['value']

    return lc - dot(results['solution'], trend_set)
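# Why those arguments: minimizing ||lc - X.w||^2 subject to w >= 0 expands to
# (1/2)w'(X'X)w - (lc'X)w, which is the exact form R's quadprog::solve.QP
# (exposed by rpy as r.solve_QP) minimizes; Amat = diag(n) with the default
# bvec of zeros encodes the n non-negativity constraints. A made-up check
# (assumes the numpy names this snippet's source module star-imports):
#
#   t = linspace(0., 1., 100)
#   detrended = get_quadprog(3.*sin(t) + .5*cos(t), [sin(t), cos(t)])
#
# The fitted weights should come out near (3, 0.5), leaving detrended ~ 0.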
Example #3
def calcKinship(snps):
    """
	Requires EMMA to be installed.
	"""
    a = array(snps)
    r.library("emma")
    return r.emma_kinship(a)
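# Note: rpy exposes R identifiers containing dots by swapping '.' for '_', so
# r.emma_kinship above calls R's emma.kinship; the same convention explains
# r.solve_QP, r.ad_test, r.heatmap_2 and r.dev_off in other examples on this
# page.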
Example #4
def fit_poisson(samples):
    from rpy import r
    r.library('MASS')
    f = r.fitdistr(samples,'poisson')
    l = f['estimate']['lambda'] #predicted mean
    qp = r.qpois(r.ppoints(samples),l)
    return qp,l
Example #5
def fit_gamma(samples):
    from rpy import r
    samples = [double(n) for n in samples if n > 0]#because rpy does not like longs!
    r.library('MASS')
    f = r.fitdistr(samples,'gamma')
    shap,rat = f['estimate']['shape'],f['estimate']['rate']
    qp = r.qgamma(r.ppoints(samples),shape=shap,rate=rat)
    return qp,shap,rat
Example #6
def fit_weibull(samples):
    from rpy import r
    #samples = [double(n) for n in samples if n > 0]#because rpy does not like longs!
    r.library('MASS')
    f = r.fitdistr(samples,'weibull')
    sc,sh = f['estimate']['scale'],f['estimate']['shape']
    qp = r.qweibull(r.ppoints(samples),scale=sc,shape=sh)
    return qp,sc,sh
Example #7
def fit_exponential(samples):
    from rpy import r
    samples = [double(n) for n in samples]#because rpy does not like longs!
    r.library('MASS')
    f = r.fitdistr(samples,'exponential')
    rat = f['estimate']['rate']
    qp = r.qexp(r.ppoints(samples),rate=rat)
    
    return qp, rat
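# The fit_* helpers above (negative binomial, Poisson, gamma, Weibull,
# exponential) all follow one pattern: MASS::fitdistr estimates the parameters
# by maximum likelihood, then ppoints() plus the matching q* quantile function
# turns them into theoretical quantiles to plot against the sorted samples as
# a Q-Q plot. A made-up example (assumes rpy, R's MASS, and the numpy names
# these snippets star-import elsewhere):
data = [0.5, 1.2, 2.3, 3.1, 4.8, 7.9]
qp, rate = fit_exponential(data)
print zip(sorted(data), qp)  # roughly linear if the exponential fits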
Example #8
    def randomForest_fit(self, known_data, parameter_list, bit_string="1111111"):
        """
		03-17-06
		2006-10-30, add avg_degree(vertex_gradient) and unknown_cut_off
		"""
        if self.debug:
            sys.stderr.write("Fitting randomForest...\n")
        mty = parameter_list[0]

        from rpy import r

        r._libPaths(
            os.path.join(lib_path, "R")
        )  # better than r.library("randomForest", lib_loc=os.path.join(lib_path, "R")) (see plone doc)
        r.library("randomForest")

        coeff_name_list = [
            "p_value",
            "recurrence",
            "connectivity",
            "cluster_size",
            "gradient",
            "avg_degree",
            "unknown_ratio",
        ]  # 2006-10-30
        formula_list = []
        for i in range(len(bit_string)):
            if bit_string[i] == "1":
                formula_list.append(coeff_name_list[i])
        formula = r("is_correct~%s" % "+".join(formula_list))

        known_data = array(known_data)
        set_default_mode(NO_CONVERSION)
        data_frame = r.as_data_frame(
            {
                "p_value": known_data[:, 0],
                "recurrence": known_data[:, 1],
                "connectivity": known_data[:, 2],
                "cluster_size": known_data[:, 3],
                "gradient": known_data[:, 4],
                "avg_degree": known_data[:, 5],
                "unknown_ratio": known_data[:, 6],
                "is_correct": r.factor(known_data[:, -1]),
            }
        )  # 03-17-06, watch r.factor	#2006-10-30

        if mty > 0:
            fit = r.randomForest(formula, data=data_frame, mtry=mty)  # randomForest's argument is spelled 'mtry'
        else:
            fit = r.randomForest(formula, data=data_frame)

        del data_frame
        if self.debug:
            sys.stderr.write("Done fitting randomForest.\n")
        return fit
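# How the bit_string mask above selects model terms, in isolation
# (hypothetical inputs; the same pattern reappears in rpart_fit_and_predict
# and rpart_fit further down this page):
#
#   coeff_demo = ["p_value", "recurrence", "connectivity"]
#   chosen = [c for c, b in zip(coeff_demo, "101") if b == "1"]
#   print "is_correct~%s" % "+".join(chosen)  # is_correct~p_value+connectivity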
Example #9
def runEmma(phed, p_i, k, snps):
    # Assume that the accessions are ordered.
    i = phed.getPhenIndex(p_i)
    r.library("emma")
    phenValues = []
    for vals in phed.phenotypeValues:
        phenValues.append(float(vals[i]))
    phenArray = array([phenValues])
    snpsArray = array(snps)
    res = r.emma_REML_t(phenArray, snpsArray, k)
    # print res
    return res
Example #10
    def compute(self, waveforms, spike_times, minG=1, maxG=32):

        #~ from rpy import r

        if not R_available: return None

        r.library('mclust')

        ret = r.Mclust(waveforms, minG=minG, maxG=maxG)

        code = array(ret['classification'], dtype='i')

        return code
Example #11
def LinearRegression(ls1,ls2,return_rsqrd):
    intercept = 0 ### when forced through the origin
    from rpy import r
    r.library('MASS')
    k = r.options(warn=-1) ### suppress all warning messages from R
    #print ls1; print ls2
    d = r.data_frame(x=ls1, y=ls2)
    model = r("y ~ x - 1") ### when not forced through the origin it is r("y ~ x")
    fitted_model = r.rlm(model, data = d) ###errors: rlm failed to converge in 20 steps - maxit=21
    slope = fitted_model['coefficients']['x']
    #intercept = fitted_model['coefficients']['(Intercept)']
    if return_rsqrd == 'yes':
        from scipy import stats
        rsqrd = math.pow(stats.linregress(ls1,ls2)[2],2)
        return slope,rsqrd
    else:
        return slope
Example #13
    def compute(self, waveforms, spike_times, centers=16, iter_base=10,
                base_centers=30, hclust_method="ward"):

        if not R_available: return None

        r.library('e1071')

        ret = r.bclust(waveforms, centers=centers, iter_base=iter_base,
                       base_centers=base_centers, hclust_method=hclust_method)
        #~ print ret

        code = numpy.array(ret['cluster'], dtype='i')
        #~ print type(code)

        return code
Example #14
	def rpart_fit_and_predict(self, all_data, known_data, rpart_cp, loss_matrix, prior_prob, bit_string='11111'):
		"""
		11-09-05
			1st use known_data to get the fit model
			2nd use the fit model to do prediction on all_data, result is prob for each class
		11-09-05 add rpart_cp
		11-17-05
			add loss_matrix, prior_prob
			return two pred
		"""
		sys.stderr.write("rpart fitting and predicting...\n")
		r.library("rpart")
		coeff_name_list = ['p_value', 'recurrence', 'connectivity', 'cluster_size', 'gradient']
		formula_list = []
		for i in range(len(bit_string)):
			if bit_string[i] == '1':
				formula_list.append(coeff_name_list[i])
		#11-17-05 transform into array
		all_data = array(all_data)
		known_data = array(known_data)
		
		set_default_mode(NO_CONVERSION)
		data_frame = r.as_data_frame({"p_value":known_data[:,0], "recurrence":known_data[:,1], "connectivity":known_data[:,2], \
			"cluster_size":known_data[:,3], "gradient":known_data[:,4], "is_correct":known_data[:,-1]})
		if prior_prob:
			prior_prob = [prior_prob, 1-prior_prob]	#get the full list
			fit = r.rpart(r("is_correct~%s"%'+'.join(formula_list)), data=data_frame, method="class", control=r.rpart_control(cp=rpart_cp),\
				parms=r.list(prior=prior_prob, loss=r.matrix(loss_matrix) ) )
		else:
			fit = r.rpart(r("is_correct~%s"%'+'.join(formula_list)), data=data_frame, method="class", control=r.rpart_control(cp=rpart_cp),\
				parms=r.list(loss=r.matrix(loss_matrix) ) )
		
		set_default_mode(BASIC_CONVERSION)
		pred_training = r.predict(fit, data_frame, type=["class"])
		del data_frame
		
		set_default_mode(NO_CONVERSION)
		all_data_frame = r.as_data_frame({"p_value":all_data[:,0], "recurrence":all_data[:,1], "connectivity":all_data[:,2], \
			"cluster_size":all_data[:,3], "gradient":all_data[:,4], "is_correct":all_data[:,-1]})
		set_default_mode(BASIC_CONVERSION)
		pred = r.predict(fit, all_data_frame, type=["class"])	#11-17-05 type=c("class")
		del all_data_frame
		sys.stderr.write("Done rpart fitting and predicting.\n")
		return pred, pred_training
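# The set_default_mode() calls above are the standard rpy idiom: under
# NO_CONVERSION, r.as_data_frame() hands back an unconverted R object that
# r.rpart()/r.predict() can consume as a real data.frame, while switching to
# BASIC_CONVERSION makes the predictions come back as plain Python values.
# A minimal sketch of the same round trip (made-up data; assumes rpy):
#
#   set_default_mode(NO_CONVERSION)
#   df = r.as_data_frame({"x": [1., 2., 3.], "y": [2.1, 3.9, 6.2]})
#   fit = r.lm(r("y ~ x"), data=df)
#   set_default_mode(BASIC_CONVERSION)
#   print r.predict(fit, df)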
Example #15
    def compute(
        self,
        waveforms,
        spike_times,
        minG=1,
        maxG=32,
    ):

        #~ from rpy import r

        if not R_available: return None

        r.library('mclust')

        ret = r.Mclust(waveforms, minG=minG, maxG=maxG)

        code = array(ret['classification'], dtype='i')

        return code
Example #16
    def _mcmc_betas_same_sources(self, tag_list):
        """
        The given tag_list contains tags that all have the same features
        available. Train on the tags in tag_list using only the songs
        in self.only_these_songs, or all available songs if
        self.only_these_songs is None.
        """
        if not self.production_run:
            self.mcmc_reps = 75 # save time
        rc.library("bayesm")
        data = []
        for tag in tag_list:
            data.append(rc.list(X=self.X[tag], y=self.y[tag]))
        rpy.set_default_mode(rpy.NO_CONVERSION) # Turn off conversion so that lm returns Robj.
        data = rc.list(*data)
        if self.regtype in ["Hierarchical Linear", "Hierarchical Mixture"]:
            Data = rc.list(regdata=data)
        elif self.regtype == "Hierarchical Logistic":
            Data = rc.list(lgtdata=data)
        if self.regtype == "Hierarchical Mixture":
            Prior = rc.list(ncomp=self.ncomp)
        Mcmc = rc.list(R=self.mcmc_reps)
        rpy.set_default_mode(rpy.BASIC_CONVERSION)
        try:
            if self.regtype == "Hierarchical Linear":
                output = rc.rhierLinearModel(Data=Data, Mcmc=Mcmc)
            elif self.regtype == "Hierarchical Logistic":
                output = rc.rhierBinLogit(Data=Data, Mcmc=Mcmc)
            elif self.regtype == "Hierarchical Mixture":
                output = rc.rhierLinearMixture(Data=Data, Prior=Prior, Mcmc=Mcmc)
        except:
            #pdb.set_trace()
            self._info_about_r_error(tag_list)
            return
        beta_matrix = output['betadraw'].mean(axis=2) # nregressions x ncoeffs, averaged along third dim
        matrix_index = 0
        for tag in tag_list:
            cur_tag_beta_vec = beta_matrix[matrix_index, :]
            beta_dict_list = [dict([('beta', coeff)]) for coeff in cur_tag_beta_vec]
            self.beta[tag] = dict(zip(self.sorted_sources[tag], beta_dict_list))
            self.stats[tag] = dict() # I'm not currently storing any stats for hierarchical regressions.
            matrix_index += 1
Example #17
def smooth_data(data):
    sample_data=data[0]
    window_size=data[1]
    for rep_num in range(sample_data.get_number_of_replicates()):
        for chrom in sample_data.get_chromosome_list():
            met_manager = sample_data.get_manager_of_chrom(chrom)
            pos=[]
            m=[]
            cov=[]
            for methyl_c in met_manager:
                pos.append(methyl_c.position)
                m.append(methyl_c.get_methylrate(rep_num))
                cov.append(methyl_c.get_coverage(rep_num))
            r.warnings()
            r.library("locfit")
            r.assign("pos",pos)
            r.assign("m",m)
            r.assign("cov",cov)
            r.assign("h",window_size)
            r("posm=data.frame(pos,m)")
            r("fit=locfit(m~lp(pos,h=h),data=posm,maxk=1000000,weights=cov)")
            r("pp=preplot(fit,where='data',band='local',newdata=data.frame(pos=pos))")
            fit=r("pp")["fit"]
            xev = r("unlist(pp$xev$xev)")
            for i, each in enumerate(xev):
                position=int(each[0])
                methyl_c=met_manager.get_methyl_c(position)
                if methyl_c:
                    smoothedrate=None
                    if 1 <= fit[i]:
                        smoothedrate=1
                    elif fit[i] <= 0:
                        smoothedrate=0
                    else:
                        smoothedrate=fit[i]
                    methyl_c.update_methylrate(rep_num,smoothedrate)
                else:
                    sys.stderr.write("methyl_c doesn't exist at %d",position)
                    sys.exit(1)
Example #18
    def compute(self,
                waveforms,
                spike_times,
                centers=16,
                iter_base=10,
                base_centers=30,
                hclust_method="ward"):

        if not R_available: return None

        r.library('e1071')

        ret = r.bclust(waveforms,
                       centers=centers,
                       iter_base=iter_base,
                       base_centers=base_centers,
                       hclust_method=hclust_method)
        #~ print ret

        code = numpy.array(ret['cluster'], dtype='i')
        #~ print type(code)

        return code
Example #19
def get_linprog(lc, trend_set):
    '''
    Return the de-trended lc by linear programming.
    It constrains the free parameters to be greater than 0.

    lc :
        Original light curve of flux.

    trend_set :
        Set of trend light curves constructed by the create_trend routine.

    return :
        De-trended light curve.
    '''

    r.library('linprog')

    X = transpose(trend_set)
    #dmat = r.crossprod(X, X)
    dvec = r.crossprod(lc, X)

    results = r.solveLP(dvec, zeros([len(trend_set)]), r.diag(len(trend_set)))
    print results['opt'], results['solution']
    sys.exit()
Example #20
# Written by Joyce Tipping <*****@*****.**>
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
#
# This library provides helpful functions for approximating the binomial with the skew normal

from __future__ import division
from rpy import r
import math
r.library('sn')


#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
# Binomial
#
def binomial_pmf (n, p, pairs=False):
  # Given n and p, returns a binomial pmf in two arrays of xs and ys, respectively
  # If pairs is True, it returns the pmf as an array of points
  xs = r.seq(0, n)
  ys = r.dbinom(xs, n, p)
  return pair(xs, ys) if pairs else {'xs':xs, 'ys':ys}


#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
# Normal Approximation
#
def normal_pdf (mu, sigma, pairs=False):
  # Given mu and sigma, returns a normal pdf in two arrays of xs and ys, respectively
  # If pairs is True, it returns the pdf as an array of points
  xs = r.seq(mu - 5*sigma, mu + 5*sigma, 0.01)
  ys = r.dnorm(xs, mu, sigma)
  return pair(xs, ys) if pairs else {'xs':xs, 'ys':ys}
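# A made-up sanity check of the approximation this module targets: for large
# n, Binomial(n, p) is close to Normal(n*p, sqrt(n*p*(1-p))), which the skew
# normal (the 'sn' library loaded above) then corrects for skewness.
n, p = 100, 0.3
mu, sigma = n*p, math.sqrt(n*p*(1 - p))
pmf = binomial_pmf(n, p)      # exact binomial points
pdf = normal_pdf(mu, sigma)   # its normal approximation on the same scale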
Example #21
    def _heatmap_R(self,labels,filename=None,format='pdf',**kwargs):
        """Plot a clustered heat map using R."""
        # Note on coloring: test if data is z-score normalized and if
        #  true, force scale='row' for the heat map so that this is
        #  reflected in the legend (in fact, the z-score is recomputed
        #  over the data for heatmap.2 coloring but it looks identical
        #  and in any case, the clustering is done on the original
        #  data --- which is NOT clear from the heatmap docs)
        try:
            from rpy import r, RException
        except ImportError:
            from rpy2.rpy_classic import r, RException

        hm_args = dict(scale='none',
                       margins=(10,10), # space for long labels
                       N_colors=32,
                       )
        hm_args.update(kwargs)
        N_colors = hm_args.pop('N_colors')  # N_colors not a true heatmap argument
        if filename is not None:
            interactive = False
            def r_format():
                r[format](filename)
            def r_dev_off():
                r.dev_off()
        else:
            interactive = True
            def r_format():
                try:
                    r.X11()
                except RException:
                    r.quartz()
            def r_dev_off():
                msg(1,"Interactive R display. Kill with hop.analysis.kill_R()")
                pass
        if self.normalization_method == 'zscore' and hm_args['scale'] != 'row':
            hm_args['scale'] = 'row'
            msg(3,"Forcing scale='row' for z-score normalized heat map.\n")
            # (This only has the effect to put the label 'Row Z-score' in the graph)
        try:
            r.library('colorRamps')
            r_color = r.matlab_like(N_colors) # getting somewhat close to matplotlib 'jet'
        except RException:
            msg(1,"For matplotlib-like colors install the R-package 'colorRamps':\n"
                ">>> import rpy\n"
                ">>> rpy.r.install_packages('colorRamps',type='source')")
            r_color = r.topo_colors(N_colors)
        try:
            r.library('gplots')
            r_heatmap = r.heatmap_2
            hm_args.update(dict(key=r.TRUE, symkey=r.FALSE,
                                density_info='histogram', trace='none'))
        except RException:
            msg(1,"For heatmap with a legend install the R-package 'gplots' via \n"
                ">>> import rpy\n"
                ">>> rpy.r.install_packages('gplots',type='source')")
            r_heatmap = r.heatmap

        r_format()
        r_heatmap(self.heatmap,
                  labRow=labels['observables'],
                  labCol=labels['columns'],
                  col=r_color,
                  **hm_args)
        r_dev_off()
Example #22
    def rpart_fit(self, known_data, parameter_list, bit_string="11111"):
        """
		11-09-05
			1st use known_data to get the fit model
			2nd use the fit model to do prediction on all_data, result is prob for each class
		11-09-05 add rpart_cp
		11-17-05
			add loss_matrix, prior_prob
			return two pred
		11-23-05
			split fit and predict. rpart_fit_and_predict() is split into rpart_fit() and rpart_predict()
		11-27-05
			r cleanup
		03-17-06
			use parameter_list instead
		"""
        if self.debug:
            sys.stderr.write("Doing rpart_fit...\n")
            # 03-17-06
        rpart_cp, loss_matrix, prior_prob = parameter_list

        # 11-27-05 r cleanup
        from rpy import r

        r.library("rpart")

        coeff_name_list = ["p_value", "recurrence", "connectivity", "cluster_size", "gradient"]
        formula_list = []
        for i in range(len(bit_string)):
            if bit_string[i] == "1":
                formula_list.append(coeff_name_list[i])
                # 11-17-05 transform into array
        known_data = array(known_data)

        set_default_mode(NO_CONVERSION)
        data_frame = r.as_data_frame(
            {
                "p_value": known_data[:, 0],
                "recurrence": known_data[:, 1],
                "connectivity": known_data[:, 2],
                "cluster_size": known_data[:, 3],
                "gradient": known_data[:, 4],
                "is_correct": known_data[:, -1],
            }
        )
        if prior_prob:
            prior_prob = [prior_prob, 1 - prior_prob]  # get the full list
            fit = r.rpart(
                r("is_correct~%s" % "+".join(formula_list)),
                data=data_frame,
                method="class",
                control=r.rpart_control(cp=rpart_cp),
                parms=r.list(prior=prior_prob, loss=r.matrix(loss_matrix)),
            )
        else:
            fit = r.rpart(
                r("is_correct~%s" % "+".join(formula_list)),
                data=data_frame,
                method="class",
                control=r.rpart_control(cp=rpart_cp),
                parms=r.list(loss=r.matrix(loss_matrix)),
            )
        del data_frame
        if self.debug:
            sys.stderr.write("Done rpart_fit.\n")
        return fit
Example #23
    def __init__(self, y, design, model_type=r.lm, **kwds):
        ''' Set up and estimate R model with data and design '''
        r.library('MASS')  # still needs to be in test, but also here for
        # logical tests at the end not to show an error
        self.y = np.array(y)
        self.design = np.array(design)
        self.model_type = model_type
        self._design_cols = [
            'x.%d' % (i + 1) for i in range(self.design.shape[1])
        ]
        # Note the '-1' for no intercept - this is included in the design
        self.formula = r('y ~ %s-1' % '+'.join(self._design_cols))
        self.frame = r.data_frame(y=y, x=self.design)
        rpy.set_default_mode(rpy.NO_CONVERSION)
        results = self.model_type(self.formula, data=self.frame, **kwds)
        self.robj = results  # keep the Robj model so it can be
        # used in the tests
        rpy.set_default_mode(rpy.BASIC_CONVERSION)
        rsum = r.summary(results)
        self.rsum = rsum
        # Provide compatible interface with scipy models
        self.results = results.as_py()

        #        coeffs = self.results['coefficients']
        #        self.beta0 = np.array([coeffs[c] for c in self._design_cols])
        self.nobs = len(self.results['residuals'])
        if isinstance(self.results['residuals'], dict):
            self.resid = np.zeros((len(self.results['residuals'].keys())))
            for i in self.results['residuals'].keys():
                self.resid[int(i) - 1] = self.results['residuals'][i]
        else:
            self.resid = self.results['residuals']
        self.fittedvalues = self.results['fitted.values']
        self.df_resid = self.results['df.residual']
        self.params = rsum['coefficients'][:, 0]
        self.bse = rsum['coefficients'][:, 1]
        self.bt = rsum['coefficients'][:, 2]
        try:
            self.pvalues = rsum['coefficients'][:, 3]
        except:
            pass
        self.rsquared = rsum.setdefault('r.squared', None)
        self.rsquared_adj = rsum.setdefault('adj.r.squared', None)
        self.aic_R = rsum.setdefault('aic', None)
        self.fvalue = rsum.setdefault('fstatistic', None)
        if self.fvalue and isinstance(self.fvalue, dict):
            self.fvalue = self.fvalue.setdefault('value', None)  # for wls
        df = rsum.setdefault('df', None)
        if df:  # for RLM, works for other models?
            self.df_model = df[0] - 1  # R counts intercept
            self.df_resid = df[1]
        self.bcov_unscaled = rsum.setdefault('cov.unscaled', None)
        self.bcov = rsum.setdefault('cov.scaled', None)
        if 'sigma' in rsum:
            self.scale = rsum['sigma']
        elif 'dispersion' in rsum:
            self.scale = rsum['dispersion']
        else:
            self.scale = None
        self.llf = r.logLik(results)

        if model_type == r.glm:
            self.getglm()
        if model_type == r.rlm:
            self.getrlm()
Example #24
    def __init__(self, y, design, model_type=r.lm, **kwds):
        """ Set up and estimate R model with data and design """
        r.library("MASS")  # still needs to be in test, but also here for
        # logical tests at the end not to show an error
        self.y = np.array(y)
        self.design = np.array(design)
        self.model_type = model_type
        self._design_cols = ["x.%d" % (i + 1) for i in range(self.design.shape[1])]
        # Note the '-1' for no intercept - this is included in the design
        self.formula = r("y ~ %s-1" % "+".join(self._design_cols))
        self.frame = r.data_frame(y=y, x=self.design)
        rpy.set_default_mode(rpy.NO_CONVERSION)
        results = self.model_type(self.formula, data=self.frame, **kwds)
        self.robj = results  # keep the Robj model so it can be
        # used in the tests
        rpy.set_default_mode(rpy.BASIC_CONVERSION)
        rsum = r.summary(results)
        self.rsum = rsum
        # Provide compatible interface with scipy models
        self.results = results.as_py()

        #        coeffs = self.results['coefficients']
        #        self.beta0 = np.array([coeffs[c] for c in self._design_cols])
        self.nobs = len(self.results["residuals"])
        if isinstance(self.results["residuals"], dict):
            self.resid = np.zeros((len(list(self.results["residuals"].keys()))))
            for i in list(self.results["residuals"].keys()):
                self.resid[int(i) - 1] = self.results["residuals"][i]
        else:
            self.resid = self.results["residuals"]
        self.fittedvalues = self.results["fitted.values"]
        self.df_resid = self.results["df.residual"]
        self.params = rsum["coefficients"][:, 0]
        self.bse = rsum["coefficients"][:, 1]
        self.bt = rsum["coefficients"][:, 2]
        try:
            self.pvalues = rsum["coefficients"][:, 3]
        except:
            pass
        self.rsquared = rsum.setdefault("r.squared", None)
        self.rsquared_adj = rsum.setdefault("adj.r.squared", None)
        self.aic_R = rsum.setdefault("aic", None)
        self.fvalue = rsum.setdefault("fstatistic", None)
        if self.fvalue and isinstance(self.fvalue, dict):
            self.fvalue = self.fvalue.setdefault("value", None)  # for wls
        df = rsum.setdefault("df", None)
        if df:  # for RLM, works for other models?
            self.df_model = df[0] - 1  # R counts intercept
            self.df_resid = df[1]
        self.bcov_unscaled = rsum.setdefault("cov.unscaled", None)
        self.bcov = rsum.setdefault("cov.scaled", None)
        if "sigma" in rsum:
            self.scale = rsum["sigma"]
        elif "dispersion" in rsum:
            self.scale = rsum["dispersion"]
        else:
            self.scale = None
        self.llf = r.logLik(results)

        if model_type == r.glm:
            self.getglm()
        if model_type == r.rlm:
            self.getrlm()
Example #25
aa=np.genfromtxt(r'gspc_table.csv',skiprows=0, delimiter=',',names=True)

cl = aa['Close']
ret = np.diff(np.log(cl))[-2000:]*1000.

ggmod = Garch(ret - ret.mean())#hgjr4[:nobs])#-hgjr4.mean()) #errgjr4)
ggmod.nar = 1
ggmod.nma = 1
ggmod._start_params = np.array([-0.1, 0.1, 0.1, 0.1])
ggres = ggmod.fit(start_params=np.array([-0.1, 0.1, 0.1, 0.0]), maxiter=1000,method='bfgs')
print 'ggres.params', ggres.params
garchplot(ggmod.errorsest, ggmod.h, title='Garch estimated')


from rpy import r
r.library('fGarch')
f = r.formula('~garch(1, 1)')
fit = r.garchFit(f, data = ret - ret.mean(), include_mean=False)
f = r.formula('~arma(1,1) + ~garch(1, 1)')
fit = r.garchFit(f, data = ret)


ggmod0 = Garch0(ret - ret.mean())#hgjr4[:nobs])#-hgjr4.mean()) #errgjr4)
ggmod0.nar = 1
ggmod0.nma = 1
start_params = np.array([-0.1, 0.1, ret.var()])
ggmod0._start_params = start_params #np.array([-0.6, 0.1, 0.2, 0.0])
ggres0 = ggmod0.fit(start_params=start_params, maxiter=2000)
print 'ggres0.params', ggres0.params

g11res = optimize.fmin(lambda params: -loglike_GARCH11(params, ret - ret.mean())[0], [0.01, 0.1, 0.1])
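# The r.garchFit() calls above fit the same GARCH(1, 1) in R's fGarch package
# as a cross-check on the Python Garch/Garch0 estimates: the first call uses
# include_mean=False to match the demeaned series, the second refits the raw
# series with a combined ARMA(1,1) mean specification.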
Example #26
from util import *
import os
import colors
from rpy import r
import plot_utilities
import copy
import re
import time

r.library("RColorBrewer")

RESULT_FILENAME = '/home/twalter/data/JoanaDruggableResult.csv'
HTML_DIR = '/netshare/mitofiler/Thomas/Joana_HTML/html'
IMAGE_BASE_DIR = '/netshare/mitofiler/Thomas/Joana'
#PICKLERESULTDIR = '/netshare/mitofiler/Thomas/Joana_HTML'

LOCAL_PLOT_DIR = os.path.join(HTML_DIR, 'plots')
NB_ROW = 32
NB_COL = 12


def get_color_brewer_pattern(brew_str):
    hex_pattern = r.brewer_pal(9, brew_str)
    #def hex2int(hexStr): return([int(hexStr[1:3], 16), int(hexStr[3:5], 16), int(hexStr[5:], 16)])
    pattern = [(int(hexStr[1:3], 16) / 255.0, int(hexStr[3:5], 16) / 255.0, int(hexStr[5:], 16) / 255.0) for hexStr in hex_pattern]
    return pattern
    
def convert_numeric_table_to_res(filename=RESULT_FILENAME):
    tabD = readTableFromFile(filename, sep=';', header=True)
    lt_idL = ['%s_%s' % (x[2:], y) for x,y in zip(tabD['Labtek'], tabD['Date']) ]
    idL = ['%s--%03i' % (x, int(y)) for x, y in zip(lt_idL, tabD['Spot'])]
Example #27
        __use_cython__ = False

__use_weave__ = True
if __use_weave__:
    try:
        from scipy import weave
        weave.inline('std::cout << "weave works!" << std::endl;')
    except:
        print "WARNING: Coul not load weave"
        __use_weave__ = False

__use_R__ = True
if __use_R__:
    try:
        from rpy import r as R
        R.library("CircStats")
    except:
        print "WARNING: Could not load R-project"
        __use_R__ = False

#
# load (and reload) modules
#
modules = ['model', 'utils', 'circstats', 'plotlib', 'f_energy']
for name in modules:
    mod = __import__(name, globals(), locals(), [])
    # reload modules (useful during development)
    reload(mod)

# from model import *
# from circstats import *
Example #28
	return data, is_correct_list

known_fname = '/tmp/hs_fim_92m5x25bfsdfl10q0_7gf1.known'
unknown_fname = '/tmp/hs_fim_92m5x25bfsdfl10q0_7gf1.unknown'

known_data, known_is_correct_list = read_data(known_fname)
unknown_data, unknown_is_correct_list = read_data(unknown_fname)

from numarray import array
from rpy import r, set_default_mode,NO_CONVERSION,BASIC_CONVERSION
set_default_mode(NO_CONVERSION)
#pack data into data_frame
known_data = array(known_data)
known_data_frame = r.as_data_frame({"p_value":known_data[:,0], "recurrence":known_data[:,1], "connectivity":known_data[:,2], \
	"cluster_size":known_data[:,3], "gradient":known_data[:,4]})
unknown_data = array(unknown_data)
unknown_data_frame = r.as_data_frame({"p_value":unknown_data[:,0], "recurrence":unknown_data[:,1], "connectivity":unknown_data[:,2], \
	"cluster_size":unknown_data[:,3], "gradient":unknown_data[:,4]})
#start to call randomF.r to run randomForest
r.library('randomForest')
r.source('randomF.r')
#rf_model still needs to be in pure R object
rf_model = r.randomF(known_data_frame, known_data[:,-1])

set_default_mode(BASIC_CONVERSION)
unknown_pred = r.predictRandomF(rf_model, unknown_data_frame)

rf_model = rf_model.as_py(BASIC_CONVERSION)
print rf_model.keys()
print rf_model['confusion']
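# Note the conversion pattern above: rf_model is left as an unconverted R
# object so it can be handed straight back to the R-side predictRandomF()
# (defined in the project-local randomF.r pulled in via r.source), and only
# as_py() turns it into a Python dict once its fields need inspecting.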
Example #29
def krige_to_grid(grid_fname, obs_x, obs_y, obs_data, vgm_par):
    """Interpolate point data onto a grid using Kriging.

    Interpolate point data onto a regular rectangular grid of square cells using
    Kriging with a predefined semi-variogram.  The observed data locations must
    be specified in the same projection and coordinate system as the grid, which
    is defined in an ArcGIS raster file.

    Parameters
    ----------
    grid_fname : string
        Filename of an ArcGIS float grid raster defining the required grid to
        Krige onto.  All cells are included regardless of their value.
    obs_x : array_like
        The x coordinates of the observation locations.
    obs_y : array_like
        The y coordinates of the observation locations.
    obs_data : array_like
        The data values at the observation locations.
    vgm_par : dict
        A dictionary describing the semi-variogram model.  Required keys are:
        'model' can be one of {'Lin', 'Exp', 'Sph', 'Gau'}
        'nugget' must be a scalar
        'range' must be a scalar
        'sill' must be a scalar

    Returns
    -------
    kriged_est : 2darray
        A 2D array containing the Kriged estimates at each point on the
        specified rectangular grid.

    Notes
    -----
    This function requires that R, RPy and the R gstat library are correctly
    installed.

    """
    grid, headers = arcfltgrid.read(grid_fname)
    cols = headers[0]
    rows = headers[1]
    x0 = headers[2]
    y0 = headers[3]
    cell_size = headers[4]
    # TO DO: adjust x0, y0 by 0.5*cell_size if llcorner..

    # define the grid (pixel centres)
    xt, yt = np.meshgrid(np.linspace(x0, x0 + (cols-1)*cell_size, num=cols),
                         np.linspace(y0 + (rows-1)*cell_size, y0, num=rows))

    xt = xt.flatten()
    yt = yt.flatten()

    # Krige using gstat via RPy
    r.library('gstat')
    rpy.set_default_mode(rpy.NO_CONVERSION)

    obs_frame = r.data_frame(x=obs_x, y=obs_y, data=obs_data)
    target_grid = r.data_frame(x=xt, y=yt)

    v = r.vgm(vgm_par['sill'], vgm_par['model'],
              vgm_par['range'], vgm_par['nugget'])

    result = r.krige(r('data ~ 1'), r('~ x + y'),
                     obs_frame, target_grid, model=v)

    rpy.set_default_mode(rpy.BASIC_CONVERSION)

    result = result.as_py()

    kriged_est = np.array(result['var1.pred'])
    kriged_est = kriged_est.reshape(rows, cols)

    return kriged_est
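# A hypothetical call (the grid file, coordinates and variogram below are all
# made up; as the docstring notes, R, RPy and gstat must be installed):
#
#   est = krige_to_grid('mask.flt', [1.0, 5.0, 9.0], [2.0, 4.0, 8.0],
#                       [3.5, 4.1, 2.8],
#                       {'model': 'Exp', 'nugget': 0.1,
#                        'range': 5.0, 'sill': 1.0})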
Example #30
    for i in range(n):
        for j in range(m):
            D[i, j] = norm(SS1[:, i] - SS2[:, j])

    return D


if __name__ == "__main__":

    import os
    from dlab import pcmio, labelio
    from pylab import figure, cm, show
    from rpy import r

    r.library("fpc")
    r.library("MASS")

    # FFT parameters
    window = 10.0  # ms
    shift = 2.0
    padding = 5  # number of frames to use for padding the spectrogram; these are cut off later

    # "standard" DTW cost matrix:
    costs = [[1, 1, 1], [1, 0, 1], [0, 1, 1]]
    # tends to produce smoother paths:
    costs = [[1, 1, 1], [1, 0, 1], [0, 1, 1], [1, 2, 2], [2, 1, 2]]
    # prevents more than one frame from being omitted from either signal
    costs = [[1, 1, 1], [1, 2, 2], [2, 1, 2]]

    # example data
Example #31
from plugin import Projections
from rpy import r
from numpy import dot, array
import wx

r.library("fastICA")

class Ica(Projections):
    name = "Ica"
    def Main(self,model):
        # self.model = model
        data = array(model.GetCurrentData()[:])

        k = wx.GetNumberFromUser("ICA Dialog",
                                 "Enter number of independent components",
                                 "k",
                                 1)

        ica_data = r.fastICA(data, k, alg_typ = "deflation", fun = "logcosh", alpha = 1, method = "R", row_norm = 0, maxit = 200, tol = 0.0001, verbose = 1)
        fields = ['Comp%02d' % c for c in range(1, k+1)]
        model.updateHDF('ICA', ica_data['S'], fields=fields)
        
Example #32
def find_group_DW(tree, dist_matrix, mes=0, seed_max=0, l_significance=0.1):
    '''
    For more details, see Kim et al. 2008.

    tree :
        Tree structure returned from the Pycluster module.

    dist_matrix :
        Distance matrix (= 1. - correlation matrix)

    mes = 0 :
        Total number of measurements of each light curve.

    seed_max = 0 :
        To get a tighter seed; 1 ~ 10 are good values, and '10' gives tighter
        clusters than '1'.

    return :
        List of clusters.
    '''

    r.library('nortest')

    clusters = []
    #print tree, len(tree)
    density_list = []
    for i in range(len(dist_matrix) - 1):
        for j in range(i + 1, len(dist_matrix)):
            density_list.append(dist_matrix[i][j])
    density_list_clip = sigma_clipping(density_list, sigma=3.)
    overall_density = (max(density_list_clip) -
                       min(density_list_clip)) / len(dist_matrix)
    #print overall_density, mean(density_list_clip), std(density_list_clip)

    #get highly correlated pair of elements.
    initial_seed = []
    for i in range(len(tree)):
        #both the left and right elements have to be stars, not links to other clusters.
        if tree[i].left >= 0 and tree[i].right >= 0:
            #to get tighter elements.
            if dist_matrix[tree[i].left][
                    tree[i].right] <= median(density_list_clip) / seed_max:
                if mes == 0:
                    initial_seed.append(i)
                elif dist_matrix[tree[i].left][tree[i].right] <= (
                        1. - 3. / math.sqrt(mes)):
                    initial_seed.append(i)
    #print initial_seed

    #start from highly correlated initial pair.
    for i in initial_seed:
        #print tree[i]
        current_node = i
        while current_node < len(tree) - 1:
            cluster_1 = []
            cluster_2 = []
            #find base cluster --> cluster_1
            simplify_group(find_group_with_node_index(tree, current_node),
                           cluster_1)
            #find cluster which will be merged --> cluster_2
            dummy = find_one_side_group(tree, (current_node + 1) * -1)
            current_node = dummy[0]
            simplify_group(dummy[1], cluster_2)

            #check the density changes with overall density
            #initial density
            d_1 = []
            for ele_i in range(len(cluster_1) - 1):
                for ele_j in range(ele_i + 1, len(cluster_1)):
                    if ele_i != ele_j:
                        d_1.append(
                            dist_matrix[cluster_1[ele_i]][cluster_1[ele_j]])
            #density after merged
            d_merge = []
            cluster_3 = hstack([cluster_1, cluster_2])
            for ele_i in range(len(cluster_3) - 1):
                for ele_j in range(ele_i + 1, len(cluster_3)):
                    if ele_i != ele_j:
                        d_merge.append(
                            dist_matrix[cluster_3[ele_i]][cluster_3[ele_j]])

            d_1 = array(d_1)
            d_merge = array(d_merge)
            if len(d_merge) < 8:
                continue
            else:
                #the resulting clusters are almost identical. not use anymore.
                #d_merge = array(d_merge)
                #d_merge = .5 * log((1. + d_merge) / (1. - d_merge))

                ad = r.ad_test(d_merge)
                ad_p = ad['p.value']
                p_value = ad_p

                #check the level of significance
                #if it's out of normality, the previous cluster is the final cluster.
                if p_value < l_significance:
                    #because the AD test needs at least 8 elements.
                    if len(cluster_1) >= 5:
                        #print cluster_1
                        clusters.append(cluster_1)
                    break
                #it's still Gaussian, but stop if outliers come into the cluster.
                #the resulting clusters are almost identical. not use anymore.
                #elif len(d_1[where(d_1 > mean(density_list_clip))]) > 0:
                #	if len(cluster_1) >= 5:
                #		clusters.append(cluster_1)
                #	break

    return clusters
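# Reading the main loop above: starting from each tightly correlated seed
# pair, a cluster keeps absorbing the next merge in the tree until nortest's
# ad.test (r.ad_test, the Anderson-Darling normality test) rejects normality
# of the merged pairwise distances at l_significance; the cluster from just
# before the rejection is kept if it has at least 5 members (Kim et al. 2008).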
Example #33
        __use_cython__ = False

__use_weave__ = True
if __use_weave__:
    try:
        from scipy import weave
        weave.inline(r'printf("weave works!\n");')
    except:
        print "WARNING: Could not load weave"
        __use_weave__ = False

__use_R__ = True
if __use_R__:
    try:
        from rpy import r as R
        R.library("CircStats")
    except:
        print "WARNING: Could not load R-project"
        __use_R__ = False

#
# load (and reload) modules
#
modules = ['model','utils','circstats','plotlib','f_energy']
for name in modules:
    mod = __import__(name,globals(),locals(),[])
    # reload modules (useful during development)
    reload(mod)

# from model import *
# from circstats import *
Example #34
    SS1 = ifft(nx.log10(nx.sqrt(S1)), axis=0)
    SS2 = ifft(nx.log10(nx.sqrt(S2)), axis=0)    

    for i in range(n):
        for j in range(m):
            D[i,j] = norm(SS1[:,i] - SS2[:,j])

    return D

if __name__=="__main__":

    import os
    from dlab import pcmio, labelio
    from pylab import figure, cm, show
    from rpy import r
    r.library('fpc')
    r.library('MASS')

    # FFT parameters
    window = 10.  # ms
    shift = 2.
    padding = 5  # number of frames to use for padding the spectrogram; these are cut off later

    # "standard" DTW cost matrix:
    costs = [[1,1,1],[1,0,1],[0,1,1]]
    # tends to produce smoother paths:
    costs = [[1,1,1],[1,0,1],[0,1,1],[1,2,2],[2,1,2]]
    # prevents more than one frame from being omitted from either signal
    costs = [[1,1,1],[1,2,2],[2,1,2]]

    # example data
Example #35
ggmod.nma = 1
start_params = np.array([-0.6, 0.2, 0.1])
ggmod0._start_params = start_params  #np.array([-0.6, 0.1, 0.2, 0.0])
ggres0 = ggmod0.fit(start_params=start_params, method='bfgs', maxiter=2000)
print 'ggres0.params', ggres0.params

g11res = optimize.fmin(
    lambda params: -loglike_GARCH11(params, errgjr4 - errgjr4.mean())[0],
    [0.93, 0.9, 0.2])
print g11res
llf = loglike_GARCH11(g11res, errgjr4 - errgjr4.mean())
print llf[0]

if 'rpyfit' in examples:
    from rpy import r
    r.library('fGarch')
    f = r.formula('~garch(1, 1)')
    fit = r.garchFit(f, data=errgjr4 - errgjr4.mean(), include_mean=False)

if 'rpysim' in examples:
    from rpy import r
    f = r.formula('~garch(1, 1)')
    #fit = r.garchFit(f, data = errgjr4)
    x = r.garchSim(n=500)
    print 'R acf', tsa.acf(np.power(x, 2))[:15]
    arma3 = Arma(np.power(x, 2))
    arma3res = arma3.fit(start_params=[-0.2, 0.1, 0.5], maxiter=5000)
    print arma3res.params
    arma3b = Arma(np.power(x, 2))
    arma3bres = arma3b.fit(start_params=[-0.2, 0.1, 0.5],
                           maxiter=5000,
Example #36
def krige_to_grid(grid_fname, obs_x, obs_y, obs_data, vgm_par):
    """Interpolate point data onto a grid using Kriging.

    Interpolate point data onto a regular rectangular grid of square cells using
    Kriging with a predefined semi-variogram.  The observed data locations must
    be specified in the same projection and coordinate system as the grid, which
    is defined in an ArcGIS raster file.

    Parameters
    ----------
    grid_fname : string
        Filename of an ArcGIS float grid raster defining the required grid to
        Krige onto.  All cells are included regardless of their value.
    obs_x : array_like
        The x coordinates of the observation locations.
    obs_y : array_like
        The y coordinates of the observation locations.
    obs_data : array_like
        The data values at the observation locations.
    vgm_par : dict
        A dictionary describing the semi-variogram model.  Required keys are:
        'model' can be one of {'Lin', 'Exp', 'Sph', 'Gau'}
        'nugget' must be a scalar
        'range' must be a scalar
        'sill' must be a scalar

    Returns
    -------
    kriged_est : 2darray
        A 2D array containing the Kriged estimates at each point on the
        specified rectangular grid.

    Notes
    -----
    This function requires that R, RPy and the R gstat library are correctly
    installed.

    """
    grid, headers = arcfltgrid.read(grid_fname)
    cols = headers[0]
    rows = headers[1]
    x0 = headers[2]
    y0 = headers[3]
    cell_size = headers[4]
    # TO DO: adjust x0, y0 by 0.5*cell_size if llcorner..

    # define the grid (pixel centres)
    xt, yt = np.meshgrid(
        np.linspace(x0, x0 + (cols - 1) * cell_size, num=cols),
        np.linspace(y0 + (rows - 1) * cell_size, y0, num=rows))

    xt = xt.flatten()
    yt = yt.flatten()

    # Krige using gstat via RPy
    r.library('gstat')
    rpy.set_default_mode(rpy.NO_CONVERSION)

    obs_frame = r.data_frame(x=obs_x, y=obs_y, data=obs_data)
    target_grid = r.data_frame(x=xt, y=yt)

    v = r.vgm(vgm_par['sill'], vgm_par['model'], vgm_par['range'],
              vgm_par['nugget'])

    result = r.krige(r('data ~ 1'),
                     r('~ x + y'),
                     obs_frame,
                     target_grid,
                     model=v)

    rpy.set_default_mode(rpy.BASIC_CONVERSION)

    result = result.as_py()

    kriged_est = np.array(result['var1.pred'])
    kriged_est = kriged_est.reshape(rows, cols)

    return kriged_est