Example #1
def compute_XD_results(n_components=10, max_iter=500):
    # Z (data) and Zerr (per-point covariances) are module-level globals.
    clf = XDGMM(n_components,
                max_iter=max_iter,
                tol=1e-03,
                verbose=False,
                random_state=None)
    clf.fit(Z, Zerr)

    return clf
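
This helper assumes module-level arrays Z and Zerr. A minimal sketch of how those might be built for 2-D data with independent per-feature errors (all names and values here are illustrative, not from the original):

import numpy as np
from astroML.density_estimation import XDGMM   # assumed source of XDGMM

# Toy 2-D features with known Gaussian measurement errors (assumed values).
z1, z2 = np.random.normal(size=(2, 1000))
dz1 = dz2 = 0.1 * np.ones(1000)

Z = np.vstack([z1, z2]).T                 # (N, 2) data matrix
Zerr = np.zeros(Z.shape + Z.shape[-1:])   # (N, 2, 2) covariance per point
diag = np.arange(Z.shape[-1])
Zerr[:, diag, diag] = np.vstack([dz1 ** 2, dz2 ** 2]).T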
Example #2
def XD_filter(y, yerr):
    # Fit a two-component XD mixture to noisy 1-D data.
    clf = XDGMM(n_components=2, n_iter=4)
    Y = y.reshape(y.shape[0], 1)
    Yerr = np.zeros((y.shape[0], 1, 1))
    Yerr[:, 0, 0] = yerr ** 2   # measurement variances on the diagonal
    clf.fit(Y, Yerr)

    return clf.mu, clf.V
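
A hypothetical call, recovering the two component means and variances of a noisy bimodal 1-D sample (data values assumed for illustration):

y = np.concatenate([np.random.normal(0, 1, 500),
                    np.random.normal(5, 1, 500)])
yerr = 0.3 * np.ones_like(y)
mu, V = XD_filter(y, yerr)   # mu: (2, 1), V: (2, 1, 1)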
Example #3
def __init__(self, algorithm='XD', n_comp=20):
    if algorithm == 'XD':
        self.algorithm = 'XD'
        self.lQSO_model = XDGMM(n_components=n_comp, verbose=True)
        self.dud_model = XDGMM(n_components=n_comp, verbose=True)
    elif algorithm == 'RandomForest':
        self.algorithm = 'RandomForest'
        self.trialRF = RandomForestClassifier()
        self.RF_params = {'n_estimators': (10, 50, 200),
                          'max_features': ['auto', 2, 4],
                          'criterion': ['gini', 'entropy'],
                          'min_samples_leaf': [1, 2]}
    return
Example #4
def _xdFit(X, XErr, nGauss, n_iter=10):
    # Seed the deconvolution with an ordinary GMM fit (which ignores errors)...
    gmm = GMM(nGauss, n_iter=n_iter, covariance_type='full').fit(X)
    amp = gmm.weights_
    mean = gmm.means_
    covar = gmm.covars_
    # ...then refine amp/mean/covar in place with extreme deconvolution.
    xd.extreme_deconvolution(X, XErr, amp, mean, covar)
    clf = XDGMM(nGauss)
    clf.alpha = amp
    clf.mu = mean
    clf.V = covar
    return clf
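
The helper relies on names it never imports. A sketch of the imports its calls appear to assume (the pre-0.18 scikit-learn GMM API and Bovy's extreme_deconvolution package), plus a toy invocation:

import numpy as np
import extreme_deconvolution as xd              # assumed: Bovy's XD package
from sklearn.mixture import GMM                 # assumed: scikit-learn < 0.18
from astroML.density_estimation import XDGMM

X = np.random.normal(size=(500, 3))
XErr = np.tile(0.01 * np.eye(3), (500, 1, 1))   # identical diagonal errors
clf = _xdFit(X, XErr, nGauss=2)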
Example #5
def compute_XD_results(x, y, dx, dy, n_components=6, n_iter=50):
    X = np.vstack([x, y]).T
    Xerr = np.zeros(X.shape + X.shape[-1:])
    diag = np.arange(X.shape[-1])
    Xerr[:, diag, diag] = np.vstack([dx ** 2, dy ** 2]).T
    clf = None
    while clf is None:
        try:
            clf = XDGMM(n_components, n_iter=n_iter, verbose=True)
            clf.fit(X, Xerr)
        except np.linalg.LinAlgError:
            # The random GMM initialization occasionally yields a singular
            # covariance; retry until the fit succeeds.
            print('Error: Singular Matrix. Retrying...')
            clf = None
    return clf
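
A hypothetical call with toy data, showing the expected input shapes (values assumed for illustration):

x, y = np.random.normal(size=(2, 1000))
dx = dy = 0.05 * np.ones(1000)
clf = compute_XD_results(x, y, dx, dy, n_components=4, n_iter=100)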
Example #6
def initialise(self):
    nmeas, ndim = self.data.shape

    # Indices of the strictly lower-triangular entries.
    lower_idxs = np.tril_indices(ndim, k=-1)
    if self.data_covariances is not None:
        # Measurement covariances available: one-component XD fit.
        xdgmm = XDGMM(1, 1000, verbose=True)
        xdgmm.fit(self.data, self.data_covariances)
        guess_mu = xdgmm.mu[0]
        guess_Sigma = xdgmm.V[0]
    else:
        # No covariances: plain one-component Gaussian fit.
        gmm = GaussianMixture(1, max_iter=1000, covariance_type='full').fit(self.data)
        guess_mu = gmm.means_[0]
        guess_Sigma = gmm.covariances_[0]
    # Pack the Cholesky factor's strictly lower triangle into a flat vector.
    guess_chol = np.linalg.cholesky(guess_Sigma)
    guess_packed_chol = guess_chol[lower_idxs]
    return guess_mu, guess_Sigma, guess_packed_chol, guess_chol
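
For reference, the packing step keeps only the strictly lower-triangular entries, so the factor can be rebuilt from the packed vector plus the diagonal. A sketch, assuming the four values returned above:

chol = np.zeros((ndim, ndim))
chol[np.tril_indices(ndim, k=-1)] = guess_packed_chol   # strictly lower part
chol[np.diag_indices(ndim)] = np.diag(guess_chol)       # diagonal
assert np.allclose(chol, guess_chol)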
Example #7
def check_single_gaussian(N=100, D=3, sigma=0.1):
    np.random.seed(0)
    mu = np.random.random(D)
    V = np.random.random((D, D))
    V = np.dot(V, V.T)

    X = np.random.multivariate_normal(mu, V, size=N)
    Xerr = np.zeros((N, D, D))
    Xerr[:, range(D), range(D)] = sigma ** 2

    X += np.random.normal(0, sigma, X.shape)

    xdgmm = XDGMM(1)
    xdgmm.fit(X, Xerr)

    # because of sample variance, results will be similar
    # but not identical.  We'll use a fudge factor of 0.1
    assert_allclose(mu, xdgmm.mu[0], atol=0.1)
    assert_allclose(V, xdgmm.V[0], atol=0.1)
Example #8
def getMarginalClf(self, cols=None):
    if cols is None:
        raise ValueError(
            "You have to specify the columns you want to keep so that I can marginalize over the rest."
        )
    rowsV, colsV = np.meshgrid(cols, cols, indexing='ij')
    xdMarginal = XDClf(ngStar=self.ngStar,
                       ngGal=self.ngGal,
                       priorStar=self.priorStar)
    xdMarginal.clfStar = XDGMM(self.ngStar)
    xdMarginal.clfStar.alpha = self.clfStar.alpha
    xdMarginal.clfStar.mu = self.clfStar.mu[:, cols]
    xdMarginal.clfStar.V = self.clfStar.V[:, rowsV, colsV]
    xdMarginal.clfGal = XDGMM(self.ngGal)
    xdMarginal.clfGal.alpha = self.clfGal.alpha
    xdMarginal.clfGal.mu = self.clfGal.mu[:, cols]
    xdMarginal.clfGal.V = self.clfGal.V[:, rowsV, colsV]
    if self.priorStar == 'auto':
        xdMarginal._priorStar = self._priorStar
    return xdMarginal
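
Marginalizing a Gaussian mixture over dropped dimensions only requires selecting the corresponding sub-vector of each mean and sub-block of each covariance; the component weights are unchanged. That is all the meshgrid indexing above does. A standalone sketch for a fitted XDGMM named clf (name assumed):

cols = np.array([0, 2])                          # dimensions to keep
rowsV, colsV = np.meshgrid(cols, cols, indexing='ij')
mu_marg = clf.mu[:, cols]                        # (n_components, 2)
V_marg = clf.V[:, rowsV, colsV]                  # (n_components, 2, 2)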
Example #9
def main(ps1_file, g_lim):
    try:
        ps1 = np.load(ps1_file)
    except Exception:
        # Not a .npy file; fall back to reading an ASCII table.
        ps1 = ascii.read(ps1_file)

    cut_ps1 = cut_func(ps1, g_lim=g_lim)
    ps1_c = coord.SkyCoord(ra=cut_ps1['ra']*u.degree,
                           dec=cut_ps1['dec']*u.degree)

    cut_ps1 = cut_ps1[ps1_c.separation(cluster_c) > (0.12*u.degree)]

    # feature and covariance matrices
    X,Xcov = data_to_X_cov(cut_ps1)

    n_clusters = 8
    n_iter = 512

    xd_clf = XDGMM(n_clusters, n_iter=n_iter, tol=1E-4, verbose=True)
    xd_clf.fit(X[::100], Xcov[::100])

    # pickle this thing! xd_clf
    with open("xd_control_clf.pickle", "wb") as f:
        pickle.dump(xd_clf, f)
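
The companion read, restoring the fitted classifier from disk:

with open("xd_control_clf.pickle", "rb") as f:
    xd_clf = pickle.load(f)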
Example #10
def test_XDGMM_1D_gaussian(N=100, sigma=0.1):
    np.random.seed(0)
    mu = 0
    V = 1

    X = np.random.normal(mu, V, size=(N, 1))
    X += np.random.normal(0, sigma, size=(N, 1))
    Xerr = sigma**2 * np.ones((N, 1, 1))

    xdgmm = XDGMM(1).fit(X, Xerr)

    # because of sample variance, results will be similar
    # but not identical.  We'll use a fudge factor of 0.1
    assert_allclose(mu, xdgmm.mu[0], atol=0.1)
    assert_allclose(V, xdgmm.V[0], atol=0.1)
Example #11
def TryModel(nGaussiansStar, nGaussiansGalaxy):
    print('Star Gaussians: {0}'.format(nGaussiansStar))
    print('Galaxy Gaussians: {0}'.format(nGaussiansGalaxy))

    # Seed each class with an ordinary GMM fit before deconvolving.
    print('Estimating Gaussians')
    GMMStar = GMM(nGaussiansStar, n_iter=10, covariance_type='full').fit(XTrainStar)
    GMMGalaxy = GMM(nGaussiansGalaxy, n_iter=10, covariance_type='full').fit(XTrainGalaxy)

    ampstar = GMMStar.weights_
    meanstar = GMMStar.means_
    covarstar = GMMStar.covars_

    ampgalaxy = GMMGalaxy.weights_
    meangalaxy = GMMGalaxy.means_
    covargalaxy = GMMGalaxy.covars_


    # Results are saved in `amp`, `mean`, and `covar`
    print('Deconvolving star')

    xd.extreme_deconvolution(XTrainStar, XErrTrainStar, ampstar, meanstar, covarstar)

    clfstar = XDGMM(nGaussiansStar)
    clfstar.alpha = ampstar
    clfstar.mu = meanstar
    clfstar.V = covarstar

    print('Deconvolving galaxies')
    xd.extreme_deconvolution(XTrainGalaxy, XErrTrainGalaxy, ampgalaxy, meangalaxy, covargalaxy)

    clfgalaxy = XDGMM(nGaussiansGalaxy)
    clfgalaxy.alpha = ampgalaxy
    clfgalaxy.mu = meangalaxy
    clfgalaxy.V = covargalaxy


    print('Predicting')
    # need to pass XTestStar[i] and XTestGalaxy[i] as np.array([XTestStar[i]]) because internally it assumes 2D matrix
    starPredictions = np.array([predictStar(clfstar, clfgalaxy, np.array([XTestStar[i]]), np.array([XErrTestStar[i]]), i) for i in range(starTestNumber)])
    galaxyPredictions = np.array([predictStar(clfstar, clfgalaxy, np.array([XTestGalaxy[i]]), np.array([XErrTestGalaxy[i]]), i) for i in range(galaxyTestNumber)])

    predictions = np.array(starPredictions.tolist() + galaxyPredictions.tolist())
    results = np.array([1 for i in range(len(starPredictions))] + [0 for i in range(len(galaxyPredictions))])
    report = generateReport(predictions, results)
    return (report['Precision'], report['Recall'], clfstar, clfgalaxy)
Example #12
def mixture_fitting(args):
    '''
    component = 0 : u-g, 1: g-r, 2: r-i
    '''
    zmin, zmax, component = args
    zspec, x, xerr, color, color_err = catalog_slicer(zmin, zmax, component)
    
    Y_xd = np.vstack([x,color[component,:]]).T
    Yerr_xd = np.zeros((Y_xd.shape[0] , 2 , 2))
    Yerr_xd[:,0,0] = xerr
    Yerr_xd[:,1,1] = color_err[component,component,:]
    #fitting a two component GMM to (mi , color(component) space in the redshift bin)
    clf_in = XDGMM(2, n_iter=400)
    clf_in.fit(Y_xd, Yerr_xd)
    # mixture component associated with the red population
    red_index = np.where(clf_in.mu[:,1] == clf_in.mu[:,1].max())[0] 
    mu_red , V_red= clf_in.mu[red_index] , clf_in.V[red_index][0]
    red_line = mu_red[0,1] + V_red[0,1]*(Y_xd[:,0] - mu_red[0,0])/V_red[0,0]
    red_scatter = V_red[1,1] - V_red[0,1]**2./V_red[0,0]
    chi_red = (Y_xd[:,1] - red_line)**2. / (red_scatter + Yerr_xd[:,1,1])
    mask = chi_red < 2
    ##UPDATE : I have converged on using g-r for masking purposes!!
    # at this point we don't care which color component was used for masking
    # we keep the masked galaxies (chisq<2) and fit a linear line to the i-colors.
    # this step is agnostic about the color component used for masking 
    # note that we have used mu_red[0,0] (the first component of the center of the red galaxies) as m_ref
    x_xd = x[mask]
    xerr_xd = xerr[mask]

    Y_xd = np.vstack([color[0,mask], color[1,mask], color[2,mask]]).T
    Yerr_xd = np.zeros((Y_xd.shape[0] , 3 , 3))
    for i in range(3):
        for j in range(3):
            Yerr_xd[:,i,j] = color_err[i,j,mask]
    # fitting a two component GMM to the remainder of galaxies in the three dimensional colorspace
    clf_fi = XDGMM(2, n_iter=400)
    clf_fi.fit(Y_xd, Yerr_xd)
    pure_index = np.where(clf_fi.mu[:,1] == clf_fi.mu[:,1].max())
    mu_pure , V_pure = clf_fi.mu[pure_index] , clf_fi.V[pure_index][0]
    dY_pure = Y_xd - mu_pure
    P = np.linalg.inv(V_pure + Yerr_xd)
    chi = np.einsum('mn,mn->m', np.einsum('ijk,ik->ij', P, dY_pure) , dY_pure)
    pure_mask = chi<2

    zred = zspec[mask][pure_mask]
    zred = zred.reshape(zred.shape[0],1)
    ired = x_xd[pure_mask]
    ired = ired.reshape(ired.shape[0],1)
    eired = xerr_xd[pure_mask]
    eired = eired.reshape(eired.shape[0], 1)
    cred = Y_xd[pure_mask]
    ecred = Yerr_xd[pure_mask].reshape(cred.shape[0],cred.shape[1]*cred.shape[1])
    
    return [mu_red[0,0] , np.hstack([zred,ired,eired,cred,ecred])]
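
A hypothetical invocation: fit the g-r color (component 1) in a thin redshift slice; the catalog behind catalog_slicer is assumed to be loaded elsewhere:

m_ref, red_sample = mixture_fitting((0.10, 0.15, 1))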
Example #13
def compute_XD(n_clusters=12, rseed=0, n_iter=100, verbose=True):
    np.random.seed(rseed)
    clf = XDGMM(n_clusters, n_iter=n_iter, tol=1E-5, verbose=verbose)
    clf.fit(X, Xcov)
    return clf
Example #14
def compute_XD_results(n_components=10, n_iter=500):
    clf = XDGMM(n_components, n_iter=n_iter)
    clf.fit(X, Xerr)
    return clf
Example #15
def compute_XD(n_clusters=12, rseed=0, max_iter=100, verbose=True):
    np.random.seed(rseed)
    clf = XDGMM(n_clusters, max_iter=max_iter, tol=1E-5, verbose=verbose)
    clf.fit(X, Xcov)
    return clf
Example #16
ampstar = GMMStar.weights_
meanstar = GMMStar.means_
covarstar = GMMStar.covars_

ampgalaxy = GMMGalaxy.weights_
meangalaxy = GMMGalaxy.means_
covargalaxy = GMMGalaxy.covars_


# Results are saved in `amp`, `mean`, and `covar`
print('Deconvolving star')

xd.extreme_deconvolution(XTrainStar, XErrTrainStar, ampstar, meanstar, covarstar)

clfstar = XDGMM(nGaussiansStar)
clfstar.alpha = ampstar
clfstar.mu = meanstar
clfstar.V = covarstar


print('Deconvolving galaxies')
xd.extreme_deconvolution(XTrainGalaxy, XErrTrainGalaxy, ampgalaxy, meangalaxy, covargalaxy)

clfgalaxy = XDGMM(nGaussiansGalaxy)
clfgalaxy.alpha = ampgalaxy
clfgalaxy.mu = meangalaxy
clfgalaxy.V = covargalaxy

print('Predicting')
# need to pass XTestStar[i] and XTestGalaxy[i] as np.array([XTestStar[i]]) because internally it assumes 2D matrix
Example #17
class Classifier(object):

    def __init__(self, algorithm='XD', n_comp=20):
        if algorithm == 'XD':
            self.algorithm = 'XD'
            self.lQSO_model = XDGMM(n_components=n_comp, verbose=True)
            self.dud_model = XDGMM(n_components=n_comp, verbose=True)
        elif algorithm == 'RandomForest':
            self.algorithm = 'RandomForest'
            self.trialRF = RandomForestClassifier()
            self.RF_params = {'n_estimators': (10, 50, 200),
                              'max_features': ['auto', 2, 4],
                              'criterion': ['gini', 'entropy'],
                              'min_samples_leaf': [1, 2]}
        return
    
    def train(self,train,truth,covmat=1):
        if self.algorithm == 'XD':
            self.XDtrain(train,truth,covmat)
        elif self.algorithm == 'RandomForest':
            self.RFtrain(train,truth)
        return
    
    def RFtrain(self,train,truth):
        tunedRF = grid_search.GridSearchCV(self.trialRF, self.RF_params,
                                           n_jobs=-1, cv=3, verbose=1)
        self.optRF = tunedRF.fit(train, truth)
        return
    
    def XDtrain(self,train,truth,covmat=1):
        self.lQSO_model.fit(train[truth==1], listify(covmat,np.sum(truth)))
        self.dud_model.fit(train[truth==0], listify(covmat,np.sum(1-truth)))
        return
    
    def test(self,test,covmat=1):
        if self.algorithm == 'XD':
            self.XDprobs(test,covmat)
        elif self.algorithm == 'RandomForest':
            self.RFprobs(test)
        return
    
    def RFprobs(self,test):
        self.dud_probs = self.optRF.predict_proba(test)[:,0]
        self.lQSO_probs = self.optRF.predict_proba(test)[:,1]
        return
    
    def XDprobs(self, test, covmat):
        # Class likelihoods from each XD model, combined via Bayes' rule
        # with the module-level priors lQSO_prior and dud_prior.
        lQSO_like = np.sum(np.exp(self.lQSO_model.logprob_a(test, listify(covmat, test.shape[0]))), axis=1)
        dud_like = np.sum(np.exp(self.dud_model.logprob_a(test, listify(covmat, test.shape[0]))), axis=1)
        self.lQSO_probs = (lQSO_like * lQSO_prior) / (lQSO_like * lQSO_prior + dud_like * dud_prior)
        self.dud_probs = (dud_like * dud_prior) / (lQSO_like * lQSO_prior + dud_like * dud_prior)
        return
    
    def make_roc(self,truth):
        fpr, tpr, _ = metrics.roc_curve(truth,self.lQSO_probs,pos_label=1)
        plt.title('ROC Curve')
        plt.plot(fpr,tpr,'b--')
        plt.xlabel('FPR')
        plt.ylabel('TPR')
        return fpr,tpr
    
    def save(self,pkl_fname='classifiers.pkl'):
        outfile = open(pkl_fname,'wb')
        outDict = {}
        if hasattr(self,'lQSO_model'):
            outDict.update({'lQSO_model':self.lQSO_model})
        if hasattr(self,'dud_model'):
            outDict.update({'dud_model':self.dud_model})
        if hasattr(self,'optRF'):
            outDict.update({'optRF':self.optRF})
        pickle.dump(outDict,outfile)
        outfile.close()
        return
    
    def load(self,pkl_fname='classifiers.pkl'):
        pkl_in = open(pkl_fname,'rb')
        inDict = pickle.load(pkl_in)
        pkl_in.close()
        if 'lQSO_model' in inDict.keys():
            self.lQSO_model = inDict['lQSO_model']
        if 'dud_model' in inDict.keys():
            self.dud_model = inDict['dud_model']
        if 'optRF' in inDict.keys():
            self.optRF = inDict['optRF']
        return
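
A hypothetical training/testing round with the XD branch (the train/test arrays, their truth labels, and the module-level lQSO_prior, dud_prior, and listify helper are all assumed to exist):

clf = Classifier(algorithm='XD', n_comp=10)
clf.train(train, truth, covmat=np.eye(train.shape[1]))
clf.test(test, covmat=np.eye(test.shape[1]))
fpr, tpr = clf.make_roc(test_truth)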
Example #18
def compute_XD(n_clusters=2, rseed=0, n_iter=30, verbose=True):
    np.random.seed(rseed)
    clf = XDGMM(n_clusters, n_iter=n_iter, tol=1E-5, verbose=verbose)
    clf.fit(newZ, Zcov)
    return clf
Example #19
#     gaussian(x, amp3, mu3, sig3, 0)) + y0   # fragment of a larger multi-Gaussian model function


# Read
ids, pmx, pmy, magK, pmex, pmey = np.genfromtxt('PM_final.dat', unpack=True, usecols=(0,3,4,5,8,9))

# Filter
mag_mask = (magK < max_mag) & (magK > min_mag)
err_mask = (pmex**2 + pmey**2)**0.5 < max_err

dataset = np.vstack([pmx[mag_mask*err_mask], pmy[mag_mask*err_mask]]).T
dataerr = np.zeros(dataset.shape + dataset.shape[-1:])
diag    = np.arange(dataset.shape[-1])
dataerr[:, diag, diag] = np.vstack([pmex[mag_mask*err_mask]**2, pmey[mag_mask*err_mask]**2]).T

# `compo` (n_components) and `itera` (n_iter) are defined elsewhere in the script.
clf = XDGMM(compo, itera, verbose=True)
clf.fit(dataset, dataerr)
samples = clf.sample(np.sum(mag_mask*err_mask))

clfu = mixture.VBGMM(compo, covariance_type='full', tol=1e-5, n_iter=1000)
clfu.fit((pmx[mag_mask*err_mask])[:,np.newaxis])
meu  = np.hstack(clfu.means_)
stu  = np.hstack(clfu.precs_)[0]
weu  = np.hstack(clfu.weights_)

clfd = XDGMM(compo, itera, verbose=True)
clfd.fit(pmy[mag_mask*err_mask][:,np.newaxis], (pmey[mag_mask*err_mask]**2)[:,np.newaxis,np.newaxis])
samd = clfd.sample(np.sum(mag_mask*err_mask))

print('Centers:\n', clf.mu)
print('Covariance:\n', clf.V)