Exemplo n.º 1
0
    def setUp(self):
        """
        Build a fresh XDGMM object and a synthetic noisy data set.

        A scikit-learn GaussianMixture is parameterised by hand (weights,
        means, covariances and the precision matrices derived from them) and
        used only as a sampler; it is not fit here. 1000 two-dimensional
        points are drawn from it and paired with diagonal error covariances.
        """
        self.xdgmm = XDGMM(n_components=3)
        self.files = []

        # Hand-built mixture that serves purely as a source of test points.
        sampler = skl_GMM(n_components=3,
                          max_iter=10,
                          covariance_type='full',
                          random_state=None)
        sampler.weights_ = np.array([0.3, 0.5, 0.2])
        sampler.means_ = np.array([[0, 1], [5, 4], [2, 4]])
        sampler.covariances_ = np.array([
            [[2, 0], [0, 1]],
            [[1, 0.2], [0.2, 1]],
            [[0.3, 0], [0, 0.5]],
        ])

        # The sampler is never fit, so the derived attributes are filled in
        # by hand from the covariances.
        precisions = np.linalg.inv(sampler.covariances_)
        sampler.precisions_ = precisions
        sampler.precisions_cholesky_ = np.linalg.cholesky(precisions)
        self.gmm = sampler

        n_points = 1000
        self.X = sampler.sample(n_points)[0]

        # Per-point standard deviations in [0, 0.2); their squares go on the
        # diagonal of each point's (n_features, n_features) error covariance.
        sigmas = 0.2 * np.random.random_sample((n_points, 2))
        n_features = self.X.shape[-1]
        self.Xerr = np.zeros(self.X.shape + (n_features,))
        axis_idx = np.arange(n_features)
        self.Xerr[:, axis_idx, axis_idx] = sigmas ** 2
def plot_ellipses(X, Xerr, optimal_n_components):
    """
    Fit an XDGMM to the data and plot the resulting mixture.

    Parameters
    ----------
    X: array_like
        Input data, passed to XDGMM.fit / XDGMM.predict.
    Xerr: array_like
        Error on the input data, passed alongside X.
    optimal_n_components: int
        Number of Gaussian components to fit.

    Notes
    -----
    The model has to be fit before predict() is called and before its
    mu / V attributes are read; a freshly constructed XDGMM carries no
    parameters. The number of components comes from the argument instead
    of a hard-coded value.
    """
    xdgmm = XDGMM(n_components=int(optimal_n_components), n_iter=1000)
    xdgmm.fit(X, Xerr)
    plot_results(X, xdgmm.predict(X, Xerr), xdgmm.mu, xdgmm.V, 0,
                 'Gaussian Mixture')
    plt.xlabel('Log(T90)', size=20)
    plt.ylabel('Log(Hardness Ratio)', size=20)
    plt.show()
Exemplo n.º 3
0
    def __init__(self, model_file=None, fit_method='astroML'):
        """
        Create the worker with a 7-component XDGMM model.

        Parameters
        ----------
        model_file: string (optional)
            Name of a file holding a saved model; when given it is loaded
            through self.read_model (default=None).
        fit_method: string (optional)
            Fitting method handed to XDGMM (default='astroML').
        """
        self.XDGMM = XDGMM(n_components=7, method=fit_method)
        self.fit_method = fit_method
        # Always define the attribute so that later
        # `self.model_file is None` checks cannot raise AttributeError.
        self.model_file = None

        if model_file is not None:
            self.read_model(model_file)
def get_computed_models(X, Xerr):
    """
    Run the XDGMM BIC and AIC scans for 1 to 5 components.

    Parameters
    ----------
    X: array_like
        Input data, passed to XDGMM.bic_test / XDGMM.aic_test.
    Xerr: array_like
        Error on the input data.

    Returns
    -------
    bic, aic, optimal_n_comp
        The first return value of bic_test, the first return value of
        aic_test, and the second return value of bic_test.

    Notes
    -----
    Both optimal component counts are printed; only the BIC one is returned.
    """
    candidates = np.arange(1, 6)
    model = XDGMM(n_iter=10**3)

    bic_scores, best_by_bic, _lowest_bic = model.bic_test(X, Xerr, candidates)
    aic_scores, best_by_aic, _lowest_aic = model.aic_test(X, Xerr, candidates)

    for label, best in (("bic", best_by_bic), ("aic", best_by_aic)):
        print("optimal " + label + " {}".format(best))

    return bic_scores, aic_scores, best_by_bic
Exemplo n.º 5
0
def priorSample(ngauss=128, quantile=0.5, iter='8th', survey='2MASS', dataFilename='All.npz', Nsamples=1.2e6, xdgmmFilename='xdgmm.fit', xlabel='X', ylabel='Y', contourColor='k'):
    """
    Plot a resampling of a saved XDGMM model next to its component ellipses.

    Parameters
    ----------
    ngauss: int
        Only used to build the output file name.
    quantile, iter, survey, dataFilename:
        Not used in the body; kept for interface compatibility.
    Nsamples: number
        How many points to draw from the model. Converted to int before use
        because the default is written as a float.
    xdgmmFilename: string
        File the XDGMM model is loaded from.
    xlabel, ylabel: string
        Axis labels.
    contourColor: string
        Colour of the density contours in the left panel.

    Notes
    -----
    Draws whose second column (index 1) is negative are redrawn until none
    remain. The figure is saved as 'prior_ngauss<ngauss>.png'.
    """
    setup_text_plots(fontsize=16, usetex=True)

    xdgmm = XDGMM(filename=xdgmmFilename)
    figPrior = plt.figure(figsize=(12, 5.5))
    figPrior.subplots_adjust(left=0.1, right=0.95,
                            bottom=0.15, top=0.95,
                            wspace=0.1, hspace=0.1)

    # A sample count has to be integral; the float default (1.2e6) is
    # rejected by array-size arguments in current NumPy.
    sample = xdgmm.sample(int(Nsamples))

    # Rejection step: redraw every row whose second column is negative.
    negParallax = sample[:, 1] < 0
    nNegP = int(np.sum(negParallax))
    while nNegP > 0:
        sample[negParallax] = xdgmm.sample(nNegP)
        negParallax = sample[:, 1] < 0
        nNegP = int(np.sum(negParallax))

    samplex = sample[:, 0]
    sampley = testXD.absMagKinda2absMag(sample[:, 1])
    ax3 = figPrior.add_subplot(121)
    alpha = 0.1
    xlim = [-0.25, 1.25]
    ylim = [6, -6]

    # Contour levels for sigma = 1 and 2 of a 2-D Gaussian.
    levels = 1.0 - np.exp(-0.5 * np.arange(1.0, 2.1, 1.0) ** 2)
    corner.hist2d(samplex, sampley, ax=ax3, levels=levels, bins=200, plot_datapoints=False, no_fill_contours=True, plot_density=False, color=contourColor)
    ax3.scatter(samplex, sampley, s=1, lw=0, c='k', alpha=alpha)

    ax4 = figPrior.add_subplot(122)
    maxWeight = np.max(xdgmm.weights)
    for i in range(xdgmm.n_components):
        points = drawEllipse.plotvector(xdgmm.mu[i], xdgmm.V[i])
        # Line opacity scales with the component weight.
        ax4.plot(points[0, :], testXD.absMagKinda2absMag(points[1, :]), 'k-', alpha=xdgmm.weights[i]/maxWeight)

    titles = ["Extreme Deconvolution\n  resampling",
              "Extreme Deconvolution\n  cluster locations"]

    for i, (panel, title) in enumerate(zip([ax3, ax4], titles)):
        panel.set_xlim(xlim)
        panel.set_ylim(ylim[0], ylim[1]*1.1)
        panel.text(0.05, 0.95, title,
                   ha='left', va='top', transform=panel.transAxes, fontsize=18)

        panel.set_xlabel(xlabel, fontsize=18)
        if i == 1:
            # The right-hand panel shares the y scale; hide its tick labels.
            panel.yaxis.set_major_formatter(plt.NullFormatter())
        else:
            panel.set_ylabel(ylabel, fontsize=18)

    figPrior.savefig('prior_ngauss' + str(ngauss) + '.png')
Exemplo n.º 6
0
def GMM(ai, aj, ak, eai, eaj, eak, n_components=1, method="Bovy", mu=None, V=None, weights=None, fit=True):
	"""
	Build (and optionally fit) an XDGMM on three measured quantities.

	Parameters
	----------
	ai, aj, ak: array_like
		The three measured quantities, stacked as the columns of the data.
	eai, eaj, eak: array_like
		Their uncertainties; the squares fill the diagonal of each point's
		error covariance matrix.
	n_components, method, mu, V, weights:
		Forwarded unchanged to the XDGMM constructor.
	fit: bool
		When true the model is fit to the data before the likelihood is
		evaluated.

	Returns
	-------
	xdgmm, LogL
		The model and the value of its logL(X, Xerr).
	"""
	data = np.vstack([ai, aj, ak]).T

	# One diagonal covariance matrix per data point.
	covs = np.zeros(data.shape + data.shape[-1:])
	axis_idx = np.arange(data.shape[-1])
	covs[:, axis_idx, axis_idx] = np.vstack([eai ** 2, eaj ** 2, eak ** 2]).T

	model = XDGMM(n_components=n_components, method=method, mu=mu, V=V, weights=weights)
	if fit:
		model.fit(data, covs)

	return model, model.logL(data, covs)
Exemplo n.º 7
0
    def __init__(self, model_file=None, fit_method='astroML'):
        """
        Create the worker with a 7-component XDGMM model.

        Parameters
        ----------
        model_file: string (optional)
            Name of a file holding a saved model; when given it is loaded
            through self.read_model (default=None).
        fit_method: string (optional)
            Fitting method handed to XDGMM (default='astroML').
        """
        self.XDGMM = XDGMM(n_components=7, method=fit_method)
        self.fit_method = fit_method
        # Always define the attribute so that later
        # `self.model_file is None` checks cannot raise AttributeError.
        self.model_file = None

        if model_file is not None:
            self.read_model(model_file)
Exemplo n.º 8
0
def comparePrior():
    """
    Overlay the component ellipses of two saved XDGMM priors (512 and 128
    Gaussians) in two panels and save the result as
    'priorNgaussComparison.png'.

    The left panel passes the second ellipse coordinate through
    testXD.absMagKinda2absMag; the right panel plots it untransformed. Line
    opacity scales with component weight, and only the heaviest component of
    each model carries a legend label.
    """
    fig, ax = plt.subplots(1, 2, figsize=(12, 5))

    # (number of Gaussians, iteration tag, line colour, legend label)
    runs = [(512, '1st', 'k', '512 Gaussians'),
            (128, '6th', 'red', '128 Gaussians')]

    for nGauss, iterTag, lineColor, legendLabel in runs:
        xdgmmFilename = 'xdgmm.' + str(nGauss) + 'gauss.dQ0.05.' + iterTag + '.2MASS.All.npz.fit'
        xdgmm = XDGMM(filename=xdgmmFilename)

        for gg in range(xdgmm.n_components):
            weight = xdgmm.weights[gg]
            heaviest = np.max(xdgmm.weights)
            lab = legendLabel if weight == heaviest else None
            opacity = weight / heaviest

            points = drawEllipse.plotvector(xdgmm.mu[gg], xdgmm.V[gg])
            ax[0].plot(points[0, :], testXD.absMagKinda2absMag(points[1, :]), lineColor, lw=1, alpha=opacity)
            ax[1].plot(points[0, :], points[1, :], lineColor, lw=1, alpha=opacity, label=lab)

    for panel in ax:
        panel.set_xlim(-0.5, 1.5)
        panel.set_xlabel(r'$(J - K)^C$')

    left, right = ax[0], ax[1]
    left.set_ylabel(r'$M_J^C$')
    right.set_ylabel(r'$\varpi 10^{0.2\,m_J}$')
    left.set_ylim(6, -6)
    right.set_ylim(1100, -100)
    right.legend(loc='lower left', fontsize=10)

    plt.tight_layout()
    fig.savefig('priorNgaussComparison.png')
Exemplo n.º 9
0
    def test_ReadWrite(self):
        """
        A model written with save_model and read back through the XDGMM
        constructor must reproduce the first mean, covariance and weight
        entries to within 1e-5.
        """
        self.xdgmm.fit(self.X, self.Xerr)
        self.xdgmm.save_model('test.fit')
        # Record the file as soon as it exists so that it stays tracked in
        # self.files (presumably consumed by the test teardown) even when an
        # assertion below fails.
        self.files.append('test.fit')
        xd2 = XDGMM(filename='test.fit')

        # Compare magnitudes: a signed difference would let an arbitrarily
        # large negative discrepancy pass.
        self.assertLess(abs(self.xdgmm.mu[0, 0] - xd2.mu[0, 0]), 1e-5)
        self.assertLess(abs(self.xdgmm.V[0, 0, 0] - xd2.V[0, 0, 0]), 1e-5)
        self.assertLess(abs(self.xdgmm.weights[0] - xd2.weights[0]), 1e-5)
Exemplo n.º 10
0
def examplePosterior(nexamples=100, postFile='posteriorSimple.npz', dustFile='dust.npz', nPosteriorPoints=1000, xdgmmFilename='xdgmm.fit'):
    """
    Plot prior, likelihood and posterior over parallax for selected stars.

    The stars chosen are the `nexamples` entries for which the stored
    posterior variance exceeds the squared observed parallax error by the
    largest amount. One PNG is written per star
    ('exampleCMDPosteriorLargerVariance_<index>.png').

    Parameters
    ----------
    nexamples: int
        Number of stars to plot.
    postFile: string
        .npz file providing the 'posterior', 'mean' and 'var' arrays.
    dustFile: string
        .npz file providing the 'ebv' array.
    nPosteriorPoints: int
        Forwarded to testXD.absMagKindaPosterior.
    xdgmmFilename: string
        File the XDGMM prior is loaded from.
    """
    tgas, twoMass, Apass, bandDictionary, indices = testXD.dataArrays()
    xdgmm = XDGMM(filename=xdgmmFilename)
    absmag = 'J'
    mag1 = 'J'
    mag2 = 'K'
    ndim = 2
    data = np.load(dustFile)
    dustEBV = data['ebv']
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(absmag, dustEBV, bandDictionary, tgas['parallax'])
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    # Colour error: the two band errors added in quadrature.
    color_err = np.sqrt(bandDictionary[mag1]['array'][bandDictionary[mag1]['err_key']]**2. + bandDictionary[mag2]['array'][bandDictionary[mag2]['err_key']]**2.)
    xparallaxMAS = np.logspace(-2, 2, 1000)
    data = np.load(postFile)
    posterior = data['posterior']
    var = data['var']
    notnans = ~np.isnan(var) & ~np.isnan(tgas['parallax_error'])
    medianLogDiff = np.median(np.log(var[notnans]) - np.log(tgas['parallax_error'][notnans]**2.))
    # Single pre-formatted string: prints the same text under Python 2 and 3.
    print('The median of the differences of the logs:  {0}'.format(medianLogDiff))

    # Largest (posterior variance - observed variance) first.
    varDiff = var - tgas['parallax_error']**2.
    ind = np.argsort(varDiff)[::-1]
    for i in ind[0:nexamples]:
        # Jacobian-like factor applied to the prior when it is expressed on
        # the parallax grid.
        scale = 10.**(0.2*apparentMagnitude[i])
        xabsMagKinda = testXD.parallax2absMagKinda(xparallaxMAS, apparentMagnitude[i])
        likelihood = st.gaussian(tgas['parallax'][i], tgas['parallax_error'][i], xparallaxMAS)
        meanPrior, covPrior = testXD.matrixize(color[i], absMagKinda[i], color_err[i], 1e3)
        meanPrior = meanPrior[0]
        covPrior = covPrior[0]
        allMeans, allAmps, allCovs, summedPriorAbsMagKinda = testXD.absMagKindaPosterior(xdgmm, ndim, meanPrior, covPrior, xabsMagKinda, projectedDimension=1, nPosteriorPoints=nPosteriorPoints, prior=True)
        norm = scipy.integrate.cumtrapz(summedPriorAbsMagKinda*scale, x=xparallaxMAS)[-1]
        plotPrior = summedPriorAbsMagKinda*scale/norm
        posteriorFly = likelihood*summedPriorAbsMagKinda*scale
        norm = scipy.integrate.cumtrapz(posteriorFly, x=xparallaxMAS)[-1]
        # Leave the curve unnormalised when its integral is not positive.
        if norm > 0.0:
            posteriorFly = posteriorFly/norm
        plt.clf()
        plt.plot(xparallaxMAS, posterior[i], label='posterior')
        plt.plot(xparallaxMAS, likelihood, label='likelhood')
        plt.plot(xparallaxMAS, plotPrior, label='prior')
        plt.plot(xparallaxMAS, posteriorFly, label='posterior on the Fly')
        plt.xlim(tgas['parallax'][i] - 5.*tgas['parallax_error'][i], tgas['parallax'][i] + 5.*tgas['parallax_error'][i])
        plt.legend(loc='best')
        plt.xlabel('parallax [mas]', fontsize=18)
        plt.title('J-K: ' + '{0:.1f}'.format(color[i]) + '    M: ' + '{0:.1f}'.format(testXD.absMagKinda2absMag(absMagKinda[i])))
        # Lay out after the label and title exist so they are accounted for.
        plt.tight_layout()
        plt.savefig('exampleCMDPosteriorLargerVariance_' + str(i) + '.png')
Exemplo n.º 11
0
def paperComparePrior(ngauss=128, quantile=0.05, iter='10th', survey='2MASS', dataFilename='All.npz', contourColor='k', posteriorColor='royalblue'):
    """
    Scatter a random subset of 1024 stars in colour / absolute magnitude for
    two posterior files and save the figure as 'comparePriorPaper.png'.

    Parameters
    ----------
    ngauss, quantile, iter, survey, dataFilename:
        Joined into the name of the second posterior file,
        'posteriorParallax.<ngauss>gauss.dQ<quantile>.<iter>.<survey>.<dataFilename>'.
        The first file is always 'posteriorSimple.npz'.
    contourColor: string
        Not used in the body.
    posteriorColor: string
        Colour of the error bars.

    Notes
    -----
    Both files are drawn onto the same (left) axes; the right axes created
    by plt.subplots are left empty.
    """
    setup_text_plots(fontsize=16, usetex=True)
    tgas, twoMass, Apass, bandDictionary, indices = testXD.dataArrays()
    # NOTE(review): xdgmmFilename and dustFile are neither parameters nor
    # locals here; presumably module-level globals -- confirm they exist.
    xdgmm = XDGMM(filename=xdgmmFilename)
    absmag = 'J'
    mag1 = 'J'
    mag2 = 'K'
    xlabel = '$(J-K)^C$'
    ylabel = r'$M_J^C$'
    xlim = [-0.25, 1.25]
    ylim = [6, -6]

    data = np.load(dustFile)
    dustEBV = data['ebv']
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(absmag, dustEBV, bandDictionary, tgas['parallax'])

    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    # Colour error: the two band errors added in quadrature.
    color_err = np.sqrt(bandDictionary[mag1]['array'][bandDictionary[mag1]['err_key']]**2. + bandDictionary[mag2]['array'][bandDictionary[mag2]['err_key']]**2.)

    postFile = 'posteriorParallax.' + str(ngauss) + 'gauss.dQ' + str(quantile) + '.' + iter + '.' + survey + '.' + dataFilename

    # Random subset of stars; this replaces the `indices` returned above.
    indices = np.random.randint(0, high=len(color), size=1024)

    fig, ax = plt.subplots(1, 2)
    for posteriorFile in ['posteriorSimple.npz', postFile]:
        data = np.load(posteriorFile)
        sigma = np.sqrt(data['var'])
        mean = data['mean']
        absMag = testXD.absMagKinda2absMag(mean*10.**(0.2*apparentMagnitude))
        absMagSigma = testXD.absMagKinda2absMag(sigma*10.**(0.2*apparentMagnitude))

        subColor = color[indices]
        subAbsMag = absMag[indices]
        subAbsMagSigma = absMagSigma[indices]
        ax[0].scatter(subColor, subAbsMag)
        # The error arrays are subset with the same indices as x and y so
        # that their lengths match; fmt='none' draws bars without markers.
        ax[0].errorbar(subColor, subAbsMag, xerr=color_err[indices],
                       yerr=[subAbsMag/subAbsMagSigma, subAbsMag*subAbsMagSigma],
                       fmt='none', zorder=0, lw=0.5, mew=0, color=posteriorColor)

        ax[0].set_xlabel(xlabel, fontsize=18)
        ax[0].set_ylabel(ylabel, fontsize=18)
        plt.tight_layout()
        ax[0].set_ylim(ylim)
        ax[0].set_xlim(xlim)
    fig.savefig('comparePriorPaper.png')
Exemplo n.º 12
0
def plotvector(mean, var, step=0.001):
    """
    Return points on the one-sigma ellipse of a 2-d Gaussian.

    mean (shape (2,)) and var (shape (2, 2)) must already be *projected* to
    the 2-d space in which plotting is about to occur.

    The result has shape (2, n_points): row 0 holds the first coordinate and
    row 1 the second, sampled at angles 0 <= t < 2*pi in increments of
    `step`.
    """
    assert mean.shape == (2, )
    assert var.shape == (2, 2)

    angles = np.arange(0, 2. * np.pi, step)
    eigvals, eigvecs = np.linalg.eigh(var)

    # Semi-axis vectors: each eigenvector scaled by the square root of its
    # eigenvalue.
    first_axis = np.sqrt(eigvals[0]) * eigvecs[:, 0]
    second_axis = np.sqrt(eigvals[1]) * eigvecs[:, 1]

    return (first_axis[:, None] * np.cos(angles)[None, :]
            + second_axis[:, None] * np.sin(angles)[None, :]
            + mean[:, None])


if __name__ == "__main__":
    # Script entry point: load a saved XDGMM model and draw one ellipse per
    # component, with line opacity proportional to the component weight.
    from xdgmm import XDGMM
    import pylab as plt

    xdgmm = XDGMM(filename='xdgmm.1028gauss.1.2M.fit')
    amps, mus, Vs = xdgmm.weights, xdgmm.mu, xdgmm.V

    plt.clf()
    for component in range(len(amps)):
        outline = plotvector(mus[component], Vs[component])
        opacity = amps[component] / np.max(amps)
        plt.plot(outline[0, :], fixAbsMag(outline[1, :]), "k-", alpha=opacity)
    # The y limits are given high-to-low, so the axis is drawn inverted.
    plt.xlim(-2, 3)
    plt.ylim(10, -6)
    plt.savefig("drawEllipse.png")
Exemplo n.º 13
0
def _plotVariancePanels(fig, x, yLeft, yRight, cLeft, cRight, norm, cmap, xlim, ylim, xlabel, ylabel):
    """
    Draw the two side-by-side scatter panels shared by the
    'denoisedVariance*' figures of compareSimpleGaia onto *fig*.

    The left panel is colour-normalised with *norm*; the right panel uses
    the default normalisation.
    """
    panels = [fig.add_subplot(121), fig.add_subplot(122)]
    panels[0].scatter(x, yLeft, s=1, lw=0, c=cLeft, alpha=0.05, norm=norm, cmap=cmap)
    panels[1].scatter(x, yRight, s=1, lw=0, c=cRight, alpha=0.05, cmap=cmap)

    titles = ["Colored by change in variance", "Colored by observed variance"]
    for panel, title in zip(panels, titles):
        panel.set_xlim(xlim)
        panel.set_ylim(ylim[0], ylim[1]*1.1)
        panel.text(0.05, 0.95, title,
                   ha='left', va='top', transform=panel.transAxes, fontsize=18)
        panel.set_xlabel(xlabel, fontsize=18)
        panel.set_ylabel(ylabel, fontsize=18)


def compareSimpleGaia(ngauss=128, quantile=0.05, iter='10th', survey='2MASS', dataFilename='All.npz', contourColor='k'):
    """
    Compare stored parallax posteriors with the observed parallaxes.

    For each of 'posteriorSimple.npz' and
    'posteriorParallax.<ngauss>gauss.dQ<quantile>.<iter>.<survey>.<dataFilename>'
    four figures are written, each named after the part of the posterior
    file name before the first '.':

    * '<stem>_Comparison2Gaia.png': posterior mean minus observed parallax,
      and log posterior variance minus log observed variance, both against
      the posterior mean (negative observed parallaxes in red).
    * 'deltaLogVariance_<stem>.png': the log-variance difference against
      colour, and the colour-magnitude diagram coloured by it.
    * 'denoisedVariance_<stem>.png' and 'denoisedVarianceSamples_<stem>.png':
      colour-magnitude diagrams coloured by the log-variance difference
      (left) and by the observed variance (right); the second file uses
      magnitudes derived from one posterior sample per star on the right.

    The median log-variance difference is also printed for each file.

    Parameters
    ----------
    ngauss, quantile, iter, survey, dataFilename:
        Joined into the name of the second posterior file.
    contourColor: string
        Colour of the density contours.
    """
    setup_text_plots(fontsize=16, usetex=True)
    tgas, twoMass, Apass, bandDictionary, indices = testXD.dataArrays()
    # NOTE(review): xdgmmFilename and dustFile are neither parameters nor
    # locals here; presumably module-level globals -- confirm they exist.
    xdgmm = XDGMM(filename=xdgmmFilename)
    absmag = 'J'
    mag1 = 'J'
    mag2 = 'K'
    xlabel = '$(J-K)^C$'
    ylabel = r'$M_J^C$'
    logVarLabel = r'$\mathrm{ln} \, \tilde{\sigma}_{\varpi}^2 - \mathrm{ln} \, \sigma_{\varpi}^2$'
    xlim = [-0.25, 1.25]
    ylim = [6, -6]

    data = np.load(dustFile)
    dustEBV = data['ebv']
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(absmag, dustEBV, bandDictionary, tgas['parallax'])

    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)

    postFile = 'posteriorParallax.' + str(ngauss) + 'gauss.dQ' + str(quantile) + '.' + iter + '.' + survey + '.' + dataFilename
    for posteriorFile in ['posteriorSimple.npz', postFile]:
        stem = posteriorFile.split('.')[0]
        data = np.load(posteriorFile)
        posterior = data['posterior']
        mean = data['mean']
        var = data['var']

        # One draw per star from its posterior; -999 flags stars for which
        # the sampler raised IndexError.
        samples = np.zeros(np.shape(posterior)[0])
        xparallaxMAS = np.logspace(-2, 2, np.shape(posterior)[1])
        for i, p in enumerate(posterior):
            try:
                samples[i] = testXD.samples(xparallaxMAS, p, 1, plot=False)[0]
            except IndexError:
                samples[i] = -999

        absMag = testXD.absMagKinda2absMag(mean*10.**(0.2*apparentMagnitude))
        absMagSample = testXD.absMagKinda2absMag(samples*10.**(0.2*apparentMagnitude))

        # Figure 1: posterior mean / variance against the observed values.
        neg = tgas['parallax'] < 0
        fig, ax = plt.subplots(1, 2)
        ax[0].plot(mean[~neg], mean[~neg] - tgas['parallax'][~neg], 'ko', markersize=0.5)
        ax[0].plot(mean[neg], mean[neg] - tgas['parallax'][neg], 'ro', markersize=0.5)
        ax[0].set_xscale('log')
        ax[1].plot(mean[~neg], np.log(var[~neg]) - np.log(tgas['parallax_error'][~neg]**2.), 'ko', markersize=0.5)
        ax[1].plot(mean[neg], np.log(var[neg]) - np.log(tgas['parallax_error'][neg]**2.), 'ro', markersize=0.5)
        ax[1].set_xscale('log')
        ax[0].set_xlabel(r'$E[\varpi]$', fontsize=18)
        ax[1].set_xlabel(r'$E[\varpi]$', fontsize=18)
        ax[0].set_ylabel(r'$E[\varpi] - \varpi$', fontsize=18)
        ax[1].set_ylabel(logVarLabel, fontsize=18)
        plt.tight_layout()
        ax[0].set_ylim(-5, 5)
        ax[1].set_ylim(-6, 2)
        ax[0].set_xlim(1e-1, 1e1)
        ax[1].set_xlim(1e-1, 1e2)
        fig.savefig(stem + '_Comparison2Gaia.png')

        notnans = ~np.isnan(var) & ~np.isnan(tgas['parallax_error'])
        observedVar = tgas['parallax_error'][notnans]**2.
        x = color[notnans]
        y = np.log(var[notnans]) - np.log(observedVar)
        # Single pre-formatted string: prints the same text under Python 2
        # and 3.
        print('The median of the differences of the logs:  {0}'.format(np.median(y)))

        # Figure 2: change in log variance against colour and on the CMD.
        fig, ax = plt.subplots(1, 2, figsize=(14, 7))
        # Contour levels for sigma = 1 and 2 of a 2-D Gaussian.
        levels = 1.0 - np.exp(-0.5 * np.arange(1.0, 2.1, 1.0) ** 2)
        norm = plt.matplotlib.colors.Normalize(vmin=-1.5, vmax=1)
        cmap = 'inferno'
        ax[0].scatter(x, y, c=y, s=1, lw=0, alpha=0.05, norm=norm, cmap=cmap)
        # `plot_datapoints` is the keyword corner.hist2d understands; the
        # points are already drawn by the scatter call above.
        corner.hist2d(x, y, bins=200, ax=ax[0], levels=levels, no_fill_contours=True, plot_density=False, plot_datapoints=False, color=contourColor)
        ax[0].set_xlabel(r'$(J-K)^c$', fontsize=18)
        ax[0].set_ylim(-6, 2)
        ax[0].set_xlim(-0.5, 2)
        ax[0].set_ylabel(logVarLabel, fontsize=18)
        ax[1].scatter(x, absMag[notnans], s=1, lw=0, c=y, alpha=0.05, norm=norm, cmap=cmap)
        ax[1].set_xlim(xlim)
        ax[1].set_ylim(ylim)
        ax[1].set_xlabel(xlabel, fontsize=18)
        ax[1].set_ylabel(ylabel, fontsize=18)
        fig.savefig('deltaLogVariance_' + stem + '.png')

        # Figures 3 and 4 reuse one figure object, cleared in between.
        figVarDiff = plt.figure(figsize=(14, 7))
        _plotVariancePanels(figVarDiff, x, absMag[notnans], absMag[notnans],
                            y, observedVar, norm, cmap, xlim, ylim, xlabel, ylabel)
        figVarDiff.savefig('denoisedVariance_' + stem + '.png')
        figVarDiff.clf()

        _plotVariancePanels(figVarDiff, x, absMag[notnans], absMagSample[notnans],
                            y, observedVar, norm, cmap, xlim, ylim, xlabel, ylabel)
        figVarDiff.savefig('denoisedVarianceSamples_' + stem + '.png')
Exemplo n.º 14
0
class Empiricist(object):
    """
    Worker object that can fit supernova and host galaxy parameters 
    given noisy inputs using an XDGMM model, and then predict new
    supernovae based on this model and a set of new host galaxies.

    Parameters
    ----------
    model_file: string (optional)
        Name of text file containing model being used (default=None).
    fit_method: string (optional)
        Name of XD fitting method to use (default='astroML'). Must be
        either 'astroML' or 'Bovy'.

    Notes
    -----
    The class can be initialized with a model or one can be loaded or
        fit to data.
    """
    def __init__(self, model_file=None, fit_method='astroML'):
        """
        Create the worker with a 7-component XDGMM model.

        Parameters
        ----------
        model_file: string (optional)
            Name of a file holding a saved model; when given it is loaded
            through self.read_model (default=None).
        fit_method: string (optional)
            Fitting method handed to XDGMM (default='astroML').
        """
        self.XDGMM = XDGMM(n_components=7, method=fit_method)
        self.fit_method = fit_method
        # Always define the attribute: get_SN and get_logR test
        # `self.model_file is None` and would otherwise raise AttributeError
        # on an instance created without a model file.
        self.model_file = None

        if model_file is not None:
            self.read_model(model_file)

    def get_SN(self, X, Xerr=None, n_SN=1):
        """
        Conditions the XDGMM model based on the data in X and returns
        SN parameters sampled from the conditioned model.

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data. First 3 entries (SN parameters) should be NaN.
        Xerr: array_like, shape = (n_samples, n_features), optional
            Error on input data. SN errors should be 0.0. If None,
            errors are not used for the conditioning.
        n_SN: int (optional)
            Number of SNe to sample (default = 1).

        Returns
        -------
        SN_data: array_like, shape = (n_SN, 3)
            Sample of SN data taken from the conditioned model.

        Notes
        -----
        Assumes that the first three parameters used when fitting
            the model are the SN parameters.
        """
        if self.model_file is None:
            raise StandardError("Model parameters not set.")

        if Xerr is None: cond_XDGMM = self.XDGMM.condition(X)
        else: cond_XDGMM = self.XDGMM.condition(X, Xerr)

        return np.atleast_2d(cond_XDGMM.sample(n_SN))

    def fit_model(self,
                  X,
                  Xerr,
                  filename='empiriciSN_model.fit',
                  n_components=6):
        """
        Fits the XD model to data.

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data.
        Xerr: array_like, shape = (n_samples, n_features, n_features)
            Error on input data.
        filename: string (optional)
            Filename for model fit to be saved to (default =
            'empiriciSN_model.fit').
        n_components: float (optional)
            Number of Gaussian components to use (default = 6)

        Notes
        -----
        The specified method and n_components Gaussian components will
        be used (typical BIC-optimized numbers of components for ~100s
        of training datapoints are 6 or 7).

        The fit will be saved in the file with name defined by the 
        filename variable.
        """
        self.XDGMM.n_components = n_components
        self.XDGMM = self.XDGMM.fit(X, Xerr)
        self.XDGMM.save_model(filename)
        self.model_file = filename
        return

    def fit_from_files(self,
                       filelist,
                       filename='empiriciSN_model.fit',
                       n_components=7):
        """
        Fits the XD model to data contained in the files provided.

        Parameters
        ----------
        filelist: array_like
            Array of strings containing names of files containing data
            to fit.
        filename: string (optional)
            Filename for model fit (default = 'empiriciSN_model.fit').
        n_components: float (optional)
            Number of Gaussian components to use (default = 7)
        method: string (optional)
            XD fitting method to use (default = 'astroML')

        Notes
        -----
        The model is fitted using the data contained in the files
        named in the `filelist` variable. This assumes that the data
        files are in the same format as those provided with this code
        and that only redshift, distance from host nucleus, host colors,
        and local host surface brightness are being used for the fit.
        """
        X, Xerr = self.get_data(filelist)
        self.fit_model(X, Xerr, filename=filename, n_components=n_components)
        return

    def read_model(self, filename):
        """
        Reads the parameters of a model from a file.

        Parameters
        ----------
        filename: string
            Name of the file to read from.

        Notes
        -----
        Model parameters are stored in the self.XDGMM model object. 
        The model filename is stored self.model_file.
        """
        self.XDGMM.read_model(filename)
        self.model_file = filename
        return

    def component_test(self, X, Xerr, component_range, no_err=False):
        """
        Test the performance of the model for a range of numbers of
        Gaussian components.

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data.
        Xerr: array_like, shape = (n_samples, n_features, n_features)
            Error on input data.
        component_range: array_like
            Range of n_components to test.
        no_err: bool (optional)
            Flag for whether to calculate the BIC with the errors
            included or not. (default = False)

        Returns
        -------
        bics: array_like, shape = (len(param_range),)
            BIC for each value of n_components
        optimal_n_comp: float
            Number of components with lowest BIC score
        lowest_bic: float
            Lowest BIC from the scores computed.

        Notes
        -----
        Uses the XDGMM.bic_test method to compute the BIC score for
        each n_components in the component_range array.
        """
        bics, optimal_n_comp, lowest_bic = \
            self.XDGMM.bic_test(X, Xerr, component_range, no_err)
        return bics, optimal_n_comp, lowest_bic

    def get_logR(self, cond_indices, R_index, X, Xerr=None):
        """
        Uses a subset of parameters in the given data to condition the
        model and return a sample value for log(R/Re).

        Parameters
        ----------
        cond_indices: array_like
            Array of indices indicating which parameters to use to
            condition the model. Cannot contain [0, 1, 2] since these
            are SN parameters.
        R_index: int
            Index of log(R/Re) in the list of parameters that were used
            to fit the model.
        X: array_like, shape = (n < n_features,)
            Input data.
        Xerr: array_like, shape = (X.shape,) (optional)
            Error on input data. If none, no error used to condition.

        Returns
        -------
        logR: float
            Sample value of log(R/Re) taken from the conditioned model.

        Notes
        -----
        The fit_params array specifies a list of indices to use to
        condition the model. The model will be conditioned and then
        a radius will be drawn from the conditioned model.

        This is so that the radius can then be used to calculate local
        surface brightness to fully condition the model to sample
        likely SN parameters.

        This does not make assumptions about what parameters are being
        used in the model, but does assume that the model has been
        fit already and that the first three parameters in the data
        that were used to fit the model are the SN parameters.
        """
        if self.model_file is None:
            raise StandardError("Model parameters not set.")

        if 0 in cond_indices or 1 in cond_indices or 2 in cond_indices:
            raise ValueError("Cannot condition model on SN parameters.")
        if R_index in cond_indices:
            raise ValueError("Cannot condition model on log(R/Re).")

        cond_data = np.array([])
        if Xerr is not None: cond_err = np.array([])
        R_cond_idx = R_index
        n_features = self.XDGMM.mu.shape[1]
        j = 0

        for i in range(n_features):
            if i in cond_indices:
                cond_data = np.append(cond_data, X[j])
                if Xerr is not None: cond_err = np.append(cond_err, Xerr[j])
                j += 1
                if i < R_index: R_cond_idx -= 1
            else:
                cond_data = np.append(cond_data, np.nan)
                if Xerr is not None: cond_err = np.append(cond_err, 0.0)

        if Xerr is not None:
            cond_XDGMM = self.XDGMM.condition(cond_data, cond_err)
        else:
            cond_XDGMM = self.XDGMM.condition(cond_data)

        sample = cond_XDGMM.sample()
        logR = sample[0][R_cond_idx]
        return logR

    def get_local_SB(self, SB_params, R):
        """
        Uses magnitudes, a surface brightness (SB) profile, and
        a SN location to fit local surface brightnesses at the location
        of the SN.

        Parameters
        ----------
        SB_params: array_like, shape = (21,)
            Array of parameters needed for the SB fit. First entry 
            should be a sersic index of 1 or 4, indicating whether to
            use an exponential or de Vaucouleurs profile. Following this
            should be sets of 
            (magnitude, mag_unc, effective radius, rad_unc) data for
            each of the 5 ugriz filters, giving a total array length of
            21. These data are assumed to be known by the user.
        R: float
            Separation from host nucleus in units of log(R/Re).
            It is assumed that the Re used here is the r-band Re, as is
            output by the get_logR function.

        Returns
        -------
        SBs: array_list, shape = (5,)
            Local surface brightness at the location of the SN for each
            of the 5 ugriz filters. Units = mag/arcsec^2
        SB_errs: array_like, shape = (5,)
            Uncertainties on the local surface brightnesses.
        """
        if SB_params[0] != 1 and SB_params[0] != 4:
            raise ValueError("Sersic index must be 1 or 4")

        sep = (10**R) * SB_params[11]  # separation in arcsec

        SBs = np.array([])
        SB_errs = np.array([])

        for j in range(5):
            halfmag = SB_params[j * 4 + 1] + 0.75257
            magerr = SB_params[j * 4 + 2]
            Re = SB_params[j * 4 + 3]
            Re_err = SB_params[j * 4 + 4]
            r = sep / Re

            Ie = halfmag + 2.5 * np.log10(np.pi * Re**2)
            Re2_unc = 2 * Re * Re_err * np.pi
            log_unc = 2.5 * Re2_unc / (np.log10(np.pi * Re**2) * np.log(10))
            Ie_unc = np.sqrt(magerr**2 + log_unc**2)

            if SB_params[0] == 1:
                Io = Ie - 1.824
                Io_unc = Ie_unc
                sb = Io * np.exp(-1.68 * (r))
                exp_unc = np.exp(-1.68 * (r)) * 1.68 * sep * Re_err / (Re**2)
                sb_unc = sb * np.sqrt((Io_unc / Io)**2 +
                                      (exp_unc / np.exp(-1.68 * (r)))**2)
                if np.isnan(sb_unc): sb_unc = 0.0
                if sb_unc < 0: sb_unc = sb_unc * -1.0
                SBs = np.append(SBs, sb)
                SB_errs = np.append(SB_errs, sb_unc)

            if SB_params[0] == 4:
                Io = Ie - 8.328
                Io_unc = Ie_unc
                sb = Io * np.exp(-7.67 * ((r)**0.25))
                exp_unc = np.exp(-7.67*((r)**0.25))*7.67*sep \
                          *Re_err/(4*Re**(1.25))
                sb_unc = sb*np.sqrt((Io_unc/Io)**2+(exp_unc \
                       /np.exp(-7.67*((r)**0.25))))
                if np.isnan(sb_unc): sb_unc = 0.0
                if sb_unc < 0: sb_unc = sb_unc * -1.0
                SBs = np.append(SBs, sb)
                SB_errs = np.append(SB_errs, sb_unc)

        return SBs, SB_errs

    def set_fit_method(self, fit_method):
        """
        Select the XD fitting method used by the wrapped model.

        Parameters
        ----------
        fit_method: string
            Name of the fitting method; only 'astroML' and 'Bovy' are
            accepted.

        Raises
        ------
        ValueError
            If `fit_method` is neither 'astroML' nor 'Bovy'.

        Notes
        -----
        Stores the method name on both `self.XDGMM.method` and
        `self.fit_method`, and sets `self.XDGMM.n_iter` to 100 for
        'astroML' or 10**9 for 'Bovy'.
        """
        # Guard clause: refuse anything that is not one of the two names.
        if not (fit_method == 'astroML' or fit_method == 'Bovy'):
            raise ValueError("Method must be either 'astroML' or 'Bovy'")

        max_iterations = 100 if fit_method == 'astroML' else 10**9

        self.XDGMM.method = fit_method
        self.XDGMM.n_iter = max_iterations
        self.fit_method = fit_method

    def get_data(self, filelist):
        """
        Parses SN and host data from a list of data files.

        Parameters
        ----------
        filelist: array_like
            Array of strings containing names of files containing data
            to fit.

        Returns
        -------
        X: array_like, shape = (n_samples, n_features)
            Output data with 20 features per row, in this order: the SN
            parameters x0, x1, c; host redshift; log10(R/Re); the ten
            host colors u-g, u-r, u-i, u-z, g-r, g-i, g-z, r-i, r-z,
            i-z; and the five surface brightnesses SB_u .. SB_z.
        Xerr: array_like, shape = (n_samples, n_features, n_features)
            Error on output data. Only the diagonal is filled, with the
            squared uncertainty of each feature.

        Notes
        -----
        Every row is split on commas and read by 0-based column
        position. Each value column is followed directly by its
        uncertainty: SN parameters at 7, 9, 11; magnitudes at 18, 20,
        22, 24, 26; surface brightnesses at 32, 38, 44, 50, 56. The
        redshift is column 4 and is given zero uncertainty. log10(R/Re)
        uses column 15 divided by column 42, with column 43 as the
        uncertainty on column 42.

        Lines starting with '#', blank lines, and rows in which any
        surface-brightness uncertainty is the literal string 'nan' are
        skipped.

        Color uncertainties are the quadrature sum of the two magnitude
        uncertainties.

        This method needs further modularizing, to enable the worker
        to calculate host surface brightnesses separately (in a static method).
        """
        # 0-based positions of the value columns; the matching
        # uncertainty always sits in the next column.
        sn_cols = (7, 9, 11)  # x0, x1, c
        mag_cols = (18, 20, 22, 24, 26)  # u, g, r, i, z magnitudes
        sb_cols = (32, 38, 44, 50, 56)  # SB_u, SB_g, SB_r, SB_i, SB_z
        n_mags = len(mag_cols)
        n_lead = len(sn_cols) + 2  # SN params + redshift + log10(R/Re)
        n_raw = n_lead + n_mags + len(sb_cols)

        # Rows are collected in Python lists and converted once at the
        # end; growing arrays with np.append copies them on every row.
        values = []
        errors = []
        for filename in filelist:
            with open(filename, 'r') as infile:
                for raw_line in infile:
                    if not raw_line.strip() or raw_line[0] == '#':
                        continue
                    fields = raw_line.split(',')
                    if any(fields[k + 1] == 'nan' for k in sb_cols):
                        continue

                    separation = float(fields[15])
                    r_eff = float(fields[42])
                    r_eff_err = float(fields[43])

                    row = [float(fields[k]) for k in sn_cols]
                    row_err = [float(fields[k + 1]) for k in sn_cols]

                    # Host redshift carries no uncertainty in the input.
                    row.append(float(fields[4]))
                    row_err.append(0.0)

                    row.append(np.log10(separation / r_eff))
                    row_err.append(r_eff_err / (r_eff * np.log(10)))

                    # The SB_i uncertainty is column 51 (value column
                    # 50 + 1), the same column the nan filter checks.
                    for k in mag_cols + sb_cols:
                        row.append(float(fields[k]))
                        row_err.append(float(fields[k + 1]))

                    values.append(row)
                    errors.append(row_err)

        # reshape keeps a well-formed (0, n_raw) array when no row was read.
        values = np.array(values, dtype=float).reshape(-1, n_raw)
        errors = np.array(errors, dtype=float).reshape(-1, n_raw)

        mags = values[:, n_lead:n_lead + n_mags]
        mag_errs = errors[:, n_lead:n_lead + n_mags]

        # All bluer-minus-redder band pairs, in u, g, r, i, z order.
        colors = []
        color_errs = []
        for blue in range(n_mags):
            for red in range(blue + 1, n_mags):
                colors.append(mags[:, blue] - mags[:, red])
                color_errs.append(
                    np.sqrt(mag_errs[:, blue]**2 + mag_errs[:, red]**2))

        X = np.column_stack([values[:, :n_lead]] + colors +
                            [values[:, n_lead + n_mags:]])
        sigma = np.column_stack([errors[:, :n_lead]] + color_errs +
                                [errors[:, n_lead + n_mags:]])

        Xerr = np.zeros(X.shape + X.shape[-1:])
        diag = np.arange(X.shape[-1])
        Xerr[:, diag, diag] = sigma**2
        return X, Xerr
Exemplo n.º 15
0
def dataViz(survey='2MASS', ngauss=128, quantile=0.05, dataFilename='All.npz',
            iter='10th', Nsamples=3e5, contourColor='k', dustFile='dust.npz',
            sdss5=False, whatsThatFeature=False):
    """
    Plot the raw color-magnitude data against samples drawn from a saved
    XDGMM, then tabulate the posterior-parallax color-magnitude values.

    Parameters
    ----------
    survey: string
        'APASS' or '2MASS'; selects the bands, axis labels and limits.
    ngauss, quantile, iter, dataFilename:
        Concatenated into the XDGMM and posterior file names; `ngauss`
        is also used in the renamed prior PDF name.
    Nsamples: number
        Passed to `xdgmm.sample` for the initial draw.
    contourColor: string
        Contour color passed to the first `dp.plot_sample` call.
    dustFile: string
        File loaded with `np.load`; its 'ebv' entry is used as the dust
        correction for the posterior section.
    sdss5: bool
        Not used by this function.
    whatsThatFeature: bool
        When true, also saves 'whatsThatFeature.pdf', a 2D histogram
        with text labels at fixed color-magnitude positions.

    Raises
    ------
    ValueError
        If `survey` is not one of the supported names.

    Notes
    -----
    Side effects visible here: renames 'plot_sample.data.pdf' and
    'plot_sample.prior.pdf' (expected to exist after `dp.plot_sample`
    returns), writes 'cmdExpectation.txt' and, optionally,
    'whatsThatFeature.pdf'.
    """
    # (mag1, mag2, absmag, xlabel, ylabel, xlim, ylim) per survey.
    surveySettings = {
        'APASS': ('B', 'V', 'G', 'B-V', r'M$_\mathrm{G}$',
                  [-0.2, 2], [9, -2]),
        '2MASS': ('J', 'K', 'J', r'$(J-K)^C$', r'$M_J^C$',
                  [-0.25, 1.25], [6, -6]),
    }
    # Fail early and clearly instead of hitting an unbound local later.
    if survey not in surveySettings:
        raise ValueError("survey must be 'APASS' or '2MASS', got %r"
                         % (survey,))
    mag1, mag2, absmag, xlabel, ylabel, xlim, ylim = surveySettings[survey]

    def asymmetricAbsMagErrors(kinda, kinda_err):
        """Return (absMag, lower error, upper error) for kinda +/- kinda_err."""
        lower = kinda - kinda_err
        absMagLower = testXD.absMagKinda2absMag(lower)
        # Where the lower bound is negative, pin it to -50 so the lower
        # error bar is a large finite number.
        absMagLower[lower < 0] = -50.
        centre = testXD.absMagKinda2absMag(kinda)
        upper = testXD.absMagKinda2absMag(kinda + kinda_err)
        return centre, centre - absMagLower, upper - centre

    fileTag = (str(ngauss) + 'gauss.dQ' + str(quantile) + '.' + iter + '.' +
               survey + '.' + dataFilename)
    xdgmmFilename = 'xdgmm.' + fileTag + '.fit'

    tgas, twoMass, Apass, bandDictionary, indices = testXD.dataArrays()

    # --- raw data, no dust correction ---------------------------------
    dustEBV = 0.0
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])

    band1 = bandDictionary[mag1]
    band2 = bandDictionary[mag2]
    bandAbs = bandDictionary[absmag]
    color_err = np.sqrt(band1['array'][band1['err_key']]**2. +
                        band2['array'][band2['err_key']]**2.)
    absMagKinda_err = tgas['parallax_error'] * 10.**(
        0.2 * bandAbs['array'][bandAbs['key']])

    xdgmm = XDGMM(filename=xdgmmFilename)
    sample = xdgmm.sample(Nsamples)
    # Redraw every sample whose second coordinate is negative until
    # none remain.
    negParallax = sample[:, 1] < 0
    nNegP = np.sum(negParallax)
    while nNegP > 0:
        sample[negParallax] = xdgmm.sample(nNegP)
        negParallax = sample[:, 1] < 0
        nNegP = np.sum(negParallax)

    positive = absMagKinda > 0
    absMagData, yerr_minus, yerr_plus = asymmetricAbsMagErrors(
        absMagKinda[positive], absMagKinda_err[positive])

    dp.plot_sample(color[positive], absMagData,
                   sample[:, 0], testXD.absMagKinda2absMag(sample[:, 1]),
                   xdgmm,
                   xerr=color_err[positive],
                   yerr=[yerr_minus, yerr_plus],
                   xlabel=xlabel, ylabel=ylabel, xlim=xlim, ylim=ylim,
                   errSubsample=2.4e3, thresholdScatter=2., binsScatter=200,
                   contourColor=contourColor)
    os.rename('plot_sample.data.pdf', 'data_noDust.pdf')
    os.rename('plot_sample.prior.pdf', 'prior_' + str(ngauss) + 'gauss.pdf')

    # --- dust-corrected quantities for the posterior section ----------
    dustEBV = np.load(dustFile)['ebv']
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])

    posteriorFile = 'posteriorParallax.' + fileTag
    for posteriorPath in [posteriorFile]:  # , 'posteriorSimple.npz']:
        data = np.load(posteriorPath)
        variance = data['var']
        parallax = data['mean']
        parallax_err = np.sqrt(variance)
        notnans = ~np.isnan(parallax) & ~np.isnan(parallax_err)

        magScale = 10.**(0.2 * apparentMagnitude[notnans])
        # Log ratio of posterior variance to the catalogue parallax variance.
        c = np.log(variance) - np.log(tgas['parallax_error']**2.)

        absMag, yerr_minus, yerr_plus = asymmetricAbsMagErrors(
            parallax[notnans] * magScale, parallax_err[notnans] * magScale)

        ascii.write([color[notnans], absMag, color_err[notnans],
                     yerr_minus, yerr_plus, c[notnans]],
                    'cmdExpectation.txt',
                    names=['color', 'absMag', 'color_err', 'absMag_errMinus',
                           'absMag_errPlus', 'logDeltaVar'])

        # Plain boolean `and`: bitwise `&` is false for truthy values
        # such as 2.
        if whatsThatFeature and (posteriorPath == posteriorFile):
            figFeature, axFeature = plt.subplots()
            # The histogram is always drawn in black, whatever
            # `contourColor` was passed in.
            corner.hist2d(color[notnans], absMag, ax=axFeature, levels=None,
                          bins=200, no_fill_contours=True, plot_density=False,
                          color='k', rasterized=True, plot_contours=False)
            axFeature.set_xlim(xlim)
            axFeature.set_ylim(ylim)
            axFeature.set_xlabel(xlabel)
            axFeature.set_ylabel(ylabel)
            # (label, (color, absolute magnitude)) text anchors.
            annotations = [
                ('lower MS', (0.45, 5.5)),
                ('upper MS', (-0.225, 2)),
                ('binary sequence', (0.75, 4)),
                ('red clump', (0.35, -2)),
                ('RGB', (1.0, -2)),
                ('MS turn off', (0.0, 3.5)),
            ]
            for label, (xpos, ypos) in annotations:
                axFeature.text(xpos, ypos, label, fontsize=15)
            figFeature.savefig('whatsThatFeature.pdf', format='pdf')
Exemplo n.º 16
0
def dataViz(survey='2MASS',
            ngauss=128,
            quantile=0.05,
            dataFilename='All.npz',
            iter='10th',
            Nsamples=3e5,
            contourColor='k',
            dustFile='dust.npz',
            sdss5=False):
    """
    Plot the raw color-magnitude data against samples drawn from a saved
    XDGMM, then repeat the plot for each posterior-parallax file.

    Parameters
    ----------
    survey: string
        'APASS' or '2MASS'; selects the bands, axis labels and limits.
    ngauss, quantile, iter, dataFilename:
        Concatenated into the XDGMM and posterior file names; `ngauss`
        is also used in the renamed prior PDF name.
    Nsamples: number
        Passed to `xdgmm.sample` for the initial draw.
    contourColor: string
        Contour color passed to the first `dp.plot_sample` call; the
        posterior plots always use 'k'.
    dustFile: string
        File loaded with `np.load`; its 'ebv' entry is used as the dust
        correction for the posterior section.
    sdss5: bool
        Forwarded to `dp.plot_sample` for the posterior plots.

    Raises
    ------
    ValueError
        If `survey` is not one of the supported names.

    Notes
    -----
    Side effects visible here: after each `dp.plot_sample` call the
    files 'plot_sample.data.pdf' and 'plot_sample.prior.pdf' (expected
    to exist at that point) are renamed, and 'cmdExpectation.txt' is
    written once per posterior file.
    """
    # (mag1, mag2, absmag, xlabel, ylabel, xlim, ylim) per survey.
    surveySettings = {
        'APASS': ('B', 'V', 'G', 'B-V', r'M$_\mathrm{G}$',
                  [-0.2, 2], [9, -2]),
        '2MASS': ('J', 'K', 'J', r'$(J-K)^C$', r'$M_J^C$',
                  [-0.25, 1.25], [6, -6]),
    }
    # Fail early and clearly instead of hitting an unbound local later.
    if survey not in surveySettings:
        raise ValueError("survey must be 'APASS' or '2MASS', got %r"
                         % (survey,))
    mag1, mag2, absmag, xlabel, ylabel, xlim, ylim = surveySettings[survey]

    def asymmetricAbsMagErrors(kinda, kinda_err):
        """Return (absMag, lower error, upper error) for kinda +/- kinda_err."""
        lower = kinda - kinda_err
        absMagLower = testXD.absMagKinda2absMag(lower)
        # Where the lower bound is negative, pin it to -50 so the lower
        # error bar is a large finite number.
        absMagLower[lower < 0] = -50.
        centre = testXD.absMagKinda2absMag(kinda)
        upper = testXD.absMagKinda2absMag(kinda + kinda_err)
        return centre, centre - absMagLower, upper - centre

    fileTag = (str(ngauss) + 'gauss.dQ' + str(quantile) + '.' + iter + '.' +
               survey + '.' + dataFilename)
    xdgmmFilename = 'xdgmm.' + fileTag + '.fit'
    priorFile = 'prior_' + str(ngauss) + 'gauss.pdf'

    tgas, twoMass, Apass, bandDictionary, indices = testXD.dataArrays()

    # --- raw data, no dust correction ---------------------------------
    dustEBV = 0.0
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])

    band1 = bandDictionary[mag1]
    band2 = bandDictionary[mag2]
    bandAbs = bandDictionary[absmag]
    color_err = np.sqrt(band1['array'][band1['err_key']]**2. +
                        band2['array'][band2['err_key']]**2.)
    absMagKinda_err = tgas['parallax_error'] * 10.**(
        0.2 * bandAbs['array'][bandAbs['key']])

    xdgmm = XDGMM(filename=xdgmmFilename)
    sample = xdgmm.sample(Nsamples)
    # Redraw every sample whose second coordinate is negative until
    # none remain.
    negParallax = sample[:, 1] < 0
    nNegP = np.sum(negParallax)
    while nNegP > 0:
        sample[negParallax] = xdgmm.sample(nNegP)
        negParallax = sample[:, 1] < 0
        nNegP = np.sum(negParallax)

    positive = absMagKinda > 0
    absMagData, yerr_minus, yerr_plus = asymmetricAbsMagErrors(
        absMagKinda[positive], absMagKinda_err[positive])

    dp.plot_sample(color[positive],
                   absMagData,
                   sample[:, 0],
                   testXD.absMagKinda2absMag(sample[:, 1]),
                   xdgmm,
                   xerr=color_err[positive],
                   yerr=[yerr_minus, yerr_plus],
                   xlabel=xlabel,
                   ylabel=ylabel,
                   xlim=xlim,
                   ylim=ylim,
                   errSubsample=2.4e3,
                   thresholdScatter=2.,
                   binsScatter=200,
                   contourColor=contourColor)
    os.rename('plot_sample.data.pdf', 'data_noDust.pdf')
    os.rename('plot_sample.prior.pdf', priorFile)

    # --- dust-corrected quantities for the posterior section ----------
    dustEBV = np.load(dustFile)['ebv']
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])

    cNorm = plt.matplotlib.colors.Normalize(vmin=-6, vmax=2)
    posteriorFile = 'posteriorParallax.' + fileTag
    for posteriorPath in [posteriorFile, 'posteriorSimple.npz']:
        data = np.load(posteriorPath)
        variance = data['var']
        parallax = data['mean']
        parallax_err = np.sqrt(variance)
        notnans = ~np.isnan(parallax) & ~np.isnan(parallax_err)

        magScale = 10.**(0.2 * apparentMagnitude[notnans])
        # Log ratio of posterior variance to the catalogue parallax variance.
        c = np.log(variance) - np.log(tgas['parallax_error']**2.)

        absMag, yerr_minus, yerr_plus = asymmetricAbsMagErrors(
            parallax[notnans] * magScale, parallax_err[notnans] * magScale)

        # NOTE(review): both iterations write the same
        # 'cmdExpectation.txt' path - confirm that is intended.
        ascii.write([
            color[notnans], absMag, color_err[notnans], yerr_minus, yerr_plus,
            c[notnans]
        ],
                    'cmdExpectation.txt',
                    names=[
                        'color', 'absMag', 'color_err', 'absMag_errMinus',
                        'absMag_errPlus', 'logDeltaVar'
                    ])

        # NOTE(review): `c` is passed unmasked while the coordinates use
        # `notnans` - confirm dp.plot_sample expects that.
        dp.plot_sample(color[notnans],
                       absMag,
                       sample[:, 0],
                       testXD.absMagKinda2absMag(sample[:, 1]),
                       xdgmm,
                       xerr=color_err[notnans],
                       yerr=[yerr_minus, yerr_plus],
                       xlabel=xlabel,
                       ylabel=ylabel,
                       xlim=xlim,
                       ylim=ylim,
                       errSubsample=1.2e3,
                       thresholdScatter=2.,
                       binsScatter=200,
                       c=c,
                       norm=cNorm,
                       cmap='Blues',
                       contourColor='k',
                       posterior=True,
                       sdss5=sdss5,
                       rasterized=False)

        # The data PDF is named after the posterior file's first
        # dot-separated component; the prior PDF name is the same every time.
        dataFile = ('inferredDistances_data_' + posteriorPath.split('.')[0] +
                    '.pdf')
        os.rename('plot_sample.data.pdf', dataFile)
        os.rename('plot_sample.prior.pdf', priorFile)
Exemplo n.º 17
0
class XDGMMTestCase(unittest.TestCase):
    """TestCase class for XDGMM class."""

    def setUp(self):
        """
        Set up each test with a new XDGMM object and some data.
        """
        self.xdgmm = XDGMM(n_components=3)
        # Paths of files written by a test; removed again in tearDown.
        self.files = []

        # Use scikit-learn GaussianMixture for sampling some data points.
        # The mixture is never fitted: its weights, means, covariances
        # and precisions are assigned by hand before sample() is called.
        self.gmm = skl_GMM(n_components=3,
                           max_iter=10,
                           covariance_type='full',
                           random_state=None)
        self.gmm.weights_ = np.array([0.3, 0.5, 0.2])
        self.gmm.means_ = np.array(
            [np.array([0, 1]),
             np.array([5, 4]),
             np.array([2, 4])])
        self.gmm.covariances_ = np.array([
            np.diag((2, 1)),
            np.array([[1, 0.2], [0.2, 1]]),
            np.diag((0.3, 0.5))
        ])

        self.gmm.precisions_ = np.linalg.inv(self.gmm.covariances_)
        self.gmm.precisions_cholesky_ = np.linalg.cholesky(
            self.gmm.precisions_)

        self.X = self.gmm.sample(1000)[0]
        # Per-point diagonal error covariances built from random
        # standard deviations in [0, 0.2).
        errs = 0.2 * np.random.random_sample((1000, 2))
        self.Xerr = np.zeros(self.X.shape + self.X.shape[-1:])
        diag = np.arange(self.X.shape[-1])
        self.Xerr[:, diag, diag] = np.vstack([errs[:, 0]**2, errs[:, 1]**2]).T

    def tearDown(self):
        """
        Clean up files saved by tests
        """
        for fname in self.files:
            # A registered file may be missing if the test failed
            # before writing it.
            if os.path.exists(fname):
                os.remove(fname)

    def test_Fit(self):
        """Fitting populates mu, V and weights."""
        self.xdgmm.fit(self.X, self.Xerr)

        self.assertIsNotNone(self.xdgmm.mu)
        self.assertIsNotNone(self.xdgmm.V)
        self.assertIsNotNone(self.xdgmm.weights)

    def test_Sample(self):
        """Sampling a fitted model returns one 2-D point per draw."""
        self.xdgmm.fit(self.X, self.Xerr)
        sam = self.xdgmm.sample(1000)
        self.assertEqual(sam.shape, (1000, 2))

    def test_Score(self):
        """Scoring two points with errors gives a non-zero value."""
        self.xdgmm.fit(self.X, self.Xerr)
        data = np.array([np.array([0, 2]), np.array([4, 4])])
        err = np.array([np.diag((0.2, 0.1)), np.diag((0.15, 0.15))])
        self.assertNotEqual(self.xdgmm.score(data, err), 0)

    def test_ReadWrite(self):
        """A saved model reloads with the same leading parameters."""
        self.xdgmm.fit(self.X, self.Xerr)
        # Register the file before writing it, so tearDown removes it
        # even when an assertion below fails.
        self.files.append('test.fit')
        self.xdgmm.save_model('test.fit')
        xd2 = XDGMM(filename='test.fit')

        # Compare absolute differences; a signed difference would let
        # large negative deviations pass.
        self.assertLess(abs(self.xdgmm.mu[0, 0] - xd2.mu[0, 0]), 1e-5)
        self.assertLess(abs(self.xdgmm.V[0, 0, 0] - xd2.V[0, 0, 0]), 1e-5)
        self.assertLess(abs(self.xdgmm.weights[0] - xd2.weights[0]), 1e-5)

    def test_Condition(self):
        """Conditioning on one of two dimensions leaves a 1-D mixture."""
        self.xdgmm.fit(self.X, self.Xerr)
        cond_xd = self.xdgmm.condition(X_input=np.array([np.nan, 3.5]))

        self.assertEqual(cond_xd.mu.shape, (3, 1))
        self.assertEqual(cond_xd.V.shape, (3, 1, 1))
Exemplo n.º 18
0
def main():
    #    for label, style in zip(['paper', 'talk'],['seaborn-paper', 'seaborn-talk']):
    pdf = True
    plot_data = True
    plot_dust = False
    plot_prior = False
    plot_m67 = True
    plot_compare = False
    plot_expectation = True
    plot_odd_examples = False
    plot_examples = False
    plot_delta = False
    plot_deltacdf = False
    plot_nobias = False
    plot_wtf = False
    plot_toy = False

    #figsize2x1 = (12, 5.5)
    #figsize2x2 = (12, 11)
    #figsize3x2 = (18, 11)
    style = 'seaborn-paper'
    #plt.style.use(style)
    #fontsize = 12
    #annotateTextSize = 12
    #legendTextSize = 12
    params = {
        'axes.labelsize': 9,
        'font.size': 9,
        'legend.fontsize': 9,
        'xtick.labelsize': 9,
        'ytick.labelsize': 9,
        'text.usetex': False,
        'figure.figsize': [4.5, 4.5]
    }
    mpl.rcParams.update(params)
    #mpl.rcParams['xtick.labelsize'] = fontsize
    #mpl.rcParams['ytick.labelsize'] = fontsize
    #mpl.rcParams['axes.labelsize'] = fontsize
    #mpl.rcParams['font.size'] = fontsize
    nsubsamples = 1024
    np.random.seed(0)

    trueColor = '#FF8D28'
    priorColor = '#7bccc4'  #'#6baed6' #'#9ebcda' #'#9ecae1' #'royalblue'
    priorColor = '#6FB8B0'
    cmap_prior = 'Blues'
    posteriorColor = '#0977C4'  #'#0868ac' #'#984ea3' #'#7a0177' #'#8856a7' #'#810f7c' #'#08519c' #'darkblue'
    dataColor = 'black'
    posteriorMapColor = 'Blues'
    annotationColor = '#FF2412'
    color1 = np.array((240, 249, 232)) / 255.
    color1 = np.array((255, 255, 255)) / 255.
    #color2 = np.array((123,204,196))/255.
    color2 = np.array((112, 186, 179)) / 255.
    colors = [color1, color2]
    cm = LinearSegmentedColormap.from_list('my_color', colors, N=100)
    cmap_prior = LinearSegmentedColormap.from_list('my_color', colors, N=100)
    color2 = np.array((6, 82, 135)) / 255.
    colors = [color1, color2]
    cmap_posterior = LinearSegmentedColormap.from_list('my_color',
                                                       colors,
                                                       N=100)

    mag1 = 'J'
    mag2 = 'K'
    absmag = 'J'
    xlabel_cmd = r'$(J-K_s)^C$'
    ylabel_cmd = r'$M_J^C$'
    xlim_cmd = [-0.25, 1.25]
    ylim_cmd = [6, -6]

    dustFile = 'dustCorrection.128gauss.dQ0.05.10th.2MASS.All.npz'
    xdgmmFile = 'xdgmm.128gauss.dQ0.05.10th.2MASS.All.npz.fit'
    posteriorFile = 'posteriorParallax.128gauss.dQ0.05.10th.2MASS.All.npz'

    xdgmm = XDGMM(filename=xdgmmFile)
    #generate toy model plot
    mtrue = -1.37
    btrue = 0.2
    ttrue = 0.8
    nexamples = 5
    if plot_toy:
        fig, ax = makeFigureInstance(x=2, y=2, wspace=0.75)
        toy.makeplots(mtrue=mtrue,
                      btrue=btrue,
                      ttrue=ttrue,
                      nexamples=nexamples,
                      trueColor=trueColor,
                      priorColor=priorColor,
                      posteriorColor=posteriorColor,
                      dataColor=dataColor,
                      posteriorMapColor=posteriorMapColor,
                      fig=fig,
                      axes=ax)
        os.rename('toy.paper.pdf', 'paper/toy.pdf')
    #----------------------------------------------

    #generate raw data plot
    tgas, twoMass, Apass, bandDictionary, indices = testXD.dataArrays()
    posterior = np.load(posteriorFile)
    mean = posterior['mean']
    sigma = np.sqrt(posterior['var'])
    positive = (tgas['parallax'] > 0.) & (mean > 0.)
    ind = np.random.randint(0, len(tgas[positive]), nsubsamples)

    dustEBV = 0.0
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])
    absMagKinda_err = tgas['parallax_error'] * 10.**(0.2 * apparentMagnitude)
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)[positive]
    color_err = np.sqrt(
        bandDictionary[mag1]['array'][bandDictionary[mag1]['err_key']]**2. +
        bandDictionary[mag2]['array'][bandDictionary[mag2]['err_key']]**2.
    )[positive]
    absMag = testXD.absMagKinda2absMag(
        tgas['parallax'][positive] * 10.**(0.2 * apparentMagnitude[positive]))
    absMag_err = absMagError(tgas['parallax'][positive],
                             tgas['parallax_error'][positive],
                             apparentMagnitude[positive], absMag)
    titles = ["Observed Distribution", "Obs+Noise Distribution"]

    if plot_data:
        plot_samples(
            color,
            absMag,
            color_err,
            absMag_err,
            ind,
            contourColor='grey',
            rasterized=True,
            plot_contours=True,
            dataColor=dataColor,
            titles=titles,
            xlim=xlim_cmd,
            ylim=ylim_cmd,
            xlabel=xlabel_cmd,
            ylabel=ylabel_cmd,
            pdf=pdf
        )  #, annotateTextSize=annotateTextSize, figsize2x1=figsize2x1)
        if pdf: os.rename('plot_sample.pdf', 'paper/data.pdf')
        os.rename('plot_sample.png', 'data.png')

    #color_raw = color
    #color_err_raw = color_err
    #absMag_raw = absMag
    #absMag_err_raw = absMag_err
    #absMagKinda_raw = absMagKinda
    #absMagKinda_err_raw = absMagKinda_err
    #-------------------------------------------------------

    #dust plot
    if plot_dust:
        fig, ax = makeFigureInstance(figureSize=(6, 3), left=0.75)
        comparePrior.dustViz(ngauss=128,
                             quantile=0.05,
                             iter='10th',
                             survey='2MASS',
                             dataFilename='All.npz',
                             ax=ax,
                             tgas=tgas)
        fig.savefig('paper/dust.pdf', dpi=400)
        fig.savefig('dust.png')
        plt.close(fig)
    #-------------------------------------------------------

    #generate prior plot
    if plot_prior:
        samplex, sampley = sampleXDGMM(xdgmm, len(tgas))
        titles = [
            "Extreme Deconvolution\n  resampling",
            "Extreme Deconvolution\n  cluster locations"
        ]
        plot_samples(
            samplex,
            sampley,
            None,
            None,
            ind,
            contourColor='black',
            rasterized=True,
            plot_contours=True,
            dataColor=priorColor,
            titles=titles,
            xlim=xlim_cmd,
            ylim=ylim_cmd,
            xlabel=xlabel_cmd,
            ylabel=ylabel_cmd,
            prior=True,
            xdgmm=xdgmm,
            pdf=pdf
        )  #, annotateTextSize=annotateTextSize, figsize2x1=figsize2x1)
        if pdf: os.rename('plot_sample.pdf', 'paper/prior.pdf')
        os.rename('plot_sample.png', 'prior.png')
    #-------------------------------------------------------

    data = np.load(dustFile)
    dustEBV = data['ebv']
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    color_err = np.sqrt(
        bandDictionary[mag1]['array'][bandDictionary[mag1]['err_key']]**2. +
        bandDictionary[mag2]['array'][bandDictionary[mag2]['err_key']]**2.)
    absMagKinda_err = tgas['parallax_error'] * 10.**(0.2 * apparentMagnitude)
    #-------------------------------------------------------

    #M67 plot
    if plot_m67:
        fig, ax = makeFigureInstance(x=2,
                                     y=2,
                                     hspace=1.0,
                                     wspace=1.0,
                                     figureSize=(2.5, 2.5))
        #setup_text_plots(fontsize=fontsize, usetex=True)
        #fig, ax = plt.subplots(2,2, figsize=figsize2x2)
        #fig.subplots_adjust(left=0.1, right=0.95,
        #                               bottom=0.1, top=0.95,
        #                            wspace=0.25, hspace=0.25)

        #ax = ax.flatten()
        nPosteriorPoints = 1000
        print(dataColor)
        #def distanceTest(tgas, xdgmm, nPosteriorPoints, data1, data2, err1, err2, xlim, ylim, plot2DPost=False, dataColor='black', priorColor='green', truthColor='red', posteriorColor='blue', dl=0.1, db=0.1):

        testXD.distanceTest(tgas,
                            xdgmm,
                            nPosteriorPoints,
                            color,
                            absMagKinda,
                            color_err,
                            absMagKinda_err,
                            xlim_cmd,
                            ylim_cmd,
                            bandDictionary,
                            absmag,
                            dataColor=dataColor,
                            priorColor=priorColor,
                            truthColor=trueColor,
                            posteriorColor=posteriorColor,
                            figDist=fig,
                            axDist=ax,
                            xlabel=xlabel_cmd,
                            ylabel=ylabel_cmd,
                            dl=0.075,
                            db=0.075)
        plt.tight_layout()
        if pdf: fig.savefig('paper/m67.pdf', dpi=400)
        fig.savefig('m67.png')
        plt.close(fig)
    #-------------------------------------------------------

    color = color[positive]
    color_err = color_err[positive]
    apparentMagnitude = apparentMagnitude[positive]
    absMagKinda_dust = absMagKinda[positive]
    absMagKinda_dust_err = absMagKinda_err[positive]
    absMag_dust = testXD.absMagKinda2absMag(absMagKinda[positive])
    absMag_dust_err = absMagError(tgas['parallax'][positive],
                                  tgas['parallax_error'][positive],
                                  apparentMagnitude, absMag_dust)

    #generate comparison prior plot
    if plot_compare:
        #setup_text_plots(fontsize=fontsize, usetex=True)
        plt.clf()
        alpha = 0.1
        alpha_points = 0.01
        fig, ax = makeFigureInstance(x=2, y=1)  #, figsize=figsize2x1)
        #fig = plt.figure(figsize=figsize2x1)
        #fig.subplots_adjust(left=0.1, right=0.95,
        #                    bottom=0.15, top=0.95,
        #                    wspace=0.1, hspace=0.1)
        #ax1 = fig.add_subplot(121)
        #ax2 = fig.add_subplot(122)
        #ax = [ax1, ax2]
        titles = ['Exp Dec Sp \nDen Prior', 'CMD Prior']
    for i, file in enumerate(['posteriorSimple.npz', posteriorFile]):
        data = np.load(file)
        posterior = data['posterior']
        sigma = np.sqrt(data['var'])
        mean = data['mean']
        absMag = testXD.absMagKinda2absMag(mean[positive] *
                                           10.**(0.2 * apparentMagnitude))
        absMag_err = absMagError(mean[positive], sigma[positive],
                                 apparentMagnitude, absMag)
        if plot_compare:  #ax[i].scatter(color[ind], absMag[ind], c=posteriorColor, s=1, lw=0, alpha=alpha, zorder=0)
            ax[i].errorbar(color[ind],
                           absMag[ind],
                           xerr=color_err[ind],
                           yerr=[absMag_err[0][ind], absMag_err[1][ind]],
                           fmt="none",
                           zorder=0,
                           mew=0,
                           ecolor=posteriorColor,
                           alpha=0.5,
                           elinewidth=0.5,
                           color=posteriorColor)
            ax[i].set_xlim(xlim_cmd)
            ax[i].set_ylim(ylim_cmd[0], ylim_cmd[1] * 1.1)
            ax[i].text(
                0.05,
                0.95,
                titles[i],
                ha='left',
                va='top',
                transform=ax[i].transAxes)  #, fontsize=annotateTextSize)
            ax[i].set_xlabel(xlabel_cmd)
            if i in [1]:
                ax[i].yaxis.set_major_formatter(plt.NullFormatter())
            else:
                ax[i].set_ylabel(ylabel_cmd)
    if plot_compare:
        if pdf: fig.savefig('paper/comparePrior.pdf', dpi=400)
        fig.savefig('comparePrior.png')
        plt.close(fig)
    #-------------------------------------------------------

    #generate expectation plot

    absMag = testXD.absMagKinda2absMag(mean[positive] *
                                       10.**(0.2 * apparentMagnitude))
    absMag_err = absMagError(mean[positive], sigma[positive],
                             apparentMagnitude, absMag)
    titles = ["De-noised Expectation \nValues", "Posterior Distributions"]
    if plot_expectation:
        plot_samples(
            color,
            absMag,
            color_err,
            absMag_err,
            ind,
            contourColor='black',
            rasterized=True,
            plot_contours=True,
            dataColor=posteriorColor,
            titles=titles,
            xlim=xlim_cmd,
            ylim=ylim_cmd,
            xlabel=xlabel_cmd,
            ylabel=ylabel_cmd,
            pdf=pdf
        )  #, annotateTextSize=annotateTextSize, figsize2x1=figsize2x1)
        if pdf: os.rename('plot_sample.pdf', 'paper/posteriorCMD.pdf')
        os.rename('plot_sample.png', 'posteriorCMD.png')
    #-------------------------------------------------------

    #posterior example plot
    if plot_examples:
        colorBins = [0.0, 0.2, 0.4, 0.7, 1.0]
        digit = np.digitize(color, colorBins)

        ndim = 2
        nPosteriorPoints = 1000  #number of elements in the posterior array
        projectedDimension = 1  #which dimension to project the prior onto
        xparallaxMAS = np.linspace(0, 10, nPosteriorPoints)

        #plot likelihood and posterior in each axes
        for iteration in np.arange(20, 40):
            fig, ax = makeFigureInstance(
                x=3, y=2, hspace=0.75,
                figureSize=(2, 2))  #, figsize=figsize3x2)
            #fig, ax = plt.subplots(2, 3, figsize=figsize3x2)
            #ax = ax.flatten()
            #fig.subplots_adjust(left=0.1, right=0.9,
            #                        bottom=0.1, top=0.8,
            #                        wspace=0.4, hspace=0.5)

            plotPrior(xdgmm, ax[0], c=priorColor, lw=1)
            ax[0].set_xlim(xlim_cmd)
            ax[0].set_ylim(ylim_cmd)
            ax[0].set_xlabel(xlabel_cmd)
            ax[0].set_ylabel(ylabel_cmd)

            for i in range(np.max(digit)):
                currentInd = np.where((digit == i))[0]
                index = currentInd[np.random.randint(0, high=len(currentInd))]
                ax[0].scatter(color[index],
                              absMag_dust[index],
                              c=dataColor,
                              s=20)
                ax[0].errorbar(color[index],
                               absMag_dust[index],
                               xerr=[[color_err[index], color_err[index]]],
                               yerr=[[
                                   absMag_dust_err[0][index],
                                   absMag_dust_err[1][index]
                               ]],
                               fmt="none",
                               zorder=0,
                               lw=2.0,
                               mew=0,
                               alpha=1.0,
                               color=dataColor,
                               ecolor=dataColor)
                ax[0].annotate(str(i + 1),
                               (color[index] + 0.075, absMag_dust[index] +
                                0.175))  #, fontsize=annotateTextSize)
                #print len(color), len(absMagKinda_dust), len(color_err), len(absMagKinda_dust_err), len(apparentMagnitude)
                likeParallax, priorParallax, posteriorParallax, posteriorColor = likePriorPost(
                    color[index],
                    absMagKinda_dust[index],
                    color_err[index],
                    absMagKinda_dust_err[index],
                    apparentMagnitude[index],
                    xdgmm,
                    xparallaxMAS,
                    ndim=2,
                    nPosteriorPoints=1000,
                    projectedDimension=1)

                l1, = ax[i + 1].plot(xparallaxMAS,
                                     likeParallax * np.max(posteriorParallax) /
                                     np.max(likeParallax),
                                     lw=1,
                                     color=dataColor,
                                     zorder=100)
                l2, = ax[i +
                         1].plot(xparallaxMAS,
                                 priorParallax * np.max(posteriorParallax) /
                                 np.max(priorParallax),
                                 lw=0.5,
                                 color=priorColor)
                l3, = ax[i + 1].plot(xparallaxMAS,
                                     posteriorParallax,
                                     lw=2,
                                     color=posteriorColor)
                maxInd = posteriorParallax == np.max(posteriorParallax)
                maxPar = xparallaxMAS[maxInd]
                maxY = posteriorParallax[maxInd]
                if maxPar < 5: annX = 9
                else: annX = 0
                if i == 1: annY = 0.75 * maxY
                else: annY = maxY / 1.1
                ax[i + 1].text(annX, annY, str(i + 1))
                ax[i + 1].set_xlabel(r'$\varpi$ [mas]')
                ax[i + 1].tick_params(
                    axis='y',  # changes apply to the x-axis
                    which='both',  # both major and minor ticks are affected
                    bottom='off',  # ticks along the bottom edge are off
                    top='off',  # ticks along the top edge are off
                    labelbottom='off')  # labels along the bottom edge are off
                if i + 1 == 1:
                    leg = fig.legend(
                        (l1, l2, l3), ('likelihood', 'prior', 'posterior'),
                        'upper right')  #, fontsize=legendTextSize)
                    leg.get_frame().set_alpha(1.0)
                #plt.tight_layout()
                if pdf:
                    fig.savefig('posterior_' + str(iteration) + '.pdf',
                                dpi=400)
            fig.savefig('paper/posterior.pdf', dpi=400)
            fig.tight_layout()
            fig.savefig('posterior.png')
            plt.close(fig)

    #-------------------------------------------------------
    #odd posterior example plot
    if plot_odd_examples:

        #choose indices for odd plot_examples
        #odd colors and magnitudes
        #come back and do parallax negative
        SN = tgas['parallax'][positive] / tgas['parallax_error'][positive]
        oddIndicesWD_LowSN = np.where(
            np.logical_and((absMag_dust > 6. * color + 5.), (SN <= 5)))[0]
        oddIndicesWD_HighSN = np.where(
            np.logical_and((absMag_dust > 6. * color + 5.),
                           (SN > 5)))[0]  #3.6)[0]
        oddIndicesSSG = np.where(
            np.logical_and((absMag_dust < 7.5 * color - 1.5),
                           (absMag_dust > -8.1 * color + 7.8)))[0]
        oddIndicesPN_LowSN = np.where(
            np.logical_and(
                SN <= 5,
                np.logical_and((absMag_dust < 7.5 * color - 4.25),
                               (absMag_dust < -4.75 * color - 0.6))))[0]
        oddIndicesPN_HighSN = np.where(
            np.logical_and(
                SN > 5,
                np.logical_and((absMag_dust < 7.5 * color - 4.25),
                               (absMag_dust < -4.75 * color - 0.6))))[0]

        ndim = 2
        nPosteriorPoints = 1000  #number of elements in the posterior array
        projectedDimension = 1  #which dimension to project the prior onto
        xparallaxMAS = np.linspace(0, 10, nPosteriorPoints)
        xarray = np.logspace(-2, 2, 1000)
        xColor = np.linspace(-2, 4, nPosteriorPoints)
        samplex, sampley = sampleXDGMM(xdgmm, len(tgas) * 10)
        #plot likelihood and posterior in each axes
        for iteration in np.arange(0, 10):
            fig, ax = makeFigureInstance(
                x=3, y=2, hspace=0.75,
                figureSize=(2, 2))  #, figsize=figsize3x2)
            #fig, ax = plt.subplots(2, 3, figsize=figsize3x2)
            #ax = ax.flatten()
            #fig.subplots_adjust(left=0.1, right=0.9,
            #                        bottom=0.1, top=0.8,
            #                        wspace=0.4, hspace=0.5)
            ax[0].hist2d(samplex,
                         sampley,
                         bins=500,
                         norm=mpl.colors.LogNorm(),
                         cmap=plt.get_cmap(cmap_prior),
                         zorder=-1)
            #plotPrior(xdgmm, ax[0], c=priorColor, lw=1, stretch=True)
            ax[0].set_ylim(15, -10)
            ax[0].set_xlim(-1.2, 2)
            ax[0].set_ylim(ylim_cmd[0] + 3, ylim_cmd[1] - 3)
            ax[0].set_xlabel(xlabel_cmd)
            ax[0].set_ylabel(ylabel_cmd)

            for i, indices in enumerate([
                    oddIndicesWD_LowSN, oddIndicesWD_HighSN, oddIndicesSSG,
                    oddIndicesPN_LowSN, oddIndicesPN_HighSN
            ]):
                print(len(indices), indices)
                #if i == 0: index = indices[iteration]
                #else: index = indices[np.random.randint(0, high=len(indices))]
                index = indices[np.random.randint(0, high=len(indices))]
                ax[0].scatter(color[index],
                              absMag_dust[index],
                              c=dataColor,
                              s=20)
                yplus = absMag_dust_err[0][index]
                yminus = absMag_dust_err[1][index]
                if np.isnan(yplus): yplus = 10.
                if np.isnan(yminus): yminus = 10.
                print(yplus, yminus)
                ax[0].errorbar(color[index],
                               absMag_dust[index],
                               xerr=[[color_err[index]], [color_err[index]]],
                               yerr=[[yplus], [yminus]],
                               fmt="none",
                               zorder=0,
                               lw=2.0,
                               mew=0,
                               alpha=1.0,
                               color=dataColor,
                               ecolor=dataColor)
                ax[0].annotate(str(i + 1),
                               (color[index] + 0.075, absMag_dust[index] +
                                0.175))  #, fontsize=annotateTextSize)
                #print len(color), len(absMagKinda_dust), len(color_err), len(absMagKinda_dust_err), len(apparentMagnitude)
                likeParallax, priorParallax, posteriorParallax, posteriorColorArray = likePriorPost(
                    color[index],
                    absMagKinda_dust[index],
                    color_err[index],
                    absMagKinda_dust_err[index],
                    apparentMagnitude[index],
                    xdgmm,
                    xparallaxMAS,
                    ndim=2,
                    nPosteriorPoints=1000,
                    projectedDimension=1)

                likeParallaxFull, priorParallaxFull, posteriorParallaxFull, posteriorColorFull = likePriorPost(
                    color[index],
                    absMagKinda_dust[index],
                    color_err[index],
                    absMagKinda_dust_err[index],
                    apparentMagnitude[index],
                    xdgmm,
                    xarray,
                    ndim=2,
                    nPosteriorPoints=1000,
                    projectedDimension=1)

                meanPosteriorParallax = scipy.integrate.cumtrapz(
                    posteriorParallaxFull * xarray, x=xarray)[-1]
                x2PosteriorParallax = scipy.integrate.cumtrapz(
                    posteriorParallaxFull * xarray**2., x=xarray)[-1]
                varPosteriorParallax = x2PosteriorParallax - meanPosteriorParallax**2.
                meanPosteriorColor = scipy.integrate.cumtrapz(
                    posteriorColorFull * xColor, x=xColor)[-1]
                x2PosteriorColor = scipy.integrate.cumtrapz(
                    posteriorColorFull * xColor**2., x=xColor)[-1]
                varPosteriorColor = x2PosteriorColor - meanPosteriorColor**2.

                absMagPost = testXD.absMagKinda2absMag(
                    meanPosteriorParallax *
                    10.**(0.2 * apparentMagnitude[index]))
                absMag_errPost = absMagError(meanPosteriorParallax,
                                             np.sqrt(varPosteriorParallax),
                                             apparentMagnitude[index],
                                             absMagPost)
                yplus = absMag_dust_err[0][index]
                yminus = absMag_dust_err[1][index]

                if np.isnan(yplus): yplus = 10.
                if np.isnan(yminus): yminus = 10.

                l1, = ax[i + 1].plot(xparallaxMAS,
                                     likeParallax * np.max(posteriorParallax) /
                                     np.max(likeParallax),
                                     lw=2,
                                     color=dataColor,
                                     zorder=100)
                l2, = ax[i +
                         1].plot(xparallaxMAS,
                                 priorParallax * np.max(posteriorParallax) /
                                 np.max(priorParallax),
                                 lw=2,
                                 color=priorColor,
                                 linestyle='--')
                l3, = ax[i + 1].plot(xparallaxMAS,
                                     posteriorParallax,
                                     lw=2,
                                     color=posteriorColor)
                ax[0].scatter(meanPosteriorColor,
                              absMagPost,
                              c=posteriorColor,
                              s=20)
                ax[0].errorbar(meanPosteriorColor,
                               absMagPost,
                               xerr=[[np.sqrt(varPosteriorColor)],
                                     [np.sqrt(varPosteriorColor)]],
                               yerr=[[yplus], [yminus]],
                               fmt="none",
                               zorder=0,
                               lw=2.0,
                               mew=0,
                               alpha=1.0,
                               color=posteriorColor,
                               ecolor=posteriorColor)
                maxInd = np.where(
                    posteriorParallax == np.max(posteriorParallax))[0]
                maxPar = xparallaxMAS[maxInd]
                maxY = posteriorParallax[maxInd]
                if maxPar < 5: annX = 9
                else: annX = 0
                if i == 1: annY = 0.75 * maxY
                else: annY = maxY / 1.1
                ax[i + 1].text(annX, annY, str(i + 1))
                ax[i + 1].set_xlabel(r'$\varpi$ [mas]')
                ax[i + 1].tick_params(
                    axis='y',  # changes apply to the x-axis
                    which='both',  # both major and minor ticks are affected
                    bottom='off',  # ticks along the bottom edge are off
                    top='off',  # ticks along the top edge are off
                    labelbottom='off')  # labels along the bottom edge are off
                if i + 1 == 1:
                    leg = fig.legend(
                        (l1, l2, l3), ('likelihood', 'prior', 'posterior'),
                        'upper right')  #, fontsize=legendTextSize)
                    leg.get_frame().set_alpha(1.0)
                #plt.tight_layout()
                if pdf:
                    fig.savefig('posterior_' + str(iteration) + '_odd.pdf',
                                dpi=400)
            fig.savefig('paper/posterior_odd.pdf', dpi=400)
            fig.tight_layout()
            fig.savefig('posterior_odd.png')
            plt.close(fig)
    #-------------------------------------

    #delta plot
    label = r'$\mathrm{ln} \, \tilde{\sigma}_{\varpi}^2 - \mathrm{ln} \, \sigma_{\varpi}^2$'
    contourColor = '#1f77b4'
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    color_err = np.sqrt(
        bandDictionary[mag1]['array'][bandDictionary[mag1]['err_key']]**2. +
        bandDictionary[mag2]['array'][bandDictionary[mag2]['err_key']]**2.)

    x = color
    y = np.log(sigma**2.) - np.log(tgas['parallax_error']**2.)
    colorDeltaVar = y
    notnans = ~np.isnan(sigma) & ~np.isnan(
        tgas['parallax_error']) & ~np.isnan(color)

    if plot_delta:
        fig, ax = makeFigureInstance(x=2, y=1,
                                     wspace=1.0)  # , figsize=figsize2x1)
        #fig, ax = plt.subplots(1, 2, figsize=figsize2x1)
        levels = 1.0 - np.exp(-0.5 * np.arange(1.0, 2.1, 1.0)**2)
        norm = plt.matplotlib.colors.Normalize(vmin=-1.5, vmax=1)
        cmap = 'inferno'
        ax[0].scatter(x[notnans],
                      y[notnans],
                      c=y[notnans],
                      s=1,
                      lw=0,
                      alpha=0.05,
                      norm=norm,
                      cmap=cmap,
                      rasterized=True)
        #corner.hist2d(x[notnans], y[notnans], bins=200, ax=ax[0], levels=levels, no_fill_contours=True, plot_density=False, plot_data=False, color=contourColor, rasterized=True)
        ax[0].set_xlabel(xlabel_cmd)
        ax[0].set_ylim(-6, 2)
        ax[0].set_xlim(-0.5, 2)
        ax[0].set_ylabel(label)
        cNorm = plt.matplotlib.colors.Normalize(vmin=0.1, vmax=2)
        ax[1].scatter(x[positive],
                      absMag,
                      s=1,
                      lw=0,
                      c=y[positive],
                      alpha=0.05,
                      norm=norm,
                      cmap=cmap,
                      rasterized=True)
        ax[1].set_xlim(xlim_cmd)
        ax[1].set_ylim(ylim_cmd)
        ax[1].set_xlabel(xlabel_cmd)
        ax[1].set_ylabel(ylabel_cmd)
        if pdf: fig.savefig('paper/delta.pdf', dpi=400)
        fig.savefig('delta.png')
        plt.close(fig)

    #delta cdf plot
    ratioCmd = sigma[notnans]**2. / tgas['parallax_error'][notnans]**2.
    lnratio = np.log(ratioCmd)

    if plot_deltacdf:
        plt.clf()
        fig, ax = makeFigureInstance(left=0.75)
        N = len(lnratio)
        ys = np.arange(0 + 0.5 / N, 1, 1.0 / N)
        sinds = np.argsort(lnratio)
        f = scipy.interpolate.interp1d(lnratio[sinds], ys)
        f_inv = scipy.interpolate.interp1d(ys, lnratio[sinds])
        ax.plot(lnratio[sinds], ys, 'k-', lw=2)
        fac2 = np.log(1 / 4.)
        fac1 = 0.
        ax.plot([fac2, fac2], [-1, f(fac2)], 'k--', lw=2)
        ax.plot([-6, fac2], [f(fac2), f(fac2)], 'k--', lw=2)
        ax.plot([fac1, fac1], [-1, f(fac1)], 'k--', lw=2)
        ax.plot([-6, fac1], [f(fac1), f(fac1)], 'k--', lw=2)
        ax.plot([f_inv(0.5), f_inv(0.5)], [-1, 0.5], 'k--', lw=2)
        ax.plot([-6, f_inv(0.5)], [0.5, 0.5], 'k--', lw=2)
        ax.set_xlabel(label)
        ax.set_ylabel('cumulative fraction')
        ax.set_xlim(-6, 2)
        ax.set_ylim(-0.05, 1.05)
        if pdf: fig.savefig('paper/deltaCDF.pdf', dpi=400)
        fig.savefig('deltaCDF.png')
        plt.close(fig)
        print('fraction of stars which decreased in variance: ', f(fac1))
    #delta mean vs gaia uncertainty
    y = mean - tgas['parallax']
    x = tgas['parallax_error']
    good = ~np.isnan(y) & ~np.isnan(x)

    if plot_nobias:
        plt.clf()
        fig, ax = makeFigureInstance(left=0.75)
        levels = 1.0 - np.exp(-0.5 * np.arange(1.0, 2.1, 1.0)**2)
        contourColor = '#1f77b4'
        contourColor = 'black'
        #corner.hist2d(x[good], y[good], bins=200, ax=ax, levels=levels, no_fill_contours=True, plot_density=False, plot_data=False, color=contourColor, rasterized=True)
        #norm = plt.matplotlib.colors.Normalize(vmin=0.0, vmax=1)
        ax.scatter(x[notnans],
                   y[notnans],
                   c=colorDeltaVar[notnans],
                   s=1,
                   lw=0,
                   alpha=0.05,
                   norm=norm,
                   cmap=cmap,
                   rasterized=True)
        #ax.scatter(x[good], y[good], c=sigma[good], s=1, lw=0, alpha=0.05, norm=norm, cmap=cmap, rasterized=True)
        #ax.scatter(x[good], y[good], c=np.sqrt(sigma[good]), s=1, rasterized=True, zorder=0, alpha=0.1, cmap=cmap, norm=norm)
        ax.plot([0, 1.1], [0, 0], 'k--', lw=1)
        ax.set_xlim(0.15, 1.05)
        ax.set_ylim(-2.5, 2.5)
        ylabel = r'$\mathrm{Posterior \, Expectation \, Value} - \varpi_n$'
        xlabel = r'$\sigma_{\varpi,n}$'
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        if pdf: fig.savefig('paper/deltaParallax.pdf', dpi=400)
        fig.savefig('deltaParallax.png')
        plt.close(fig)

    #what's that feature plot
    if plot_wtf:
        fig, ax = makeFigureInstance(left=0.75)
        ax.scatter(color[positive],
                   absMag,
                   s=1,
                   lw=0,
                   c=dataColor,
                   alpha=0.01,
                   zorder=0,
                   rasterized=True)
        ax.set_xlim(xlim_cmd)
        ax.set_ylim(ylim_cmd)
        ax.set_xlabel(xlabel_cmd)
        ax.set_ylabel(ylabel_cmd)
        lowerMainSequence = (0.4, 5.5)
        upperMainSequence = (-0.225, 2)
        binarySequence = (0.65, 4)
        redClump = (0.35, -2)
        redGiantBranch = (1.0, -2)
        turnOff = (-0.15, 3.5)
        features = [
            lowerMainSequence, upperMainSequence, binarySequence, redClump,
            redGiantBranch, turnOff
        ]
        labels = [
            'lower MS', 'upper MS', 'binary sequence', 'red clump', 'RGB',
            'MS turn off', 'subgiant branch'
        ]
        for l, f in zip(labels, features):
            ax.text(f[0], f[1], l)  #, fontsize=annotateTextSize)
        if pdf: fig.savefig('paper/whatsThatFeature.pdf', dpi=400)
        fig.savefig('whatsThatFeature.png')
        plt.close(fig)
Exemplo n.º 19
0
#plot likelihood and posterior in each axes
# The prior model file name does not depend on the loop variable, so the
# name is built and the model is loaded once instead of being re-read from
# disk for every figure.
# NOTE(review): `ngauss`, `quantile` and `iter` are defined outside this
# fragment; `iter` shadows the builtin and has to be a string for the
# concatenation below to work -- confirm against the surrounding script.
xdgmmFilename = 'xdgmm.' + str(ngauss) + 'gauss.dQ' + str(
    quantile) + '.' + iter + '.2MASS.All.npz.fit'
xdgmm = XDGMM(filename=xdgmmFilename)

for iteration in np.arange(20, 40):
    # One 2x3 grid of panels per iteration, flattened so that the panels
    # can be addressed as ax[0] .. ax[5].
    fig, ax = plt.subplots(2, 3, figsize=(15, 9))
    ax = ax.flatten()
    fig.subplots_adjust(left=0.1,
                        right=0.9,
                        bottom=0.1,
                        top=0.9,
                        wspace=0.4,
                        hspace=0.5)

    #plot prior in upper left
    testXD.plotPrior(xdgmm, ax[0], c='k', lw=1)
    ax[0].set_xlim(xlim)
    ax[0].set_ylim(ylim)
    ax[0].set_xlabel('$(J-K)^C$', fontsize=18)
    ax[0].set_ylabel('$M_J^C$', fontsize=18)

    # Pick one random star from each bin of `digit` and mark it on the
    # prior panel.
    for i in range(np.max(digit)):
        currentInd = np.where((digit == i))[0]
        if len(currentInd) == 0:
            # np.random.randint(0, high=0) raises ValueError, so a bin
            # without any members is skipped instead of aborting the run.
            continue
        index = currentInd[np.random.randint(0, high=len(currentInd))]

        # Single parenthesised argument: prints the same text under
        # Python 2 and is valid syntax under Python 3.
        print('yerr minus: ' + str(yerr_minus[index]) + ' yerr plus: ' +
              str(yerr_plus[index]))
        ax[0].scatter(color[index],
                      testXD.absMagKinda2absMag(absMagKinda[index]),
                      c='black')
Exemplo n.º 20
0
class Empiricist(object):
    """
    Worker object that can fit supernova and host galaxy parameters 
    given noisy inputs using an XDGMM model, and then predict new
    supernovae based on this model and a set of new host galaxies.

    Parameters
    ----------
    model_file: string (optional)
        Name of text file containing model being used (default=None).
    fit_method: string (optional)
        Name of XD fitting method to use (default='astroML'). Must be
        either 'astroML' or 'Bovy'.

    Notes
    -----
    The class can be initialized with a model or one can be loaded or
        fit to data.
    """
    def __init__(self, model_file=None, fit_method='astroML'):
        """
        Create the underlying XDGMM object and optionally load a model.

        Parameters
        ----------
        model_file: string (optional)
            Name of a file holding a previously saved model. When given,
            it is loaded through read_model (default=None).
        fit_method: string (optional)
            Passed to XDGMM as its `method` argument and kept in
            self.fit_method (default='astroML').
        """
        self.XDGMM = XDGMM(n_components=7, method=fit_method)
        self.fit_method = fit_method
        # Always define model_file: get_SN and get_logR test
        # `self.model_file is None` to detect a missing model, which would
        # otherwise fail with AttributeError on a freshly built object.
        self.model_file = None

        if model_file is not None:
            # read_model stores the filename in self.model_file.
            self.read_model(model_file)

    def get_SN(self, X, Xerr=None, n_SN=1):
        """
        Conditions the XDGMM model based on the data in X and returns
        SN parameters sampled from the conditioned model.

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data. First 3 entries (SN parameters) should be NaN.
        Xerr: array_like, shape = (n_samples, n_features), optional
            Error on input data. SN errors should be 0.0. If None,
            errors are not used for the conditioning.
        n_SN: int (optional)
            Number of SNe to sample (default = 1).

        Returns
        -------
        SN_data: array_like, shape = (n_SN, 3)
            Sample of SN data taken from the conditioned model.

        Notes
        -----
        Assumes that the first three parameters used when fitting
            the model are the SN parameters.
        """
        if self.model_file is None:
            raise StandardError("Model parameters not set.")

        if Xerr is None: cond_XDGMM = self.XDGMM.condition(X)
        else: cond_XDGMM = self.XDGMM.condition(X, Xerr)

        return np.atleast_2d(cond_XDGMM.sample(n_SN))

    def fit_model(self, X, Xerr, filename='empiriciSN_model.fit',
                  n_components=6):
        """
        Fits the XD model to data.

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data.
        Xerr: array_like, shape = (n_samples, n_features, n_features)
            Error on input data.
        filename: string (optional)
            Filename for model fit to be saved to (default =
            'empiriciSN_model.fit').
        n_components: float (optional)
            Number of Gaussian components to use (default = 6)

        Notes
        -----
        The specified method and n_components Gaussian components will
        be used (typical BIC-optimized numbers of components for ~100s
        of training datapoints are 6 or 7).

        The fit will be saved in the file with name defined by the 
        filename variable.
        """
        self.XDGMM.n_components = n_components
        self.XDGMM = self.XDGMM.fit(X, Xerr)
        self.XDGMM.save_model(filename)
        self.model_file = filename
        return

    def fit_from_files(self, filelist, filename='empiriciSN_model.fit',
                  n_components=7):
        """
        Fits the XD model to data contained in the files provided.

        Parameters
        ----------
        filelist: array_like
            Array of strings containing names of files containing data
            to fit.
        filename: string (optional)
            Filename for model fit (default = 'empiriciSN_model.fit').
        n_components: float (optional)
            Number of Gaussian components to use (default = 7)
        method: string (optional)
            XD fitting method to use (default = 'astroML')

        Notes
        -----
        The model is fitted using the data contained in the files
        named in the `filelist` variable. This assumes that the data
        files are in the same format as those provided with this code
        and that only redshift, distance from host nucleus, host colors,
        and local host surface brightness are being used for the fit.
        """
        X, Xerr = self.get_data(filelist)
        self.fit_model(X, Xerr, filename=filename,
                       n_components=n_components)
        return

    def read_model(self, filename):
        """
        Reads the parameters of a model from a file.

        Parameters
        ----------
        filename: string
            Name of the file to read from.

        Notes
        -----
        Model parameters are stored in the self.XDGMM model object. 
        The model filename is stored self.model_file.
        """
        self.XDGMM.read_model(filename)
        self.model_file = filename
        return

    def component_test(self, X, Xerr, component_range, no_err=False):
        """
        Test the performance of the model for a range of numbers of
        Gaussian components.

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data.
        Xerr: array_like, shape = (n_samples, n_features, n_features)
            Error on input data.
        component_range: array_like
            Range of n_components to test.
        no_err: bool (optional)
            Flag for whether to calculate the BIC with the errors
            included or not. (default = False)

        Returns
        -------
        bics: array_like, shape = (len(param_range),)
            BIC for each value of n_components
        optimal_n_comp: float
            Number of components with lowest BIC score
        lowest_bic: float
            Lowest BIC from the scores computed.

        Notes
        -----
        Uses the XDGMM.bic_test method to compute the BIC score for
        each n_components in the component_range array.
        """
        bics, optimal_n_comp, lowest_bic = \
            self.XDGMM.bic_test(X, Xerr, component_range, no_err)
        return bics, optimal_n_comp, lowest_bic

    def get_logR(self,cond_indices, R_index, X, Xerr=None):
        """
        Uses a subset of parameters in the given data to condition the
        model and return a sample value for log(R/Re).

        Parameters
        ----------
        cond_indices: array_like
            Array of indices indicating which parameters to use to
            condition the model. Cannot contain [0, 1, 2] since these
            are SN parameters.
        R_index: int
            Index of log(R/Re) in the list of parameters that were used
            to fit the model.
        X: array_like, shape = (n < n_features,)
            Input data.
        Xerr: array_like, shape = (X.shape,) (optional)
            Error on input data. If none, no error used to condition.

        Returns
        -------
        logR: float
            Sample value of log(R/Re) taken from the conditioned model.

        Notes
        -----
        The fit_params array specifies a list of indices to use to
        condition the model. The model will be conditioned and then
        a radius will be drawn from the conditioned model.

        This is so that the radius can then be used to calculate local
        surface brightness to fully condition the model to sample
        likely SN parameters.

        This does not make assumptions about what parameters are being
        used in the model, but does assume that the model has been
        fit already and that the first three parameters in the data
        that were used to fit the model are the SN parameters.
        """
        if self.model_file is None:
            raise StandardError("Model parameters not set.")

        if 0 in cond_indices or 1 in cond_indices or 2 in cond_indices:
            raise ValueError("Cannot condition model on SN parameters.")
        if R_index in cond_indices:
            raise ValueError("Cannot condition model on log(R/Re).")

        cond_data = np.array([])
        if Xerr is not None: cond_err = np.array([])
        R_cond_idx = R_index
        n_features = self.XDGMM.mu.shape[1]
        j = 0

        for i in range(n_features):
            if i in cond_indices:
                cond_data = np.append(cond_data,X[j])
                if Xerr is not None: cond_err = np.append(cond_err, Xerr[j])
                j += 1
                if i < R_index: R_cond_idx -= 1
            else:
                cond_data = np.append(cond_data,np.nan)
                if Xerr is not None: cond_err = np.append(cond_err, 0.0)

        if Xerr is not None:
            cond_XDGMM = self.XDGMM.condition(cond_data, cond_err)
        else: cond_XDGMM = self.XDGMM.condition(cond_data)

        sample = cond_XDGMM.sample()
        logR = sample[0][R_cond_idx]
        return logR

    def get_local_SB(self, SB_params, R):
        """
        Uses magnitudes, a surface brightness (SB) profile, and
        a SN location to fit local surface brightnesses at the location
        of the SN.

        Parameters
        ----------
        SB_params: array_like, shape = (21,)
            Array of parameters needed for the SB fit. First entry
            should be a sersic index of 1 or 4, indicating whether to
            use an exponential or de Vaucouleurs profile. Following this
            should be sets of
            (magnitude, mag_unc, effective radius, rad_unc) data for
            each of the 5 ugriz filters, giving a total array length of
            21. These data are assumed to be known by the user.
        R: float
            Separation from host nucleus in units of log(R/Re).
            It is assumed that the Re used here is the r-band Re, as is
            output by the get_logR function.

        Returns
        -------
        SBs: array_like, shape = (5,)
            Local surface brightness at the location of the SN for each
            of the 5 ugriz filters. Units = mag/arcsec^2
        SB_errs: array_like, shape = (5,)
            Uncertainties on the local surface brightnesses. A NaN
            uncertainty is reported as 0.0 and the absolute value is
            always returned.

        Raises
        ------
        ValueError
            If SB_params[0] is neither 1 nor 4.
        """
        sersic_index = SB_params[0]
        if sersic_index != 1 and sersic_index != 4:
            raise ValueError("Sersic index must be 1 or 4")

        # SB_params[11] is the effective radius of the third filter
        # (r band), so this is the separation in arcsec.
        sep = (10**R) * SB_params[11]

        SBs = []
        SB_errs = []

        for band in range(5):
            base = 4 * band
            # 0.75257 ~= 2.5*log10(2); presumably converts the total
            # magnitude to that of half the light.
            halfmag = SB_params[base + 1] + 0.75257
            magerr = SB_params[base + 2]
            Re = SB_params[base + 3]
            Re_err = SB_params[base + 4]
            r = sep / Re

            Ie = halfmag + 2.5 * np.log10(np.pi * Re**2)
            Re2_unc = 2 * Re * Re_err * np.pi
            # NOTE(review): the denominator uses log10(pi*Re^2) where
            # standard error propagation would use pi*Re^2 -- left
            # unchanged, confirm intent.
            log_unc = 2.5 * Re2_unc / (np.log10(np.pi * Re**2) * np.log(10))
            Ie_unc = np.sqrt(magerr**2 + log_unc**2)

            if sersic_index == 1:
                # Exponential profile.
                Io = Ie - 1.824
                decay = np.exp(-1.68 * r)
                decay_unc = decay * 1.68 * sep * Re_err / (Re**2)
            else:
                # de Vaucouleurs profile.
                Io = Ie - 8.328
                decay = np.exp(-7.67 * (r**0.25))
                # NOTE(review): the derivative of (sep/Re)**0.25 would
                # carry sep**0.25 rather than sep -- left unchanged,
                # confirm intent.
                decay_unc = decay * 7.67 * sep * Re_err / (4 * Re**(1.25))
            Io_unc = Ie_unc

            sb = Io * decay
            # Relative uncertainties are combined in quadrature for BOTH
            # profiles (the de Vaucouleurs term was previously added
            # without being squared).
            sb_unc = sb * np.sqrt((Io_unc / Io)**2 + (decay_unc / decay)**2)
            if np.isnan(sb_unc):
                sb_unc = 0.0
            sb_unc = abs(sb_unc)

            SBs.append(sb)
            SB_errs.append(sb_unc)

        return np.array(SBs, dtype=float), np.array(SB_errs, dtype=float)

    def set_fit_method(self, fit_method):
        """
        Select which XD fitting method the wrapped model uses.

        Parameters
        ----------
        fit_method: string
            Name of fitting method to use. Must be either 'astroML' or
            'Bovy'.

        Raises
        ------
        ValueError
            If fit_method is not one of the two supported names. Nothing
            is modified in that case.

        Notes
        -----
        Updates self.XDGMM.method, self.XDGMM.n_iter and
        self.fit_method. The iteration count is tied to the method:
        100 for 'astroML' and 10**9 for 'Bovy'.
        """
        known_methods = (('astroML', 100), ('Bovy', 10**9))
        for method_name, iterations in known_methods:
            if fit_method == method_name:
                break
        else:
            # No supported name matched; fail before touching any state.
            raise ValueError("Method must be either 'astroML' or 'Bovy'")

        model = self.XDGMM
        model.method = fit_method
        model.n_iter = iterations
        self.fit_method = fit_method


    def get_data(self, filelist):
        """
        Parses SN and host data from a list of data files.

        Parameters
        ----------
        filelist: array_like
            Array of strings containing names of files containing data
            to fit.

        Returns
        -------
        X: array_like, shape = (n_samples, 20)
            Output data. Columns are, in order: the SALT2 SN parameters
            (x0, x1, c), host redshift, log(R/Re), the ten host colors
            (u-g, u-r, u-i, u-z, g-r, g-i, g-z, r-i, r-z, i-z), and the
            host surface brightnesses at the location of the SN in each
            of the ugriz filters.
        Xerr: array_like, shape = (n_samples, 20, 20)
            Error on output data. Only the diagonal is filled, with the
            squared uncertainty of each column; the redshift
            uncertainty is taken to be zero.

        Notes
        -----
        Each file is read as comma-separated text. Blank lines and
        lines starting with '#' are skipped, as are rows whose local
        surface brightness uncertainty is 'nan' in any filter.

        Column layout used (0-based): 4 = redshift; 7/8, 9/10, 11/12 =
        x0, x1, c with their errors; 15 = SN separation; 42/43 = r-band
        Re and its error; 18-27 = ugriz magnitudes, each followed by
        its error; 32/33, 38/39, 44/45, 50/51, 56/57 = local surface
        brightness in u, g, r, i, z, each followed by its error.

        This method needs further modularizing, to enable the worker
        to calculate host surface brightnesses separately (in a static
        method).
        """
        # For both groups the uncertainty sits in the column directly
        # after the value.
        mag_cols = (18, 20, 22, 24, 26)   # u, g, r, i, z magnitudes
        sb_cols = (32, 38, 44, 50, 56)    # u, g, r, i, z local SB
        n_bands = len(mag_cols)

        # Per-row values are gathered in lists and converted once at
        # the end, instead of regrowing arrays with np.append.
        lead, lead_err = [], []           # x0, x1, c, z, log(R/Re)
        mags, mag_errs = [], []
        sbs, sb_errs = [], []

        for filename in filelist:
            with open(filename, 'r') as infile:
                inlines = infile.readlines()

            for raw in inlines:
                # Stripping removes the trailing newline so that a
                # 'nan' in the final column is still recognized.
                text = raw.strip()
                if not text or text[0] == '#':
                    continue
                fields = text.split(',')
                if any(fields[col + 1].strip() == 'nan' for col in sb_cols):
                    continue

                re_r = float(fields[42])
                logr = np.log10(float(fields[15]) / re_r)
                logr_err = float(fields[43]) / (re_r * np.log(10))

                lead.append([float(fields[7]), float(fields[9]),
                             float(fields[11]), float(fields[4]), logr])
                # No redshift uncertainty is available in the files.
                lead_err.append([float(fields[8]), float(fields[10]),
                                 float(fields[12]), 0.0, logr_err])
                mags.append([float(fields[col]) for col in mag_cols])
                mag_errs.append([float(fields[col + 1]) for col in mag_cols])
                sbs.append([float(fields[col]) for col in sb_cols])
                sb_errs.append([float(fields[col + 1]) for col in sb_cols])

        # reshape keeps the arrays two-dimensional when no rows were read.
        lead = np.array(lead, dtype=float).reshape(-1, 5)
        lead_err = np.array(lead_err, dtype=float).reshape(-1, 5)
        mags = np.array(mags, dtype=float).reshape(-1, n_bands)
        mag_errs = np.array(mag_errs, dtype=float).reshape(-1, n_bands)
        sbs = np.array(sbs, dtype=float).reshape(-1, n_bands)
        sb_errs = np.array(sb_errs, dtype=float).reshape(-1, n_bands)

        # Every bluer-minus-redder filter pair, in the order
        # ug, ur, ui, uz, gr, gi, gz, ri, rz, iz.
        pairs = [(a, b) for a in range(n_bands)
                 for b in range(a + 1, n_bands)]
        colors = np.column_stack([mags[:, a] - mags[:, b]
                                  for a, b in pairs])
        color_errs = np.column_stack(
            [np.sqrt(mag_errs[:, a]**2 + mag_errs[:, b]**2)
             for a, b in pairs])

        X = np.hstack([lead, colors, sbs])
        errs = np.hstack([lead_err, color_errs, sb_errs])

        Xerr = np.zeros(X.shape + X.shape[-1:])
        diag = np.arange(X.shape[-1])
        Xerr[:, diag, diag] = errs**2
        return X, Xerr