def setUp(self):
    """
    Create a fresh XDGMM object and a synthetic noisy data set per test.

    A scikit-learn GaussianMixture is configured by hand (weights, means,
    covariances and the derived precision matrices) instead of being fit,
    and 1000 points are drawn from it.  Each point receives a diagonal
    error covariance whose entries are the squares of uniform random
    numbers in [0, 0.2).
    """
    self.xdgmm = XDGMM(n_components=3)
    self.files = []

    # Use scikit-learn GaussianMixture for sampling some data points.
    mixture = skl_GMM(n_components=3, max_iter=10,
                      covariance_type='full', random_state=None)
    mixture.weights_ = np.array([0.3, 0.5, 0.2])
    mixture.means_ = np.array([[0, 1], [5, 4], [2, 4]])
    mixture.covariances_ = np.array([
        [[2, 0], [0, 1]],
        [[1, 0.2], [0.2, 1]],
        [[0.3, 0], [0, 0.5]],
    ])
    # sample() needs the precision matrices and their Cholesky factors.
    mixture.precisions_ = np.linalg.inv(mixture.covariances_)
    mixture.precisions_cholesky_ = np.linalg.cholesky(mixture.precisions_)
    self.gmm = mixture

    # Draw the data first, then the errors, so the random stream is
    # consumed in the same order as before.
    self.X = self.gmm.sample(1000)[0]
    errs = 0.2 * np.random.random_sample((1000, 2))

    n_points, n_features = self.X.shape
    self.Xerr = np.zeros((n_points, n_features, n_features))
    idx = np.arange(n_features)
    self.Xerr[:, idx, idx] = errs ** 2
def plot_ellipses(X, Xerr, optimal_n_components):
    """
    Fit an XDGMM to the data and plot the component ellipses.

    Parameters
    ----------
    X: array_like, shape = (n_samples, n_features)
        Input data.
    Xerr: array_like, shape = (n_samples, n_features, n_features)
        Error covariance of each data point.
    optimal_n_components: int
        Number of Gaussian components to fit.

    Notes
    -----
    The model has to be fit before `predict`, `mu` and `V` can be used.
    Previously an unfit model was handed to the plotting helper and the
    `optimal_n_components` argument was ignored in favour of a hard-coded
    two components.
    """
    xdgmm = XDGMM(n_components=optimal_n_components, n_iter=1000)
    xdgmm.fit(X, Xerr)

    labels = xdgmm.predict(X, Xerr)
    plot_results(X, labels, xdgmm.mu, xdgmm.V, 0, 'Gaussian Mixture')

    plt.xlabel('Log(T90)', size=20)
    plt.ylabel('Log(Hardness Ratio)', size=20)
    plt.show()
def __init__(self, model_file=None, fit_method='astroML'):
    """
    Create the worker with a 7-component XDGMM model.

    Parameters
    ----------
    model_file: string (optional)
        Name of a file containing a stored model; when given it is loaded
        through `read_model` (default=None).
    fit_method: string (optional)
        XD fitting method handed to XDGMM (default='astroML').
    """
    self.XDGMM = XDGMM(n_components=7, method=fit_method)
    self.fit_method = fit_method
    # Always define the attribute so that "is a model loaded?" checks see
    # None instead of raising AttributeError when no file was supplied.
    self.model_file = None

    if model_file is not None:
        self.read_model(model_file)
def get_computed_models(X, Xerr):
    """
    Score XDGMM fits with 1 to 5 components using both BIC and AIC.

    Parameters
    ----------
    X: array_like
        Input data.
    Xerr: array_like
        Error on the input data.

    Returns
    -------
    bic, aic:
        Scores returned by `XDGMM.bic_test` and `XDGMM.aic_test` for each
        tested number of components.
    optimal_n_comp:
        Number of components preferred by the BIC test.
    """
    component_counts = np.arange(1, 6)
    model = XDGMM(n_iter=10**3)

    bic_scores, best_by_bic, _ = model.bic_test(X, Xerr, component_counts)
    aic_scores, best_by_aic, _ = model.aic_test(X, Xerr, component_counts)

    print("optimal bic {}".format(best_by_bic))
    print("optimal aic {}".format(best_by_aic))

    return bic_scores, aic_scores, best_by_bic
def priorSample(ngauss=128, quantile=0.5, iter='8th', survey='2MASS',
                dataFilename='All.npz', Nsamples=1.2e6,
                xdgmmFilename='xdgmm.fit', xlabel='X', ylabel='Y',
                contourColor='k'):
    """
    Plot a resampling of a stored XDGMM prior next to its component ellipses.

    The model is read from `xdgmmFilename`, `Nsamples` points are drawn from
    it, and any draw whose second coordinate is negative is redrawn until
    none remain.  The left panel shows the sample (contours plus scatter),
    the right panel one ellipse per Gaussian with opacity proportional to
    its weight.  The figure is saved as 'prior_ngauss<ngauss>.png'.

    Parameters
    ----------
    ngauss: int
        Only used to build the output file name.
    quantile, iter, survey, dataFilename:
        Accepted for interface compatibility; not used in this function.
    Nsamples: int or float
        Number of points to draw.  Converted to int, because the default is
        written as a float and a sample count must be an integer.
    xdgmmFilename: string
        File holding the stored XDGMM model.
    xlabel, ylabel: string
        Axis labels.
    contourColor: string
        Colour of the density contours.
    """
    setup_text_plots(fontsize=16, usetex=True)
    xdgmm = XDGMM(filename=xdgmmFilename)

    figPrior = plt.figure(figsize=(12, 5.5))
    figPrior.subplots_adjust(left=0.1, right=0.95,
                             bottom=0.15, top=0.95,
                             wspace=0.1, hspace=0.1)

    # Rejection-resample: redraw every point with a negative second
    # coordinate until all of them are non-negative.
    sample = xdgmm.sample(int(Nsamples))
    negParallax = sample[:, 1] < 0
    nNegP = np.sum(negParallax)
    while nNegP > 0:
        sample[negParallax] = xdgmm.sample(int(nNegP))
        negParallax = sample[:, 1] < 0
        nNegP = np.sum(negParallax)

    samplex = sample[:, 0]
    sampley = testXD.absMagKinda2absMag(sample[:, 1])

    alpha = 0.1
    xlim = [-0.25, 1.25]
    ylim = [6, -6]

    # Left panel: contours at the 2-D 1- and 2-sigma levels over the scatter.
    ax3 = figPrior.add_subplot(121)
    levels = 1.0 - np.exp(-0.5 * np.arange(1.0, 2.1, 1.0) ** 2)
    corner.hist2d(samplex, sampley, ax=ax3, levels=levels, bins=200,
                  plot_datapoints=False, no_fill_contours=True,
                  plot_density=False, color=contourColor)
    ax3.scatter(samplex, sampley, s=1, lw=0, c='k', alpha=alpha)

    # Right panel: one ellipse per component, faded by relative weight.
    ax4 = figPrior.add_subplot(122)
    maxWeight = np.max(xdgmm.weights)
    for i in range(xdgmm.n_components):
        points = drawEllipse.plotvector(xdgmm.mu[i], xdgmm.V[i])
        ax4.plot(points[0, :], testXD.absMagKinda2absMag(points[1, :]),
                 'k-', alpha=xdgmm.weights[i] / maxWeight)

    titles = ["Extreme Deconvolution\n  resampling",
              "Extreme Deconvolution\n  cluster locations"]
    ax = [ax3, ax4]
    for i in range(2):
        ax[i].set_xlim(xlim)
        ax[i].set_ylim(ylim[0], ylim[1] * 1.1)
        ax[i].text(0.05, 0.95, titles[i], ha='left', va='top',
                   transform=ax[i].transAxes, fontsize=18)
        ax[i].set_xlabel(xlabel, fontsize=18)
        if i in (1, 3):
            # Right-hand panel shares the y scale; hide its tick labels.
            ax[i].yaxis.set_major_formatter(plt.NullFormatter())
        else:
            ax[i].set_ylabel(ylabel, fontsize=18)

    figPrior.savefig('prior_ngauss' + str(ngauss) + '.png')
def GMM(ai, aj, ak, eai, eaj, eak, n_components=1, method="Bovy",
        mu=None, V=None, weights=None, fit=True):
    """
    Build (and optionally fit) an XDGMM on three measured quantities.

    Parameters
    ----------
    ai, aj, ak: array_like
        The three measured quantities, stacked as the columns of the data.
    eai, eaj, eak: array_like
        Their uncertainties; the squares fill the diagonal of each point's
        error covariance.
    n_components, method, mu, V, weights:
        Passed straight to the XDGMM constructor.
    fit: bool
        When True the model is fit to the data before the log-likelihood
        is evaluated.

    Returns
    -------
    xdgmm: XDGMM
        The model.
    LogL:
        Value of `xdgmm.logL` for the data.
    """
    data = np.vstack([ai, aj, ak]).T
    variances = np.vstack([eai ** 2, eaj ** 2, eak ** 2]).T

    # One diagonal covariance matrix per data point.
    n_features = data.shape[-1]
    covariances = np.zeros(data.shape + (n_features,))
    idx = np.arange(n_features)
    covariances[:, idx, idx] = variances

    model = XDGMM(n_components=n_components, method=method,
                  mu=mu, V=V, weights=weights)
    if fit:
        model.fit(data, covariances)

    return model, model.logL(data, covariances)
def comparePrior():
    """
    Overlay the component ellipses of two stored XDGMM priors.

    The 512- and 128-Gaussian models are read from their .fit files and
    every component is drawn twice: in absolute magnitude (left panel) and
    in the raw second model coordinate (right panel).  Opacity scales with
    the component weight relative to the largest weight, and only the
    heaviest component of each model carries a legend label.  The figure
    is written to 'priorNgaussComparison.png'.
    """
    # (number of Gaussians, iteration tag, line colour, legend label)
    models = [
        (512, '1st', 'k', '512 Gaussians'),
        (128, '6th', 'red', '128 Gaussians'),
    ]
    fig, ax = plt.subplots(1, 2, figsize=(12, 5))
    absMagAxis, rawAxis = ax[0], ax[1]

    for nGauss, iteration, lineColor, legendLabel in models:
        xdgmmFilename = 'xdgmm.' + str(nGauss) + 'gauss.dQ0.05.' + iteration + '.2MASS.All.npz.fit'
        xdgmm = XDGMM(filename=xdgmmFilename)
        topWeight = np.max(xdgmm.weights)

        for k in range(xdgmm.n_components):
            weight = xdgmm.weights[k]
            lab = legendLabel if weight == topWeight else None
            opacity = weight / topWeight
            ellipse = drawEllipse.plotvector(xdgmm.mu[k], xdgmm.V[k])
            absMagAxis.plot(ellipse[0, :],
                            testXD.absMagKinda2absMag(ellipse[1, :]),
                            lineColor, lw=1, alpha=opacity)
            rawAxis.plot(ellipse[0, :], ellipse[1, :],
                         lineColor, lw=1, alpha=opacity, label=lab)

    for panel in ax:
        panel.set_xlim(-0.5, 1.5)
        panel.set_xlabel(r'$(J - K)^C$')

    absMagAxis.set_ylabel(r'$M_J^C$')
    rawAxis.set_ylabel(r'$\varpi 10^{0.2\,m_J}$')
    # Both y axes are inverted (upper limit given first).
    absMagAxis.set_ylim(6, -6)
    rawAxis.set_ylim(1100, -100)
    rawAxis.legend(loc='lower left', fontsize=10)

    plt.tight_layout()
    fig.savefig('priorNgaussComparison.png')
def test_ReadWrite(self):
    """
    Check that a fitted model survives a save / load round trip.

    The model is fit, written to 'test.fit' and read back into a second
    XDGMM; the first mean, covariance and weight entries must agree to
    within 1e-5.
    """
    self.xdgmm.fit(self.X, self.Xerr)
    self.xdgmm.save_model('test.fit')
    # Register the file before asserting so it is still recorded when an
    # assertion fails (presumably self.files drives cleanup in tearDown --
    # confirm against the test class).
    self.files.append('test.fit')

    xd2 = XDGMM(filename='test.fit')

    # Compare absolute differences: a signed difference would pass for any
    # arbitrarily large negative discrepancy.
    self.assertLess(abs(self.xdgmm.mu[0, 0] - xd2.mu[0, 0]), 1e-5)
    self.assertLess(abs(self.xdgmm.V[0, 0, 0] - xd2.V[0, 0, 0]), 1e-5)
    self.assertLess(abs(self.xdgmm.weights[0] - xd2.weights[0]), 1e-5)
def examplePosterior(nexamples=100, postFile='posteriorSimple.npz',
                     dustFile='dust.npz', nPosteriorPoints=1000,
                     xdgmmFilename='xdgmm.fit'):
    """
    Plot likelihood, prior and posterior for the stars whose posterior
    variance exceeds the observed parallax variance by the most.

    For the first `nexamples` stars, ordered by descending
    (posterior variance - parallax_error**2), the stored posterior, the
    Gaussian parallax likelihood, the prior projected onto parallax and a
    posterior recomputed on the fly are drawn on a common parallax grid.
    One PNG per star is written as
    'exampleCMDPosteriorLargerVariance_<index>.png'.

    Parameters
    ----------
    nexamples: int
        Number of stars to plot.
    postFile: string
        .npz file providing the 'posterior', 'mean' and 'var' arrays.
    dustFile: string
        .npz file providing the 'ebv' array.
    nPosteriorPoints: int
        Forwarded to `testXD.absMagKindaPosterior`.
    xdgmmFilename: string
        File holding the stored XDGMM prior.
    """
    tgas, twoMass, Apass, bandDictionary, indices = testXD.dataArrays()
    xdgmm = XDGMM(filename=xdgmmFilename)
    absmag = 'J'
    mag1 = 'J'
    mag2 = 'K'
    ndim = 2

    data = np.load(dustFile)
    dustEBV = data['ebv']
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    color_err = np.sqrt(
        bandDictionary[mag1]['array'][bandDictionary[mag1]['err_key']]**2. +
        bandDictionary[mag2]['array'][bandDictionary[mag2]['err_key']]**2.)

    xparallaxMAS = np.logspace(-2, 2, 1000)

    data = np.load(postFile)
    posterior = data['posterior']
    var = data['var']

    notnans = ~np.isnan(var) & ~np.isnan(tgas['parallax_error'])
    medianLogRatio = np.median(np.log(var[notnans]) -
                               np.log(tgas['parallax_error'][notnans]**2.))
    # Single-argument form: prints the same text on Python 2 and Python 3
    # (the old print statement was a SyntaxError on Python 3).
    print('The median of the differences of the logs:  %s' % (medianLogRatio,))

    varDiff = var - tgas['parallax_error']**2.
    # NOTE(review): argsort places NaN last, so after the reversal any NaN
    # entries of varDiff come first -- confirm whether they should be
    # filtered out with `notnans`.
    ind = np.argsort(varDiff)[::-1]

    for i in ind[0:nexamples]:
        # Factor applied to the prior density when it is plotted against
        # the parallax grid.
        scale = 10.**(0.2 * apparentMagnitude[i])

        xabsMagKinda = testXD.parallax2absMagKinda(xparallaxMAS,
                                                   apparentMagnitude[i])
        likelihood = st.gaussian(tgas['parallax'][i],
                                 tgas['parallax_error'][i], xparallaxMAS)

        meanPrior, covPrior = testXD.matrixize(color[i], absMagKinda[i],
                                               color_err[i], 1e3)
        meanPrior = meanPrior[0]
        covPrior = covPrior[0]
        allMeans, allAmps, allCovs, summedPriorAbsMagKinda = \
            testXD.absMagKindaPosterior(xdgmm, ndim, meanPrior, covPrior,
                                        xabsMagKinda, projectedDimension=1,
                                        nPosteriorPoints=nPosteriorPoints,
                                        prior=True)

        # Normalise the prior to unit area over the parallax grid.
        norm = scipy.integrate.cumtrapz(summedPriorAbsMagKinda * scale,
                                        x=xparallaxMAS)[-1]
        plotPrior = summedPriorAbsMagKinda * scale / norm

        # Posterior recomputed here as likelihood times prior.
        posteriorFly = likelihood * summedPriorAbsMagKinda * scale
        norm = scipy.integrate.cumtrapz(posteriorFly, x=xparallaxMAS)[-1]
        if norm > 0.0:
            posteriorFly = posteriorFly / norm

        plt.clf()
        plt.plot(xparallaxMAS, posterior[i], label='posterior')
        plt.plot(xparallaxMAS, likelihood, label='likelihood')
        plt.plot(xparallaxMAS, plotPrior, label='prior')
        plt.plot(xparallaxMAS, posteriorFly, label='posterior on the Fly')
        # Zoom to +/- 5 sigma around the measured parallax.
        plt.xlim(tgas['parallax'][i] - 5. * tgas['parallax_error'][i],
                 tgas['parallax'][i] + 5. * tgas['parallax_error'][i])
        plt.legend(loc='best')
        plt.tight_layout()
        plt.xlabel('parallax [mas]', fontsize=18)
        plt.title('J-K: ' + '{0:.1f}'.format(color[i]) + '    M: ' +
                  '{0:.1f}'.format(testXD.absMagKinda2absMag(absMagKinda[i])))
        plt.savefig('exampleCMDPosteriorLargerVariance_' + str(i) + '.png')
def paperComparePrior(ngauss=128, quantile=0.05, iter='10th', survey='2MASS',
                      dataFilename='All.npz', contourColor='k',
                      posteriorColor='royalblue'):
    """
    Plot posterior absolute magnitudes against colour for a random subset
    of 1024 stars, for both the simple and the XD-prior posterior files.

    Both posterior files are drawn on the same (left) axes and the figure
    is saved as 'comparePriorPaper.png'.

    Parameters
    ----------
    ngauss, quantile, iter, survey, dataFilename:
        Used to build the name of the XD posterior file.
    contourColor: string
        Accepted for interface compatibility; not used here.
    posteriorColor: string
        Colour of the error bars.

    Notes
    -----
    NOTE(review): `xdgmmFilename` and `dustFile` are neither parameters nor
    locals of this function; they must exist at module level for it to run
    -- confirm against the rest of the file.
    """
    setup_text_plots(fontsize=16, usetex=True)
    tgas, twoMass, Apass, bandDictionary, indices = testXD.dataArrays()
    xdgmm = XDGMM(filename=xdgmmFilename)
    absmag = 'J'
    mag1 = 'J'
    mag2 = 'K'
    xlabel = '$(J-K)^C$'
    ylabel = r'$M_J^C$'
    xlim = [-0.25, 1.25]
    ylim = [6, -6]

    data = np.load(dustFile)
    dustEBV = data['ebv']
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    color_err = np.sqrt(
        bandDictionary[mag1]['array'][bandDictionary[mag1]['err_key']]**2. +
        bandDictionary[mag2]['array'][bandDictionary[mag2]['err_key']]**2.)

    postFile = 'posteriorParallax.' + str(ngauss) + 'gauss.dQ' + str(quantile) + '.' + iter + '.' + survey + '.' + dataFilename

    # Random subset of stars to keep the plot readable.
    indices = np.random.randint(0, high=len(color), size=1024)

    fig, ax = plt.subplots(1, 2)
    for i, file in enumerate(['posteriorSimple.npz', postFile]):
        data = np.load(file)
        sigma = np.sqrt(data['var'])
        mean = data['mean']
        absMag = testXD.absMagKinda2absMag(mean * 10.**(0.2 * apparentMagnitude))
        absMagSigma = testXD.absMagKinda2absMag(sigma * 10.**(0.2 * apparentMagnitude))

        ax[0].scatter(color[indices], absMag[indices])
        # The error arrays are subset with the same indices as x and y so
        # their lengths match; fmt='none' draws error bars without markers.
        ax[0].errorbar(color[indices], absMag[indices],
                       xerr=color_err[indices],
                       yerr=[(absMag / absMagSigma)[indices],
                             (absMag * absMagSigma)[indices]],
                       fmt='none', zorder=0, lw=0.5, mew=0,
                       color=posteriorColor)

        ax[0].set_xlabel(xlabel, fontsize=18)
        ax[0].set_ylabel(ylabel, fontsize=18)
        plt.tight_layout()
        ax[0].set_ylim(ylim)
        ax[0].set_xlim(xlim)

    fig.savefig('comparePriorPaper.png')
def plotvector(mean, var, step=0.001):
    """
    Return points tracing the one-sigma ellipse of a 2-d Gaussian.

    mean, var should be *projected* to the 2-d space in which plotting is
    about to occur.

    Parameters
    ----------
    mean: ndarray, shape = (2,)
        Centre of the ellipse.
    var: ndarray, shape = (2, 2)
        Covariance matrix.
    step: float
        Angular step in radians between consecutive points.

    Returns
    -------
    ps: ndarray, shape = (2, n_points)
        x coordinates in row 0 and y coordinates in row 1.
    """
    assert mean.shape == (2, )
    assert var.shape == (2, 2)

    angles = np.arange(0, 2. * np.pi, step)

    # The eigenvectors give the axis directions of the ellipse and the
    # square roots of the eigenvalues the semi-axis lengths.
    eigvals, eigvecs = np.linalg.eigh(var)
    first_axis = np.sqrt(eigvals[0]) * eigvecs[:, 0][:, None]
    second_axis = np.sqrt(eigvals[1]) * eigvecs[:, 1][:, None]

    return (first_axis * np.cos(angles)[None, :] +
            second_axis * np.sin(angles)[None, :] +
            mean[:, None])


if __name__ == "__main__":
    # Script mode: draw every component of a stored model, with opacity
    # proportional to the component weight, and save the figure.
    from xdgmm import XDGMM
    import pylab as plt

    xdgmm = XDGMM(filename='xdgmm.1028gauss.1.2M.fit')
    amps = xdgmm.weights
    mus = xdgmm.mu
    Vs = xdgmm.V

    plt.clf()
    for amp, mean, var in zip(amps, mus, Vs):
        ps = plotvector(mean, var)
        plt.plot(ps[0, :], fixAbsMag(ps[1, :]), "k-",
                 alpha=amp / np.max(amps))
    plt.xlim(-2, 3)
    plt.ylim(10, -6)
    plt.savefig("drawEllipse.png")
def compareSimpleGaia(ngauss=128, quantile=0.05, iter='10th', survey='2MASS',
                      dataFilename='All.npz', contourColor='k'):
    """
    Compare posterior parallax estimates with the observed Gaia values.

    For each of two posterior files ('posteriorSimple.npz' and the XD
    posterior file named from the arguments) four figures are written,
    where <stem> is the file name up to its first '.':

    * '<stem>_Comparison2Gaia.png' -- posterior mean minus observed
      parallax, and log variance ratio, both against the posterior mean
      (stars with negative observed parallax in red);
    * 'deltaLogVariance_<stem>.png' -- log variance ratio against colour,
      and the colour-magnitude diagram coloured by that ratio;
    * 'denoisedVariance_<stem>.png' -- colour-magnitude diagram coloured by
      the change in variance and by the observed variance;
    * 'denoisedVarianceSamples_<stem>.png' -- the same, with the right
      panel using one posterior sample per star instead of the mean.

    Parameters
    ----------
    ngauss, quantile, iter, survey, dataFilename:
        Used to build the name of the XD posterior file.
    contourColor: string
        Colour of the density contours.

    Notes
    -----
    NOTE(review): `xdgmmFilename` and `dustFile` are neither parameters nor
    locals of this function; they must exist at module level for it to run
    -- confirm against the rest of the file.
    """
    setup_text_plots(fontsize=16, usetex=True)
    tgas, twoMass, Apass, bandDictionary, indices = testXD.dataArrays()
    xdgmm = XDGMM(filename=xdgmmFilename)
    absmag = 'J'
    mag1 = 'J'
    mag2 = 'K'
    xlabel = '$(J-K)^C$'
    ylabel = r'$M_J^C$'
    xlim = [-0.25, 1.25]
    ylim = [6, -6]
    logVarLabel = r'$\mathrm{ln} \, \tilde{\sigma}_{\varpi}^2 - \mathrm{ln} \, \sigma_{\varpi}^2$'

    data = np.load(dustFile)
    dustEBV = data['ebv']
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    color_err = np.sqrt(
        bandDictionary[mag1]['array'][bandDictionary[mag1]['err_key']]**2. +
        bandDictionary[mag2]['array'][bandDictionary[mag2]['err_key']]**2.)

    postFile = 'posteriorParallax.' + str(ngauss) + 'gauss.dQ' + str(quantile) + '.' + iter + '.' + survey + '.' + dataFilename

    def _decoratePanels(axes, titles):
        # Shared limits, title text and labels for a two-panel CMD figure.
        for panel, title in zip(axes, titles):
            panel.set_xlim(xlim)
            panel.set_ylim(ylim[0], ylim[1] * 1.1)
            panel.text(0.05, 0.95, title, ha='left', va='top',
                       transform=panel.transAxes, fontsize=18)
            panel.set_xlabel(xlabel, fontsize=18)
            panel.set_ylabel(ylabel, fontsize=18)

    varianceTitles = ["Colored by change in variance",
                      "Colored by observed variance"]

    for file in ['posteriorSimple.npz', postFile]:
        stem = file.split('.')[0]
        data = np.load(file)
        posterior = data['posterior']

        # Draw one sample per star from its posterior; -999 marks stars
        # for which sampling raised IndexError.
        samples = np.zeros(np.shape(posterior)[0])
        xparallaxMAS = np.logspace(-2, 2, np.shape(posterior)[1])
        for i, p in enumerate(posterior):
            try:
                samples[i] = testXD.samples(xparallaxMAS, p, 1, plot=False)[0]
            except IndexError:
                samples[i] = -999

        mean = data['mean']
        var = data['var']
        absMag = testXD.absMagKinda2absMag(mean * 10.**(0.2 * apparentMagnitude))
        absMagSample = testXD.absMagKinda2absMag(samples * 10.**(0.2 * apparentMagnitude))

        # Figure 1: posterior mean and variance versus the observed values.
        neg = tgas['parallax'] < 0
        fig, ax = plt.subplots(1, 2)
        ax[0].plot(data['mean'][~neg], mean[~neg] - tgas['parallax'][~neg],
                   'ko', markersize=0.5)
        ax[0].plot(data['mean'][neg], mean[neg] - tgas['parallax'][neg],
                   'ro', markersize=0.5)
        ax[0].set_xscale('log')
        ax[1].plot(data['mean'][~neg],
                   np.log(var[~neg]) - np.log(tgas['parallax_error'][~neg]**2.),
                   'ko', markersize=0.5)
        ax[1].plot(data['mean'][neg],
                   np.log(var[neg]) - np.log(tgas['parallax_error'][neg]**2.),
                   'ro', markersize=0.5)
        ax[1].set_xscale('log')
        ax[0].set_xlabel(r'$E[\varpi]$', fontsize=18)
        ax[1].set_xlabel(r'$E[\varpi]$', fontsize=18)
        ax[0].set_ylabel(r'$E[\varpi] - \varpi$', fontsize=18)
        ax[1].set_ylabel(logVarLabel, fontsize=18)
        plt.tight_layout()
        ax[0].set_ylim(-5, 5)
        ax[1].set_ylim(-6, 2)
        ax[0].set_xlim(1e-1, 1e1)
        ax[1].set_xlim(1e-1, 1e2)
        fig.savefig(stem + '_Comparison2Gaia.png')

        notnans = ~np.isnan(var) & ~np.isnan(tgas['parallax_error'])
        x = color[notnans]
        y = np.log(var[notnans]) - np.log(tgas['parallax_error'][notnans]**2.)
        # Single-argument form: prints the same text on Python 2 and
        # Python 3 (the old print statement was a SyntaxError on Python 3).
        print('The median of the differences of the logs:  %s' % (np.median(y),))

        # Figure 2: log variance ratio versus colour, and the CMD coloured
        # by that ratio.
        fig, ax = plt.subplots(1, 2, figsize=(14, 7))
        levels = 1.0 - np.exp(-0.5 * np.arange(1.0, 2.1, 1.0) ** 2)
        norm = plt.matplotlib.colors.Normalize(vmin=-1.5, vmax=1)
        cmap = 'inferno'
        ax[0].scatter(x, y, c=y, s=1, lw=0, alpha=0.05, norm=norm, cmap=cmap)
        # `plot_datapoints` is the hist2d keyword that suppresses the raw
        # points; the former `plot_data` is not a hist2d option.
        corner.hist2d(x, y, bins=200, ax=ax[0], levels=levels,
                      no_fill_contours=True, plot_density=False,
                      plot_datapoints=False, color=contourColor)
        ax[0].set_xlabel(r'$(J-K)^c$', fontsize=18)
        ax[0].set_ylim(-6, 2)
        ax[0].set_xlim(-0.5, 2)
        ax[0].set_ylabel(logVarLabel, fontsize=18)
        ax[1].scatter(x, absMag[notnans], s=1, lw=0, c=y, alpha=0.05,
                      norm=norm, cmap=cmap)
        ax[1].set_xlim(xlim)
        ax[1].set_ylim(ylim)
        ax[1].set_xlabel(xlabel, fontsize=18)
        ax[1].set_ylabel(ylabel, fontsize=18)
        fig.savefig('deltaLogVariance_' + stem + '.png')

        # Figure 3: CMD from posterior means, coloured two ways.
        figVarDiff = plt.figure(figsize=(14, 7))
        ax1 = figVarDiff.add_subplot(121)
        ax2 = figVarDiff.add_subplot(122)
        ax1.scatter(x, absMag[notnans], s=1, lw=0, c=y, alpha=0.05,
                    norm=norm, cmap=cmap)
        ax2.scatter(x, absMag[notnans], s=1, lw=0,
                    c=tgas['parallax_error'][notnans]**2., alpha=0.05,
                    cmap=cmap)
        ax = [ax1, ax2]
        _decoratePanels(ax, varianceTitles)
        figVarDiff.savefig('denoisedVariance_' + stem + '.png')

        # Figure 4: same layout, right panel built from posterior samples.
        figVarDiff.clf()
        ax1 = figVarDiff.add_subplot(121)
        ax2 = figVarDiff.add_subplot(122)
        ax1.scatter(x, absMag[notnans], s=1, lw=0, c=y, alpha=0.05,
                    norm=norm, cmap=cmap)
        ax2.scatter(x, absMagSample[notnans], s=1, lw=0,
                    c=tgas['parallax_error'][notnans]**2., alpha=0.05,
                    cmap=cmap)
        ax = [ax1, ax2]
        _decoratePanels(ax, varianceTitles)
        figVarDiff.savefig('denoisedVarianceSamples_' + stem + '.png')
class Empiricist(object):
    """
    Worker object that can fit supernova and host galaxy parameters
    given noisy inputs using an XDGMM model, and then predict new
    supernovae based on this model and a set of new host galaxies.

    Parameters
    ----------
    model_file: string (optional)
        Name of text file containing model being used (default=None).
    fit_method: string (optional)
        Name of XD fitting method to use (default='astroML'). Must be
        either 'astroML' or 'Bovy'.

    Notes
    -----
    The class can be initialized with a model or one can be loaded or
    fit to data.
    """

    # (value column, uncertainty column) pairs in the comma-separated
    # data files read by get_data.
    _SN_COLS = ((7, 8), (9, 10), (11, 12))  # x0, x1, c
    _MAG_COLS = ((18, 19), (20, 21), (22, 23), (24, 25), (26, 27))  # ugriz
    _SB_COLS = ((32, 33), (38, 39), (44, 45), (50, 51), (56, 57))  # ugriz

    def __init__(self, model_file=None, fit_method='astroML'):
        self.XDGMM = XDGMM(n_components=7, method=fit_method)
        self.fit_method = fit_method
        # Defined up front so the "model set?" checks below see None
        # instead of raising AttributeError when no model was loaded.
        self.model_file = None

        if model_file is not None:
            self.read_model(model_file)

    def _require_model(self):
        """Raise if no model has been fit or loaded yet."""
        if self.model_file is None:
            # RuntimeError exists on Python 2 and 3; on Python 2 it is a
            # subclass of StandardError, so existing handlers still match.
            raise RuntimeError("Model parameters not set.")

    def get_SN(self, X, Xerr=None, n_SN=1):
        """
        Conditions the XDGMM model based on the data in X and returns
        SN parameters sampled from the conditioned model.

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data. First 3 entries (SN parameters) should be NaN.
        Xerr: array_like, shape = (n_samples, n_features), optional
            Error on input data. SN errors should be 0.0. If None,
            errors are not used for the conditioning.
        n_SN: int (optional)
            Number of SNe to sample (default = 1).

        Returns
        -------
        SN_data: array_like, shape = (n_SN, 3)
            Sample of SN data taken from the conditioned model.

        Raises
        ------
        RuntimeError
            If no model has been fit or loaded.

        Notes
        -----
        Assumes that the first three parameters used when fitting
        the model are the SN parameters.
        """
        self._require_model()

        if Xerr is None:
            cond_XDGMM = self.XDGMM.condition(X)
        else:
            cond_XDGMM = self.XDGMM.condition(X, Xerr)

        return np.atleast_2d(cond_XDGMM.sample(n_SN))

    def fit_model(self, X, Xerr, filename='empiriciSN_model.fit',
                  n_components=6):
        """
        Fits the XD model to data.

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data.
        Xerr: array_like, shape = (n_samples, n_features, n_features)
            Error on input data.
        filename: string (optional)
            Filename for model fit to be saved to (default =
            'empiriciSN_model.fit').
        n_components: int (optional)
            Number of Gaussian components to use (default = 6)

        Notes
        -----
        The specified method and n_components Gaussian components will
        be used (typical BIC-optimized numbers of components for ~100s
        of training datapoints are 6 or 7).

        The fit will be saved in the file with name defined by the
        filename variable.
        """
        self.XDGMM.n_components = n_components
        self.XDGMM = self.XDGMM.fit(X, Xerr)
        self.XDGMM.save_model(filename)
        self.model_file = filename
        return

    def fit_from_files(self, filelist, filename='empiriciSN_model.fit',
                       n_components=7):
        """
        Fits the XD model to data contained in the files provided.

        Parameters
        ----------
        filelist: array_like
            Array of strings containing names of files containing data
            to fit.
        filename: string (optional)
            Filename for model fit (default = 'empiriciSN_model.fit').
        n_components: int (optional)
            Number of Gaussian components to use (default = 7)

        Notes
        -----
        The model is fitted using the data contained in the files
        named in the `filelist` variable. This assumes that the data
        files are in the same format as those provided with this code
        and that only redshift, distance from host nucleus, host colors,
        and local host surface brightness are being used for the fit.
        """
        X, Xerr = self.get_data(filelist)
        self.fit_model(X, Xerr, filename=filename,
                       n_components=n_components)
        return

    def read_model(self, filename):
        """
        Reads the parameters of a model from a file.

        Parameters
        ----------
        filename: string
            Name of the file to read from.

        Notes
        -----
        Model parameters are stored in the self.XDGMM model object.
        The model filename is stored self.model_file.
        """
        self.XDGMM.read_model(filename)
        self.model_file = filename
        return

    def component_test(self, X, Xerr, component_range, no_err=False):
        """
        Test the performance of the model for a range of numbers of
        Gaussian components.

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data.
        Xerr: array_like, shape = (n_samples, n_features, n_features)
            Error on input data.
        component_range: array_like
            Range of n_components to test.
        no_err: bool (optional)
            Flag for whether to calculate the BIC with the errors
            included or not. (default = False)

        Returns
        -------
        bics: array_like, shape = (len(param_range),)
            BIC for each value of n_components
        optimal_n_comp: float
            Number of components with lowest BIC score
        lowest_bic: float
            Lowest BIC from the scores computed.

        Notes
        -----
        Uses the XDGMM.bic_test method to compute the BIC score for
        each n_components in the component_range array.
        """
        bics, optimal_n_comp, lowest_bic = \
            self.XDGMM.bic_test(X, Xerr, component_range, no_err)
        return bics, optimal_n_comp, lowest_bic

    def get_logR(self, cond_indices, R_index, X, Xerr=None):
        """
        Uses a subset of parameters in the given data to condition the
        model and return a sample value for log(R/Re).

        Parameters
        ----------
        cond_indices: array_like
            Array of indices indicating which parameters to use to
            condition the model. Cannot contain [0, 1, 2] since these
            are SN parameters.
        R_index: int
            Index of log(R/Re) in the list of parameters that were used
            to fit the model.
        X: array_like, shape = (n < n_features,)
            Input data.
        Xerr: array_like, shape = (X.shape,) (optional)
            Error on input data. If none, no error used to condition.

        Returns
        -------
        logR: float
            Sample value of log(R/Re) taken from the conditioned model.

        Raises
        ------
        RuntimeError
            If no model has been fit or loaded.
        ValueError
            If `cond_indices` contains an SN parameter index or `R_index`.

        Notes
        -----
        The cond_indices array specifies a list of indices to use to
        condition the model. The model will be conditioned and then
        a radius will be drawn from the conditioned model.

        This is so that the radius can then be used to calculate local
        surface brightness to fully condition the model to sample
        likely SN parameters.

        This does not make assumptions about what parameters are being
        used in the model, but does assume that the model has been
        fit already and that the first three parameters in the data
        that were used to fit the model are the SN parameters.
        """
        self._require_model()

        if any(sn_idx in cond_indices for sn_idx in (0, 1, 2)):
            raise ValueError("Cannot condition model on SN parameters.")
        if R_index in cond_indices:
            raise ValueError("Cannot condition model on log(R/Re).")

        n_features = self.XDGMM.mu.shape[1]

        # NaN marks a feature that is left free in the conditioning.
        cond_data = np.full(n_features, np.nan)
        cond_err = np.zeros(n_features) if Xerr is not None else None

        R_cond_idx = R_index
        j = 0
        for i in range(n_features):
            if i not in cond_indices:
                continue
            cond_data[i] = X[j]
            if cond_err is not None:
                cond_err[i] = Xerr[j]
            j += 1
            # Conditioned features drop out of the conditioned model, so
            # each one that precedes log(R/Re) shifts its index down.
            if i < R_index:
                R_cond_idx -= 1

        if cond_err is not None:
            cond_XDGMM = self.XDGMM.condition(cond_data, cond_err)
        else:
            cond_XDGMM = self.XDGMM.condition(cond_data)

        sample = cond_XDGMM.sample()
        return sample[0][R_cond_idx]

    def get_local_SB(self, SB_params, R):
        """
        Uses magnitudes, a surface brightness (SB) profile, and
        a SN location to fit local surface brightnesses at the location
        of the SN.

        Parameters
        ----------
        SB_params: array_like, shape = (21,)
            Array of parameters needed for the SB fit. First entry
            should be a sersic index of 1 or 4, indicating whether to
            use an exponential or de Vaucouleurs profile. Following this
            should be sets of
            (magnitude, mag_unc, effective radius, rad_unc) data for
            each of the 5 ugriz filters, giving a total array length of
            21. These data are assumed to be known by the user.
        R: float
            Separation from host nucleus in units of log(R/Re).
            It is assumed that the Re used here is the r-band Re, as is
            output by the get_logR function.

        Returns
        -------
        SBs: array_like, shape = (5,)
            Local surface brightness at the location of the SN for each
            of the 5 ugriz filters. Units = mag/arcsec^2
        SB_errs: array_like, shape = (5,)
            Uncertainties on the local surface brightnesses.

        Raises
        ------
        ValueError
            If the sersic index is neither 1 nor 4.
        """
        sersic = SB_params[0]
        if sersic != 1 and sersic != 4:
            raise ValueError("Sersic index must be 1 or 4")

        # Entry 11 is the r-band effective radius.
        sep = (10**R) * SB_params[11]  # separation in arcsec

        SBs = []
        SB_errs = []
        for j in range(5):
            halfmag = SB_params[j * 4 + 1] + 0.75257
            magerr = SB_params[j * 4 + 2]
            Re = SB_params[j * 4 + 3]
            Re_err = SB_params[j * 4 + 4]
            r = sep / Re

            Ie = halfmag + 2.5 * np.log10(np.pi * Re**2)
            Re2_unc = 2 * Re * Re_err * np.pi
            log_unc = 2.5 * Re2_unc / (np.log10(np.pi * Re**2) * np.log(10))
            Ie_unc = np.sqrt(magerr**2 + log_unc**2)

            if sersic == 1:
                # Exponential profile.
                Io = Ie - 1.824
                profile = np.exp(-1.68 * r)
                exp_unc = profile * 1.68 * sep * Re_err / (Re**2)
            else:
                # de Vaucouleurs profile.
                Io = Ie - 8.328
                profile = np.exp(-7.67 * (r**0.25))
                exp_unc = profile * 7.67 * sep * Re_err / (4 * Re**(1.25))
            Io_unc = Ie_unc

            sb = Io * profile
            # Relative uncertainties add in quadrature.  The de Vaucouleurs
            # branch used to leave its second term unsquared, unlike the
            # exponential branch.
            sb_unc = sb * np.sqrt((Io_unc / Io)**2 + (exp_unc / profile)**2)
            if np.isnan(sb_unc):
                sb_unc = 0.0
            if sb_unc < 0:
                sb_unc = sb_unc * -1.0

            SBs.append(sb)
            SB_errs.append(sb_unc)

        return np.array(SBs, dtype=float), np.array(SB_errs, dtype=float)

    def set_fit_method(self, fit_method):
        """
        Sets the XD fitting method to use.

        Parameters
        ----------
        fit_method: string
            Name of fitting method to use. Must be either 'astroML' or
            'Bovy'.

        Raises
        ------
        ValueError
            If `fit_method` is not one of the two supported names.

        Notes
        -----
        Changes the fitting method of self.XDGMM to the one specified
        in `fit_method`, together with the iteration limit used for it.
        """
        if fit_method == 'astroML':
            n_iter = 100
        elif fit_method == 'Bovy':
            n_iter = 10**9
        else:
            raise ValueError("Method must be either 'astroML' or 'Bovy'")

        self.XDGMM.method = fit_method
        self.XDGMM.n_iter = n_iter
        self.fit_method = fit_method
        return

    def get_data(self, filelist):
        """
        Parses SN and host data from a list of data files.

        Parameters
        ----------
        filelist: array_like
            Array of strings containing names of files containing data
            to fit.

        Returns
        -------
        X: array_like, shape = (n_samples, n_features)
            Output data. Contains SALT2 SN parameters, host redshift,
            log(R/Re), host colors, and host brightnesses at the
            locations of the SN in each filter.
        Xerr: array_like, shape = (n_samples, n_features, n_features)
            Error on output data.

        Notes
        -----
        Reads in each data file and returns an array of data and a
        matrix of errors, which can be used to fit the XDGMM model.

        The 20 features are, in order: x0, x1, c, z, log(R/Re), the ten
        colors u-g, u-r, u-i, u-z, g-r, g-i, g-z, r-i, r-z, i-z, and the
        five local surface brightnesses. Lines starting with '#', blank
        lines, and rows whose surface-brightness uncertainty is 'nan'
        in any band are skipped.

        This method needs further modularizing, to enable the worker to
        calculate host surface brightnesses separately (in a static
        method).
        """
        n_raw = 15  # 3 SN params + z + log(R/Re) + 5 magnitudes + 5 SBs
        values = []
        errors = []

        for filename in filelist:
            with open(filename, 'r') as infile:
                for raw in infile:
                    if raw[0] == '#' or not raw.strip():
                        continue
                    # Strip each field so a trailing newline on the last
                    # column cannot defeat the 'nan' comparison.
                    line = [field.strip() for field in raw.split(',')]
                    if any(line[err] == 'nan' for _, err in self._SB_COLS):
                        continue

                    # SN params
                    row = [float(line[val]) for val, _ in self._SN_COLS]
                    row_err = [float(line[err]) for _, err in self._SN_COLS]

                    # Host redshift (no uncertainty available)
                    row.append(float(line[4]))
                    row_err.append(0.0)

                    # log(R/Re), using the r-band effective radius
                    Re_r = float(line[42])
                    row.append(np.log10(float(line[15]) / Re_r))
                    row_err.append(float(line[43]) / (Re_r * np.log(10)))

                    # Host magnitudes and local surface brightnesses.  The
                    # i-band SB uncertainty is column 51, matching both the
                    # NaN check and the value/uncertainty adjacency of the
                    # other bands.
                    for val, err in self._MAG_COLS + self._SB_COLS:
                        row.append(float(line[val]))
                        row_err.append(float(line[err]))

                    values.append(row)
                    errors.append(row_err)

        vals = np.array(values, dtype=float).reshape(-1, n_raw)
        errs = np.array(errors, dtype=float).reshape(-1, n_raw)

        mags = vals[:, 5:10]
        mag_errs = errs[:, 5:10]
        # All ten bluer-minus-redder colors in ugriz order.
        pairs = [(a, b) for a in range(5) for b in range(a + 1, 5)]
        colors = np.column_stack(
            [mags[:, a] - mags[:, b] for a, b in pairs])
        color_errs = np.column_stack(
            [np.sqrt(mag_errs[:, a]**2 + mag_errs[:, b]**2)
             for a, b in pairs])

        X = np.hstack([vals[:, :5], colors, vals[:, 10:]])
        sigma = np.hstack([errs[:, :5], color_errs, errs[:, 10:]])

        # One diagonal covariance matrix per sample.
        Xerr = np.zeros(X.shape + X.shape[-1:])
        diag = np.arange(X.shape[-1])
        Xerr[:, diag, diag] = sigma**2

        return X, Xerr
def dataViz(survey='2MASS', ngauss=128, quantile=0.05, dataFilename='All.npz',
            iter='10th', Nsamples=3e5, contourColor='k', dustFile='dust.npz',
            sdss5=False, whatsThatFeature=False):
    """Plot the raw colour-magnitude data and write the posterior CMD table.

    The function loads a previously fitted XDGMM model, draws samples from it,
    plots the (non dust-corrected) data next to those samples through
    ``dp.plot_sample`` and then, using the dust values in ``dustFile`` and the
    posterior parallax file, writes ``cmdExpectation.txt``.

    Parameters
    ----------
    survey : str
        Either ``'APASS'`` or ``'2MASS'``; selects bands, labels and limits.
    ngauss, quantile, iter, dataFilename
        Only used to build the model / posterior file names.
    Nsamples
        Passed unchanged to ``xdgmm.sample``.
    contourColor : str
        Contour colour of the first (raw data) plot.
    dustFile : str
        ``.npz`` file read with ``np.load``; its ``'ebv'`` entry is used.
    sdss5 : bool
        Accepted for interface compatibility; not used by this variant.
    whatsThatFeature : bool
        When true, also save the annotated ``whatsThatFeature.pdf`` figure.

    Raises
    ------
    ValueError
        If ``survey`` is not one of the supported names (previously this
        surfaced later as an unrelated ``NameError``).
    """
    # (mag1, mag2, absmag, xlabel, ylabel, xlim, ylim) for each survey.
    survey_settings = {
        'APASS': ('B', 'V', 'G', 'B-V', r'M$_\mathrm{G}$',
                  [-0.2, 2], [9, -2]),
        '2MASS': ('J', 'K', 'J', r'$(J-K)^C$', r'$M_J^C$',
                  [-0.25, 1.25], [6, -6]),
    }
    if survey not in survey_settings:
        raise ValueError("Unknown survey %r; expected one of %s"
                         % (survey, sorted(survey_settings)))
    mag1, mag2, absmag, xlabel, ylabel, xlim, ylim = survey_settings[survey]

    def _absmag_with_errors(kinda, kinda_err):
        """Return (absMag, err_minus, err_plus) for ``kinda +/- kinda_err``."""
        # presumably absMagKinda2absMag has no side effects - TODO confirm
        abs_mag = testXD.absMagKinda2absMag(kinda)
        lower = kinda - kinda_err
        abs_mag_lower = testXD.absMagKinda2absMag(lower)
        # Where the lower bound is negative, pin the converted value to the
        # -50 sentinel (as the original code did).
        abs_mag_lower[lower < 0] = -50.
        err_minus = abs_mag - abs_mag_lower
        err_plus = testXD.absMagKinda2absMag(kinda + kinda_err) - abs_mag
        return abs_mag, err_minus, err_plus

    xdgmmFilename = ('xdgmm.' + str(ngauss) + 'gauss.dQ' + str(quantile)
                     + '.' + iter + '.' + survey + '.' + dataFilename + '.fit')

    tgas, _, _, bandDictionary, _ = testXD.dataArrays()

    # First pass: no dust correction.
    dustEBV = 0.0
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])

    band1 = bandDictionary[mag1]
    band2 = bandDictionary[mag2]
    color_err = np.sqrt(band1['array'][band1['err_key']]**2.
                        + band2['array'][band2['err_key']]**2.)
    absBand = bandDictionary[absmag]
    absMagKinda_err = tgas['parallax_error'] * 10.**(
        0.2 * absBand['array'][absBand['key']])

    # Draw from the model and redraw every sample whose second column is
    # negative until none remain.
    xdgmm = XDGMM(filename=xdgmmFilename)
    sample = xdgmm.sample(Nsamples)
    negParallax = sample[:, 1] < 0
    nNegP = np.sum(negParallax)
    while nNegP > 0:
        sample[negParallax] = xdgmm.sample(nNegP)
        negParallax = sample[:, 1] < 0
        nNegP = np.sum(negParallax)

    positive = absMagKinda > 0
    absMag, yerr_minus, yerr_plus = _absmag_with_errors(
        absMagKinda[positive], absMagKinda_err[positive])

    dp.plot_sample(color[positive], absMag,
                   sample[:, 0], testXD.absMagKinda2absMag(sample[:, 1]),
                   xdgmm, xerr=color_err[positive],
                   yerr=[yerr_minus, yerr_plus],
                   xlabel=xlabel, ylabel=ylabel, xlim=xlim, ylim=ylim,
                   errSubsample=2.4e3, thresholdScatter=2., binsScatter=200,
                   contourColor=contourColor)
    os.rename('plot_sample.data.pdf', 'data_noDust.pdf')
    os.rename('plot_sample.prior.pdf', 'prior_' + str(ngauss) + 'gauss.pdf')

    # Second pass: recompute colour and magnitude with the stored dust values.
    data = np.load(dustFile)
    dustEBV = data['ebv']
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])

    posteriorFile = ('posteriorParallax.' + str(ngauss) + 'gauss.dQ'
                     + str(quantile) + '.' + iter + '.' + survey + '.'
                     + dataFilename)

    for posterior_path in [posteriorFile]:
        data = np.load(posterior_path)
        parallax = data['mean']
        parallax_err = np.sqrt(data['var'])
        notnans = ~np.isnan(parallax) & ~np.isnan(parallax_err)
        parallax = parallax[notnans]
        parallax_err = parallax_err[notnans]
        apparentMagnitudeGood = apparentMagnitude[notnans]

        # Log ratio of posterior variance to catalogue parallax variance.
        c = np.log(data['var']) - np.log(tgas['parallax_error']**2.)

        absMag, yerr_minus, yerr_plus = _absmag_with_errors(
            parallax * 10.**(0.2 * apparentMagnitudeGood),
            parallax_err * 10.**(0.2 * apparentMagnitudeGood))

        # The posterior figures always use black, regardless of the argument.
        contourColor = 'k'

        ascii.write([color[notnans], absMag, color_err[notnans],
                     yerr_minus, yerr_plus, c[notnans]],
                    'cmdExpectation.txt',
                    names=['color', 'absMag', 'color_err', 'absMag_errMinus',
                           'absMag_errPlus', 'logDeltaVar'])

        if whatsThatFeature and posterior_path == posteriorFile:
            figFeature, axFeature = plt.subplots()
            corner.hist2d(color[notnans], absMag, ax=axFeature, levels=None,
                          bins=200, no_fill_contours=True, plot_density=False,
                          color=contourColor, rasterized=True,
                          plot_contours=False)
            axFeature.set_xlim(xlim)
            axFeature.set_ylim(ylim)
            axFeature.set_xlabel(xlabel)
            axFeature.set_ylabel(ylabel)

            # Text anchor (colour, absolute magnitude) for each label.
            annotations = [
                ('lower MS', (0.45, 5.5)),
                ('upper MS', (-0.225, 2)),
                ('binary sequence', (0.75, 4)),
                ('red clump', (0.35, -2)),
                ('RGB', (1.0, -2)),
                ('MS turn off', (0.0, 3.5)),
            ]
            for text, (xpos, ypos) in annotations:
                axFeature.text(xpos, ypos, text, fontsize=15)
            figFeature.savefig('whatsThatFeature.pdf', format='pdf')
def dataViz(survey='2MASS', ngauss=128, quantile=0.05, dataFilename='All.npz',
            iter='10th', Nsamples=3e5, contourColor='k', dustFile='dust.npz',
            sdss5=False):
    """Plot the raw colour-magnitude data and the inferred-distance CMDs.

    The function loads a previously fitted XDGMM model, draws samples from it
    and plots the (non dust-corrected) data next to those samples through
    ``dp.plot_sample``.  It then repeats the plot for two posterior files
    (the CMD-prior posterior and ``posteriorSimple.npz``) using the dust
    values in ``dustFile``, writing ``cmdExpectation.txt`` each time and
    renaming the figures produced by ``dp.plot_sample``.

    Parameters
    ----------
    survey : str
        Either ``'APASS'`` or ``'2MASS'``; selects bands, labels and limits.
    ngauss, quantile, iter, dataFilename
        Only used to build the model / posterior file names.
    Nsamples
        Passed unchanged to ``xdgmm.sample``.
    contourColor : str
        Contour colour of the first (raw data) plot.
    dustFile : str
        ``.npz`` file read with ``np.load``; its ``'ebv'`` entry is used.
    sdss5 : bool
        Forwarded to ``dp.plot_sample`` for the posterior plots.

    Raises
    ------
    ValueError
        If ``survey`` is not one of the supported names (previously this
        surfaced later as an unrelated ``NameError``).
    """
    # (mag1, mag2, absmag, xlabel, ylabel, xlim, ylim) for each survey.
    survey_settings = {
        'APASS': ('B', 'V', 'G', 'B-V', r'M$_\mathrm{G}$',
                  [-0.2, 2], [9, -2]),
        '2MASS': ('J', 'K', 'J', r'$(J-K)^C$', r'$M_J^C$',
                  [-0.25, 1.25], [6, -6]),
    }
    if survey not in survey_settings:
        raise ValueError("Unknown survey %r; expected one of %s"
                         % (survey, sorted(survey_settings)))
    mag1, mag2, absmag, xlabel, ylabel, xlim, ylim = survey_settings[survey]

    def _absmag_with_errors(kinda, kinda_err):
        """Return (absMag, err_minus, err_plus) for ``kinda +/- kinda_err``."""
        # presumably absMagKinda2absMag has no side effects - TODO confirm
        abs_mag = testXD.absMagKinda2absMag(kinda)
        lower = kinda - kinda_err
        abs_mag_lower = testXD.absMagKinda2absMag(lower)
        # Where the lower bound is negative, pin the converted value to the
        # -50 sentinel (as the original code did).
        abs_mag_lower[lower < 0] = -50.
        err_minus = abs_mag - abs_mag_lower
        err_plus = testXD.absMagKinda2absMag(kinda + kinda_err) - abs_mag
        return abs_mag, err_minus, err_plus

    xdgmmFilename = ('xdgmm.' + str(ngauss) + 'gauss.dQ' + str(quantile)
                     + '.' + iter + '.' + survey + '.' + dataFilename + '.fit')

    tgas, _, _, bandDictionary, _ = testXD.dataArrays()

    # First pass: no dust correction.
    dustEBV = 0.0
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])

    band1 = bandDictionary[mag1]
    band2 = bandDictionary[mag2]
    color_err = np.sqrt(band1['array'][band1['err_key']]**2.
                        + band2['array'][band2['err_key']]**2.)
    absBand = bandDictionary[absmag]
    absMagKinda_err = tgas['parallax_error'] * 10.**(
        0.2 * absBand['array'][absBand['key']])

    # Draw from the model and redraw every sample whose second column is
    # negative until none remain.
    xdgmm = XDGMM(filename=xdgmmFilename)
    sample = xdgmm.sample(Nsamples)
    negParallax = sample[:, 1] < 0
    nNegP = np.sum(negParallax)
    while nNegP > 0:
        sample[negParallax] = xdgmm.sample(nNegP)
        negParallax = sample[:, 1] < 0
        nNegP = np.sum(negParallax)

    positive = absMagKinda > 0
    absMag, yerr_minus, yerr_plus = _absmag_with_errors(
        absMagKinda[positive], absMagKinda_err[positive])

    dp.plot_sample(color[positive], absMag,
                   sample[:, 0], testXD.absMagKinda2absMag(sample[:, 1]),
                   xdgmm, xerr=color_err[positive],
                   yerr=[yerr_minus, yerr_plus],
                   xlabel=xlabel, ylabel=ylabel, xlim=xlim, ylim=ylim,
                   errSubsample=2.4e3, thresholdScatter=2., binsScatter=200,
                   contourColor=contourColor)
    priorFile = 'prior_' + str(ngauss) + 'gauss.pdf'
    os.rename('plot_sample.data.pdf', 'data_noDust.pdf')
    os.rename('plot_sample.prior.pdf', priorFile)

    # Second pass: recompute colour and magnitude with the stored dust values.
    data = np.load(dustFile)
    dustEBV = data['ebv']
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])

    cNorm = plt.matplotlib.colors.Normalize(vmin=-6, vmax=2)
    posteriorFile = ('posteriorParallax.' + str(ngauss) + 'gauss.dQ'
                     + str(quantile) + '.' + iter + '.' + survey + '.'
                     + dataFilename)

    for posterior_path in [posteriorFile, 'posteriorSimple.npz']:
        data = np.load(posterior_path)
        parallax = data['mean']
        parallax_err = np.sqrt(data['var'])
        notnans = ~np.isnan(parallax) & ~np.isnan(parallax_err)
        parallax = parallax[notnans]
        parallax_err = parallax_err[notnans]
        apparentMagnitudeGood = apparentMagnitude[notnans]

        # Log ratio of posterior variance to catalogue parallax variance.
        c = np.log(data['var']) - np.log(tgas['parallax_error']**2.)

        absMag, yerr_minus, yerr_plus = _absmag_with_errors(
            parallax * 10.**(0.2 * apparentMagnitudeGood),
            parallax_err * 10.**(0.2 * apparentMagnitudeGood))

        # The posterior figures always use black, regardless of the argument.
        contourColor = 'k'

        ascii.write([color[notnans], absMag, color_err[notnans],
                     yerr_minus, yerr_plus, c[notnans]],
                    'cmdExpectation.txt',
                    names=['color', 'absMag', 'color_err', 'absMag_errMinus',
                           'absMag_errPlus', 'logDeltaVar'])

        # NOTE(review): `c` is passed unmasked while the data arrays are
        # masked with `notnans`; confirm plot_sample expects that.
        dp.plot_sample(color[notnans], absMag,
                       sample[:, 0], testXD.absMagKinda2absMag(sample[:, 1]),
                       xdgmm, xerr=color_err[notnans],
                       yerr=[yerr_minus, yerr_plus],
                       xlabel=xlabel, ylabel=ylabel, xlim=xlim, ylim=ylim,
                       errSubsample=1.2e3, thresholdScatter=2.,
                       binsScatter=200, c=c, norm=cNorm, cmap='Blues',
                       contourColor=contourColor, posterior=True,
                       sdss5=sdss5, rasterized=False)

        dataFile = ('inferredDistances_data_' + posterior_path.split('.')[0]
                    + '.pdf')
        os.rename('plot_sample.data.pdf', dataFile)
        os.rename('plot_sample.prior.pdf', priorFile)
class XDGMMTestCase(unittest.TestCase): "TestCase class for XDGMM class." def setUp(self): """ Set up each test with a new XDGMM object and some data. """ self.xdgmm = XDGMM(n_components=3) self.files = [] """ Use scikit-learn GaussianMixture for sampling some data points """ self.gmm = skl_GMM(n_components=3, max_iter=10, covariance_type='full', random_state=None) self.gmm.weights_ = np.array([0.3, 0.5, 0.2]) self.gmm.means_ = np.array( [np.array([0, 1]), np.array([5, 4]), np.array([2, 4])]) self.gmm.covariances_ = np.array([ np.diag((2, 1)), np.array([[1, 0.2], [0.2, 1]]), np.diag((0.3, 0.5)) ]) self.gmm.precisions_ = np.linalg.inv(self.gmm.covariances_) self.gmm.precisions_cholesky_ = np.linalg.cholesky( self.gmm.precisions_) self.X = self.gmm.sample(1000)[0] errs = 0.2 * np.random.random_sample((1000, 2)) self.Xerr = np.zeros(self.X.shape + self.X.shape[-1:]) diag = np.arange(self.X.shape[-1]) self.Xerr[:, diag, diag] = np.vstack([errs[:, 0]**2, errs[:, 1]**2]).T def tearDown(self): """ Clean up files saved by tests """ for fname in self.files: os.remove('test.fit') def test_Fit(self): this_mu = self.xdgmm.mu this_V = self.xdgmm.V this_weights = self.xdgmm.weights self.xdgmm.fit(self.X, self.Xerr) self.assertIsNotNone(self.xdgmm.mu) self.assertIsNotNone(self.xdgmm.V) self.assertIsNotNone(self.xdgmm.weights) def test_Sample(self): self.xdgmm.fit(self.X, self.Xerr) sam = self.xdgmm.sample(1000) self.assertEqual(sam.shape, (1000, 2)) def test_Score(self): self.xdgmm.fit(self.X, self.Xerr) data = np.array([np.array([0, 2]), np.array([4, 4])]) err = np.array([np.diag((0.2, 0.1)), np.diag((0.15, 0.15))]) self.assertNotEqual(self.xdgmm.score(data, err), 0) def test_ReadWrite(self): self.xdgmm.fit(self.X, self.Xerr) self.xdgmm.save_model('test.fit') xd2 = XDGMM(filename='test.fit') self.assertLess(self.xdgmm.mu[0, 0] - xd2.mu[0, 0], 1e-5) self.assertLess(self.xdgmm.V[0, 0, 0] - xd2.V[0, 0, 0], 1e-5) self.assertLess(self.xdgmm.weights[0] - xd2.weights[0], 1e-5) 
self.files.append('test.fit') def test_Condition(self): self.xdgmm.fit(self.X, self.Xerr) cond_xd = self.xdgmm.condition(X_input=np.array([np.nan, 3.5])) self.assertEqual(cond_xd.mu.shape, (3, 1)) self.assertEqual(cond_xd.V.shape, (3, 1, 1))
def main():
    """Regenerate the figures selected by the ``plot_*`` switches below.

    All inputs are hard-coded: the fitted XDGMM model, the dust correction
    and the posterior parallax file for the 128-Gaussian 2MASS run.  Figures
    are written to the working directory and, for the PDF versions, to
    ``paper/``.

    NOTE(review): this block was reconstructed from whitespace-collapsed
    source.  Wherever the original read ``if pdf: <stmt> <stmt> ...`` only
    the first statement was taken to be conditional; confirm against the
    upstream file.
    """
    # ---- output switches -------------------------------------------------
    pdf = True
    plot_data = True
    plot_dust = False
    plot_prior = False
    plot_m67 = True
    plot_compare = False
    plot_expectation = True
    plot_odd_examples = False
    plot_examples = False
    plot_delta = False
    plot_deltacdf = False
    plot_nobias = False
    plot_wtf = False
    plot_toy = False

    mpl.rcParams.update({
        'axes.labelsize': 9,
        'font.size': 9,
        'legend.fontsize': 9,
        'xtick.labelsize': 9,
        'ytick.labelsize': 9,
        'text.usetex': False,
        'figure.figsize': [4.5, 4.5],
    })

    nsubsamples = 1024
    np.random.seed(0)

    # ---- colours ---------------------------------------------------------
    trueColor = '#FF8D28'
    priorColor = '#6FB8B0'
    posteriorColor = '#0977C4'
    dataColor = 'black'
    posteriorMapColor = 'Blues'

    # White-to-teal colour map used for the prior density histogram.
    cmap_prior = LinearSegmentedColormap.from_list(
        'my_color',
        [np.array((255, 255, 255)) / 255., np.array((112, 186, 179)) / 255.],
        N=100)

    # ---- CMD axes and input files ------------------------------------------
    mag1 = 'J'
    mag2 = 'K'
    absmag = 'J'
    xlabel_cmd = r'$(J-K_s)^C$'
    ylabel_cmd = r'$M_J^C$'
    xlim_cmd = [-0.25, 1.25]
    ylim_cmd = [6, -6]

    dustFile = 'dustCorrection.128gauss.dQ0.05.10th.2MASS.All.npz'
    xdgmmFile = 'xdgmm.128gauss.dQ0.05.10th.2MASS.All.npz.fit'
    posteriorFile = 'posteriorParallax.128gauss.dQ0.05.10th.2MASS.All.npz'

    xdgmm = XDGMM(filename=xdgmmFile)

    # ---- toy model plot ----------------------------------------------------
    mtrue = -1.37
    btrue = 0.2
    ttrue = 0.8
    nexamples = 5
    if plot_toy:
        fig, ax = makeFigureInstance(x=2, y=2, wspace=0.75)
        toy.makeplots(mtrue=mtrue, btrue=btrue, ttrue=ttrue,
                      nexamples=nexamples, trueColor=trueColor,
                      priorColor=priorColor, posteriorColor=posteriorColor,
                      dataColor=dataColor,
                      posteriorMapColor=posteriorMapColor, fig=fig, axes=ax)
        os.rename('toy.paper.pdf', 'paper/toy.pdf')

    # ---- raw data plot -----------------------------------------------------
    tgas, _, _, bandDictionary, _ = testXD.dataArrays()

    posterior = np.load(posteriorFile)
    mean = posterior['mean']
    sigma = np.sqrt(posterior['var'])
    # Stars with both a positive catalogue parallax and a positive posterior
    # mean; this mask is reused by every later section.
    positive = (tgas['parallax'] > 0.) & (mean > 0.)
    ind = np.random.randint(0, len(tgas[positive]), nsubsamples)

    band1 = bandDictionary[mag1]
    band2 = bandDictionary[mag2]

    dustEBV = 0.0
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])
    absMagKinda_err = tgas['parallax_error'] * 10.**(0.2 * apparentMagnitude)
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)[positive]
    color_err = np.sqrt(band1['array'][band1['err_key']]**2.
                        + band2['array'][band2['err_key']]**2.)[positive]
    absMag = testXD.absMagKinda2absMag(
        tgas['parallax'][positive] * 10.**(0.2 * apparentMagnitude[positive]))
    absMag_err = absMagError(tgas['parallax'][positive],
                             tgas['parallax_error'][positive],
                             apparentMagnitude[positive], absMag)
    titles = ["Observed Distribution", "Obs+Noise Distribution"]
    if plot_data:
        plot_samples(color, absMag, color_err, absMag_err, ind,
                     contourColor='grey', rasterized=True,
                     plot_contours=True, dataColor=dataColor, titles=titles,
                     xlim=xlim_cmd, ylim=ylim_cmd, xlabel=xlabel_cmd,
                     ylabel=ylabel_cmd, pdf=pdf)
        if pdf:
            os.rename('plot_sample.pdf', 'paper/data.pdf')
        os.rename('plot_sample.png', 'data.png')

    # ---- dust plot ---------------------------------------------------------
    if plot_dust:
        fig, ax = makeFigureInstance(figureSize=(6, 3), left=0.75)
        comparePrior.dustViz(ngauss=128, quantile=0.05, iter='10th',
                             survey='2MASS', dataFilename='All.npz', ax=ax,
                             tgas=tgas)
        fig.savefig('paper/dust.pdf', dpi=400)
        fig.savefig('dust.png')
        plt.close(fig)

    # ---- prior plot --------------------------------------------------------
    if plot_prior:
        samplex, sampley = sampleXDGMM(xdgmm, len(tgas))
        titles = ["Extreme Deconvolution\n resampling",
                  "Extreme Deconvolution\n cluster locations"]
        plot_samples(samplex, sampley, None, None, ind,
                     contourColor='black', rasterized=True,
                     plot_contours=True, dataColor=priorColor, titles=titles,
                     xlim=xlim_cmd, ylim=ylim_cmd, xlabel=xlabel_cmd,
                     ylabel=ylabel_cmd, prior=True, xdgmm=xdgmm, pdf=pdf)
        if pdf:
            os.rename('plot_sample.pdf', 'paper/prior.pdf')
        os.rename('plot_sample.png', 'prior.png')

    # ---- switch to dust-corrected quantities -------------------------------
    data = np.load(dustFile)
    dustEBV = data['ebv']
    absMagKinda, apparentMagnitude = testXD.absMagKindaArray(
        absmag, dustEBV, bandDictionary, tgas['parallax'])
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    color_err = np.sqrt(band1['array'][band1['err_key']]**2.
                        + band2['array'][band2['err_key']]**2.)
    absMagKinda_err = tgas['parallax_error'] * 10.**(0.2 * apparentMagnitude)

    # ---- M67 plot ----------------------------------------------------------
    if plot_m67:
        fig, ax = makeFigureInstance(x=2, y=2, hspace=1.0, wspace=1.0,
                                     figureSize=(2.5, 2.5))
        nPosteriorPoints = 1000
        print(dataColor)
        testXD.distanceTest(tgas, xdgmm, nPosteriorPoints, color,
                            absMagKinda, color_err, absMagKinda_err,
                            xlim_cmd, ylim_cmd, bandDictionary, absmag,
                            dataColor=dataColor, priorColor=priorColor,
                            truthColor=trueColor,
                            posteriorColor=posteriorColor, figDist=fig,
                            axDist=ax, xlabel=xlabel_cmd, ylabel=ylabel_cmd,
                            dl=0.075, db=0.075)
        plt.tight_layout()
        if pdf:
            fig.savefig('paper/m67.pdf', dpi=400)
        fig.savefig('m67.png')
        plt.close(fig)

    # From here on the per-star arrays are restricted to `positive`.
    color = color[positive]
    color_err = color_err[positive]
    apparentMagnitude = apparentMagnitude[positive]
    absMagKinda_dust = absMagKinda[positive]
    absMagKinda_dust_err = absMagKinda_err[positive]
    absMag_dust = testXD.absMagKinda2absMag(absMagKinda[positive])
    absMag_dust_err = absMagError(tgas['parallax'][positive],
                                  tgas['parallax_error'][positive],
                                  apparentMagnitude, absMag_dust)

    # ---- comparison prior plot ---------------------------------------------
    if plot_compare:
        plt.clf()
        fig, ax = makeFigureInstance(x=2, y=1)
    titles = ['Exp Dec Sp \nDen Prior', 'CMD Prior']
    # The loop runs even when plot_compare is off: it leaves `mean`, `sigma`,
    # `absMag` and `absMag_err` set from `posteriorFile` (the last entry),
    # which later sections read.
    for i, posterior_path in enumerate(['posteriorSimple.npz',
                                        posteriorFile]):
        data = np.load(posterior_path)
        posterior = data['posterior']
        sigma = np.sqrt(data['var'])
        mean = data['mean']
        absMag = testXD.absMagKinda2absMag(
            mean[positive] * 10.**(0.2 * apparentMagnitude))
        absMag_err = absMagError(mean[positive], sigma[positive],
                                 apparentMagnitude, absMag)
        if plot_compare:
            ax[i].errorbar(color[ind], absMag[ind], xerr=color_err[ind],
                           yerr=[absMag_err[0][ind], absMag_err[1][ind]],
                           fmt="none", zorder=0, mew=0,
                           ecolor=posteriorColor, alpha=0.5,
                           elinewidth=0.5, color=posteriorColor)
            ax[i].set_xlim(xlim_cmd)
            ax[i].set_ylim(ylim_cmd[0], ylim_cmd[1] * 1.1)
            ax[i].text(0.05, 0.95, titles[i], ha='left', va='top',
                       transform=ax[i].transAxes)
            ax[i].set_xlabel(xlabel_cmd)
            if i in [1]:
                ax[i].yaxis.set_major_formatter(plt.NullFormatter())
            else:
                ax[i].set_ylabel(ylabel_cmd)
    if plot_compare:
        if pdf:
            fig.savefig('paper/comparePrior.pdf', dpi=400)
        fig.savefig('comparePrior.png')
        plt.close(fig)

    # ---- expectation plot --------------------------------------------------
    absMag = testXD.absMagKinda2absMag(
        mean[positive] * 10.**(0.2 * apparentMagnitude))
    absMag_err = absMagError(mean[positive], sigma[positive],
                             apparentMagnitude, absMag)
    titles = ["De-noised Expectation \nValues", "Posterior Distributions"]
    if plot_expectation:
        plot_samples(color, absMag, color_err, absMag_err, ind,
                     contourColor='black', rasterized=True,
                     plot_contours=True, dataColor=posteriorColor,
                     titles=titles, xlim=xlim_cmd, ylim=ylim_cmd,
                     xlabel=xlabel_cmd, ylabel=ylabel_cmd, pdf=pdf)
        if pdf:
            os.rename('plot_sample.pdf', 'paper/posteriorCMD.pdf')
        os.rename('plot_sample.png', 'posteriorCMD.png')

    # ---- posterior example plot --------------------------------------------
    if plot_examples:
        colorBins = [0.0, 0.2, 0.4, 0.7, 1.0]
        digit = np.digitize(color, colorBins)
        ndim = 2
        nPosteriorPoints = 1000  # number of elements in the posterior array
        projectedDimension = 1  # which dimension to project the prior onto
        xparallaxMAS = np.linspace(0, 10, nPosteriorPoints)

        # Plot likelihood and posterior in each axes.
        for iteration in np.arange(20, 40):
            fig, ax = makeFigureInstance(x=3, y=2, hspace=0.75,
                                         figureSize=(2, 2))
            plotPrior(xdgmm, ax[0], c=priorColor, lw=1)
            ax[0].set_xlim(xlim_cmd)
            ax[0].set_ylim(ylim_cmd)
            ax[0].set_xlabel(xlabel_cmd)
            ax[0].set_ylabel(ylabel_cmd)

            for i in range(np.max(digit)):
                currentInd = np.where((digit == i))[0]
                index = currentInd[np.random.randint(0,
                                                     high=len(currentInd))]
                ax[0].scatter(color[index], absMag_dust[index], c=dataColor,
                              s=20)
                # Single point: errors must have shape (2, 1), lower row
                # first, as in the odd-examples section below.
                ax[0].errorbar(color[index], absMag_dust[index],
                               xerr=[[color_err[index]], [color_err[index]]],
                               yerr=[[absMag_dust_err[0][index]],
                                     [absMag_dust_err[1][index]]],
                               fmt="none", zorder=0, lw=2.0, mew=0,
                               alpha=1.0, color=dataColor, ecolor=dataColor)
                ax[0].annotate(str(i + 1),
                               (color[index] + 0.075,
                                absMag_dust[index] + 0.175))

                # The fourth return value is named posteriorColorArray so it
                # does not overwrite the `posteriorColor` hex string that is
                # used as the line colour below.
                (likeParallax, priorParallax, posteriorParallax,
                 posteriorColorArray) = likePriorPost(
                     color[index], absMagKinda_dust[index],
                     color_err[index], absMagKinda_dust_err[index],
                     apparentMagnitude[index], xdgmm, xparallaxMAS,
                     ndim=ndim, nPosteriorPoints=nPosteriorPoints,
                     projectedDimension=projectedDimension)

                # Likelihood and prior are rescaled to the posterior's peak.
                l1, = ax[i + 1].plot(
                    xparallaxMAS,
                    likeParallax * np.max(posteriorParallax)
                    / np.max(likeParallax),
                    lw=1, color=dataColor, zorder=100)
                l2, = ax[i + 1].plot(
                    xparallaxMAS,
                    priorParallax * np.max(posteriorParallax)
                    / np.max(priorParallax),
                    lw=0.5, color=priorColor)
                l3, = ax[i + 1].plot(xparallaxMAS, posteriorParallax, lw=2,
                                     color=posteriorColor)

                maxInd = posteriorParallax == np.max(posteriorParallax)
                maxPar = xparallaxMAS[maxInd]
                maxY = posteriorParallax[maxInd]
                # Put the panel number on the side away from the peak.
                if maxPar < 5:
                    annX = 9
                else:
                    annX = 0
                if i == 1:
                    annY = 0.75 * maxY
                else:
                    annY = maxY / 1.1
                ax[i + 1].text(annX, annY, str(i + 1))
                ax[i + 1].set_xlabel(r'$\varpi$ [mas]')
                # NOTE(review): axis='y' combined with bottom/top keywords
                # looks unintended; kept exactly as found.
                ax[i + 1].tick_params(axis='y', which='both', bottom='off',
                                      top='off', labelbottom='off')
                if i + 1 == 1:
                    leg = fig.legend((l1, l2, l3),
                                     ('likelihood', 'prior', 'posterior'),
                                     'upper right')
                    leg.get_frame().set_alpha(1.0)

            if pdf:
                fig.savefig('posterior_' + str(iteration) + '.pdf', dpi=400)
            fig.savefig('paper/posterior.pdf', dpi=400)
            fig.tight_layout()
            fig.savefig('posterior.png')
            plt.close(fig)

    # ---- odd posterior example plot ----------------------------------------
    if plot_odd_examples:
        # Select unusual regions of the CMD, split by parallax S/N.
        SN = tgas['parallax'][positive] / tgas['parallax_error'][positive]
        oddIndicesWD_LowSN = np.where(
            np.logical_and((absMag_dust > 6. * color + 5.), (SN <= 5)))[0]
        oddIndicesWD_HighSN = np.where(
            np.logical_and((absMag_dust > 6. * color + 5.), (SN > 5)))[0]
        oddIndicesSSG = np.where(
            np.logical_and((absMag_dust < 7.5 * color - 1.5),
                           (absMag_dust > -8.1 * color + 7.8)))[0]
        oddIndicesPN_LowSN = np.where(
            np.logical_and(
                SN <= 5,
                np.logical_and((absMag_dust < 7.5 * color - 4.25),
                               (absMag_dust < -4.75 * color - 0.6))))[0]
        oddIndicesPN_HighSN = np.where(
            np.logical_and(
                SN > 5,
                np.logical_and((absMag_dust < 7.5 * color - 4.25),
                               (absMag_dust < -4.75 * color - 0.6))))[0]

        ndim = 2
        nPosteriorPoints = 1000  # number of elements in the posterior array
        projectedDimension = 1  # which dimension to project the prior onto
        xparallaxMAS = np.linspace(0, 10, nPosteriorPoints)
        xarray = np.logspace(-2, 2, 1000)
        xColor = np.linspace(-2, 4, nPosteriorPoints)
        samplex, sampley = sampleXDGMM(xdgmm, len(tgas) * 10)

        # Plot likelihood and posterior in each axes.
        for iteration in np.arange(0, 10):
            fig, ax = makeFigureInstance(x=3, y=2, hspace=0.75,
                                         figureSize=(2, 2))
            ax[0].hist2d(samplex, sampley, bins=500,
                         norm=mpl.colors.LogNorm(),
                         cmap=plt.get_cmap(cmap_prior), zorder=-1)
            ax[0].set_ylim(15, -10)
            ax[0].set_xlim(-1.2, 2)
            ax[0].set_ylim(ylim_cmd[0] + 3, ylim_cmd[1] - 3)
            ax[0].set_xlabel(xlabel_cmd)
            ax[0].set_ylabel(ylabel_cmd)

            for i, indices in enumerate([
                    oddIndicesWD_LowSN, oddIndicesWD_HighSN, oddIndicesSSG,
                    oddIndicesPN_LowSN, oddIndicesPN_HighSN]):
                print(len(indices), indices)
                index = indices[np.random.randint(0, high=len(indices))]
                ax[0].scatter(color[index], absMag_dust[index], c=dataColor,
                              s=20)
                # NaN magnitude errors are drawn as a fixed 10 mag bar.
                yplus = absMag_dust_err[0][index]
                yminus = absMag_dust_err[1][index]
                if np.isnan(yplus):
                    yplus = 10.
                if np.isnan(yminus):
                    yminus = 10.
                print(yplus, yminus)
                ax[0].errorbar(color[index], absMag_dust[index],
                               xerr=[[color_err[index]], [color_err[index]]],
                               yerr=[[yplus], [yminus]], fmt="none",
                               zorder=0, lw=2.0, mew=0, alpha=1.0,
                               color=dataColor, ecolor=dataColor)
                ax[0].annotate(str(i + 1),
                               (color[index] + 0.075,
                                absMag_dust[index] + 0.175))

                # Posterior on the plotting grid ...
                (likeParallax, priorParallax, posteriorParallax,
                 posteriorColorArray) = likePriorPost(
                     color[index], absMagKinda_dust[index],
                     color_err[index], absMagKinda_dust_err[index],
                     apparentMagnitude[index], xdgmm, xparallaxMAS,
                     ndim=ndim, nPosteriorPoints=nPosteriorPoints,
                     projectedDimension=projectedDimension)
                # ... and on a wide logarithmic grid for the moments.
                (likeParallaxFull, priorParallaxFull, posteriorParallaxFull,
                 posteriorColorFull) = likePriorPost(
                     color[index], absMagKinda_dust[index],
                     color_err[index], absMagKinda_dust_err[index],
                     apparentMagnitude[index], xdgmm, xarray,
                     ndim=ndim, nPosteriorPoints=nPosteriorPoints,
                     projectedDimension=projectedDimension)

                # First and second moments by trapezoidal integration.
                meanPosteriorParallax = scipy.integrate.cumtrapz(
                    posteriorParallaxFull * xarray, x=xarray)[-1]
                x2PosteriorParallax = scipy.integrate.cumtrapz(
                    posteriorParallaxFull * xarray**2., x=xarray)[-1]
                varPosteriorParallax = (x2PosteriorParallax
                                        - meanPosteriorParallax**2.)
                meanPosteriorColor = scipy.integrate.cumtrapz(
                    posteriorColorFull * xColor, x=xColor)[-1]
                x2PosteriorColor = scipy.integrate.cumtrapz(
                    posteriorColorFull * xColor**2., x=xColor)[-1]
                varPosteriorColor = x2PosteriorColor - meanPosteriorColor**2.

                absMagPost = testXD.absMagKinda2absMag(
                    meanPosteriorParallax
                    * 10.**(0.2 * apparentMagnitude[index]))
                # NOTE(review): computed but never drawn - the posterior
                # marker below reuses the data error bars (yplus/yminus).
                # Confirm whether absMag_errPost was meant to be used.
                absMag_errPost = absMagError(meanPosteriorParallax,
                                             np.sqrt(varPosteriorParallax),
                                             apparentMagnitude[index],
                                             absMagPost)

                # Likelihood and prior are rescaled to the posterior's peak.
                l1, = ax[i + 1].plot(
                    xparallaxMAS,
                    likeParallax * np.max(posteriorParallax)
                    / np.max(likeParallax),
                    lw=2, color=dataColor, zorder=100)
                l2, = ax[i + 1].plot(
                    xparallaxMAS,
                    priorParallax * np.max(posteriorParallax)
                    / np.max(priorParallax),
                    lw=2, color=priorColor, linestyle='--')
                l3, = ax[i + 1].plot(xparallaxMAS, posteriorParallax, lw=2,
                                     color=posteriorColor)

                ax[0].scatter(meanPosteriorColor, absMagPost,
                              c=posteriorColor, s=20)
                ax[0].errorbar(meanPosteriorColor, absMagPost,
                               xerr=[[np.sqrt(varPosteriorColor)],
                                     [np.sqrt(varPosteriorColor)]],
                               yerr=[[yplus], [yminus]], fmt="none",
                               zorder=0, lw=2.0, mew=0, alpha=1.0,
                               color=posteriorColor, ecolor=posteriorColor)

                maxInd = np.where(
                    posteriorParallax == np.max(posteriorParallax))[0]
                maxPar = xparallaxMAS[maxInd]
                maxY = posteriorParallax[maxInd]
                # Put the panel number on the side away from the peak.
                if maxPar < 5:
                    annX = 9
                else:
                    annX = 0
                if i == 1:
                    annY = 0.75 * maxY
                else:
                    annY = maxY / 1.1
                ax[i + 1].text(annX, annY, str(i + 1))
                ax[i + 1].set_xlabel(r'$\varpi$ [mas]')
                # NOTE(review): axis='y' combined with bottom/top keywords
                # looks unintended; kept exactly as found.
                ax[i + 1].tick_params(axis='y', which='both', bottom='off',
                                      top='off', labelbottom='off')
                if i + 1 == 1:
                    leg = fig.legend((l1, l2, l3),
                                     ('likelihood', 'prior', 'posterior'),
                                     'upper right')
                    leg.get_frame().set_alpha(1.0)

            if pdf:
                fig.savefig('posterior_' + str(iteration) + '_odd.pdf',
                            dpi=400)
            fig.savefig('paper/posterior_odd.pdf', dpi=400)
            fig.tight_layout()
            fig.savefig('posterior_odd.png')
            plt.close(fig)

    # ---- delta (change in log variance) plot -------------------------------
    label = (r'$\mathrm{ln} \, \tilde{\sigma}_{\varpi}^2 - '
             r'\mathrm{ln} \, \sigma_{\varpi}^2$')
    # Back to the full-length (unmasked) dust-corrected colour.
    color = testXD.colorArray(mag1, mag2, dustEBV, bandDictionary)
    x = color
    y = np.log(sigma**2.) - np.log(tgas['parallax_error']**2.)
    colorDeltaVar = y
    notnans = (~np.isnan(sigma) & ~np.isnan(tgas['parallax_error'])
               & ~np.isnan(color))
    # Shared by the delta and no-bias plots; defined outside `if plot_delta`
    # so that plot_nobias works on its own.
    norm = plt.matplotlib.colors.Normalize(vmin=-1.5, vmax=1)
    cmap = 'inferno'
    if plot_delta:
        fig, ax = makeFigureInstance(x=2, y=1, wspace=1.0)
        ax[0].scatter(x[notnans], y[notnans], c=y[notnans], s=1, lw=0,
                      alpha=0.05, norm=norm, cmap=cmap, rasterized=True)
        ax[0].set_xlabel(xlabel_cmd)
        ax[0].set_ylim(-6, 2)
        ax[0].set_xlim(-0.5, 2)
        ax[0].set_ylabel(label)
        ax[1].scatter(x[positive], absMag, s=1, lw=0, c=y[positive],
                      alpha=0.05, norm=norm, cmap=cmap, rasterized=True)
        ax[1].set_xlim(xlim_cmd)
        ax[1].set_ylim(ylim_cmd)
        ax[1].set_xlabel(xlabel_cmd)
        ax[1].set_ylabel(ylabel_cmd)
        if pdf:
            fig.savefig('paper/delta.pdf', dpi=400)
        fig.savefig('delta.png')
        plt.close(fig)

    # ---- delta CDF plot ----------------------------------------------------
    ratioCmd = sigma[notnans]**2. / tgas['parallax_error'][notnans]**2.
    lnratio = np.log(ratioCmd)
    if plot_deltacdf:
        plt.clf()
        fig, ax = makeFigureInstance(left=0.75)
        N = len(lnratio)
        ys = np.arange(0 + 0.5 / N, 1, 1.0 / N)
        sinds = np.argsort(lnratio)
        # Empirical CDF and its inverse.
        f = scipy.interpolate.interp1d(lnratio[sinds], ys)
        f_inv = scipy.interpolate.interp1d(ys, lnratio[sinds])
        ax.plot(lnratio[sinds], ys, 'k-', lw=2)
        fac2 = np.log(1 / 4.)
        fac1 = 0.
        # Guide lines at a variance ratio of 1/4, at no change and at the
        # median.
        ax.plot([fac2, fac2], [-1, f(fac2)], 'k--', lw=2)
        ax.plot([-6, fac2], [f(fac2), f(fac2)], 'k--', lw=2)
        ax.plot([fac1, fac1], [-1, f(fac1)], 'k--', lw=2)
        ax.plot([-6, fac1], [f(fac1), f(fac1)], 'k--', lw=2)
        ax.plot([f_inv(0.5), f_inv(0.5)], [-1, 0.5], 'k--', lw=2)
        ax.plot([-6, f_inv(0.5)], [0.5, 0.5], 'k--', lw=2)
        ax.set_xlabel(label)
        ax.set_ylabel('cumulative fraction')
        ax.set_xlim(-6, 2)
        ax.set_ylim(-0.05, 1.05)
        if pdf:
            fig.savefig('paper/deltaCDF.pdf', dpi=400)
        fig.savefig('deltaCDF.png')
        plt.close(fig)
        print('fraction of stars which decreased in variance: ', f(fac1))

    # ---- posterior mean minus catalogue parallax vs. its uncertainty -------
    y = mean - tgas['parallax']
    x = tgas['parallax_error']
    if plot_nobias:
        plt.clf()
        fig, ax = makeFigureInstance(left=0.75)
        ax.scatter(x[notnans], y[notnans], c=colorDeltaVar[notnans], s=1,
                   lw=0, alpha=0.05, norm=norm, cmap=cmap, rasterized=True)
        ax.plot([0, 1.1], [0, 0], 'k--', lw=1)
        ax.set_xlim(0.15, 1.05)
        ax.set_ylim(-2.5, 2.5)
        ylabel = r'$\mathrm{Posterior \, Expectation \, Value} - \varpi_n$'
        xlabel = r'$\sigma_{\varpi,n}$'
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        if pdf:
            fig.savefig('paper/deltaParallax.pdf', dpi=400)
        fig.savefig('deltaParallax.png')
        plt.close(fig)

    # ---- "what's that feature" plot ----------------------------------------
    if plot_wtf:
        fig, ax = makeFigureInstance(left=0.75)
        ax.scatter(color[positive], absMag, s=1, lw=0, c=dataColor,
                   alpha=0.01, zorder=0, rasterized=True)
        ax.set_xlim(xlim_cmd)
        ax.set_ylim(ylim_cmd)
        ax.set_xlabel(xlabel_cmd)
        ax.set_ylabel(ylabel_cmd)
        lowerMainSequence = (0.4, 5.5)
        upperMainSequence = (-0.225, 2)
        binarySequence = (0.65, 4)
        redClump = (0.35, -2)
        redGiantBranch = (1.0, -2)
        turnOff = (-0.15, 3.5)
        features = [lowerMainSequence, upperMainSequence, binarySequence,
                    redClump, redGiantBranch, turnOff]
        # 'subgiant branch' has no matching position in `features`, so zip()
        # never draws it.
        labels = ['lower MS', 'upper MS', 'binary sequence', 'red clump',
                  'RGB', 'MS turn off', 'subgiant branch']
        for l, f in zip(labels, features):
            ax.text(f[0], f[1], l)
        if pdf:
            fig.savefig('paper/whatsThatFeature.pdf', dpi=400)
        fig.savefig('whatsThatFeature.png')
        plt.close(fig)
#plot likelihood and posterior in each axes
# NOTE(review): this fragment relies on names defined elsewhere in the file
# (ngauss, quantile, iter, xlim, ylim, digit, color, absMagKinda,
# yerr_minus, yerr_plus, testXD, XDGMM) -- confirm they are in scope here.
for iteration in np.arange(20, 40):
    fig, ax = plt.subplots(2, 3, figsize=(15, 9))
    ax = ax.flatten()
    fig.subplots_adjust(left=0.1, right=0.9, bottom=0.1, top=0.9,
                        wspace=0.4, hspace=0.5)

    #plot prior in upper left
    xdgmmFilename = 'xdgmm.' + str(ngauss) + 'gauss.dQ' + str(
        quantile) + '.' + iter + '.2MASS.All.npz.fit'
    xdgmm = XDGMM(filename=xdgmmFilename)
    testXD.plotPrior(xdgmm, ax[0], c='k', lw=1)
    ax[0].set_xlim(xlim)
    ax[0].set_ylim(ylim)
    ax[0].set_xlabel('$(J-K)^C$', fontsize=18)
    ax[0].set_ylabel('$M_J^C$', fontsize=18)

    # Pick one random object from every label value in `digit` and mark it
    # on the prior panel.
    for i in range(np.max(digit)):
        currentInd = np.where((digit == i))[0]
        if len(currentInd) == 0:
            # randint(0, high=0) raises ValueError; nothing to draw from an
            # empty bin, so move on to the next one.
            continue
        index = currentInd[np.random.randint(0, high=len(currentInd))]
        # Single-argument call form prints identically on Python 2 and 3.
        print('yerr minus: ' + str(yerr_minus[index]) + ' yerr plus: ' + str(
            yerr_plus[index]))
        ax[0].scatter(color[index],
                      testXD.absMagKinda2absMag(absMagKinda[index]),
                      c='black')
class Empiricist(object):
    """
    Worker object that can fit supernova and host galaxy parameters
    given noisy inputs using an XDGMM model, and then predict new
    supernovae based on this model and a set of new host galaxies.

    Parameters
    ----------
    model_file: string (optional)
        Name of text file containing model being used (default=None).
    fit_method: string (optional)
        Name of XD fitting method to use (default='astroML'). Must be
        either 'astroML' or 'Bovy'.

    Notes
    -----
    The class can be initialized with a model or one can be loaded or
    fit to data.
    """

    def __init__(self, model_file=None, fit_method='astroML'):
        self.XDGMM = XDGMM(n_components=7, method=fit_method)
        self.fit_method = fit_method
        # Always define the attribute so the "model not set" guards in
        # get_SN/get_logR raise their intended error instead of an
        # AttributeError when no model has been loaded or fitted yet.
        self.model_file = None

        if model_file is not None:
            self.read_model(model_file)

    def get_SN(self, X, Xerr=None, n_SN=1):
        """
        Conditions the XDGMM model based on the data in X and returns
        SN parameters sampled from the conditioned model.

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data. First 3 entries (SN parameters) should be NaN.
        Xerr: array_like, shape = (n_samples, n_features), optional
            Error on input data. SN errors should be 0.0. If None,
            errors are not used for the conditioning.
        n_SN: int (optional)
            Number of SNe to sample (default = 1).

        Returns
        -------
        SN_data: array_like, shape = (n_SN, 3)
            Sample of SN data taken from the conditioned model.

        Raises
        ------
        RuntimeError
            If no model has been read or fitted yet.

        Notes
        -----
        Assumes that the first three parameters used when fitting the
        model are the SN parameters.
        """
        if self.model_file is None:
            # RuntimeError exists on Python 2 and 3 (StandardError is
            # Python 2 only, and RuntimeError is one of its subclasses).
            raise RuntimeError("Model parameters not set.")

        if Xerr is None:
            cond_XDGMM = self.XDGMM.condition(X)
        else:
            cond_XDGMM = self.XDGMM.condition(X, Xerr)

        return np.atleast_2d(cond_XDGMM.sample(n_SN))

    def fit_model(self, X, Xerr, filename='empiriciSN_model.fit',
                  n_components=6):
        """
        Fits the XD model to data.

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data.
        Xerr: array_like, shape = (n_samples, n_features, n_features)
            Error on input data.
        filename: string (optional)
            Filename for model fit to be saved to (default =
            'empiriciSN_model.fit').
        n_components: int (optional)
            Number of Gaussian components to use (default = 6)

        Notes
        -----
        The specified method and n_components Gaussian components will
        be used (typical BIC-optimized numbers of components for ~100s
        of training datapoints are 6 or 7).

        The fit will be saved in the file with name defined by the
        filename variable.
        """
        self.XDGMM.n_components = n_components
        self.XDGMM = self.XDGMM.fit(X, Xerr)
        self.XDGMM.save_model(filename)
        self.model_file = filename
        return

    def fit_from_files(self, filelist, filename='empiriciSN_model.fit',
                       n_components=7):
        """
        Fits the XD model to data contained in the files provided.

        Parameters
        ----------
        filelist: array_like
            Array of strings containing names of files containing data
            to fit.
        filename: string (optional)
            Filename for model fit (default = 'empiriciSN_model.fit').
        n_components: int (optional)
            Number of Gaussian components to use (default = 7)

        Notes
        -----
        The model is fitted using the data contained in the files
        named in the `filelist` variable, with the fitting method
        already configured on self.XDGMM. This assumes that the data
        files are in the same format as those provided with this code
        and that only redshift, distance from host nucleus, host
        colors, and local host surface brightness are being used for
        the fit.
        """
        X, Xerr = self.get_data(filelist)
        self.fit_model(X, Xerr, filename=filename,
                       n_components=n_components)
        return

    def read_model(self, filename):
        """
        Reads the parameters of a model from a file.

        Parameters
        ----------
        filename: string
            Name of the file to read from.

        Notes
        -----
        Model parameters are stored in the self.XDGMM model object.
        The model filename is stored self.model_file.
        """
        self.XDGMM.read_model(filename)
        self.model_file = filename
        return

    def component_test(self, X, Xerr, component_range, no_err=False):
        """
        Test the performance of the model for a range of numbers of
        Gaussian components.

        Parameters
        ----------
        X: array_like, shape = (n_samples, n_features)
            Input data.
        Xerr: array_like, shape = (n_samples, n_features, n_features)
            Error on input data.
        component_range: array_like
            Range of n_components to test.
        no_err: bool (optional)
            Flag for whether to calculate the BIC with the errors
            included or not. (default = False)

        Returns
        -------
        bics: array_like, shape = (len(param_range),)
            BIC for each value of n_components
        optimal_n_comp: float
            Number of components with lowest BIC score
        lowest_bic: float
            Lowest BIC from the scores computed.

        Notes
        -----
        Uses the XDGMM.bic_test method to compute the BIC score for
        each n_components in the component_range array.
        """
        bics, optimal_n_comp, lowest_bic = \
            self.XDGMM.bic_test(X, Xerr, component_range, no_err)
        return bics, optimal_n_comp, lowest_bic

    def get_logR(self, cond_indices, R_index, X, Xerr=None):
        """
        Uses a subset of parameters in the given data to condition the
        model and return a sample value for log(R/Re).

        Parameters
        ----------
        cond_indices: array_like
            Array of indices indicating which parameters to use to
            condition the model. Cannot contain [0, 1, 2] since these
            are SN parameters.
        R_index: int
            Index of log(R/Re) in the list of parameters that were used
            to fit the model.
        X: array_like, shape = (n < n_features,)
            Input data, ordered by increasing parameter index.
        Xerr: array_like, shape = (X.shape,) (optional)
            Error on input data. If none, no error used to condition.

        Returns
        -------
        logR: float
            Sample value of log(R/Re) taken from the conditioned model.

        Raises
        ------
        RuntimeError
            If no model has been read or fitted yet.
        ValueError
            If `cond_indices` contains an SN parameter index (0, 1, 2)
            or `R_index`.

        Notes
        -----
        The fit_params array specifies a list of indices to use to
        condition the model. The model will be conditioned and then
        a radius will be drawn from the conditioned model.

        This is so that the radius can then be used to calculate local
        surface brightness to fully condition the model to sample
        likely SN parameters.

        This does not make assumptions about what parameters are being
        used in the model, but does assume that the model has been
        fit already and that the first three parameters in the data
        that were used to fit the model are the SN parameters.
        """
        if self.model_file is None:
            raise RuntimeError("Model parameters not set.")

        if 0 in cond_indices or 1 in cond_indices or 2 in cond_indices:
            raise ValueError("Cannot condition model on SN parameters.")
        if R_index in cond_indices:
            raise ValueError("Cannot condition model on log(R/Re).")

        n_features = self.XDGMM.mu.shape[1]

        # Unconditioned parameters are marked with NaN data and zero error.
        cond_data = np.full(n_features, np.nan)
        cond_err = np.zeros(n_features) if Xerr is not None else None

        # The conditioned model only keeps the NaN dimensions, so the
        # position of log(R/Re) shifts down by one for every conditioned
        # parameter that precedes it.
        R_cond_idx = R_index
        j = 0
        for i in range(n_features):
            if i not in cond_indices:
                continue
            cond_data[i] = X[j]
            if cond_err is not None:
                cond_err[i] = Xerr[j]
            j += 1
            if i < R_index:
                R_cond_idx -= 1

        if cond_err is not None:
            cond_XDGMM = self.XDGMM.condition(cond_data, cond_err)
        else:
            cond_XDGMM = self.XDGMM.condition(cond_data)

        sample = cond_XDGMM.sample()
        logR = sample[0][R_cond_idx]
        return logR

    def get_local_SB(self, SB_params, R):
        """
        Uses magnitudes, a surface brightness (SB) profile, and
        a SN location to fit local surface brightnesses at the location
        of the SN.

        Parameters
        ----------
        SB_params: array_like, shape = (21,)
            Array of parameters needed for the SB fit. First entry
            should be a sersic index of 1 or 4, indicating whether to
            use an exponential or de Vaucouleurs profile. Following this
            should be sets of
            (magnitude, mag_unc, effective radius, rad_unc) data for
            each of the 5 ugriz filters, giving a total array length of
            21. These data are assumed to be known by the user.
        R: float
            Separation from host nucleus in units of log(R/Re).
            It is assumed that the Re used here is the r-band Re, as is
            output by the get_logR function.

        Returns
        -------
        SBs: array_list, shape = (5,)
            Local surface brightness at the location of the SN for each
            of the 5 ugriz filters. Units = mag/arcsec^2
        SB_errs: array_like, shape = (5,)
            Uncertainties on the local surface brightnesses.

        Raises
        ------
        ValueError
            If the sersic index (first entry of SB_params) is not 1 or 4.
        """
        if SB_params[0] != 1 and SB_params[0] != 4:
            raise ValueError("Sersic index must be 1 or 4")

        # Index 11 is the effective radius of the third (r) filter.
        sep = (10**R) * SB_params[11]  # separation in arcsec

        sb_values = []
        sb_uncs = []

        for j in range(5):
            halfmag = SB_params[j*4+1] + 0.75257
            magerr = SB_params[j*4+2]
            Re = SB_params[j*4+3]
            Re_err = SB_params[j*4+4]
            r = sep/Re

            Ie = halfmag + 2.5 * np.log10(np.pi*Re**2)
            Re2_unc = 2 * Re * Re_err * np.pi
            # NOTE(review): standard propagation of 2.5*log10(x) would
            # divide by x = pi*Re**2, not log10(x) -- left unchanged,
            # confirm with the model owner.
            log_unc = 2.5 * Re2_unc/(np.log10(np.pi*Re**2) * np.log(10))
            Ie_unc = np.sqrt(magerr**2 + log_unc**2)

            if SB_params[0] == 1:
                # Exponential profile.
                Io = Ie-1.824
                profile = np.exp(-1.68*(r))
                exp_unc = profile*1.68*sep*Re_err/(Re**2)
            else:
                # de Vaucouleurs profile.
                Io = Ie-8.328
                profile = np.exp(-7.67*((r)**0.25))
                # NOTE(review): differentiating (sep/Re)**0.25 gives
                # sep**0.25 rather than sep here -- left unchanged,
                # confirm with the model owner.
                exp_unc = profile*7.67*sep*Re_err/(4*Re**(1.25))

            Io_unc = Ie_unc
            sb = Io*profile
            # Relative errors add in quadrature: both terms are squared
            # (the de Vaucouleurs branch previously omitted the square on
            # the profile term).
            sb_unc = sb*np.sqrt((Io_unc/Io)**2 + (exp_unc/profile)**2)
            if np.isnan(sb_unc):
                sb_unc = 0.0
            # sb can be negative, which would flip the sign of sb_unc.
            sb_unc = abs(sb_unc)

            sb_values.append(sb)
            sb_uncs.append(sb_unc)

        return np.array(sb_values), np.array(sb_uncs)

    def set_fit_method(self, fit_method):
        """
        Sets the XD fitting method to use.

        Parameters
        ----------
        fit_method: string
            Name of fitting method to use. Must be either 'astroML' or
            'Bovy'.

        Raises
        ------
        ValueError
            If `fit_method` is neither 'astroML' nor 'Bovy'.

        Notes
        -----
        Changes the fitting method of self.XDGMM to the one specified
        in `fit_method`, and sets the iteration limit that goes with it.
        """
        if fit_method == 'astroML':
            n_iter = 100
        elif fit_method == 'Bovy':
            n_iter = 10**9
        else:
            raise ValueError("Method must be either 'astroML' or 'Bovy'")
        self.XDGMM.method = fit_method
        self.XDGMM.n_iter = n_iter
        self.fit_method = fit_method
        return

    def get_data(self, filelist):
        """
        Parses SN and host data from a list of data files.

        Parameters
        ----------
        filelist: array_like
            Array of strings containing names of files containing data
            to fit.

        Returns
        -------
        X: array_like, shape = (n_samples, n_features)
            Output data. Contains SALT2 SN parameters, host redshift,
            log(R/Re), host colors, and host brightnesses at the
            locations of the SN in each filter.
        Xerr: array_like, shape = (n_samples, n_features, n_features)
            Error on output data.

        Notes
        -----
        Reads in each data file and returns an array of data and a
        matrix of errors, which can be used to fit the XDGMM model.

        Blank lines and lines starting with '#' are skipped, as are rows
        whose local surface brightness uncertainty is 'nan' in any
        filter.

        Currently reads the SALT2 SN parameters, host redshift,
        log(R/Re), host magnitudes, and host surface brightnesses
        at the location of the SN.

        This method needs further modularizing, to enable the worker
        to calculate host surface brightnesses separately (in a static
        method).
        """
        # (value, uncertainty) column pairs in the comma-separated files.
        sn_cols = ((7, 8), (9, 10), (11, 12))  # x0, x1, c
        mag_cols = ((18, 19), (20, 21), (22, 23),
                    (24, 25), (26, 27))  # u, g, r, i, z magnitudes
        # Local surface brightnesses, u to z. The i-band uncertainty is
        # column 51: that is the column the NaN filter inspects and it
        # matches the stride of the other bands (the code used to read 52).
        sb_cols = ((32, 33), (38, 39), (44, 45), (50, 51), (56, 57))

        sn, sn_err = [], []
        host, host_err = [], []  # redshift, log(R/Re)
        mags, mag_errs = [], []
        sbs, sb_errs = [], []

        for filename in filelist:
            with open(filename, 'r') as infile:
                for raw in infile:
                    text = raw.strip()
                    if not text or text.startswith('#'):
                        continue
                    # Strip every field so a trailing newline or padding
                    # cannot hide a 'nan' marker from the filter below.
                    fields = [field.strip() for field in text.split(',')]
                    if any(fields[err] == 'nan' for _, err in sb_cols):
                        continue

                    # SN params
                    sn.append([float(fields[val]) for val, _ in sn_cols])
                    sn_err.append([float(fields[err]) for _, err in sn_cols])

                    # Host params; the redshift is treated as exact.
                    # NOTE(review): columns 15 and 42/43 are presumably the
                    # SN separation and the r-band Re with its
                    # uncertainty -- confirm against the data files.
                    Re = float(fields[42])
                    host.append([float(fields[4]),
                                 np.log10(float(fields[15])/Re)])
                    host_err.append([0.0,
                                     float(fields[43])/(Re*np.log(10))])

                    mags.append([float(fields[val]) for val, _ in mag_cols])
                    mag_errs.append(
                        [float(fields[err]) for _, err in mag_cols])

                    sbs.append([float(fields[val]) for val, _ in sb_cols])
                    sb_errs.append(
                        [float(fields[err]) for _, err in sb_cols])

        # reshape keeps the column count even when no rows were read.
        sn = np.array(sn, dtype=float).reshape(-1, len(sn_cols))
        sn_err = np.array(sn_err, dtype=float).reshape(-1, len(sn_cols))
        host = np.array(host, dtype=float).reshape(-1, 2)
        host_err = np.array(host_err, dtype=float).reshape(-1, 2)
        mags = np.array(mags, dtype=float).reshape(-1, len(mag_cols))
        mag_errs = np.array(mag_errs, dtype=float).reshape(-1, len(mag_cols))
        sbs = np.array(sbs, dtype=float).reshape(-1, len(sb_cols))
        sb_errs = np.array(sb_errs, dtype=float).reshape(-1, len(sb_cols))

        # All ten colors in the order ug, ur, ui, uz, gr, gi, gz, ri, rz, iz.
        pairs = [(a, b) for a in range(5) for b in range(a + 1, 5)]
        colors = np.column_stack(
            [mags[:, a] - mags[:, b] for a, b in pairs])
        color_errs = np.column_stack(
            [np.sqrt(mag_errs[:, a]**2 + mag_errs[:, b]**2)
             for a, b in pairs])

        X = np.hstack([sn, host, colors, sbs])
        sigma = np.hstack([sn_err, host_err, color_errs, sb_errs])

        # Diagonal covariance per sample: variances only, no correlations.
        Xerr = np.zeros(X.shape + X.shape[-1:])
        diag = np.arange(X.shape[-1])
        Xerr[:, diag, diag] = sigma**2
        return X, Xerr