def test_fitness_function_results():
    """Test results for several fitness functions"""
    rng = np.random.RandomState(42)

    # Event Data
    t = rng.randn(100)
    edges = bayesian_blocks(t, fitness='events')
    assert_allclose(edges, [-2.6197451, -0.71094865, 0.36866702, 1.85227818])

    # Event data with repeats
    t[80:] = t[:20]
    edges = bayesian_blocks(t, fitness='events', p0=0.01)
    assert_allclose(edges, [-2.6197451, -0.47432431, -0.46202823, 1.85227818])

    # Regular event data
    dt = 0.01
    t = dt * np.arange(1000)
    x = np.zeros(len(t))
    N = len(t) // 10
    x[rng.randint(0, len(t), N)] = 1
    x[rng.randint(0, len(t) // 2, N)] = 1
    edges = bayesian_blocks(t, x, fitness='regular_events', dt=dt)
    assert_allclose(edges, [0, 5.105, 9.99])

    # Measured point data with errors
    t = 100 * rng.rand(20)
    x = np.exp(-0.5 * (t - 50) ** 2)
    sigma = 0.1
    x_obs = x + sigma * rng.randn(len(x))
    edges = bayesian_blocks(t, x_obs, sigma, fitness='measures')
    assert_allclose(edges, [4.360377, 48.456895, 52.597917, 99.455051])
def bin_edges_f(bin_method, mags_cols_cl):
    '''
    Obtain bin edges for each photometric dimension using the cluster
    region diagram. The 'bin_edges' list will contain all magnitudes first,
    and then all colors (in the same order in which they are read).
    '''
    bin_edges = []

    if bin_method in ('auto', 'fd', 'doane', 'scott', 'rice', 'sturges', 'sqrt'):
        for mag in mags_cols_cl[0]:
            bin_edges.append(np.histogram(mag, bins=bin_method)[1])
        for col in mags_cols_cl[1]:
            bin_edges.append(np.histogram(col, bins=bin_method)[1])

    elif bin_method == 'fixed':
        # Based on Bonatto & Bica (2007) 377, 3, 1301-1323, but using larger
        # values than the ones they used (0.25 for colors and 0.5 for
        # magnitudes).
        for mag in mags_cols_cl[0]:
            b_num = max(2, (max(mag) - min(mag)) / 1.)
            bin_edges.append(np.histogram(mag, bins=int(b_num))[1])
        for col in mags_cols_cl[1]:
            b_num = max(2, (max(col) - min(col)) / .5)
            bin_edges.append(np.histogram(col, bins=int(b_num))[1])

    elif bin_method == 'knuth':
        for mag in mags_cols_cl[0]:
            bin_edges.append(knuth_bin_width(
                mag, return_bins=True, quiet=True)[1])
        for col in mags_cols_cl[1]:
            bin_edges.append(knuth_bin_width(
                col, return_bins=True, quiet=True)[1])

    elif bin_method == 'blocks':
        for mag in mags_cols_cl[0]:
            bin_edges.append(bayesian_blocks(mag))
        for col in mags_cols_cl[1]:
            bin_edges.append(bayesian_blocks(col))

    # TODO this method is currently hidden from the params file.
    # To be used when #325 is implemented. Currently used to test
    # multi-dimensional likelihoods.
    #
    # For 4 to 6 dimensions the rule below appears to be a somewhat
    # reasonable rule of thumb for the number of bins in each dimension.
    # There is a trade-off between a large number of smaller bins, which
    # better match features of the observed cluster but favor larger mass
    # values, and fewer larger bins, which better match masses but lose
    # the finer details of the cluster.
    elif bin_method == 'man':
        d = len(mags_cols_cl[0]) + len(mags_cols_cl[1])
        b_num = [15, 10, 7][d - 4]
        for mag in mags_cols_cl[0]:
            bin_edges.append(np.histogram(mag, bins=int(b_num))[1])
        for col in mags_cols_cl[1]:
            bin_edges.append(np.histogram(col, bins=int(b_num))[1])

    return bin_edges
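A minimal usage sketch for bin_edges_f with synthetic data. The two-level structure of mags_cols_cl (magnitude arrays first, color arrays second) is inferred from the loops above; all names here are illustrative:

import numpy as np
from astropy.stats import knuth_bin_width, bayesian_blocks

rng = np.random.RandomState(0)
mags = [rng.normal(15., 2., 500)]      # one magnitude dimension
cols = [rng.normal(0.8, 0.3, 500)]     # one color dimension

edges = bin_edges_f('blocks', [mags, cols])
print(len(edges))                      # one edge array per dimension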
def test_duplicate_events(rseed=0):
    rng = np.random.RandomState(rseed)
    t = rng.rand(100)
    t[80:] = t[:20]

    x = np.ones_like(t)
    x[:20] += 1

    bins1 = bayesian_blocks(t)
    bins2 = bayesian_blocks(t[:80], x[:80])
    assert_allclose(bins1, bins2)
def test_duplicate_events(rseed=0):
    rng = np.random.default_rng(rseed)
    t = rng.random(100)
    t[80:] = t[:20]

    # Using int array as a regression test for gh-6877
    x = np.ones(t.shape, dtype=int)
    x[:20] += 1

    bins1 = bayesian_blocks(t)
    bins2 = bayesian_blocks(t[:80], x[:80])
    assert_allclose(bins1, bins2)
def test_duplicate_events(rseed=0):
    rng = np.random.RandomState(rseed)
    t = rng.rand(100)
    t[80:] = t[:20]

    x = np.ones_like(t)
    x[:20] += 1

    with pytest.warns(AstropyUserWarning, match=r'p0 does not seem to accurate'):
        bins1 = bayesian_blocks(t)
        bins2 = bayesian_blocks(t[:80], x[:80])

    assert_allclose(bins1, bins2)
def test_duplicate_events(rseed=0):
    rng = np.random.RandomState(rseed)
    t = rng.rand(100)
    t[80:] = t[:20]

    # Using int array as a regression test for gh-6877
    x = np.ones(t.shape, dtype=int)
    x[:20] += 1

    with pytest.warns(AstropyUserWarning, match=r'p0 does not seem to accurate'):
        bins1 = bayesian_blocks(t)
        bins2 = bayesian_blocks(t[:80], x[:80])

    assert_allclose(bins1, bins2)
def Bayesian(self, x):
    # Interval partition from the Bayesian blocks change points
    I = bayesian_blocks(x[:, 0], p0=0.01)
    I = np.ceil(I).astype(int)
    # list() is needed in Python 3, where zip() returns an iterator
    # that cannot be indexed or trimmed
    self.I = list(zip(I[:-1], I[1:]))
    if self.I[-1][0] == self.I[-1][1]:
        del self.I[-1]
    self.I_number = len(self.I)
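A quick standalone illustration of what the method computes (hypothetical one-dimensional data standing in for x[:, 0]):

import numpy as np
from astropy.stats import bayesian_blocks

t = np.concatenate([100 * np.random.rand(100), 100 + 50 * np.random.rand(200)])
I = np.ceil(bayesian_blocks(t, p0=0.01)).astype(int)
intervals = list(zip(I[:-1], I[1:]))   # (start, stop) pairs, one per block
print(intervals)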
def timeslice(self, lcbinwidth=0.05, gamma=1e-300):
    det = ['n3', 'n4']
    time_slice = []
    for i in range(1):
        file = glob(self.datadir + 'glg_tte_' + det[i] + '_' +
                    self.bnname + '_v*.fit')
        print(file)
        fitfile = file[0]
        hdu = fits.open(fitfile)
        data = hdu['events'].data['time']
        trigtime = hdu[0].header['trigtime']
        time = data - trigtime
        tte = time[(time > -10) & (time < 50)]

        fig = plt.figure()
        edges = np.arange(tte[0], tte[-1] + lcbinwidth, lcbinwidth)
        histvalue, histbin = np.histogram(tte, bins=edges)
        plottime = histbin
        plotrate = histvalue / lcbinwidth
        plotrate = np.concatenate(([plotrate[0]], plotrate))

        edges = bayesian_blocks(plottime, plotrate, fitness='events',
                                p0=1e-1, gamma=1e-300)
        histvalue, histbin = np.histogram(tte, bins=edges)
        plottime = histbin
        plotrate = histvalue / (histbin[1:] - histbin[:-1])
        plotrate = np.concatenate(([plotrate[0]], plotrate))

        # collect the interior block edges as time slices
        l = len(edges)
        for j in range(1, l - 1):
            time_slice.append(edges[j])
        print(time_slice)
def histogram(a, bins=10, range=None, **kwargs):
    """Enhanced histogram

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the `bins` argument allowing
    a string specifying how bins are computed, the parameters are the same
    as numpy.histogram().

    Parameters
    ----------
    a : array_like
        array of data to be histogrammed

    bins : int or list or str (optional)
        If bins is a string, then it must be one of:
        'blocks' : use bayesian blocks for dynamic bin widths
        'knuth' : use Knuth's rule to determine bins
        'scotts' : use Scott's rule to determine bins
        'freedman' : use the Freedman-Diaconis rule to determine bins

    range : tuple or None (optional)
        the minimum and maximum range for the histogram.  If not specified,
        it will be (x.min(), x.max())

    other keyword arguments are described in numpy.histogram().

    Returns
    -------
    hist : array
        The values of the histogram. See `normed` and `weights` for a
        description of the possible semantics.
    bin_edges : array of dtype float
        Return the bin edges ``(length(hist)+1)``.

    See Also
    --------
    numpy.histogram
    astroML.plotting.hist
    """
    a = np.asarray(a)

    # if range is specified, we need to truncate the data for
    # the bin-finding routines
    if (range is not None and (bins in ['blocks', 'knuth',
                                        'scotts', 'freedman'])):
        a = a[(a >= range[0]) & (a <= range[1])]

    if isinstance(bins, str):
        if bins == 'blocks':
            bins = astropy_stats.bayesian_blocks(a)
        elif bins == 'knuth':
            da, bins = astropy_stats.knuth_bin_width(a, True)
        elif bins == 'scotts':
            da, bins = astropy_stats.scott_bin_width(a, True)
        elif bins == 'freedman':
            da, bins = astropy_stats.freedman_bin_width(a, True)
        else:
            raise ValueError("unrecognized bin code: '{}'".format(bins))

    return np.histogram(a, bins, range, **kwargs)
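A hedged usage sketch of the wrapper above, assuming the module imports numpy as np and astropy.stats as astropy_stats, as the function body does:

import numpy as np
from astropy import stats as astropy_stats

rng = np.random.RandomState(0)
data = np.concatenate([rng.normal(0, 1, 500), rng.normal(5, 0.5, 200)])

counts, edges = histogram(data, bins='blocks')   # adaptive-width bins
print(len(edges) - 1, "blocks")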
def estimate_bayesian_blocks(self, x, y, yerr):
    '''
    :param x: x-data points (typically JD dates)
    :param y: y-data points (typically mags/fluxes)
    :param yerr: y-data errors (in the same units)
    :return: bayesianblocks dict with the x values, the xerr (extension of
             the block) and y/yerr (weighted average and errors).
    '''
    # make sure we have numpy arrays
    x = np.asarray(x)
    y = np.asarray(y)
    yerr = np.asarray(yerr)

    # false alarm probability
    p0 = self.run_config['bblocks_p0']

    edges = astats.bayesian_blocks(x, y, yerr, fitness='measures', p0=p0)

    bayesianblocks = {'x': [], 'xerr': [], 'y': [], 'yerr': []}
    for xmin, xmax in zip(edges[:-1], edges[1:]):
        filt = (x >= xmin) * (x < xmax)
        if np.sum(filt) == 0:
            continue
        xave = (xmin + xmax) / 2.
        xerr = (xmax - xmin) / 2.
        # inverse-variance weighted average within the block, and its error
        yave = np.average(y[filt], weights=1. / yerr[filt]**2)
        ystd = (np.sum(1. / yerr[filt]**2))**(-1. / 2)
        bayesianblocks['x'].append(xave)
        bayesianblocks['xerr'].append(xerr)
        bayesianblocks['y'].append(yave)
        bayesianblocks['yerr'].append(ystd)

    return bayesianblocks
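The heart of the method is the 'measures' fitness; here is a standalone sketch with synthetic data, replacing the class plumbing (run_config) with a literal p0:

import numpy as np
from astropy import stats as astats

rng = np.random.RandomState(1)
x = np.sort(100 * rng.rand(60))                        # observation times
y = np.where(x < 50, 1.0, 2.0) + 0.1 * rng.randn(60)   # step-like flux
yerr = np.full_like(y, 0.1)

edges = astats.bayesian_blocks(x, y, yerr, fitness='measures', p0=0.01)
print(edges)   # block boundaries should bracket the jump near x = 50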
def test_zero_change_points(rseed=0):
    '''
    Ensure that edges contains both endpoints when there are no change points
    '''
    np.random.seed(rseed)
    # Using the failed edge case from
    # https://github.com/astropy/astropy/issues/8558
    values = np.array([1, 1, 1, 1, 1, 1, 1, 1, 2])
    bins = bayesian_blocks(values)
    assert values.min() == bins[0]
    assert values.max() == bins[-1]
def test_single_change_point(rseed=0):
    rng = np.random.default_rng(rseed)
    x = np.concatenate([rng.random(100), 1 + rng.random(200)])

    bins = bayesian_blocks(x)
    assert len(bins) == 3
    assert_allclose(bins[1], 0.927289, rtol=0.02)
def test_single_change_point(rseed=0):
    rng = np.random.RandomState(rseed)
    x = np.concatenate([rng.rand(100), 1 + rng.rand(200)])

    bins = bayesian_blocks(x)
    assert len(bins) == 3
    assert_allclose(bins[1], 1, rtol=0.02)
def bbduration(self, lcbinwidth=0.05, gamma=1e-300):
    os.chdir(self.resultdir)
    det = ['n9', 'n4']
    file = glob(self.datadir + 'glg_tte_' + det[0] + '_' +
                self.bnname + '_v*.fit')
    print(file)
    fitfile = file[0]
    hdu = fits.open(fitfile)
    data = hdu['events'].data['time']
    trigtime = hdu[0].header['trigtime']
    time = data - trigtime
    tte = time[(time > -10) & (time < 50)]

    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    ax2 = ax1.twinx()

    edges = np.arange(tte[0], tte[-1] + lcbinwidth, lcbinwidth)
    histvalue, histbin = np.histogram(tte, bins=edges)
    plottime = histbin
    plotrate = histvalue / lcbinwidth
    plotrate = np.concatenate(([plotrate[0]], plotrate))
    ax1.plot(plottime, plotrate, linestyle='steps', color='lightgreen')

    edges = bayesian_blocks(plottime, plotrate, fitness='events',
                            p0=1e-1, gamma=1e-300)
    histvalue, histbin = np.histogram(tte, bins=edges)
    plottime = histbin
    plotrate = histvalue / (histbin[1:] - histbin[:-1])
    plotrate = np.concatenate(([plotrate[0]], plotrate))
    # ax1.plot(plottime, plotrate, linestyle='steps', color='b')
    ax1.set_xlabel('time')
    ax1.set_ylabel('Count')

    # block centers and half-widths (first and last edges are skipped)
    l = len(edges)
    x = []
    dx = []
    for i in range(l - 3):
        s = (edges[i + 1] + edges[i + 2]) / 2
        z = (edges[i + 2] - edges[i + 1]) / 2
        x.append(s)
        dx.append(z)

    # epeak, epeak_error_p and epeak_error_n are expected to be defined
    # elsewhere (e.g. loaded from spectral fits) before this method is called
    dy = [epeak_error_p, epeak_error_n]
    ax2.scatter(x, epeak, color='black', zorder=2, marker='.', s=50.)
    ax2.errorbar(x, epeak, xerr=dx, yerr=dy, zorder=1, fmt='o',
                 color='0.15', markersize=1e-50)
    ax2.set_ylim(0, 600)
    ax2.set_ylabel('Epeak')
    plt.savefig('bbdurations.png')
def test_fitness_function_results():
    """Test results for several fitness functions"""
    rng = np.random.RandomState(42)

    # Event Data
    t = rng.randn(100)
    with pytest.warns(AstropyUserWarning, match=r'p0 does not seem to accurate'):
        edges = bayesian_blocks(t, fitness='events')
    assert_allclose(edges, [-2.6197451, -0.71094865, 0.36866702, 1.85227818])

    # Event data with repeats
    t[80:] = t[:20]
    with pytest.warns(AstropyUserWarning, match=r'p0 does not seem to accurate'):
        edges = bayesian_blocks(t, fitness='events', p0=0.01)
    assert_allclose(edges, [-2.6197451, -0.47432431, -0.46202823, 1.85227818])

    # Regular event data
    dt = 0.01
    t = dt * np.arange(1000)
    x = np.zeros(len(t))
    N = len(t) // 10
    x[rng.randint(0, len(t), N)] = 1
    x[rng.randint(0, len(t) // 2, N)] = 1
    edges = bayesian_blocks(t, x, fitness='regular_events', dt=dt)
    assert_allclose(edges, [0, 5.105, 9.99])

    # Measured point data with errors
    t = 100 * rng.rand(20)
    x = np.exp(-0.5 * (t - 50)**2)
    sigma = 0.1
    x_obs = x + sigma * rng.randn(len(x))
    edges = bayesian_blocks(t, x_obs, sigma, fitness='measures')
    expected = [4.360377, 48.456895, 52.597917, 99.455051]
    assert_allclose(edges, expected)

    # Optional arguments are passed (p0)
    p0_sel = 0.05
    edges = bayesian_blocks(t, x_obs, sigma, fitness='measures', p0=p0_sel)
    assert_allclose(edges, expected)

    # Optional arguments are passed (ncp_prior)
    ncp_prior_sel = 4 - np.log(73.53 * p0_sel * (len(t)**-0.478))
    edges = bayesian_blocks(t, x_obs, sigma, fitness='measures',
                            ncp_prior=ncp_prior_sel)
    assert_allclose(edges, expected)

    # Optional arguments are passed (gamma)
    gamma_sel = np.exp(-ncp_prior_sel)
    edges = bayesian_blocks(t, x_obs, sigma, fitness='measures',
                            gamma=gamma_sel)
    assert_allclose(edges, expected)
def test_single_change_point(rseed=0):
    rng = np.random.RandomState(rseed)
    x = np.concatenate([rng.rand(100), 1 + rng.rand(200)])

    with pytest.warns(AstropyUserWarning, match=r'p0 does not seem to accurate'):
        bins = bayesian_blocks(x)

    assert len(bins) == 3
    assert_allclose(bins[1], 1, rtol=0.02)
def test_regular_events():
    rng = np.random.RandomState(0)
    dt = 0.01
    steps = np.concatenate([np.unique(rng.randint(0, 500, 100)),
                            np.unique(rng.randint(500, 1000, 200))])
    t = dt * steps

    # string fitness
    bins1 = bayesian_blocks(t, fitness='regular_events', dt=dt)
    assert len(bins1) == 3
    assert_allclose(bins1[1], 5, rtol=0.05)

    # class name fitness
    bins2 = bayesian_blocks(t, fitness=RegularEvents, dt=dt)
    assert_allclose(bins1, bins2)

    # class instance fitness
    bins3 = bayesian_blocks(t, fitness=RegularEvents(dt=dt))
    assert_allclose(bins1, bins3)
def test_measures_fitness_heteroscedastic():
    rng = np.random.RandomState(1)
    t = np.linspace(0, 1, 11)
    x = np.exp(-0.5 * (t - 0.5)**2 / 0.01**2)
    sigma = 0.02 + 0.02 * rng.rand(len(x))
    x = x + sigma * rng.randn(len(x))

    bins = bayesian_blocks(t, x, sigma, fitness='measures')
    assert_allclose(bins, [0, 0.45, 0.55, 1])
def test_measures_fitness_homoscedastic(rseed=0):
    rng = np.random.default_rng(rseed)
    t = np.linspace(0, 1, 11)
    x = np.exp(-0.5 * (t - 0.5)**2 / 0.01**2)
    sigma = 0.05
    x = x + sigma * rng.standard_normal(len(x))

    bins = bayesian_blocks(t, x, sigma, fitness='measures')
    assert_allclose(bins, [0, 0.45, 0.55, 1])
def prepObsMass(obs_mass, bin_edges):
    """
    Obtain the histogram of the observed masses, using the requested
    bin edges method.
    """
    # Obtain histogram for observed cluster.
    if bin_edges == 'knuth':
        bin_edges = knuth_bin_width(obs_mass, return_bins=True, quiet=True)[1]
    elif bin_edges == 'block':
        bin_edges = bayesian_blocks(obs_mass)
    cl_histo, bin_edges = np.histogram(obs_mass, bins=bin_edges)

    return [bin_edges, cl_histo]
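Usage sketch with a hypothetical mass sample; 'block' selects the bayesian_blocks branch above:

import numpy as np
from astropy.stats import knuth_bin_width, bayesian_blocks

obs_mass = 10 ** np.random.uniform(-1, 2, 500)   # masses, arbitrary units
bin_edges, cl_histo = prepObsMass(obs_mass, 'block')
print(len(bin_edges) - 1, cl_histo.sum())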
def test_fitness_function_results():
    """Test results for several fitness functions"""
    rng = np.random.default_rng(42)

    # Event Data
    t = rng.standard_normal(100)
    edges = bayesian_blocks(t, fitness='events')
    assert_allclose(edges, [-1.95103519, -1.01861547, 0.95442154, 2.1416476])

    # Event data with repeats
    t[80:] = t[:20]
    edges = bayesian_blocks(t, fitness='events', p0=0.01)
    assert_allclose(edges, [-1.95103519, -1.08663566, 1.17575682, 2.1416476])

    # Regular event data
    dt = 0.01
    t = dt * np.arange(1000)
    x = np.zeros(len(t))
    N = len(t) // 10
    x[rng.integers(0, len(t), N)] = 1
    x[rng.integers(0, len(t) // 2, N)] = 1
    edges = bayesian_blocks(t, x, fitness='regular_events', dt=dt)
    assert_allclose(edges, [0, 4.365, 4.995, 9.99])

    # Measured point data with errors
    t = 100 * rng.random(20)
    x = np.exp(-0.5 * (t - 50)**2)
    sigma = 0.1
    x_obs = x + sigma * rng.standard_normal(len(x))
    edges = bayesian_blocks(t, x_obs, sigma, fitness='measures')
    expected = [1.39362877, 44.30811196, 49.46626158, 54.37232704, 92.7562551]
    assert_allclose(edges, expected)

    # Optional arguments are passed (p0)
    p0_sel = 0.05
    edges = bayesian_blocks(t, x_obs, sigma, fitness='measures', p0=p0_sel)
    assert_allclose(edges, expected)

    # Optional arguments are passed (ncp_prior)
    ncp_prior_sel = 4 - np.log(73.53 * p0_sel * (len(t)**-0.478))
    edges = bayesian_blocks(t, x_obs, sigma, fitness='measures',
                            ncp_prior=ncp_prior_sel)
    assert_allclose(edges, expected)

    # Optional arguments are passed (gamma)
    gamma_sel = np.exp(-ncp_prior_sel)
    edges = bayesian_blocks(t, x_obs, sigma, fitness='measures',
                            gamma=gamma_sel)
    assert_allclose(edges, expected)
def fit(self, X, sample_weight=None, **kwargs):
    # Checks
    X = check_array(X)
    if sample_weight is not None and len(sample_weight) != len(X):
        raise ValueError

    # Compute histogram and edges
    if self.bins == "blocks":
        bins = bayesian_blocks(X.ravel(), fitness="events", p0=0.0001)
        range_ = self.range[0] if self.range else None
        h, e = np.histogram(X.ravel(), bins=bins, range=range_,
                            weights=sample_weight)
        e = [e]

    elif self.variable_width:
        ticks = [np.percentile(X.ravel(), 100. * k / self.bins)
                 for k in range(self.bins + 1)]
        ticks[-1] += 1e-5
        range_ = self.range[0] if self.range else None
        h, e = np.histogram(X.ravel(), bins=ticks, range=range_,
                            weights=sample_weight)
        h, e = h.astype(float), e.astype(float)
        widths = e[1:] - e[:-1]
        h = h / widths / h.sum()
        e = [e]

    else:
        bins = self.bins
        # density=True replaces the `normed` argument removed from numpy
        h, e = np.histogramdd(X, bins=bins, range=self.range,
                              weights=sample_weight, density=True)

    # Add empty bins for out of bound samples
    for j in range(X.shape[1]):
        h = np.insert(h, 0, 0., axis=j)
        h = np.insert(h, h.shape[j], 0., axis=j)
        e[j] = np.insert(e[j], 0, -np.inf)
        e[j] = np.insert(e[j], len(e[j]), np.inf)

    if X.shape[1] == 1 and self.interpolation:
        inputs = e[0][2:-1] - (e[0][2] - e[0][1]) / 2.
        inputs[0] = e[0][1]
        inputs[-1] = e[0][-2]
        outputs = h[1:-1]
        self.interpolation_ = interp1d(inputs, outputs,
                                       kind=self.interpolation,
                                       bounds_error=False,
                                       fill_value=0.)

    self.histogram_ = h
    self.edges_ = e
    self.ndim_ = X.shape[1]
    return self
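The 'blocks' branch reduces to feeding bayesian_blocks edges into numpy.histogram; a standalone sketch of that core step (variable names are illustrative, not part of the class):

import numpy as np
from astropy.stats import bayesian_blocks

rng = np.random.RandomState(2)
X = np.concatenate([rng.normal(-2, 0.5, 300), rng.normal(2, 1.0, 300)])

edges = bayesian_blocks(X, fitness="events", p0=0.0001)
h, e = np.histogram(X, bins=edges)
density = h / np.diff(e) / h.sum()   # per-unit-width density, as in the
                                     # variable-width branch above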
def test_zero_change_points(rseed=0):
    """
    Ensure that edges contains both endpoints when there are no change points
    """
    np.random.seed(rseed)
    # Using the failed edge case from
    # https://github.com/astropy/astropy/issues/8558
    values = np.array([1, 1, 1, 1, 1, 1, 1, 1, 2])
    with pytest.warns(AstropyUserWarning, match=r'p0 does not seem to accurate'):
        bins = bayesian_blocks(values)
    assert values.min() == bins[0]
    assert values.max() == bins[-1]
def get_bin_sizes_x(x, algo='scott'):
    """Smartly get bin sizes to have a lower bias due to binning"""
    from astropy.stats import (freedman_bin_width, scott_bin_width,
                               knuth_bin_width, bayesian_blocks)
    logger.info(" > Get smart bin sizes in 1D")
    if algo == 'scott':
        logger.info("use scott rule of thumb")
        width_x, bins_x = scott_bin_width(x, return_bins=True)
    elif algo == 'knuth':
        logger.info("use knuth rule of thumb")
        width_x, bins_x = knuth_bin_width(x, return_bins=True)
    elif algo == 'freedman':
        logger.info("use freedman rule of thumb")
        width_x, bins_x = freedman_bin_width(x, return_bins=True)
    elif algo == 'blocks':
        logger.info("use bayesian blocks rule of thumb")
        # bayesian_blocks has no return_bins argument; it returns the edges
        # directly, and the widths are variable (one per block)
        bins_x = bayesian_blocks(x)
        width_x = bins_x[1:] - bins_x[:-1]
    else:
        raise NotImplementedError("use scott, knuth, freedman or blocks")
    return bins_x, width_x
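Quick check of the 'blocks' branch; a module-level logger is assumed to exist, as the function calls logger.info:

import logging
import numpy as np

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)   # stands in for the module logger

x = np.concatenate([np.random.normal(0, 1, 400),
                    np.random.normal(4, 0.3, 100)])
bins_x, width_x = get_bin_sizes_x(x, algo='blocks')
print(len(bins_x), width_x.min(), width_x.max())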
def timeslice(self, lcbinwidth=0.05, gamma=1e-300):
    os.chdir(self.resultdir)
    det = ['n9', 'n4']
    file = glob(self.datadir + 'glg_tte_' + det[0] + '_' +
                self.bnname + '_v*.fit')
    print(file)
    fitfile = file[0]
    hdu = fits.open(fitfile)
    data = hdu['events'].data['time']
    trigtime = hdu[0].header['trigtime']
    time = data - trigtime
    tte = time[(time > -10) & (time < 50)]

    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    ax2 = ax1.twinx()

    edges = np.arange(tte[0], tte[-1] + lcbinwidth, lcbinwidth)
    histvalue, histbin = np.histogram(tte, bins=edges)
    plottime = histbin
    plotrate = histvalue / lcbinwidth
    plotrate = np.concatenate(([plotrate[0]], plotrate))
    ax1.plot(plottime, plotrate, linestyle='steps', color='lightgreen')

    edges = bayesian_blocks(plottime, plotrate, fitness='events',
                            p0=1e-1, gamma=1e-300)
    histvalue, histbin = np.histogram(tte, bins=edges)
    plottime = histbin
    plotrate = histvalue / (histbin[1:] - histbin[:-1])
    plotrate = np.concatenate(([plotrate[0]], plotrate))
    ax1.plot(plottime, plotrate, linestyle='steps', color='b')

    # collect the interior block edges as time slices
    time_slice = []
    l = len(edges)
    for i in range(1, l - 1):
        time_slice.append(edges[i])
    print('time_slice', time_slice)
    plt.savefig('bb.png')
def mf(haloes, edges):
    """Bins FoF haloes"""
    if edges == "bayes":
        edges = bayesian_blocks(haloes["M200Crit"])
    try:
        counts, edges = np.histogram(haloes["M200Crit"], edges)
    except ValueError:
        logging.exception("Error binning haloes")
        sys.exit(1)
    centres = 0.5 * (edges[1:] + edges[:-1])
    haloes = np.lib.recfunctions.append_fields(
        haloes, "bin",
        np.digitize(haloes["M200Crit"], edges, right=True),
        dtypes=[int], usemask=False,
    )
    return haloes, centres, counts
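Usage sketch with a hypothetical structured halo catalogue; only the 'M200Crit' field is required by mf:

import sys
import logging
import numpy as np
import numpy.lib.recfunctions
from astropy.stats import bayesian_blocks

haloes = np.zeros(1000, dtype=[("M200Crit", float)])
haloes["M200Crit"] = 10 ** np.random.uniform(11, 14, 1000)

haloes, centres, counts = mf(haloes, "bayes")
print(haloes["bin"][:5], centres[:3], counts[:3])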
def test_errors():
    rng = np.random.RandomState(0)
    t = rng.rand(100)

    # x must be integer or None for events
    with pytest.raises(ValueError):
        bayesian_blocks(t, fitness='events', x=t)

    # x must be binary for regular events
    with pytest.raises(ValueError):
        bayesian_blocks(t, fitness='regular_events', x=10 * t, dt=1)

    # x must be specified for measures
    with pytest.raises(ValueError):
        bayesian_blocks(t, fitness='measures')

    # sigma cannot be specified without x
    with pytest.raises(ValueError):
        bayesian_blocks(t, fitness='events', sigma=0.5)

    # length of x must match length of t
    with pytest.raises(ValueError):
        bayesian_blocks(t, fitness='measures', x=t[:-1])

    # repeated values in t fail when x is specified
    t2 = t.copy()
    t2[1] = t2[0]
    with pytest.raises(ValueError):
        bayesian_blocks(t2, fitness='measures', x=t)

    # sigma must be broadcastable with x
    with pytest.raises(ValueError):
        bayesian_blocks(t, fitness='measures', x=t, sigma=t[:-1])
def hist(x, bins=10, range=None, *args, **kwargs):
    """Enhanced histogram

    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the `bins` argument allowing
    a string specifying how bins are computed, the parameters are the same
    as pylab.hist().

    Parameters
    ----------
    x : array_like
        array of data to be histogrammed

    bins : int or list or str (optional)
        If bins is a string, then it must be one of:
        'blocks' : use bayesian blocks for dynamic bin widths
        'knuth' : use Knuth's rule to determine bins
        'scott' : use Scott's rule to determine bins
        'freedman' : use the Freedman-Diaconis rule to determine bins

    range : tuple or None (optional)
        the minimum and maximum range for the histogram.  If not specified,
        it will be (x.min(), x.max())

    ax : Axes instance (optional)
        specify the Axes on which to draw the histogram.  If not specified,
        then the current active axes will be used.

    **kwargs :
        other keyword arguments are described in pylab.hist().
    """
    if isinstance(bins, str) and "weights" in kwargs:
        warnings.warn("weights argument is not supported: it will be ignored.")
        kwargs.pop('weights')

    x = np.asarray(x)

    if 'ax' in kwargs:
        ax = kwargs['ax']
        del kwargs['ax']
    else:
        # import here so that testing with Agg will work
        from matplotlib import pyplot as plt
        ax = plt.gca()

    # if range is specified, we need to truncate the data for
    # the bin-finding routines
    if (range is not None and (bins in ['blocks', 'knuth', 'knuths',
                                        'scott', 'scotts',
                                        'freedman', 'freedmans'])):
        x = x[(x >= range[0]) & (x <= range[1])]

    if bins in ['blocks']:
        bins = bayesian_blocks(x)
    elif bins in ['knuth', 'knuths']:
        dx, bins = knuth_bin_width(x, True)
    elif bins in ['scott', 'scotts']:
        dx, bins = scott_bin_width(x, True)
    elif bins in ['freedman', 'freedmans']:
        dx, bins = freedman_bin_width(x, True)
    elif isinstance(bins, str):
        raise ValueError("unrecognized bin code: '{}'".format(bins))

    return ax.hist(x, bins, range, **kwargs)
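A short matplotlib usage sketch for hist with the 'blocks' rule, assuming the bin-width helpers are imported at module level as the function body expects:

import numpy as np
from matplotlib import pyplot as plt

rng = np.random.RandomState(3)
data = np.concatenate([rng.normal(-1, 0.3, 400), rng.normal(2, 1.0, 600)])

hist(data, bins='blocks', histtype='step', density=True)
plt.show()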
def describe_numeric_1d(series: pd.Series, series_description: dict) -> dict:
    """Describe a numeric series.

    Args:
        series: The Series to describe.
        series_description: The dict containing the series description so far.

    Returns:
        A dict containing calculated series description values.

    Notes:
        When 'bins_type' is set to 'bayesian_blocks',
        astropy.stats.bayesian_blocks is used to determine the number of bins.
        Read the docs:
        https://docs.astropy.org/en/stable/visualization/histogram.html
        https://docs.astropy.org/en/stable/api/astropy.stats.bayesian_blocks.html

        This method might print warnings, which we suppress.
        https://github.com/astropy/astropy/issues/4927
    """
    quantiles = config["vars"]["num"]["quantiles"].get(list)

    stats = {
        "mean": series.mean(),
        "std": series.std(),
        "variance": series.var(),
        "min": series.min(),
        "max": series.max(),
        "kurtosis": series.kurt(),
        "skewness": series.skew(),
        "sum": series.sum(),
        "mad": series.mad(),
        "n_zeros": (len(series) - np.count_nonzero(series)),
        "histogramdata": series,
    }

    stats["range"] = stats["max"] - stats["min"]
    stats.update(
        {
            "{:.0%}".format(percentile): value
            for percentile, value in series.quantile(quantiles).to_dict().items()
        }
    )
    stats["iqr"] = stats["75%"] - stats["25%"]
    stats["cv"] = stats["std"] / stats["mean"] if stats["mean"] else np.NaN
    stats["p_zeros"] = float(stats["n_zeros"]) / len(series)

    bins = config["plot"]["histogram"]["bins"].get(int)
    # Bins should never be larger than the number of distinct values
    bins = min(series_description["distinct_count_with_nan"], bins)
    stats["histogram_bins"] = bins

    bayesian_blocks_bins = config["plot"]["histogram"]["bayesian_blocks_bins"].get(bool)
    if bayesian_blocks_bins:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            ret = bayesian_blocks(stats["histogramdata"])

            # Sanity check
            if not np.isnan(ret).any() and ret.size > 1:
                stats["histogram_bins_bayesian_blocks"] = ret

    return stats
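The bayesian-blocks branch above, extracted as a standalone sketch with the same warning suppression and sanity check:

import warnings
import numpy as np
from astropy.stats import bayesian_blocks

data = np.random.standard_normal(500)
with warnings.catch_warnings():
    warnings.simplefilter("ignore")        # astropy may warn about p0
    ret = bayesian_blocks(data)

if not np.isnan(ret).any() and ret.size > 1:
    print("bayesian-blocks bins:", ret.size - 1)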
def _MakeTimeBins(self):
    from astropy.stats import bayesian_blocks
    from astropy.table import Table

    evtfile = str("%s/AppertureLightCurve/%s_%s_MkTime.fits"
                  % (self.folder, self.srcname, self.Tag))
    evtlist = Table.read(evtfile, hdu='EVENTS')['TIME'].data

    expfile = str("%s/AppertureLightCurve/%s_%s_applc.fits"
                  % (self.folder, self.srcname, self.Tag))
    expbins = Table.read(expfile, hdu='RATE')

    meanRate = float(len(evtlist)) / float(self.tmax - self.tmin)
    print("Mean photon rate %s s^-1" % meanRate)
    print("Mean photon rate %s day^-1" % (meanRate * 3600. * 24))

    # Sort tables in function of time, just to be sure
    evtlist.sort()
    evtlistExpCorrected = np.empty_like(evtlist)
    expbins = expbins[expbins.argsort('TIME')]

    # Calculate the relative exposure time and the associated time
    # correction for each exposure bin
    j = 0
    surfaceFermi = 10000  # in cm^2
    timeCorrection = np.zeros((len(expbins) + 1, 2))
    exposure = np.zeros(len(expbins))
    timeCorrection[j, 0] = expbins['TIME'][j] - 0.5 * expbins['TIMEDEL'][j]
    timeCorrection[j, 1] = 0.
    exposure[j] = expbins['EXPOSURE'][j] / (expbins['TIMEDEL'][j] * surfaceFermi)

    for j in range(1, len(expbins)):
        exposure[j] = expbins['EXPOSURE'][j] / (expbins['TIMEDEL'][j]
                                                * surfaceFermi)
        timeCorrection[j, 0] = expbins['TIME'][j] - 0.5 * expbins['TIMEDEL'][j]
        timeCorrection[j, 1] = (timeCorrection[j - 1, 1]
                                + exposure[j - 1] * expbins['TIMEDEL'][j - 1])
        timeCorrection[j + 1, 0] = expbins['TIME'][j] + 0.5 * expbins['TIMEDEL'][j]
        timeCorrection[j + 1, 1] = (timeCorrection[j, 1]
                                    + exposure[j] * expbins['TIMEDEL'][j])

    # Apply exposure time correction
    evtlistcorrected = np.interp(evtlist, timeCorrection[:, 0],
                                 timeCorrection[:, 1])
    meanRateCorrected = (float(len(evtlistcorrected))
                         / float(timeCorrection[-1, 1] - timeCorrection[0, 1]))
    print("Mean photon rate exposure corrected %s s^-1" % meanRateCorrected)
    print("Mean photon rate exposure corrected %s day^-1"
          % (meanRateCorrected * 3600. * 24))

    # Calculate bayesian blocks
    edgesCorrected = bayesian_blocks(evtlistcorrected, fitness='events',
                                     p0=self.p0)
    edgesCorrected[0] = timeCorrection[0, 1]
    edgesCorrected[-1] = timeCorrection[-1, 1]

    # Calculate bin events for aperture photometry
    count, tmp = np.histogram(evtlistcorrected, bins=edgesCorrected)
    errcount = np.sqrt(count)

    # Correct edges from exposure
    edges = np.interp(edgesCorrected, timeCorrection[:, 1],
                      timeCorrection[:, 0])
    edges[0] = self.tmin
    edges[-1] = self.tmax

    # Calculate aperture photometry flux
    flux = np.array(count / (edgesCorrected[1:] - edgesCorrected[:-1]))
    errflux = np.array(errcount / (edgesCorrected[1:] - edgesCorrected[:-1]))

    self.Nbin = len(edges) - 1
    self.time_array = np.zeros(self.Nbin * 2)
    self.gtifile = []
    for i in range(self.Nbin):
        self.time_array[2 * i] = edges[i]
        self.time_array[2 * i + 1] = edges[i + 1]

    self.info("Running LC with " + str(self.Nbin) + " bins")
    for i in range(self.Nbin):
        print("Bin", i, "Start=", self.time_array[2 * i],
              "Stop=", self.time_array[2 * i + 1],
              "Aperture Photometry=", flux[i], "+/-", errflux[i], "ph.s^-1")

    # Dump into ascii
    bbfile = str("%s/BayesianBlocks/%s_bb.dat" % (self.folder, self.srcname))
    np.savetxt(bbfile,
               np.transpose(np.array([np.array(edges[:-1]),
                                      np.array(edges[1:]),
                                      np.array(edgesCorrected[1:]
                                               - edgesCorrected[:-1]),
                                      np.array(count)])),
               header='tstart tend dt_exposure_corrected count')

    # Load aperture flux points
    time_pt, dTime_pt, flux_pt, errflux_pt = \
        self.readApperturePhotometryPoint()

    plt.figure()
    plt.xlabel(r"Time (s)")
    plt.ylabel(r"${\rm Flux\ (photon\ cm^{-2}\ s^{-1})}$")

    plot_bayesianblocks(np.array(edges[:-1]), np.array(edges[1:]),
                        flux / surfaceFermi,
                        errflux / surfaceFermi, errflux / surfaceFermi,
                        np.zeros(flux.shape).astype(bool))
    plt.errorbar(time_pt, flux_pt / surfaceFermi,
                 yerr=errflux_pt / surfaceFermi, xerr=dTime_pt / 2.,
                 color='k', ls='None')
    plt.ylim(ymin=max(plt.ylim()[0],
                      np.percentile(flux / surfaceFermi, 1) * 0.1),
             ymax=min(plt.ylim()[1],
                      np.percentile(flux / surfaceFermi, 99) * 2.0))
    plt.xlim(xmin=max(plt.xlim()[0],
                      1.02 * min(np.array(edges[:-1]))
                      - 0.02 * max(np.array(edges[1:]))),
             xmax=min(plt.xlim()[1],
                      1.02 * max(np.array(edges[1:]))
                      - 0.02 * min(np.array(edges[:-1]))))

    # Move the offset to the axis label
    ax = plt.gca()
    ax.get_yaxis().get_major_formatter().set_useOffset(False)
    offset_factor = int(np.mean(np.log10(np.abs(ax.get_ylim()))))
    if offset_factor != 0:
        ax.set_yticklabels([float(round(k, 5))
                            for k in ax.get_yticks() * 10**(-offset_factor)])
        ax.yaxis.set_label_text(ax.yaxis.get_label_text()
                                + r" [${\times 10^{%d}}$]" % offset_factor)

    # Secondary axis with MJD
    mjdaxis = ax.twiny()
    mjdaxis.set_xlim([utils.met_to_MJD(k) for k in ax.get_xlim()])
    mjdaxis.set_xlabel(r"Time (MJD)")
    mjdaxis.xaxis.set_major_formatter(
        matplotlib.ticker.ScalarFormatter(useOffset=False))
    plt.setp(mjdaxis.xaxis.get_majorticklabels(), rotation=15)
    plt.tight_layout()

    LcOutPath = self.LCfolder + self.config['target']['name']
    plt.savefig(LcOutPath + "_AP.png", dpi=150, facecolor='w',
                edgecolor='w', orientation='portrait',
                transparent=False, bbox_inches=None, pad_inches=0.1)
def light_curve_analysis(file, NaI, BGO, good_ni, good_bi, txtdir,
                         plotsave, plotsave1):
    dt = 0.064
    maxx = 0
    new_c = {}
    if not os.path.exists(txtdir):
        os.makedirs(txtdir)
    myfile.printdatatofile(txtdir + 'N_good_ni.txt', data=[good_ni])
    myfile.printdatatofile(txtdir + 'N_good_bi.txt', data=[good_bi])

    for ni in NaI:
        if file[ni] is not None:
            hl = file[ni]
            trigtime = hl[0].header['TRIGTIME']
            time = hl[2].data.field(0)
            ch = hl[2].data.field(1)
            ch_n = hl[1].data.field(0)
            e1 = hl[1].data.field(1)
            e2 = hl[1].data.field(2)
            t = time - trigtime
            ch_index = np.where((ch >= 3) & (ch < 123))[0]
            ch_n1 = np.arange(3, 123, 1, dtype=int)
            t = t[ch_index]
            ch = ch[ch_index]
            bins = np.arange(t[0], t[-1], dt)
            bin_n, bin_edges = np.histogram(t, bins=bins)
            t_c = (bin_edges[1:] + bin_edges[:-1]) * 0.5
            rate = bin_n / dt
            t_c, cs_rate, bs_rate = TD_baseline(t_c, rate)
            new_c[ni] = [t_c, rate, bs_rate]
            if ni in good_ni:
                if rate.max() > maxx:
                    maxx = rate.max()
                rate_sm = cs_rate + bs_rate.mean()
                bin_n_sm = np.round(rate_sm * dt)
                edges = bayesian_blocks(t_c, bin_n_sm, fitness='events',
                                        p0=0.05)
                result = background_correction(t_c, rate_sm, edges, degree=7)
                startedges, stopedges = get_bayesian_duration(result, sigma=3)
                new_c[ni + 'bb'] = [startedges, stopedges]
                if len(startedges) > 0:
                    if not os.path.exists(txtdir):
                        os.makedirs(txtdir)
                    myfile.printdatatofile(
                        txtdir + 'Z_' + ni + '_bayesian_duration.txt',
                        data=[startedges, stopedges],
                        format=['.5f', '.5f'])
                    flash_start, flash_stop = get_bayesian_flash(
                        result, startedges, stopedges)
                    myfile.printdatatofile(
                        txtdir + 'Y_' + ni + '_bayesian_flash.txt',
                        data=[flash_start, flash_stop],
                        format=['.5f', '.5f'])
                '''
                txx_result = get_bayesian_txx(result, startedges, stopedges,
                                              txx=0.9, it=400, lamd=200.)
                myplt = Plot(txx_result)
                plt.title(ni)
                myplt.plot_light_curve(sigma=5)
                plt.xlim(t[0], t[-1])
                plt.savefig(txtdir + 'X_' + ni + '_bayesian_txx.png')
                plt.close()
                print('***********', len(txx_result['txx']),
                      len(txx_result['txx_list']))
                for ij in range(len(txx_result['txx'])):
                    plt.title(ni)
                    myplt.plot_distribution('90', num=ij)
                    plt.savefig(txtdir + 'W_' + ni + '_distribution_'
                                + str(ij) + '.png')
                    plt.close()
                plt.figure(figsize=(10, 10))
                plt.subplot(2, 1, 1)
                plt.title(ni)
                myplt.plot_Txx1('90')
                plt.xlim(t[0], t[-1])
                plt.subplot(2, 1, 2)
                myplt.plot_Txx2('90')
                plt.xlim(t[0], t[-1])
                plt.savefig(txtdir + 'U_' + ni + '_txx.png')
                plt.close()
                save_result(txx_result, txtdir + 'V_' + ni
                            + '_distribution_T90.csv')
                '''
                if ni == good_ni[0]:
                    ni_event = Separate_source(t, ch, ch_n1, WT=False)
                    s_t, s_ch = ni_event.get_S_t_and_ch()
                    new_t, new_energy = ch_to_energy(s_t, s_ch, ch_n, e1, e2)
                    fig = plt.figure(figsize=(20, 20))
                    ax1 = fig.add_subplot(2, 2, 1)
                    ax1.set_title('light curve', size=20)
                    ax1.step(t_c, rate, color='k', label=ni)
                    ax1.set_xlabel('time (s)', size=20)
                    ax1.set_ylabel('counts rate /s', size=20)
                    ax1.set_xlim(t[0], t[-1])
                    ax1.legend()
                    ax2 = fig.add_subplot(2, 2, 2)
                    ax2.set_title('point map', size=20)
                    ax2.plot(new_t, new_energy, ',', color='k')
                    ax2.set_xlabel('time (s)', size=20)
                    ax2.set_ylabel('energy (kev)', size=20)
                    ax2.set_yscale('log')
                    ax2.set_xlim(t[0], t[-1])
                    ax2.set_ylim(8, 9.1e2)
                    ax3 = fig.add_subplot(2, 2, 3)
                    ax3.step(t_c, rate, color='k', label=ni)
                    if len(startedges) > 0:
                        for i in range(len(startedges)):
                            ax3.axvline(x=startedges[i], color='r')
                            ax3.axvline(x=stopedges[i], color='g')
                    ax3.set_xlabel('time (s)', size=20)
                    ax3.set_ylabel('counts rate /s', size=20)
                    ax3.set_xlim(t[0], t[-1])
                    ax3.legend()
                    ax4 = fig.add_subplot(2, 2, 4)
                    ax4.plot(new_t, new_energy, ',', color='k')
                    if len(startedges) > 0:
                        for i in range(len(startedges)):
                            ax4.axvline(x=startedges[i], color='r')
                            ax4.axvline(x=stopedges[i], color='g')
                    ax4.set_xlabel('time (s)', size=20)
                    ax4.set_ylabel('energy (kev)', size=20)
                    ax4.set_yscale('log')
                    ax4.set_xlim(t[0], t[-1])
                    ax4.set_ylim(8, 9.1e2)
                    for k in plotsave:
                        dir_, file_ = os.path.split(k)
                        if not os.path.exists(dir_):
                            os.makedirs(dir_)
                        fig.savefig(k)
                    plt.close(fig)
            else:
                new_c[ni + 'bb'] = None
        else:
            new_c[ni] = None
            new_c[ni + 'bb'] = None

    for bi in BGO:
        if file[bi] is not None:
            hl = file[bi]
            trigtime = hl[0].header['TRIGTIME']
            time = hl[2].data.field(0)
            t = time - trigtime
            bins = np.arange(t[0], t[-1], dt)
            bin_n, bin_edges = np.histogram(t, bins=bins)
            t_c = (bin_edges[1:] + bin_edges[:-1]) * 0.5
            rate = bin_n / dt
            t_c, cs_rate, bs_rate = TD_baseline(t_c, rate)
            new_c[bi] = [t_c, rate, bs_rate]
            if bi in good_bi:
                rate_sm = cs_rate + bs_rate.mean()
                bin_n_sm = np.round(rate_sm * dt)
                edges = bayesian_blocks(t_c, bin_n_sm, fitness='events',
                                        gamma=np.exp(-5))
                result = background_correction(t_c, rate_sm, edges, degree=7)
                startedges, stopedges = get_bayesian_duration(result, sigma=5)
                if len(startedges) > 0:
                    if not os.path.exists(txtdir):
                        os.makedirs(txtdir)
                    myfile.printdatatofile(
                        txtdir + 'Z_' + bi + '_bayesian_duration.txt',
                        data=[startedges, stopedges],
                        format=['.5f', '.5f'])
        else:
            new_c[bi] = None

    plt.figure(figsize=(30, 60))
    plt.subplots_adjust(left=0.1, right=0.9, top=0.95, bottom=0.05)
    for index, value in enumerate(NaI):
        plt.subplot(14, 1, index + 1)
        if new_c[value] is not None:
            tm, rate, bs = new_c[value]
            plt.plot(tm, rate, color='k', label=value)
            plt.plot(tm, bs, color='r', label='back')
            if new_c[value + 'bb'] is not None:
                started, stoped = new_c[value + 'bb']
                for kk in range(len(started)):
                    plt.axvline(x=started[kk], color='r')
                    plt.axvline(x=stoped[kk], color='g')
            plt.ylabel('the count rate (N/s)')
            plt.xlim(tm[0], tm[-1])
            plt.ylim(0, maxx * 0.5 + maxx)
            plt.legend(loc='upper left')
    for index, value in enumerate(BGO):
        plt.subplot(14, 1, index + 13)
        if new_c[value] is not None:
            tm, rate, bs = new_c[value]
            plt.plot(tm, rate, color='k', label=value)
            plt.plot(tm, bs, color='r', label='back')
            plt.ylabel('the count rate (N/s)')
            plt.xlim(tm[0], tm[-1])
            plt.ylim(0, maxx * 0.5 + maxx)
            plt.legend(loc='upper left')
    for vv in plotsave1:
        dir_, file_ = os.path.split(vv)
        if not os.path.exists(dir_):
            os.makedirs(dir_)
        plt.savefig(vv)
    plt.close()
def read_times(dir_list):
    """Reads xrt data from the given directory list and appends all the times"""
    time = []
    with open(dir_list, 'r') as reader:
        for dire in reader.readlines():
            fits_name = "sw" + dire[:-2] + "xpcw3po_cl.evt.gz"
            path = "./" + dire[:-1] + "xrt/event/" + fits_name
            with fits.open(path) as hdul:
                time.append(hdul['EVENTS'].data["Time"])
    return np.concatenate(time)


time = read_times(dir_list)
edges = bayesian_blocks(time, fitness='events', p0=0.01)
edges1 = bayesian_blocks(time, fitness='events')

# edges = bayesian_blocks(t, fitness='events')
# edges = bayesian_blocks(t, x, fitness='measures')
# edges1 = bayesian_blocks(t, x, sigma=s, fitness='measures')
# edges2 = bayesian_blocks(t, x, sigma=s, fitness='measures', p0=0.01)
# plt.scatter(range(len(edges)), edges)
# Out[19]: <matplotlib.collections.PathCollection at 0x7f303bb3c250>
# plt.scatter(range(len(edges1)), edges1)
# Out[20]: <matplotlib.collections.PathCollection at 0x7f30380653d0>
# plt.scatter(range(len(edges2)), edges2)
# Out[21]: <matplotlib.collections.PathCollection at 0x7f303806c760>
line = "spec" + str(i) + " " + str(tstart) + "-" + str(tstop) f.write(line + "\n") f.close() lc_filename = "curve_mod.qdp" bin_textfile = "tbin_bb.txt" modes, data = Read_lc_qdp(lc_filename) PC_data = np.array(data[-1], dtype=np.float32) t = PC_data[:, 0] x = PC_data[:, 3] del_t = np.diff(t) s = (PC_data[:, 4] - PC_data[:, 5]) / 2 # edges = bayesian_blocks(t, fitness='events') # edges = bayesian_blocks(t, x,fitness='measures') # edges1 = bayesian_blocks(t, x,sigma=s,fitness='measures') edges = bayesian_blocks(t, x, sigma=s, fitness='measures', p0=0.01) CreateBinFile(edges, bin_textfile) # plt.scatter(range(len(edges)),edges) # Out[19]: <matplotlib.collections.PathCollection at 0x7f303bb3c250> # plt.scatter(range(len(edges1)),edges1) # Out[20]: <matplotlib.collections.PathCollection at 0x7f30380653d0> # plt.scatter(range(len(edges2)),edges2) # Out[21]: <matplotlib.collections.PathCollection at 0x7f303806c760> # plt.yscale("log")
time = data[2].data.field(0)
ch = data[2].data.field(1)
t = time - trigtime
ch_n = data[1].data.field(0)
e1 = data[1].data.field(1)
e2 = data[1].data.field(2)
# t = t[np.where((ch >= 3) & (ch <= 109))]
# ch = ch[np.where((ch >= 3) & (ch <= 109))]

start_edges, stop_edges = get_pulse_duration(t)
dt = 1
edges0 = np.arange(start_edges - 2, stop_edges + 2 + dt, dt)
bin_n, edges0 = np.histogram(t, bins=edges0)
bin_c = (edges0[1:] + edges0[:-1]) * 0.5
# keep the second through the second-to-last edge
edges = bayesian_blocks(bin_c, np.rint(bin_n), fitness='events',
                        p0=0.001)[1:-1]
slic_start = edges[:-1]
slic_top = edges[1:]
block_time = []
block_time_err = []
Ep = []
Ep_err1 = []
Ep_err2 = []
for dete_n in dete:
    make_phaI(name, dete_n, sampledir, save_dir, slic_start,