def __call__(self): for filenames in self.get_filenames: dir_base, base_names = io.split_dir_base(filenames) print '======================================================' print 'directory base:', dir_base data = io.load_data(filenames) if self.n_merge_bins is not None: data = tr.merge_bins(data, self.n_merge_bins) print 'angles range:', data[:, 0].min(), data[:, 0].max() data = tr.fix_increasing(tr.fix_range(tr.transform_2pi(data))) print 'transformed angles range:', data[0, 0], data[-1, 0] print 'data range:', data[:, 1].min(), data[:, 1].max() # Simulate the "random process" the histogram was done from. counts = tr.get_counts_from_lengths(data[:, 1]) fdata = tr.spread_by_counts(data[:, 0], counts, trivial=self.spread_data == False) print 'simulated counts range:', counts.min(), counts.max() ddata = np.sort(tr.transform_pi_deg(data[:, 0], neg_shift=self.neg_shift)) dd = ddata[1] - ddata[0] all_bins = np.r_[ddata - 1e-8, ddata[-1] + dd] bins = all_bins[::self.plot_bins_step] self.current = Struct(filenames=filenames, dir_base=dir_base, base_names=base_names) self.source_data = Struct(counts=counts, data=data, fdata=fdata, bins=bins) yield self.source_data
def get_pars(pset, area_angles): """ Get starting parameters given the area angles of two systems. """ x0, xm, x1, area1, area2 = area_angles mu0 = 0.5 * (x0 + xm) mu1 = 0.5 * (xm + x1) print mu0, mu1 mu0, mu1 = tr.fix_range(tr.transform_2pi([mu0, mu1])) pars = np.r_[[2.0, mu0, 2.0, mu1] + [0.1, 0.0] * (pset.n_components - 2)] return pars
def plot_histogram_comparison(output_dir, result, source, pset_id=None):
    """
    Plot the source data against random variates drawn from the fitted
    mixture and save the figure as a PNG file.

    Parameters
    ----------
    output_dir : str
        Directory the figure file is written to.
    result : object
        Fit result; `result.model.rvs_mix()` is called with
        `result.full_params` to draw as many variates as there are items
        in the source `fdata`.
    source : object
        Data source providing `get_source_data()`, `neg_shift` and
        `current.dir_base`.
    pset_id : int, optional
        If given, it is appended to the file name:
        `<dir_base>-cmp-<pset_id>.png` instead of `<dir_base>-cmp.png`.
    """
    # Only the spread data and the bins are needed here.
    _, fdata, bins = source.get_source_data()

    rvs, sizes = result.model.rvs_mix(result.full_params,
                                      size=fdata.shape[0], ret_sizes=True)
    rvs = fix_range(rvs)

    fig = plot_rvs_comparison(fdata, rvs, sizes, bins,
                              neg_shift=source.neg_shift)
    try:
        suffix = '-cmp.png' if pset_id is None else '-cmp-%d.png' % pset_id
        figname = os.path.join(output_dir, source.current.dir_base + suffix)

        # Lay out this very figure instead of relying on pyplot's implicit
        # "current figure" state.
        fig.tight_layout(pad=0.5)
        fig.savefig(figname)
    finally:
        # Always release the figure, even if saving fails, so that it does
        # not stay registered in pyplot.
        plt.close(fig)
def __call__(self): for filenames in self.get_filenames: dir_base, base_names = io.split_dir_base(filenames) print '======================================================' print 'directory base:', dir_base data = io.load_data(filenames) if self.n_merge_bins is not None: data = tr.merge_bins(data, self.n_merge_bins) print 'angles range:', data[:, 0].min(), data[:, 0].max() data = tr.fix_increasing(tr.fix_range(tr.transform_2pi(data))) print 'transformed angles range:', data[0, 0], data[-1, 0] print 'data range:', data[:, 1].min(), data[:, 1].max() # Simulate the "random process" the histogram was done from. counts = tr.get_counts_from_lengths(data[:, 1]) fdata = tr.spread_by_counts(data[:, 0], counts, trivial=self.spread_data == False) print 'simulated counts range:', counts.min(), counts.max() ddata = np.sort( tr.transform_pi_deg(data[:, 0], neg_shift=self.neg_shift)) dd = ddata[1] - ddata[0] all_bins = np.r_[ddata - 1e-8, ddata[-1] + dd] bins = all_bins[::self.plot_bins_step] self.current = Struct(filenames=filenames, dir_base=dir_base, base_names=base_names) self.source_data = Struct(counts=counts, data=data, fdata=fdata, bins=bins) yield self.source_data
def log_results(log, result, source): """ Log the fitting results. Notes ----- The resulting mixture parameters are stored into a 2d array with rows [location in degrees (mu), shape (kappa), probability]. """ sparams = result.model.get_summary_params(result.full_params)[:, [1, 0, 2]] sparams[:, 0] = tr.transform_pi_deg(tr.fix_range(sparams[:, 0]), neg_shift=source.neg_shift) converged = result.mle_retvals['converged'] fit_criteria = [-result.llf, result.aic, result.bic] print 'llf / nobs:', fit_criteria[0] / result.model.endog.shape[0] chisquare = result.gof_chisquare() # Chisquare test with effect size. alpha = 0.05 # Significance level. data = source.source_data.data n_obs = data[:, 1].sum() rad_diff = data[1, 0] - data[0, 0] pdf = result.model.pdf_mix(result.full_params, data[:, 0]) probs = pdf * rad_diff * n_obs effect_size = gof.chisquare_effectsize(data[:, 1], probs) chi2 = gof.chisquare(data[:, 1], probs, value=effect_size) power = gof.chisquare_power(effect_size, n_obs, data.shape[0], alpha=alpha) chisquare_all = list(chisquare) + [n_obs, effect_size] \ + list(chi2) + [power] log.write_row(source.current.dir_base, source.current.base_names, chisquare_all, sparams, converged, fit_criteria)
fig = res.model.plot_dist(res.params, xtransform=xtr, n_bins=180) fig.axes[0].set_title('Estimated distribution') figname = os.path.join(output_dir, dir_base + '-fit-%d.png' % options.n_components) fig.savefig(figname) try: rvs, sizes = res.model.rvs_mix(res.params, size=fdata.shape[0], ret_sizes=True) except ValueError: pass else: rvs = tr.fix_range(rvs) figname = os.path.join( output_dir, dir_base + '-cmp-%d.png' % options.n_components) fig = pl.plot_rvs_comparison(fdata, rvs, sizes, bins, neg_shift=neg_shift) fig.savefig(figname) sparams = res.model.get_summary_params(res.params)[:, [1, 0, 2]] sparams[:, 0] = tr.transform_pi_deg(tr.fix_range(sparams[:, 0]), neg_shift=neg_shift) converged = res.mle_retvals['converged'] print 'converged:', converged