def plot_data(data, fdata, bins, neg_shift, plot_histogram=False):
    """
    Create a figure with the raw data curve and, optionally, its histogram.

    Parameters
    ----------
    data : ndarray
        2D array. Column 0 is passed through ``transform_pi_deg`` to obtain
        the x-axis values, column 1 is plotted against them.
    fdata : ndarray
        Data passed through ``transform_pi_deg`` and histogrammed; used
        only when `plot_histogram` is true.
    bins :
        Directly used by matplotlib ``hist``.
    neg_shift : bool
        Forwarded to ``transform_pi_deg``. It also selects the x-axis
        limits: (0, 180) when true, (-90, 90) otherwise.
    plot_histogram : bool
        If true, a second subplot row with the histogram of `fdata` is
        added below the raw data plot.

    Returns
    -------
    fig : matplotlib figure
        The newly created figure.
    """
    n_rows = 2 if plot_histogram else 1
    # squeeze=False keeps `axes` two-dimensional even for a single subplot.
    fig, axes = plt.subplots(nrows=n_rows, ncols=1, squeeze=False)

    angles = transform_pi_deg(data[:, 0], neg_shift=neg_shift)
    order = np.argsort(angles)

    x_limits = (0, 180) if neg_shift else (-90, 90)

    raw_ax = axes[0, 0]
    # Plot in ascending angle order so that the line does not zig-zag.
    raw_ax.plot(angles[order], data[order, 1], lw=2)
    raw_ax.set_title('raw data')
    raw_ax.set_xlim(list(x_limits))
    raw_ax.set_xlabel('angle', fontsize='large')
    raw_ax.set_ylabel('coherency', fontsize='large')

    if not plot_histogram:
        return fig

    hist_ax = axes[1, 0]
    hist_values = transform_pi_deg(fdata, neg_shift=neg_shift)
    hist_ax.hist(hist_values, bins=bins, alpha=0.5)
    hist_ax.set_title('raw data histogram')
    hist_ax.set_xlim(list(x_limits))
    hist_ax.set_xlabel('angle')
    hist_ax.set_ylabel('count')

    return fig
def plot_rvs_comparison(fdata, rvs, sizes, bins, neg_shift):
    """
    Plot overlaid histograms of the original and the simulated data.

    The plot is drawn into matplotlib figure number 3, which is cleared
    first.

    Parameters
    ----------
    fdata : ndarray
        Original data.
    rvs : ndarray
        Simulated data.
    sizes : list, iterable
        List of the numbers of observations of mixture components; shown
        in the plot title.
    bins :
        Directly used by matplotlib ``hist``.
    neg_shift : bool
        If False, keep range in (-90, 90). If True, shift range to
        (0, 180).

    Returns
    -------
    fig : matplotlib figure
        The figure the histograms were drawn into.
    """
    fig = plt.figure(3)
    plt.clf()

    n_orig = fdata.shape[0]
    sizes_text = ', '.join(['%d' % size for size in sizes])
    title = 'orig. (blue, %d) vs. sim. (green, %s)' % (n_orig, sizes_text)
    plt.title(title, fontsize='medium')

    # Original data first, simulated data second; both semi-transparent
    # so that the overlap stays visible.
    for sample in (fdata, rvs):
        degrees = transform_pi_deg(sample, neg_shift=neg_shift)
        plt.hist(degrees, bins=bins, alpha=0.5)

    if neg_shift:
        xmin, xmax = 0, 180
    else:
        xmin, xmax = -90, 90
    plt.axis(xmin=xmin, xmax=xmax)

    plt.xlabel('angle', fontsize='large')
    plt.ylabel('count', fontsize='large')

    return fig
def __call__(self):
    """
    Iterate over the input file sets and yield the prepared source data.

    For each item of ``self.get_filenames`` the data are loaded, optionally
    merged into coarser bins (when ``self.n_merge_bins`` is not None),
    transformed, and turned into simulated observations and histogram bin
    edges. Progress information is printed along the way.

    Besides yielding, each iteration stores the current file information
    in ``self.current`` (attributes `filenames`, `dir_base`, `base_names`)
    and the yielded object in ``self.source_data``.

    Yields
    ------
    source_data : Struct
        Object with attributes `counts`, `data`, `fdata` and `bins`.
    """
    for filenames in self.get_filenames:
        dir_base, base_names = io.split_dir_base(filenames)

        # Single-argument print calls with %-formatting emit the same text
        # whether ``print`` is the Python 2 statement or the Python 3
        # function.
        print('======================================================')
        print('directory base: %s' % (dir_base,))

        data = io.load_data(filenames)
        if self.n_merge_bins is not None:
            data = tr.merge_bins(data, self.n_merge_bins)
        print('angles range: %s %s' % (data[:, 0].min(), data[:, 0].max()))

        data = tr.fix_increasing(tr.fix_range(tr.transform_2pi(data)))
        print('transformed angles range: %s %s' % (data[0, 0], data[-1, 0]))
        print('data range: %s %s' % (data[:, 1].min(), data[:, 1].max()))

        # Simulate the "random process" the histogram was done from.
        counts = tr.get_counts_from_lengths(data[:, 1])
        # The explicit comparison is kept on purpose: ``not x`` would also
        # be true for None, while ``x == False`` is not.
        fdata = tr.spread_by_counts(data[:, 0], counts,
                                    trivial=(self.spread_data == False))
        print('simulated counts range: %s %s' % (counts.min(), counts.max()))

        ddata = np.sort(tr.transform_pi_deg(data[:, 0],
                                            neg_shift=self.neg_shift))
        # Bin edges: every sorted angle lowered by a small tolerance, plus
        # one closing edge placed one step (taken from the first two
        # angles) past the last angle.
        dd = ddata[1] - ddata[0]
        all_bins = np.r_[ddata - 1e-8, ddata[-1] + dd]
        bins = all_bins[::self.plot_bins_step]

        self.current = Struct(filenames=filenames, dir_base=dir_base,
                              base_names=base_names)
        self.source_data = Struct(counts=counts, data=data, fdata=fdata,
                                  bins=bins)
        yield self.source_data
def __call__(self):
    """
    Iterate over the input file sets and yield the prepared source data.

    For each item of ``self.get_filenames`` the data are loaded, optionally
    merged into coarser bins (when ``self.n_merge_bins`` is not None),
    transformed, and turned into simulated observations and histogram bin
    edges. Progress information is printed along the way.

    Besides yielding, each iteration stores the current file information
    in ``self.current`` (attributes `filenames`, `dir_base`, `base_names`)
    and the yielded object in ``self.source_data``.

    Yields
    ------
    source_data : Struct
        Object with attributes `counts`, `data`, `fdata` and `bins`.
    """
    for filenames in self.get_filenames:
        dir_base, base_names = io.split_dir_base(filenames)

        # Single-argument print calls with %-formatting emit the same text
        # whether ``print`` is the Python 2 statement or the Python 3
        # function.
        print('======================================================')
        print('directory base: %s' % (dir_base,))

        data = io.load_data(filenames)
        if self.n_merge_bins is not None:
            data = tr.merge_bins(data, self.n_merge_bins)
        angle_column = data[:, 0]
        print('angles range: %s %s' % (angle_column.min(),
                                       angle_column.max()))

        data = tr.fix_increasing(tr.fix_range(tr.transform_2pi(data)))
        print('transformed angles range: %s %s' % (data[0, 0], data[-1, 0]))
        value_column = data[:, 1]
        print('data range: %s %s' % (value_column.min(),
                                     value_column.max()))

        # Simulate the "random process" the histogram was done from.
        counts = tr.get_counts_from_lengths(data[:, 1])
        # The explicit comparison is kept on purpose: ``not x`` would also
        # be true for None, while ``x == False`` is not.
        fdata = tr.spread_by_counts(data[:, 0], counts,
                                    trivial=(self.spread_data == False))
        print('simulated counts range: %s %s' % (counts.min(), counts.max()))

        ddata = np.sort(
            tr.transform_pi_deg(data[:, 0], neg_shift=self.neg_shift))
        # Bin edges: every sorted angle lowered by a small tolerance, plus
        # one closing edge placed one step (taken from the first two
        # angles) past the last angle.
        dd = ddata[1] - ddata[0]
        all_bins = np.r_[ddata - 1e-8, ddata[-1] + dd]
        bins = all_bins[::self.plot_bins_step]

        self.current = Struct(filenames=filenames, dir_base=dir_base,
                              base_names=base_names)
        self.source_data = Struct(counts=counts, data=data, fdata=fdata,
                                  bins=bins)
        yield self.source_data
def plot_estimated_dist(output_dir, result, source, pset_id=None):
    """
    Plot the estimated distribution and save the figure as a PNG file.

    The figure is produced by ``result.model.plot_dist()``, saved into
    `output_dir` and then closed.

    Parameters
    ----------
    output_dir : str
        Directory the figure file is written to.
    result :
        Fitting result; its `model` and `full_params` attributes are used.
    source :
        Data source providing ``get_source_data()``, `neg_shift` and
        ``current.dir_base``.
    pset_id : int, optional
        If given, the file is named ``<dir_base>-fit-<pset_id>.png``,
        otherwise ``<dir_base>-fit.png``.
    """
    # The first item of the source data is not needed here.
    _, fdata, bins = source.get_source_data()

    def to_degrees(x):
        return transform_pi_deg(x, neg_shift=source.neg_shift)

    bins_2pi = transform_2pi(bins)
    # The comparison yields a bool, so pi is subtracted only when
    # `neg_shift` equals True.
    offset = np.pi * (source.neg_shift == True)
    rbins = bins_2pi - offset

    fig = result.model.plot_dist(result.full_params, xtransform=to_degrees,
                                 bins=rbins, data=fdata)

    main_ax = fig.axes[0]
    main_ax.set_title('estimated distribution')
    fig.axes[0].set_xlabel('angle', fontsize='large')
    fig.axes[0].set_ylabel('probability density function',
                           fontsize='large')

    suffix = '-fit.png' if pset_id is None else '-fit-%d.png' % pset_id
    figname = os.path.join(output_dir, source.current.dir_base + suffix)

    plt.tight_layout(pad=0.5)
    fig.savefig(figname)
    plt.close(fig)
def log_results(log, result, source):
    """
    Log the fitting results.

    Parameters
    ----------
    log :
        Log object; its ``write_row()`` method receives the results.
    result :
        Fitting result. Its `model`, `full_params`, `mle_retvals`, `llf`,
        `aic` and `bic` attributes and ``gof_chisquare()`` method are used.
    source :
        Data source providing `neg_shift`, ``source_data.data`` and
        ``current`` (with `dir_base` and `base_names`).

    Notes
    -----
    The resulting mixture parameters are stored into a 2d array with rows
    [location in degrees (mu), shape (kappa), probability].

    The value of ``-llf / nobs`` is printed as a side effect.
    """
    # Reorder the summary columns to (mu, kappa, probability).
    summary = result.model.get_summary_params(result.full_params)
    sparams = summary[:, [1, 0, 2]]
    sparams[:, 0] = tr.transform_pi_deg(tr.fix_range(sparams[:, 0]),
                                        neg_shift=source.neg_shift)

    converged = result.mle_retvals['converged']
    fit_criteria = [-result.llf, result.aic, result.bic]

    # A single-argument print call emits the same text whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    nobs = result.model.endog.shape[0]
    print('llf / nobs: %s' % (fit_criteria[0] / nobs,))

    chisquare = result.gof_chisquare()

    # Chisquare test with effect size.
    alpha = 0.05  # Significance level.
    data = source.source_data.data
    n_obs = data[:, 1].sum()
    # Angular bin width, taken from the first two rows.
    rad_diff = data[1, 0] - data[0, 0]

    pdf = result.model.pdf_mix(result.full_params, data[:, 0])
    # Scale the density to counts comparable with the data column.
    probs = pdf * rad_diff * n_obs
    effect_size = gof.chisquare_effectsize(data[:, 1], probs)
    chi2 = gof.chisquare(data[:, 1], probs, value=effect_size)
    power = gof.chisquare_power(effect_size, n_obs,
                                data.shape[0], alpha=alpha)

    chisquare_all = (list(chisquare) + [n_obs, effect_size]
                     + list(chi2) + [power])

    log.write_row(source.current.dir_base, source.current.base_names,
                  chisquare_all, sparams, converged, fit_criteria)
def get_area_angles(data, mode='min', ishift=None):
    """
    Get equal area angles.

    For 'min' mode:

    First, the x-axis of the histogram is rolled so that the histogram
    minimum is at the beginning/end of the x-axis. The histogram maximum is
    then assumed to be somewhere around the middle of the x-axis.

    For 'max' mode:

    First, the x-axis of the histogram is rolled so that the histogram
    maximum is in the middle of the x-axis. The histogram minimum is then
    assumed to be somewhere around the beginning/end of the x-axis.

    The two modes should give similar results for symmetric histograms.

    Then:

    Assuming two main directions of fibres which are symmetric with respect
    to the angle of symmetry a_s and have the same probability 0.5 we
    obtain the mean value of these directions and their variation using the
    following algorithm.

    The integral sum of the histogram is obtained. Half of the sum is
    assigned to the angles smaller than the angle of symmetry and the
    second half to the angles greater than the angle of symmetry. Then each
    half is again divided into two equal area halves - two intervals are
    obtained [l_s, a_s], [a_s, r_s], where l_s, r_s are the dividing angles
    in the left and right half-areas, respectively. The mid-points of those
    intervals are taken as the directions of the assumed two fibre systems.

    Parameters
    ----------
    data : ndarray
        2D array; column 0 holds the angles (passed through
        ``transform_pi_deg``), column 1 the histogram values. The array
        itself is not modified, a copy is used.
    mode : str
        'min' selects the minimum-based roll; any other value selects the
        maximum-based roll. Used only when `ishift` is None.
    ishift : int, optional
        Number of rows to roll the histogram by. If None, it is determined
        according to `mode`.

    Returns
    -------
    x0, xm, x1 :
        The dividing angles found by ``split_equal_areas`` for the lower
        part, the whole range and the upper part, respectively; negative
        values are increased by 180.
    arh1, arh2 :
        The first values returned by ``split_equal_areas`` for the lower
        and upper parts (presumably the half-areas - verify against
        ``split_equal_areas``).

    Notes
    -----
    The angles and areas are printed as a side effect.
    """
    ddd0 = data.copy()
    ddd0[:, 0] = transform_pi_deg(ddd0[:, 0], neg_shift=0)

    if ishift is None:
        if mode == 'min':
            ishift = np.argmin(ddd0[:, 1])

        else:
            # Shift so that the maximum ends up in the middle row.
            ishift = np.argmax(ddd0[:, 1]) - data.shape[0] // 2

    ddd = np.roll(ddd0, -ishift, axis=0)

    # Make angle ascending. The loop is inherently sequential: each
    # comparison uses the already corrected previous angle.
    for ii in range(1, ddd.shape[0]):
        if ddd[ii, 0] < ddd[ii - 1, 0]:
            ddd[ii, 0] += 180.0

    # Mirror the first data point.
    dx = ddd[1, 0] - ddd[0, 0]
    ddd = np.r_[ddd, [[ddd[-1, 0] + dx, ddd[0, 1]]]]

    # Bounds wide enough to enclose all the angles.
    xmin, xmax = -1000, 1000
    arh, xm = split_equal_areas(ddd, xmin, xmax)
    arh1, x0 = split_equal_areas(ddd, xmin, xm)
    arh2, x1 = split_equal_areas(ddd, xm, xmax)

    x0 = x0 if x0 >= 0.0 else x0 + 180
    xm = xm if xm >= 0.0 else xm + 180
    x1 = x1 if x1 >= 0.0 else x1 + 180

    # Single-argument print calls emit the same text whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    print('%s %s %s' % (x0, xm, x1))
    print('%s %s %s %s %s' % (arh, arh1, arh2,
                              arh1 - arh2, arh - (arh1 + arh2)))

    return x0, xm, x1, arh1, arh2
print '*****' print 'directory base:', dir_base data = io.load_data(filenames, transform=tr.transform_2pi) print 'data range:', data[:, 1].min(), data[:, 1].max() # Simulate the "random process" the histogram was done from. counts = tr.get_counts_from_lengths(data[:, 1]) fdata = tr.spread_by_counts(data[:, 0], counts) print 'simulated counts range:', counts.min(), counts.max( ), counts.sum() ddata = np.sort(tr.transform_pi_deg(data[:, 0], neg_shift=neg_shift)) dd = ddata[1] - ddata[0] all_bins = np.r_[ddata - 1e-8, ddata[-1] + dd] bins = all_bins #[::4] figname = os.path.join(output_dir, dir_base + '-data.png') fig = pl.plot_data(data, fdata, bins, neg_shift=neg_shift) if options.area_angles: pl.draw_areas(fig.axes[0], *pl.get_area_angles(data, neg_shift=neg_shift)) fig.savefig(figname) aux = Store() aux.fdata = fdata