def get_external_bci(conf=CONF, interval='jeffreys', verb=True):
    """Compute binomial confidence intervals for the external surveys.

    Inputs:
        conf: confidence level handed to binom_conf_interval (defaults to
            the module-level CONF)
        interval: interval method handed to binom_conf_interval
        verb: if True, print each survey's interval after it is computed
    Output:
        external_bci: DataFrame indexed by survey ('ASAS-SN', 'ZTF') with
            columns 'bci_lower' and 'bci_upper', in percent
    """

    # (detections, total) pairs for each external survey
    survey_counts = {
        'ASAS-SN': count_asassn_sne(),
        'ZTF': count_ztf_sne(),
    }

    # The header is printed regardless of `verb`
    print('\nExternal measures of f_CSM:')

    # Intervals are scaled to percent
    survey_bci = {}
    for survey, (n_det, n_all) in survey_counts.items():
        survey_bci[survey] = 100 * binom_conf_interval(
            n_det, n_all, confidence_level=conf, interval=interval)

    if verb:
        for survey, bounds in survey_bci.items():
            print(survey)
            print(bounds)

    return pd.DataFrame(list(survey_bci.values()),
                        index=list(survey_bci),
                        columns=['bci_lower', 'bci_upper'])
def peaks_and_thresh(self):
    """Estimate the background and signal peaks and update the threshold.

    The first ``self.im_num`` counts are split at the current
    ``self.thresh``; the mean and sample standard deviation of each side are
    taken as the peak position and width. The threshold is then reset to
    5 background standard deviations above the background mean, and
    ``self.atom`` is refreshed for the processed images.

    returns:
        numpy array of: images processed, loading probability, error in
        loading probability, bg count, bg width, signal count, signal width,
        separation, fidelity, error in fidelity, threshold
    """
    n_images = self.im_num

    # split histograms at threshold then get mean and stdev:
    ascend = np.sort(self.counts[:n_images])
    bg = ascend[ascend < self.thresh]  # background
    signal = ascend[ascend > self.thresh]  # signal above threshold
    bg_peak = np.mean(bg)
    bg_stdv = np.std(bg, ddof=1)
    at_peak = np.mean(signal)
    at_stdv = np.std(signal, ddof=1)
    sep = at_peak - bg_peak
    self.thresh = bg_peak + 5 * bg_stdv  # update threshold

    # atom is present if the counts are above threshold
    self.atom[:n_images] = self.counts[:n_images] // self.thresh

    # Only the processed images are counted, so that atom_count,
    # empty_count and load_prob all refer to the same set of images.
    occupancy = self.atom[:n_images]
    atom_count = np.count_nonzero(occupancy > 0)
    empty_count = np.count_nonzero(occupancy == 0)
    load_prob = np.around(atom_count / n_images, 4)

    # Jeffreys interval at binom_conf_interval's default confidence level
    conf = binom_conf_interval(atom_count, atom_count + empty_count,
                               interval='jeffreys')
    uplperr = conf[1] - load_prob  # confidence above mean
    lolperr = load_prob - conf[0]  # confidence below mean
    load_err = np.mean([uplperr, lolperr])

    self.fidelity, self.err_fidelity = np.around(self.get_fidelity(), 4)

    # np.array takes the values as a single sequence
    return np.array([
        n_images, load_prob, load_err, bg_peak, bg_stdv, at_peak, at_stdv,
        sep, self.fidelity, self.err_fidelity, self.thresh
    ])
def plot_effective_area(df_cuts, mc_spectrum, out_path):
    """Plot the effective area per energy bin and save the figure.

    Selected events (``df_cuts.mc_energy``) are histogrammed on the default
    CTA binning (15 bins per decade) and compared with the expected number
    of simulated events per bin from ``mc_spectrum``. The ratio, scaled by
    ``mc_spectrum.generation_area``, is drawn with binomial confidence
    bounds (confidence level 0.95) as error bars, together with the
    reference curve, on log-log axes. The figure is written to ``out_path``.
    """
    edges, centers, widths = make_default_cta_binning(bins_per_decade=15)

    n_simulated = mc_spectrum.expected_events_for_bins(energy_bins=edges)
    n_selected = np.histogram(df_cuts.mc_energy.values, bins=edges)[0]

    # A bin cannot hold more selected than simulated events; clip it.
    too_many = n_selected > n_simulated
    n_selected[too_many] = n_simulated[too_many]

    conf_lo, conf_hi = binom_conf_interval(n_selected, n_simulated, 0.95)

    generation_area = mc_spectrum.generation_area
    conf_lo = conf_lo * generation_area
    conf_hi = conf_hi * generation_area
    area = (n_selected / n_simulated) * generation_area

    # errorbar expects offsets relative to the plotted value
    err_below = area - conf_lo
    err_above = conf_hi - area
    populated = area > 0

    fig, ax = plt.subplots(1, 1, figsize=figsize)
    plt.errorbar(
        centers.value[populated],
        area.value[populated],
        xerr=widths.value[populated] / 2.0,
        yerr=[err_below.value[populated], err_above.value[populated]],
        linestyle="",
    )

    # The reference curve is always drawn
    from cta_plots.sensitivity import load_effective_area_reference
    reference_df = load_effective_area_reference()
    plt.plot(reference_df.energy, reference_df.effective_area, '--',
             label='Reference')

    ax.set_xscale('log')
    ax.set_yscale('log')
    fig.savefig(out_path)
def conf(self, success, total):
    """Return the success fraction with its binomial (Jeffreys) errors.

    The interval comes from ``binom_conf_interval`` with
    ``interval='jeffreys'`` at that function's default confidence level.

    Returns a 4-tuple ``(fraction, upper error, lower error, mean error)``.
    If the fraction or interval cannot be computed -- ``total`` is zero, or
    ``binom_conf_interval`` rejects the inputs -- ``(0, 0, 0, 0)`` is
    returned instead of raising.
    """
    try:
        # Plain-integer division by zero raises ZeroDivisionError before
        # binom_conf_interval gets the chance to raise its ValueError.
        sp = success / total
        interval = binom_conf_interval(success, total, interval='jeffreys')
    except (ValueError, ZeroDivisionError):
        return 0, 0, 0, 0
    uperr = interval[1] - sp  # confidence above mean
    loerr = sp - interval[0]  # confidence below mean
    return sp, uperr, loerr, 0.5 * (uperr + loerr)
def collection_area(
        all_events,
        selected_events,
        impact,
        bins,
        sample_fraction=1.0,
        smoothing=0,
):
    '''
    Compute the collection area per bin, with binomial confidence bounds.

    Parameters
    ----------
    all_events: array-like
        Quantity which should be histogrammed for all simulated events
    selected_events: array-like
        Quantity which should be histogrammed for all selected events
    impact: astropy Quantity of type length
        The maximal simulated impact parameter
    bins: int or array-like
        either number of bins or bin edges for the histogram
    sample_fraction: float
        The fraction of `all_events` that was analysed
        to create `selected_events`
    smoothing: float
        Sigma of the gaussian filter applied to the area; no filter is
        applied unless this is greater than 0

    Returns
    -------
    area, bin_center, bin_width, lower_conf, upper_conf
        Area per bin, bin centres and widths, and the lower / upper
        confidence bounds scaled to the same units as the area. The
        confidence bounds are not smoothed.
    '''
    n_all, n_selected, edges = histograms(
        all_events,
        selected_events,
        bins,
    )

    centers = 0.5 * (edges[:-1] + edges[1:])
    widths = np.diff(edges)

    # Scale the selected counts up to the full sample, truncating to int
    n_selected = (n_selected / sample_fraction).astype(int)

    # After scaling a bin may exceed the simulated count; clip it
    overfull = n_selected > n_all
    n_selected[overfull] = n_all[overfull]

    # Binomial bounds on the selected fraction (astropy defaults)
    fraction_lo, fraction_hi = binom_conf_interval(n_selected, n_all)

    # Fractions become areas via the simulated disc, pi * impact**2
    area_lo = fraction_lo * np.pi * impact**2
    area_hi = fraction_hi * np.pi * impact**2
    area = (n_selected / n_all) * np.pi * impact**2

    if smoothing > 0:
        # The filter works on bare values; put the unit back afterwards
        area = gaussian_filter(area.value, sigma=smoothing) * area.unit

    return area, centers, widths, area_lo, area_hi
def main(tstart, scale, model='Chev94', sigma=3, iterations=ITER, conf=CONF):
    """Print the binomial confidence interval on the CSM interaction rate
    for the given parameter bounds.

    Inputs:
        tstart: tuple, CSM model interaction start time bounds
        scale: tuple, CSM model scale factor bounds
        model: 'Chev94' or 'flat', spectral model
        sigma: passed to run_dir when locating the save directories
        iterations: passed to count_recovered_sne
        conf: confidence level passed to binom_conf_interval
    """

    limit_cols = ['Lower Limit [%]', 'Upper Limit [%]']
    rate_df = pd.DataFrame([],
                           index=['GALEX', 'G19', 'All UV'],
                           columns=['Detections', 'Trials'] + limit_cols)

    # Locate the save directories
    galex_save_dir = run_dir('galex', model, sigma, detections=False)
    graham_save_dir = run_dir('Graham', model, sigma, detections=False)
    graham_det_dir = run_dir('Graham', model, sigma, detections=True)

    def recovered(save_dir):
        # Count for one directory within the requested bounds
        return count_recovered_sne(save_dir, tstart, scale, iterations)

    # GALEX contributes trials only
    rate_df.loc['GALEX', 'Trials'] = recovered(galex_save_dir)
    rate_df.loc['GALEX', 'Detections'] = 0

    # G19 trials are detections plus nondetections
    n_graham_det = recovered(graham_det_dir)
    n_graham_nondet = recovered(graham_save_dir)
    rate_df.loc['G19', 'Detections'] = n_graham_det
    rate_df.loc['G19', 'Trials'] = n_graham_det + n_graham_nondet

    # Combined row
    rate_df.loc['All UV'] = np.sum(rate_df.loc[['GALEX', 'G19']])

    # One Jeffreys interval per study, in percent; NaN without trials
    for study in rate_df.index:
        n_success = rate_df.loc[study, 'Detections']
        n_trials = rate_df.loc[study, 'Trials']
        if not n_trials >= 1:
            rate_df.loc[study, 'Lower Limit [%]'] = np.nan
            rate_df.loc[study, 'Upper Limit [%]'] = np.nan
            continue
        bci = 100 * binom_conf_interval(
            n_success, n_trials, confidence_level=conf, interval='jeffreys')
        rate_df.loc[study, limit_cols] = bci.T

    summary = '\nConfidence intervals for %s < tstart < %s, ' % tstart
    summary += '%s < S < %s using the %s model' % (scale + (model, ))
    print(summary)
    print(rate_df)
def collection_area(
        all_events,
        selected_events,
        impact,
        bins,
        range=None,
        log=True,
        sample_fraction=1.0,
):
    '''
    Compute the collection area per bin, with binomial confidence bounds.

    Parameters
    ----------
    all_events: array-like
        Quantity which should be histogrammed for all simulated events
    selected_events: array-like
        Quantity which should be histogrammed for all selected events
    impact: astropy Quantity of type length
        The maximal simulated impact parameter
    bins: int or array-like
        either number of bins or bin edges for the histogram
    range:
        Forwarded unchanged to `histograms`
    log: bool
        flag indicating whether log10 should be applied to the quantity.
    sample_fraction: float
        The fraction of `all_events` that was analysed
        to create `selected_events`

    Returns
    -------
    area, bin_center, bin_width, lower_conf, upper_conf
        Area per bin, bin centres and widths, and the lower / upper
        confidence bounds scaled to the same units as the area.
    '''
    counts_all, counts_selected, edges = histograms(
        all_events, selected_events, bins, range=range, log=log)

    # Scale the selected counts up to the full sample, truncating to int
    counts_selected = (counts_selected / sample_fraction).astype(int)

    widths = np.diff(edges)
    centers = 0.5 * (edges[:-1] + edges[1:])

    # After scaling a bin may exceed the simulated count; clip it
    clipped = counts_selected > counts_all
    counts_selected[clipped] = counts_all[clipped]

    # Binomial bounds on the selected fraction (astropy defaults)
    bound_lo, bound_hi = binom_conf_interval(counts_selected, counts_all)

    # Fractions become areas via the simulated disc, pi * impact**2
    bound_lo = bound_lo * np.pi * impact**2
    bound_hi = bound_hi * np.pi * impact**2
    area = counts_selected / counts_all * np.pi * impact**2

    return area, centers, widths, bound_lo, bound_hi
def bci_nan(detections, trials, conf=0.9, interval='jeffreys'):
    """Column-wise binomial confidence intervals that tolerate empty cells.

    Cells with at least one trial get the interval from
    binom_conf_interval; cells with fewer than one trial get the
    uninformative interval [0, 1].

    Inputs:
        detections: DataFrame of detections
        trials: DataFrame of trials (incl. detections), same shape as
            detections
        conf: confidence level passed to binom_conf_interval
        interval: interval method passed to binom_conf_interval
    Returns:
        bci_lower: DataFrame, indexed like trials, with lower BCI limits
        bci_upper: DataFrame, indexed like trials, with upper BCI limits
    """

    from astropy.stats import binom_conf_interval

    if detections.shape != trials.shape:
        raise ValueError('detections and trials must have the same shape.')

    bci_lower = pd.DataFrame([], index=trials.index)
    bci_upper = pd.DataFrame([], index=trials.index)

    for col in trials.columns:
        # Row labels with and without trials in this column
        with_trials = trials[trials[col] >= 1].index
        without_trials = trials[trials[col] < 1].index

        lower, upper = binom_conf_interval(detections.loc[with_trials, col],
                                           trials.loc[with_trials, col],
                                           confidence_level=conf,
                                           interval=interval)

        bci_lower.loc[with_trials, col] = lower
        bci_upper.loc[with_trials, col] = upper

        # No trials: lower limit 0. and upper limit 1.
        bci_lower.loc[without_trials, col] = 0.
        bci_upper.loc[without_trials, col] = 1.

    return bci_lower, bci_upper
def main():
    """Plot UV detections and limits, then the binomial CSM-rate figure.

    Command-line flags:
        -o/--overwrite: re-aggregate detections and nondetections instead of
            reading the cached CSV files
        -s/--systematics: also plot observation and sample systematics
        --show: show each figure after saving it
        --presentation: use the presentation styling

    Writes 'figs/limits.png' and 'figs/rates.png'.
    """

    parser = argparse.ArgumentParser(description='Plot detection limits.')
    parser.add_argument('-o', '--overwrite', action='store_true',
                        help='re-concatenate detection and nondetection data.')
    parser.add_argument('-s', '--systematics', action='store_true',
                        help='plot observation and sample systematics.')
    parser.add_argument('--show', action='store_true',
                        help='show plot after saving')
    parser.add_argument('--presentation', action='store_true',
                        help='configure plots for presentation')
    args = parser.parse_args()

    sn_info = pd.read_csv(Path('ref/sn_info.csv'), index_col='name')
    conf_det = pd.read_csv(Path('out/confirmed_detections.csv'))
    det_sne = list(zip(conf_det['Name'], conf_det['Band']))

    if args.overwrite or not Path('out/nondetections.csv').is_file():
        print('Separating detections from nondetections...')
        detections = aggregate_detections(det_sne, sn_info)
        nondetections = aggregate_nondetections(det_sne, sn_info)
    else:
        detections = pd.read_csv(Path('out/detections.csv'))
        nondetections = pd.read_csv(Path('out/nondetections.csv'))

    if args.systematics:
        print('Plotting systematics...')
        # Look for systematics in observations
        # (pd.concat replaces DataFrame.append, removed in pandas 2.0)
        all_detections = pd.concat([nondetections, detections])
        all_detections.set_index('name', inplace=True)
        plot_observation_systematics(all_detections, sn_info)
        plot_sample_systematics(sn_info)

    print('Plotting detections & limits...')
    fig, ax = plt.subplots()
    fig.set_tight_layout(True)

    limit_alpha = 0.6
    nondet_alpha = 0.05
    faint_alpha = 0.3
    upper_lim = 1e28
    lower_lim = 1e22 if args.presentation else None
    cutoff = 10**25.88  # Graham 2015cp detection
    det_ms = 6  # detection marker size
    markers = ['o', 's', 'p', 'd', 'P']
    colors = ['cyan', 'orange', 'green', 'magenta']

    # Plot Swift SN2011fe from Brown+ 2012
    if args.presentation:
        band = 'UVW1'
    else:
        band = 'UVM2'
    SN2011fe = pd.read_csv(Path('external/SN2011fe_Brown2012.tsv'),
                           sep='\t', comment='#', skiprows=[45, 46])
    # Copy so the new column is added to an independent frame
    SN2011fe = SN2011fe[pd.notna(SN2011fe['mag'])].copy()
    SN2011fe['t_delta'] = (SN2011fe['MJD']
                           - Time('2011-08-24', format='iso').mjd)
    lc = SN2011fe[SN2011fe['Filt'] == band.lower()].copy()
    dist = 6.4  # Mpc; from Shappee & Stanek 2011
    z = 0  # too close to need correction
    a_v = 0  # won't worry about it right now
    a_band = 'NUV'  # close enough
    lc['FluxDensity'], lc['e_FluxDensity'] = swift_cps2flux(
        lc['CRate'], lc['e_CRate'], band)
    lc['Luminosity'] = flux2luminosity(lc['FluxDensity'], dist, z, a_v,
                                       a_band)
    lc['Luminosity_hz'] = wavelength2freq(lc['Luminosity'], 2245.8)
    ax.plot(lc['t_delta'], lc['Luminosity_hz'], color='brown',
            label='SN2011fe (%s)' % band, zorder=1)

    # Plot near-peak and CSM detections
    near_peak_limits = []  # nondetection rows of every detected SN
    for i, (sn, band) in enumerate(det_sne):
        lc = detections[(detections['name'] == sn)
                        & (detections['band'] == band)]
        lc_det = lc[lc['sigma'] > DET_SIGMA]
        lc_non = lc[lc['sigma'] <= DET_SIGMA]
        near_peak_limits.append(lc_non)
        if args.presentation:
            # Plot nondetection limits of near-peak SNe
            plot_luminosity_limit(ax, lc_non, s=36, c=COLORS[band],
                                  a=faint_alpha, e='none', z=2)
        else:
            ax.errorbar(lc_det['t_delta_rest'],
                        lc_det['luminosity_hostsub_hz'],
                        yerr=lc_det['luminosity_hostsub_err_hz'],
                        linestyle='none', label='%s (%s)' % (sn, band),
                        marker=markers[i], ms=det_ms, markeredgecolor='k',
                        color=colors[i], ecolor='k', elinewidth=1, zorder=9)
            # Plot nondetection limits of near-peak SNe
            plot_luminosity_limit(ax, lc_non, s=det_ms**2, c=colors[i],
                                  a=limit_alpha, e='k', z=8)

    # Plot nondetections
    for band in ['FUV', 'NUV']:
        lc = nondetections[nondetections['band'] == band]
        # Make distant (bright) limits smaller
        bright = lc[LIMIT_SIGMA * lc['luminosity_hostsub_err_hz'] >= cutoff]
        plot_luminosity_limit(ax, bright, s=16, c=COLORS[band],
                              a=nondet_alpha, e='none', z=2)
        # Make close (faint) limits bigger
        faint = lc[LIMIT_SIGMA * lc['luminosity_hostsub_err_hz'] < cutoff]
        plot_luminosity_limit(ax, faint, s=36, c=COLORS[band],
                              a=faint_alpha, e='none', z=3)

    # Plot Graham detections
    # note: Graham uses days past explosion, not discovery
    if args.presentation:
        ax.axhline(y=cutoff, color='r', label='SN2015cp (F275W)', zorder=10)
    else:
        ax.scatter(686, 10**25.88, marker='*', s=100, color='r',
                   edgecolors='k', label='SN2015cp (F275W)', zorder=10)
        ax.scatter(477, 10**26.06, marker='X', s=64, color='y',
                   edgecolors='k', label='ASASSN-15og (F275W)', zorder=10)

    ax.set_xlabel('Rest frame time since discovery [days]')
    # ax.set_xlabel('Observed time since discovery [days]')
    # `faint` holds the faint limits of the last band plotted above (NUV)
    ax.set_xlim((-50, np.max(faint['t_delta_rest']) + 50))
    ax.set_ylabel('Luminosity [erg s$^{-1}$ Hz$^{-1}$]')
    ax.set_yscale('log')
    ax.set_ylim((lower_lim, upper_lim))

    # Legend
    handles, _ = ax.get_legend_handles_labels()
    legend_elements = [
        Line2D([0], [0], marker='v', markerfacecolor=COLORS['FUV'],
               markeredgecolor='none', markersize=6, alpha=faint_alpha,
               label='detection limit (FUV)', lw=0),
        Line2D([0], [0], marker='v', markerfacecolor=COLORS['NUV'],
               markeredgecolor='none', markersize=6, alpha=faint_alpha,
               label='detection limit (NUV)', lw=0)
    ]
    ncol = 2 if args.presentation else 3
    plt.legend(handles=handles + legend_elements, loc='upper right',
               ncol=ncol, handletextpad=0.2, handlelength=1.0)

    plt.savefig(Path('figs/limits.png'), dpi=300)
    if args.show:
        plt.show()
    else:
        plt.close()

    # Binomial statistics plot
    fig, ax = plt.subplots()
    conf_level = 0.9

    # Include all nondetections below the luminosity of 2015cp
    below_graham = nondetections[
        nondetections['luminosity_hostsub_err_hz'] * LIMIT_SIGMA < cutoff]
    # Also include limits from near-peak SNe. The combined frame has to be
    # assigned back (concatenation returns a new frame), and the limits of
    # every detected SN are used rather than only the last one looped over.
    near_peak_below = [
        frame[frame['luminosity_hostsub_err_hz'] * LIMIT_SIGMA < cutoff]
        for frame in near_peak_limits
    ]
    below_graham = pd.concat([below_graham] + near_peak_below)
    # Only those after discovery
    below_graham = below_graham[below_graham['t_delta_rest'] > 0]
    print('Number of SNe with limits fainter than 2015cp: %s'
          % len(below_graham.drop_duplicates('name').index))
    print('Number of observations with limits fainter than 2015cp: %s'
          % len(below_graham.index))

    bins = [0, 100, 500, 2500]
    k = []
    n = []
    labels = []
    for i in range(len(bins) - 1):
        limits = below_graham[(below_graham['t_delta_rest'] >= bins[i])
                              & (below_graham['t_delta_rest'] < bins[i + 1])]
        discrete_sne = limits.drop_duplicates('name')
        k.append(0)  # zero detections in every bin
        n.append(len(discrete_sne.index))  # one trial per distinct SN
        labels.append('%s - %s' % (bins[i], bins[i + 1]))
    print(bins)
    print(n)

    bci = 100 * binom_conf_interval(
        k, n, confidence_level=conf_level, interval='jeffreys')
    print(bci)
    midpoint = np.mean(bci, axis=0)
    x_pos = np.arange(len(bins) - 1)
    ax.errorbar(x_pos, midpoint, yerr=np.abs(bci - midpoint), capsize=10,
                marker='o', linestyle='none', ms=10, mec='r', c='r', mfc='w',
                label='This study')

    # Confidence interval from Yao 2019
    ztf_bci = 100 * binom_conf_interval(
        1, 127, confidence_level=conf_level, interval='jeffreys')
    print(ztf_bci)
    ztf_mean = np.mean(ztf_bci)
    ax.errorbar([0.1], [ztf_mean],
                yerr=([ztf_mean - ztf_bci[0]], [ztf_bci[1] - ztf_mean]),
                marker='o', c='b', linestyle='none', ms=10, capsize=10,
                mec='b', mfc='w', label='ZTF')

    # ASAS-SN interval
    asassn_bci = 100 * binom_conf_interval(
        3, 460, confidence_level=conf_level, interval='jeffreys')
    print(asassn_bci)
    asassn_mean = np.mean(asassn_bci)
    ax.errorbar([0.2], [asassn_mean],
                yerr=([asassn_mean - asassn_bci[0]],
                      [asassn_bci[1] - asassn_mean]),
                marker='o', c='orange', linestyle='none', ms=10, capsize=10,
                mec='orange', mfc='w', label='ASAS-SN')

    # Confidence interval & assumed late-onset rate from Graham 2019
    graham_rate = 6
    graham_bci = 100 * binom_conf_interval(
        1, 64, confidence_level=conf_level, interval='jeffreys')
    print(graham_bci)
    ax.errorbar([2.1], [graham_rate],
                yerr=([graham_rate - graham_bci[0]],
                      [graham_bci[1] - graham_rate]),
                marker='v', color='g', linestyle='none', ms=15, capsize=10,
                label='G19')
    # ax.annotate('G19', (2.1, graham_rate), textcoords='offset points',
    #         xytext=(10, 0), ha='left', va='center', size=18, color='g')

    ax.set_xlim((x_pos[0] - 0.5, x_pos[-1] + 0.5))
    ax.set_xticks(x_pos)
    ax.set_xticklabels(labels)
    ax.tick_params(axis='x', which='minor', bottom=False, top=False)
    ax.set_xlabel('Rest frame time since discovery [days]')
    ax.set_ylabel('Rate of CSM interaction [%]')

    # Preliminary!
    if args.presentation:
        # rotation is given as a number; the string '30' is not accepted by
        # current matplotlib releases
        fig.text(0.95, 0.05, 'PRELIMINARY', fontsize=72, color='gray',
                 rotation=30, ha='right', va='bottom', alpha=0.5)

    plt.tight_layout()
    plt.legend()
    plt.savefig(Path('figs/rates.png'), dpi=300)
    if args.show:
        plt.show()
    else:
        plt.close()
bins=6, range=[r1, r2]) vol_table1['env_bins'] = np.digitize(vol_table1['logSurfaceDensity'], bins1) grouped1 = vol_table1.group_by('env_bins') means = grouped1.groups.aggregate(np.mean) #creating an array to find number of data points in each environmental bin n = [] #this doesnt work properly, n ends up having different dimensions to p for y in range(1, len(bins1)): lens1 = vol_table1['env_bins'] == y yy = grouped1[lens1] yy = len(yy['env_bins']) n = n + [yy] #binomial errors p = means['spiral_spiral_deb_frac'] k = n * p a, b = binom_conf_interval(k, n) a = p - a b = b - p error = [a, b] #unsure what to set the upper and lower limits as ax.errorbar(means['logSurfaceDensity'], means['spiral_spiral_deb_frac'], error, fmt='.-', label=str(x)) legend = ax.legend(loc='upper right', shadow=True, prop={'size': 6})
set(list(jhu[jhudwarf].index)) & set(list(nsa[nsadwarf].index))) nsaandportdwarfs = list( set(list(nsa[nsadwarf].index)) & set(list(port[portdwarf].index))) print(len(jhuandportdwarfs), len(jhuandnsadwarfs), len(nsaandportdwarfs)) jhuandportdwarfagn = list( set(list(jhu[jhudwarfagn].index)) & set(list(port[portdwarfagn].index))) jhuandnsadwarfagn = list( set(list(jhu[jhudwarfagn].index)) & set(list(nsa[nsadwarfagn].index))) nsaandportdwarfagn = list( set(list(nsa[nsadwarfagn].index)) & set(list(port[portdwarfagn].index))) print(len(jhuandportdwarfagn), len(jhuandnsadwarfagn), len(nsaandportdwarfagn)) print(len(jhuandportdwarfagn), len(jhuandnsadwarfagn), len(nsaandportdwarfagn)) print (100.0*len(jhuandportdwarfagn)/len(jhuandportdwarfs), \ 100.0*binom_conf_interval(len(jhuandportdwarfagn),len(jhuandportdwarfs)) -\ 100.0*len(jhuandportdwarfagn)/len(jhuandportdwarfs)) print (100.0*len(jhuandnsadwarfagn)/len(jhuandnsadwarfs),\ 100.0*binom_conf_interval(len(jhuandnsadwarfagn),len(jhuandnsadwarfs)) -\ 100.0*len(jhuandnsadwarfagn)/len(jhuandnsadwarfs)) print (100.0*len(nsaandportdwarfagn)/len(nsaandportdwarfs),\ 100.0*binom_conf_interval(len(nsaandportdwarfagn),len(nsaandportdwarfs))-\ 100.0*len(nsaandportdwarfagn)/len(nsaandportdwarfs)) print('JHU or Port', 'JHU or NSA', 'NSA or Port') jhuorport = list(set(list(jhu.index)) | set(list(port.index))) jhuornsa = list(set(list(jhu.index)) | set(list(nsa.index))) nsaorport = list(set(list(nsa.index)) | set(list(port.index))) print(len(jhuorport), len(jhuornsa), len(nsaorport)) jhuorportdwarfs = list(
def main(input_file, output, n_bins, cuts_path, reference):
    """Plot the effective area against reconstructed energy.

    Signal events are loaded from ``input_file`` (with the cuts in
    ``cuts_path`` applied when one is given), histogrammed in ``n_bins``
    energy bins from 0.008 TeV to 200 TeV, and compared with the expected
    number of simulated events per bin. The ratio, scaled by the generation
    area, is drawn with binomial confidence bounds as error bars. When
    ``reference`` is truthy the reference curve and a legend are added. The
    figure is saved to ``output`` if that is truthy and shown otherwise.
    """
    edges, centers, widths = make_energy_bins(
        e_min=0.008 * u.TeV, e_max=200 * u.TeV, bins=n_bins)

    gammas = load_signal_events(input_file, columns=cols)[0]
    if cuts_path:
        gammas = apply_cuts(gammas, cuts_path, theta_cuts=True, sigma=0)

    mc_production = MCSpectrum.from_cta_runs(read_data(input_file,
                                                       key='runs'))

    reco_energy = gammas.gamma_energy_prediction_mean.values
    n_simulated = mc_production.expected_events_for_bins(energy_bins=edges)
    n_selected = np.histogram(reco_energy, bins=edges)[0]

    # A bin cannot hold more selected than simulated events; clip it
    overfull = n_selected > n_simulated
    n_selected[overfull] = n_simulated[overfull]

    # Binomial bounds on the selected fraction (astropy defaults)
    bound_lo, bound_hi = binom_conf_interval(n_selected, n_simulated)

    # Fractions become areas via the generation area
    bound_lo = bound_lo * mc_production.generation_area
    bound_hi = bound_hi * mc_production.generation_area
    area = (n_selected / n_simulated) * mc_production.generation_area

    # errorbar takes offsets from the value, the bounds are absolute
    err_below = area - bound_lo
    err_above = bound_hi - area
    populated = area > 0

    plt.errorbar(
        centers.value[populated],
        area.value[populated],
        xerr=widths.value[populated] / 2.0,
        yerr=[err_below.value[populated], err_above.value[populated]],
        linestyle='',
        color=main_color,
    )

    if reference:
        requirement = load_effective_area_requirement()
        plt.plot(requirement.energy, requirement.effective_area, '--',
                 color='gray', label='Prod3b reference')
        plt.legend()

    plt.ylim([100, 1E8])
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel(r'$E_{\mathrm{Reco}} /  \mathrm{TeV}$')
    plt.ylabel(r'$\mathrm{Mean Effective\; Area} / \mathrm{m}^2$')
    plt.tight_layout()

    if not output:
        plt.show()
    else:
        plt.savefig(output)
def save_results_good_data_nounique_model(test_dose_response, qc_flag,
                                          model_preds, selected_models,
                                          chemical_id, end_point):
    """Write the PDF report and CSV rows when no unique model was selected.

    The PDF '<chemical_id>_<end_point>.pdf' gets four pages: a text summary
    built from the QC / model-selection flags, the model prediction table,
    the scaled-residual table, and the dose-response data with 95 %
    binomial confidence intervals. One row (or one row per dose) is then
    written to, or appended to, the 'bmd_vals_', 'dose_response_vals_' and
    'fit_vals_' CSV files named after the module-level ``time_now_date``.
    Model, BMD and fit columns are written as NULL.

    Parameters:
        test_dose_response: DataFrame with 'dose', 'num_affected' and
            'total_num' columns
        qc_flag: key into the data-QC message table (0-5)
        model_preds: DataFrame with 'Model', 'Chi-squared', 'p-val', 'AIC',
            'BMD10', 'BMDL10' and 'Scaled Residuals' columns
        selected_models: mapping providing 'model_select_flag' and
            'no_unique_model_found_flag'
        chemical_id: chemical identifier; converted to str if needed
        end_point: end point name (str)
    """
    # Estimate AUC and min and max doses
    if (not test_dose_response.empty):
        print("test_dose_response:" + str(test_dose_response))
        dose_response_auc = np.trapz(test_dose_response.num_affected /
                                     test_dose_response.total_num,
                                     x=test_dose_response.dose)
        dose_min = min(test_dose_response.dose)
        dose_max = max(test_dose_response.dose)
        dose_response_auc_norm = dose_response_auc / (dose_max - dose_min)
    else:
        dose_response_auc = np.nan
        dose_min = np.nan
        dose_max = np.nan
        dose_response_auc_norm = np.nan

    if (not isinstance(chemical_id, str)):
        chemical_id = str(chemical_id)
    filename = chemical_id + '_' + end_point + '.pdf'

    model_preds = model_preds.round(8)

    # Extract subset of results table
    model_preds_basic_stats = model_preds[[
        'Model', 'Chi-squared', 'p-val', 'AIC', 'BMD10', 'BMDL10'
    ]]

    # Residual table: one row per model, one column per dose, NaN-filled
    residual_column_names = [('dose' + str(i))
                             for i in range(len(test_dose_response['dose']))]
    model_preds_residuals = pd.DataFrame(columns=['Model'] +
                                         residual_column_names)
    model_preds_residuals['Model'] = model_preds['Model']
    model_preds_residuals_matrix = np.empty(
        (model_preds['Scaled Residuals'].shape[0],
         len(test_dose_response['dose'])))
    model_preds_residuals_matrix[:] = np.nan
    model_preds_residuals[residual_column_names] = model_preds_residuals_matrix

    # Rows keep their NaN fill when a model's residuals contain any NaN
    for model_pred_index in range(model_preds['Scaled Residuals'].shape[0]):
        if (not any(np.isnan(
                model_preds['Scaled Residuals'][model_pred_index]))):
            model_preds_residuals.iloc[model_pred_index, 1:] = np.matrix(
                model_preds['Scaled Residuals']
                [model_pred_index].tolist()).round(8)

    # Create dictionaries for various flags
    data_qc_flag_vals = {
        0:
        'Not enough dose groups for BMD analysis.' + '\n ' +
        'BMD analysis not performed.',
        1:
        'No trend detected in dose-response data.' + '\n' +
        'BMD analysis not performed.',
        2:
        'Dose-response data quality very good.',
        3:
        'Dose-response data quality good.',
        4:
        'Data resolution poor.' + '\n' + 'Caution advised.',
        5:
        'Negative correlation detected in dose-response data.' + '\n' +
        'Caution advised.'
    }

    bmd_analysis_flag_vals = {
        1:
        'Convergence not achieved for any dose-response model.',
        2:
        'Model fit might be unreliable.' + '\n' +
        'p-val for chi-squared statistic was < 0.1 for all converged models.',
        3:
        'A unique model could not be determined.' + '\n' +
        'Multiple models had the same AIC and BMD values but no valid BMDL values.',
        4:
        'Multiple models found.' + '\n' +
        'User advised to look at the results of analysis to choose the best model.'
    }

    unique_model_flag_vals = {
        0: 'None',
        1: 'Best model could not be determined'
    }

    bmd_analysis_flag = selected_models['model_select_flag']
    unique_model_flag = selected_models['no_unique_model_found_flag']

    # Filenames for csv files containing the results of analysis
    bmd_vals_file_name = 'bmd_vals_' + str(time_now_date) + '.csv'
    dose_response_vals_file_name = 'dose_response_vals_' + str(
        time_now_date) + '.csv'
    fit_vals_file_name = 'fit_vals_' + str(time_now_date) + '.csv'

    # Generate text for report
    text_for_report = data_qc_flag_vals[qc_flag]
    # Specify reason for non-unique model
    text_for_report += '\n' + unique_model_flag_vals[unique_model_flag]
    text_for_report += '\n' + bmd_analysis_flag_vals[bmd_analysis_flag]

    # The with statement makes sure that the PdfPages object is closed
    # properly at the end of the block, even if an Exception occurs.
    with PdfPages(filename) as pdf:
        # Output data summary
        fig, ax = plt.subplots()
        # hide axes
        fig.patch.set_visible(False)
        ax.axis('off')
        ax.axis('tight')
        # text_for_report is already one string; joining it would iterate
        # over its characters and put a space between every pair of them.
        fig.text(0.1,
                 0.7,
                 text_for_report,
                 transform=fig.transFigure,
                 size=10,
                 ha="left")
        plt.title('Summary of Analysis')
        pdf.savefig()
        plt.close()

        # Print Model Predictions
        fig, ax = plt.subplots()
        # hide axes
        fig.patch.set_visible(False)
        ax.axis('off')
        ax.axis('tight')
        ax.table(cellText=model_preds_basic_stats.values,
                 colLabels=model_preds_basic_stats.columns,
                 loc='center')
        plt.title('Model Predictions')
        fig.tight_layout()
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        # Print residuals for different models
        fig, ax = plt.subplots()
        # hide axes
        fig.patch.set_visible(False)
        ax.axis('off')
        ax.axis('tight')
        ax.table(cellText=model_preds_residuals.values,
                 colLabels=model_preds_residuals.columns,
                 loc='center')
        plt.title('Scaled Residuals')
        fig.tight_layout()
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        # 95 % binomial interval per dose, stored as distances from the
        # observed fraction (row 0 below, row 1 above) for errorbar
        CI_bounds = np.zeros([2, len(test_dose_response.dose)])
        for index in range(len(test_dose_response.dose)):
            CI = astrostats.binom_conf_interval(
                test_dose_response.num_affected[index],
                test_dose_response.total_num[index],
                confidence_level=0.95)
            CI = np.abs(CI - test_dose_response.num_affected[index] /
                        test_dose_response.total_num[index])
            CI_bounds[0, index] = CI[0]
            CI_bounds[1, index] = CI[1]

        fig, ax = plt.subplots()
        ax.set_xscale("linear")
        ax.errorbar(test_dose_response.dose,
                    test_dose_response.num_affected /
                    test_dose_response.total_num,
                    CI_bounds,
                    marker='s',
                    mfc='red',
                    fmt='.')
        ax.set_xlabel('Dose')
        ax.set_ylabel('Fractional Response')
        ax.set_title('Dose-response Data')
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        # Create dataframes to append to / write to csv files
        bmd_vals = pd.DataFrame(columns=[
            'Chemical_ID', 'End_Point', 'Model', 'BMD10', 'BMDL', 'BMD50',
            'AUC', 'Min_Dose', 'Max_Dose', 'AUC_Norm', 'DataQC_Flag',
            'BMD_Analysis_Flag', 'BMD10_Flag', 'BMD50_Flag'
        ])
        dose_response_vals = pd.DataFrame(columns=[
            'Chemical_ID', 'End_Point', 'Dose', 'Response', 'CI_Lo', 'CI_Hi'
        ])
        fit_vals = pd.DataFrame(
            columns=['Chemical_ID', 'End_Point', 'X_vals', 'Y_vals'])

        # Populate dataframes
        bmd_vals['Chemical_ID'] = [chemical_id]
        bmd_vals['End_Point'] = [end_point]
        bmd_vals['Model'] = np.nan
        bmd_vals['BMD10'] = np.nan
        bmd_vals['BMDL'] = np.nan
        bmd_vals['BMD50'] = np.nan
        bmd_vals['DataQC_Flag'] = qc_flag
        bmd_vals['AUC'] = dose_response_auc
        bmd_vals['Min_Dose'] = dose_min
        bmd_vals['Max_Dose'] = dose_max
        bmd_vals['AUC_Norm'] = dose_response_auc_norm
        bmd_vals['BMD_Analysis_Flag'] = bmd_analysis_flag
        bmd_vals['BMD10_Flag'] = np.nan
        bmd_vals['BMD50_Flag'] = np.nan

        dose_response_vals['Chemical_ID'] = [chemical_id] * len(
            test_dose_response.dose)
        dose_response_vals['End_Point'] = [end_point] * len(
            test_dose_response.dose)
        dose_response_vals['Dose'] = test_dose_response.dose
        dose_response_vals[
            'Response'] = test_dose_response.num_affected / test_dose_response.total_num
        dose_response_vals['CI_Lo'] = CI_bounds[0, :]
        dose_response_vals['CI_Hi'] = CI_bounds[1, :]

        fit_vals['Chemical_ID'] = [chemical_id]
        fit_vals['End_Point'] = [end_point]
        fit_vals['X_vals'] = np.nan
        fit_vals['Y_vals'] = np.nan

        # A new file is created with a header row; an existing file is
        # appended to without repeating the header.
        for frame, csv_name in ((bmd_vals, bmd_vals_file_name),
                                (dose_response_vals,
                                 dose_response_vals_file_name),
                                (fit_vals, fit_vals_file_name)):
            is_new_file = not os.path.isfile(csv_name)
            frame.to_csv(csv_name,
                         mode='w' if is_new_file else 'a',
                         header=is_new_file,
                         index=False,
                         na_rep='NULL')

        # We can also set the file's metadata via the PdfPages object:
        d = pdf.infodict()
        d['Author'] = 'Paritosh Pande'
        d['CreationDate'] = datetime.datetime.today()
def save_results_good_data_unique_model(test_dose_response, qc_flag,
                                        model_preds, selected_models,
                                        chemical_id, end_point):
    """Write the PDF report and CSV rows for data with a selected model.

    A PDF named ``<chemical_id>_<end_point>.pdf`` is created containing a
    summary page, a table of model statistics, a table of scaled residuals
    and the dose-response plot overlaid with the selected model's curve.
    One row is written to the ``bmd_vals`` CSV, one row per dose to the
    ``dose_response_vals`` CSV and one row per fit point to the ``fit_vals``
    CSV; each CSV is created with a header if missing, otherwise appended to.

    Inputs:
        test_dose_response: table with ``dose``, ``num_affected`` and
            ``total_num`` columns
        qc_flag: key (0-5) into the data-QC message table
        model_preds: table with 'Model', 'Chi-squared', 'p-val', 'AIC',
            'BMD10', 'BMDL10', 'BMD50', 'Scaled Residuals' and
            'Optimized Params' columns
        selected_models: mapping with 'model', 'model_select_flag' and
            'no_unique_model_found_flag' entries
        chemical_id: chemical identifier; converted to ``str`` if needed
        end_point: end-point name, used in the PDF file name

    Relies on the module-level names ``report`` (debug-print switch) and
    ``time_now_date`` (suffix of the CSV file names).
    """

    def _save_table_page(pdf, frame, title):
        # Render `frame` as a table on its own axis-free PDF page.
        fig, ax = plt.subplots()
        fig.patch.set_visible(False)
        ax.axis('off')
        ax.axis('tight')
        ax.table(cellText=frame.values,
                 colLabels=frame.columns,
                 loc='center')
        plt.title(title)
        fig.tight_layout()
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

    def _append_csv(frame, file_name):
        # Create the CSV with a header row, or append rows without one.
        file_exists = os.path.isfile(file_name)
        frame.to_csv(file_name,
                     mode='a' if file_exists else 'w',
                     header=not file_exists,
                     index=False,
                     na_rep='NULL')

    def _dose_range_flag(bmd_estimate):
        # -1: below the dose labelled 1, 1: above the last dose, 0: in range.
        if (bmd_estimate < test_dose_response.dose[1]):
            return -1
        elif (bmd_estimate > test_dose_response.dose.iloc[-1]):
            return 1
        return 0

    # Estimate AUC and min and max doses
    if (not test_dose_response.empty):
        dose_response_auc = np.trapz(test_dose_response.num_affected /
                                     test_dose_response.total_num,
                                     x=test_dose_response.dose)
        dose_min = min(test_dose_response.dose)
        dose_max = max(test_dose_response.dose)
        dose_response_auc_norm = dose_response_auc / (dose_max - dose_min)
    else:
        dose_response_auc = np.nan
        dose_min = np.nan
        dose_max = np.nan
        dose_response_auc_norm = np.nan

    if (not isinstance(chemical_id, str)):
        chemical_id = str(chemical_id)
    filename = chemical_id + '_' + end_point + '.pdf'

    model_preds = model_preds.round(8)

    # Extract subset of results table
    model_preds_basic_stats = model_preds[[
        'Model', 'Chi-squared', 'p-val', 'AIC', 'BMD10', 'BMDL10'
    ]]

    # One residual column per dose, pre-filled with NaN
    n_doses = len(test_dose_response['dose'])
    n_models = model_preds['Scaled Residuals'].shape[0]
    residual_column_names = [('dose' + str(i)) for i in range(n_doses)]
    model_preds_residuals = pd.DataFrame(columns=['Model'] +
                                         residual_column_names)
    model_preds_residuals['Model'] = model_preds['Model']
    model_preds_residuals_matrix = np.empty((n_models, n_doses))
    model_preds_residuals_matrix[:] = np.nan
    model_preds_residuals[residual_column_names] = model_preds_residuals_matrix

    for model_pred_index in range(n_models):
        residuals = model_preds['Scaled Residuals'][model_pred_index]
        # Rows containing any NaN residual keep their NaN placeholders.
        if (not any(np.isnan(residuals))):
            if (report):
                print(f"model_preds_residuals:\n{model_preds_residuals}")
                print(f"model_pred_index:\n{model_pred_index}")
                print(
                    f"model_preds['Scaled Residuals'][model_pred_index]:\n{residuals}"
                )
                print(
                    f"type(model_preds['Scaled Residuals'][model_pred_index]):\n{type(residuals)}"
                )
                print(
                    f"type(model_preds['Scaled Residuals'][model_pred_index].tolist()):\n{type(residuals.tolist())}"
                )
                print(
                    f"model_preds['Scaled Residuals'][model_pred_index].tolist():\n{residuals.tolist()}"
                )
                print(
                    f"np.matrix(model_preds['Scaled Residuals'][model_pred_index].tolist()).round(8):\n{np.matrix(residuals.tolist()).round(8)}"
                )
            model_preds_residuals.iloc[model_pred_index, 1:] = np.matrix(
                residuals.tolist()).round(8)

    # Create dictionaries for various flags
    data_qc_flag_vals = {
        0:
        'Not enough dose groups for BMD analysis.' + '\n ' +
        'BMD analysis not performed.',
        1:
        'No trend detected in dose-response data.' + '\n' +
        'BMD analysis not performed.',
        2:
        'Dose-response data quality very good.',
        3:
        'Dose-response data quality good.',
        4:
        'Data resolution poor. Caution advised.',
        5:
        'Negative correlation detected in dose-response data.' + '\n' +
        'Caution advised.'
    }
    bmd_analysis_flag_vals = {
        1:
        'Convergence not achieved for any dose-response model.',
        2:
        'Model fit might be unreliable.' + '\n' +
        'p-val for chi-squared statistic was < 0.1 for all converged models.',
        3:
        'A unique model could not be determined.' + '\n' +
        'Multiple models had the same AIC and BMD values but no valid BMDL values.',
        4:
        'Multiple models found.' + '\n' +
        'User advised to look at the results of analysis to choose the best model.'
    }
    txt_for_model_selection = 'Best model found:' + selected_models[
        'model'].values
    unique_model_flag_vals = {
        0: txt_for_model_selection,
        1: 'Best model could not be determined'
    }
    bmd_analysis_flag = selected_models['model_select_flag']
    unique_model_flag = selected_models['no_unique_model_found_flag']

    # Filenames for csv files containing the results of analysis
    bmd_vals_file_name = 'bmd_vals_' + str(time_now_date) + '.csv'
    dose_response_vals_file_name = 'dose_response_vals_' + str(
        time_now_date) + '.csv'
    fit_vals_file_name = 'fit_vals_' + str(time_now_date) + '.csv'

    # Generate text for report: QC message, then the model-selection message,
    # then the analysis message unless a unique model was found with a
    # model-selection flag other than 2.
    text_for_report = data_qc_flag_vals[qc_flag]
    unique_and_reliable = ((unique_model_flag == 0)
                           and (bmd_analysis_flag != 2))
    text_for_report = text_for_report + '\n' + unique_model_flag_vals[
        unique_model_flag]
    if not unique_and_reliable:
        text_for_report = text_for_report + '\n' + bmd_analysis_flag_vals[
            bmd_analysis_flag]

    # The with statement makes sure that the PdfPages object is closed
    # properly at the end of the block, even if an Exception occurs.
    with PdfPages(filename) as pdf:
        # Output data summary
        fig, ax = plt.subplots()
        fig.patch.set_visible(False)
        ax.axis('off')
        ax.axis('tight')
        fig.text(0.1,
                 0.7,
                 ' '.join(map(str, text_for_report)),
                 transform=fig.transFigure,
                 size=10,
                 ha="left")
        plt.title('Summary of Analysis')
        pdf.savefig()
        plt.close()

        # Model predictions and residuals as table pages
        _save_table_page(pdf, model_preds_basic_stats, 'Model Predictions')
        _save_table_page(pdf, model_preds_residuals, 'Scaled Residuals')

        # Extract data for best model found and save it for portal
        # and plot fit for selected model
        model_name = selected_models['model'].values
        selected_rows = model_preds['Model'] == model_name[0]
        optimized_params = model_preds.loc[selected_rows,
                                           'Optimized Params'].values[0]

        # Distances from the observed fraction to the 95% interval bounds
        CI_bounds = np.zeros([2, len(test_dose_response.dose)])
        for index in range(len(test_dose_response.dose)):
            CI = astrostats.binom_conf_interval(
                test_dose_response.num_affected[index],
                test_dose_response.total_num[index],
                confidence_level=0.95)
            CI = np.abs(CI - test_dose_response.num_affected[index] /
                        test_dose_response.total_num[index])
            CI_bounds[0, index] = CI[0]
            CI_bounds[1, index] = CI[1]

        fig, ax = plt.subplots()
        # Setting the values for all axes.
        custom_ylim = (0, 1)
        plt.setp(ax, ylim=custom_ylim)
        ax.set_xscale("linear")
        ax.errorbar(test_dose_response.dose,
                    test_dose_response.num_affected /
                    test_dose_response.total_num,
                    CI_bounds,
                    marker='s',
                    mfc='red',
                    fmt='.')
        ax.set_xlabel('Dose')
        ax.set_ylabel('Fractional Response')
        ax.set_title(' '.join(
            map(str,
                'Dose-response with best fit model (' + model_name + ')')))

        int_steps = 10
        dose_x_vals = gen_uneven_spacing(test_dose_response.dose, int_steps)
        # NOTE(review): this spot previously called np.append(dose_x_vals, ...)
        # and discarded the result, so the grid was never extended by the
        # extra step. The dead call is dropped; assign the result here if the
        # extension was actually intended.

        # Evaluate the selected model once and reuse the values for both the
        # plot and the CSV. y_vals defaults to NaN so an unrecognised model
        # name no longer ends in a NameError when fit_vals is filled.
        known_models = ('logistic', 'log_logistic', 'gamma', 'weibull',
                        'probit', 'log_probit', 'multistage_2',
                        'quantal_linear')
        y_vals = np.full(len(dose_x_vals), np.nan)
        if model_name[0] in known_models:
            fit_function = getattr(baf, model_name[0] + '_fun')
            y_vals = fit_function(dose_x_vals, optimized_params)
            ax.plot(dose_x_vals, y_vals, 'b-')
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        # We can also set the file's metadata via the PdfPages object:
        d = pdf.infodict()
        d['Author'] = 'Paritosh Pande'
        d['CreationDate'] = datetime.datetime.today()

    # Create dataframes to append to the csv files
    bmd_vals = pd.DataFrame(columns=[
        'Chemical_ID', 'End_Point', 'Model', 'BMD10', 'BMDL', 'BMD50', 'AUC',
        'Min_Dose', 'Max_Dose', 'AUC_Norm', 'DataQC_Flag',
        'BMD_Analysis_Flag', 'BMD10_Flag', 'BMD50_Flag'
    ])
    dose_response_vals = pd.DataFrame(columns=[
        'Chemical_ID', 'End_Point', 'Dose', 'Response', 'CI_Lo', 'CI_Hi'
    ])
    fit_vals = pd.DataFrame(
        columns=['Chemical_ID', 'End_Point', 'X_vals', 'Y_vals'])

    # Populate dataframes
    bmd10 = model_preds.loc[selected_rows, 'BMD10'].values
    bmd50 = model_preds.loc[selected_rows, 'BMD50'].values
    bmd_vals['Chemical_ID'] = [chemical_id]
    bmd_vals['End_Point'] = [end_point]
    bmd_vals['Model'] = model_name
    bmd_vals['BMD10'] = bmd10
    bmd_vals['BMDL'] = model_preds.loc[selected_rows, 'BMDL10'].values
    bmd_vals['BMD50'] = bmd50
    bmd_vals['DataQC_Flag'] = qc_flag
    bmd_vals['AUC'] = dose_response_auc
    bmd_vals['Min_Dose'] = dose_min
    bmd_vals['Max_Dose'] = dose_max
    bmd_vals['AUC_Norm'] = dose_response_auc_norm
    bmd_vals['BMD_Analysis_Flag'] = bmd_analysis_flag
    bmd_vals['BMD10_Flag'] = _dose_range_flag(bmd10)
    bmd_vals['BMD50_Flag'] = _dose_range_flag(bmd50)

    n_rows = len(test_dose_response.dose)
    dose_response_vals['Chemical_ID'] = [chemical_id] * n_rows
    dose_response_vals['End_Point'] = [end_point] * n_rows
    dose_response_vals['Dose'] = test_dose_response.dose
    dose_response_vals['Response'] = (test_dose_response.num_affected /
                                      test_dose_response.total_num)
    dose_response_vals['CI_Lo'] = CI_bounds[0, :]
    dose_response_vals['CI_Hi'] = CI_bounds[1, :]

    if (report):
        print(len(dose_x_vals))
        print(len(y_vals))
    fit_vals['Chemical_ID'] = [chemical_id] * len(dose_x_vals)
    fit_vals['End_Point'] = [end_point] * len(dose_x_vals)
    fit_vals['X_vals'] = dose_x_vals
    fit_vals['Y_vals'] = y_vals

    _append_csv(bmd_vals, bmd_vals_file_name)
    _append_csv(dose_response_vals, dose_response_vals_file_name)
    _append_csv(fit_vals, fit_vals_file_name)
edwarf = np.sum((ecodwarfconf.jhu != 0) | (ecodwarfconf.port != 0)) edwarfagn = np.sum(ecodwarfconf['confidence_level'] >= 0) elif ('&' in colname[index]): rdwarf = np.sum((resdwarfconf.jhu != 0) & (resdwarfconf.port != 0)) rdwarfagn = np.sum(resdwarfconf['confidence_level'] == 2) edwarf = np.sum((ecodwarfconf.jhu != 0) & (ecodwarfconf.port != 0)) edwarfagn = np.sum(ecodwarfconf['confidence_level'] == 2) else: rdwarf = np.sum(resdwarfconf[colname[index]] != 0) rdwarfagn = np.sum(resdwarfconf[colname[index]] > 0) edwarf = np.sum(ecodwarfconf[colname[index]] != 0) edwarfagn = np.sum(ecodwarfconf[colname[index]] > 0) rdwarfagnpc = round((100.0 * rdwarfagn / rdwarf), 2) r_edown, r_eup = 100.0 * binom_conf_interval(rdwarfagn, rdwarf) - rdwarfagnpc r_edown = round(-r_edown, 2) r_eup = round(r_eup, 2) edwarfagnpc = round((100.0 * edwarfagn / edwarf), 2) e_edown, e_eup = 100.0 * binom_conf_interval(edwarfagn, edwarf) - edwarfagnpc e_edown = round(-e_edown, 2) e_eup = round(e_eup, 2) print '\t'+index+' & '+ str(rdwarf)+' & ',str(rdwarfagn)+' & $'+str(rdwarfagnpc)+\ '^{+'+str(r_eup)+'}'+'_{'+str(-r_edown)+'}$'+' & '\ + str(edwarf)+' & ',str(edwarfagn)+' & $'+str(edwarfagnpc)+\ '^{+'+str(e_eup)+'}'+'_{'+str(-e_edown)+'}$\\\\' print('\t \hline \n \t \end{tabular} \n \label{table:2} \n \end{table*}')
# Share of all dwarf AGN that fall in each observing semester, in percent.
pcdwarfspringagn = 100.0*dwarfspringagn/totaldwarfagn
pcdwarffallagn = 100.0*dwarffallagn/totaldwarfagn
print('\n\nDwarfs from RESOLVE Master Catalog')
print('Number of Dwarfs: {} \nSpring Dwarfs: {} ({:.2f}% of spring sample) \
      \nFall Dwarfs: {} ({:.2f}% of fall sample)'
      .format(totaldwarf,len(dwarfspring),pcdwarfspring,
              len(dwarffall),pcdwarffall))
#print('Total: {} \nSpring Dwarf AGN: {} ({:.2f}%) \nFall Dwarf AGN: {} ({:.2f}%)'
#      .format(totaldwarfagn,dwarfspringagn,pcdwarfspringagn,
#              dwarffallagn,pcdwarffallagn))

# AGN percentage within each dwarf sub-sample (spring, fall, all dwarfs).
pcspringdwarfagn = 100.0*dwarfspringagn/len(dwarfspring)
pcfalldwarfagn = 100.0*dwarffallagn/len(dwarffall)
pcresdwarfagn = 100.0*np.sum(dwarfagn)/totaldwarf

# binom_conf_interval is called with its default confidence level and
# interval type; the bounds are scaled to percent and then turned into
# signed offsets from the measured percentage (upper offset positive,
# lower offset negative).
springlowlim, springuplim = 100*binom_conf_interval(dwarfspringagn,len(dwarfspring))
springup= springuplim -pcspringdwarfagn
springlow = springlowlim-pcspringdwarfagn
falllowlim, falluplim = 100*binom_conf_interval(dwarffallagn,len(dwarffall))
fallup= falluplim -pcfalldwarfagn
falllow = falllowlim-pcfalldwarfagn
reslowlim, resuplim = 100*binom_conf_interval(np.sum(dwarfagn),totaldwarf)
resup= resuplim -pcresdwarfagn
reslow = reslowlim-pcresdwarfagn

def pcprint(pc,up,low):
    # Display "pc^{+up}_{low}%" as rendered math, each value rounded to
    # two decimals. `low` is inserted as-is, so its sign appears in the
    # subscript.
    # NOTE(review): '\%' is not a recognised Python escape sequence; a raw
    # string (or '\\%') would avoid the invalid-escape warning.
    pc = str(round(pc,2))+'^{+'+str(round(up,2))+'}_{'+str(round(low,2))+'}\%'
    display(Math(pc))

display(Math('Dwarf AGN'))
display(Math('Spring : '+str(dwarfspringagn)+'/'+str(len(dwarfspring))))
def main(input_files, labels, output, n_bins, threshold, reference):
    """Plot the trigger probability per energy bin for each input file.

    For every input file the reconstructed-energy histogram of the (optionally
    thresholded) array events is divided by the number of events expected
    from the MC production in each bin, and drawn with binomial confidence
    intervals as error bars.

    Inputs:
        input_files: paths of the files to read ('array_events' and 'runs')
        labels: legend labels, matched to ``input_files`` by position
        output: path to save the figure to; if falsy the plot is shown
        n_bins: number of energy bins between 0.008 TeV and 200 TeV
        threshold: if > 0, keep only events whose ``gamma_prediction_mean``
            is at least this value
        reference: if truthy, overlay the effective-area requirement curve
    """
    bins, bin_center, bin_widths = make_energy_bins(e_min=0.008 * u.TeV,
                                                    e_max=200 * u.TeV,
                                                    bins=n_bins)

    for input_file, label, color in zip_longest(input_files, labels,
                                                color_cycle):
        # zip_longest pads the shorter inputs with None: stop as soon as
        # the input files run out.
        if not input_file:
            break

        events = read_data(input_file, key='array_events')
        runs = read_data(input_file, key='runs')
        mc_production = MCSpectrum.from_cta_runs(runs)

        if threshold > 0:
            events = events.loc[events.gamma_prediction_mean >= threshold]

        energies = events.gamma_energy_prediction_mean.values
        hist_all = mc_production.expected_events_for_bins(energy_bins=bins)
        hist_selected, _ = np.histogram(energies, bins=bins)

        # A bin cannot hold more selected events than were expected in total;
        # clip so the binomial interval is well defined.
        invalid = hist_selected > hist_all
        hist_selected[invalid] = hist_all[invalid]

        # use astropy to compute errors on that stuff
        lower_conf, upper_conf = binom_conf_interval(hist_selected, hist_all)
        trigger_probability = (hist_selected / hist_all)

        # matplotlib wants relative offsets for errors. the conf values are
        # absolute.
        lower = trigger_probability - lower_conf
        upper = upper_conf - trigger_probability

        mask = trigger_probability > 0
        plt.errorbar(
            bin_center.value[mask],
            trigger_probability[mask],
            xerr=bin_widths.value[mask] / 2.0,
            yerr=[lower[mask], upper[mask]],
            linestyle='',
            color=color,
            label=label,
        )

    if reference:
        df = load_effective_area_requirement()
        plt.plot(df.energy,
                 df.effective_area,
                 '--',
                 color='gray',
                 label='Prod3b reference')

    plt.legend()
    plt.xscale('log')
    plt.xlabel(r'$E_{\mathrm{Reco}} / \mathrm{TeV}$')
    plt.ylabel('Trigger Probability')
    plt.tight_layout()
    if output:
        plt.savefig(output)
    else:
        plt.show()
def save_results_poor_data_or_no_convergence(test_dose_response,
                                             qc_flag,
                                             chemical_id,
                                             end_point,
                                             selected_models=None):
    """Write the PDF report and CSV rows when no model results are available.

    A PDF named ``<chemical_id>_<end_point>.pdf`` is created with a summary
    page and a plot of the dose-response data. Rows with NaN model values
    are written to the ``bmd_vals`` and ``fit_vals`` CSVs and the
    dose-response data (or a single NaN row if it cannot be indexed) to the
    ``dose_response_vals`` CSV; each CSV is created with a header if
    missing, otherwise appended to.

    Inputs:
        test_dose_response: table with ``dose``, ``num_affected`` and
            ``total_num`` columns (may be empty)
        qc_flag: key (0-5) into the data-QC message table
        chemical_id: chemical identifier; converted to ``str`` if needed
        end_point: end-point name, used in the PDF file name
        selected_models: if not None, the summary reports that no model
            converged instead of the data-QC message

    Relies on the module-level name ``time_now_date`` (suffix of the CSV
    file names).
    """

    def _append_csv(frame, file_name):
        # Create the CSV with a header row, or append rows without one.
        file_exists = os.path.isfile(file_name)
        frame.to_csv(file_name,
                     mode='a' if file_exists else 'w',
                     header=not file_exists,
                     index=False,
                     na_rep='NULL')

    # Estimate AUC and min and max doses
    if (not test_dose_response.empty):
        dose_response_auc = np.trapz(test_dose_response.num_affected /
                                     test_dose_response.total_num,
                                     x=test_dose_response.dose)
        dose_min = min(test_dose_response.dose)
        dose_max = max(test_dose_response.dose)
        dose_response_auc_norm = dose_response_auc / (dose_max - dose_min)
    else:
        dose_response_auc = np.nan
        dose_min = np.nan
        dose_max = np.nan
        dose_response_auc_norm = np.nan

    if (not isinstance(chemical_id, str)):
        chemical_id = str(chemical_id)
    filename = chemical_id + '_' + end_point + '.pdf'

    # Create dictionaries for various flags
    data_qc_flag_vals = {
        0:
        'Not enough dose groups for BMD analysis.' + '\n ' +
        'BMD analysis not performed.',
        1:
        'No trend detected in dose-response data.' + '\n' +
        'BMD analysis not performed.',
        2:
        'Dose-response data quality very good.',
        3:
        'Dose-response data quality good.',
        4:
        'Data resolution poor.' + '\n' + 'Caution advised.',
        5:
        'Negative correlation detected in dose-response data.' + '\n' +
        'Caution advised.'
    }

    # Filenames for csv files containing the results of analysis
    bmd_vals_file_name = 'bmd_vals_' + str(time_now_date) + '.csv'
    dose_response_vals_file_name = 'dose_response_vals_' + str(
        time_now_date) + '.csv'
    fit_vals_file_name = 'fit_vals_' + str(time_now_date) + '.csv'

    # Generate text for report
    if (selected_models is not None):
        text_for_report = 'Convergence not achieved for any dose-response model.'
    else:
        text_for_report = data_qc_flag_vals[qc_flag]

    # The with statement makes sure that the PdfPages object is closed
    # properly at the end of the block, even if an Exception occurs.
    with PdfPages(filename) as pdf:
        # Output data summary
        fig, ax = plt.subplots()
        fig.patch.set_visible(False)
        ax.axis('off')
        ax.axis('tight')
        fig.text(0.1,
                 0.7,
                 text_for_report,
                 transform=fig.transFigure,
                 size=10,
                 ha="left")
        plt.title('Summary of Analysis')
        pdf.savefig()
        plt.close()

        # Plot dose-response data; error bars are the distances from the
        # observed fraction to the 95% interval bounds.
        CI_bounds = np.zeros([2, len(test_dose_response.dose)])
        for index in range(len(test_dose_response.dose)):
            print(
                f"test_dose_response.num_affected[index]:{test_dose_response.num_affected[index]}"
            )
            print(
                f"test_dose_response.total_num[index]:{test_dose_response.total_num[index]}"
            )
            CI = astrostats.binom_conf_interval(
                test_dose_response.num_affected[index],
                test_dose_response.total_num[index],
                confidence_level=0.95)
            CI = np.abs(CI - test_dose_response.num_affected[index] /
                        test_dose_response.total_num[index])
            CI_bounds[0, index] = CI[0]
            CI_bounds[1, index] = CI[1]

        fig, ax = plt.subplots()
        # Setting the values for all axes.
        custom_ylim = (0, 1)
        plt.setp(ax, ylim=custom_ylim)
        ax.set_xscale("linear")
        ax.errorbar(test_dose_response.dose,
                    test_dose_response.num_affected /
                    test_dose_response.total_num,
                    CI_bounds,
                    marker='s',
                    mfc='red',
                    fmt='.')
        ax.set_xlabel('Dose')
        ax.set_ylabel('Fractional Response')
        ax.set_title('Dose-response Data')
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        # We can also set the file's metadata via the PdfPages object:
        d = pdf.infodict()
        d['Author'] = 'Paritosh Pande'
        d['CreationDate'] = datetime.datetime.today()

    # Create dataframes to append to the csv files
    bmd_vals = pd.DataFrame(columns=[
        'Chemical_ID', 'End_Point', 'Model', 'BMD10', 'BMDL', 'BMD50', 'AUC',
        'Min_Dose', 'Max_Dose', 'AUC_Norm', 'DataQC_Flag',
        'BMD_Analysis_Flag', 'BMD10_Flag', 'BMD50_Flag'
    ])
    dose_response_vals = pd.DataFrame(columns=[
        'Chemical_ID', 'End_Point', 'Dose', 'Response', 'CI_Lo', 'CI_Hi'
    ])
    fit_vals = pd.DataFrame(
        columns=['Chemical_ID', 'End_Point', 'X_vals', 'Y_vals'])

    # Populate dataframes
    bmd_vals['Chemical_ID'] = [chemical_id]
    bmd_vals['End_Point'] = [end_point]
    bmd_vals['Model'] = np.nan
    bmd_vals['BMD10'] = np.nan
    bmd_vals['BMDL'] = np.nan
    bmd_vals['BMD50'] = np.nan
    bmd_vals['DataQC_Flag'] = qc_flag
    bmd_vals['AUC'] = dose_response_auc
    bmd_vals['Min_Dose'] = dose_min
    bmd_vals['Max_Dose'] = dose_max
    bmd_vals['AUC_Norm'] = dose_response_auc_norm
    bmd_vals['BMD_Analysis_Flag'] = np.nan
    bmd_vals['BMD10_Flag'] = np.nan
    bmd_vals['BMD50_Flag'] = np.nan

    # Probe whether the dose column can be indexed at 0 (e.g. 53_ANY24); an
    # empty column (e.g. 1532_ANY24) raises and gets a single NaN row
    # instead. Only lookup failures are caught, so unrelated errors and
    # KeyboardInterrupt are no longer swallowed.
    try:
        test_dose_response.dose[0]
    except (KeyError, IndexError):
        assign_nan = True
    else:
        assign_nan = False

    if (assign_nan):
        dose_response_vals['Chemical_ID'] = [chemical_id]
        dose_response_vals['End_Point'] = [end_point]
        dose_response_vals['Dose'] = np.nan
        dose_response_vals['Response'] = np.nan
        dose_response_vals['CI_Lo'] = np.nan
        dose_response_vals['CI_Hi'] = np.nan
    else:
        n_rows = len(test_dose_response.dose)
        dose_response_vals['Chemical_ID'] = [chemical_id] * n_rows
        dose_response_vals['End_Point'] = [end_point] * n_rows
        dose_response_vals['Dose'] = test_dose_response.dose
        dose_response_vals['Response'] = (test_dose_response.num_affected /
                                          test_dose_response.total_num)
        dose_response_vals['CI_Lo'] = CI_bounds[0, :]
        dose_response_vals['CI_Hi'] = CI_bounds[1, :]

    fit_vals['Chemical_ID'] = [chemical_id]
    fit_vals['End_Point'] = [end_point]
    fit_vals['X_vals'] = np.nan
    fit_vals['Y_vals'] = np.nan

    _append_csv(bmd_vals, bmd_vals_file_name)
    _append_csv(dose_response_vals, dose_response_vals_file_name)
    _append_csv(fit_vals, fit_vals_file_name)
def main(input_file, output, cuts_path, reference, cmap):
    """Plot the effective area per true-energy bin for the selected gammas.

    The true-energy histogram of the gamma events that survive the cuts is
    divided by the number of events expected from the MC production in each
    bin and scaled by the generation area. Error bars are 95% binomial
    confidence intervals scaled the same way.

    Inputs:
        input_file: path of the file holding signal events and runs
        output: path to save the figure to; if falsy the plot is shown
        cuts_path: cuts configuration passed to ``apply_cuts``; when truthy
            the points are coloured by the prediction threshold and a
            colorbar is added
        reference: if truthy, overlay the reference effective-area curve
        cmap: name of the colormap used for the threshold colouring
    """
    bins, bin_center, bin_widths = make_default_cta_binning(
        e_min=0.005 * u.TeV, bins_per_decade=15)

    gammas, _, _ = load_signal_events(
        input_file,
        calculate_weights=False,
    )
    gammas.dropna(inplace=True)
    sigma = 1
    gammas = apply_cuts(gammas,
                        cuts_path=cuts_path,
                        theta_cuts=True,
                        sigma=sigma)

    runs = load_runs(input_file)
    mc_production = MCSpectrum.from_cta_runs(runs)
    data_description = load_data_description(input_file,
                                             gammas,
                                             cuts_path=cuts_path)

    gammas_energy = gammas.mc_energy.values
    hist_all = mc_production.expected_events_for_bins(energy_bins=bins)
    hist_selected, _ = np.histogram(gammas_energy, bins=bins)

    # A bin cannot hold more selected events than were expected in total;
    # clip so the binomial interval is well defined.
    invalid = hist_selected > hist_all
    hist_selected[invalid] = hist_all[invalid]

    # use astropy to compute errors on that stuff; `confidence_level` is the
    # current name of the keyword formerly spelled `conf`.
    lower_conf, upper_conf = binom_conf_interval(hist_selected,
                                                 hist_all,
                                                 confidence_level=0.95)

    # scale confidences to match the area
    lower_conf = lower_conf * mc_production.generation_area
    upper_conf = upper_conf * mc_production.generation_area
    area = (hist_selected / hist_all) * mc_production.generation_area

    # matplotlib wants relative offsets for errors. the conf values are
    # absolute.
    lower = area - lower_conf
    upper = upper_conf - area

    mask = area > 0
    color = None
    if cuts_path:
        f_prediction = prediction_function(cuts_path, sigma=0)
        colormap = cm.get_cmap(cmap, 512)
        color = colormap(f_prediction(bin_center.value[mask]))
        sm = cm.ScalarMappable(cmap=colormap)
        plt.colorbar(sm, label='Prediction Threshold', pad=0.01)

    plt.errorbar(
        bin_center.value[mask],
        area.value[mask],
        xerr=bin_widths.value[mask] / 2.0,
        yerr=[lower.value[mask], upper.value[mask]],
        linestyle='',
        color=color if color is not None else next(color_cycle),
    )

    if reference:
        df = load_effective_area_reference()
        plt.plot(df.energy,
                 df.effective_area,
                 '--',
                 color='gray',
                 label='Reference')

    # The legend doubles as a text box for the data description (its title).
    legend = plt.legend(framealpha=0, loc='upper left', handletextpad=1)
    for t in legend.get_texts():
        t.set_multialignment('right')
    legend.set_title(data_description)
    legend._legend_box.align = "left"
    legend.get_title().set_alpha(0.5)

    plt.ylim([800, 0.5E8])
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel('True Energy / TeV')
    plt.ylabel('Effective Area / $\\text{m}^2$')
    plt.tight_layout(pad=0, rect=(0.001, 0, 1.041, 0.99))
    if output:
        plt.savefig(output)
    else:
        plt.show()
def process(self,
            ih,
            user_var,
            fix_thresh=False,
            method='quick',
            include=True):
    """Calculate the statistics from the current histogram.

    The peak parameters obtained with the chosen method are written back to
    ih (peak_heights, peak_centre, peak_widths) and the derived statistics
    are stored in self.temp_vals.

    Keyword arguments:
    ih: an instance of the image_handler Analysis class, generates the histogram
    user_var: the user variable associated with this calculation
    fix_thresh: True - keep old threshold value, False - update the threshold value
    method: 'quick' - image_handler uses a peak finding algorithm
            'double gaussian' - fit a double Gaussian function
            'separate gaussians' - split the histogram at the threshold and fit Gaussians
            'double poissonian' - fit a double Poissonian function
            'single gaussian' - fit a single Gaussian to background peak
    include: whether to include the values in further analysis.

    Returns 0 if the threshold check, a fit, or the NaN/inf check on the
    peak parameters fails, and 1 once the statistics have been stored.
    NOTE(review): confirm the intended return value when ih.ind is 0, and
    the behaviour for a method string that matches none of the options
    above (A0, A1, ... would then be unbound).
    """
    if ih.ind > 0:  # only update if a histogram exists
        if fix_thresh:  # using manual threshold
            bins, occ, thresh = ih.histogram(
            )  # update hist and peak stats, keep thresh
        else:
            bins, occ, thresh = ih.hist_and_thresh(
            )  # update hist and get peak stats
        bin_mid = (bins[1] - bins[0]) * 0.5  # from edge of bin to middle
        self.bf = fc.fit(bins[:-1] + bin_mid,
                         occ)  # class for fitting function to data
        try:
            # int() raises ValueError for NaN and OverflowError for +/-inf,
            # so this rejects thresholds whose log is not finite.
            # NOTE(review): the original remark said "don't do anything if
            # threshold is < 1", but a threshold between 0 and 1 has a finite
            # log and passes this check.
            int(np.log(thresh))
            ih.est_peaks(bins, occ)  # use find_peaks to get first estimate
        except (ValueError, OverflowError):
            return 0
        if method == 'quick':
            # take the estimates from ih.est_peaks directly
            A0, A1 = ih.peak_heights
            mu0, mu1 = ih.peak_centre
            sig0, sig1 = ih.peak_widths
        elif method == 'double gaussian':
            # parameters: Total num images, loading prob, centre, s.d., centre, s.d.
            self.bf.p0 = [
                ih.ind, 0.6, ih.peak_centre[0], ih.peak_widths[0],
                ih.peak_centre[1], ih.peak_widths[1]
            ]
            try:
                if fix_thresh:  # bound the lower peak to below threshold
                    self.bf.getBestFit(
                        self.bf.double_gauss,
                        bounds=(np.array([0, 0, 0, 0, ih.thresh, 0]),
                                np.array([
                                    np.inf, 1, ih.thresh, np.inf, np.inf,
                                    np.inf
                                ])))
                else:  # get unbounded best fit parameters
                    self.bf.getBestFit(self.bf.double_gauss)
            except:
                return 0  # fit failed, do nothing
            # Assign the fitted peaks to background (0) and signal (1).
            # NOTE(review): ps[1] is the loading probability according to the
            # parameter list above; comparing it with ps[4] (a centre) looks
            # like it was meant to be ps[2] < ps[4] - confirm.
            if self.bf.ps[1] < self.bf.ps[4]:
                N, A1, mu0, sig0, mu1, sig1 = self.bf.ps
            else:
                N, A1, mu1, sig1, mu0, sig0 = self.bf.ps
            # convert total count and loading fraction into two amplitudes
            A0, A1 = N * (1 - A1), N * A1
        elif method == 'separate gaussians':  # separate Gaussian fit for bg/signal
            diff = abs(bins - thresh)  # minimum is at the threshold
            thresh_i = np.argmin(diff)  # index of the threshold
            # split the histogram at the threshold value
            best_fits = [
                fc.fit(bins[:thresh_i] + bin_mid, occ[:thresh_i]),
                fc.fit(bins[thresh_i:-1] + bin_mid, occ[thresh_i:])
            ]
            for b in best_fits:
                try:
                    b.estGaussParam()  # get estimate of parameters
                    b.getBestFit(b.gauss)  # get best fit parameters
                except:
                    return 0
            A0, mu0, sig0 = best_fits[0].ps
            A1, mu1, sig1 = best_fits[1].ps
            # store the two fits in the double-Gaussian parameter layout
            self.bf.p0 = [
                A0 + A1, 1 - A0 / (A0 + A1), mu0, sig0, mu1, sig1
            ]
            self.bf.ps = [
                A0 + A1, 1 - A0 / (A0 + A1), mu0, sig0, mu1, sig1
            ]
            self.bf.bffunc = self.bf.double_gauss  # plot as double gaussian for consistency
        elif method == 'double poissonian':
            self.bf.p0 = [
                ih.peak_heights[0], ih.peak_centre[0], ih.peak_heights[1],
                ih.peak_centre[1]
            ]
            try:
                # parameters are: mean, amplitude
                self.bf.getBestFit(self.bf.double_poisson)
            except:
                return 0
            A0, mu0, A1, mu1 = self.bf.ps
            # widths are taken as the square root of the fitted means
            sig0, sig1 = np.sqrt(mu0), np.sqrt(mu1)
        elif method == 'single gaussian':
            try:
                self.bf.estGaussParam()
                self.bf.getBestFit(
                    self.bf.gauss)  # get best fit parameters
            except:
                return 0  # fit failed, do nothing
            A0, mu0, sig0 = self.bf.ps
            A1, mu1, sig1 = 0, 0, 0  # no signal peak in this mode
            fix_thresh = True
            ih.thresh = max(bins)  # set the threshold above the counts
        try:
            list(map(
                int,
                [A0, A1, mu0, mu1, sig0, sig1]))  # check for NaN or inf
        except (ValueError, OverflowError):
            return 0
        ih.peak_heights = [A0, A1]
        ih.peak_centre = [mu0, mu1]
        ih.peak_widths = [sig0, sig1]
        if self.bf.rchisq and abs(self.bf.rchisq) > 1e9:
            include = False  # bad fit
        # update threshold to where fidelity is maximum if not set by user
        if fix_thresh:
            ih.fidelity, ih.err_fidelity = np.around(ih.get_fidelity(),
                                                     4)  # round to 4 d.p.
        else:
            ih.hist_and_thresh()
        # update atom statistics: floor division by the threshold is 0 for
        # counts below it and >= 1 for counts at or above it
        ih.stats['Atom detected'] = [
            count // ih.thresh for count in ih.stats['Counts']
        ]
        above_idxs = np.where(np.array(ih.stats['Atom detected']) > 0)[
            0]  # index of images with counts above threshold
        atom_count = np.size(
            above_idxs)  # number of images with counts above threshold
        above = np.array(
            ih.stats['Counts'])[above_idxs]  # counts above threshold
        below_idxs = np.where(np.array(ih.stats['Atom detected']) <= 0)[
            0]  # index of images with counts below threshold
        empty_count = np.size(
            below_idxs)  # number of images with counts below threshold
        below = np.array(
            ih.stats['Counts'])[below_idxs]  # counts below threshold
        # use the binomial distribution to get 1 sigma confidence intervals:
        conf = binom_conf_interval(atom_count,
                                   atom_count + empty_count,
                                   interval='jeffreys')
        loading_prob = atom_count / ih.ind  # fraction of images above threshold
        uplperr = conf[1] - loading_prob  # 1 sigma confidence above mean
        lolperr = loading_prob - conf[0]  # 1 sigma confidence below mean
        # store the calculated histogram statistics as temp
        self.temp_vals['File ID'] = int(self.ind)
        self.temp_vals['Start file #'] = min(ih.stats['File ID'])
        self.temp_vals['End file #'] = max(ih.stats['File ID'])
        self.temp_vals['ROI xc ; yc ; size'] = ' ; '.join(
            list(map(str, [ih.xc, ih.yc, ih.roi_size])))
        # cast the user variable with its registered type; falsy input
        # (including 0 and '') is stored as 0.0
        self.temp_vals['User variable'] = self.types['User variable'](
            user_var) if user_var else 0.0
        self.temp_vals['Number of images processed'] = ih.ind
        self.temp_vals['Counts above : below threshold'] = str(
            atom_count) + ' : ' + str(empty_count)
        self.temp_vals['Loading probability'] = np.around(loading_prob, 4)
        self.temp_vals['Error in Loading probability'] = np.around(
            (uplperr + lolperr) * 0.5, 4)
        self.temp_vals['Lower Error in Loading probability'] = np.around(
            lolperr, 4)
        self.temp_vals['Upper Error in Loading probability'] = np.around(
            uplperr, 4)
        # Background statistics; the two floor divisions deliberately raise
        # ZeroDivisionError when fewer than two images are below threshold.
        try:
            1 // empty_count  # raises ZeroDivisionError if size is 0
            1 // (empty_count - 1)  # for std dev need size > 1
            self.temp_vals['Background peak count'] = int(mu0)
            # assume bias offset is self.bias, readout noise Nr
            var = ih.roi_size * self.Nr**2 + self.dg * self.emg * mu0 / self.pag
            if var > 0:
                self.temp_vals['sqrt(Nr^2 + Nbg*fg/A)'] = int(var**0.5)
            else:  # don't take the sqrt of a -ve number
                self.temp_vals['sqrt(Nr^2 + Nbg*fg/A)'] = 0
            self.temp_vals['Background peak width'] = int(sig0)
            self.temp_vals['Error in Background peak count'] = np.around(
                sig0 / empty_count**0.5, 2)
            self.temp_vals['Background mean'] = np.around(
                np.mean(below), 1)
            self.temp_vals['Background standard deviation'] = np.around(
                np.std(below, ddof=1), 1)
        except ZeroDivisionError:
            # NOTE(review): 'Background mean' and 'Background standard
            # deviation' are not reset here and keep their previous values.
            for key in [
                    'Background peak count', 'sqrt(Nr^2 + Nbg*fg/A)',
                    'Background peak width',
                    'Error in Background peak count'
            ]:
                self.temp_vals[key] = 0
        # Signal statistics, guarded the same way on the number of images
        # above threshold.
        try:
            1 // atom_count  # raises ZeroDivisionError if size is 0
            1 // (atom_count - 1)  # for std dev need size > 1
            self.temp_vals['Signal peak count'] = int(mu1)
            # assume bias offset is self.bias, readout noise Nr
            var = ih.roi_size * self.Nr**2 + self.dg * self.emg * mu1 / self.pag
            if var > 0:
                self.temp_vals['sqrt(Nr^2 + Ns*fg/A)'] = int(var**0.5)
            else:  # don't take the sqrt of a -ve number
                self.temp_vals['sqrt(Nr^2 + Ns*fg/A)'] = 0
            self.temp_vals['Signal peak width'] = int(sig1)
            self.temp_vals['Error in Signal peak count'] = np.around(
                sig1 / atom_count**0.5, 2)
            self.temp_vals['Signal mean'] = np.around(np.mean(above), 1)
            self.temp_vals['Signal standard deviation'] = np.around(
                np.std(above, ddof=1), 1)
            sep = mu1 - mu0  # separation of fitted peaks
            self.temp_vals['Separation'] = int(sep)
            # empty_count may still be 0 here, in which case this division
            # is what sends control to the except branch below
            seperr = np.sqrt(sig0**2 / empty_count + sig1**2 / atom_count)
            self.temp_vals['Error in Separation'] = np.around(seperr, 2)
            self.temp_vals['Fidelity'] = ih.fidelity
            self.temp_vals['Error in Fidelity'] = ih.err_fidelity
            self.temp_vals['S/N'] = np.around(
                sep / np.sqrt(sig0**2 + sig1**2), 2)
            # fractional error in the error is 1/sqrt(2N - 2)
            self.temp_vals['Error in S/N'] = np.around(
                self.temp_vals['S/N'] *
                np.sqrt((seperr / sep)**2 +
                        (sig0**2 / (2 * empty_count - 2) + sig1**2 /
                         (2 * atom_count - 2)) / (sig0**2 + sig1**2)), 2)
            self.temp_vals['Include'] = include
        except ZeroDivisionError:
            # NOTE(review): 'Signal mean' and 'Signal standard deviation' are
            # not reset here and keep their previous values.
            for key in [
                    'Signal peak count', 'sqrt(Nr^2 + Ns*fg/A)',
                    'Signal peak width', 'Error in Signal peak count',
                    'Separation', 'Error in Separation', 'Fidelity',
                    'Error in Fidelity', 'S/N', 'Error in S/N', 'Include'
            ]:
                self.temp_vals[key] = 0
        self.temp_vals['Threshold'] = int(ih.thresh)
        return 1  # fit successful
def psychFit(deltaBins, numR, numL, choices):
    """Bin trials by evidence and fit a psychometric curve (evidence vs % choice right).

    Each trial's evidence is ``numR - numL``. Trials are assigned to the bin
    ``i`` whose shifted edge satisfies
    ``deltaBins[i] - 1.5 <= evidence < deltaBins[i + 1] - 1.5``; evidence
    outside the covered range is clamped to the first / last bin. For every
    bin that contains at least one trial, the proportion of trials with
    ``choices == 2`` (counted as "right") and its Jeffreys binomial interval
    are computed. If at least 5 bins contain trials, ``psychometrics_function``
    is fitted to those proportions with ``curve_fit``.

    Inputs:
        deltaBins: 1-D array of bin values, in ascending order (required by
                   ``np.searchsorted``)
        numR:      per-trial number of right-side cues (array-like supporting ``-``)
        numL:      per-trial number of left-side cues
        choices:   per-trial choice; the value 2 is counted as a right choice

    Output:
        fit_results: dict with, for the bins that contain trials,
            'delta_bins'   : the corresponding entries of deltaBins
            'delta_data'   : mean evidence of the trials in each bin
            'pright_data'  : 100 * estimated proportion of right choices
            'delta_error'  : 'delta_data' repeated 3 times
            'pright_error' : 100 * [lower bounds..., upper bounds..., NaNs...]
          and
            'delta_fit'    : 50 evenly spaced evidence values spanning
                             deltaBins[0] - 2 .. deltaBins[-1] + 2
                             (empty array when no fit was made)
            'pright_fit'   : 100 * fitted curve evaluated at 'delta_fit'
                             (empty array when no fit was made)
    """
    deltaBins = np.asarray(deltaBins)
    n_bins = len(deltaBins)

    # Evidence variable and the "went right" flag, one entry per trial
    evidence = np.asarray(numR - numL).ravel()
    went_right = np.asarray(choices).ravel() == 2
    n_trials = went_right.size

    # Shift the bin values by 1.5 to produce the same binning as Matlab psychFit
    bin_edges = deltaBins.astype(float) - 1.5
    trial_bin = np.searchsorted(bin_edges, evidence, side='right') - 1
    # Evidence below the first edge yields -1, which as a Python index would
    # silently land in the LAST bin; clamp it to the first bin instead.
    trial_bin = np.clip(trial_bin, 0, n_bins - 1)[:n_trials]

    # Per-bin trial count, right-choice count and summed evidence
    numTrials = np.bincount(trial_bin, minlength=n_bins).astype(float)
    numRight = np.bincount(trial_bin[went_right],
                           minlength=n_bins).astype(float)
    delta_sum = np.bincount(trial_bin, weights=evidence[:n_trials],
                            minlength=n_bins)

    # Everything downstream only concerns bins that contain trials
    has_trials = numTrials > 0
    trials_nz = numTrials[has_trials]
    right_nz = numRight[has_trials]
    bins_nz = deltaBins[has_trials]
    mean_delta = delta_sum[has_trials] / trials_nz

    # (Binomial proportion confidence interval given k successes, n trials)
    # A zero-width interval is used to obtain the point estimate.
    p_right = np.asarray(
        binom_conf_interval(right_nz, trials_nz, confidence_level=0,
                            interval='jeffreys')[0], dtype=float)
    # NOTE(review): 0.1587 looks like a one-sided 1-sigma tail probability;
    # passed this way it requests an interval with 0.8413 probability content
    # -- confirm this matches the intended (Matlab) interval.
    ci = binom_conf_interval(right_nz, trials_nz,
                             confidence_level=1 - 0.1587,
                             interval='jeffreys')
    ci_lower = np.asarray(ci[0], dtype=float)
    ci_upper = np.asarray(ci[1], dtype=float)

    # (Logistic function fit) only valid if we have at least 5 bins with trials
    is_there_psychometric = np.count_nonzero(has_trials) >= 5
    if is_there_psychometric:
        # Get weight matrix to "reproduce" Matlab fit
        # https://stackoverflow.com/questions/58983113/scipy-curve-fit-vs-matlab-fit-weighted-nonlinear-least-squares
        # matlab -> 'Weights' , ((pci(sel,2) - pci(sel,1))/2).^-2
        # python -> sigma = diagonal_matrix(1/weights)
        # A 2-D sigma is a covariance matrix, hence the squared half-widths.
        half_width = (ci_upper - ci_lower) / 2
        sigma_fit = np.diag(half_width ** 2)
        psychometric, _ = curve_fit(psychometrics_function, bins_nz, p_right,
                                    p0=(0, 1, 3, 0), sigma=sigma_fit,
                                    maxfev=40000)

    # x vector for plotting
    delta = np.linspace(deltaBins[0] - 2, deltaBins[-1] + 2, num=50)

    # Error vectors: x repeated three times, paired with lower bounds, upper
    # bounds and a trailing block of NaNs (presumably a separator for
    # plotting -- confirm with the consumer of these vectors).
    errorX = np.tile(mean_delta, 3)
    errorY = np.concatenate(
        (ci_lower, ci_upper, np.full(ci_lower.shape, np.nan)))

    # Fill dictionary of results
    fit_results = {
        'delta_bins': bins_nz,
        'delta_data': mean_delta,
        'pright_data': 100 * p_right,
        'delta_error': errorX,
        'pright_error': 100 * errorY,
    }
    if is_there_psychometric:
        fit_results['delta_fit'] = delta
        fit_results['pright_fit'] = psychometrics_function(
            delta, *psychometric) * 100
    else:
        fit_results['delta_fit'] = np.empty([0])
        fit_results['pright_fit'] = np.empty([0])
    return fit_results