def DST(dst_filenames_path):
    """Load Kr dsts, compute the time difference between consecutive
    events and build the corresponding kr_event."""
    dst      = load_dsts(dst_filenames_path, "DST", "Events")
    dst_time = dst.sort_values('event')
    T        = dst_time.time.values
    DT       = time_delta_from_time(T)
    kge      = kr_event(dst, DT, dst.S2e, dst.S2q)
    return dst, DT, kge
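
## Usage sketch (not part of the original code; the path pattern is
## hypothetical and assumes `glob` is imported in this module):
## dst, DT, kge = DST(glob.glob('/data/r6971/kdst/kdst_*.h5'))
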
def load_data(input_path         : str,
              input_dsts         : str,
              file_bootstrap_map : str,
              ref_histo_file     : str,
              key_Z_histo        : str,
              quality_ranges     : dict) -> Tuple[pd.DataFrame,
                                                  ASectorMap,
                                                  ref_hist_container]:
    """
    Reads kdst files and applies a basic R cut. Outputs the kdst as a
    pd.DataFrame, together with the bootstrap map and the reference
    histograms.

    Parameters
    ----------
    input_path : str
        Path to the input map folder.
    input_dsts : str
        Name pattern of the dsts to be read.
    file_bootstrap_map : str
        Path to the bootstrap map file.
    ref_histo_file : str
        Path to the reference histogram file.
    key_Z_histo : str
        Key of the Z histogram inside the reference file.
    quality_ranges : dict
        Dictionary containing the ranges for the quality cuts.

    Returns
    -------
    dst_filtered : pd.DataFrame
        Dst containing all the events that pass the quality cuts.
    bootstrap_map : ASectorMap
        Bootstrap map.
    ref_histos : ref_hist_container
        Container with the reference histograms (Z distribution).
    """
    input_path   = os.path.expandvars(input_path)
    dst_files    = glob.glob(input_path + input_dsts)
    dst_full     = load_dsts(dst_files, "DST", "Events")
    dst_full     = dst_full.sort_values(by=['time'])
    mask_quality = quality_cut(dst_full, **quality_ranges)
    dst_filtered = dst_full[mask_quality]

    file_bootstrap_map = os.path.expandvars(file_bootstrap_map)
    bootstrap_map      = read_maps(file_bootstrap_map)

    ref_histo_file = os.path.expandvars(ref_histo_file)
    z_pd           = pd.read_hdf(ref_histo_file, key=key_Z_histo)
    z_histo        = ref_hist(bin_centres     = z_pd.bin_centres,
                              bin_entries     = z_pd.bin_entries,
                              err_bin_entries = z_pd.err_bin_entries)
    ref_histos     = ref_hist_container(Z_dist_hist = z_histo)

    return dst_filtered, bootstrap_map, ref_histos
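
## Usage sketch (hypothetical paths, key and cut ranges; the accepted
## quality_ranges keys depend on the signature of quality_cut):
## dst, boot_map, ref_histos = load_data(input_path         = '$IC_DATA/kdsts/',
##                                       input_dsts         = 'kdst_*.h5',
##                                       file_bootstrap_map = '$IC_DATA/map_bootstrap.h5',
##                                       ref_histo_file     = '$IC_DATA/ref_Z_histo.h5',
##                                       key_Z_histo        = 'Z_dist',
##                                       quality_ranges     = {'r_max': 200})
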
def compare_mc():
    """
    Looks at MC and data for the PMTs and checks scale levels at the
    level of individual PMTs and of the sum. Attempts to extract the
    relative scaling of the PMTs with a fit.

    run as:
    python pmtCompMCData.py <MC pmap base> <Data pmap base> <MC dst base> <Data dst base>
    """

    mc_file_base = sys.argv[1]
    da_file_base = sys.argv[2]
    dst_mc_base  = sys.argv[3]
    dst_da_base  = sys.argv[4]

    run_number = da_file_base[da_file_base.find('/r') + 2:da_file_base.find('/r') + 6]

    mc_sorter    = sorter_func(mc_file_base)
    mc_file_list = sorted(glob(mc_file_base + '*.h5'), key=mc_sorter)
    da_sorter    = sorter_func(da_file_base)
    da_file_list = sorted(glob(da_file_base + '*.h5'), key=da_sorter)

    mc_hit = load_dsts(glob(dst_mc_base + '*.h5'), 'RECO', 'Events')
    da_hit = load_dsts(glob(dst_da_base + '*.h5'), 'RECO', 'Events')

    ## Raw ('pX') and lifetime-corrected ('cpX') charge per PMT.
    dfcols = ['wf_sum',
              'p0', 'cp0', 'p1', 'cp1', 'p2' , 'cp2' , 'p3' , 'cp3' ,
              'p4', 'cp4', 'p5', 'cp5', 'p6' , 'cp6' , 'p7' , 'cp7' ,
              'p8', 'cp8', 'p9', 'cp9', 'p10', 'cp10', 'p11', 'cp11']
    pmt_scales = [1, 0.79, 1, 0.80, 0.72, 1.11, 1.03,
                  0.82, 0.82, 1.03, 0.89, 0.95, 0.82]

    mc_sums = pd.DataFrame(columns=dfcols)
    for fn in mc_file_list:
        print('Reading mc file ', fn)
        pmaps = load_pmaps(fn)
        print('...data got')
        for evt, pmap in pmaps.items():
            if len(pmap.s2s) == 1 and len(pmap.s1s) == 1:
                try:
                    ## Raises IndexError if the event has no reco info.
                    mc_hit[mc_hit.event == evt].X.values[0]
                    hx = mc_hit[mc_hit.event == evt].X.values
                    hy = mc_hit[mc_hit.event == evt].Y.values
                    hz = mc_hit[mc_hit.event == evt].Z.values
                    hq = mc_hit[mc_hit.event == evt].Q.values

                    s2  = pmap.s2s[0]
                    rs2 = pmf.rebin_peak(s2, 2)
                    p_z = (rs2.times - pmap.s1s[0].time_at_max_energy) / 1000

                    new_row = [s2.pmts.waveform(x).sum() for x in range(12)]
                    cn_row  = [life_correction(hx, hy, hz, hq, p_z,
                                               rs2.pmts.waveform(x))
                               for x in range(12)]
                    new_row = np.column_stack((new_row, cn_row)).flatten()
                    new_row = np.insert(new_row, 0, s2.total_energy)
                    mc_sums.loc[len(mc_sums)] = list(new_row)
                except IndexError:
                    continue

    da_sums = pd.DataFrame(columns=dfcols)
    for fn in da_file_list:
        print('Reading data file ', fn)
        pmaps = load_pmaps(fn)
        print('...data got')
        for evt, pmap in pmaps.items():
            if len(pmap.s2s) == 1 and len(pmap.s1s) == 1:
                try:
                    da_hit[da_hit.event == evt].X.values[0]
                    hx = da_hit[da_hit.event == evt].X.values
                    hy = da_hit[da_hit.event == evt].Y.values
                    hz = da_hit[da_hit.event == evt].Z.values
                    hq = da_hit[da_hit.event == evt].Q.values

                    s2  = pmap.s2s[0]
                    rs2 = pmf.rebin_peak(s2, 1)
                    p_z = (rs2.times - pmap.s1s[0].time_at_max_energy) / 1000

                    new_row = [s2.pmts.waveform(x).sum() for x in range(12)]
                    cn_row  = [life_correction(hx, hy, hz, hq, p_z,
                                               rs2.pmts.waveform(x))
                               for x in range(12)]
                    new_row = np.column_stack((new_row, cn_row)).flatten()
                    new_row = np.insert(new_row, 0, s2.total_energy)
                    da_sums.loc[len(da_sums)] = list(new_row)
                except IndexError:
                    continue

    ## Approximate the trigger condition in MC using PMTs 0 and 2.
    trg0 = mc_sums['p0'] * pmt_scales[1] > 8835
    trg2 = mc_sums['p2'] * pmt_scales[3] > 7836

    ## Make some plots
    mc_sums[trg0 & trg2].wf_sum.plot.hist(bins=np.linspace(0, 1.2e6, 100),
                                          label='MC', density=True,
                                          histtype='step')
    da_sums.wf_sum.plot.hist(bins=np.linspace(0, 1.2e6, 100), label='data',
                             density=True, histtype='step')
    plt.title('PMT sum')
    plt.xlabel('Summed PMT charge (pe)')
    plt.yscale('log')
    plt.show()

    ## Attempt big fit (only the lifetime-corrected columns, [1::2],
    ## are used in the function).
    efunc = general_chi2(mc_sums.drop('wf_sum', axis=1).values.T)
    full_dats = np.apply_along_axis(np.histogram, 1,
                                    da_sums.drop('wf_sum', axis=1).values.T[1::2],
                                    bins=np.linspace(0, 120000, 100))[:, 0]
    dat_norms = np.fromiter((s.sum() for s in full_dats), int)
    full_dats = np.concatenate(full_dats)
    errs = np.sqrt(full_dats)
    errs[errs <= 0] = 3

    par_seed = pmt_scales[1:]
    pfit, cov, infodict, msg, ier = leastsq(efunc, par_seed,
                                            args=(full_dats, errs, dat_norms),
                                            full_output=True)
    print('Fit res: ', pfit, ier, infodict, msg)

    trg0 = mc_sums['p0'] * pfit[1] * pfit[0] > 8835
    trg2 = mc_sums['p2'] * pfit[1] * pfit[2] > 7836

    fig, axes = plt.subplots(nrows=3, ncols=4, figsize=(20, 6))
    mc_sums['new_sum']  = mc_sums[dfcols[1::2]].multiply(pfit).sum(axis=1)
    mc_sums['new_csum'] = mc_sums[dfcols[2::2]].multiply(pfit).sum(axis=1)
    da_sums['csum']     = da_sums[dfcols[2::2]].sum(axis=1)
    for cname, ax, p in zip(dfcols[2::2], axes.flatten(), pfit):
        ax.set_title('PMT ' + cname[2:] + ' pe distribution')
        ax.set_xlabel('Photoelectrons')
        ax.set_ylabel('AU')
        mc_sums[trg0 & trg2][cname].multiply(p).plot.hist(ax=ax,
                                                          bins=np.linspace(0, 120000, 100),
                                                          label='MC',
                                                          density=True,
                                                          histtype='step')
        da_sums[cname].plot.hist(ax=ax, bins=np.linspace(0, 120000, 100),
                                 label='data', density=True, histtype='step')
        ax.legend()
    plt.tight_layout()
    fig.show()
    plt.show()

    mc_sums[trg0 & trg2].new_sum.plot.hist(bins=np.linspace(0, 1.2e6, 100),
                                           label='MC', density=True,
                                           histtype='step')
    da_sums.wf_sum.plot.hist(bins=np.linspace(0, 1.2e6, 100), label='data',
                             density=True, histtype='step')
    plt.title('Scaled PMT sum')
    plt.xlabel('Summed PMT charge (pe)')
    plt.yscale('log')
    plt.show()

    mc_sums[trg0 & trg2].new_csum.plot.hist(bins=np.linspace(0, 1.2e6, 100),
                                            label='MC', density=True,
                                            histtype='step')
    da_sums.csum.plot.hist(bins=np.linspace(0, 1.2e6, 100), label='data',
                           density=True, histtype='step')
    plt.title('Lifetime-corrected PMT sum')
    plt.xlabel('Summed PMT charge (pe)')
    plt.yscale('log')
    plt.show()
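
## `sorter_func`, used above and in the scripts below, is not defined in
## this section. The sketch below shows one plausible implementation
## (a key function that orders files by the first integer after the common
## base name); it is illustrative only, the real helper may differ.
## Assumes `re` is imported, as elsewhere in this file.
def sorter_func_sketch(file_base):
    def sorter(fname):
        ## Sort by the first integer appearing after the base path;
        ## files without one sort first.
        match = re.search(r'(\d+)', fname[len(file_base):])
        return int(match.group(1)) if match else -1
    return sorter
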
def read_dsts(path_to_dsts):
    """Read all dst files in a directory, returning the nS12 and
    event dataframes."""
    filenames = glob.glob(path_to_dsts + '/*')
    dstdf = load_dsts(filenames, group='DST'  , node='Events')
    nsdf  = load_dsts(filenames, group='Extra', node='nS12'  )
    return nsdf, dstdf
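
## Usage sketch (hypothetical directory):
## nsdf, dstdf = read_dsts('/data/r6971/kdst')
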
def load_data(fout, dir_in, run):
    """
    Load and merge all kdst files for a run and write summary
    statistics to fout. Returns the merged dst.

    Notes
    -----
    1. The kdst contains duplicated event numbers because of the
       multiplicity coming from both S1 and S2.
    2. pd.Series.nunique returns an int; pd.DataFrame.nunique returns
       a Series with the number of unique entries per column.
    """
    path = dir_in + '/' + run + '/kdst/'
    files_all = [path + f for f in os.listdir(path)
                 if os.path.isfile(os.path.join(path, f))]
    dst = load_dsts(files_all, "DST", "Events")
    time_run = dst.time.mean()

    ## Count S2s (one table row each) and unique events.
    unique_events = ~dst.event.duplicated()
    num_of_S2s    = np.size(unique_events)
    num_of_evts   = np.count_nonzero(unique_events)
    print(num_of_evts)

    fout.write(f'dst_entries {len(dst)}\n')
    fout.write(f'time_run {time_run}\n')
    fout.write(f's2_tot {num_of_S2s}\n')
    fout.write(f'evt_tot {num_of_evts}\n')

    ## Compute the number of S1s and S2s per event.
    df = dst[~dst.time.duplicated()]
    tot_ev = df.event.nunique()
    s1_num = df.nS1.values
    s2_num = df.nS2.values
    fout.write(f'num_of_ev_check {tot_ev}\n')

    ## Efficiencies (in %) for events with exactly i S1s (i = 0..6)
    ## and exactly i S2s (i = 0..8), with binomial uncertainties.
    for i in range(7):
        n_i = np.count_nonzero(s1_num == i)
        fout.write(f'eff_{i}s1 {n_i/tot_ev*100:.5f}\n')
        fout.write(f'eff_{i}s1_u {error_eff(tot_ev, n_i/tot_ev)*100:.5f}\n')
    for i in range(9):
        n_i = np.count_nonzero(s2_num == i)
        fout.write(f'eff_{i}s2 {n_i/tot_ev*100:.5f}\n')
        fout.write(f'eff_{i}s2_u {error_eff(tot_ev, n_i/tot_ev)*100:.5f}\n')

    return dst
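
## `error_eff` is not defined in this section. For a counting efficiency
## eff = k/N the binomial uncertainty is sqrt(eff * (1 - eff) / N); the
## sketch below is consistent with how it is called above (illustrative,
## not necessarily the original implementation).
def error_eff_sketch(nevt, eff):
    """Binomial uncertainty on an efficiency measured from nevt events."""
    return np.sqrt(eff * (1 - eff) / nevt)
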
def relative_pmt_response():
    """
    Script which uses pmaps (to be generalised in future to check the XY
    dependence) to look at the relative response of the PMTs in Kr events
    and compare with the Poisson mu results from calibrations.
    """

    pmap_file_base = sys.argv[1]
    dst_file_base  = sys.argv[2]

    run_number = pmap_file_base.split('/')[2][1:]
    pmt_dats   = DB.DataPMT(int(run_number))

    s1hists    = {x: [] for x in range(12)}
    s2hists    = {x: [] for x in range(12)}
    s1sumh     = []
    s2sumh     = []
    hitPMTdist = {x: [] for x in range(12)}
    hitPMTZpos = {x: [] for x in range(12)}

    pmap_sorter    = sorter_func(pmap_file_base)
    pmap_file_list = sorted(glob(pmap_file_base + '*.h5'), key=pmap_sorter)
    dst_sorter     = sorter_func(dst_file_base)
    dst_file_list  = sorted(glob(dst_file_base + '*.h5'), key=dst_sorter)

    dst_frame    = load_dsts(dst_file_list, 'RECO', 'Events')
    dst_evt_list = dst_frame['event'].unique()

    for fn in pmap_file_list:
        s1df, s2df, _, s1pmtdf, s2pmtdf = load_pmaps_as_df(fn)
        common_evts = np.intersect1d(s1pmtdf['event'].unique(), dst_evt_list)
        for evt in common_evts:
            s1evt   = s1pmtdf[s1pmtdf['event'] == evt]
            s2evt   = s2pmtdf[s2pmtdf['event'] == evt]
            s1sevt  = s1df[s1df['event'] == evt]
            s2sevt  = s2df[s2df['event'] == evt]
            hit_evt = dst_frame[dst_frame['event'] == evt]
            ## Not well defined for multi-S2 events.
            if (hit_evt['npeak'].nunique()  == 1 and
                len(s2evt['peak'].unique()) == 1 and
                len(s1evt['peak'].unique()) == 1):
                hit_x = hit_evt['X'].iloc[0]
                hit_y = hit_evt['Y'].iloc[0]
                hit_z = hit_evt['Z'].iloc[0]
                for peak in s1evt['peak'].unique():
                    s1peak = s1evt[s1evt['peak'] == peak]
                    s1sumh.append(s1sevt[s1sevt['peak'] == peak]['ene'].sum())
                    pmt1Q = s1peak[s1peak['npmt'] == 1]['ene'].sum()
                    for pmt in s1peak['npmt'].unique():
                        sens = pmt_dats[pmt_dats['SensorID'] == pmt]
                        hitPMTdist[pmt].append(np.sqrt(np.power(hit_x - sens.X.values, 2) +
                                                       np.power(hit_y - sens.Y.values, 2)))
                        hitPMTZpos[pmt].append(hit_z)
                        if pmt != 1:
                            s1hists[pmt].append(s1peak[s1peak['npmt'] == pmt]['ene'].sum() / pmt1Q)
                        else:
                            s1hists[pmt].append(pmt1Q)
                for peak in s2evt['peak'].unique():
                    s2peak = s2evt[s2evt['peak'] == peak]
                    s2sumh.append(s2sevt[s2sevt['peak'] == peak]['ene'].sum())
                    if s2sumh[-1] > 4000:
                        pmt1Q = s2peak[s2peak['npmt'] == 1]['ene'].sum()
                        for pmt in s2peak['npmt'].unique():
                            if pmt != 1:
                                s2hists[pmt].append(s2peak[s2peak['npmt'] == pmt]['ene'].sum() / pmt1Q)
                            else:
                                s2hists[pmt].append(pmt1Q)

    ## Make the plots
    s1sumh = np.array(s1sumh)
    s2sumh = np.array(s2sumh)
    figs0, axes0 = plt.subplots(nrows=1, ncols=2)
    axes0[0].hist(s1sumh)
    axes0[0].set_title('PMT sum S1 distribution')
    axes0[1].hist(s2sumh)
    axes0[1].set_title('PMT sum S2 distribution')
    plt.tight_layout()
    figs0.show()
    figs0.savefig('SumChargescharge_R' + run_number + '.png')

    figs1, axess1 = plt.subplots(nrows=3, ncols=4, figsize=(20, 6))
    s1bins = np.arange(-2, 4, 0.1)
    s2bins = np.arange(0.4, 1.1, 0.005)
    s1select = (s1sumh > 2) & (s1sumh < 150)
    for (key, val), ax in zip(s1hists.items(), axess1.flatten()):
        if key == 1:
            ax.hist(np.array(val)[s1select], bins=100)
            ax.set_title('PMT 1 S1 charge')
            ax.set_ylabel('AU')
            ax.set_xlabel('integrated charge in PMT1 (pe)')
            ## Correlation of the PMT 1 charge with the hit z position.
            zpos = np.array(hitPMTZpos[key])[s1select]
            chrg = np.array(val)[s1select]
            covar = np.cov(zpos.reshape(1, len(zpos)),
                           chrg.reshape(1, len(chrg)))[0, 1]
            corr_coef = covar / (np.std(chrg, ddof=1) * np.std(zpos, ddof=1))
            print('Sensor ', key, ' correlation coefficient = ', corr_coef)
        else:
            vals, bins, _ = ax.hist(np.array(val)[s1select], bins=s1bins)
            ax.set_title('PMT ' + str(key) + ' S1 relative charge')
            ax.set_ylabel('AU')
            ax.set_xlabel('pmt q / pmt1 q')
            s1select2 = s1select & (np.array(val) > 0) & (np.array(val) <= 2)
            print('Sensor ', key, ' mean = ', np.mean(np.array(val)[s1select2]))
            ## Fit only the bins with at least 100 entries.
            useful_bins = np.argwhere(vals >= 100)
            b1 = useful_bins[0][0]
            b2 = useful_bins[-1][0]
            errs = np.sqrt(vals[b1:b2])
            fvals = fitf.fit(fitf.gauss,
                             shift_to_bin_centers(bins)[b1:b2], vals[b1:b2],
                             seed=(vals.sum(), bins[vals.argmax()], 0.1),
                             sigma=errs)
            ax.plot(shift_to_bin_centers(bins)[b1:b2],
                    fvals.fn(shift_to_bin_centers(bins)[b1:b2]))
            print('Fit S1 ' + str(key), fvals.values, fvals.errors, fvals.chi2)
    plt.tight_layout()
    figs1.show()
    figs1.savefig('s1relativechargeThzoom_R' + run_number + '.png')

    fitVals = {}
    figs2, axess2 = plt.subplots(nrows=3, ncols=4, figsize=(20, 6))
    for (key, val), ax in zip(s2hists.items(), axess2.flatten()):
        if key == 1:
            ax.set_title('PMT 1 S2 charge vs S2 sum charge')
            ax.set_xlabel('S2 pmt sum charge (pe)')
            ax.set_ylabel('integrated charge in PMT1 (pe)')
            ax.scatter(s2sumh[s2sumh > 4000], np.array(val)[s2sumh > 4000])
        else:
            ax.set_title('PMT ' + str(key) + ' S2 relative charge vs pmt sum')
            ax.set_ylabel('pmt q / pmt1 q')
            ax.set_xlabel('integrated charge in PMT sum (pe)')
            ax.scatter(s2sumh[s2sumh > 4000], np.array(val)[s2sumh > 4000])
            sums = s2sumh[s2sumh > 4000]
            chrg = np.array(val)[s2sumh > 4000]
            covar = np.cov(sums.reshape(1, len(sums)),
                           chrg.reshape(1, len(chrg)))[0, 1]
            corr_coef = covar / (np.std(chrg, ddof=1) * np.std(sums, ddof=1))
            print('Sensor ', key, ' correlation coefficient = ', corr_coef)
            ## Previous attempt (disabled): fit the relative charge
            ## distribution, limiting the fit to the region with
            ## stat. error <= 10 % (Poisson).
            ## vals, bins, _ = ax.hist(np.array(val)[s2sumh > 4000], bins=s2bins)
            ## useful_bins = np.argwhere(vals >= 200)
            ## b1 = useful_bins[0][0]
            ## b2 = useful_bins[-1][0]
            ## errs = np.sqrt(vals[b1:b2])
            ## print('Seed check: ', (vals.sum(), bins[vals.argmax()], 0.02))
            ## fvals = fitf.fit(fitf.gauss, shift_to_bin_centers(bins)[b1:b2],
            ##                  vals[b1:b2],
            ##                  seed=(vals.sum(), bins[vals.argmax()], 0.02),
            ##                  sigma=errs, bounds=[(0, 0, 0.00001), (1e10, 2, 3)])
            ## ax.plot(shift_to_bin_centers(bins),
            ##         fitf.gauss(shift_to_bin_centers(bins), *fvals.values))
            ## fitVals[key] = (fvals.values[1], fvals.values[2])
            ## print('Fit PMT ' + str(key), fvals.values, fvals.errors, fvals.chi2)
    plt.tight_layout()
    figs2.show()
    figs2.savefig('s2relativechargeThvsSum_R' + run_number + '.png')

    ## Comparison with the calibration Poisson mu values (disabled).
    ## Requires the fit block above to fill fitVals.
    ## figcal, axcal = plt.subplots()
    ## axcal.errorbar(list(fitVals.keys()),
    ##                np.fromiter((x[0] for x in fitVals.values()), float),
    ##                yerr=np.fromiter((x[1] for x in fitVals.values()), float),
    ##                label='Average response of PMTs to Kr relative to PMT 1')
    ## ## Get the calibration info for comparison.
    ## cal_files = [fname for fname in sys.argv[3:]]
    ## read_params = partial(spr, table_name='FIT_pmt_scaled_dark_pedestal',
    ##                       param_names=['poisson_mu'])
    ## ## Assumes ordering, ok?
    ## for i, fn in enumerate(cal_files):
    ##     cal_run = fn.split('_')[1]
    ##     with tb.open_file(fn) as cal_in:
    ##         pmt1Val = 0
    ##         pmt1Err = 0
    ##         cVals = []
    ##         cErrs = []
    ##         for sens, (pars, errs) in read_params(cal_in):
    ##             if sens != 1:
    ##                 cVals.append(pars['poisson_mu'])
    ##                 cErrs.append(errs['poisson_mu'])
    ##             else:
    ##                 pmt1Val = pars['poisson_mu']
    ##                 pmt1Err = errs['poisson_mu']
    ##     normVals = np.array(cVals) / pmt1Val
    ##     normErrs = normVals * np.sqrt(np.power(np.array(cErrs) / np.array(cVals), 2) +
    ##                                   np.power(pmt1Err / pmt1Val, 2))
    ##     axcal.errorbar(list(fitVals.keys()), normVals,
    ##                    yerr=normErrs, label='Calibration ' + cal_run)
    ## axcal.legend()
    ## axcal.set_xlabel('PMT sensor ID')
    ## axcal.set_ylabel('Response relative to that of PMT 1')
    ## figcal.show()
    ## figcal.savefig('calPoisKrRelCompStatsFILT.png')

    input('plots good?')
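
## Invocation sketch (hypothetical layout: the run number is parsed from
## the third path component, so bases like 'prod/v1.2/r6971/...' are expected):
## python relative_pmt_response.py prod/v1.2/r6971/pmaps_ prod/v1.2/r6971/hdst_
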
    dct.update({'nevents': len(dfp)})
    return pd.Series(dct)


fnames_cp  = [f for f in fnames]
lenfs      = len(fnames_cp)
fnames_tmp = fnames_cp[:20]
for i in range(30, lenfs, 5):
    fnames_tp = fnames[i - 30:i]
    runs = re.findall(r'\d+', fnames_tp[0])[0]
    rune = re.findall(r'\d+', fnames_tp[-1])[0]
    fout = folder_out + 'runs_' + runs + '_' + rune + '.h5'
    mp_chunk = []
    print(runs, rune)
    for tbname in tablenames:
        data = load_dsts(fnames_tp, 'KDST', tbname)
        xbin, ybin = data.xbin.unique()[0], data.ybin.unique()[0]
        xbins = np.linspace(xbin - 10, xbin + 10, 11)  # 2 mm bins
        ybins = np.linspace(ybin - 10, ybin + 10, 11)
        xcenters = (xbins[1:] + xbins[:-1]) / 2
        ycenters = (ybins[1:] + ybins[:-1]) / 2
        data = data.assign(xbin=pd.cut(data.X, xbins, labels=xcenters),
                           ybin=pd.cut(data.Y, ybins, labels=ycenters))
        # correct the energy columns for lifetime and normalisation
        ecols = [f'E_{i}' for i in range(12)] + ['S2e']
        for col in ecols:
            data[col] = data[col] * np.exp(data.Z / data['lt']) / data['e0cen']
        # find the mean and std of the gaussians
        means = data.groupby(['xbin', 'ybin']).apply(mean_and_std).reset_index()
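
## The fragment above starts inside a helper whose head is not shown.
## Given how it is used, data.groupby(['xbin', 'ybin']).apply(mean_and_std),
## a plausible shape for the full helper (hypothetical column selection) is:
def mean_and_std_sketch(dfp):
    dct = {}
    for col in [f'E_{i}' for i in range(12)] + ['S2e']:
        dct[f'{col}_mean'] = dfp[col].mean()
        dct[f'{col}_std']  = dfp[col].std()
    dct.update({'nevents': len(dfp)})
    return pd.Series(dct)
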
def generate_pdfs():
    """
    Generate SiPM PDFs using Kr RAW data.
    Multiple types of PDF are generated:
    Full spectrum PDFs : using the full buffer
    Z vetoed PDFs      : vetoing all sipms for regions in z where there
                         is an identified s1 or s2
    1 ring vetoed PDFs : vetoing sipms within 1 ring distance of a hit
    2 ring vetoed PDFs : as above for 2 rings
    ...
    """

    pmap_file_base = sys.argv[1]
    hit_file_base  = sys.argv[2]
    raw_file_base  = sys.argv[3]

    pmap_sorter = sorter_func(pmap_file_base)
    pmap_files  = sorted(glob(pmap_file_base + '*.h5'), key=pmap_sorter)
    hit_sorter  = sorter_func(hit_file_base)
    hit_files   = sorted(glob(hit_file_base + '*.h5'), key=hit_sorter)
    raw_sorter  = sorter_func(raw_file_base)
    raw_files   = sorted(glob(raw_file_base + '*.h5'), key=raw_sorter)

    ## Details of the raw waveforms to aid vetoing (assumes all the same in the run)
    with tb.open_file(raw_files[0]) as rwf_in:
        sipmrwf  = rwf_in.root.RD.sipmrwf[0][0]
        wf_range = np.arange(len(sipmrwf))

    run_no = int(sys.argv[4])
    ## Gains and sensor positions
    sipm_gains = DB.DataSiPM(run_no).adc_to_pes.values
    sipm_xy    = DB.DataSiPM(run_no)[['X', 'Y']].values

    ## output
    histbins    = np.arange(-10, 300, 0.1)
    bin_centres = shift_to_bin_centers(histbins)
    with tb.open_file('vetoedPDFs_R' + str(run_no) + '.h5', 'w') as pdf_out:
        HIST = partial(hist_writer, pdf_out,
                       group_name='HIST',
                       n_sensors=1792,
                       n_bins=len(bin_centres),
                       bin_centres=bin_centres)
        full_spec = HIST(table_name='full_spec')
        z_vetoed  = HIST(table_name='z_vetoed')
        one_ring  = HIST(table_name='one_ring_vetoed')
        two_ring  = HIST(table_name='two_ring_vetoed')
        thr_ring  = HIST(table_name='thr_ring_vetoed')
        inv_thre  = HIST(table_name='thr_ring_avetoed')

        ## Hit info
        hit_positions = load_dsts(hit_files, 'DST',
                                  'Events')[['event', 'X', 'Y']].values

        ## Start assuming Kr data and kdst.
        ## For each event: [evt_no, combined veto mask]
        reduced_pulse_info = []
        for pmf in pmap_files:
            sys.stdout.write(pmf + '\n')
            sys.stdout.flush()
            try:
                s1s, s2s, _, _, _ = load_pmaps_as_df(pmf)
            except (ValueError, tb.exceptions.NoSuchNodeError):
                print("Empty file. Skipping.")
                continue
            for evtNo in s1s['event'].unique():
                evtS1 = s1s[s1s['event'] == evtNo]
                evtS2 = s2s[s2s['event'] == evtNo]
                mask_list = []
                for is1 in evtS1['peak'].unique():
                    s1 = evtS1[evtS1['peak'] == is1]
                    mask_list.append(
                        (wf_range < s1['time'].iloc[0]  / units.mus - 1) |
                        (wf_range > s1['time'].iloc[-1] / units.mus + 1))
                for is2 in evtS2['peak'].unique():
                    s2 = evtS2[evtS2['peak'] == is2]
                    mask_list.append(
                        (wf_range < s2['time'].iloc[0]  / units.mus - 2) |
                        (wf_range > s2['time'].iloc[-1] / units.mus + 2))
                reduced_pulse_info.append([evtNo,
                                           np.logical_and.reduce(mask_list)])
        print('masking info stored')

        mask_counter = 0
        pmap_evts = np.fromiter((x[0] for x in reduced_pulse_info), int)
        for rawf in raw_files:
            sys.stdout.write(rawf + '\n')
            sys.stdout.flush()
            if mask_counter >= len(reduced_pulse_info):
                continue
            try:
                ## empty arrays for the histograms
                shape = 1792, len(bin_centres)
                hist_full_spec = np.zeros(shape, dtype=int)
                hist_z_vetoed  = np.zeros(shape, dtype=int)
                hist_1_vetoed  = np.zeros(shape, dtype=int)
                hist_2_vetoed  = np.zeros(shape, dtype=int)
                hist_3_vetoed  = np.zeros(shape, dtype=int)
                hist_3_aveto   = np.zeros(shape, dtype=int)
                with tb.open_file(rawf) as raw_in:
                    revent_nos = np.fromiter(
                        (x[0] for x in raw_in.root.Run.events), int)

                    ## Indices of the raw events with pmap and hit info.
                    pmap_overlap_indx = np.arange(revent_nos.shape[0])[
                        np.in1d(revent_nos, pmap_evts)]
                    hit_overlap_indx  = np.arange(revent_nos.shape[0])[
                        np.in1d(revent_nos, hit_positions[:, 0])]
                    hit_indcs = np.arange(hit_positions[:, 0].shape[0])[
                        np.in1d(hit_positions[:, 0], revent_nos)]
                    hindx = 0
                    for indx in pmap_overlap_indx:
                        cwf = csf.sipm_processing["subtract_mode_calibrate"](
                            raw_in.root.RD.sipmrwf[indx], sipm_gains)
                        hist_full_spec += cf.bin_waveforms(cwf, histbins)
                        z_veto = reduced_pulse_info[mask_counter][1]
                        hist_z_vetoed += cf.bin_waveforms(cwf[:, z_veto],
                                                          histbins)
                        if indx in hit_overlap_indx:
                            hit_p = hit_positions[hit_indcs[hindx], 1:]
                            hindx += 1
                            hist_1_vetoed += cf.bin_waveforms(
                                ring_veto(cwf, 1, z_veto, hit_p, sipm_xy),
                                histbins)
                            hist_2_vetoed += cf.bin_waveforms(
                                ring_veto(cwf, 2, z_veto, hit_p, sipm_xy),
                                histbins)
                            thrVeto = ring_veto(cwf, 3, z_veto, hit_p, sipm_xy)
                            hist_3_vetoed += cf.bin_waveforms(thrVeto, histbins)
                            hist_3_aveto  += cf.bin_waveforms(
                                thrVeto[:, np.invert(z_veto)], histbins)
                        mask_counter += 1

                full_spec(hist_full_spec)
                z_vetoed(hist_z_vetoed)
                one_ring(hist_1_vetoed)
                two_ring(hist_2_vetoed)
                thr_ring(hist_3_vetoed)
                inv_thre(hist_3_aveto)
            except tb.HDF5ExtError:
                print('corrupt file')
                continue
    hdst = hdst_psf_processing(df[z_sel], [Xrange, Yrange], the_db)
    # Safety check (single sipm events not considered to be physical)
    hdst = hdst[hdst.nsipm > 1]

    # Loop to extract the PSF in the different regions.
    bin_edges = [np.linspace(*rr, [nbinX, nbinY][i] + 1)
                 for i, rr in enumerate([Xrange, Yrange])]
    psf_new, entries_new, bins_new = create_psf((hdst.RelX.values,
                                                 hdst.RelY.values),
                                                hdst.NormQ, bin_edges)

    writer(bins_new[0], bins_new[1], [0], 0., 0., z + z_step / 2,
           np.asarray([psf_new    ]).transpose((1, 2, 0)),
           np.asarray([entries_new]).transpose((1, 2, 0)))


start = int(sys.argv[1])
numb  = int(sys.argv[2])
for i in range(start, start + numb):
    thefile = filename.format(i)
    try:
        df = dstio.load_dsts([thefile], 'RECO', 'Events')\
                  .drop(['Xrms', 'Yrms', 'Qc', 'Ec', 'track_id'],
                        axis='columns')\
                  .reset_index(drop=True)
    except Exception:
        # Bare except narrowed: only missing or unreadable files are expected.
        print(f'File {thefile} not found or corrupted.')
        continue
    compute_psf(df, i)
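
## Invocation sketch: process `numb` files starting at index `start`,
## assuming a module-level template such as
## filename = '/data/hdst/hdst_{}.h5' (hypothetical):
## python compute_psf.py 1000 5
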