def analyze_cluster_size_per_scan_parameter(input_file_hits, output_file_cluster_size, parameter='GDAC', max_chunk_size=10000000, overwrite_output_files=False, output_pdf=None):
    ''' This method takes a hit file and determines the cluster size for the different values of the given scan parameter.

    Parameters
    ----------
    input_file_hits: string
        The data file with the hit table.
    output_file_cluster_size: string
        The data file with the results
    parameter: string
        The name of the parameter to separate the data into (e.g.: PlsrDAC)
    max_chunk_size: int
        the maximum chunk size used during read, if too big memory error occurs, if too small analysis takes longer
    overwrite_output_files: bool
        Set to true to overwrite the output file if it already exists
    output_pdf: PdfPages
        PdfPages file object, if None the plot is printed to screen, if False nothing is printed
    '''
    logging.info('Analyze the cluster sizes for different ' + parameter + ' settings for ' + input_file_hits)
    if os.path.isfile(output_file_cluster_size) and not overwrite_output_files:  # skip analysis if already done
        logging.info('Analyzed cluster size file ' + output_file_cluster_size + ' already exists. Skip cluster size analysis.')
    else:
        with tb.open_file(output_file_cluster_size, mode="w") as out_file_h5:  # file to write the data into
            filter_table = tb.Filters(complib='blosc', complevel=5, fletcher32=False)  # compression of the written data
            parameter_group = out_file_h5.create_group(out_file_h5.root, parameter, title=parameter)  # node to store the data
            cluster_size_total = None  # final array for the cluster size per GDAC
            with tb.open_file(input_file_hits, mode="r+") as in_hit_file_h5:  # open the actual hit file
                meta_data_array = in_hit_file_h5.root.meta_data[:]
                scan_parameter = analysis_utils.get_scan_parameter(meta_data_array)  # get the scan parameters
                if scan_parameter:  # if a GDAC scan parameter was used analyze the cluster size per GDAC setting
                    scan_parameter_values = scan_parameter[parameter]  # scan parameter settings used
                    if len(scan_parameter_values) == 1:  # only analyze per scan step if there is more than one scan step
                        logging.warning('The file ' + str(input_file_hits) + ' has no different ' + str(parameter) + ' parameter values. Omit analysis.')
                    else:
                        logging.info('Analyze ' + input_file_hits + ' per scan parameter ' + parameter + ' for ' + str(len(scan_parameter_values)) + ' values from ' + str(np.amin(scan_parameter_values)) + ' to ' + str(np.amax(scan_parameter_values)))
                        event_numbers = analysis_utils.get_meta_data_at_scan_parameter(meta_data_array, parameter)['event_number']  # get the event numbers in meta_data where the scan parameter changes
                        parameter_ranges = np.column_stack((scan_parameter_values, analysis_utils.get_ranges_from_array(event_numbers)))
                        hit_table = in_hit_file_h5.root.Hits
                        analysis_utils.index_event_number(hit_table)
                        total_hits, total_hits_2, index = 0, 0, 0
                        chunk_size = max_chunk_size
                        # initialize the analysis and set settings
                        analyze_data = AnalyzeRawData()
                        analyze_data.create_cluster_size_hist = True
                        analyze_data.create_cluster_tot_hist = True
                        analyze_data.histogram.set_no_scan_parameter()  # one has to tell the histogramming the number of scan parameters for correct occupancy hist allocation
                        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=hit_table.shape[0], term_width=80)
                        progress_bar.start()
                        for parameter_index, parameter_range in enumerate(parameter_ranges):  # loop over the selected events
                            analyze_data.reset()  # resets the data of the last analysis
                            logging.debug('Analyze GDAC = ' + str(parameter_range[0]) + ' ' + str(int(float(float(parameter_index) / float(len(parameter_ranges)) * 100.0))) + '%')
                            start_event_number = parameter_range[1]
                            stop_event_number = parameter_range[2]
                            logging.debug('Data from events = [' + str(start_event_number) + ',' + str(stop_event_number) + '[')
                            actual_parameter_group = out_file_h5.create_group(parameter_group, name=parameter + '_' + str(parameter_range[0]), title=parameter + '_' + str(parameter_range[0]))
                            # loop over the hits in the actual selected events with optimizations: variable chunk size, start word index given
                            readout_hit_len = 0  # variable to calculate an optimal chunk size value from the number of hits for speed up
                            for hits, index in analysis_utils.data_aligned_at_events(hit_table, start_event_number=start_event_number, stop_event_number=stop_event_number, start_index=index, chunk_size=chunk_size):
                                total_hits += hits.shape[0]
                                analyze_data.analyze_hits(hits)  # analyze the selected hits in chunks
                                readout_hit_len += hits.shape[0]
                                progress_bar.update(index)
                            chunk_size = int(1.05 * readout_hit_len) if int(1.05 * readout_hit_len) < max_chunk_size else max_chunk_size  # to increase the readout speed, estimate the number of hits for one read instruction
                            if chunk_size < 50:  # limit the lower chunk size, there can always be a crazy event with more than 20 hits
                                chunk_size = 50
                            # get occupancy hist
                            occupancy = analyze_data.histogram.get_occupancy()  # just used here to check that the histogramming is consistent
                            # store and plot cluster size hist
                            cluster_size_hist = analyze_data.clusterizer.get_cluster_size_hist()
                            cluster_size_hist_table = out_file_h5.create_carray(actual_parameter_group, name='HistClusterSize', title='Cluster Size Histogram', atom=tb.Atom.from_dtype(cluster_size_hist.dtype), shape=cluster_size_hist.shape, filters=filter_table)
                            cluster_size_hist_table[:] = cluster_size_hist
                            if output_pdf is not False:
                                plotting.plot_cluster_size(hist=cluster_size_hist, title='Cluster size (' + str(np.sum(cluster_size_hist)) + ' entries) for ' + parameter + ' = ' + str(scan_parameter_values[parameter_index]), filename=output_pdf)
                            if cluster_size_total is None:  # true if no data was appended to the array yet
                                cluster_size_total = cluster_size_hist
                            else:
                                cluster_size_total = np.vstack([cluster_size_total, cluster_size_hist])
                            total_hits_2 += np.sum(occupancy)
                        progress_bar.finish()
                        if total_hits != total_hits_2:
                            logging.warning('Analysis shows inconsistent number of hits. Check needed!')
                        logging.info('Analyzed %d hits!', total_hits)
                        cluster_size_total_out = out_file_h5.create_carray(out_file_h5.root, name='AllHistClusterSize', title='All Cluster Size Histograms', atom=tb.Atom.from_dtype(cluster_size_total.dtype), shape=cluster_size_total.shape, filters=filter_table)
                        cluster_size_total_out[:] = cluster_size_total
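
# Example usage (sketch, not part of the original code): the file names below are hypothetical and the
# PdfPages object is assumed to come from matplotlib.backends.backend_pdf, as hinted by the docstring.
#
#     from matplotlib.backends.backend_pdf import PdfPages
#     with PdfPages('gdac_scan_cluster_size.pdf') as pdf:
#         analyze_cluster_size_per_scan_parameter(input_file_hits='gdac_scan_interpreted.h5',
#                                                 output_file_cluster_size='gdac_scan_cluster_size.h5',
#                                                 parameter='GDAC',
#                                                 output_pdf=pdf)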


def analyse_n_cluster_per_event(scan_base, include_no_cluster=False, time_line_absolute=True, combine_n_readouts=1000, chunk_size=10000000, plot_n_cluster_hists=False, output_pdf=None, output_file=None):
    ''' Determines the number of cluster per event as a function of time. To do this, the data of a fixed number of read outs is combined ('combine_n_readouts').

    Parameters
    ----------
    scan_base: list of str
        scan base names (e.g.: ['//data//SCC_50_fei4_self_trigger_scan_390', ])
    include_no_cluster: bool
        Set to true to also consider all events without any hit.
    combine_n_readouts: int
        the number of read outs to combine (e.g. 1000)
    chunk_size: int
        the maximum chunk size used during read, if too big memory error occurs, if too small analysis takes longer
    output_pdf: PdfPages
        PdfPages file object, if None the plot is printed to screen
    '''
    time_stamp = []
    n_cluster = []
    start_time_set = False

    for data_file in scan_base:
        with tb.open_file(data_file + '_interpreted.h5', mode="r+") as in_cluster_file_h5:
            # get data and data pointer
            meta_data_array = in_cluster_file_h5.root.meta_data[:]
            cluster_table = in_cluster_file_h5.root.Cluster

            # determine the event ranges to analyze (timestamp_start, start_event_number, stop_event_number)
            parameter_ranges = np.column_stack((analysis_utils.get_ranges_from_array(meta_data_array['timestamp_start'][::combine_n_readouts]), analysis_utils.get_ranges_from_array(meta_data_array['event_number'][::combine_n_readouts])))

            # create an event_number index (important for speed)
            analysis_utils.index_event_number(cluster_table)

            # initialize the analysis and set settings
            analyze_data = AnalyzeRawData()
            analyze_data.create_tot_hist = False
            analyze_data.create_bcid_hist = False

            # variables for read speed up
            index = 0  # index where to start the read out, 0 at the beginning, increased during looping
            best_chunk_size = chunk_size

            total_cluster = cluster_table.shape[0]

            progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=total_cluster, term_width=80)
            progress_bar.start()

            # loop over the selected events
            for parameter_index, parameter_range in enumerate(parameter_ranges):
                logging.debug('Analyze time stamp ' + str(parameter_range[0]) + ' and data from events = [' + str(parameter_range[2]) + ',' + str(parameter_range[3]) + '[ ' + str(int(float(float(parameter_index) / float(len(parameter_ranges)) * 100.0))) + '%')
                analyze_data.reset()  # resets the data of the last analysis

                # loop over the cluster in the actual selected events with optimizations: determine best chunk size, start word index given
                readout_cluster_len = 0  # variable to calculate an optimal chunk size value from the number of hits for speed up
                hist = None
                for clusters, index in analysis_utils.data_aligned_at_events(cluster_table, start_event_number=parameter_range[2], stop_event_number=parameter_range[3], start_index=index, chunk_size=best_chunk_size):
                    n_cluster_per_event = analysis_utils.get_n_cluster_in_events(clusters['event_number'])[:, 1]  # array with the number of cluster per event, cluster per event are at least 1
                    if hist is None:
                        hist = np.histogram(n_cluster_per_event, bins=10, range=(0, 10))[0]
                    else:
                        hist = np.add(hist, np.histogram(n_cluster_per_event, bins=10, range=(0, 10))[0])
                    if include_no_cluster and parameter_range[3] is not None:  # the stop event number is None for the last read out
                        hist[0] = (parameter_range[3] - parameter_range[2]) - len(n_cluster_per_event)  # add the events without any cluster
                    readout_cluster_len += clusters.shape[0]
                    total_cluster -= len(clusters)
                    progress_bar.update(index)
                best_chunk_size = int(1.5 * readout_cluster_len) if int(1.05 * readout_cluster_len) < chunk_size else chunk_size  # to increase the readout speed, estimate the number of hits for one read instruction

                if plot_n_cluster_hists:
                    plotting.plot_1d_hist(hist, title='Number of cluster per event at ' + str(parameter_range[0]), x_axis_title='Number of cluster', y_axis_title='#', log_y=True, filename=output_pdf)
                hist = hist.astype('f4') / np.sum(hist)  # calculate fraction from total numbers

                if time_line_absolute:
                    time_stamp.append(parameter_range[0])
                else:
                    if not start_time_set:
                        start_time = parameter_ranges[0, 0]
                        start_time_set = True
                    time_stamp.append((parameter_range[0] - start_time) / 60.0)
                n_cluster.append(hist)
            progress_bar.finish()
            if total_cluster != 0:
                logging.warning('Not all clusters were selected during analysis. Analysis is therefore not exact')

    if time_line_absolute:
        plotting.plot_scatter_time(time_stamp, n_cluster, title='Number of cluster per event as a function of time', marker_style='o', filename=output_pdf, legend=('0 cluster', '1 cluster', '2 cluster', '3 cluster') if include_no_cluster else ('0 cluster not plotted', '1 cluster', '2 cluster', '3 cluster'))
    else:
        plotting.plot_scatter(time_stamp, n_cluster, title='Number of cluster per event as a function of time', x_label='time [min.]', marker_style='o', filename=output_pdf, legend=('0 cluster', '1 cluster', '2 cluster', '3 cluster') if include_no_cluster else ('0 cluster not plotted', '1 cluster', '2 cluster', '3 cluster'))

    if output_file:
        with tb.open_file(output_file, mode="a") as out_file_h5:
            cluster_array = np.array(n_cluster)
            rec_array = np.array(zip(time_stamp, cluster_array[:, 0], cluster_array[:, 1], cluster_array[:, 2], cluster_array[:, 3], cluster_array[:, 4], cluster_array[:, 5]), dtype=[('time_stamp', float), ('cluster_0', float), ('cluster_1', float), ('cluster_2', float), ('cluster_3', float), ('cluster_4', float), ('cluster_5', float)]).view(np.recarray)
            try:
                n_cluster_table = out_file_h5.create_table(out_file_h5.root, name='n_cluster', description=rec_array, title='Cluster per event', filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                n_cluster_table[:] = rec_array
            except tb.exceptions.NodeError:
                logging.warning(output_file + ' already has a n_cluster node, do not overwrite existing.')
    return time_stamp, n_cluster
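
# Example usage (sketch, hypothetical file names): the function appends '_interpreted.h5' to each scan
# base name itself, so the clustered data has to exist already; with time_line_absolute=False the
# returned time stamps are minutes relative to the first read out.
#
#     timestamps, n_cluster_fractions = analyse_n_cluster_per_event(
#         scan_base=['data/SCC_50_fei4_self_trigger_scan_390'],
#         include_no_cluster=False,
#         time_line_absolute=False,
#         output_pdf=None,                 # None: plots are shown on screen
#         output_file='cluster_rates.h5')  # hypothetical output file with the 'n_cluster' table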


def analyze_beam_spot(scan_base, combine_n_readouts=1000, chunk_size=10000000, plot_occupancy_hists=False, output_pdf=None, output_file=None):
    ''' Determines the mean x and y beam spot position as a function of time. To do this, the data of a fixed number of read outs is combined ('combine_n_readouts'). The occupancy is determined for the given combined events and stored into a pdf file. At the end the beam x and y position is plotted into a scatter plot with absolute positions in um.

    Parameters
    ----------
    scan_base: list of str
        scan base names (e.g.: ['//data//SCC_50_fei4_self_trigger_scan_390', ])
    combine_n_readouts: int
        the number of read outs to combine (e.g. 1000)
    chunk_size: int
        the maximum chunk size used during read, if too big memory error occurs, if too small analysis takes longer
    output_pdf: PdfPages
        PdfPages file object, if None the plot is printed to screen
    '''
    time_stamp = []
    x = []
    y = []

    for data_file in scan_base:
        with tb.open_file(data_file + '_interpreted.h5', mode="r+") as in_hit_file_h5:
            # get data and data pointer
            meta_data_array = in_hit_file_h5.root.meta_data[:]
            hit_table = in_hit_file_h5.root.Hits

            # determine the event ranges to analyze (timestamp_start, start_event_number, stop_event_number)
            parameter_ranges = np.column_stack((analysis_utils.get_ranges_from_array(meta_data_array['timestamp_start'][::combine_n_readouts]), analysis_utils.get_ranges_from_array(meta_data_array['event_number'][::combine_n_readouts])))

            # create an event_number index (important)
            analysis_utils.index_event_number(hit_table)

            # initialize the analysis and set settings
            analyze_data = AnalyzeRawData()
            analyze_data.create_tot_hist = False
            analyze_data.create_bcid_hist = False
            analyze_data.histogram.set_no_scan_parameter()

            # variables for read speed up
            index = 0  # index where to start the read out, 0 at the beginning, increased during looping
            best_chunk_size = chunk_size

            progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=hit_table.shape[0], term_width=80)
            progress_bar.start()

            # loop over the selected events
            for parameter_index, parameter_range in enumerate(parameter_ranges):
                logging.debug('Analyze time stamp ' + str(parameter_range[0]) + ' and data from events = [' + str(parameter_range[2]) + ',' + str(parameter_range[3]) + '[ ' + str(int(float(float(parameter_index) / float(len(parameter_ranges)) * 100.0))) + '%')
                analyze_data.reset()  # resets the data of the last analysis

                # loop over the hits in the actual selected events with optimizations: determine best chunk size, start word index given
                readout_hit_len = 0  # variable to calculate an optimal chunk size value from the number of hits for speed up
                for hits, index in analysis_utils.data_aligned_at_events(hit_table, start_event_number=parameter_range[2], stop_event_number=parameter_range[3], start_index=index, chunk_size=best_chunk_size):
                    analyze_data.analyze_hits(hits)  # analyze the selected hits in chunks
                    readout_hit_len += hits.shape[0]
                    progress_bar.update(index)
                best_chunk_size = int(1.5 * readout_hit_len) if int(1.05 * readout_hit_len) < chunk_size else chunk_size  # to increase the readout speed, estimate the number of hits for one read instruction

                # get and store results
                occupancy_array = analyze_data.histogram.get_occupancy()
                projection_x = np.sum(occupancy_array, axis=0).ravel()
                projection_y = np.sum(occupancy_array, axis=1).ravel()
                x.append(analysis_utils.get_mean_from_histogram(projection_x, bin_positions=range(0, 80)))
                y.append(analysis_utils.get_mean_from_histogram(projection_y, bin_positions=range(0, 336)))
                time_stamp.append(parameter_range[0])
                if plot_occupancy_hists:
                    plotting.plot_occupancy(occupancy_array[:, :, 0], title='Occupancy for events between ' + time.strftime('%H:%M:%S', time.localtime(parameter_range[0])) + ' and ' + time.strftime('%H:%M:%S', time.localtime(parameter_range[1])), filename=output_pdf)
            progress_bar.finish()

    plotting.plot_scatter([i * 250 for i in x], [i * 50 for i in y], title='Mean beam position', x_label='x [um]', y_label='y [um]', marker_style='-o', filename=output_pdf)

    if output_file:
        with tb.open_file(output_file, mode="a") as out_file_h5:
            rec_array = np.array(zip(time_stamp, x, y), dtype=[('time_stamp', float), ('x', float), ('y', float)])
            try:
                beam_spot_table = out_file_h5.create_table(out_file_h5.root, name='Beamspot', description=rec_array, title='Beam spot position', filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                beam_spot_table[:] = rec_array
            except tb.exceptions.NodeError:
                logging.warning(output_file + ' already has a Beamspot node, do not overwrite existing.')
    return time_stamp, x, y
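
# Example usage (sketch, hypothetical file names): the returned mean positions are in pixel units; the
# conversion to um (250 um x 50 um FE-I4 pixel pitch) is only applied for the scatter plot created
# inside the function.
#
#     timestamps, mean_x, mean_y = analyze_beam_spot(
#         scan_base=['data/SCC_50_fei4_self_trigger_scan_390'],
#         combine_n_readouts=1000,
#         plot_occupancy_hists=False,
#         output_pdf=None,
#         output_file='beam_spot.h5')  # hypothetical output file with the 'Beamspot' table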