示例#1
0
 def test_analysis_utils_get_n_cluster_in_events(self):
     """Check the compiled get_n_cluster_in_events function.

     The event numbers are handed over as the first row of a two dimensional
     int64 array and include values beyond the signed 32 bit range.
     """
     raw_event_numbers = [0, 0, 1, 2, 2, 2, 4, 4000000000, 4000000000, 40000000000, 40000000000]
     # embed the event numbers into a 2D array (second row all zeros) so that the function gets a row view
     event_numbers = np.zeros((2, len(raw_event_numbers)), dtype=np.int64)
     event_numbers[0] = raw_event_numbers
     result = analysis_utils.get_n_cluster_in_events(event_numbers[0])
     # column 0: the distinct event numbers, column 1: the number of entries per event number
     expected_event_numbers = [0, 1, 2, 4, 4000000000, 40000000000]
     expected_n_cluster = [2, 1, 3, 1, 2, 2]
     self.assertListEqual(expected_event_numbers, result[:, 0].tolist())
     self.assertListEqual(expected_n_cluster, result[:, 1].tolist())
示例#2
0
 def test_analysis_utils_get_n_cluster_in_events(self):
     """Verify get_n_cluster_in_events (compiled function) on a row of a 2D int64 array."""
     first_row = [0, 0, 1, 2, 2, 2, 4, 4000000000, 4000000000, 40000000000, 40000000000]
     second_row = [0] * len(first_row)
     # the function is fed with the first row of a two dimensional array
     event_numbers = np.array([first_row, second_row], dtype=np.int64)
     result = analysis_utils.get_n_cluster_in_events(event_numbers[0])
     # expected result columns: event number, number of entries with this event number
     expected_columns = (
         [0, 1, 2, 4, 4000000000, 40000000000],
         [2, 1, 3, 1, 2, 2],
     )
     for column, expected in enumerate(expected_columns):
         self.assertListEqual(expected, result[:, column].tolist())
示例#3
0
def analyse_n_cluster_per_event(scan_base,
                                include_no_cluster=False,
                                time_line_absolute=True,
                                combine_n_readouts=1000,
                                chunk_size=10000000,
                                plot_n_cluster_hists=False,
                                output_pdf=None,
                                output_file=None):
    ''' Determines the number of cluster per event as a function of time. Therefore the data of a fixed number of read outs are combined ('combine_n_readouts').

    Parameters
    ----------
    scan_base: list of str
        scan base names (e.g.:  ['//data//SCC_50_fei4_self_trigger_scan_390', ]), '_interpreted.h5' is appended to get the input file names
    include_no_cluster: bool
        Set to true to also consider all events without any hit.
    time_line_absolute: bool
        if true the time stamps are used as they are, otherwise the time in minutes relative to the first time stamp of the first file is used
    combine_n_readouts: int
        the number of read outs to combine (e.g. 1000)
    chunk_size: int
        the maximum chunk size used during read, if too big memory error occurs, if too small analysis takes longer
    plot_n_cluster_hists: bool
        if true the cluster per event histogram of every combined read out range is plotted
    output_pdf: PdfPages
        PdfPages file object, if none the plot is printed to screen
    output_file: str
        if set, the result is stored as table 'n_cluster' in this HDF5 file (opened in append mode)

    Returns
    -------
    (time_stamp, n_cluster): tuple of lists
        one time stamp and one array with the fraction of events per number of cluster (10 bins) for every combined read out range
    '''

    n_bins = 10  # histogram bins for 0..9 cluster per event
    time_stamp = []
    n_cluster = []
    start_time = None  # reference for the relative time line, set from the first analyzed file

    for data_file in scan_base:
        # NOTE(review): opened writable, presumably because index_event_number stores an index in the file - confirm
        with tb.open_file(data_file + '_interpreted.h5',
                          mode="r+") as in_cluster_file_h5:
            # get data and data pointer
            meta_data_array = in_cluster_file_h5.root.meta_data[:]
            cluster_table = in_cluster_file_h5.root.Cluster

            # determine the event ranges to analyze (timestamp_start, timestamp_stop, start_event_number, stop_event_number)
            parameter_ranges = np.column_stack(
                (analysis_utils.get_ranges_from_array(
                    meta_data_array['timestamp_start'][::combine_n_readouts]),
                 analysis_utils.get_ranges_from_array(
                     meta_data_array['event_number'][::combine_n_readouts])))

            # create a event_number index (important for speed)
            analysis_utils.index_event_number(cluster_table)

            # initialize the analysis and set settings
            analyze_data = AnalyzeRawData()
            analyze_data.create_tot_hist = False
            analyze_data.create_bcid_hist = False

            # variables for read speed up
            index = 0  # index where to start the read out, 0 at the beginning, increased during looping
            best_chunk_size = chunk_size

            total_cluster = cluster_table.shape[0]

            progress_bar = progressbar.ProgressBar(
                widgets=[
                    '',
                    progressbar.Percentage(), ' ',
                    progressbar.Bar(marker='*', left='|', right='|'), ' ',
                    progressbar.AdaptiveETA()
                ],
                maxval=total_cluster,
                term_width=80)
            progress_bar.start()

            # loop over the selected events
            for parameter_index, parameter_range in enumerate(parameter_ranges):
                start_event_number = parameter_range[2]
                stop_event_number = parameter_range[3]
                logging.debug(
                    'Analyze time stamp %s and data from events = [%s,%s[ %d%%',
                    parameter_range[0], start_event_number, stop_event_number,
                    int(float(parameter_index) / float(len(parameter_ranges)) * 100.0))
                analyze_data.reset()  # resets the data of the last analysis

                # loop over the cluster in the actual selected events with optimizations: determine best chunk size, start word index given
                readout_cluster_len = 0  # variable to calculate a optimal chunk size value from the number of hits for speed up
                n_events_with_cluster = 0  # events seen in all chunks of this range
                # start with an empty histogram, thus a range without any cluster does not fail later on
                hist = np.zeros(n_bins, dtype=np.int64)
                for clusters, index in analysis_utils.data_aligned_at_events(
                        cluster_table,
                        start_event_number=start_event_number,
                        stop_event_number=stop_event_number,
                        start_index=index,
                        chunk_size=best_chunk_size):
                    # array with the number of cluster per event, cluster per event are at least 1
                    n_cluster_per_event = analysis_utils.get_n_cluster_in_events(
                        clusters['event_number'])[:, 1]
                    hist += np.histogram(n_cluster_per_event,
                                         bins=n_bins,
                                         range=(0, n_bins))[0]
                    n_events_with_cluster += len(n_cluster_per_event)
                    readout_cluster_len += clusters.shape[0]
                    total_cluster -= len(clusters)
                    progress_bar.update(index)

                # add the events without any cluster; done once after all chunks are read, otherwise
                # only the events of the last chunk would be subtracted from the events of the range
                if include_no_cluster and stop_event_number is not None:  # stop is None for the last readout
                    hist[0] = (stop_event_number - start_event_number) - n_events_with_cluster

                # to increase the readout speed, estimated the number of hits for one read instruction;
                # a range without clusters gives no estimate, fall back to the maximum chunk size
                if 0 < int(1.05 * readout_cluster_len) < chunk_size:
                    best_chunk_size = int(1.5 * readout_cluster_len)
                else:
                    best_chunk_size = chunk_size

                if plot_n_cluster_hists:
                    plotting.plot_1d_hist(
                        hist,
                        title='Number of cluster per event at ' + str(parameter_range[0]),
                        x_axis_title='Number of cluster',
                        y_axis_title='#',
                        log_y=True,
                        filename=output_pdf)

                # calculate fraction from total numbers, an empty histogram stays zero instead of becoming nan
                n_entries = np.sum(hist)
                if n_entries:
                    hist = hist.astype('f4') / n_entries
                else:
                    hist = hist.astype('f4')

                if time_line_absolute:
                    time_stamp.append(parameter_range[0])
                else:
                    if start_time is None:
                        start_time = parameter_ranges[0, 0]
                    time_stamp.append((parameter_range[0] - start_time) / 60.0)
                n_cluster.append(hist)
            progress_bar.finish()
            if total_cluster != 0:
                logging.warning(
                    'Not all clusters were selected during analysis. Analysis is therefore not exact'
                )

    # the 0 cluster entry is only meaningful if events without cluster are counted
    legend = ('0 cluster' if include_no_cluster else '0 cluster not plotted',
              '1 cluster', '2 cluster', '3 cluster')
    if time_line_absolute:
        plotting.plot_scatter_time(
            time_stamp,
            n_cluster,
            title='Number of cluster per event as a function of time',
            marker_style='o',
            filename=output_pdf,
            legend=legend)
    else:
        plotting.plot_scatter(
            time_stamp,
            n_cluster,
            title='Number of cluster per event as a function of time',
            x_label='time [min.]',
            marker_style='o',
            filename=output_pdf,
            legend=legend)

    if output_file:
        with tb.open_file(output_file, mode="a") as out_file_h5:
            cluster_array = np.array(n_cluster)
            n_stored = 6  # only the fractions for 0..5 cluster per event are stored
            columns = [cluster_array[:, column] for column in range(n_stored)]
            description = [('time_stamp', float)] + [
                ('cluster_%d' % column, float) for column in range(n_stored)
            ]
            # zip is an iterator in Python 3, numpy needs a list of tuples to build a structured array
            rec_array = np.array(list(zip(time_stamp, *columns)),
                                 dtype=description).view(np.recarray)
            try:
                n_cluster_table = out_file_h5.create_table(
                    out_file_h5.root,
                    name='n_cluster',
                    description=rec_array,
                    title='Cluster per event',
                    filters=tb.Filters(complib='blosc',
                                       complevel=5,
                                       fletcher32=False))
                n_cluster_table[:] = rec_array
            except tb.exceptions.NodeError:
                logging.warning(
                    '%s already has a n_cluster node, do not overwrite existing.',
                    output_file)
    return time_stamp, n_cluster
示例#4
0
文件: analysis.py 项目: makoc/pyBAR
def analyse_n_cluster_per_event(
    scan_base,
    include_no_cluster=False,
    time_line_absolute=True,
    combine_n_readouts=1000,
    chunk_size=10000000,
    plot_n_cluster_hists=False,
    output_pdf=None,
    output_file=None,
):
    """ Determines the number of cluster per event as a function of time. Therefore the data of a fixed number of read outs are combined ('combine_n_readouts').

    Parameters
    ----------
    scan_base: list of str
        scan base names (e.g.:  ['//data//SCC_50_fei4_self_trigger_scan_390', ]), '_interpreted.h5' is appended to get the input file names
    include_no_cluster: bool
        Set to true to also consider all events without any hit.
    time_line_absolute: bool
        if true the time stamps are used as they are, otherwise the time in minutes relative to the first time stamp of the first file is used
    combine_n_readouts: int
        the number of read outs to combine (e.g. 1000)
    chunk_size: int
        the maximum chunk size used during read, if too big memory error occurs, if too small analysis takes longer
    plot_n_cluster_hists: bool
        if true the cluster per event histogram of every combined read out range is plotted
    output_pdf: PdfPages
        PdfPages file object, if none the plot is printed to screen
    output_file: str
        if set, the result is stored as table 'n_cluster' in this HDF5 file (opened in append mode)

    Returns
    -------
    (time_stamp, n_cluster): tuple of lists
        one time stamp and one array with the fraction of events per number of cluster (10 bins) for every combined read out range
    """

    n_bins = 10  # histogram bins for 0..9 cluster per event
    time_stamp = []
    n_cluster = []
    start_time = None  # reference for the relative time line, set from the first analyzed file

    for data_file in scan_base:
        # NOTE(review): camelCase PyTables names (openFile/createTable) are kept as in the original;
        # newer PyTables releases name them open_file/create_table - confirm the targeted version
        # NOTE(review): opened writable, presumably because index_event_number stores an index in the file - confirm
        with tb.openFile(data_file + "_interpreted.h5", mode="r+") as in_cluster_file_h5:
            # get data and data pointer
            meta_data_array = in_cluster_file_h5.root.meta_data[:]
            cluster_table = in_cluster_file_h5.root.Cluster

            # determine the event ranges to analyze (timestamp_start, timestamp_stop, start_event_number, stop_event_number)
            parameter_ranges = np.column_stack(
                (
                    analysis_utils.get_ranges_from_array(meta_data_array["timestamp_start"][::combine_n_readouts]),
                    analysis_utils.get_ranges_from_array(meta_data_array["event_number"][::combine_n_readouts]),
                )
            )

            # create a event_number index (important for speed)
            analysis_utils.index_event_number(cluster_table)

            # initialize the analysis and set settings
            analyze_data = AnalyzeRawData()
            analyze_data.create_tot_hist = False
            analyze_data.create_bcid_hist = False

            # variables for read speed up
            index = 0  # index where to start the read out, 0 at the beginning, increased during looping
            best_chunk_size = chunk_size

            total_cluster = cluster_table.shape[0]

            progress_bar = progressbar.ProgressBar(
                widgets=[
                    "",
                    progressbar.Percentage(),
                    " ",
                    progressbar.Bar(marker="*", left="|", right="|"),
                    " ",
                    analysis_utils.ETA(),
                ],
                maxval=total_cluster,
                term_width=80,
            )
            progress_bar.start()

            # loop over the selected events
            for parameter_index, parameter_range in enumerate(parameter_ranges):
                start_event_number = parameter_range[2]
                stop_event_number = parameter_range[3]
                logging.debug(
                    "Analyze time stamp %s and data from events = [%s,%s[ %d%%",
                    parameter_range[0],
                    start_event_number,
                    stop_event_number,
                    int(float(parameter_index) / float(len(parameter_ranges)) * 100.0),
                )
                analyze_data.reset()  # resets the data of the last analysis

                # loop over the cluster in the actual selected events with optimizations: determine best chunk size, start word index given
                readout_cluster_len = 0  # variable to calculate a optimal chunk size value from the number of hits for speed up
                n_events_with_cluster = 0  # events seen in all chunks of this range
                # start with an empty histogram, thus a range without any cluster does not fail later on
                hist = np.zeros(n_bins, dtype=np.int64)
                for clusters, index in analysis_utils.data_aligned_at_events(
                    cluster_table,
                    start_event_number=start_event_number,
                    stop_event_number=stop_event_number,
                    start=index,
                    chunk_size=best_chunk_size,
                ):
                    # array with the number of cluster per event, cluster per event are at least 1
                    n_cluster_per_event = analysis_utils.get_n_cluster_in_events(clusters["event_number"])[:, 1]
                    hist += np.histogram(n_cluster_per_event, bins=n_bins, range=(0, n_bins))[0]
                    n_events_with_cluster += len(n_cluster_per_event)
                    readout_cluster_len += clusters.shape[0]
                    total_cluster -= len(clusters)
                    progress_bar.update(index)

                # add the events without any cluster; done once after all chunks are read, otherwise
                # only the events of the last chunk would be subtracted from the events of the range
                if include_no_cluster and stop_event_number is not None:  # stop is None for the last readout
                    hist[0] = (stop_event_number - start_event_number) - n_events_with_cluster

                # to increase the readout speed, estimated the number of hits for one read instruction;
                # a range without clusters gives no estimate, fall back to the maximum chunk size
                if 0 < int(1.05 * readout_cluster_len) < chunk_size:
                    best_chunk_size = int(1.5 * readout_cluster_len)
                else:
                    best_chunk_size = chunk_size

                if plot_n_cluster_hists:
                    plotting.plot_1d_hist(
                        hist,
                        title="Number of cluster per event at " + str(parameter_range[0]),
                        x_axis_title="Number of cluster",
                        y_axis_title="#",
                        log_y=True,
                        filename=output_pdf,
                    )

                # calculate fraction from total numbers, an empty histogram stays zero instead of becoming nan
                n_entries = np.sum(hist)
                if n_entries:
                    hist = hist.astype("f4") / n_entries
                else:
                    hist = hist.astype("f4")

                if time_line_absolute:
                    time_stamp.append(parameter_range[0])
                else:
                    if start_time is None:
                        start_time = parameter_ranges[0, 0]
                    time_stamp.append((parameter_range[0] - start_time) / 60.0)
                n_cluster.append(hist)
            progress_bar.finish()
            if total_cluster != 0:
                logging.warning("Not all clusters were selected during analysis. Analysis is therefore not exact")

    # the 0 cluster entry is only meaningful if events without cluster are counted
    legend = (
        "0 cluster" if include_no_cluster else "0 cluster not plotted",
        "1 cluster",
        "2 cluster",
        "3 cluster",
    )
    if time_line_absolute:
        plotting.plot_scatter_time(
            time_stamp,
            n_cluster,
            title="Number of cluster per event as a function of time",
            marker_style="o",
            filename=output_pdf,
            legend=legend,
        )
    else:
        plotting.plot_scatter(
            time_stamp,
            n_cluster,
            title="Number of cluster per event as a function of time",
            x_label="time [min.]",
            marker_style="o",
            filename=output_pdf,
            legend=legend,
        )

    if output_file:
        with tb.openFile(output_file, mode="a") as out_file_h5:
            cluster_array = np.array(n_cluster)
            n_stored = 6  # only the fractions for 0..5 cluster per event are stored
            columns = [cluster_array[:, column] for column in range(n_stored)]
            description = [("time_stamp", float)] + [("cluster_%d" % column, float) for column in range(n_stored)]
            # zip is an iterator in Python 3, numpy needs a list of tuples to build a structured array
            rec_array = np.array(list(zip(time_stamp, *columns)), dtype=description).view(np.recarray)
            try:
                n_cluster_table = out_file_h5.createTable(
                    out_file_h5.root,
                    name="n_cluster",
                    description=rec_array,
                    title="Cluster per event",
                    filters=tb.Filters(complib="blosc", complevel=5, fletcher32=False),
                )
                n_cluster_table[:] = rec_array
            except tb.exceptions.NodeError:
                logging.warning("%s already has a n_cluster node, do not overwrite existing.", output_file)
    return time_stamp, n_cluster