Example #1
def analyze_hits_per_scan_parameter(analyze_data,
                                    scan_parameters=None,
                                    chunk_size=50000):
    '''Takes the hit table and analyzes the hits per scan parameter.

    Parameters
    ----------
    analyze_data : analysis.analyze_raw_data.AnalyzeRawData
        Object with an opened hit file (AnalyzeRawData.out_file_h5) or a
        file name with the hit data given (AnalyzeRawData._analyzed_data_file).
    scan_parameters : list of strings
        The names of the scan parameters to use.
    chunk_size : int
        The number of hits read per hit table access. Bigger values are faster; too big values cause memory errors.

    Returns
    -------
    Yields the analysis.analyze_raw_data.AnalyzeRawData object for each scan parameter setting.
    '''

    if analyze_data.out_file_h5 is None or analyze_data.out_file_h5.isopen == 0:
        in_hit_file_h5 = tb.open_file(analyze_data._analyzed_data_file, 'r+')
        close_file = True
    else:
        in_hit_file_h5 = analyze_data.out_file_h5
        close_file = False

    meta_data = in_hit_file_h5.root.meta_data[:]  # get the meta data table
    try:
        hit_table = in_hit_file_h5.root.Hits  # get the hit table
    except tb.NoSuchNodeError:
        logging.error(
            'analyze_hits_per_scan_parameter needs a hit table, but no hit table found.'
        )
        return

    meta_data_table_at_scan_parameter = analysis_utils.get_unique_scan_parameter_combinations(
        meta_data, scan_parameters=scan_parameters)
    parameter_values = analysis_utils.get_scan_parameters_table_from_meta_data(
        meta_data_table_at_scan_parameter, scan_parameters)
    event_number_ranges = analysis_utils.get_ranges_from_array(
        meta_data_table_at_scan_parameter['event_number']
    )  # get the event number ranges for the different scan parameter settings

    analysis_utils.index_event_number(
        hit_table
    )  # create an event_number index to select the hits by their event number quickly; not required, but important for speed

    # variables for read speed up
    index = 0  # index where to start the read out of the hit table, 0 at the beginning, increased during looping
    best_chunk_size = chunk_size  # number of hits to copy to RAM during looping, the optimal chunk size is determined during looping

    # loop over the selected events
    for parameter_index, (start_event_number,
                          stop_event_number) in enumerate(event_number_ranges):
        logging.info('Analyze hits for ' + str(scan_parameters) + ' = ' +
                     str(parameter_values[parameter_index]))
        analyze_data.reset(
        )  # resets the front end data of the last analysis step but not the options
        readout_hit_len = 0  # used to calculate an optimal chunk size from the number of hits, for speed up
        # loop over the hits in the actual selected events with optimizations: determine best chunk size, start word index given
        for hits, index in analysis_utils.data_aligned_at_events(
                hit_table,
                start_event_number=start_event_number,
                stop_event_number=stop_event_number,
                start_index=index,
                chunk_size=best_chunk_size):
            analyze_data.analyze_hits(
                hits,
                scan_parameter=False)  # analyze the selected hits in chunks
            readout_hit_len += hits.shape[0]
        # estimate the number of hits of the next read to increase the readout speed
        if 1e3 < int(1.05 * readout_hit_len) < chunk_size:
            best_chunk_size = int(1.5 * readout_hit_len)
        else:
            best_chunk_size = chunk_size
        file_name = " ".join(re.findall(
            "[a-zA-Z0-9]+", str(scan_parameters))) + '_' + " ".join(
                re.findall("[a-zA-Z0-9]+",
                           str(parameter_values[parameter_index])))
        analyze_data._create_additional_hit_data(safe_to_file=False)
        analyze_data._create_additional_cluster_data(safe_to_file=False)
        yield analyze_data, file_name

    if close_file:
        in_hit_file_h5.close()
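
A minimal usage sketch for the generator above, assuming pyBAR's AnalyzeRawData API; the file name 'scan_interpreted.h5' and the scan parameter 'PlsrDAC' are placeholders:

with AnalyzeRawData(raw_data_file=None, analyzed_data_file='scan_interpreted.h5') as analyze_data:
    analyze_data.create_tot_hist = True  # options set here survive analyze_data.reset() inside the generator
    for result, file_name in analyze_hits_per_scan_parameter(analyze_data, scan_parameters=['PlsrDAC']):
        result.plot_histograms(pdf_filename=file_name)  # one set of plots per scan parameter setting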
Example #2
def create_hitor_calibration(output_filename):
    logging.info('Analyze and plot results of %s', output_filename)

    def plot_calibration(col_row_combinations, scan_parameter,
                         calibration_data,
                         filename):  # Result calibration plot function
        for index, (column, row) in enumerate(col_row_combinations):
            logging.info("Plot calibration for pixel " + str(column) + '/' +
                         str(row))
            fig = Figure()
            FigureCanvas(fig)
            ax = fig.add_subplot(111)
            fig.patch.set_facecolor('white')
            ax.grid(True)
            ax.errorbar(scan_parameter,
                        calibration_data[column - 1, row - 1, :, 0] * 25. +
                        25.,
                        yerr=[
                            calibration_data[column - 1, row - 1, :, 2] * 25,
                            calibration_data[column - 1, row - 1, :, 2] * 25
                        ],
                        fmt='o',
                        label='FE-I4 ToT [ns]')
            ax.errorbar(
                scan_parameter,
                calibration_data[column - 1, row - 1, :, 1] * 1.5625,
                yerr=[
                    calibration_data[column - 1, row - 1, :, 3] * 1.5625,
                    calibration_data[column - 1, row - 1, :, 3] * 1.5625
                ],
                fmt='o',
                label='TDC ToT [ns]')
            ax.set_title('Calibration for pixel ' + str(column) + '/' +
                         str(row))
            ax.set_xlabel('Charge [PlsrDAC]')
            ax.set_ylabel('TOT')
            ax.legend(loc=0)
            filename.savefig(fig)
            if index > 100:  # stop to avoid too many plots
                logging.info(
                    'Do not create pixel plots for more than 100 pixels to save time'
                )
                break

    with AnalyzeRawData(raw_data_file=output_filename, create_pdf=True
                        ) as analyze_raw_data:  # Interpret the raw data file
        analyze_raw_data.create_occupancy_hist = False  # too many scan parameters for in-RAM histogramming
        analyze_raw_data.create_hit_table = True
        analyze_raw_data.create_tdc_hist = True
        analyze_raw_data.align_at_tdc = True  # align events at TDC words, the first word of an event has to be a TDC word
        analyze_raw_data.interpret_word_table()
        analyze_raw_data.interpreter.print_summary()
        analyze_raw_data.plot_histograms()
        n_injections = analyze_raw_data.n_injections  # store number of injections for later cross check

    with tb.open_file(
            output_filename + '_interpreted.h5',
            'r') as in_file_h5:  # Get scan parameters from interpreted file
        meta_data = in_file_h5.root.meta_data[:]
        hits = in_file_h5.root.Hits[:]
        scan_parameters_dict = get_scan_parameter(meta_data)
        inner_loop_parameter_values = scan_parameters_dict[next(
            reversed(
                scan_parameters_dict))]  # inner loop parameter name is unknown
        scan_parameter_names = scan_parameters_dict.keys()
        col_row_combinations = get_unique_scan_parameter_combinations(
            in_file_h5.root.meta_data[:],
            scan_parameters=('column', 'row'),
            scan_parameter_columns_only=True)

        meta_data_table_at_scan_parameter = get_unique_scan_parameter_combinations(
            meta_data, scan_parameters=scan_parameter_names)
        parameter_values = get_scan_parameters_table_from_meta_data(
            meta_data_table_at_scan_parameter, scan_parameter_names)
        event_number_ranges = get_ranges_from_array(
            meta_data_table_at_scan_parameter['event_number'])
        event_ranges_per_parameter = np.column_stack(
            (parameter_values, event_number_ranges))
        event_numbers = hits['event_number'].copy(
        )  # create a contiguous array, otherwise np.searchsorted is too slow, http://stackoverflow.com/questions/15139299/performance-of-numpy-searchsorted-is-poor-on-structured-arrays

        with tb.open_file(output_filename + "_calibration.h5", mode="w") as calibration_data_file:
            logging.info('Create calibration')
            output_pdf = PdfPages(output_filename + "_calibration.pdf")
            calibration_data = np.zeros(
                shape=(80, 336, len(inner_loop_parameter_values), 4),
                dtype='f4'
            )  # the calibration result is a histogram with the dimensions col_index, row_index, PlsrDAC index and the 4 values mean discrete ToT, mean TDC ToT, RMS discrete ToT, RMS TDC ToT

            progress_bar = progressbar.ProgressBar(
                widgets=[
                    '',
                    progressbar.Percentage(), ' ',
                    progressbar.Bar(marker='*', left='|', right='|'), ' ',
                    progressbar.AdaptiveETA()
                ],
                maxval=len(event_ranges_per_parameter),
                term_width=80)
            progress_bar.start()

            for index, (parameter_values, event_start,
                        event_stop) in enumerate(event_ranges_per_parameter):
                if event_stop is None:  # happens for the last chunk
                    event_stop = hits[-1]['event_number'] + 1  # + 1 to include the hits of the last event
                array_index = np.searchsorted(
                    event_numbers, np.array([event_start, event_stop]))
                actual_hits = hits[array_index[0]:array_index[1]]
                actual_col, actual_row, parameter_value = parameter_values

                if len(actual_hits[np.logical_or(actual_hits['column'] != actual_col,
                                                 actual_hits['row'] != actual_row)]):
                    logging.warning(
                        'There are %d hits from not selected pixels in the data',
                        len(actual_hits[np.logical_or(
                            actual_hits['column'] != actual_col,
                            actual_hits['row'] != actual_row)]))

                actual_hits = actual_hits[np.logical_and(
                    actual_hits['column'] == actual_col,
                    actual_hits['row'] == actual_row)]
                actual_tdc_hits = actual_hits[
                    (actual_hits['event_status'] & 0b0000111110011100) ==
                    0b0000000100000000]  # only take hits from good events (one TDC word only, no error)
                actual_tot_hits = actual_hits[
                    (actual_hits['event_status'] & 0b0000100010011100) ==
                    0b0000000000000000]  # only take hits from good events for tot
                tot, tdc = actual_tot_hits['tot'], actual_tdc_hits['TDC']

                if tdc.shape[0] != n_injections and index == event_ranges_per_parameter.shape[0] - 1:
                    logging.warning('There are %d != %d TDC hits for %s = %s',
                                    tdc.shape[0], n_injections,
                                    str(scan_parameter_names),
                                    str(parameter_values))

                # translate the scan parameter value to an index for the result histogram
                inner_loop_scan_parameter_index = np.where(parameter_value == inner_loop_parameter_values)[0][0]
                calibration_data[actual_col - 1, actual_row - 1,
                                 inner_loop_scan_parameter_index,
                                 0] = np.mean(tot)
                calibration_data[actual_col - 1, actual_row - 1,
                                 inner_loop_scan_parameter_index,
                                 1] = np.mean(tdc)
                calibration_data[actual_col - 1, actual_row - 1,
                                 inner_loop_scan_parameter_index,
                                 2] = np.std(tot)
                calibration_data[actual_col - 1, actual_row - 1,
                                 inner_loop_scan_parameter_index,
                                 3] = np.std(tdc)

                progress_bar.update(index)

            calibration_data_out = calibration_data_file.create_carray(
                calibration_data_file.root,
                name='HitOrCalibration',
                title='Hit OR calibration data',
                atom=tb.Atom.from_dtype(calibration_data.dtype),
                shape=calibration_data.shape,
                filters=tb.Filters(complib='blosc',
                                   complevel=5,
                                   fletcher32=False))
            calibration_data_out[:] = calibration_data
            calibration_data_out.attrs.dimensions = scan_parameter_names
            calibration_data_out.attrs.scan_parameter_values = inner_loop_parameter_values
            plot_calibration(col_row_combinations,
                             scan_parameter=inner_loop_parameter_values,
                             calibration_data=calibration_data,
                             filename=output_pdf)
            output_pdf.close()
            progress_bar.finish()
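
A hypothetical invocation of the function above; the base file name 'hitor_scan' is a placeholder and has to point at an existing HitOR scan raw data file:

create_hitor_calibration('hitor_scan')  # writes 'hitor_scan_interpreted.h5', 'hitor_scan_calibration.h5' and a calibration PDF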
Example #3
def histogram_cluster_table(analyzed_data_file,
                            output_file,
                            chunk_size=10000000):
    '''Reads in the cluster info table in chunks and histograms the seed pixels into one occupancy array.
    The 3rd dimension of the occupancy array is the number of different scan parameters used.

    Parameters
    ----------
    analyzed_data_file : string
        HDF5 file name of the file containing the cluster table. If a scan parameter is given in the meta data, the occupancy histogramming is done per scan parameter step.
    output_file : string
        HDF5 file name of the output file the occupancy histogram is written to.
    chunk_size : int
        The number of clusters read per cluster table access.

    Returns
    -------
    Nothing. The occupancy array with dimensions (col, row, #scan_parameter) is written to output_file as HistOcc.
    '''

    with tb.open_file(analyzed_data_file, mode="r") as in_file_h5:
        with tb.open_file(output_file, mode="w") as out_file_h5:
            histogram = PyDataHistograming()
            histogram.create_occupancy_hist(True)
            scan_parameters = None
            event_number_indices = None
            scan_parameter_indices = None
            try:
                meta_data = in_file_h5.root.meta_data[:]
                scan_parameters = analysis_utils.get_unique_scan_parameter_combinations(
                    meta_data)
                if scan_parameters is not None:
                    scan_parameter_indices = np.array(range(
                        0, len(scan_parameters)),
                                                      dtype='u4')
                    event_number_indices = np.ascontiguousarray(
                        scan_parameters['event_number']).astype(np.uint64)
                    histogram.add_meta_event_index(
                        event_number_indices,
                        array_length=len(scan_parameters['event_number']))
                    histogram.add_scan_parameter(scan_parameter_indices)
                    logging.info(
                        "Add %d different scan parameter(s) for analysis",
                        len(scan_parameters))
                else:
                    logging.info("No scan parameter data provided")
                    histogram.set_no_scan_parameter()
            except tb.exceptions.NoSuchNodeError:
                logging.info("No meta data provided, use no scan parameter")
                histogram.set_no_scan_parameter()

            logging.info('Histogram cluster seeds...')
            progress_bar = progressbar.ProgressBar(
                widgets=[
                    '',
                    progressbar.Percentage(), ' ',
                    progressbar.Bar(marker='*', left='|', right='|'), ' ',
                    progressbar.AdaptiveETA()
                ],
                maxval=in_file_h5.root.Cluster.shape[0],
                term_width=80)
            progress_bar.start()
            total_cluster = 0  # to check analysis
            for cluster, index in analysis_utils.data_aligned_at_events(
                    in_file_h5.root.Cluster, chunk_size=chunk_size):
                total_cluster += len(cluster)
                histogram.add_cluster_seed_hits(cluster, len(cluster))
                progress_bar.update(index)
            progress_bar.finish()

            filter_table = tb.Filters(
                complib='blosc', complevel=5,
                fletcher32=False)  # compression of the written data
            occupancy_array = histogram.get_occupancy().T
            occupancy_array_table = out_file_h5.create_carray(
                out_file_h5.root,
                name='HistOcc',
                title='Occupancy Histogram',
                atom=tb.Atom.from_dtype(occupancy_array.dtype),
                shape=occupancy_array.shape,
                filters=filter_table)
            occupancy_array_table[:] = occupancy_array

            if total_cluster != np.sum(occupancy_array):
                logging.warning(
                    'Analysis shows inconsistent number of clusters used. Check needed!'
                )
            in_file_h5.root.meta_data.copy(
                out_file_h5.root)  # copy meta_data node to new file
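
A minimal usage sketch, assuming an interpreted data file that contains a Cluster table; both file names are placeholders:

histogram_cluster_table('scan_interpreted.h5', 'scan_occupancy.h5')
with tb.open_file('scan_occupancy.h5', 'r') as out_file:
    hist_occ = out_file.root.HistOcc[:]  # the seed pixel occupancy histogram written above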
Example #4
    def analyze(self):
        logging.info('Analyze and plot results')

        def plot_calibration(col_row_combinations, scan_parameter,
                             calibration_data, repeat_command,
                             filename):  # Result calibration plot function
            for index, (column, row) in enumerate(col_row_combinations):
                logging.info("Plot calibration for pixel " + str(column) +
                             '/' + str(row))
                fig = Figure()
                canvas = FigureCanvas(fig)
                ax = fig.add_subplot(111)
                fig.patch.set_facecolor('white')
                ax.grid(True)
                ax.errorbar(
                    scan_parameter,
                    calibration_data[column - 1, row - 1, :, 0] * 25. + 25.,
                    yerr=[
                        calibration_data[column - 1, row - 1, :, 2] * 25,
                        calibration_data[column - 1, row - 1, :, 2] * 25
                    ],
                    fmt='o',
                    label='FE-I4 ToT [ns]')
                ax.errorbar(
                    scan_parameter,
                    calibration_data[column - 1, row - 1, :, 1] * 1.5625,
                    yerr=[
                        calibration_data[column - 1, row - 1, :, 3] * 1.5625,
                        calibration_data[column - 1, row - 1, :, 3] * 1.5625
                    ],
                    fmt='o',
                    label='TDC ToT [ns]')
                ax.set_title('Calibration for pixel ' + str(column) + '/' +
                             str(row) + '; ' + str(repeat_command) +
                             ' injections per setting')
                ax.set_xlabel('Charge [PlsrDAC]')
                ax.set_ylabel('TOT')
                ax.legend(loc=0)
                filename.savefig(fig)
                if index > 100:  # stop for too many plots
                    break

        with AnalyzeRawData(
                raw_data_file=self.output_filename, create_pdf=True
        ) as analyze_raw_data:  # Interpret the raw data file
            analyze_raw_data.create_occupancy_hist = False  # too many scan parameters for in-RAM histogramming
            analyze_raw_data.create_hit_table = True
            analyze_raw_data.create_tdc_hist = True
            analyze_raw_data.interpreter.use_tdc_word(
                True
            )  # align events at TDC words, the first word of an event has to be a TDC word
            analyze_raw_data.interpret_word_table()
            analyze_raw_data.interpreter.print_summary()
            analyze_raw_data.plot_histograms()

        with tb.open_file(
                self.output_filename + '_interpreted.h5', 'r'
        ) as in_file_h5:  # Get scan parameters from interpreted file
            scan_parameters_dict = get_scan_parameter(
                in_file_h5.root.meta_data[:])
            inner_loop_parameter_values = scan_parameters_dict[next(
                reversed(scan_parameters_dict)
            )]  # inner loop parameter name is unknown
            scan_parameter_names = scan_parameters_dict.keys()
            n_par_combinations = len(
                get_unique_scan_parameter_combinations(
                    in_file_h5.root.meta_data[:]))
            col_row_combinations = get_unique_scan_parameter_combinations(
                in_file_h5.root.meta_data[:],
                scan_parameters=('column', 'row'),
                scan_parameter_columns_only=True)

        with tb.open_file(self.output_filename + "_calibration.h5", mode="w") as calibration_data_file:
            logging.info('Create calibration')
            output_pdf = PdfPages(self.output_filename + "_calibration.pdf")
            calibration_data = np.zeros(
                shape=(80, 336, len(inner_loop_parameter_values), 4),
                dtype='f4'
            )  # the calibration result is a histogram with the dimensions col_index, row_index, PlsrDAC index and the 4 values mean discrete ToT, mean TDC ToT, RMS discrete ToT, RMS TDC ToT

            progress_bar = progressbar.ProgressBar(
                widgets=[
                    '',
                    progressbar.Percentage(), ' ',
                    progressbar.Bar(marker='*', left='|', right='|'), ' ',
                    progressbar.AdaptiveETA()
                ],
                maxval=n_par_combinations,
                term_width=80)
            old_scan_parameters = None
            tot_data = None
            tdc_data = None

            for index, (actual_scan_parameters, hits) in enumerate(
                    get_hits_of_scan_parameter(self.output_filename +
                                               '_interpreted.h5',
                                               scan_parameter_names,
                                               chunk_size=1.5e7)):
                if index == 0:
                    progress_bar.start(
                    )  # start after the event index is created to get reasonable ETA

                actual_col, actual_row, _ = actual_scan_parameters

                if len(hits[np.logical_or(hits['column'] != actual_col,
                                          hits['row'] != actual_row)]):
                    logging.warning(
                        'There are %d hits from not selected pixels in the data',
                        len(hits[np.logical_or(hits['column'] != actual_col,
                                               hits['row'] != actual_row)]))

                hits = hits[
                    (hits['event_status'] & 0b0000011110001000) ==
                    0b0000000100000000]  # only take hits from good events (one TDC word only, no error)
                column, row, tot, tdc = hits['column'], hits['row'], hits[
                    'tot'], hits['TDC']

                if old_scan_parameters != actual_scan_parameters:  # Store the data of the actual PlsrDAC value
                    if old_scan_parameters:  # Special case for the first PlsrDAC setting
                        # translate the scan parameter value to an index for the result histogram
                        inner_loop_scan_parameter_index = np.where(old_scan_parameters[-1] == inner_loop_parameter_values)[0][0]
                        calibration_data[column - 1, row - 1,
                                         inner_loop_scan_parameter_index,
                                         0] = np.mean(tot_data)
                        calibration_data[column - 1, row - 1,
                                         inner_loop_scan_parameter_index,
                                         1] = np.mean(tdc_data)
                        calibration_data[column - 1, row - 1,
                                         inner_loop_scan_parameter_index,
                                         2] = np.std(tot_data)
                        calibration_data[column - 1, row - 1,
                                         inner_loop_scan_parameter_index,
                                         3] = np.std(tdc_data)
                        progress_bar.update(index)
                    tot_data = np.array(tot)
                    tdc_data = np.array(tdc)
                    old_scan_parameters = actual_scan_parameters
                else:
                    tot_data = np.concatenate((tot_data, tot))  # assign the result, np.concatenate does not work in place
                    tdc_data = np.concatenate((tdc_data, tdc))

            else:  # for-else: runs after the loop finishes, stores the data of the last scan parameter setting
                # translate the scan parameter value to an index for the result histogram
                inner_loop_scan_parameter_index = np.where(old_scan_parameters[-1] == inner_loop_parameter_values)[0][0]
                calibration_data[column - 1, row - 1,
                                 inner_loop_scan_parameter_index,
                                 0] = np.mean(tot_data)
                calibration_data[column - 1, row - 1,
                                 inner_loop_scan_parameter_index,
                                 1] = np.mean(tdc_data)
                calibration_data[column - 1, row - 1,
                                 inner_loop_scan_parameter_index,
                                 2] = np.std(tot_data)
                calibration_data[column - 1, row - 1,
                                 inner_loop_scan_parameter_index,
                                 3] = np.std(tdc_data)

            calibration_data_out = calibration_data_file.create_carray(
                calibration_data_file.root,
                name='HitOrCalibration',
                title='Hit OR calibration data',
                atom=tb.Atom.from_dtype(calibration_data.dtype),
                shape=calibration_data.shape,
                filters=tb.Filters(complib='blosc',
                                   complevel=5,
                                   fletcher32=False))
            calibration_data_out[:] = calibration_data
            calibration_data_out.attrs.dimensions = scan_parameter_names
            calibration_data_out.attrs.scan_parameter_values = inner_loop_parameter_values
            plot_calibration(col_row_combinations,
                             scan_parameter=inner_loop_parameter_values,
                             calibration_data=calibration_data,
                             repeat_command=self.repeat_command,
                             filename=output_pdf)
            output_pdf.close()
            progress_bar.finish()
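
The loop above relies on Python's for-else to flush the accumulated data of the final scan parameter setting once the loop finishes. A stripped-down, self-contained sketch of that accumulate-and-flush pattern:

groups = [('a', 1), ('a', 2), ('b', 3)]  # toy stand-in for (scan parameter, hits) chunks
current_key, buffer = None, []
for key, value in groups:
    if key != current_key:
        if current_key is not None:
            print(current_key, buffer)  # flush the completed group
        current_key, buffer = key, [value]
    else:
        buffer.append(value)
else:
    print(current_key, buffer)  # for-else: runs after the loop ends without break, flushes the last group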
Example #5
def analyze_hits_per_scan_parameter(analyze_data, scan_parameters=None, chunk_size=50000):
    """Takes the hit table and analyzes the hits per scan parameter

    Parameters
    ----------
    analyze_data : analysis.analyze_raw_data.AnalyzeRawData object with an opened hit file (AnalyzeRawData.out_file_h5) or a
    file name with the hit data given (AnalyzeRawData._analyzed_data_file)
    scan_parameters : list of strings:
        The names of the scan parameters to use
    chunk_size : int:
        The chunk size of one hit table read. The bigger the faster. Too big causes memory errors.
    Returns
    -------
    yields the analysis.analyze_raw_data.AnalyzeRawData for each scan parameter
    """

    if analyze_data.out_file_h5 is None or analyze_data.out_file_h5.isopen == 0:
        in_hit_file_h5 = tb.open_file(analyze_data._analyzed_data_file, "r+")
        opened_file = True
    else:
        in_hit_file_h5 = analyze_data.out_file_h5
        opened_file = False

    meta_data = in_hit_file_h5.root.meta_data[:]  # get the meta data table
    try:
        hit_table = in_hit_file_h5.root.Hits  # get the hit table
    except tb.NoSuchNodeError:
        logging.error("analyze_hits_per_scan_parameter needs a hit table, but no hit table found.")
        return

    meta_data_table_at_scan_parameter = analysis_utils.get_unique_scan_parameter_combinations(
        meta_data, scan_parameters=scan_parameters
    )
    parameter_values = analysis_utils.get_scan_parameters_table_from_meta_data(
        meta_data_table_at_scan_parameter, scan_parameters
    )
    event_number_ranges = analysis_utils.get_ranges_from_array(
        meta_data_table_at_scan_parameter["event_number"]
    )  # get the event number ranges for the different scan parameter settings

    analysis_utils.index_event_number(
        hit_table
    )  # create an event_number index to select the hits by their event number quickly; not required, but important for speed

    # variables for read speed up
    index = 0  # index where to start the read out of the hit table, 0 at the beginning, increased during looping
    best_chunk_size = chunk_size  # number of hits to copy to RAM during looping, the optimal chunk size is determined during looping

    # loop over the selected events
    for parameter_index, (start_event_number, stop_event_number) in enumerate(event_number_ranges):
        logging.info("Analyze hits for " + str(scan_parameters) + " = " + str(parameter_values[parameter_index]))
        analyze_data.reset()  # resets the front end data of the last analysis step but not the options
        readout_hit_len = 0  # used to calculate an optimal chunk size from the number of hits, for speed up
        # loop over the hits in the actual selected events with optimizations: determine best chunk size, start word index given
        for hits, index in analysis_utils.data_aligned_at_events(
            hit_table,
            start_event_number=start_event_number,
            stop_event_number=stop_event_number,
            start=index,
            chunk_size=best_chunk_size,
        ):
            analyze_data.analyze_hits(hits, scan_parameter=False)  # analyze the selected hits in chunks
            readout_hit_len += hits.shape[0]
        best_chunk_size = (
            int(1.5 * readout_hit_len)
            if 1e3 < int(1.05 * readout_hit_len) < chunk_size
            else chunk_size
        )  # estimate the number of hits of the next read to increase the readout speed
        file_name = (
            " ".join(re.findall("[a-zA-Z0-9]+", str(scan_parameters)))
            + "_"
            + " ".join(re.findall("[a-zA-Z0-9]+", str(parameter_values[parameter_index])))
        )
        analyze_data._create_additional_hit_data(safe_to_file=False)
        analyze_data._create_additional_cluster_data(safe_to_file=False)
        yield analyze_data, file_name

    if opened_file:
        in_hit_file_h5.close()
Example #6
def histogram_cluster_table(analyzed_data_file, output_file, chunk_size=10000000):
    """Reads in the cluster info table in chunks and histograms the seed pixels into one occupancy array.
    The 3rd dimension of the occupancy array is the number of different scan parameters used

    Parameters
    ----------
    analyzed_data_file : hdf5 file containing the cluster table. If a scan parameter is given in the meta data the occupancy
                        histograming is done per scan parameter.
    Returns
    -------
    occupancy_array: numpy.array with dimensions (col, row, #scan_parameter)
    """

    with tb.open_file(analyzed_data_file, mode="r") as in_file_h5:
        with tb.open_file(output_file, mode="w") as out_file_h5:
            histograming = PyDataHistograming()
            histograming.create_occupancy_hist(True)
            scan_parameters = None
            event_number_indices = None
            scan_parameter_indices = None
            try:
                meta_data = in_file_h5.root.meta_data[:]
                scan_parameters = analysis_utils.get_unique_scan_parameter_combinations(meta_data)
                if scan_parameters is not None:
                    scan_parameter_indices = np.array(range(0, len(scan_parameters)), dtype="u4")
                    event_number_indices = np.ascontiguousarray(scan_parameters["event_number"]).astype(np.uint64)
                    histograming.add_meta_event_index(
                        event_number_indices, array_length=len(scan_parameters["event_number"])
                    )
                    histograming.add_scan_parameter(scan_parameter_indices)
                    logging.info("Add %d different scan parameter(s) for analysis", len(scan_parameters))
                else:
                    logging.info("No scan parameter data provided")
                    histograming.set_no_scan_parameter()
            except tb.exceptions.NoSuchNodeError:
                logging.info("No meta data provided, use no scan parameter")
                histograming.set_no_scan_parameter()

            logging.info("Histogram cluster seeds...")
            progress_bar = progressbar.ProgressBar(
                widgets=[
                    "",
                    progressbar.Percentage(),
                    " ",
                    progressbar.Bar(marker="*", left="|", right="|"),
                    " ",
                    analysis_utils.ETA(),
                ],
                maxval=in_file_h5.root.Cluster.shape[0],
                term_width=80,
            )
            progress_bar.start()
            total_cluster = 0  # to check analysis
            for cluster, index in analysis_utils.data_aligned_at_events(in_file_h5.root.Cluster, chunk_size=chunk_size):
                total_cluster += len(cluster)
                histograming.add_cluster_seed_hits(cluster, len(cluster))
                progress_bar.update(index)
            progress_bar.finish()

            filter_table = tb.Filters(complib="blosc", complevel=5, fletcher32=False)  # compression of the written data
            occupancy_array = histograming.get_occupancy().T
            occupancy_array_table = out_file_h5.create_carray(
                out_file_h5.root,
                name="HistOcc",
                title="Occupancy Histogram",
                atom=tb.Atom.from_dtype(occupancy_array.dtype),
                shape=occupancy_array.shape,
                filters=filter_table,
            )
            occupancy_array_table[:] = occupancy_array

            if total_cluster != np.sum(occupancy_array):
                logging.warning("Analysis shows inconsistent number of clusters used. Check needed!")
            in_file_h5.root.meta_data.copy(out_file_h5.root)  # copy meta_data node to new file
Example #7
def create_hitor_calibration(output_filename, plot_pixel_calibrations=False):
    '''Generates a HitOr calibration file (_calibration.h5) from a raw data file and plots the calibration data.

    Parameters
    ----------
    output_filename : string
        Input raw data file name.
    plot_pixel_calibrations : bool, iterable
        If True, generates additional pixel calibration plots. If a list of column/row tuples (from 1 to 80 / 336), plots the selected pixels.

    Returns
    -------
    nothing
    '''
    logging.info('Analyze HitOR calibration data and plot results of %s',
                 output_filename)

    with AnalyzeRawData(raw_data_file=output_filename, create_pdf=True
                        ) as analyze_raw_data:  # Interpret the raw data file
        analyze_raw_data.create_occupancy_hist = False  # too many scan parameters for in-RAM histogramming
        analyze_raw_data.create_hit_table = True
        analyze_raw_data.create_tdc_hist = True
        analyze_raw_data.align_at_tdc = True  # align events at TDC words, the first word of an event has to be a TDC word
        analyze_raw_data.interpret_word_table()
        analyze_raw_data.interpreter.print_summary()
        analyze_raw_data.plot_histograms()
        n_injections = analyze_raw_data.n_injections  # use later

        with tb.open_file(
                analyze_raw_data._analyzed_data_file, 'r'
        ) as in_file_h5:  # Get scan parameters from interpreted file
            meta_data = in_file_h5.root.meta_data[:]
            scan_parameters_dict = get_scan_parameter(meta_data)
            inner_loop_parameter_values = scan_parameters_dict[next(
                reversed(scan_parameters_dict)
            )]  # inner loop parameter name is unknown
            scan_parameter_names = scan_parameters_dict.keys()
            #             col_row_combinations = get_unique_scan_parameter_combinations(in_file_h5.root.meta_data[:], scan_parameters=('column', 'row'), scan_parameter_columns_only=True)

            meta_data_table_at_scan_parameter = get_unique_scan_parameter_combinations(
                meta_data, scan_parameters=scan_parameter_names)
            scan_parameter_values = get_scan_parameters_table_from_meta_data(
                meta_data_table_at_scan_parameter, scan_parameter_names)
            event_number_ranges = get_ranges_from_array(
                meta_data_table_at_scan_parameter['event_number'])
            event_ranges_per_parameter = np.column_stack(
                (scan_parameter_values, event_number_ranges))
            hits = in_file_h5.root.Hits[:]
            event_numbers = hits['event_number'].copy(
            )  # create a contiguous array, otherwise np.searchsorted is too slow, http://stackoverflow.com/questions/15139299/performance-of-numpy-searchsorted-is-poor-on-structured-arrays

            output_filename = os.path.splitext(output_filename)[0]
            with tb.open_file(output_filename + "_calibration.h5",
                              mode="w") as calibration_data_file:
                logging.info('Create calibration')
                calibration_data = np.full(
                    shape=(80, 336, len(inner_loop_parameter_values), 4),
                    fill_value=np.nan,
                    dtype='f4'
                )  # the calibration result is a histogram with the dimensions col_index, row_index, PlsrDAC index and the 4 values mean discrete ToT, mean TDC ToT, RMS discrete ToT, RMS TDC ToT

                progress_bar = progressbar.ProgressBar(
                    widgets=[
                        '',
                        progressbar.Percentage(), ' ',
                        progressbar.Bar(marker='*', left='|', right='|'), ' ',
                        progressbar.AdaptiveETA()
                    ],
                    maxval=len(event_ranges_per_parameter),
                    term_width=80)
                progress_bar.start()

                for index, (
                        actual_scan_parameter_values, event_start,
                        event_stop) in enumerate(event_ranges_per_parameter):
                    if event_stop is None:  # happens for the last chunk
                        event_stop = hits[-1]['event_number'] + 1
                    array_index = np.searchsorted(
                        event_numbers, np.array([event_start, event_stop]))
                    actual_hits = hits[array_index[0]:array_index[1]]
                    for item_index, item in enumerate(scan_parameter_names):
                        if item == "column":
                            actual_col = actual_scan_parameter_values[
                                item_index]
                        elif item == "row":
                            actual_row = actual_scan_parameter_values[
                                item_index]
                        elif item == "PlsrDAC":
                            plser_dac = actual_scan_parameter_values[
                                item_index]
                        else:
                            raise ValueError("Unknown scan parameter %s" %
                                             item)

                    # Only pixels of the actual column/row should be in the actual data chunk, but since the SRAM is not cleared for each scan step (for speed reasons) and there might be noisy pixels, this is not always the case
                    n_wrong_pixel = np.count_nonzero(
                        np.logical_or(actual_hits['column'] != actual_col,
                                      actual_hits['row'] != actual_row))
                    if n_wrong_pixel != 0:
                        logging.warning(
                            '%d hit(s) from other pixels for scan parameters %s',
                            n_wrong_pixel, ', '.join([
                                '%s=%s' % (name, value)
                                for (name, value
                                     ) in zip(scan_parameter_names,
                                              actual_scan_parameter_values)
                            ]))

                    actual_hits = actual_hits[np.logical_and(
                        actual_hits['column'] == actual_col, actual_hits['row']
                        == actual_row)]  # Only take data from selected pixel
                    actual_tdc_hits = actual_hits[
                        (actual_hits['event_status'] & 0b0000111110011100) ==
                        0b0000000100000000]  # only take hits from good events (one TDC word only, no error)
                    actual_tot_hits = actual_hits[
                        (actual_hits['event_status'] & 0b0000100010011100) ==
                        0b0000000000000000]  # only take hits from good events for tot
                    tot, tdc = actual_tot_hits['tot'], actual_tdc_hits['TDC']

                    if tdc.shape[0] < n_injections:
                        logging.info(
                            '%d of %d expected TDC hits for scan parameters %s',
                            tdc.shape[0], n_injections, ', '.join([
                                '%s=%s' % (name, value)
                                for (name, value
                                     ) in zip(scan_parameter_names,
                                              actual_scan_parameter_values)
                            ]))
                    if tot.shape[0] < n_injections:
                        logging.info(
                            '%d of %d expected hits for scan parameters %s',
                            tot.shape[0], n_injections, ', '.join([
                                '%s=%s' % (name, value)
                                for (name, value
                                     ) in zip(scan_parameter_names,
                                              actual_scan_parameter_values)
                            ]))

                    # translate the scan parameter value to an index for the result histogram
                    inner_loop_scan_parameter_index = np.where(plser_dac == inner_loop_parameter_values)[0][0]
                    # numpy mean and std return nan if array is empty
                    calibration_data[actual_col - 1, actual_row - 1,
                                     inner_loop_scan_parameter_index,
                                     0] = np.mean(tot)
                    calibration_data[actual_col - 1, actual_row - 1,
                                     inner_loop_scan_parameter_index,
                                     1] = np.mean(tdc)
                    calibration_data[actual_col - 1, actual_row - 1,
                                     inner_loop_scan_parameter_index,
                                     2] = np.std(tot)
                    calibration_data[actual_col - 1, actual_row - 1,
                                     inner_loop_scan_parameter_index,
                                     3] = np.std(tdc)

                    progress_bar.update(index)
                progress_bar.finish()

                calibration_data_out = calibration_data_file.create_carray(
                    calibration_data_file.root,
                    name='HitOrCalibration',
                    title='Hit OR calibration data',
                    atom=tb.Atom.from_dtype(calibration_data.dtype),
                    shape=calibration_data.shape,
                    filters=tb.Filters(complib='blosc',
                                       complevel=5,
                                       fletcher32=False))
                calibration_data_out[:] = calibration_data
                calibration_data_out.attrs.dimensions = scan_parameter_names
                calibration_data_out.attrs.scan_parameter_values = inner_loop_parameter_values
                calibration_data_out.flush()
                #                 with PdfPages(output_filename + "_calibration.pdf") as output_pdf:
                plot_scurves(calibration_data[:, :, :, 0],
                             inner_loop_parameter_values,
                             "ToT calibration",
                             "ToT",
                             15,
                             "Charge [PlsrDAC]",
                             filename=analyze_raw_data.output_pdf)
                plot_scurves(calibration_data[:, :, :, 1],
                             inner_loop_parameter_values,
                             "TDC calibration",
                             "TDC [ns]",
                             None,
                             "Charge [PlsrDAC]",
                             filename=analyze_raw_data.output_pdf)
                tot_mean_all_pix = np.nanmean(calibration_data[:, :, :, 0],
                                              axis=(0, 1))
                tot_error_all_pix = np.nanstd(calibration_data[:, :, :, 0],
                                              axis=(0, 1))
                tdc_mean_all_pix = np.nanmean(calibration_data[:, :, :, 1],
                                              axis=(0, 1))
                tdc_error_all_pix = np.nanstd(calibration_data[:, :, :, 1],
                                              axis=(0, 1))
                plot_tot_tdc_calibration(
                    scan_parameters=inner_loop_parameter_values,
                    tot_mean=tot_mean_all_pix,
                    tot_error=tot_error_all_pix,
                    tdc_mean=tdc_mean_all_pix,
                    tdc_error=tdc_error_all_pix,
                    filename=analyze_raw_data.output_pdf,
                    title="Mean charge calibration of %d pixel(s)" %
                    np.count_nonzero(~np.all(
                        np.isnan(calibration_data[:, :, :, 0]), axis=2)))
                # plotting individual pixels
                if plot_pixel_calibrations is True:
                    # selecting pixels with non-nan entries
                    col_row_non_nan = np.nonzero(~np.all(
                        np.isnan(calibration_data[:, :, :, 0]), axis=2))
                    plot_pixel_calibrations = np.dstack(col_row_non_nan)[0]
                elif plot_pixel_calibrations is False:
                    plot_pixel_calibrations = np.array([], dtype=int)  # np.int is removed in recent NumPy versions
                else:  # assuming list of column / row tuples
                    plot_pixel_calibrations = np.array(
                        plot_pixel_calibrations) - 1
                # generate index array
                pixel_indices = np.arange(plot_pixel_calibrations.shape[0])
                plot_n_pixels = 10  # number of pixels at the beginning, center and end of the array
                np.random.seed(0)
                # select random pixels
                if pixel_indices.size - 2 * plot_n_pixels >= 0:
                    random_pixel_indices = np.sort(
                        np.random.choice(
                            pixel_indices[plot_n_pixels:-plot_n_pixels],
                            min(plot_n_pixels,
                                pixel_indices.size - 2 * plot_n_pixels),
                            replace=False))
                else:
                    random_pixel_indices = np.array([], dtype=int)
                selected_pixel_indices = np.unique(
                    np.hstack([
                        pixel_indices[:plot_n_pixels], random_pixel_indices,
                        pixel_indices[-plot_n_pixels:]
                    ]))
                # plotting individual pixels
                for (column,
                     row) in plot_pixel_calibrations[selected_pixel_indices]:
                    logging.info(
                        "Plotting charge calibration for pixel column " +
                        str(column + 1) + " / row " + str(row + 1))
                    tot_mean_single_pix = calibration_data[column, row, :, 0]
                    tot_std_single_pix = calibration_data[column, row, :, 2]
                    tdc_mean_single_pix = calibration_data[column, row, :, 1]
                    tdc_std_single_pix = calibration_data[column, row, :, 3]
                    plot_tot_tdc_calibration(
                        scan_parameters=inner_loop_parameter_values,
                        tot_mean=tot_mean_single_pix,
                        tot_error=tot_std_single_pix,
                        tdc_mean=tdc_mean_single_pix,
                        tdc_error=tdc_std_single_pix,
                        filename=analyze_raw_data.output_pdf,
                        title="Charge calibration for pixel column " +
                        str(column + 1) + " / row " + str(row + 1))
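
A hypothetical call of this variant; the raw data file name is a placeholder, and the column/row tuples just illustrate the iterable form of plot_pixel_calibrations:

create_hitor_calibration('hitor_scan.h5', plot_pixel_calibrations=[(40, 160), (1, 1)])  # plot two selected pixels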
Example #8
def create_hitor_calibration(output_filename):
    logging.info('Analyze and plot results of %s', output_filename)

    def plot_calibration(col_row_combinations, scan_parameter, calibration_data, filename):  # Result calibration plot function
        for index, (column, row) in enumerate(col_row_combinations):
            logging.info("Plot calibration for pixel " + str(column) + '/' + str(row))
            fig = Figure()
            FigureCanvas(fig)
            ax = fig.add_subplot(111)
            fig.patch.set_facecolor('white')
            ax.grid(True)
            ax.errorbar(scan_parameter, calibration_data[column - 1, row - 1, :, 0] * 25. + 25.,
                        yerr=[calibration_data[column - 1, row - 1, :, 2] * 25, calibration_data[column - 1, row - 1, :, 2] * 25],
                        fmt='o', label='FE-I4 ToT [ns]')
            ax.errorbar(scan_parameter, calibration_data[column - 1, row - 1, :, 1] * 1.5625,
                        yerr=[calibration_data[column - 1, row - 1, :, 3] * 1.5625, calibration_data[column - 1, row - 1, :, 3] * 1.5625],
                        fmt='o', label='TDC ToT [ns]')
            ax.set_title('Calibration for pixel ' + str(column) + '/' + str(row))
            ax.set_xlabel('Charge [PlsrDAC]')
            ax.set_ylabel('TOT')
            ax.legend(loc=0)
            filename.savefig(fig)
            if index > 100:  # stop to avoid too many plots
                logging.info('Do not create pixel plots for more than 100 pixels to save time')
                break

    with AnalyzeRawData(raw_data_file=output_filename, create_pdf=True) as analyze_raw_data:  # Interpret the raw data file
        analyze_raw_data.create_occupancy_hist = False  # too many scan parameters for in-RAM histogramming
        analyze_raw_data.create_hit_table = True
        analyze_raw_data.create_tdc_hist = True
        analyze_raw_data.align_at_tdc = True  # align events at TDC words, the first word of an event has to be a TDC word
        analyze_raw_data.interpret_word_table()
        analyze_raw_data.interpreter.print_summary()
        analyze_raw_data.plot_histograms()
        n_injections = analyze_raw_data.n_injections  # store number of injections for later cross check

    with tb.open_file(output_filename + '_interpreted.h5', 'r') as in_file_h5:  # Get scan parameters from interpreted file
        meta_data = in_file_h5.root.meta_data[:]
        hits = in_file_h5.root.Hits[:]
        scan_parameters_dict = get_scan_parameter(meta_data)
        inner_loop_parameter_values = scan_parameters_dict[next(reversed(scan_parameters_dict))]  # inner loop parameter name is unknown
        scan_parameter_names = scan_parameters_dict.keys()
        col_row_combinations = get_unique_scan_parameter_combinations(in_file_h5.root.meta_data[:], scan_parameters=('column', 'row'), scan_parameter_columns_only=True)

        meta_data_table_at_scan_parameter = get_unique_scan_parameter_combinations(meta_data, scan_parameters=scan_parameter_names)
        parameter_values = get_scan_parameters_table_from_meta_data(meta_data_table_at_scan_parameter, scan_parameter_names)
        event_number_ranges = get_ranges_from_array(meta_data_table_at_scan_parameter['event_number'])
        event_ranges_per_parameter = np.column_stack((parameter_values, event_number_ranges))
        event_numbers = hits['event_number'].copy()  # create a contiguous array, otherwise np.searchsorted is too slow, http://stackoverflow.com/questions/15139299/performance-of-numpy-searchsorted-is-poor-on-structured-arrays

        with tb.open_file(output_filename + "_calibration.h5", mode="w") as calibration_data_file:
            logging.info('Create calibration')
            output_pdf = PdfPages(output_filename + "_calibration.pdf")
            calibration_data = np.zeros(shape=(80, 336, len(inner_loop_parameter_values), 4), dtype='f4')  # the calibration result is a histogram with the dimensions col_index, row_index, PlsrDAC index and the 4 values mean discrete ToT, mean TDC ToT, RMS discrete ToT, RMS TDC ToT

            progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=len(event_ranges_per_parameter), term_width=80)
            progress_bar.start()

            for index, (parameter_values, event_start, event_stop) in enumerate(event_ranges_per_parameter):
                if event_stop is None:  # happens for the last chunk
                    event_stop = hits[-1]['event_number'] + 1  # + 1 to include the hits of the last event
                array_index = np.searchsorted(event_numbers, np.array([event_start, event_stop]))
                actual_hits = hits[array_index[0]:array_index[1]]
                actual_col, actual_row, parameter_value = parameter_values

                if len(actual_hits[np.logical_or(actual_hits['column'] != actual_col, actual_hits['row'] != actual_row)]):
                    logging.warning('There are %d hits from not selected pixels in the data', len(actual_hits[np.logical_or(actual_hits['column'] != actual_col, actual_hits['row'] != actual_row)]))

                actual_hits = actual_hits[np.logical_and(actual_hits['column'] == actual_col, actual_hits['row'] == actual_row)]
                actual_tdc_hits = actual_hits[(actual_hits['event_status'] & 0b0000111110011100) == 0b0000000100000000]  # only take hits from good events (one TDC word only, no error)
                actual_tot_hits = actual_hits[(actual_hits['event_status'] & 0b0000100010011100) == 0b0000000000000000]  # only take hits from good events for tot
                tot, tdc = actual_tot_hits['tot'], actual_tdc_hits['TDC']

                if tdc.shape[0] != n_injections and index == event_ranges_per_parameter.shape[0] - 1:
                    logging.warning('There are %d != %d TDC hits for %s = %s', tdc.shape[0], n_injections, str(scan_parameter_names), str(parameter_values))

                inner_loop_scan_parameter_index = np.where(parameter_value == inner_loop_parameter_values)[0][0]  # translate the scan parameter value to an index for the result histogram
                calibration_data[actual_col - 1, actual_row - 1, inner_loop_scan_parameter_index, 0] = np.mean(tot)
                calibration_data[actual_col - 1, actual_row - 1, inner_loop_scan_parameter_index, 1] = np.mean(tdc)
                calibration_data[actual_col - 1, actual_row - 1, inner_loop_scan_parameter_index, 2] = np.std(tot)
                calibration_data[actual_col - 1, actual_row - 1, inner_loop_scan_parameter_index, 3] = np.std(tdc)

                progress_bar.update(index)

            calibration_data_out = calibration_data_file.create_carray(calibration_data_file.root, name='HitOrCalibration', title='Hit OR calibration data', atom=tb.Atom.from_dtype(calibration_data.dtype), shape=calibration_data.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
            calibration_data_out[:] = calibration_data
            calibration_data_out.attrs.dimensions = scan_parameter_names
            calibration_data_out.attrs.scan_parameter_values = inner_loop_parameter_values
            plot_calibration(col_row_combinations, scan_parameter=inner_loop_parameter_values, calibration_data=calibration_data, filename=output_pdf)
            output_pdf.close()
            progress_bar.finish()
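
To use the calibration produced by any of these variants, the HitOrCalibration array can be read back with PyTables; the file name is a placeholder and the unit conversions follow the plotting code above (25 ns per discrete ToT count, 1.5625 ns per TDC count):

with tb.open_file('hitor_scan_calibration.h5', 'r') as f:
    cal = f.root.HitOrCalibration[:]  # shape: (80, 336, #PlsrDAC values, 4)
    plsr_dacs = f.root.HitOrCalibration.attrs.scan_parameter_values
    mean_tot_ns = cal[:, :, :, 0] * 25. + 25.  # discrete ToT in ns
    mean_tdc_ns = cal[:, :, :, 1] * 1.5625  # TDC ToT in ns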