Пример #1
0
def create_threshold_calibration(
    scan_base_file_name,
    create_plots=True
):  # Create calibration function, can be called stand alone
    """Create a threshold calibration from the raw data files of a scan.

    Every raw data file found for the scan base name(s) is interpreted, unless
    its ``*_interpreted.h5`` file already exists. The fitted thresholds of each
    interpreted file are then collected, one entry per file returned by
    ``analysis_utils.get_parameter_value_from_file_names``, and written to
    ``<first scan base name>_calibration.h5`` both as arrays and as tables.

    Parameters
    ----------
    scan_base_file_name : string or sequence of strings
        Scan base file name(s) without the ``.h5`` extension. If a sequence is
        given (multiple runs), the run configuration is read from the first
        entry and the first entry also names the output files.
    create_plots : bool
        If true, the plots are saved to
        ``<first scan base name>_calibration.pdf``.
    """

    def analyze_raw_data_file(file_name):
        """Interpret one raw data file, skipping it if already interpreted."""
        # file_name[:-3] drops the last three characters of the name
        # (presumably the '.h5' extension)
        if os.path.isfile(file_name[:-3] + '_interpreted.h5'):
            logging.warning('Analyzed data file ' + file_name +
                            ' already exists. Skip analysis for this file.')
            return
        with AnalyzeRawData(raw_data_file=file_name,
                            create_pdf=False) as analyze_raw_data:
            analyze_raw_data.create_tot_hist = False
            analyze_raw_data.create_tot_pixel_hist = False
            analyze_raw_data.create_fitted_threshold_hists = True
            analyze_raw_data.create_threshold_mask = True
            analyze_raw_data.interpreter.set_warning_output(
                False)  # RX errors would fill the console
            analyze_raw_data.interpret_word_table()

    def store_calibration_data_as_table(out_file_h5,
                                        mean_threshold_calibration,
                                        mean_threshold_rms_calibration,
                                        threshold_calibration,
                                        parameter_values):
        """Write the calibration into two tables of the open output file.

        ``ThresholdCalibration`` gets one row per (column, row, parameter
        value), ``MeanThresholdCalibration`` one row per parameter value.
        """
        logging.info("Storing calibration data in a table...")
        filter_table = tb.Filters(complib='blosc',
                                  complevel=5,
                                  fletcher32=False)
        mean_threshold_calib_table = out_file_h5.createTable(
            out_file_h5.root,
            name='MeanThresholdCalibration',
            description=data_struct.MeanThresholdCalibrationTable,
            title='mean_threshold_calibration',
            filters=filter_table)
        threshold_calib_table = out_file_h5.createTable(
            out_file_h5.root,
            name='ThresholdCalibration',
            description=data_struct.ThresholdCalibrationTable,
            title='threshold_calibration',
            filters=filter_table)
        # take the pixel matrix size from the data instead of repeating it
        n_columns, n_rows = threshold_calibration.shape[:2]
        pixel_entry = threshold_calib_table.row
        for column in range(n_columns):
            for row in range(n_rows):
                for parameter_value_index, parameter_value in enumerate(
                        parameter_values):
                    pixel_entry['column'] = column
                    pixel_entry['row'] = row
                    pixel_entry['parameter_value'] = parameter_value
                    pixel_entry['threshold'] = threshold_calibration[
                        column, row, parameter_value_index]
                    pixel_entry.append()
        mean_entry = mean_threshold_calib_table.row
        for parameter_value_index, parameter_value in enumerate(
                parameter_values):
            mean_entry['parameter_value'] = parameter_value
            mean_entry['mean_threshold'] = mean_threshold_calibration[
                parameter_value_index]
            mean_entry['threshold_rms'] = mean_threshold_rms_calibration[
                parameter_value_index]
            mean_entry.append()
        threshold_calib_table.flush()
        mean_threshold_calib_table.flush()
        logging.info("done")

    def store_calibration_data_as_array(out_file_h5,
                                        mean_threshold_calibration,
                                        mean_threshold_rms_calibration,
                                        threshold_calibration, parameter_name,
                                        parameter_values):
        """Write the calibration into three compressed arrays.

        Each array additionally gets the attributes ``dimensions`` and
        ``scan_parameter_values``.
        """
        logging.info("Storing calibration data in an array...")
        filter_table = tb.Filters(complib='blosc',
                                  complevel=5,
                                  fletcher32=False)
        mean_threshold_calib_array = out_file_h5.createCArray(
            out_file_h5.root,
            name='HistThresholdMeanCalibration',
            atom=tb.Atom.from_dtype(mean_threshold_calibration.dtype),
            shape=mean_threshold_calibration.shape,
            title='mean_threshold_calibration',
            filters=filter_table)
        # describe the RMS array by its own dtype/shape, not by the mean array
        mean_threshold_calib_rms_array = out_file_h5.createCArray(
            out_file_h5.root,
            name='HistThresholdRMSCalibration',
            atom=tb.Atom.from_dtype(mean_threshold_rms_calibration.dtype),
            shape=mean_threshold_rms_calibration.shape,
            title='mean_threshold_rms_calibration',
            filters=filter_table)
        threshold_calib_array = out_file_h5.createCArray(
            out_file_h5.root,
            name='HistThresholdCalibration',
            atom=tb.Atom.from_dtype(threshold_calibration.dtype),
            shape=threshold_calibration.shape,
            title='threshold_calibration',
            filters=filter_table)
        mean_threshold_calib_array[:] = mean_threshold_calibration
        mean_threshold_calib_rms_array[:] = mean_threshold_rms_calibration
        threshold_calib_array[:] = threshold_calibration
        # NOTE(review): the mean and RMS arrays are one dimensional, yet they
        # get the same three entry 'dimensions' attribute as the pixel array.
        # Kept unchanged because readers of the file may rely on it - confirm.
        for calib_array in (mean_threshold_calib_array,
                            mean_threshold_calib_rms_array,
                            threshold_calib_array):
            calib_array.attrs.dimensions = ['column', 'row', parameter_name]
        for calib_array in (mean_threshold_calib_array,
                            mean_threshold_calib_rms_array,
                            threshold_calib_array):
            calib_array.attrs.scan_parameter_values = parameter_values

        logging.info("done")

    def mask_columns(pixel_array, ignore_columns):
        """Return pixel_array as masked array with the given columns masked.

        ignore_columns holds column numbers counted from 1; the columns are
        addressed on the second axis of pixel_array.
        """
        # from FE to Array columns; the explicit integer dtype keeps an empty
        # ignore list usable as an index (a float index array is rejected)
        idx = np.asarray(ignore_columns, dtype=int) - 1
        m = np.zeros(pixel_array.shape, dtype=bool)
        m[:, idx] = True
        return np.ma.masked_array(pixel_array, m)

    raw_data_files = analysis_utils.get_data_file_names_from_scan_base(
        scan_base_file_name,
        filter_file_words=['interpreted', 'calibration_calibration'])
    # multiple scan_base_file_names can be given for multiple runs
    first_scan_base_file_name = scan_base_file_name if isinstance(
        scan_base_file_name, basestring) else scan_base_file_name[0]

    # deduce scan parameters from the first (and often only) scan base file name
    with tb.openFile(first_scan_base_file_name + '.h5',
                     mode="r") as in_file_h5:
        # read the run configuration table once instead of once per lookup
        run_conf = in_file_h5.root.configuration.run_conf[:]
        ignore_columns = run_conf[np.where(
            run_conf['name'] == 'ignore_columns')]['value'][0]
        parameter_name = run_conf[np.where(
            run_conf['name'] == 'scan_parameters')]['value'][0]
        ignore_columns = ast.literal_eval(ignore_columns)
        parameter_name = ast.literal_eval(parameter_name)[1][0]

    calibration_file = first_scan_base_file_name + '_calibration'

    # analyze each raw data file, not using multithreading here, it is already
    # used in s-curve fit
    for raw_data_file in raw_data_files:
        analyze_raw_data_file(raw_data_file)

    files_per_parameter = analysis_utils.get_parameter_value_from_file_names(
        [file_name[:-3] + '_interpreted.h5' for file_name in raw_data_files],
        parameter_name,
        unique=True,
        sort=True)

    logging.info("Create calibration from data")
    # Size the result arrays by the number of files that are really read
    # below. Sizing them by len(raw_data_files) leaves uninitialised np.empty
    # entries in the output whenever files_per_parameter has fewer entries.
    n_parameter_values = len(files_per_parameter)
    mean_threshold_calibration = np.empty(shape=(n_parameter_values, ),
                                          dtype='<f8')
    mean_threshold_rms_calibration = np.empty(shape=(n_parameter_values, ),
                                              dtype='<f8')
    threshold_calibration = np.empty(shape=(80, 336, n_parameter_values),
                                     dtype='<f8')

    output_pdf = None
    if create_plots:
        logging.info('Saving calibration plots in: %s',
                     calibration_file + '.pdf')
        output_pdf = PdfPages(calibration_file + '.pdf')

    try:
        progress_bar = progressbar.ProgressBar(
            widgets=[
                '',
                progressbar.Percentage(), ' ',
                progressbar.Bar(marker='*', left='|', right='|'), ' ',
                progressbar.AdaptiveETA()
            ],
            maxval=n_parameter_values,
            term_width=80)
        progress_bar.start()
        parameter_values = []
        for index, (analyzed_data_file,
                    parameters) in enumerate(files_per_parameter.items()):
            parameter_values.append(parameters.values()[0][0])
            with tb.openFile(analyzed_data_file, mode="r") as in_file_h5:
                # mask the not scanned columns for analysis and plotting
                occupancy_masked = mask_columns(
                    pixel_array=in_file_h5.root.HistOcc[:],
                    ignore_columns=ignore_columns)
                thresholds_masked = mask_columns(
                    pixel_array=in_file_h5.root.HistThresholdFitted[:],
                    ignore_columns=ignore_columns)
                if create_plots:
                    plot_three_way(hist=thresholds_masked,
                                   title='Threshold Fitted for ' +
                                   parameters.keys()[0] + ' = ' +
                                   str(parameters.values()[0][0]),
                                   filename=output_pdf)
                    plsr_dacs = analysis_utils.get_scan_parameter(
                        meta_data_array=in_file_h5.root.meta_data[:]
                    )['PlsrDAC']
                    plot_scurves(occupancy_hist=occupancy_masked,
                                 scan_parameters=plsr_dacs,
                                 scan_parameter_name='PlsrDAC',
                                 filename=output_pdf)
                # fill the calibration data arrays
                mean_threshold_calibration[index] = np.ma.mean(
                    thresholds_masked)
                mean_threshold_rms_calibration[index] = np.ma.std(
                    thresholds_masked)
                # the histogram is transposed to match the
                # (column, row, parameter) layout of the calibration array
                threshold_calibration[:, :, index] = thresholds_masked.T
            progress_bar.update(index)
        progress_bar.finish()

        with tb.openFile(calibration_file + '.h5', mode="w") as out_file_h5:
            store_calibration_data_as_array(
                out_file_h5=out_file_h5,
                mean_threshold_calibration=mean_threshold_calibration,
                mean_threshold_rms_calibration=mean_threshold_rms_calibration,
                threshold_calibration=threshold_calibration,
                parameter_name=parameter_name,
                parameter_values=parameter_values)
            store_calibration_data_as_table(
                out_file_h5=out_file_h5,
                mean_threshold_calibration=mean_threshold_calibration,
                mean_threshold_rms_calibration=mean_threshold_rms_calibration,
                threshold_calibration=threshold_calibration,
                parameter_values=parameter_values)

        if create_plots:
            # same data twice: once with a linear and once with a log x axis
            plot_scatter(x=parameter_values,
                         y=mean_threshold_calibration,
                         title='Threshold calibration',
                         x_label=parameter_name,
                         y_label='Mean threshold',
                         log_x=False,
                         filename=output_pdf)
            plot_scatter(x=parameter_values,
                         y=mean_threshold_calibration,
                         title='Threshold calibration',
                         x_label=parameter_name,
                         y_label='Mean threshold',
                         log_x=True,
                         filename=output_pdf)
    finally:
        # close the PDF also when the analysis fails half way through
        if output_pdf is not None:
            output_pdf.close()
Пример #2
0
                        plot_range=(analysis_configuration['min_thr'],
                                    analysis_configuration['max_thr']),
                        title=
                        'Mean single pixel cluster rate at different thresholds',
                        x_label='mean threshold [e]',
                        y_label='mean single pixel cluster rate',
                        filename=plot_file)

                if analysis_configuration['use_cluster_rate_correction']:
                    correction_h5.close()


if __name__ == "__main__":
    # Collect the data files of the configured scan; the meaning of
    # filter_file_words is defined by analysis_utils (presumably file names
    # containing one of these words are left out - TODO confirm).
    data_files = analysis_utils.get_data_file_names_from_scan_base(
        analysis_configuration['scan_name'],
        filter_file_words=['analyzed', 'interpreted', 'cut_',
                           'cluster_sizes', 'trigger_fe'],
        parameter=True)
    # get a sorted ordered dict with GDAC, raw_data_filename
    files_dict = analysis_utils.get_parameter_from_files(data_files,
                                                         unique=True,
                                                         parameters='GDAC')
    logging.info(''.join(
        ['Found ', str(len(files_dict)), ' raw data files.']))

    # All output file names are the first scan name plus a fixed suffix.
    (hit_file,
     hit_cut_file,
     hit_cut_analyzed_file,
     cluster_seed_analyzed_file,
     cluster_sizes_file) = tuple(
         analysis_configuration['scan_name'][0] + suffix
         for suffix in ('_interpreted.h5',
                        '_cut_hits.h5',
                        '_cut_hits_analyzed.h5',
                        '_cluster_seeds_analyzed.h5',
                        '_ALL_cluster_sizes.h5'))
                        out_3.append(result_3)

                    plot_result(x_p, y_p, y_p_e, smoothed_data, smoothed_data_diff)

                    #  calculate and plot mean results
                    x_mean = analysis_utils.get_mean_threshold_from_calibration(gdac_range_source_scan, mean_threshold_calibration)
                    y_mean = selected_pixel_hits.mean(axis=(0))

                    plotting.plot_scatter(np.array(gdac_range_source_scan), y_mean, log_x=True, plot_range=None, title='Mean single pixel cluster rate at different thresholds', x_label='threshold setting [GDAC]', y_label='mean single pixel cluster rate', filename=plot_file)
                    plotting.plot_scatter(x_mean * analysis_configuration['vcal_calibration'], y_mean, plot_range=(analysis_configuration['min_thr'], analysis_configuration['max_thr']), title='Mean single pixel cluster rate at different thresholds', x_label='mean threshold [e]', y_label='mean single pixel cluster rate', filename=plot_file)

                if analysis_configuration['use_cluster_rate_correction']:
                    correction_h5.close()

if __name__ == "__main__":
    data_files = analysis_utils.get_data_file_names_from_scan_base(analysis_configuration['scan_name'], filter_file_words=['analyzed', 'interpreted', 'cut_', 'cluster_sizes', 'trigger_fe'], parameter=True)
    files_dict = analysis_utils.get_parameter_from_files(data_files, unique=True, parameters='GDAC')  # get a sorted ordered dict with GDAC, raw_data_filename
    logging.info('Found ' + str(len(files_dict)) + ' raw data files.')

    hit_file = analysis_configuration['scan_name'][0] + '_interpreted.h5'
    hit_cut_file = analysis_configuration['scan_name'][0] + '_cut_hits.h5'
    hit_cut_analyzed_file = analysis_configuration['scan_name'][0] + '_cut_hits_analyzed.h5'
    cluster_seed_analyzed_file = analysis_configuration['scan_name'][0] + '_cluster_seeds_analyzed.h5'
    cluster_sizes_file = analysis_configuration['scan_name'][0] + '_ALL_cluster_sizes.h5'

    if 1 in analysis_configuration['analysis_steps']:
        analyze_raw_data(input_files=files_dict.keys(), output_file_hits=hit_file, scan_parameter='GDAC')
    if 2 in analysis_configuration['analysis_steps']:
        analyse_selected_hits(input_file_hits=hit_file, output_file_hits=hit_cut_file, output_file_hits_analyzed=hit_cut_analyzed_file, scan_data_filenames=analysis_configuration['scan_name'][0])
    if 2.5 in analysis_configuration['analysis_steps']:
        if os.path.isfile(cluster_seed_analyzed_file) and not analysis_configuration["overwrite_output_files"]:
Пример #4
0
                    plot_result(x_p, y_p, y_p_e, smoothed_data, smoothed_data_diff)

                    #  calculate and plot mean results
                    x_mean = analysis_utils.get_mean_threshold_from_calibration(gdac_range_source_scan, mean_threshold_calibration)
                    y_mean = selected_pixel_hits.mean(axis=(0))

                    plotting.plot_scatter(np.array(gdac_range_source_scan), y_mean, log_x=True, plot_range=None, title='Mean single pixel cluster rate at different thresholds', x_label='threshold setting [GDAC]', y_label='mean single pixel cluster rate', filename=plot_file)
                    plotting.plot_scatter(x_mean * analysis_configuration['vcal_calibration'], y_mean, plot_range=(analysis_configuration['min_thr'], analysis_configuration['max_thr']), title='Mean single pixel cluster rate at different thresholds', x_label='mean threshold [e]', y_label='mean single pixel cluster rate', filename=plot_file)

                if analysis_configuration['use_cluster_rate_correction']:
                    correction_h5.close()


if __name__ == "__main__":
    data_files = analysis_utils.get_data_file_names_from_scan_base(analysis_configuration['scan_name'])
    files_dict = analysis_utils.get_parameter_from_files(data_files, unique=True, parameters='GDAC')  # get a sorted ordered dict with GDAC, raw_data_filename
    logging.info('Found ' + str(len(files_dict)) + ' raw data files.')

    hit_file = analysis_configuration['scan_name'][0] + '_interpreted.h5'
    hit_cut_file = analysis_configuration['scan_name'][0] + '_cut_hits.h5'
    hit_cut_analyzed_file = analysis_configuration['scan_name'][0] + '_cut_hits_analyzed.h5'
    cluster_seed_analyzed_file = analysis_configuration['scan_name'][0] + '_cluster_seeds_analyzed.h5'
    cluster_sizes_file = analysis_configuration['scan_name'][0] + '_ALL_cluster_sizes.h5'

    if 1 in analysis_configuration['analysis_steps']:
        analyze_raw_data(input_files=files_dict.keys(), output_file_hits=hit_file, scan_parameter='GDAC')
    if 2 in analysis_configuration['analysis_steps']:
        analyse_selected_hits(input_file_hits=hit_file, output_file_hits=hit_cut_file, output_file_hits_analyzed=hit_cut_analyzed_file, scan_data_filenames=analysis_configuration['scan_name'][0])
    if 2.5 in analysis_configuration['analysis_steps']:
        if os.path.isfile(cluster_seed_analyzed_file) and not analysis_configuration["overwrite_output_files"]:
Пример #5
0
    def analyze(self):
        """Create a threshold calibration from the raw data files of the scan.

        Every raw data file found for ``self.output_filename`` is interpreted.
        The fitted thresholds of each interpreted file are then collected, one
        entry per file returned by
        ``analysis_utils.get_parameter_value_from_file_names``, and written to
        ``<self.output_filename>_calibration.h5`` both as arrays and as
        tables. If ``self.create_plots`` is true, the plots are saved to
        ``<self.output_filename>_calibration.pdf``.
        """

        def analyze_raw_data_file(file_name):
            """Interpret one raw data file."""
            with AnalyzeRawData(raw_data_file=file_name,
                                create_pdf=False) as analyze_raw_data:
                analyze_raw_data.create_tot_hist = False
                analyze_raw_data.create_fitted_threshold_hists = True
                analyze_raw_data.create_threshold_mask = True
                # so far the data structure in a threshold scan was always
                # bad, too many warnings given
                # NOTE(review): that remark suggests the warning output should
                # be switched off, but it is switched on here - confirm.
                analyze_raw_data.interpreter.set_warning_output(True)
                analyze_raw_data.interpret_word_table()

        def store_calibration_data_as_table(out_file_h5,
                                            mean_threshold_calibration,
                                            mean_threshold_rms_calibration,
                                            threshold_calibration,
                                            parameter_values):
            """Write the calibration into two tables of the open output file.

            ``ThresholdCalibration`` gets one row per (column, row, parameter
            value), ``MeanThresholdCalibration`` one row per parameter value.
            """
            logging.info("Storing calibration data in a table...")
            filter_table = tb.Filters(complib='blosc',
                                      complevel=5,
                                      fletcher32=False)
            mean_threshold_calib_table = out_file_h5.createTable(
                out_file_h5.root,
                name='MeanThresholdCalibration',
                description=data_struct.MeanThresholdCalibrationTable,
                title='mean_threshold_calibration',
                filters=filter_table)
            threshold_calib_table = out_file_h5.createTable(
                out_file_h5.root,
                name='ThresholdCalibration',
                description=data_struct.ThresholdCalibrationTable,
                title='threshold_calibration',
                filters=filter_table)
            # take the pixel matrix size from the data instead of repeating it
            n_columns, n_rows = threshold_calibration.shape[:2]
            pixel_entry = threshold_calib_table.row
            for column in range(n_columns):
                for row in range(n_rows):
                    for parameter_value_index, parameter_value in enumerate(
                            parameter_values):
                        pixel_entry['column'] = column
                        pixel_entry['row'] = row
                        pixel_entry['parameter_value'] = parameter_value
                        pixel_entry['threshold'] = threshold_calibration[
                            column, row, parameter_value_index]
                        pixel_entry.append()
            mean_entry = mean_threshold_calib_table.row
            for parameter_value_index, parameter_value in enumerate(
                    parameter_values):
                mean_entry['parameter_value'] = parameter_value
                mean_entry['mean_threshold'] = mean_threshold_calibration[
                    parameter_value_index]
                mean_entry['threshold_rms'] = mean_threshold_rms_calibration[
                    parameter_value_index]
                mean_entry.append()
            threshold_calib_table.flush()
            mean_threshold_calib_table.flush()
            logging.info("done")

        def store_calibration_data_as_array(out_file_h5,
                                            mean_threshold_calibration,
                                            mean_threshold_rms_calibration,
                                            threshold_calibration):
            """Write the calibration into three compressed arrays."""
            logging.info("Storing calibration data in an array...")
            filter_table = tb.Filters(complib='blosc',
                                      complevel=5,
                                      fletcher32=False)
            mean_threshold_calib_array = out_file_h5.createCArray(
                out_file_h5.root,
                name='HistThresholdMeanCalibration',
                atom=tb.Atom.from_dtype(mean_threshold_calibration.dtype),
                shape=mean_threshold_calibration.shape,
                title='mean_threshold_calibration',
                filters=filter_table)
            # describe the RMS array by its own dtype/shape, not by the mean
            # array
            mean_threshold_calib_rms_array = out_file_h5.createCArray(
                out_file_h5.root,
                name='HistThresholdRMSCalibration',
                atom=tb.Atom.from_dtype(mean_threshold_rms_calibration.dtype),
                shape=mean_threshold_rms_calibration.shape,
                title='mean_threshold_rms_calibration',
                filters=filter_table)
            threshold_calib_array = out_file_h5.createCArray(
                out_file_h5.root,
                name='HistThresholdCalibration',
                atom=tb.Atom.from_dtype(threshold_calibration.dtype),
                shape=threshold_calibration.shape,
                title='threshold_calibration',
                filters=filter_table)
            mean_threshold_calib_array[:] = mean_threshold_calibration
            mean_threshold_calib_rms_array[:] = mean_threshold_rms_calibration
            threshold_calib_array[:] = threshold_calibration
            logging.info("done")

        def mask_columns(pixel_array, ignore_columns):
            """Return pixel_array as masked array with the columns masked.

            ignore_columns holds column numbers counted from 1; the columns
            are addressed on the second axis of pixel_array.
            """
            # from FE to Array columns; the explicit integer dtype keeps an
            # empty ignore list usable as an index (a float index array is
            # rejected)
            idx = np.asarray(ignore_columns, dtype=int) - 1
            m = np.zeros(pixel_array.shape, dtype=bool)
            m[:, idx] = True
            return np.ma.masked_array(pixel_array, m)

        calibration_file = self.output_filename + '_calibration'
        raw_data_files = analysis_utils.get_data_file_names_from_scan_base(
            self.output_filename,
            filter_file_words=['interpreted', 'calibration_calibration'])
        parameter_name = self.scan_parameters._fields[1]

        # no using multithreading here, it is already used in fit
        for raw_data_file in raw_data_files:
            analyze_raw_data_file(raw_data_file)

        files_per_parameter = analysis_utils.get_parameter_value_from_file_names(
            [
                file_name[:-3] + '_interpreted.h5'
                for file_name in raw_data_files
            ], parameter_name)

        logging.info("Create calibration from data")
        # deduce settings from raw data file
        with tb.openFile(self.output_filename + '.h5',
                         mode="r") as in_file_h5:
            # read the run configuration table once instead of twice
            run_conf = in_file_h5.root.configuration.run_conf[:]
            ignore_columns = run_conf[np.where(
                run_conf['name'] == 'ignore_columns')]['value'][0]
            ignore_columns = ast.literal_eval(ignore_columns)

        # Size the result arrays by the number of files that are really read
        # below. Sizing them by len(raw_data_files) leaves uninitialised
        # np.empty entries in the output whenever files_per_parameter has
        # fewer entries.
        n_parameter_values = len(files_per_parameter)
        mean_threshold_calibration = np.empty(shape=(n_parameter_values, ),
                                              dtype='<f8')
        mean_threshold_rms_calibration = np.empty(
            shape=(n_parameter_values, ), dtype='<f8')
        threshold_calibration = np.empty(shape=(80, 336, n_parameter_values),
                                         dtype='<f8')

        output_pdf = None
        if self.create_plots:
            logging.info('Saving calibration plots in: %s',
                         calibration_file + '.pdf')
            output_pdf = PdfPages(calibration_file + '.pdf')

        try:
            parameter_values = []
            for index, (analyzed_data_file,
                        parameters) in enumerate(files_per_parameter.items()):
                parameter_values.append(parameters.values()[0][0])
                with tb.openFile(analyzed_data_file, mode="r") as in_file_h5:
                    # mask the not scanned columns for analysis and plotting
                    occupancy_masked = mask_columns(
                        pixel_array=in_file_h5.root.HistOcc[:],
                        ignore_columns=ignore_columns)
                    thresholds_masked = mask_columns(
                        pixel_array=in_file_h5.root.HistThresholdFitted[:],
                        ignore_columns=ignore_columns)
                    if self.create_plots:
                        plotThreeWay(hist=thresholds_masked,
                                     title='Threshold Fitted for ' +
                                     parameters.keys()[0] + ' = ' +
                                     str(parameters.values()[0][0]),
                                     filename=output_pdf)
                        plsr_dacs = analysis_utils.get_scan_parameter(
                            meta_data_array=in_file_h5.root.meta_data[:]
                        )['PlsrDAC']
                        plot_scurves(occupancy_hist=occupancy_masked,
                                     scan_parameters=plsr_dacs,
                                     scan_parameter_name='PlsrDAC',
                                     filename=output_pdf)
                    # fill the calibration data arrays
                    mean_threshold_calibration[index] = np.ma.mean(
                        thresholds_masked)
                    mean_threshold_rms_calibration[index] = np.ma.std(
                        thresholds_masked)
                    # the histogram is transposed to match the
                    # (column, row, parameter) layout of the calibration array
                    threshold_calibration[:, :, index] = thresholds_masked.T

            with tb.openFile(calibration_file + '.h5',
                             mode="w") as out_file_h5:
                store_calibration_data_as_array(
                    out_file_h5=out_file_h5,
                    mean_threshold_calibration=mean_threshold_calibration,
                    mean_threshold_rms_calibration=mean_threshold_rms_calibration,
                    threshold_calibration=threshold_calibration)
                store_calibration_data_as_table(
                    out_file_h5=out_file_h5,
                    mean_threshold_calibration=mean_threshold_calibration,
                    mean_threshold_rms_calibration=mean_threshold_rms_calibration,
                    threshold_calibration=threshold_calibration,
                    parameter_values=parameter_values)

            if self.create_plots:
                # same data twice: once with a linear and once with a log
                # x axis
                plot_scatter(x=parameter_values,
                             y=mean_threshold_calibration,
                             title='Threshold calibration',
                             x_label=parameter_name,
                             y_label='Mean threshold',
                             log_x=False,
                             filename=output_pdf)
                plot_scatter(x=parameter_values,
                             y=mean_threshold_calibration,
                             title='Threshold calibration',
                             x_label=parameter_name,
                             y_label='Mean threshold',
                             log_x=True,
                             filename=output_pdf)
        finally:
            # close the PDF also when the analysis fails half way through
            if output_pdf is not None:
                output_pdf.close()
Пример #6
0
def create_threshold_calibration(scan_base_file_name, create_plots=True):
    """Create a threshold calibration from a set of threshold scans; can be called stand alone.

    The raw data files belonging to ``scan_base_file_name`` are interpreted
    (unless an ``*_interpreted.h5`` file already exists), the fitted
    thresholds of every interpreted file are read, the columns listed in the
    run configuration entry ``ignore_columns`` are masked, and the mean / RMS
    and per-pixel thresholds are written to
    ``<first scan base name>_calibration.h5`` both as arrays and as tables.

    Parameters
    ----------
    scan_base_file_name : string or sequence of strings
        Scan base file name(s) without the ``.h5`` suffix. When a sequence is
        given, the scan parameter name and the ignored columns are read from
        the first entry, which also names the output files.
    create_plots : bool
        If True, threshold maps, s-curves and the mean threshold vs. scan
        parameter plots are stored in ``<first scan base name>_calibration.pdf``.

    Returns
    -------
    None
    """
    n_columns, n_rows = 80, 336  # pixel matrix dimensions used for the calibration arrays

    def analyze_raw_data_file(file_name):
        # file_name ends with '.h5'; the interpreted file sits next to it
        if os.path.isfile(file_name[:-3] + '_interpreted.h5'):  # skip analysis if already done
            logging.warning('Analyzed data file %s already exists. Skip analysis for this file.', file_name)
            return
        with AnalyzeRawData(raw_data_file=file_name, create_pdf=False) as analyze_raw_data:
            analyze_raw_data.create_tot_hist = False
            analyze_raw_data.create_tot_pixel_hist = False
            analyze_raw_data.create_fitted_threshold_hists = True
            analyze_raw_data.create_threshold_mask = True
            analyze_raw_data.interpreter.set_warning_output(False)  # RX errors would fill the console
            analyze_raw_data.interpret_word_table()

    def store_calibration_data_as_table(out_file_h5, mean_threshold_calibration, mean_threshold_rms_calibration, threshold_calibration, parameter_values):
        # One table row per (column, row, parameter value) plus one row per parameter value for mean / RMS
        logging.info("Storing calibration data in a table...")
        filter_table = tb.Filters(complib='blosc', complevel=5, fletcher32=False)
        mean_threshold_calib_table = out_file_h5.createTable(out_file_h5.root, name='MeanThresholdCalibration', description=data_struct.MeanThresholdCalibrationTable, title='mean_threshold_calibration', filters=filter_table)
        threshold_calib_table = out_file_h5.createTable(out_file_h5.root, name='ThresholdCalibration', description=data_struct.ThresholdCalibrationTable, title='threshold_calibration', filters=filter_table)

        # Bind the row accessor once instead of looking it up for every single field assignment
        threshold_row = threshold_calib_table.row
        for column in range(n_columns):
            for row in range(n_rows):
                for parameter_value_index, parameter_value in enumerate(parameter_values):
                    threshold_row['column'] = column
                    threshold_row['row'] = row
                    threshold_row['parameter_value'] = parameter_value
                    threshold_row['threshold'] = threshold_calibration[column, row, parameter_value_index]
                    threshold_row.append()

        mean_row = mean_threshold_calib_table.row
        for parameter_value_index, parameter_value in enumerate(parameter_values):
            mean_row['parameter_value'] = parameter_value
            mean_row['mean_threshold'] = mean_threshold_calibration[parameter_value_index]
            mean_row['threshold_rms'] = mean_threshold_rms_calibration[parameter_value_index]
            mean_row.append()

        threshold_calib_table.flush()
        mean_threshold_calib_table.flush()
        logging.info("done")

    def store_calibration_data_as_array(out_file_h5, mean_threshold_calibration, mean_threshold_rms_calibration, threshold_calibration, parameter_name, parameter_values):
        # Same data as the tables, stored as compressed arrays with the scan parameter values as attribute
        logging.info("Storing calibration data in an array...")
        filter_table = tb.Filters(complib='blosc', complevel=5, fletcher32=False)
        mean_threshold_calib_array = out_file_h5.createCArray(out_file_h5.root, name='HistThresholdMeanCalibration', atom=tb.Atom.from_dtype(mean_threshold_calibration.dtype), shape=mean_threshold_calibration.shape, title='mean_threshold_calibration', filters=filter_table)
        # the RMS array is created with dtype / shape of the mean array; both are allocated identically below
        mean_threshold_calib_rms_array = out_file_h5.createCArray(out_file_h5.root, name='HistThresholdRMSCalibration', atom=tb.Atom.from_dtype(mean_threshold_calibration.dtype), shape=mean_threshold_calibration.shape, title='mean_threshold_rms_calibration', filters=filter_table)
        threshold_calib_array = out_file_h5.createCArray(out_file_h5.root, name='HistThresholdCalibration', atom=tb.Atom.from_dtype(threshold_calibration.dtype), shape=threshold_calibration.shape, title='threshold_calibration', filters=filter_table)
        mean_threshold_calib_array[:] = mean_threshold_calibration
        mean_threshold_calib_rms_array[:] = mean_threshold_rms_calibration
        threshold_calib_array[:] = threshold_calibration
        # NOTE(review): the mean / RMS arrays are 1-dimensional but get the 3-dimensional label list;
        # kept unchanged because readers of the file may rely on it - confirm before correcting
        for calib_array in (mean_threshold_calib_array, mean_threshold_calib_rms_array, threshold_calib_array):
            calib_array.attrs.dimensions = ['column', 'row', parameter_name]
        for calib_array in (mean_threshold_calib_array, mean_threshold_calib_rms_array, threshold_calib_array):
            calib_array.attrs.scan_parameter_values = parameter_values

        logging.info("done")

    def mask_columns(pixel_array, ignore_columns):
        # Explicit integer dtype: an empty ignore list would otherwise give a float array,
        # which numpy does not accept as an index
        idx = np.array(ignore_columns, dtype=int) - 1  # from FE to Array columns
        m = np.zeros_like(pixel_array)
        m[:, idx] = 1
        return np.ma.masked_array(pixel_array, m)

    raw_data_files = analysis_utils.get_data_file_names_from_scan_base(scan_base_file_name, filter_file_words=['interpreted', 'calibration_calibration'])
    first_scan_base_file_name = scan_base_file_name if isinstance(scan_base_file_name, basestring) else scan_base_file_name[0]  # multiple scan_base_file_names for multiple runs

    with tb.openFile(first_scan_base_file_name + '.h5', mode="r") as in_file_h5:  # deduce scan parameters from the first (and often only) scan base file name
        run_conf = in_file_h5.root.configuration.run_conf[:]  # read the configuration table only once
        ignore_columns = run_conf[np.where(run_conf['name'] == 'ignore_columns')]['value'][0]
        parameter_name = run_conf[np.where(run_conf['name'] == 'scan_parameters')]['value'][0]
    ignore_columns = ast.literal_eval(ignore_columns)
    parameter_name = ast.literal_eval(parameter_name)[1][0]  # first field of the second scan parameter entry

    calibration_file = first_scan_base_file_name + '_calibration'

    for raw_data_file in raw_data_files:  # analyze each raw data file, not using multithreading here, it is already used in s-curve fit
        analyze_raw_data_file(raw_data_file)

    files_per_parameter = analysis_utils.get_parameter_value_from_file_names([file_name[:-3] + '_interpreted.h5' for file_name in raw_data_files], parameter_name, unique=True, sort=True)
    scan_files = list(files_per_parameter.items())
    n_scan_files = len(scan_files)

    logging.info("Create calibration from data")
    # Sized by the number of files that are actually read below, so that no np.empty entry stays
    # uninitialized if fewer files than raw data files are returned
    mean_threshold_calibration = np.empty(shape=(n_scan_files,), dtype='<f8')
    mean_threshold_rms_calibration = np.empty(shape=(n_scan_files,), dtype='<f8')
    threshold_calibration = np.empty(shape=(n_columns, n_rows, n_scan_files), dtype='<f8')

    output_pdf = None
    if create_plots:
        logging.info('Saving calibration plots in: %s', calibration_file + '.pdf')
        output_pdf = PdfPages(calibration_file + '.pdf')

    try:  # make sure the PDF is closed even if reading, plotting or storing fails
        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=n_scan_files, term_width=80)
        progress_bar.start()
        parameter_values = []
        for index, (analyzed_data_file, parameters) in enumerate(scan_files):
            # first (name, values) pair of the mapping; its first value is the scan parameter setting of this file
            scan_par_name, scan_par_values = next(iter(parameters.items()))
            scan_par_value = scan_par_values[0]
            parameter_values.append(scan_par_value)
            with tb.openFile(analyzed_data_file, mode="r") as in_file_h5:
                occupancy_masked = mask_columns(pixel_array=in_file_h5.root.HistOcc[:], ignore_columns=ignore_columns)  # mask the not scanned columns for analysis and plotting
                thresholds_masked = mask_columns(pixel_array=in_file_h5.root.HistThresholdFitted[:], ignore_columns=ignore_columns)
                if create_plots:
                    plot_three_way(hist=thresholds_masked, title='Threshold Fitted for ' + scan_par_name + ' = ' + str(scan_par_value), filename=output_pdf)
                    plsr_dacs = analysis_utils.get_scan_parameter(meta_data_array=in_file_h5.root.meta_data[:])['PlsrDAC']
                    plot_scurves(occupancy_hist=occupancy_masked, scan_parameters=plsr_dacs, scan_parameter_name='PlsrDAC', filename=output_pdf)
                # fill the calibration data arrays
                mean_threshold_calibration[index] = np.ma.mean(thresholds_masked)
                mean_threshold_rms_calibration[index] = np.ma.std(thresholds_masked)
                threshold_calibration[:, :, index] = thresholds_masked.T  # transposed to column, row order
            progress_bar.update(index + 1)  # number of files finished so far
        progress_bar.finish()

        with tb.openFile(calibration_file + '.h5', mode="w") as out_file_h5:
            store_calibration_data_as_array(out_file_h5=out_file_h5, mean_threshold_calibration=mean_threshold_calibration, mean_threshold_rms_calibration=mean_threshold_rms_calibration, threshold_calibration=threshold_calibration, parameter_name=parameter_name, parameter_values=parameter_values)
            store_calibration_data_as_table(out_file_h5=out_file_h5, mean_threshold_calibration=mean_threshold_calibration, mean_threshold_rms_calibration=mean_threshold_rms_calibration, threshold_calibration=threshold_calibration, parameter_values=parameter_values)

        if create_plots:
            # same data twice: linear and logarithmic x axis
            plot_scatter(x=parameter_values, y=mean_threshold_calibration, title='Threshold calibration', x_label=parameter_name, y_label='Mean threshold', log_x=False, filename=output_pdf)
            plot_scatter(x=parameter_values, y=mean_threshold_calibration, title='Threshold calibration', x_label=parameter_name, y_label='Mean threshold', log_x=True, filename=output_pdf)
    finally:
        if output_pdf is not None:
            output_pdf.close()