Example #1
def filter_idc_data(data_a, data_c, z_range):
    """
    Select the A-side and/or C-side data based on the z range.

    :param list data_a: list of arrays of values from the A-side
    :param list data_c: list of arrays of values from the C-side
    :param list z_range: a list of [min_z, max_z] values.
                         If the interval contains positive z, A-side data will be used.
                         Similarly, C-side data is used if the interval contains negative z.
    :return: tuple with the selected data. If both the A- and C-side are selected,
             the corresponding arrays are stacked.
    :rtype: tuple
    """
    # TODO: Getter and application of Fourier coefficients need to be modified to handle both A and
    # C side at the same time
    if z_range[0] < 0 and z_range[1] > 0:  # pylint: disable=chained-comparison
        logger = get_logger()
        logger.fatal("Framework not yet fully prepared to use data from both A and C side at once.")

    output_data = []
    for data in data_a:
        output_data.append([])
    if z_range[1] > 0:
        for ind, data in enumerate(data_a):
            output_data[ind] = np.hstack((output_data[ind],
                                          data / (scipy.constants.e * NELE_PER_ADC))) # charge (C) -> ADC counts
    if z_range[0] < 0:
        for ind, data in enumerate(data_c):
            output_data[ind] = np.hstack((output_data[ind],
                                          data / (scipy.constants.e * NELE_PER_ADC))) # charge (C) -> ADC counts
    return tuple(output_data)
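
A minimal usage sketch, assuming the function above is in scope together with numpy and scipy; NELE_PER_ADC and the data values below are hypothetical placeholders, not project values:

import numpy as np
import scipy.constants

NELE_PER_ADC = 670  # hypothetical value; the real constant comes from the project

data_a = [np.array([1.0e-15, 2.0e-15])]   # fake A-side charges in Coulomb
data_c = [np.array([3.0e-15])]            # fake C-side charges in Coulomb

# z interval fully on the positive side -> only A-side data, converted to ADC counts
(adc_a,) = filter_idc_data(data_a, data_c, [0.0, 250.0])

# z interval fully on the negative side -> only C-side data
(adc_c,) = filter_idc_data(data_a, data_c, [-250.0, 0.0])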
Example #2
def load_train_apply(input_data, event_index, input_z_range, output_z_range,
                     grid_r, grid_rphi, grid_z, opt_train, opt_pred):
    """
    Load inputs and expected outputs for training / apply for one event of the IDC data.
    """

    [vec_mean_sc, vec_fluctuation_sc, vec_fluctuation_dist_r,
     vec_fluctuation_dist_rphi, vec_fluctuation_dist_z] = \
        load_data(input_data, event_index, input_z_range, output_z_range)
    dim_input = sum(opt_train)
    dim_output = sum(opt_pred)
    inputs = np.empty((grid_rphi, grid_r, grid_z, dim_input))
    exp_outputs = np.empty((grid_rphi, grid_r, grid_z, dim_output))

    indexfillx = 0
    if opt_train[0] == 1:
        inputs[:, :, :, indexfillx] = \
                vec_mean_sc.reshape(grid_rphi, grid_r, grid_z)
        indexfillx = indexfillx + 1
    if opt_train[1] == 1:
        inputs[:, :, :, indexfillx] = \
                vec_fluctuation_sc.reshape(grid_rphi, grid_r, grid_z)

    if dim_output > 1:
        logger = get_logger()
        logger.fatal("YOU CAN PREDICT ONLY 1 DISTORSION. The sum of opt_predout == 1")

    flucs = np.array((vec_fluctuation_dist_r, vec_fluctuation_dist_rphi, vec_fluctuation_dist_z))
    sel_flucs = flucs[np.array(opt_pred) == 1]
    for ind, vec_fluctuation_dist in enumerate(sel_flucs):
        exp_outputs[:, :, :, ind] = \
                vec_fluctuation_dist.reshape(grid_rphi, grid_r, grid_z)

    #print("DIMENSION INPUT TRAINING", inputs.shape)
    #print("DIMENSION OUTPUT TRAINING", exp_outputs.shape)

    return inputs, exp_outputs
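
The binary opt_train / opt_pred flags decide which vectors fill the channels of the (grid_rphi, grid_r, grid_z, dim) arrays. A small self-contained numpy sketch of the same masking pattern; grid sizes and values are made up:

import numpy as np

grid_rphi, grid_r, grid_z = 2, 3, 4
opt_pred = [0, 1, 0]   # predict only the rphi distortion fluctuation

flucs = np.array((np.zeros(24), np.ones(24), np.full(24, 2.0)))   # r, rphi, z vectors
sel_flucs = flucs[np.array(opt_pred) == 1]                         # keeps only the rphi row
exp_outputs = np.empty((grid_rphi, grid_r, grid_z, sum(opt_pred)))
for ind, vec in enumerate(sel_flucs):
    exp_outputs[:, :, :, ind] = vec.reshape(grid_rphi, grid_r, grid_z)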
Example #3
    def __init__(self):
        """
        Initialize the validator
        """
        logger = get_logger()
        logger.info("IDCDataValidator::Init")
        self.model = None
        self.config = None
Example #4
def log_memory_usage(objects):
    """
    Write memory sizes of the objects to the console.

    :param list objects: list of tuples(obj, str) with objects and logging comments
    """
    logger = get_logger()
    for obj, comment in objects:
        size, mult = format_memory(get_memory_usage(obj))
        logger.info("%s memory usage: %d %sB", comment, size, mult)
Example #5
def load_train_apply(dirinput, event_index, z_range, grid_r, grid_rphi, grid_z,
                     opt_train, opt_pred):
    """
    Load inputs and outputs for training / apply for one event.
    NOTE: Function for the old data; it will be deprecated.

    :param str dirinput: the directory with the input data, value taken from the config file
    :param list event_index: a list of [random_index, mean_map_index] indices of the random
                             and the mean map, respectively.
    :param list z_range: a list of [min_z, max_z] values, the input and output data is taken
                         from this interval
    :param int grid_r: grid granularity (number of voxels) along r-axis
    :param int grid_rphi: grid granularity (number of voxels) along rphi-axis
    :param int grid_z: grid granularity (number of voxels) along z-axis
    :param list opt_train: list of 2 binary values corresponding to activating the train input of
                           average space charge and space-charge fluctuations, respectively,
                           taken from the config file
    :param list opt_pred: list of 3 binary values corresponding to activating the prediction of
                          r, rphi and z distortion corrections, taken from the config file
    :return: tuple of inputs and expected outputs
    :rtype: tuple
    """
    [vec_mean_sc, vec_fluctuation_sc, vec_fluctuation_dist_r,
     vec_fluctuation_dist_rphi, vec_fluctuation_dist_z] = \
        load_data(dirinput, event_index, z_range)
    dim_input = sum(opt_train)
    dim_output = sum(opt_pred)
    inputs = np.empty((grid_rphi, grid_r, grid_z, dim_input))
    exp_outputs = np.empty((grid_rphi, grid_r, grid_z, dim_output))

    indexfillx = 0
    if opt_train[0] == 1:
        inputs[:, :, :, indexfillx] = \
                vec_mean_sc.reshape(grid_rphi, grid_r, grid_z)
        indexfillx = indexfillx + 1
    if opt_train[1] == 1:
        inputs[:, :, :, indexfillx] = \
                vec_fluctuation_sc.reshape(grid_rphi, grid_r, grid_z)

    if dim_output > 1:
        logger = get_logger()
        logger.fatal(
            "YOU CAN PREDICT ONLY 1 DISTORTION. The sum of opt_predout == 1")

    flucs = np.array((vec_fluctuation_dist_r, vec_fluctuation_dist_rphi,
                      vec_fluctuation_dist_z))
    sel_flucs = flucs[np.array(opt_pred) == 1]
    for ind, vec_fluctuation_dist in enumerate(sel_flucs):
        exp_outputs[:, :, :, ind] = \
                vec_fluctuation_dist.reshape(grid_rphi, grid_r, grid_z)

    return inputs, exp_outputs
Example #6
def log_time(start, end, comment):
    """
    Write elapsed time to the console.

    :param double start: start time
    :param double end: end time
    :param str comment: string attached to the console output
    """
    logger = get_logger()
    elapsed_time = end - start
    time_min = int(elapsed_time // 60)
    time_sec = int(elapsed_time % 60)
    logger.info("Elapsed time %s: %dm %ds", comment, time_min, time_sec)
Example #7
def setup_tf():
    # optionally limit GPU memory usage
    if os.environ.get('TPCwithDNNSETMEMLIMIT'):
        gpus = tf.config.experimental.list_physical_devices('GPU')
        if gpus:
            try:
                mem_limit = int(os.environ.get('TPCwithDNNSETMEMLIMIT'))
                tf.config.experimental.set_virtual_device_configuration(
                    gpus[0],
                    [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=mem_limit)])
                # for gpu in gpus:
                #     tf.config.experimental.set_memory_growth(gpu, True)
            except RuntimeError as e:
                logger = get_logger()
                logger.error(e)
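
A minimal sketch of how the limit could be enabled before calling setup_tf(); the 4096 MB value is a hypothetical choice (TensorFlow interprets memory_limit in megabytes):

import os

# TPCwithDNNSETMEMLIMIT is the variable checked by setup_tf() above
os.environ["TPCwithDNNSETMEMLIMIT"] = "4096"
setup_tf()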
Example #8
def log_total_memory_usage(comment=None):
    """
    Write the memory usage of the program to the console.

    :param str comment: additional comment for logging
    """
    logger = get_logger()
    if comment is not None:
        logger.info(comment)
    size, mult = format_memory(psutil.virtual_memory().available)
    logger.info("Free RAM: %d %sB", size, mult)
    size, mult = format_memory(
        resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
    logger.info("RAM used by application: %d %sB", size, mult)
Example #9
def load_event_idc(dirinput, event_index, input_z_range, output_z_range,
                   opt_pred):
    """
    Load inputs and expected outputs for one event of the IDC data.
    """

    inputs, exp_outputs = load_data_one_idc(dirinput, event_index, input_z_range, output_z_range,
                                            opt_pred)

    dim_output = sum(opt_pred)
    if dim_output > 1:
        logger = get_logger()
        logger.fatal("YOU CAN PREDICT ONLY 1 DISTORSION. The sum of opt_predout == 1")

    #print("DIMENSION INPUT TRAINING", inputs.shape)
    #print("DIMENSION OUTPUT TRAINING", exp_outputs.shape)

    return inputs, exp_outputs
Example #10
def main():
    """ The global main function """
    logger = get_logger()
    logger.info("Starting TPC ML...")

    if len(sys.argv) == 2:
        default_file_name = sys.argv[1]
        print("Using user specified steering options file: %s" %
              default_file_name)
    else:
        default_file_name = "default.yml"

    with open(default_file_name, 'r') as default_data:
        default = yaml.safe_load(default_data)
    case = default["case"]
    with open("database_parameters_%s.yml" % case, 'r') as parameters_data:
        db_parameters = yaml.safe_load(parameters_data)
Example #11
def mat_to_vec(opt_pred, mat_tuple):
    """
    Convert multidimensional arrays to flat vectors.

    :param list opt_pred: list of 3 binary values corresponding to activation of
                          r, rphi and z distortion corrections, taken from the config file
    :param tuple mat_tuple: tuple of arrays to be flattened
    :return: tuple of flattened input arrays
    :rtype: tuple
    """
    if sum(opt_pred) > 1:
        logger = get_logger()
        logger.fatal("Framework not yet fully prepared for more than one distortion direction.")

    sel_opts = np.array(opt_pred) > 0
    res = tuple(np.hstack(mat[sel_opts]) for mat in mat_tuple)
    return res
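
A small self-contained sketch of the row selection and flattening done above; the arrays and the component ordering (rows assumed to be r, rphi, z) are illustrative only:

import numpy as np

opt_pred = [1, 0, 0]                   # keep only the r component
sel_opts = np.array(opt_pred) > 0

mat_a = np.arange(6).reshape(3, 2)     # rows assumed ordered as r, rphi, z
mat_b = np.arange(6, 12).reshape(3, 2)
flat_a, flat_b = tuple(np.hstack(mat[sel_opts]) for mat in (mat_a, mat_b))
# flat_a -> array([0, 1]); flat_b -> array([6, 7])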
Example #12
    def train(self):
        """
        Train the optimizer.
        """
        self.config.logger.info("XGBoostOptimiser::train")
        if self.config.dim_output > 1:
            logger = get_logger()
            logger.fatal(
                "YOU CAN PREDICT ONLY 1 DISTORTION. dim_output is bigger than 1."
            )

        model = XGBRFRegressor(verbosity=1, **(self.config.params))
        start = timer()
        inputs, exp_outputs, *_ = self.__get_data("train")
        end = timer()
        log_time(start, end, "for loading training data")
        log_memory_usage(
            ((inputs, "Input train data"), (exp_outputs, "Output train data")))
        log_total_memory_usage("Memory usage after loading data")
        if self.config.plot_train:
            inputs_val, outputs_val, *_ = self.__get_data("validation")
            log_memory_usage(((inputs_val, "Input validation data"),
                              (outputs_val, "Output validation data")))
            log_total_memory_usage(
                "Memory usage after loading validation data")
            self.__plot_train(model, inputs, exp_outputs, inputs_val,
                              outputs_val)
        start = timer()
        model.fit(inputs, exp_outputs)
        end = timer()
        log_time(start, end, "actual train")
        model.get_booster().feature_names = get_input_names_oned_idc(
            self.config.opt_usederivative,
            self.config.num_fourier_coeffs_train)
        self.__plot_feature_importance(model)
        self.save_model(model)
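
The model is an XGBoost random-forest regressor. A minimal, self-contained fit/predict sketch with synthetic data, assuming the xgboost package is installed; the hyperparameters below are placeholders, not the project's configuration:

import numpy as np
from xgboost import XGBRFRegressor

rng = np.random.default_rng(42)
inputs = rng.normal(size=(100, 5))                                # 100 samples, 5 features
exp_outputs = 2.0 * inputs[:, 0] + rng.normal(scale=0.1, size=100)

model = XGBRFRegressor(verbosity=1, n_estimators=50, max_depth=4)  # placeholder params
model.fit(inputs, exp_outputs)
predictions = model.predict(inputs[:5])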
Example #13
def main():
    """ The global main function """
    logger = get_logger()
    logger.info("Starting TPC ML...")

    log_total_memory_usage("Initial memory usage")

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-c",
                        "--config",
                        dest="config_file",
                        default="config_model_parameters.yml",
                        type=str,
                        help="path to the *.yml configuration file")
    parser.add_argument("-s",
                        "--steer",
                        dest="steer_file",
                        default="default.yml",
                        type=str,
                        help="path to the *.yml steering file")
    # parameters for steer file
    parser.add_argument("--dotrain",
                        action='store_true',
                        default=argparse.SUPPRESS,
                        help="Perform the training")
    parser.add_argument("--docreateinputdata",
                        action='store_true',
                        default=argparse.SUPPRESS,
                        help="Create input data trees")
    parser.add_argument("--docreatevaldata",
                        action='store_true',
                        default=argparse.SUPPRESS,
                        help="Create validation data trees")
    # parameters for config file
    parser.add_argument("--rndaugment",
                        action='store_true',
                        default=argparse.SUPPRESS,
                        help="Use random-random augmentation for training")
    parser.add_argument("--ntrain1d",
                        dest='train_events_oned',
                        type=int,
                        default=argparse.SUPPRESS,
                        help="Set custom number of training events")
    parser.add_argument("--nval",
                        dest='val_events',
                        type=int,
                        default=argparse.SUPPRESS,
                        help="Set custom number of validation events")
    parser.add_argument(
        "--frac",
        dest='downsample_fraction',
        type=float,
        default=argparse.SUPPRESS,
        help="Set downsampling fraction if --downsample is set")
    parser.add_argument("--nestimators",
                        dest='n_estimators',
                        type=int,
                        default=argparse.SUPPRESS,
                        help="Set the number of trees for xgboost models")
    parser.add_argument("--maxdepth",
                        dest='max_depth',
                        type=int,
                        default=argparse.SUPPRESS,
                        help="Set maximum depth of trees for xgboost models")
    args = parser.parse_args()

    logger.info("Using configuration: %s steer file: %s", args.config_file,
                args.steer_file)

    with open(args.steer_file, "r") as steer_data:
        default = yaml.safe_load(steer_data)
    with open(args.config_file, "r") as config_data:
        config_parameters = yaml.safe_load(config_data)

    logger.info("Arguments provided: %s", str(args))
    if "dotrain" in args:
        default['dotrain'] = True
    if "docreateinputdata" in args or "docreatevaldata" in args:
        default['docreatevaldata'] = True
        config_parameters['common']['nd_validate_model'] = False
    if "docreatevaldata" in args:
        config_parameters['common']['nd_validate_model'] = True
    if "rndaugment" in args:
        config_parameters['common']['rnd_augment'] = True
    if "train_events_oned" in args:
        config_parameters['xgboost']['train_events'] = [args.train_events_oned]
    if "val_events" in args:
        config_parameters['common']['val_events'] = args.val_events
    if "downsample_fraction" in args:
        config_parameters['xgboost']['downsample'] = True
        config_parameters['xgboost'][
            'downsample_fraction'] = args.downsample_fraction
    if "n_estimators" in args:
        config_parameters['xgboost']['params'][
            'n_estimators'] = args.n_estimators
    if "max_depth" in args:
        config_parameters['xgboost']['params']['max_depth'] = args.max_depth

    models, corr, dataval = init_models(config_parameters)
    events_counts = (get_events_counts(
        config_parameters[model.name]["train_events"],
        config_parameters[model.name]["validation_events"],
        config_parameters[model.name]["apply_events"]) for model in models)
    ranges_rnd = config_parameters["common"]["range_rnd_index_train"]
    ranges_mean = config_parameters["common"]["range_mean_index"]
    if config_parameters["common"]["rnd_augment"]:
        max_available_events = (ranges_rnd[1] + 1 - ranges_rnd[0]) * (
            ranges_rnd[1] - ranges_rnd[0])
    else:
        max_available_events = (ranges_rnd[1] + 1 - ranges_rnd[0]) * \
            (ranges_mean[1] + 1 - ranges_mean[0])

    for model, model_events_counts in zip(models, events_counts):
        all_events_counts = []
        for (train_events, val_events, apply_events) in model_events_counts:
            total_events = train_events + val_events + apply_events
            if total_events > max_available_events:
                logger.warning(
                    "Too big number of events requested: %d available: %d",
                    total_events, max_available_events)
                continue

            all_events_counts.append(
                (train_events, val_events, apply_events, total_events))

            ranges = {
                "train": [0, train_events],
                "val": [train_events, train_events + val_events],
                "apply": [train_events + val_events, total_events]
            }
            model.config.set_ranges(ranges, total_events, train_events,
                                    val_events, apply_events)

            run_model_and_val(model, dataval, default,
                              config_parameters["common"])

            # TODO: apply the correction and save in files
            if corr is not None:
                pass

        if default["doprofile"] is True:
            model.draw_profile(all_events_counts)

    logger.info("Program finished.")
Example #14
def main():
    """ The global main function """
    logger = get_logger()
    logger.info("Starting TPC ML...")

    log_total_memory_usage("Initial memory usage")

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-c",
                        "--config",
                        dest="config_file",
                        default="config_model_parameters.yml",
                        type=str,
                        help="path to the *.yml configuration file")
    parser.add_argument("-s",
                        "--steer",
                        dest="steer_file",
                        default="default.yml",
                        type=str,
                        help="path to the *.yml steering file")
    # parameters for steer file
    parser.add_argument("--dotrain",
                        action="store_true",
                        default=argparse.SUPPRESS,
                        help="Perform the training")
    parser.add_argument("--docreatendvaldata",
                        action="store_true",
                        default=argparse.SUPPRESS,
                        help="Create validation data trees")
    parser.add_argument("--docache",
                        action="store_true",
                        default=argparse.SUPPRESS,
                        help="Cache training data if not already existing")
    # parameters for config file
    parser.add_argument("--rndaugment",
                        action="store_true",
                        default=argparse.SUPPRESS,
                        help="Use random-random augmentation for training")
    parser.add_argument("--ntrain1d",
                        dest="train_events_oned",
                        type=int,
                        default=argparse.SUPPRESS,
                        help="Set custom number of training events")
    parser.add_argument("--nval",
                        dest="nd_val_events",
                        type=int,
                        default=argparse.SUPPRESS,
                        help="Set custom number of max nd validation events")
    parser.add_argument("--dnpoints",
                        dest="downsample_npoints",
                        type=int,
                        default=argparse.SUPPRESS,
                        help="Set number of downsampling points")
    parser.add_argument("--nestimators",
                        dest="n_estimators",
                        type=int,
                        default=argparse.SUPPRESS,
                        help="Set the number of trees for xgboost models")
    parser.add_argument("--maxdepth",
                        dest="max_depth",
                        type=int,
                        default=argparse.SUPPRESS,
                        help="Set maximum depth of trees for xgboost models")
    parser.add_argument("--nfftidcs", dest="num_fft_idcs", type=int,
                        default=argparse.SUPPRESS, help="Set number of 1D IDCs used for" \
                        " the FFT. Corresponds to the ion drift time (ms) used in simulation.")
    parser.add_argument("--nfouriertrain", dest="num_fourier_coeffs_train", type=int,
                        default=argparse.SUPPRESS, help="Set number of Fourier coefficients" \
                        " to take from the 1D IDC train input")
    parser.add_argument("--nfourierapply", dest="num_fourier_coeffs_apply", type=int,
                        default=argparse.SUPPRESS, help="Set number of Fourier coefficients" \
                        " to take from the 1D IDC apply input")
    # parameters for caching
    parser.add_argument("--cache-events",
                        dest="cache_events",
                        type=int,
                        default=argparse.SUPPRESS,
                        help="Set the number of events to cache")
    parser.add_argument("--cache-train",
                        action="store_true",
                        default=argparse.SUPPRESS,
                        help="Use cached data for training")
    parser.add_argument(
        "--cache-file-size",
        dest="cache_file_size",
        type=int,
        default=argparse.SUPPRESS,
        help="Set the number of events per single temporary cache file")
    args = parser.parse_args()

    logger.info("Using configuration: %s steer file: %s", args.config_file,
                args.steer_file)

    with open(args.steer_file, "r", encoding="utf-8") as steer_data:
        default = yaml.safe_load(steer_data)
    with open(args.config_file, "r", encoding="utf-8") as config_data:
        config_parameters = yaml.safe_load(config_data)

    logger.info("Arguments provided: %s", str(args))
    if "dotrain" in args:
        default["dotrain"] = True
    if "docreatendvaldata" in args:
        default["docreatendvaldata"] = True
    if "docache" in args:
        default["docache"] = True
    #
    if "rndaugment" in args:
        config_parameters["common"]["rnd_augment"] = True
    if "train_events_oned" in args:
        config_parameters["xgboost"]["train_events"] = [args.train_events_oned]
    if "nd_val_events" in args:
        config_parameters["common"]["nd_val_events"] = args.nd_val_events
    if "downsample_npoints" in args:
        config_parameters["xgboost"]["downsample"] = True
        config_parameters["xgboost"][
            "downsample_npoints"] = args.downsample_npoints
    if "n_estimators" in args:
        config_parameters["xgboost"]["params"][
            "n_estimators"] = args.n_estimators
    if "max_depth" in args:
        config_parameters["xgboost"]["params"]["max_depth"] = args.max_depth
    if "num_fft_idcs" in args:
        config_parameters["common"]["num_fft_idcs"] = args.num_fft_idcs
    if "num_fourier_coeffs_train" in args:
        config_parameters["common"][
            "num_fourier_coeffs_train"] = args.num_fourier_coeffs_train
    if "num_fourier_coeffs_apply" in args:
        config_parameters["common"][
            "num_fourier_coeffs_apply"] = args.num_fourier_coeffs_apply
    #
    if "cache_events" in args:
        config_parameters["xgboost"]["cache_events"] = args.cache_events
    if "cache_train" in args:
        config_parameters["xgboost"]["cache_train"] = True
    if "cache_file_size" in args:
        config_parameters["xgboost"]["cache_file_size"] = args.cache_file_size

    models, corr, dataval = init_models(config_parameters)
    events_counts = (get_events_counts(
        config_parameters[model.name]["train_events"],
        config_parameters[model.name]["validation_events"],
        config_parameters[model.name]["apply_events"]) for model in models)
    ranges_rnd = config_parameters["common"]["range_rnd_index_train"]
    ranges_mean = config_parameters["common"]["range_mean_index"]
    if config_parameters["common"]["rnd_augment"]:
        max_available_events = (ranges_rnd[1] + 1 - ranges_rnd[0]) * (
            ranges_rnd[1] - ranges_rnd[0])
    else:
        max_available_events = (ranges_rnd[1] + 1 - ranges_rnd[0]) * \
            (ranges_mean[1] + 1 - ranges_mean[0])

    for model in models:
        if default["docache"] is True and model.name == "xgboost":
            start = timer()
            model.cache_train_data()
            end = timer()
            log_time(start, end, "cache")
    for model, model_events_counts in zip(models, events_counts):
        all_events_counts = []
        for (train_events, val_events, apply_events) in model_events_counts:
            total_events = train_events + val_events + apply_events
            if total_events > max_available_events:
                logger.warning(
                    "Too big number of events requested: %d available: %d",
                    total_events, max_available_events)
                continue

            all_events_counts.append(
                (train_events, val_events, apply_events, total_events))

            ranges = {
                "train": [0, train_events],
                "validation": [train_events, train_events + val_events],
                "apply": [train_events + val_events, total_events]
            }
            model.config.set_ranges(ranges, total_events, train_events,
                                    val_events, apply_events)

            run_model_and_val(model, dataval, default,
                              config_parameters["common"])

            # TODO: apply the correction and save in files
            if corr is not None:
                pass

        if default["doprofile"] is True:
            model.draw_profile(all_events_counts)

    logger.info("Program finished.")
Example #15
    def __init__(self, data_param):
        """
        Read and store the parameters from the file.

        :param dict data_param: dictionary of values read from the config file
        """
        self.logger = get_logger()

        # Dataset config
        self.grid_phi = data_param["grid_phi"]
        self.grid_z = data_param["grid_z"]
        self.grid_r = data_param["grid_r"]

        self.z_range = data_param["z_range"]
        self.opt_train = data_param["opt_train"]
        self.opt_predout = data_param["opt_predout"]
        self.opt_usederivative = data_param["opt_usederivative"]
        self.nameopt_predout = data_param["nameopt_predout"]
        self.dim_input = sum(self.opt_train)
        self.dim_output = sum(self.opt_predout)

        self.num_fft_idcs = data_param["num_fft_idcs"]
        self.num_fourier_coeffs_train = data_param["num_fourier_coeffs_train"]
        self.num_fourier_coeffs_apply = data_param["num_fourier_coeffs_apply"]

        self.logger.info(
            "Inputs active for training: (SCMean, SCFluctuations)=(%d, %d)",
            self.opt_train[0], self.opt_train[1])
        self.logger.info("Outputs: (dR, dRPhi, dZ) = (%d, %d, %d)",
                         self.opt_predout[0], self.opt_predout[1],
                         self.opt_predout[2])

        # Directories
        self.dirmodel = data_param["dirmodel"]
        self.dirapply = data_param["dirapply"]
        self.dirplots = data_param["dirplots"]
        self.dirtree = data_param["dirtree"]
        self.dirhist = data_param["dirhist"]
        train_dir = data_param["dirinput_bias"] if data_param["train_bias"] \
                    else data_param["dirinput_nobias"]
        val_dir = data_param["dirinput_bias"] if data_param["validation_bias"] \
                    else data_param["dirinput_nobias"]
        apply_dir = data_param["dirinput_bias"] if data_param["apply_bias"] \
                    else data_param["dirinput_nobias"]
        self.dirinput_train = "%s/SC-%d-%d-%d" % \
                              (train_dir, self.grid_z, self.grid_r, self.grid_phi)
        self.dirinput_validation = "%s/SC-%d-%d-%d" % \
                                   (val_dir, self.grid_z, self.grid_r, self.grid_phi)
        self.dirinput_apply = "%s/SC-%d-%d-%d" % \
                              (apply_dir, self.grid_z, self.grid_r, self.grid_phi)
        self.dirinput_nd_val = "%s/SC-%d-%d-%d" % (
            data_param["dirinput_nobias"], self.grid_z, self.grid_r,
            self.grid_phi)

        for dirname in (self.dirmodel, self.dirapply, self.dirplots,
                        self.dirtree, self.dirhist):
            if not os.path.isdir(dirname):
                os.makedirs(dirname)

        self.suffix = None
        self.suffix_ds = "phi%d_r%d_z%d" % \
                (self.grid_phi, self.grid_r, self.grid_z)

        # Parameters for getting input indices
        self.maxrandomfiles = data_param["maxrandomfiles"]
        self.nd_val_events = data_param["nd_val_events"]
        self.range_rnd_index_train = data_param["range_rnd_index_train"]
        self.range_rnd_index_nd_val = data_param["range_rnd_index_nd_val"]
        self.rnd_augment = data_param["rnd_augment"]
        self.part_inds = None
        self.nd_val_partition = data_param["nd_val_partition"]
        self.range_mean_index = data_param["range_mean_index"]
        self.indices_events_means = None
        self.partition = None
        self.total_events = 0
        self.train_events = 0
        self.val_events = 0
        self.apply_events = 0
Example #16
    def __init__(self):
        super().__init__()
        logger = get_logger()
        logger.info("IDCDataValidator::Init")
        self.model = None
        self.config = None
Example #17
    def __init__(self, data_param, case):
        self.logger = get_logger()
        self.logger.info("DnnOptimizer::Init\nCase: %s", case)

        # Dataset config
        self.grid_phi = data_param["grid_phi"]
        self.grid_z = data_param["grid_z"]
        self.grid_r = data_param["grid_r"]

        self.selopt_input = data_param["selopt_input"]
        self.selopt_output = data_param["selopt_output"]
        self.opt_train = data_param["opt_train"]
        self.opt_predout = data_param["opt_predout"]
        self.nameopt_predout = data_param["nameopt_predout"]
        self.dim_input = sum(self.opt_train)
        self.dim_output = sum(self.opt_predout)
        self.use_scaler = data_param["use_scaler"]

        # Directories
        self.dirmodel = data_param["dirmodel"]
        self.dirval = data_param["dirval"]
        train_dir = data_param["dirinput_bias"] if data_param["train_bias"] \
                    else data_param["dirinput_nobias"]
        test_dir = data_param["dirinput_bias"] if data_param["test_bias"] \
                    else data_param["dirinput_nobias"]
        apply_dir = data_param["dirinput_bias"] if data_param["apply_bias"] \
                    else data_param["dirinput_nobias"]
        self.dirinput_train = "%s/SC-%d-%d-%d/" % \
                              (train_dir, self.grid_z, self.grid_r, self.grid_phi)
        self.dirinput_test = "%s/SC-%d-%d-%d/" % \
                             (test_dir, self.grid_z, self.grid_r, self.grid_phi)
        self.dirinput_apply = "%s/SC-%d-%d-%d/" % \
                              (apply_dir, self.grid_z, self.grid_r, self.grid_phi)

        # DNN config
        self.filters = data_param["filters"]
        self.pooling = data_param["pooling"]
        self.batch_size = data_param["batch_size"]
        self.shuffle = data_param["shuffle"]
        self.depth = data_param["depth"]
        self.batch_normalization = data_param["batch_normalization"]
        self.dropout = data_param["dropout"]
        self.epochs = data_param["epochs"]
        self.lossfun = data_param["lossfun"]
        self.metrics = data_param["metrics"]
        self.adamlr = data_param["adamlr"]

        self.params = {
            'phi_slice': self.grid_phi,
            'r_row': self.grid_r,
            'z_col': self.grid_z,
            'batch_size': self.batch_size,
            'shuffle': self.shuffle,
            'opt_train': self.opt_train,
            'opt_predout': self.opt_predout,
            'selopt_input': self.selopt_input,
            'selopt_output': self.selopt_output,
            'use_scaler': self.use_scaler
        }

        self.suffix = "phi%d_r%d_z%d_filter%d_poo%d_drop%.2f_depth%d_batch%d_scaler%d" % \
                (self.grid_phi, self.grid_r, self.grid_z, self.filters, self.pooling,
                 self.dropout, self.depth, self.batch_normalization, self.use_scaler)
        self.suffix = "%s_useSCMean%d_useSCFluc%d" % \
                (self.suffix, self.opt_train[0], self.opt_train[1])
        self.suffix = "%s_pred_doR%d_dophi%d_doz%d" % \
                (self.suffix, self.opt_predout[0], self.opt_predout[1], self.opt_predout[2])
        self.suffix_ds = "phi%d_r%d_z%d" % \
                (self.grid_phi, self.grid_r, self.grid_z)

        if not os.path.isdir("plots"):
            os.makedirs("plots")

        if not os.path.isdir(self.dirmodel):
            os.makedirs(self.dirmodel)

        if not os.path.isdir(self.dirval):
            os.makedirs(self.dirval)

        self.logger.info("I am processing the configuration %s", self.suffix)
        if self.dim_output > 1:
            self.logger.fatal(
                "YOU CAN PREDICT ONLY 1 DISTORSION. The sum of opt_predout == 1"
            )
        self.logger.info(
            "Inputs active for training: (SCMean, SCFluctuations)=(%d, %d)",
            self.opt_train[0], self.opt_train[1])

        # Parameters for getting input indices
        self.maxrandomfiles = data_param["maxrandomfiles"]
        self.range_mean_index = data_param["range_mean_index"]
        self.indices_events_means = None
        self.partition = None
        self.total_events = 0
        self.train_events = 0
        self.test_events = 0
        self.apply_events = 0

        gROOT.SetStyle("Plain")
        gROOT.SetBatch()
Example #18
def merge_root_file(target, source_list):
    """
    Merge next file from the source list with the target file.
    Function called recursively for each element of the list.

    :param TFile target: the result ROOT file
    :param TList source_list: list of input files to merge
    """
    logger = get_logger()
    raw_path = target.GetPath()
    path = raw_path[raw_path.find(":") + 1:]

    first_source = source_list.First()
    first_source.cd(path)
    current_source_dir = gDirectory
    # gain time, do not add the objects in the list in memory
    status = TH1.AddDirectoryStatus()
    TH1.AddDirectory(False)

    # loop over all keys in this directory
    #global_chain = TChain()
    next_key = TIter(current_source_dir.GetListOfKeys())
    #key = TKey()
    #TKey old_key = None
    key = next_key()
    while key:
        # keep only the highest cycle number for each key
        #if old_key and not old_key.GetName() == key.GetName():
        #    continue
        # read object from first source file
        first_source.cd(path)
        obj = key.ReadObj()

        if obj.IsA().InheritsFrom(TH1.Class()):
            # descendant of TH1 -> merge it
            logger.info("Merging histogram %s", obj.GetName())
            h1 = TH1(obj)

            # loop over all source files and add the content of the
            # corresponding histogram to the one pointed to by "h1"
            next_source = source_list.After(first_source)
            while next_source:
                # make sure we are at the correct directory level by cd'ing to path
                next_source.cd(path)
                key2 = gDirectory.GetListOfKeys().FindObject(h1.GetName())
                if key2:
                    h2 = TH1(key2.ReadObj())
                    h1.Add(h2)
                    #del h2
                next_source = source_list.After(next_source)

        elif obj.IsA().InheritsFrom(TTree.Class()):
            logger.info("Merging tree %s", obj.GetName())
            # loop over all source files and create a chain of Trees "global_chain"
            obj_name = obj.GetName()
            global_chain = TChain(obj_name)
            global_chain.Add(first_source.GetName())
            next_source = source_list.After(first_source)
            while next_source:
                global_chain.Add(next_source.GetName())
                next_source = source_list.After(next_source)

        elif obj.IsA().InheritsFrom(TDirectory.Class()):
            logger.info("Found subdirectory %s", obj.GetName())
            # create a new subdir of same name and title in the target file
            target.cd()
            new_dir = target.mkdir(obj.GetName(), obj.GetTitle())
            # new_dir is now the starting point of another round of merging
            # new_dir still knows its depth within the target file via
            # GetPath(), so we can still figure out where we are in the recursion
            merge_root_file(new_dir, source_list)

        else:
            logger.info("Unknown object type, name: %s, title: %s",
                        obj.GetName(), obj.GetTitle())

        # now write the merged histogram (which is "in" obj) to the target file
        # note that this will just store obj in the current directory level,
        # which is not persistent until the complete directory itself is stored
        # by "target.Write()" below
        if obj is not None:
            target.cd()
            # if the object is a tree, it is stored in global_chain...
            if obj.IsA().InheritsFrom(TTree.Class()):
                global_chain.Merge(target.GetFile(), 0, "keep")
            else:
                obj.Write(key.GetName())

        # move to the next element
        key = next_key()

    # save modifications to target file
    target.SaveSelf(True)
    TH1.AddDirectory(status)
    target.Write()
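
A hedged usage sketch, assuming PyROOT is available and that merge_root_file and its ROOT imports are in scope; the file names are placeholders:

from ROOT import TFile, TList

target = TFile.Open("merged.root", "RECREATE")
sources = TList()
sources.Add(TFile.Open("part1.root"))      # placeholder input files
sources.Add(TFile.Open("part2.root"))

merge_root_file(target, sources)
target.Close()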
Example #19
    def __init__(self, data_param, case):
        self.logger = get_logger()
        self.logger.info("DataValidator::Init\nCase: %s", case)

        # Dataset config
        self.grid_phi = data_param["grid_phi"]
        self.grid_z = data_param["grid_z"]
        self.grid_r = data_param["grid_r"]

        self.selopt_input = data_param["selopt_input"]
        self.selopt_output = data_param["selopt_output"]
        self.opt_train = data_param["opt_train"]
        self.opt_predout = data_param["opt_predout"]
        self.nameopt_predout = data_param["nameopt_predout"]
        self.dim_input = sum(self.opt_train)
        self.dim_output = sum(self.opt_predout)

        self.validate_model = data_param["validate_model"]
        self.use_scaler = data_param["use_scaler"]

        # Directories
        self.dirmodel = data_param["dirmodel"]
        self.dirval = data_param["dirval"]
        self.diroutflattree = data_param["diroutflattree"]
        self.dirouthistograms = data_param["dirouthistograms"]
        train_dir = data_param["dirinput_bias"] if data_param["train_bias"] \
                    else data_param["dirinput_nobias"]
        test_dir = data_param["dirinput_bias"] if data_param["test_bias"] \
                    else data_param["dirinput_nobias"]
        apply_dir = data_param["dirinput_bias"] if data_param["apply_bias"] \
                    else data_param["dirinput_nobias"]
        self.dirinput_train = "%s/SC-%d-%d-%d/" % \
                              (train_dir, self.grid_z, self.grid_r, self.grid_phi)
        self.dirinput_test = "%s/SC-%d-%d-%d/" % \
                             (test_dir, self.grid_z, self.grid_r, self.grid_phi)
        self.dirinput_apply = "%s/SC-%d-%d-%d/" % \
                              (apply_dir, self.grid_z, self.grid_r, self.grid_phi)
        self.dirinput_val = "%s/SC-%d-%d-%d/" % \
                            (data_param["dirinput_nobias"], self.grid_z, self.grid_r, self.grid_phi)

        # DNN config
        self.filters = data_param["filters"]
        self.pooling = data_param["pooling"]
        self.depth = data_param["depth"]
        self.batch_normalization = data_param["batch_normalization"]
        self.dropout = data_param["dropout"]

        self.suffix = "phi%d_r%d_z%d_filter%d_poo%d_drop%.2f_depth%d_batch%d_scaler%d" % \
                (self.grid_phi, self.grid_r, self.grid_z, self.filters, self.pooling,
                 self.dropout, self.depth, self.batch_normalization, self.use_scaler)
        self.suffix = "%s_useSCMean%d_useSCFluc%d" % \
                (self.suffix, self.opt_train[0], self.opt_train[1])
        self.suffix = "%s_pred_doR%d_dophi%d_doz%d" % \
                (self.suffix, self.opt_predout[0], self.opt_predout[1], self.opt_predout[2])
        self.suffix_ds = "phi%d_r%d_z%d" % \
                (self.grid_phi, self.grid_r, self.grid_z)

        self.logger.info("I am processing the configuration %s", self.suffix)
        if self.dim_output > 1:
            self.logger.fatal(
                "YOU CAN PREDICT ONLY 1 DISTORSION. The sum of opt_predout == 1"
            )
        self.logger.info(
            "Inputs active for training: (SCMean, SCFluctuations)=(%d, %d)",
            self.opt_train[0], self.opt_train[1])

        # Parameters for getting input indices
        self.maxrandomfiles = data_param["maxrandomfiles"]
        self.range_mean_index = data_param["range_mean_index"]
        self.indices_events_means = None
        self.partition = None
        self.total_events = 0
        self.train_events = 0
        self.test_events = 0
        self.apply_events = 0
        self.tree_events = data_param["tree_events"]

        if not os.path.isdir(self.diroutflattree):
            os.makedirs(self.diroutflattree)
        if not os.path.isdir("%s/%s" % (self.diroutflattree, self.suffix)):
            os.makedirs("%s/%s" % (self.diroutflattree, self.suffix))
        if not os.path.isdir("%s/%s" % (self.dirouthistograms, self.suffix)):
            os.makedirs("%s/%s" % (self.dirouthistograms, self.suffix))
Example #20
def main():
    """ The global main function """
    logger = get_logger()
    logger.info("Starting TPC ML...")

    if len(sys.argv) == 2:
        default_file_name = sys.argv[1]
        print("Using user specified steering options file: %s" %
              default_file_name)
    else:
        default_file_name = "default.yml"

    with open(default_file_name, 'r') as default_data:
        default = yaml.safe_load(default_data)
    with open("config_model_parameters.yml", 'r') as parameters_data:
        config_parameters = yaml.safe_load(parameters_data)

    # FIXME: Do we need these commented lines anymore?
    #dirmodel = config_parameters["common"]["dirmodel"]
    #dirval = config_parameters["common"]["dirval"]
    #dirinput = config_parameters["common"]["dirinput"]

    # NOTE
    # checkdir and checkmakedir not yet implemented. Was previously used from
    # machine_learning_hep package but is now the only thing required from there.
    # Easy to adapt an implementation like that to avoid heavy dependency
    # on machine_learning_hep

    #counter = 0
    #if dotraining is True:
    #    counter = counter + checkdir(dirmodel)
    #if dotesting is True:
    #    counter = counter + checkdir(dirval)
    #if counter < 0:
    #    sys.exit()

    #if dotraining is True:
    #    checkmakedir(dirmodel)
    #if dotesting is True:
    #    checkmakedir(dirval)

    models, corr, dataval = init_models(config_parameters)
    events_counts = (get_events_counts(
        config_parameters[model.name]["train_events"],
        config_parameters[model.name]["test_events"],
        config_parameters[model.name]["apply_events"]) for model in models)
    max_available_events = config_parameters["common"]["max_events"]

    for model, model_events_counts in zip(models, events_counts):
        all_events_counts = []
        for (train_events, test_events, apply_events) in model_events_counts:
            total_events = train_events + test_events + apply_events
            if total_events > max_available_events:
                print("Too big number of events requested: %d available: %d" % \
                      (total_events, max_available_events))
                continue

            all_events_counts.append(
                (train_events, test_events, apply_events, total_events))

            ranges = {
                "train": [0, train_events],
                "test": [train_events, train_events + test_events],
                "apply": [train_events + test_events, total_events]
            }
            model.config.set_ranges(ranges, total_events, train_events,
                                    test_events, apply_events)

            run_model_and_val(model, dataval, default,
                              config_parameters["common"])

            # TODO: apply the correction and save in files
            if corr is not None:
                pass

        if default["doprofile"] is True:
            model.draw_profile(all_events_counts)

    logger.info("Program finished.")