Пример #1
0
def load_yaml_file(dataset_, path_, **kwargs):
    """Load the single table stored in a yaml file into ``dataset_``.

    The file is read with ``open_yaml_file`` and is expected to hold exactly
    one entry. A table with one or two independent variables is handed to
    ``load_distributions_from_yaml``; any other count is reported through
    ``msg.error`` / ``msg.fatal``.

    Args:
        dataset_: store object passed unchanged to
            ``load_distributions_from_yaml``.
        path_: path of the yaml file to read.
        **kwargs: only ``metadata_global_`` is read (defaults to ``{}``) and
            forwarded as the global metadata of the loaded distributions.
    """
    data = open_yaml_file(path_)
    if len(data) != 1:
        msg.fatal(
            "HEP_data_utils.helpers.load_yaml_file",
            "{0} contains {1} entries, but I am only configured for 1... is this really a single distribution/matrix?"
            .format(path_, len(data)))
    table = data[0]
    dep_vars = table["dependent_variables"]
    indep_vars = table["independent_variables"]
    n_dim = len(indep_vars)
    # Zero independent variables must be rejected here as well: the loader
    # unconditionally reads indep_vars[0] and would otherwise fail with an
    # IndexError instead of the diagnostic below.
    if n_dim in (1, 2):
        load_distributions_from_yaml(dataset_,
                                     dep_vars,
                                     indep_vars,
                                     path_,
                                     n_dim_=n_dim,
                                     metadata_global_=kwargs.get(
                                         "metadata_global_", {}),
                                     metadata_local_=table)
        return
    msg.error(
        "HEP_data_utils.helpers.load_yaml_file",
        "file {0} has {1} independent_variables... I don't know what to do. I'm such a failure, I knew I wasn't cut out for this :(. The problematic entries are as follows:"
        .format(path_, n_dim))
    print(indep_vars)
    msg.fatal("HEP_data_utils.helpers.load_yaml_file",
              "could not interpret number of independent_variables")
Пример #2
0
def regularise_bins(dist_2D_):
    """Reshape the flat values of a 2D distribution into an (x, y) matrix.

    Two input layouts are recognised:

    * one (x label, y label, value) triple per entry, i.e. the three lists
      have equal length. The unique sorted labels become the new axes,
      provided both axes end up with the same label set; otherwise the
      distribution is left untouched.
    * a flattened matrix with ``n_bins_x * n_bins_y`` values, where the value
      for bin ``(x, y)`` is read from position ``x + n_bins_x * y``.

    Any other layout is reported through ``msg.error``.

    Args:
        dist_2D_: the ``Distribution_2D`` to modify in place (``_values`` and,
            for the first layout, ``_bin_labels_x`` / ``_bin_labels_y``).
    """
    if not isinstance(dist_2D_, Distribution_2D):
        msg.fatal("HEP_data_utils.helpers.regularise_bins",
                  "argument must be of type Distribution_2D")
    n_vals = len(dist_2D_._values)
    n_bins_x = len(dist_2D_._bin_labels_x)
    n_bins_y = len(dist_2D_._bin_labels_y)
    if n_vals == n_bins_x == n_bins_y:
        bin_labels_x = sorted(set(dist_2D_._bin_labels_x))
        bin_labels_y = sorted(set(dist_2D_._bin_labels_y))
        if bin_labels_x != bin_labels_y:
            return
        # Both axes share the same sorted labels here, so one lookup table
        # serves for rows and columns.
        position = {label: idx for idx, label in enumerate(bin_labels_x)}
        new_values = np.zeros(shape=(len(bin_labels_x), len(bin_labels_y)))
        for x, y, v in zip(dist_2D_._bin_labels_x, dist_2D_._bin_labels_y,
                           dist_2D_._values):
            new_values[position[x], position[y]] = v
        dist_2D_._values = new_values
        dist_2D_._bin_labels_x = bin_labels_x
        dist_2D_._bin_labels_y = bin_labels_y
    elif n_vals == n_bins_x * n_bins_y:
        new_values = np.zeros(shape=(n_bins_x, n_bins_y))
        # Iterate over plain indices: enumerate() would yield (index, label)
        # tuples, which cannot be used in the flat-index arithmetic below.
        for x_idx in range(n_bins_x):
            for y_idx in range(n_bins_y):
                new_values[x_idx, y_idx] = dist_2D_._values[x_idx +
                                                            n_bins_x * y_idx]
        dist_2D_._values = new_values
    else:
        msg.error("HEP_data_utils.helpers.regularise_bins",
                  "function not implemented for this type of matrix",
                  _verbose_level=0)
Пример #3
0
 def remove_local_key(self, key_):
     """Forget the local key ``key_`` together with its index range.

     An unknown key is reported through ``msg.error`` and nothing is changed.
     """
     if key_ in self._local_keys:
         self._local_keys.remove(key_)
         del self._local_key_indices[key_]
         return
     # Unknown key: report it and leave the bookkeeping untouched.
     msg.error(
         "HEP_data_utils.data_structures.Distribution_2D.remove_local_key",
         "key {0} does not exist... returning with nothing done".format(
             key_),
         _verbose_level=0)
Пример #4
0
 def set_local_key(self, key_, key_idx_lower_, key_idx_upper_):
     """Register (or update) a local key covering an index range.

     Args:
         key_: name of the local key.
         key_idx_lower_: lower index of the range.
         key_idx_upper_: upper index of the range; must not be smaller than
             ``key_idx_lower_``.

     An inverted range is reported through ``msg.error`` and nothing is
     changed.
     """
     # Validate before touching any state so that a rejected call really
     # leaves the object unchanged (no key without a matching index entry).
     if key_idx_lower_ > key_idx_upper_:
         msg.error(
             "HEP_data_utils.data_structures.Distribution_2D.set_local_key",
             "lower index {0} cannot be greater than upper index {1}... returning with nothing done"
             .format(key_idx_lower_, key_idx_upper_),
             _verbose_level=0)
         return
     if key_ not in self._local_keys:
         self._local_keys.append(key_)
     self._local_key_indices[key_] = [key_idx_lower_, key_idx_upper_]
Пример #5
0
 def copy_2D_local_keys(self, from_key_, *args):
     """Replace the local keys of other 2D distributions with a copy.

     Args:
         from_key_: key of the 2D distribution whose local keys are copied.
             An unknown key is reported through ``msg.error`` and nothing
             is changed.
         *args: keys of the 2D distributions that receive the local keys.
             Their existing local keys are removed first.

     Raises:
         KeyError: if one of the target keys is not a stored 2D distribution.
     """
     if from_key_ not in self._distributions_2D:
         msg.error(
             "HEP_data_utils.data_structures.Distribution_store.copy_2D_local_keys",
             "key {0} does not exist... returning with nothing done".format(
                 from_key_),
             _verbose_level=0)
         return
     from_dist = self._distributions_2D[from_key_]
     # Snapshot the source keys once, so that copying a distribution onto
     # itself restores its keys instead of wiping them.
     source_keys = [(local_key, from_dist._local_key_indices[local_key][0],
                     from_dist._local_key_indices[local_key][1])
                    for local_key in from_dist._local_keys]
     for to_key in args:
         to_dist = self._distributions_2D[to_key]
         # Iterate over a copy: remove_local_key mutates _local_keys, and
         # removing while iterating the live list skips every other key.
         for local_key in list(to_dist._local_keys):
             to_dist.remove_local_key(local_key)
         for local_key, idx_lower, idx_upper in source_keys:
             to_dist.set_local_key(local_key, idx_lower, idx_upper)
Пример #6
0
 def change_local_key(self, old_key_, new_key_):
     """Rename the local key ``old_key_`` to ``new_key_``.

     The index range stored for the old key is kept under the new name.
     The call is rejected through ``msg.error``, with nothing changed, when
     ``old_key_`` is unknown or ``new_key_`` is already in use.
     """
     source = "HEP_data_utils.data_structures.Distribution_2D.change_local_key"
     known_keys = self._local_keys
     if old_key_ not in known_keys:
         msg.error(
             source,
             "key {0} does not exist... returning with nothing done".format(
                 old_key_),
             _verbose_level=0)
         return
     if new_key_ in known_keys:
         msg.error(
             source,
             "key {0} already exists... returning with nothing done".format(
                 new_key_),
             _verbose_level=0)
         return
     # Swap the name in the key list, then move the index range across.
     known_keys.remove(old_key_)
     known_keys.append(new_key_)
     self._local_key_indices[new_key_] = self._local_key_indices.pop(
         old_key_)
Пример #7
0
 def plot_all(self):
     """Plot every stored 1D distribution, then every stored 2D one.

     Each plot is labelled with its key. A plot that raises is printed and
     reported through ``msg.error``, and the loop moves on to the next key.
     """
     # (store attribute, plotting method, error message) per dimensionality;
     # attributes are resolved lazily, one group after the other.
     plot_jobs = (
         ("_distributions_1D", "plot_1D_distribution",
          "Error when plotting 1D distribution with key {0}... skipping"),
         ("_distributions_2D", "plot_matrix",
          "Error when plotting 2D distribution with key {0}... skipping"),
     )
     for store_name, plotter_name, failure_text in plot_jobs:
         for key in getattr(self, store_name):
             try:
                 getattr(self, plotter_name)(key, label=key)
             except Exception as e:
                 print(e)
                 msg.error("Distribution_store.plot_all",
                           failure_text.format(key))
Пример #8
0
 def load_keys(self, filename_):
     """Rename stored distributions according to a config file.

     The file is parsed with ``configparser``; every option of its ``KEYS``
     section maps an existing key (option name) to its new name (option
     value), applied through ``self.rename``. ``self.print_keys`` is called
     once all renames are done.

     Args:
         filename_: path of the config file. Note that ``ConfigParser.read``
             silently skips files it cannot open, in which case the missing
             ``KEYS`` section is what gets reported.
     """
     config = configparser.ConfigParser()
     # Keep option names exactly as written; the default lower-cases them.
     config.optionxform = str
     try:
         config.read(filename_)
     except Exception as exc:
         # Only ordinary errors are handled here, so KeyboardInterrupt and
         # SystemExit still propagate.
         msg.check_verbosity_and_print(str(type(exc)), _verbose_level=-1)
         msg.error(
             "HEP_data_utils.data_structures.Distribution_store",
             "an exception occured when parsing the config file... Continuing with nothing done"
         )
         return
     if not config.has_section("KEYS"):
         msg.error(
             "HEP_data_utils.data_structures.Distribution_store",
             "no section titled \"KEYS\" in file {0}".format(filename_))
         return
     for old_key, new_key in config["KEYS"].items():
         self.rename(old_key, new_key)
     self.print_keys()
Пример #9
0
def set_1D_bins(distribution_, indep_vars_):
    """Fill the bin edges / bin labels of a 1D distribution.

    ``_bin_values`` is reset to ``len(distribution_) + 1`` zeros and
    ``_bin_labels`` to ``"unlabeled"`` for every point. Each entry of
    ``indep_vars_[0]["values"]`` is then interpreted as either

    * a labelled bin (``"value"`` present): the label is stored, or
    * a range (``"low"`` and ``"high"`` present): the first range sets edges
      0 and 1; each later range must share one of its limits with the
      previously stored edge and contributes its other limit as next edge.

    Entries that fit neither form, or ranges that do not connect to the
    previous edge, set ``_has_errors`` and are reported through
    ``msg.error``.

    Args:
        distribution_: distribution to modify in place.
        indep_vars_: list of independent-variable maps; exactly one is
            expected, otherwise ``msg.fatal`` is called.
    """
    if len(indep_vars_) != 1:
        msg.fatal(
            "HEP_data_utils.helpers.set_1D_bins",
            "distribution {0} has {1} independent_variables but I am only configured to deal with 1"
            .format(distribution_._description, len(indep_vars_)))
    n_points = len(distribution_)
    distribution_._bin_values = np.zeros(shape=(1 + n_points))
    distribution_._bin_labels = ["unlabeled"] * n_points
    for i, bin_entry in enumerate(indep_vars_[0]["values"]):
        if bin_entry.get("value", None) is not None:
            distribution_._bin_labels[i] = bin_entry["value"]
        elif (bin_entry.get("high", None) is not None
              and bin_entry.get("low", None) is not None):
            bin_lo, bin_hi = bin_entry["low"], bin_entry["high"]
            if i == 0:
                distribution_._bin_values[0] = bin_lo
                distribution_._bin_values[1] = bin_hi
                continue
            previous_edge = distribution_._bin_values[i]
            if bin_hi == previous_edge:
                distribution_._bin_values[i + 1] = bin_lo
            elif bin_lo == previous_edge:
                distribution_._bin_values[i + 1] = bin_hi
            else:
                distribution_._has_errors = True
                # Keyword spelled _verbose_level, as in the other msg calls.
                msg.error(
                    "HEP_data_utils.helpers.set_1D_bins",
                    "Bin entry {0} for distribution {1} is not continuous from the previous bin which ended at {2}"
                    .format(bin_entry, distribution_._description,
                            previous_edge),
                    _verbose_level=-1)
        else:
            distribution_._has_errors = True
            msg.error("HEP_data_utils.helpers.set_1D_bins",
                      "Could not interpret bin entry {0} for distribution {1}".
                      format(bin_entry, distribution_._description),
                      _verbose_level=-1)
Пример #10
0
 def plot_1D_distribution(self, key_, **kwargs):
     """Plot the 1D distribution stored under ``key_`` with its error bars.

     Three layers are drawn when non-zero: a wide band for the errors
     returned by ``HEPData_plt.get_1D_distribution(self, key_, "sys")``, a
     narrower band for the ``"stat"`` ones, and the points with the errors
     of the unfiltered call. The figure is shown with ``plt.show()``.

     Args:
         key_: key of the distribution in ``self._distributions_1D``. A
             distribution flagged ``_has_errors`` is reported through
             ``msg.error`` and not plotted.
         **kwargs: optional settings:
             label: legend prefix (default ``"distribution"``).
             legend_loc: legend location; without it the legend is placed
                 outside the axes on the right.
             xlabel, ylabel, title: axis labels and title; the labels
                 default to the distribution's ``_indep_var`` / ``_dep_var``.
             xlim, ylim: axis limits, applied only when given.
             logx, logy: pass ``True`` for logarithmic axes.
     """
     if self._distributions_1D[key_]._has_errors:
         msg.error(
             "Distribution_store.plot_1D_distribution",
             "Key {0} had some errors when loading. Please clear them before plotting."
             .format(key_), -1)
         return

     def wrap_legend(text, width=120):
         # Break a long legend entry into lines of at most `width` characters.
         n_chunks = len(text) // width + 1
         return "\n".join(text[width * i:width * (i + 1)]
                          for i in range(n_chunks))

     x, y, [ey_lo, ey_hi], ex, labels, keys = \
         HEPData_plt.get_1D_distribution(self, key_)
     x, y, [ey_lo_sys, ey_hi_sys], ex, labels, sys_keys = \
         HEPData_plt.get_1D_distribution(self, key_, "sys")
     x, y, [ey_lo_stat, ey_hi_stat], ex, labels, stat_keys = \
         HEPData_plt.get_1D_distribution(self, key_, "stat")
     fig = plt.figure(figsize=(15, 5))
     ax = fig.add_subplot(111)
     legend_prefix = kwargs.get("label", "distribution")
     str_tot_legend = wrap_legend(legend_prefix + " ( " + " + ".join(keys) +
                                  " )")
     str_sys_legend = wrap_legend(legend_prefix + " ( " +
                                  " + ".join(sys_keys) + " )")
     # The loop variable is deliberately not called `x`: on Python 2 a list
     # comprehension variable leaks and would overwrite the data points.
     if sum([np.fabs(err) for err in ey_hi_sys + ey_lo_sys]) > 0:
         ax.errorbar(x,
                     y,
                     yerr=[ey_lo_sys, ey_hi_sys],
                     c='royalblue',
                     linewidth=18,
                     linestyle='None',
                     marker='None',
                     alpha=0.4,
                     label=str_sys_legend)  # band of systematic sources
     if sum([np.fabs(err) for err in ey_hi_stat + ey_lo_stat]) > 0:
         ax.errorbar(x,
                     y,
                     yerr=[ey_lo_stat, ey_hi_stat],
                     c='indianred',
                     linewidth=6,
                     linestyle='None',
                     marker='None',
                     alpha=0.6,
                     label=legend_prefix + " ( stat )")
     ax.errorbar(x,
                 y,
                 yerr=[ey_lo, ey_hi],
                 xerr=ex,
                 c='k',
                 linewidth=2,
                 linestyle='None',
                 marker='+',
                 alpha=1,
                 label=str_tot_legend)  # points carry all error sources
     if labels:
         ax.set_xticks(x)
         ax.set_xticklabels(self._distributions_1D[key_]._bin_labels,
                            rotation=45)
     # Shrink the axes to leave room for the legend drawn beside them.
     box = ax.get_position()
     ax.set_position([box.x0, box.y0, box.width * 0.4, box.height])
     if "legend_loc" in kwargs:
         ax.legend(loc=kwargs["legend_loc"])
     else:
         ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
     plt.xlabel(
         kwargs.get("xlabel", self._distributions_1D[key_]._indep_var))
     plt.ylabel(kwargs.get("ylabel", self._distributions_1D[key_]._dep_var))
     plt.title(kwargs.get("title", ""))
     # Axes.axis() has no xlim/ylim keywords, so limits go through the
     # dedicated setters, and only when requested (autoscaling otherwise).
     if "xlim" in kwargs:
         ax.set_xlim(kwargs["xlim"])
     if "ylim" in kwargs:
         ax.set_ylim(kwargs["ylim"])
     if kwargs.get("logy", False) is True: plt.yscale("log")
     if kwargs.get("logx", False) is True: plt.xscale("log")
     plt.grid()
     plt.show()
Пример #11
0
def _store_under_unique_key(store_, dist_key_, distribution_):
    """Insert ``distribution_`` into ``store_`` without overwriting an entry.

    If ``dist_key_`` is taken, "-duplicated-auto-key;N" is appended, with N
    the first positive integer that yields an unused key.
    """
    if store_.get(dist_key_, None) is not None:
        base_key = dist_key_ + "-duplicated-auto-key;"
        counter = 1
        dist_key_ = base_key + str(counter)
        while store_.get(dist_key_, None) is not None:
            counter += 1
            dist_key_ = base_key + str(counter)
    store_[dist_key_] = distribution_


def load_distributions_from_yaml(dataset_, dep_vars_, indep_vars_, path_,
                                 **argv):
    """Create one distribution per dependent variable and store it.

    For every entry of ``dep_vars_`` a ``Distribution_1D`` / ``Distribution_2D``
    is filled with values, errors, bins and metadata, then stored in
    ``dataset_._distributions_1D`` / ``dataset_._distributions_2D`` under the
    key ``|<table_doi>|<data_file or path_>|<name>|`` (the doi part only when
    available). Already-used keys get a "-duplicated-auto-key;N" suffix.

    Args:
        dataset_: store object that receives the distributions.
        dep_vars_: list of dependent-variable maps (``header``, ``values``).
        indep_vars_: list of independent-variable maps.
        path_: path of the source file, used in keys and messages.
        **argv: optional settings:
            n_dim_: number of bin dimensions, 1 (default) or 2; any other
                value ends in ``msg.fatal``.
            metadata_global_: map stored as ``GLOBAL::<key>`` metadata.
            metadata_local_: map stored as ``LOCAL::<key>`` metadata, except
                for its dependent/independent variable entries.
    """
    n_dim_ = argv.get("n_dim_", 1)
    extra_info_global = argv.get("metadata_global_", {})
    # Local metadata comes from its own keyword (it previously re-read the
    # global one, so the table-level metadata was never stored).
    extra_info_local = argv.get("metadata_local_", {})
    for dep_var in dep_vars_:
        dist_key = "|"
        if extra_info_global.get("table_doi", None) is not None:
            dist_key = dist_key + str(extra_info_global["table_doi"]) + "|"
        if extra_info_global.get("data_file", None) is not None:
            dist_key = dist_key + str(extra_info_global["data_file"]) + "|"
        else:
            dist_key = dist_key + path_ + "|"
        if n_dim_ == 1:
            distribution = Distribution_1D()
        elif n_dim_ == 2:
            distribution = Distribution_2D()
        else:
            # Unsupported dimensionality; rejected with msg.fatal further down.
            distribution = Distribution()
        distribution._description = dep_var["header"].get("name", "unknown")
        distribution._name = distribution._description
        distribution._dep_var = dep_var["header"].get("name", "unknown")
        distribution._indep_var = indep_vars_[0]["header"].get(
            "name", "unknown")
        distribution._units = dep_var["header"].get("units", "unknown")
        for key in extra_info_local:
            if key in ("dependent_variables", "independent_variables"):
                continue
            distribution._meta["LOCAL::" + key] = extra_info_local[key]
        for key in extra_info_global:
            distribution._meta["GLOBAL::" + key] = extra_info_global[key]
        for key in dep_var:
            if key in ("values", "errors"):
                continue
            distribution._meta["LOCAL::DEP_VARS::" + key] = dep_var[key]
        for entry in dep_var["values"]:
            try:
                distribution._values = np.append(distribution._values,
                                                 entry["value"])
            except KeyError as exc:
                msg.error(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "KeyError: {0}".format(exc),
                    _verbose_level=-1)
                msg.fatal(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "Entry with no \"value\" when trying to create distribution {0} in file {1}"
                    .format(distribution._description, path_))
        for pt_idx, entry in enumerate(dep_var["values"]):
            try:
                errors = entry["errors"]
            except KeyError as exc:
                msg.error(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "KeyError: {0}".format(exc),
                    _verbose_level=1)
                msg.warning(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "Entry with no \"errors\" when trying to create distribution {0} in file {1}... Assuming there are none"
                    .format(distribution._description, path_),
                    _verbose_level=1)
                errors = []
            for err_idx, error in enumerate(errors):
                get_error_from_yaml_map(distribution, error, pt_idx, err_idx)
        for indep_var in indep_vars_:
            for key in indep_var:
                if key == "values":
                    continue
                distribution._meta["LOCAL::INDEP_VARS::" +
                                   key] = indep_var[key]
        # Record the number of points before the bins are set:
        # regularise_bins may replace _values with a 2D matrix.
        expected_size = len(distribution._values)
        if n_dim_ == 1:
            set_1D_bins(distribution, indep_vars_)
        elif n_dim_ == 2:
            set_2D_bins(distribution, indep_vars_)
            regularise_bins(distribution)
        else:
            msg.fatal(
                "HEP_data_utils.helpers.load_distributions_from_yaml",
                "number of bin dimensions is {0} but I can only handle 1 or 2".
                format(n_dim_))
        for error_map in (distribution._symm_errors,
                          distribution._asymm_errors_up,
                          distribution._asymm_errors_down):
            for key in error_map:
                this_err_size = len(error_map[key])
                if this_err_size == expected_size:
                    continue
                msg.fatal(
                    "HEP_data_utils.helpers.load_distributions_from_yaml",
                    "error source {0} has length {1} for distribution [{2}] where {3} was expected"
                    .format(key, this_err_size, distribution._description,
                            expected_size))
        msg.info("HEP_data_utils.helpers.load_distributions_from_yaml",
                 "yaml file loaded with the following entries",
                 _verbose_level=0)
        dist_key = dist_key + str(distribution._name) + "|"
        if msg.VERBOSE_LEVEL >= 0:
            print(distribution)
        if n_dim_ == 1:
            _store_under_unique_key(dataset_._distributions_1D, dist_key,
                                    distribution)
        if n_dim_ == 2:
            _store_under_unique_key(dataset_._distributions_2D, dist_key,
                                    distribution)