Exemplo n.º 1
0
def load_submission_file(dataset_, path_, fname_=""):
    """Read a submission steering file and load every table it lists.

    The first yaml document is treated as the submission-level metadata and
    copied onto ``dataset_``; each following document is expected to name a
    ``data_file`` that sits next to the submission file and is loaded through
    ``load_yaml_file``.

    Args:
        dataset_: container receiving the metadata and the distributions.
        path_: path of the submission file, or of its directory when
            ``fname_`` is given.
        fname_: optional file name appended to ``path_`` with a "/".
    """
    caller = "HEP_data_utils.helpers.load_submission_file"
    if len(fname_) != 0:
        path_ = "/".join((path_, fname_))
    documents = open_yaml_file(path_)

    # Document 0 holds the submission-wide properties.
    header = documents[0]
    msg.info(caller,
             "submission file with the following metadata:",
             _verbose_level=1)
    msg.check_verbosity_and_print(yaml.safe_dump(header), _verbose_level=1)
    first_resource = header["additional_resources"][0]
    dataset_._description = first_resource["description"]
    dataset_._location = header["additional_resources"][0]["location"]
    dataset_._comment = header["comment"]
    dataset_._hepdata_doi = header["hepdata_doi"]

    # Every remaining document describes one table file.
    for table_entry in documents[1:]:
        msg.info(caller,
                 "submission file entry with the following definitions:",
                 _verbose_level=1)
        msg.check_verbosity_and_print(yaml.safe_dump(table_entry),
                                      _verbose_level=1)
        table_path = hlp.get_directory(path_) + "/" + table_entry["data_file"]
        if not os.path.isfile(table_path):
            msg.fatal(
                caller,
                "submission file asks for a yaml file called {0} but none exists"
                .format(table_path))
        msg.info(caller,
                 "opening yaml file {0}".format(table_path),
                 _verbose_level=0)
        load_yaml_file(dataset_, table_path, metadata_global_=table_entry)
Exemplo n.º 2
0
def regularise_bins(dist_2D_):
    """Reshape the flat values of a 2D distribution into an (x, y) matrix.

    Two layouts are handled:

    * one (x label, y label, value) triple per point, i.e. the three lists
      have equal length: the unique labels are sorted and the values are
      scattered into a matrix indexed by label position. Nothing is changed
      when the sorted unique x and y labels differ.
    * ``len(values) == n_bins_x * n_bins_y``: the flat list is read with x as
      the fastest-running index (``values[x + n_bins_x * y]``).

    Any other layout is reported through ``msg.error`` and left untouched.

    Args:
        dist_2D_: a ``Distribution_2D``; its ``_values`` (and, in the first
            layout, ``_bin_labels_x`` / ``_bin_labels_y``) are replaced.
    """
    if not isinstance(dist_2D_, Distribution_2D):
        msg.fatal("HEP_data_utils.helpers.regularise_bins",
                  "argument must be of type Distribution_2D")
    n_vals = len(dist_2D_._values)
    n_bins_x = len(dist_2D_._bin_labels_x)
    n_bins_y = len(dist_2D_._bin_labels_y)
    if n_vals == n_bins_x == n_bins_y:
        bin_labels_x = sorted(set(dist_2D_._bin_labels_x))
        bin_labels_y = sorted(set(dist_2D_._bin_labels_y))
        if bin_labels_x != bin_labels_y:
            return
        # Label -> matrix position, so each point is placed without a list scan.
        x_position = {label: i for i, label in enumerate(bin_labels_x)}
        y_position = {label: i for i, label in enumerate(bin_labels_y)}
        new_values = np.zeros(shape=(len(bin_labels_x), len(bin_labels_y)))
        for x, y, v in zip(dist_2D_._bin_labels_x, dist_2D_._bin_labels_y,
                           dist_2D_._values):
            new_values[x_position[x], y_position[y]] = v
        dist_2D_._values = new_values
        dist_2D_._bin_labels_x = bin_labels_x
        dist_2D_._bin_labels_y = bin_labels_y
    elif n_vals == n_bins_x * n_bins_y:
        new_values = np.zeros(shape=(n_bins_x, n_bins_y))
        # Iterate over integer positions: enumerate() would hand back
        # (index, label) tuples, which cannot be used in the flat-index sum.
        for x_idx in range(n_bins_x):
            for y_idx in range(n_bins_y):
                new_values[x_idx, y_idx] = dist_2D_._values[x_idx +
                                                            n_bins_x * y_idx]
        dist_2D_._values = new_values
    else:
        msg.error("HEP_data_utils.helpers.regularise_bins",
                  "function not implemented for this type of matrix",
                  _verbose_level=0)
Exemplo n.º 3
0
def load_yaml_file(dataset_, path_, **kwargs):
    """Load the single table stored in a yaml file into ``dataset_``.

    The file must hold exactly one yaml document. Tables with fewer than
    three independent variables are forwarded to
    ``load_distributions_from_yaml``; anything else is reported as an error.

    Args:
        dataset_: container the distributions are added to.
        path_: path of the yaml table file.
        **kwargs: ``metadata_global_`` (dict, optional) is passed through to
            the distribution loader.
    """
    caller = "HEP_data_utils.helpers.load_yaml_file"
    documents = open_yaml_file(path_)
    n_documents = len(documents)
    if n_documents != 1:
        msg.fatal(
            caller,
            "{0} contains {1} entries, but I am only configured for 1... is this really a single distribution/matrix?"
            .format(path_, n_documents))
    table = documents[0]
    dep_vars = table["dependent_variables"]
    indep_vars = table["independent_variables"]
    n_indep = len(indep_vars)

    if n_indep >= 3:
        # Too many dimensions: show the offending entries, then give up.
        msg.error(
            caller,
            "file {0} has {1} independent_variables... I don't know what to do. I'm such a failure, I knew I wasn't cut out for this :(. The problematic entries are as follows:"
            .format(path_, n_indep))
        print(indep_vars)
        msg.fatal(caller,
                  "could not interpret number of independent_variables")
        return

    global_meta = kwargs.get("metadata_global_", {})
    load_distributions_from_yaml(dataset_,
                                 dep_vars,
                                 indep_vars,
                                 path_,
                                 n_dim_=n_indep,
                                 metadata_global_=global_meta,
                                 metadata_local_=table)
Exemplo n.º 4
0
def open_yaml_file(path_):
    """Parse every yaml document in a file and return them as a list.

    Args:
        path_: path of the yaml file to read.

    Returns:
        list with one parsed object per yaml document, in file order.

    A ``yaml.YAMLError`` raised while parsing is printed and then reported
    through ``msg.fatal``.
    """
    data = []
    # safe_load_all is lazy, so the documents must be consumed while the file
    # is still open; the context manager then closes the handle on every path.
    with open(path_, 'r') as yaml_file:
        try:
            for datum in yaml.safe_load_all(yaml_file):
                msg.info("HEP_data_utils.helpers.open_yaml_file",
                         "yaml file opened with entries:",
                         _verbose_level=1)
                msg.check_verbosity_and_print(yaml.safe_dump(datum),
                                              _verbose_level=1)
                data.append(datum)
        except yaml.YAMLError as exc:
            print(exc)
            msg.fatal(
                "HEP_data_utils.helpers.open_yaml_file",
                "Exception thrown when opening the yaml file (see previous messages)"
            )
    return data
Exemplo n.º 5
0
def _fill_2D_bin_labels(labels_, bin_entries_, distribution_):
    """Copy the "value" of each bin entry into labels_ at the same index."""
    for i, bin_entry in enumerate(bin_entries_):
        label = bin_entry.get("value", None)
        if label is not None:
            labels_[i] = label
        else:
            msg.fatal(
                "HEP_data_utils.helpers.set_2D_bins",
                "Could not interpret bin entry {0} for distribution {1}".
                format(bin_entry, distribution_._description))


def set_2D_bins(distribution_, indep_vars_):
    """Attach per-point x and y bin labels to a 2D distribution.

    ``_bin_labels_x`` and ``_bin_labels_y`` are first set to lists of
    "unlabeled" with one slot per point (``len(distribution_)``), then filled
    from the "value" field of the entries of ``indep_vars_[0]["values"]`` and
    ``indep_vars_[1]["values"]`` respectively. An entry without a "value" is
    reported through ``msg.fatal``.

    Args:
        distribution_: object supporting ``len()`` whose label attributes are
            overwritten.
        indep_vars_: sequence of exactly two independent-variable mappings,
            each with a "values" list of bin mappings.
    """
    if len(indep_vars_) != 2:
        msg.fatal(
            "HEP_data_utils.helpers.set_2D_bins",
            "distribution {0} has {1} independent_variables but I am only configured to deal with 2"
            .format(distribution_._description, len(indep_vars_)))
    n_points = len(distribution_)
    distribution_._bin_labels_x = ["unlabeled"] * n_points
    distribution_._bin_labels_y = ["unlabeled"] * n_points
    _fill_2D_bin_labels(distribution_._bin_labels_x, indep_vars_[0]["values"],
                        distribution_)
    _fill_2D_bin_labels(distribution_._bin_labels_y, indep_vars_[1]["values"],
                        distribution_)
Exemplo n.º 6
0
def set_1D_bins(distribution_, indep_vars_):
    """Attach bin edges or bin labels to a 1D distribution.

    ``_bin_values`` is reset to ``len(distribution_) + 1`` zeros and
    ``_bin_labels`` to one "unlabeled" per point. Each entry of
    ``indep_vars_[0]["values"]`` is then read as either

    * a labelled bin (``"value"`` present): stored in ``_bin_labels[i]``; or
    * a ranged bin (``"low"`` and ``"high"`` present): ``high`` becomes edge
      ``i + 1``; the first bin also sets edge 0, later bins must start
      exactly where the previous one ended.

    Entries matching neither form, and non-contiguous ranges, are reported
    through ``msg.fatal``.

    Args:
        distribution_: object supporting ``len()`` whose bin attributes are
            overwritten.
        indep_vars_: sequence holding exactly one independent-variable
            mapping with a "values" list of bin mappings.
    """
    if len(indep_vars_) != 1:
        msg.fatal(
            "HEP_data_utils.helpers.set_1D_bins",
            "distribution {0} has {1} independent_variables but I am only configured to deal with 1"
            .format(distribution_._description, len(indep_vars_)))
    n_points = len(distribution_)
    distribution_._bin_values = np.zeros(shape=(1 + n_points))
    distribution_._bin_labels = ["unlabeled"] * n_points
    for i, bin_entry in enumerate(indep_vars_[0]["values"]):
        if bin_entry.get("value", None) is not None:
            distribution_._bin_labels[i] = bin_entry["value"]
        elif (bin_entry.get("high", None) is not None
              and bin_entry.get("low", None) is not None):
            if i == 0:
                # Only the first bin supplies its own lower edge.
                distribution_._bin_values[0] = bin_entry["low"]
            elif bin_entry["low"] != distribution_._bin_values[i]:
                # Exact comparison: the shared edge is expected to be written
                # identically in neighbouring bins.
                msg.fatal(
                    "HEP_data_utils.helpers.set_1D_bins",
                    "Bin entry {0} for distribution {1} is not continuous from the previous bin which ended at {2}"
                    .format(bin_entry, distribution_._description,
                            distribution_._bin_values[i]))
            distribution_._bin_values[i + 1] = bin_entry["high"]
        else:
            msg.fatal(
                "HEP_data_utils.helpers.set_1D_bins",
                "Could not interpret bin entry {0} for distribution {1}".
                format(bin_entry, distribution_._description))
Exemplo n.º 7
0
def load_dataset(dataset_, path_):
    """Populate ``dataset_`` from a directory, a submission file or a table.

    A directory is steered by its ``submission.yaml`` when one exists,
    otherwise all of its yaml files are loaded. A plain path is treated as a
    submission file when ``is_submission_file`` says so, and as a single
    yaml table otherwise.

    Args:
        dataset_: container the distributions are added to.
        path_: directory or yaml file to load.
    """
    caller = "HEP_data_utils.helpers.load_dataset"
    path_ = hlp.remove_subleading(path_, "/")

    if os.path.isdir(path_):
        msg.info(
            caller,
            "{0} is a directory... I am expanding the entries (but will only go one directory deep!)"
            .format(path_))
        has_steering_file = os.path.isfile(path_ + "/submission.yaml")
        if not has_steering_file:
            msg.info(
                caller,
                "no submission.yaml file found in directory {0}... I will open all available yaml files"
                .format(path_))
            load_all_yaml_files(dataset_, path_)
            return
        msg.info(
            caller,
            "submission.yaml file found in directory {0}... I will use this to steer the directory"
            .format(path_))
        load_submission_file(dataset_, path_, "submission.yaml")
        return

    # Not a directory: it has to be some kind of yaml file.
    if is_yaml_file(path_) == False:
        msg.fatal(
            caller,
            "{0} doesn't seem to be a yaml file or a directory... I don't know what to do with it"
            .format(path_))
    if not is_submission_file(path_):
        msg.info(
            caller,
            "Interpreting {0} as a yaml file... I will use it as my only input"
            .format(path_))
        load_yaml_file(dataset_, path_)
        return
    msg.info(
        caller,
        "{0} is a submission.yaml file... I will use this to steer the directory"
        .format(path_))
    load_submission_file(dataset_, path_)
Exemplo n.º 8
0
def get_error_from_yaml_map(distribution_, error_, pt_idx_, err_idx_=0):
    """Store one error entry of one data point on a distribution.

    The error source name is ``error_["label"]`` or, when absent,
    ``"err<err_idx_>"``. A ``"symerror"`` entry is written to
    ``_symm_errors[name][pt_idx_]``; an ``"asymerror"`` entry writes its
    ``"plus"`` / ``"minus"`` parts to ``_asymm_errors_up`` /
    ``_asymm_errors_down``. Arrays of ``len(distribution_)`` zeros are
    created the first time a name is seen.

    Args:
        distribution_: object holding the three error dictionaries.
        error_: mapping describing the error (one entry of a yaml "errors"
            list).
        pt_idx_: index of the data point the error belongs to.
        err_idx_: position of the error in the point's list, used only to
            build the fallback name.

    Returns:
        The error source name that was used.

    A missing or null "plus"/"minus", or a map with neither "symerror" nor
    "asymerror", is reported through ``msg.fatal``.
    """
    caller = "HEP_data_utils.helpers.get_error_from_yaml_map"
    key = error_.get("label", "err{0}".format(err_idx_))
    if "symerror" in error_:
        if key not in distribution_._symm_errors:
            msg.info(caller,
                     "Creating symmetric error {0} with length {1}".format(
                         key, len(distribution_)),
                     _verbose_level=1)
            distribution_._symm_errors[key] = np.zeros(
                shape=(len(distribution_)))
        distribution_._symm_errors[key][pt_idx_] = error_["symerror"]
    elif "asymerror" in error_:
        err_asymm = error_["asymerror"]
        if key not in distribution_._asymm_errors_up:
            msg.info(caller,
                     "Creating asymmetric error {0} with length {1}".format(
                         key, len(distribution_)),
                     _verbose_level=1)
            distribution_._asymm_errors_up[key] = np.zeros(
                shape=(len(distribution_)))
            distribution_._asymm_errors_down[key] = np.zeros(
                shape=(len(distribution_)))
        # Both halves are validated the same way: absent and null are
        # equally unusable.
        plus = err_asymm.get("plus", None)
        if plus is None:
            msg.fatal(caller,
                      "No entry named \"plus\" for error \"asymerror\"")
        else:
            distribution_._asymm_errors_up[key][pt_idx_] = plus
        minus = err_asymm.get("minus", None)
        if minus is None:
            msg.fatal(caller,
                      "No entry named \"minus\" for error \"asymerror\"")
        else:
            distribution_._asymm_errors_down[key][pt_idx_] = minus
    else:
        print(error_)
        msg.fatal(caller,
                  "map does not have an entry called symerror or asymerror")
    return key
Exemplo n.º 9
0
 def plot(self):
     """Report through msg.fatal that the base class cannot be plotted."""
     reason = "not implemented for base class - you should have created a Distribution_1D or Distribution_2D object"
     msg.fatal("Distribution.plot", reason)
Exemplo n.º 10
0
import general_utils.helpers as hlp
import HEP_data_utils.helpers as HEPData_hlp
from HEP_data_utils.data_structures import *


def print_help():
	"""Print the command-line usage of the script through msg.info."""
	usage_lines = (
		"Usage is: python study_DM_paper.py <single yaml-file OR directory with yaml-files OR submission.yaml steering file>",
		"I'm currently configured specifically for the DM paper format",
	)
	for text in usage_lines:
		msg.info("study_DM_paper.py:print_help", text)

if __name__ == "__main__":
	# Welcome, check arguments and set verbosity (to be a config in future).
	msg.info("study_DM_paper.py", "Running program")
	msg.VERBOSE_LEVEL = -1
	if len(sys.argv) != 2:
		print_help()
		msg.fatal("study_DM_paper.py", "{0} argument(s) provided where 2 were expected".format(len(sys.argv)))
	if sys.argv[1] == "--help" or sys.argv[1] == "-h":
		print_help()
		# sys.exit rather than the bare exit() helper, which is only
		# installed by the site module for interactive sessions.
		sys.exit()
	# Create the dataset container and load from yaml file(s).
	dataset = Distribution_store("Darren DM paper")
	HEPData_hlp.load_dataset(dataset, sys.argv[1])
	msg.info("study_DM_paper.py", "Dataset loaded with the following entries")
	dataset.print_keys()
	# Set a table key to something intelligible. When the content of a file
	# is unknown, printing its metadata first helps to choose an informative key.
	table1_key = "|10.17182/hepdata.78366.v2/t1|Table1.yaml|measured $R^\\text{miss}$|"
	dataset.print_meta(table1_key)
	dataset.rename(table1_key, "R_pT_miss_geq1j_meas")
	# The remaining tables are renamed from a keys file to keep this short.
	dataset.load_keys("DM_paper_keys.dat")
	dataset.print_keys()
	# Let's see what we have loaded using print(dataset) or dataset.print_all()
Exemplo n.º 11
0
def _register_distribution(store_, dist_key_, distribution_):
    """Store distribution_ under dist_key_, adding a "-duplicated-auto-key;N"
    suffix with the first free N >= 1 when the key is already taken."""
    if store_.get(dist_key_, None) is not None:
        base = dist_key_ + "-duplicated-auto-key;"
        n = 1
        while store_.get(base + str(n), None) is not None:
            n += 1
        dist_key_ = base + str(n)
    store_[dist_key_] = distribution_


def load_distributions_from_yaml(dataset_, dep_vars_, indep_vars_, path_,
                                 **argv):
    """Create one distribution per dependent variable and add it to dataset_.

    For every entry of ``dep_vars_`` a ``Distribution_1D`` / ``Distribution_2D``
    (according to ``n_dim_``) is filled with the header information, the
    values, the per-point errors and the binning taken from ``indep_vars_``,
    and stored in ``dataset_._distributions_1D`` / ``_distributions_2D`` under
    the key ``|<table_doi>|<data_file or path_>|<name>|``.

    Args:
        dataset_: container holding the ``_distributions_1D`` and
            ``_distributions_2D`` dictionaries.
        dep_vars_: list of dependent-variable mappings ("header", "values").
        indep_vars_: list of independent-variable mappings ("header",
            "values").
        path_: path of the yaml file, used in messages and as key fallback.
        **argv: ``n_dim_`` (int, default 1), ``metadata_global_`` (dict,
            stored under "GLOBAL::" keys) and ``metadata_local_`` (dict,
            stored under "LOCAL::" keys).

    Problems (missing values, unsupported dimension, error arrays of the
    wrong length) are reported through ``msg.fatal``.
    """
    caller = "HEP_data_utils.helpers.load_distributions_from_yaml"
    n_dim_ = argv.get("n_dim_", 1)
    extra_info_global = argv.get("metadata_global_", {})
    # The table-level metadata arrives under its own keyword; reading the
    # global keyword here would silently duplicate the global metadata.
    extra_info_local = argv.get("metadata_local_", {})
    for dep_var in dep_vars_:
        dist_key = "|"
        if extra_info_global.get("table_doi", None) is not None:
            dist_key = dist_key + str(extra_info_global["table_doi"]) + "|"
        if extra_info_global.get("data_file", None) is not None:
            dist_key = dist_key + str(extra_info_global["data_file"]) + "|"
        else:
            dist_key = dist_key + path_ + "|"
        if n_dim_ == 1:
            distribution = Distribution_1D()
        elif n_dim_ == 2:
            distribution = Distribution_2D()
        else:
            distribution = Distribution()
        header = dep_var["header"]
        distribution._description = header.get("name", "unknown")
        distribution._name = distribution._description
        distribution._dep_var = header.get("name", "unknown")
        # Guard the empty case so that it reaches the dimension check below
        # instead of failing on the index.
        if indep_vars_:
            distribution._indep_var = indep_vars_[0]["header"].get(
                "name", "unknown")
        else:
            distribution._indep_var = "unknown"
        distribution._units = header.get("units", "unknown")

        # Metadata: everything except the bulky value lists.
        for key in extra_info_local:
            if key == "dependent_variables" or key == "independent_variables":
                continue
            distribution._meta["LOCAL::" + key] = extra_info_local[key]
        for key in extra_info_global:
            distribution._meta["GLOBAL::" + key] = extra_info_global[key]
        for key in dep_var:
            if key == "values" or key == "errors":
                continue
            distribution._meta["LOCAL::DEP_VARS::" + key] = dep_var[key]

        # Values first, so that len(distribution) is final before the error
        # arrays are created.
        for entry in dep_var["values"]:
            try:
                distribution._values = np.append(distribution._values,
                                                 entry["value"])
            except KeyError as exc:
                msg.error(caller,
                          "KeyError: {0}".format(exc),
                          _verbose_level=-1)
                msg.fatal(
                    caller,
                    "Entry with no \"value\" when trying to create distribution {0} in file {1}"
                    .format(distribution._description, path_))
        for pt_idx, entry in enumerate(dep_var["values"]):
            try:
                errors = entry["errors"]
            except KeyError as exc:
                msg.error(caller,
                          "KeyError: {0}".format(exc),
                          _verbose_level=1)
                msg.warning(
                    caller,
                    "Entry with no \"errors\" when trying to create distribution {0} in file {1}... Assuming there are none"
                    .format(distribution._description, path_),
                    _verbose_level=1)
                errors = []
            for err_idx, error in enumerate(errors):
                get_error_from_yaml_map(distribution, error, pt_idx, err_idx)

        for indep_var in indep_vars_:
            for key in indep_var:
                if key == "values":
                    continue
                distribution._meta["LOCAL::INDEP_VARS::" +
                                   key] = indep_var[key]

        # Taken before the binning step, which may reshape _values.
        expected_size = len(distribution._values)
        if n_dim_ == 1:
            set_1D_bins(distribution, indep_vars_)
        elif n_dim_ == 2:
            set_2D_bins(distribution, indep_vars_)
            regularise_bins(distribution)
        else:
            msg.fatal(
                caller,
                "number of bin dimensions is {0} but I can only handle 1 or 2".
                format(n_dim_))
        for error_group in (distribution._symm_errors,
                            distribution._asymm_errors_up,
                            distribution._asymm_errors_down):
            for key in error_group:
                this_err_size = len(error_group[key])
                if this_err_size == expected_size:
                    continue
                msg.fatal(
                    caller,
                    "error source {0} has length {1} for distribution [{2}] where {3} was expected"
                    .format(key, this_err_size, distribution._description,
                            expected_size))
        msg.info(caller,
                 "yaml file loaded with the following entries",
                 _verbose_level=0)
        dist_key = dist_key + str(distribution._name) + "|"
        if msg.VERBOSE_LEVEL >= 0:
            print(distribution)
        if n_dim_ == 1:
            _register_distribution(dataset_._distributions_1D, dist_key,
                                   distribution)
        elif n_dim_ == 2:
            _register_distribution(dataset_._distributions_2D, dist_key,
                                   distribution)
import os, sys, yaml
import general_utils.messaging as msg
import general_utils.helpers as hlp
import HEP_data_utils.helpers as HEPData_hlp
from HEP_data_utils.data_structures import *

if __name__ == "__main__":
    msg.info("study_yaml.py", "Running program")
    msg.VERBOSE_LEVEL = 0
    if len(sys.argv) != 2:
        msg.fatal(
            "study_yaml.py",
            "{0} argument(s) provided where 2 were expected".format(
                len(sys.argv)))
    in_str = sys.argv[1]
    in_str = hlp.remove_subleading(in_str, "/")
    dataset = Dataset()
    if os.path.isdir(in_str):
        msg.info(
            "study_yaml.py",
            "{0} is a directory... I am expanding the entries (but will only go one directory deep!)"
            .format(in_str))
        if os.path.isfile(in_str + "/submission.yaml"):
            msg.info(
                "study_yaml.py",
                "submission.yaml file found in directory {0}... I will use this to steer the directory"
                .format(in_str))
            HEPData_hlp.load_submission_file(dataset, in_str,
                                             "submission.yaml")
        else:
            msg.info(