Exemplo n.º 1
0
    def _create_weight_configs(self):
        """Build all weight configurations and store them.

        Every combination of the per-dataset weights in `self._weight_dict`
        is combined with every combination of the values in
        `self._common_weights`.  For each such pair the common weight names
        are passed, one after the other, to `mps_tools.replace_factors`
        together with their chosen value.  Configurations that were already
        collected are skipped.  Finally each weight expression is passed
        through `mps_tools.compute_product_string` and the resulting list of
        `(dataset name, weight)` pairs is appended to `self._weight_configs`.
        """

        # one list of (dataset name, weight expression) choices per dataset
        per_dataset_choices = [
            [(dataset_name, expression) for expression in expressions]
            for dataset_name, expressions in self._weight_dict.items()
        ]

        # one list of (common weight name, value) choices per common weight
        per_common_choices = [
            [(weight_name, value) for value in values]
            for weight_name, values in self._common_weights.items()
        ]

        # every combination of common weight values as a name -> value mapping
        common_assignments = [
            dict(combination)
            for combination in itertools.product(*per_common_choices)
        ]

        collected = []
        for selection in itertools.product(*per_dataset_choices):
            size_before = len(collected)
            for assignment in common_assignments:
                substituted = []
                for dataset_name, expression in selection:
                    # apply the common weights one by one to the expression
                    for weight_name in assignment:
                        expression = mps_tools.replace_factors(
                            expression, weight_name, assignment[weight_name])
                    substituted.append((dataset_name, expression))
                candidate = tuple(substituted)
                if candidate in collected:
                    continue
                collected.append(candidate)

            # keep the plain selection if the loop above added nothing new
            if len(collected) == size_before:
                collected.append(selection)

        for configuration in collected:
            self._weight_configs.append(
                [(dataset_name, mps_tools.compute_product_string(expression))
                 for dataset_name, expression in configuration])
Exemplo n.º 2
0
    def _create_weight_configs(self):
        """Build all weight configurations and store them.

        Every combination of the per-dataset weights in `self._weight_dict`
        is combined with every combination of the values in
        `self._common_weights`.  For each such pair the common weight names
        are passed, one after the other, to `mps_tools.replace_factors`
        together with their chosen value.  Configurations that were already
        collected are skipped.  Finally each weight expression is passed
        through `mps_tools.compute_product_string` and the resulting list of
        `(dataset name, weight)` pairs is appended to `self._weight_configs`.
        """

        # one list of (dataset name, weight expression) choices per dataset
        per_dataset_choices = [
            [(dataset_name, expression) for expression in expressions]
            for dataset_name, expressions in self._weight_dict.items()
        ]

        # one list of (common weight name, value) choices per common weight
        per_common_choices = [
            [(weight_name, value) for value in values]
            for weight_name, values in self._common_weights.items()
        ]

        # every combination of common weight values as a name -> value mapping
        common_assignments = [
            dict(combination)
            for combination in itertools.product(*per_common_choices)
        ]

        collected = []
        for selection in itertools.product(*per_dataset_choices):
            size_before = len(collected)
            for assignment in common_assignments:
                substituted = []
                for dataset_name, expression in selection:
                    # apply the common weights one by one to the expression
                    for weight_name in assignment:
                        expression = mps_tools.replace_factors(
                            expression, weight_name, assignment[weight_name])
                    substituted.append((dataset_name, expression))
                candidate = tuple(substituted)
                if candidate in collected:
                    continue
                collected.append(candidate)

            # keep the plain selection if the loop above added nothing new
            if len(collected) == size_before:
                collected.append(selection)

        for configuration in collected:
            self._weight_configs.append(
                [(dataset_name, mps_tools.compute_product_string(expression))
                 for dataset_name, expression in configuration])
Exemplo n.º 3
0
    def _fetch_datasets(self):
        """Fetch internal and external dataset configurations.

        The main configuration (`self._config`) is processed first, followed
        by every entry of `self._external_datasets`.  For each
        `[dataset:<name>]` section an entry `self._datasets[name]` is filled
        with the keys "inputFileList", "collection", "configTemplate",
        "globaltag", "primaryWidth", "numberOfEvents", "json" and "njobs"
        (plus "cosmicsZeroTesla" and "cosmicsDecoMode" if the collection
        contains "ALCARECOTkAlCosmics").  The weights that are found are
        stored in `self._weight_dict` and `self._common_weights`, and
        `self._global_tag` is taken from the dataset section read last.

        The environment variable "datasetdir" is overridden by the
        `datasetdir` option of a configuration's [general] section while that
        configuration is processed and is restored afterwards.

        Exits via `sys.exit(1)` if a mandatory option is missing, if an input
        file list cannot be opened or yields zero jobs, or if no dataset
        section is defined at all.  Raises `KeyError` if "datasetdir" is not
        set in the environment.
        """

        all_configs = collections.OrderedDict()
        all_configs["main"] = {"config": self._config,
                               "general": self._general_options,
                               "weight": None}
        all_configs.update(self._external_datasets)

        for config in six.itervalues(all_configs):
            # Resolve the global weight of this configuration: a name that is
            # known in `self._common_weights` (with or without the path of the
            # main configuration appended) is replaced by its list of values.
            global_weight = "1" if config["weight"] is None else config["weight"]
            if global_weight+self._config.config_path in self._common_weights:
                global_weight = self._common_weights[global_weight+
                                                     self._config.config_path]
            elif global_weight in self._common_weights:
                global_weight = self._common_weights[global_weight]
            else:
                global_weight = (global_weight,)
            common_weights = {}
            weight_dict = {}

            # Remember the value once per configuration, before any [general]
            # section can override it.  Caching inside the section loop would
            # store the already overridden value and make the restore below
            # a no-op.
            cache_datasetdir = os.environ["datasetdir"]
            for section in config["config"].sections():
                if "general" in section:
                    if config["config"].has_option("general", "datasetdir"):
                        os.environ["datasetdir"] = config["config"].get("general", "datasetdir")
                elif section == "weights":
                    # comma-separated values of each common weight
                    for option in config["config"].options(section):
                        common_weights[option] \
                            = [x.strip() for x in
                               config["config"].get(section, option).split(",")]
                elif section.startswith("dataset:"):
                    print("-"*75)
                    # set name from section-name
                    name = section[8:]
                    if name in self._datasets:
                        print("WARNING: Duplicate definition of dataset '{}'".format(name))
                        print(" -> Using defintion in '{}':\n".format(config["config"].config_path))
                        print("    [{}]".format(section))
                        for k,v in config["config"].items(section):
                            print("   ", k, "=", v)
                        print()
                    self._datasets[name] = {}

                    # extract weight for the dataset
                    if config["config"].has_option(section, "weight"):
                        self._weight_dict[name] \
                            = [x.strip() for x in
                               config["config"].get(section, "weight").split(",")]
                    else:
                        self._weight_dict[name] = ["1.0"]
                    # prepend every global weight to every dataset weight
                    self._weight_dict[name] = [global_w+"*"+w
                                               for w in self._weight_dict[name]
                                               for global_w in global_weight]
                    weight_dict[name] = self._weight_dict[name]

                    # extract essential variables
                    for var in ("inputFileList", "collection"):
                        try:
                            self._datasets[name][var] = config["config"].get(section, var)
                        except ConfigParser.NoOptionError:
                            print("No", var, "found in", section+". Please check ini-file.")
                            sys.exit(1)

                    # get globaltag and configTemplate. If none in section, try to get
                    # default from [general] section.
                    for var in ("configTemplate", "globaltag"):
                        try:
                            self._datasets[name][var] = config["config"].get(section, var)
                        except (ConfigParser.NoSectionError,ConfigParser.NoOptionError):
                            try:
                                self._datasets[name][var] = config["general"][var]
                            except KeyError:
                                try:
                                    self._datasets[name][var] \
                                        = all_configs["main"]["general"][var]
                                except KeyError:
                                    print("No",var,"found in ["+section+"]", end=' ')
                                    print("and no default in [general] section.")
                                    sys.exit(1)

                    # options that are mandatory for cosmics collections only
                    if "ALCARECOTkAlCosmics" in self._datasets[name]["collection"]:
                        try:
                            self._datasets[name]["cosmicsZeroTesla"] \
                                = config["config"].getboolean(section,"cosmicsZeroTesla")
                        except ConfigParser.NoOptionError:
                            print("No option cosmicsZeroTesla found in", section,"even though it is required for dataset type", self._datasets[name]["collection"], ". Please check ini-file.")
                            sys.exit(1)
                        try:
                            self._datasets[name]["cosmicsDecoMode"] \
                                = config["config"].getboolean(section,"cosmicsDecoMode")
                        except ConfigParser.NoOptionError:
                            print("No option cosmicsDecoMode found in", section,"even though it is required for dataset type", self._datasets[name]["collection"], ".Please check ini-file.")
                            sys.exit(1)

                    # extract non-essential options
                    self._datasets[name]["primaryWidth"] = -1.0
                    if config["config"].has_option(section,"primaryWidth"):
                        self._datasets[name]["primaryWidth"] \
                            = config["config"].getfloat(section,"primaryWidth")

                    self._datasets[name]["numberOfEvents"] = -1
                    if config["config"].has_option(section, "numberOfEvents"):
                        self._datasets[name]["numberOfEvents"] \
                            = config["config"].getint(section, "numberOfEvents")

                    # json: dataset section first, then the [general] options
                    # of this configuration, then those of the main one
                    self._datasets[name]["json"] = ""
                    try:
                        self._datasets[name]["json"] = config["config"].get(section,"json")
                    except ConfigParser.NoOptionError:
                        try:
                            self._datasets[name]["json"] = config["general"]["json"]
                        except KeyError:
                            try:
                                self._datasets[name]["json"] \
                                    = all_configs["main"]["general"]["json"]
                            except KeyError:
                                print("No json given in either [general] or", end=' ')
                                print("["+section+"] sections.")
                                print(" -> Proceeding without json-file.")


                    #replace ${datasetdir} and other variables, e.g. $CMSSW_BASE
                    for var in ("inputFileList", "json", "configTemplate"):
                        self._datasets[name][var] \
                            = os.path.expandvars(self._datasets[name][var])


                    # Get number of jobs from lines in inputfilelist
                    self._datasets[name]["njobs"] = 0
                    try:
                        with open(self._datasets[name]["inputFileList"], "r") as filelist:
                            for line in filelist:
                                if "CastorPool" in line:
                                    continue
                                # ignore empty lines
                                if line.strip():
                                    self._datasets[name]["njobs"] += 1
                    except IOError:
                        print("Inputfilelist", self._datasets[name]["inputFileList"], end=' ')
                        print("does not exist.")
                        sys.exit(1)
                    if self._datasets[name]["njobs"] == 0:
                        print("Number of jobs is 0. There may be a problem with the inputfilelist:")
                        print(self._datasets[name]["inputFileList"])
                        sys.exit(1)

                    # Check if njobs gets overwritten in .ini-file
                    if config["config"].has_option(section, "njobs"):
                        requested_njobs = config["config"].getint(section, "njobs")
                        if requested_njobs <= self._datasets[name]["njobs"]:
                            self._datasets[name]["njobs"] = requested_njobs
                        else:
                            print("'njobs' is bigger than the number of files for this", end=' ')
                            print("dataset:", self._datasets[name]["njobs"])
                            print("Using default.")
                    else:
                        print("No number of jobs specified. Using number of files in", end=' ')
                        print("inputfilelist as the number of jobs.")

            # check if local weights override global weights and resolve name clashes
            # NOTE(review): `weight_name in w` is a substring test, and every
            # pass assigns `self._weight_dict[key]` starting from the local,
            # unmodified `weight` list, so a replacement made for an earlier
            # `weight_name` is overwritten by a later one - confirm that this
            # is intended.
            for weight_name, weight_values in six.iteritems(common_weights):
                for key, weight in six.iteritems(weight_dict):
                    if any(weight_name in w for w in weight):
                        self._common_weights[weight_name+config["config"].config_path] = weight_values
                        self._weight_dict[key] = [mps_tools.replace_factors(w,
                                                                            weight_name,
                                                                            weight_name+config["config"].config_path)
                                                  for w in weight]
                    else:
                        self._common_weights[weight_name] = weight_values
                        self._weight_dict[key] = weight

            # undo a possible override by the [general] section of this config
            os.environ["datasetdir"] = cache_datasetdir

        if len(self._datasets) == 0:
            # `list(...)` is required because the key view returned by
            # `keys()` in Python 3 cannot be concatenated to a list
            print("No dataset section defined in '{0}'".format(
                ", ".join([self._args.aligmentConfig]
                          + list(self._external_datasets.keys()))))
            print("At least one section '[dataset:<name>]' is required.")
            sys.exit(1)

        # `name` still refers to the dataset section that was read last
        self._global_tag = self._datasets[name]["globaltag"]
Exemplo n.º 4
0
    def _fetch_datasets(self):
        """Fetch internal and external dataset configurations."""

        all_configs = collections.OrderedDict()
        all_configs["main"] = {"config": self._config,
                               "general": self._general_options,
                               "weight": None}
        all_configs.update(self._external_datasets)

        for config in all_configs.itervalues():
            global_weight = "1" if config["weight"] is None else config["weight"]
            if global_weight+self._config.config_path in self._common_weights:
                global_weight = self._common_weights[global_weight+
                                                     self._config.config_path]
            elif global_weight in self._common_weights:
                global_weight = self._common_weights[global_weight]
            else:
                global_weight = (global_weight,)
            common_weights = {}
            weight_dict = {}
            for section in config["config"].sections():
                cache_datasetdir = os.environ["datasetdir"]
                if "general" in section:
                    if config["config"].has_option("general", "datasetdir"):
                        os.environ["datasetdir"] = config["config"].get("general", "datasetdir")
                elif section == "weights":
                    for option in config["config"].options(section):
                        common_weights[option] \
                            = [x.strip() for x in
                               config["config"].get(section, option).split(",")]
                elif section.startswith("dataset:"):
                    print "-"*75
                    # set name from section-name
                    name = section[8:]
                    if name in self._datasets:
                        print "WARNING: Duplicate definition of dataset '{}'".format(name)
                        print " -> Using defintion in '{}':\n".format(config["config"].config_path)
                        print "    [{}]".format(section)
                        for k,v in config["config"].items(section):
                            print "   ", k, "=", v
                        print
                    self._datasets[name] = {}

                    # extract weight for the dataset
                    if config["config"].has_option(section, "weight"):
                        self._weight_dict[name] \
                            = [x.strip() for x in
                               config["config"].get(section, "weight").split(",")]
                    else:
                        self._weight_dict[name] = ["1.0"]
                    self._weight_dict[name] = [global_w+"*"+w
                                               for w in self._weight_dict[name]
                                               for global_w in global_weight]
                    weight_dict[name] = self._weight_dict[name]

                    # extract essential variables
                    for var in ("inputFileList", "collection"):
                        try:
                            self._datasets[name][var] = config["config"].get(section, var)
                        except ConfigParser.NoOptionError:
                            print "No", var, "found in", section+". Please check ini-file."
                            sys.exit(1)

                    # get globaltag and configTemplate. If none in section, try to get
                    # default from [general] section.
                    for var in ("configTemplate", "globaltag"):
                        try:
                            self._datasets[name][var] = config["config"].get(section, var)
                        except (ConfigParser.NoSectionError,ConfigParser.NoOptionError):
                            try:
                                self._datasets[name][var] = config["general"][var]
                            except KeyError:
                                try:
                                    self._datasets[name][var] \
                                        = all_configs["main"]["general"][var]
                                except KeyError:
                                    print "No",var,"found in ["+section+"]",
                                    print "and no default in [general] section."
                                    sys.exit(1)

                    # extract non-essential options
                    self._datasets[name]["cosmicsZeroTesla"] = False
                    if config["config"].has_option(section,"cosmicsZeroTesla"):
                        self._datasets[name]["cosmicsZeroTesla"] \
                            = config["config"].getboolean(section,"cosmicsZeroTesla")

                    self._datasets[name]["cosmicsDecoMode"] = False
                    if config["config"].has_option(section,"cosmicsDecoMode"):
                        self._datasets[name]["cosmicsDecoMode"] \
                            = config["config"].getboolean(section,"cosmicsDecoMode")

                    self._datasets[name]["primaryWidth"] = -1.0
                    if config["config"].has_option(section,"primaryWidth"):
                        self._datasets[name]["primaryWidth"] \
                            = config["config"].getfloat(section,"primaryWidth")

                    self._datasets[name]["numberOfEvents"] = -1
                    if config["config"].has_option(section, "numberOfEvents"):
                        self._datasets[name]["numberOfEvents"] \
                            = config["config"].getint(section, "numberOfEvents")

                    self._datasets[name]["json"] = ""
                    try:
                        self._datasets[name]["json"] = config["config"].get(section,"json")
                    except ConfigParser.NoOptionError:
                        try:
                            self._datasets[name]["json"] = config["general"]["json"]
                        except KeyError:
                            try:
                                self._datasets[name]["json"] \
                                    = all_configs["main"]["general"]["json"]
                            except KeyError:
                                print "No json given in either [general] or",
                                print "["+section+"] sections."
                                print " -> Proceeding without json-file."


                    #replace ${datasetdir} and other variables, e.g. $CMSSW_BASE
                    for var in ("inputFileList", "json", "configTemplate"):
                        self._datasets[name][var] \
                            = os.path.expandvars(self._datasets[name][var])


                    # Get number of jobs from lines in inputfilelist
                    self._datasets[name]["njobs"] = 0
                    try:
                        with open(self._datasets[name]["inputFileList"], "r") as filelist:
                            for line in filelist:
                                if "CastorPool" in line:
                                    continue
                                # ignore empty lines
                                if not line.strip()=="":
                                    self._datasets[name]["njobs"] += 1
                    except IOError:
                        print "Inputfilelist", self._datasets[name]["inputFileList"],
                        print "does not exist."
                        sys.exit(1)
                    if self._datasets[name]["njobs"] == 0:
                        print "Number of jobs is 0. There may be a problem with the inputfilelist:"
                        print self._datasets[name]["inputFileList"]
                        sys.exit(1)

                    # Check if njobs gets overwritten in .ini-file
                    if config["config"].has_option(section, "njobs"):
                        if config["config"].getint(section, "njobs") <= self._datasets[name]["njobs"]:
                            self._datasets[name]["njobs"] = config["config"].getint(section, "njobs")
                        else:
                            print "'njobs' is bigger than the number of files for this",
                            print "dataset:", self._datasets[name]["njobs"]
                            print "Using default."
                    else:
                        print "No number of jobs specified. Using number of files in",
                        print "inputfilelist as the number of jobs."

            # check if local weights override global weights and resolve name clashes
            for weight_name, weight_values in common_weights.iteritems():
                for key, weight in weight_dict.iteritems():
                    if any([weight_name in w for w in weight]):
                        self._common_weights[weight_name+config["config"].config_path] = weight_values
                        self._weight_dict[key] = [mps_tools.replace_factors(w,
                                                                            weight_name,
                                                                            weight_name+config["config"].config_path)
                                                  for w in weight]
                    else:
                        self._common_weights[weight_name] = weight_values
                        self._weight_dict[key] = weight

            os.environ["datasetdir"] = cache_datasetdir

        if len(self._datasets) == 0:
            print "No dataset section defined in '{0}'".format(
                ", ".join([self._args.aligmentConfig]+self._external_datasets.keys()))
            print "At least one section '[dataset:<name>]' is required."
            sys.exit(1)

        self._global_tag = self._datasets[name]["globaltag"]