示例#1
0
文件: paxroot.py 项目: XENON1T/hax
def get_filename(run_id):
    """Return the location of the processed data file for a run.

    The run is first looked up in runs.datasets, using the name that
    runs.get_run_name returns for run_id; the location of the first matching
    row is used. If that lookup fails, a file named run_id + ".root" is
    searched for in the folders listed in hax.config["main_data_paths"].

    :param run_id: run identifier handed to runs.get_run_name. It is also
      concatenated with ".root" for the fallback search.
    :returns: the location found for the run.
    :raises ValueError: if the filename that was found is empty.
    """
    try:
        run_name = runs.get_run_name(run_id)
        filename = runs.datasets.loc[runs.datasets["name"] == run_name].iloc[0].location
    except (IndexError, AttributeError):
        # IndexError: no row in runs.datasets carries this run name.
        # AttributeError: runs.datasets has no .loc (e.g. it is None --
        # presumably when the runs db is not used; confirm). In both cases
        # fall back to searching the data folders directly.
        print("Don't know a run named %s, trying to find it anyway..." % run_id)
        filename = find_file_in_folders(run_id + ".root", hax.config["main_data_paths"])
    if not filename:
        raise ValueError("Cannot find processed data for run name %s." % run_id)
    return filename
示例#2
0
文件: minitrees.py 项目: XENON1T/hax
 def get_data(self, dataset):
     """Return data extracted from running over dataset.

     Stores the run name and run number of the dataset on the instance,
     runs self.process_event over the dataset via loop_over_dataset
     (restricted to self.branch_selection) and then calls
     self.check_cache(force_empty=True).

     :param dataset: dataset identifier, passed to runs.get_run_name,
       runs.get_run_number and loop_over_dataset.
     :returns: self.data if that attribute exists afterwards, otherwise an
       empty pandas DataFrame with a single 'event_number' column.
     """
     self.run_name = runs.get_run_name(dataset)
     self.run_number = runs.get_run_number(dataset)
     loop_over_dataset(dataset, self.process_event,
                       branch_selection=self.branch_selection,
                       desc='Making %s minitree' % self.__class__.__name__)
     self.check_cache(force_empty=True)
     if not hasattr(self, 'data'):
         self.log.warning("Not a single row was extracted from dataset %s!" % dataset)
         # columns must be list-like: a bare string makes pandas raise
         # TypeError instead of building the empty frame.
         return pd.DataFrame([], columns=['event_number'])
     return self.data
示例#3
0
def get_filename(run_id):
    """Locate the processed data file belonging to a run.

    Looks up the name runs.get_run_name gives for run_id in runs.datasets and
    takes the location of the first matching row. When that fails with an
    IndexError or AttributeError, searches the folders in
    hax.config['main_data_paths'] for run_id + '.root' instead.

    :param run_id: run identifier; passed to runs.get_run_name and
      concatenated with '.root' for the fallback search.
    :returns: the location found for the run.
    :raises ValueError: if the filename that was found is empty.
    """
    try:
        name = runs.get_run_name(run_id)
        table = runs.datasets
        # Fetch .loc before building the mask, so a runs.datasets without
        # .loc (e.g. None, if the runs db is not used) ends up in the
        # AttributeError handler below.
        locator = table.loc
        first_match = locator[table['name'] == name].iloc[0]
        filename = first_match.location
    except (IndexError, AttributeError):
        # Unknown dataset, or no usable runs.datasets: search the folders.
        notice = "Don't know a run named %s, trying to find it anyway..." % run_id
        print(notice)
        wanted = run_id + '.root'
        filename = find_file_in_folders(wanted, hax.config['main_data_paths'])

    if filename:
        return filename
    raise ValueError("Cannot find processed data for run name %s." % run_id)
示例#4
0
 def get_data(self, dataset, event_list=None):
     """Run this treemaker over dataset and return the rows it extracted.

     Before looping, the MC flag, run name, run number and run start of the
     dataset are stored on the instance.

     :param dataset: dataset identifier, passed to the runs helpers and to
       loop_over_dataset.
     :param event_list: forwarded to loop_over_dataset as event_lists.
     :returns: pandas DataFrame. The concatenation of self.data (with a fresh
       index), or an empty frame with 'event_number' and 'run_number'
       columns when self.data is empty.
     """
     self.mc_data = runs.is_mc(dataset)[0]
     self.run_name = runs.get_run_name(dataset)
     self.run_number = runs.get_run_number(dataset)
     self.run_start = runs.get_run_start(dataset)

     progress_label = 'Making %s minitree' % self.__class__.__name__
     loop_over_dataset(dataset, self.process_event,
                       event_lists=event_list,
                       branch_selection=self.branch_selection,
                       desc=progress_label)
     self.check_cache(force_empty=True)

     if len(self.data) > 0:
         hax.log.debug("Extraction completed, now concatenating data")
         return pd.concat(self.data, ignore_index=True)

     # Nothing was collected: warn and hand back an empty frame
     log.warning("Not a single row was extracted from dataset %s!" % dataset)
     return pd.DataFrame([], columns=['event_number', 'run_number'])
示例#5
0
def check(run_id, treemaker, force_reload=False):
    """Return if the minitree exists and where it is found / where to make it.

    Side effect: creates the first directory in hax.config['minitree_paths']
    if it does not exist yet.

    :param run_id: run name or number

    :param treemaker: treemaker name or class

    :param force_reload: ignore available minitrees, just tell me where to write the new one.

    :returns: (treemaker, already_made, path).
      - treemaker is the class of the treemaker you named.
      - already_made is True if there is an up-to-date minitree we can load, False otherwise (always if force_reload)
      - path is the path to the minitree to load if it is available, otherwise path where we should create the minitree.

    """
    run_name = runs.get_run_name(run_id)
    treemaker_name, treemaker = get_treemaker_name_and_class(treemaker)
    preferred_format = hax.config['preferred_minitree_format']

    # If we need to remake the minitree, where would we place it?
    minitree_filename = _minitree_filename(
        run_name, treemaker_name, preferred_format)
    creation_dir = hax.config['minitree_paths'][0]
    if not os.path.exists(creation_dir):
        # exist_ok: another process may create the directory between the
        # check above and this call.
        os.makedirs(creation_dir, exist_ok=True)
    path_to_new = os.path.join(creation_dir, minitree_filename)

    # Value to return if the minitree is not available
    sorry_not_available = treemaker, False, path_to_new

    if force_reload:
        return sorry_not_available

    # Find the file
    try:
        minitree_path = find_file_in_folders(
            minitree_filename, hax.config['minitree_paths'])
    except FileNotFoundError:
        # Maybe it exists, but was made in a non-preferred file format
        log.debug("Minitree %s not found" % minitree_filename)
        for mt_format in hax.config['other_minitree_formats']:
            if mt_format == preferred_format:
                # Already tried this format
                continue
            try:
                minitree_filename = _minitree_filename(run_name, treemaker_name, mt_format)
                minitree_path = find_file_in_folders(minitree_filename, hax.config['minitree_paths'])
            except FileNotFoundError:
                # This format is not there; keep trying the remaining ones
                log.debug("Minitree %s not found" % minitree_filename)
                continue
            log.debug("Minitree found in non-preferred format: %s" % minitree_filename)
            break
        else:
            # Not found in any format (only report this once all were tried)
            log.debug("Not found in non-preferred formats either. Minitree will be created.")
            return sorry_not_available

    log.debug("Found minitree at %s" % minitree_path)

    # Load the metadata ONLY, to see if we can load this file
    minitree_metadata = get_format(minitree_path).load_metadata()

    # Check if the minitree has an outdated treemaker version
    if LooseVersion(minitree_metadata['version']) < treemaker.__version__:
        log.debug(
            "Minitreefile %s is outdated (version %s, treemaker is version %s), will be recreated" %
            (minitree_path, minitree_metadata['version'], treemaker.__version__))
        return sorry_not_available

    # Check for incompatible hax version (e.g. event_number and run_number
    # columns not yet included in each minitree)
    if (LooseVersion(minitree_metadata.get('hax_version', '0.0')) < hax.config['minimum_minitree_hax_version']):
        log.debug("Minitreefile %s is from an incompatible hax version and must be recreated" % minitree_path)
        return sorry_not_available

    # Check if pax_version agrees with the version policy.
    version_policy = hax.config['pax_version_policy']

    if treemaker.pax_version_independent:
        return treemaker, True, minitree_path

    elif version_policy == 'latest':
        # What the latest pax version is differs per dataset. We'll open the root file to find out
        # (you may think we can use the runs db info instead, but that won't work on e.g. MC root files)
        try:
            pax_metadata = hax.paxroot.get_metadata(run_name)
        except FileNotFoundError:
            log.warning(
                "Minitree %s was found, but the main data root file was not. "
                "Your version policy is 'latest', but I can't check whether you really have the latest... "
                "I'll load the cached minitree and assume you know what you are doing." % minitree_path)
        else:
            if ('pax_version' not in minitree_metadata or
                    LooseVersion(minitree_metadata['pax_version']) <
                    LooseVersion(pax_metadata['file_builder_version'])):
                log.debug(
                    "Minitreefile %s is from an outdated pax version (pax %s, %s available), "
                    "will be recreated." %
                    (minitree_path,
                     minitree_metadata.get('pax_version', 'not known'),
                     pax_metadata['file_builder_version']))
                return sorry_not_available

    elif version_policy == 'loose':
        # Anything goes
        pass

    else:
        # Metadata may lack 'pax_version'; read it once with a default so the
        # log message below cannot raise KeyError.
        minitree_pax_version = minitree_metadata.get('pax_version', 'unknown')
        if not hax.runs.version_is_consistent_with_policy(minitree_pax_version):
            log.debug(
                "Minitree found from pax version %s, but you required pax version %s. "
                "Will attempt to create it from the main root file." %
                (minitree_pax_version, version_policy))
            return sorry_not_available

    return treemaker, True, minitree_path
示例#6
0
文件: minitrees.py 项目: XENON1T/hax
def check(run_id, treemaker, force_reload=False):
    """Return if the minitree exists and where it is found / where to make it.

    Side effect: creates the first directory in hax.config['minitree_paths']
    if it does not exist yet.

    :param run_id: run name or number
    :param treemaker: treemaker name or class
    :param force_reload: ignore available minitrees, just tell me where to write the new one.
    :returns: (treemaker, already_made, path).
      - treemaker is the class of the treemaker you named.
      - already_made is True if there is an up-to-date minitree we can load, False otherwise (always if force_reload)
      - path is the path to the minitree to load if it is available, otherwise path where we should create the minitree.
    """
    run_name = runs.get_run_name(run_id)
    treemaker_name, treemaker = get_treemaker_name_and_class(treemaker)
    preferred_format = hax.config['preferred_minitree_format']

    # If we need to remake the minitree, where would we place it?
    minitree_filename = _minitree_filename(run_name, treemaker_name, preferred_format)
    creation_dir = hax.config['minitree_paths'][0]
    if not os.path.exists(creation_dir):
        # exist_ok: another process may create the directory between the
        # check above and this call.
        os.makedirs(creation_dir, exist_ok=True)
    path_to_new = os.path.join(creation_dir, minitree_filename)

    # Value to return if the minitree is not available
    sorry_not_available = treemaker, False, path_to_new

    if force_reload:
        return sorry_not_available

    # Find the file
    try:
        minitree_path = find_file_in_folders(minitree_filename, hax.config['minitree_paths'])
    except FileNotFoundError:
        # Maybe it exists, but was made in a non-preferred file format
        log.debug("Minitree %s not found" % minitree_filename)
        for mt_format in hax.config['other_minitree_formats']:
            if mt_format == preferred_format:
                # Already tried this format
                continue
            try:
                minitree_filename = _minitree_filename(run_name, treemaker_name, mt_format)
                minitree_path = find_file_in_folders(minitree_filename, hax.config['minitree_paths'])
            except FileNotFoundError:
                # This format is not there; keep trying the remaining ones
                log.debug("Minitree %s not found" % minitree_filename)
                continue
            log.debug("Minitree found in non-preferred format: %s" % minitree_filename)
            break
        else:
            # Not found in any format (only report this once all were tried)
            log.debug("Not found in non-preferred formats either. Minitree will be created.")
            return sorry_not_available

    log.debug("Found minitree at %s" % minitree_path)

    # Load the metadata ONLY, to see if we can load this file
    minitree_metadata = get_format(minitree_path).load_metadata()

    # Check if the minitree has an outdated treemaker version
    if LooseVersion(minitree_metadata['version']) < treemaker.__version__:
        log.debug("Minitreefile %s is outdated (version %s, treemaker is version %s), will be recreated" % (
            minitree_path, minitree_metadata['version'], treemaker.__version__))
        return sorry_not_available

    # Check for incompatible hax version (e.g. event_number and run_number columns not yet included in each minitree)
    if (LooseVersion(minitree_metadata.get('hax_version', '0.0')) < hax.config['minimum_minitree_hax_version']):
        log.debug("Minitreefile %s is from an incompatible hax version and must be recreated" % minitree_path)
        return sorry_not_available

    # Check if pax_version agrees with the version policy.
    version_policy = hax.config['pax_version_policy']
    if version_policy == 'latest':
        # What the latest pax version is differs per dataset. For now we'll open the root file to find out
        # TODO: we shouldn't need to; the runs db keeps track of this, and we use it in hax.runs for this purpose!
        try:
            pax_metadata = hax.paxroot.get_metadata(run_name)
        except FileNotFoundError:
            log.warning("Minitree %s was found, but the main data root file was not. "
                        "Your version policy is 'latest', but I can't check whether you really have the latest... "
                        "well, let's load it and see what happens." % minitree_path)
        else:
            if ('pax_version' not in minitree_metadata or
                    LooseVersion(minitree_metadata['pax_version']) <
                    LooseVersion(pax_metadata['file_builder_version'])):
                log.debug("Minitreefile %s is from an outdated pax version (pax %s, %s available), "
                          "will be recreated." % (minitree_path,
                                                  minitree_metadata.get('pax_version', 'not known'),
                                                  pax_metadata['file_builder_version']))
                return sorry_not_available

    elif version_policy == 'loose':
        # Anything goes
        pass

    else:
        # The policy names one specific pax version. Metadata without a
        # 'pax_version' entry cannot match it, so treat a missing key as a
        # mismatch (as the 'latest' branch does) instead of raising KeyError.
        minitree_pax_version = minitree_metadata.get('pax_version')
        if minitree_pax_version != version_policy:
            log.debug("Minitree found from pax version %s, but you required pax version %s. "
                      "Will attempt to create it from the main root file." % (
                          'not known' if minitree_pax_version is None else minitree_pax_version,
                          version_policy))
            return sorry_not_available

    return treemaker, True, minitree_path