def get_filename(run_id):
    """Return the location of the processed data file for a run.

    The location is first looked up in ``runs.datasets`` using the run name
    obtained from ``runs.get_run_name``. If that lookup fails, the file
    ``<run_id>.root`` is searched for with ``find_file_in_folders`` in
    ``hax.config["main_data_paths"]``.

    :param run_id: run identifier, passed on to ``runs.get_run_name``
    :returns: the location of the data file
    :raises ValueError: if the location that was found is empty
    """
    try:
        run_name = runs.get_run_name(run_id)
        filename = runs.datasets.loc[runs.datasets["name"] == run_name].iloc[0].location
    except (IndexError, AttributeError):
        # IndexError: no row in runs.datasets matches this run name.
        # AttributeError: runs.datasets has no .loc (e.g. it is None because no
        # runs database is in use); fall back to the folder search as well
        # instead of crashing before we even tried.
        print("Don't know a run named %s, trying to find it anyway..." % run_id)
        filename = find_file_in_folders(run_id + ".root", hax.config["main_data_paths"])
    if not filename:
        raise ValueError("Cannot find processed data for run name %s." % run_id)
    return filename
def get_data(self, dataset):
    """Return data extracted from running over dataset

    Stores the run name and run number of the dataset on the instance, feeds
    every event to ``self.process_event`` via ``loop_over_dataset`` and then
    calls ``self.check_cache(force_empty=True)``.

    :param dataset: dataset identifier, passed on to the ``runs`` helpers and ``loop_over_dataset``
    :returns: ``self.data`` if that attribute exists after the loop, otherwise
              an empty DataFrame with a single ``event_number`` column.
    """
    self.run_name = runs.get_run_name(dataset)
    self.run_number = runs.get_run_number(dataset)
    loop_over_dataset(dataset, self.process_event,
                      branch_selection=self.branch_selection,
                      desc='Making %s minitree' % self.__class__.__name__)
    self.check_cache(force_empty=True)
    if not hasattr(self, 'data'):
        self.log.warning("Not a single row was extracted from dataset %s!" % dataset)
        # columns must be list-like: a bare string makes pandas raise a TypeError,
        # which would turn "no rows" into a crash.
        return pd.DataFrame([], columns=['event_number'])
    return self.data
def get_filename(run_id):
    """Locate the processed data file that belongs to run_id.

    The run name is resolved through ``runs.get_run_name`` and its location is
    read from the first matching row of ``runs.datasets``. When that fails,
    ``<run_id>.root`` is looked for in ``hax.config['main_data_paths']``.

    :param run_id: run identifier, passed on to ``runs.get_run_name``
    :returns: the location of the data file
    :raises ValueError: if the location that was found is empty
    """
    try:
        name = runs.get_run_name(run_id)
        matching_rows = runs.datasets.loc[runs.datasets['name'] == name]
        location = matching_rows.iloc[0].location
    except (IndexError, AttributeError):
        # Either we don't know this dataset, or runs.datasets is None (if runs db is not used)
        print("Don't know a run named %s, trying to find it anyway..." % run_id)
        search_paths = hax.config['main_data_paths']
        location = find_file_in_folders(run_id + '.root', search_paths)

    if location:
        return location
    raise ValueError("Cannot find processed data for run name %s." % run_id)
def get_data(self, dataset, event_list=None):
    """Run over dataset and return everything that was extracted from it.

    Run-level information (MC flag, run name, run number, run start) is stored
    on the instance first, then every event is handed to ``self.process_event``
    via ``loop_over_dataset`` and ``self.check_cache(force_empty=True)`` is called.

    :param dataset: dataset identifier, passed on to the ``runs`` helpers and ``loop_over_dataset``
    :param event_list: forwarded to ``loop_over_dataset`` as ``event_lists``
    :returns: the entries of ``self.data`` concatenated into one DataFrame, or an empty
              DataFrame with ``event_number`` and ``run_number`` columns if ``self.data`` is empty.
    """
    # Run-level information
    self.mc_data = runs.is_mc(dataset)[0]
    self.run_name = runs.get_run_name(dataset)
    self.run_number = runs.get_run_number(dataset)
    self.run_start = runs.get_run_start(dataset)

    progress_label = 'Making %s minitree' % self.__class__.__name__
    loop_over_dataset(dataset,
                      self.process_event,
                      event_lists=event_list,
                      branch_selection=self.branch_selection,
                      desc=progress_label)
    self.check_cache(force_empty=True)

    if len(self.data):
        hax.log.debug("Extraction completed, now concatenating data")
        return pd.concat(self.data, ignore_index=True)

    # Nothing was collected: hand back an empty frame with the expected columns
    log.warning("Not a single row was extracted from dataset %s!" % dataset)
    return pd.DataFrame([], columns=['event_number', 'run_number'])
def check(run_id, treemaker, force_reload=False):
    """Return if the minitree exists and where it is found / where to make it.

    :param run_id: run name or number
    :param treemaker: treemaker name or class
    :param force_reload: ignore available minitrees, just tell me where to write the new one.
    :returns: (treemaker, available, path).
      - treemaker: class of the treemaker you named.
      - available is True if there is an up-to-date minitree we can load, False otherwise
        (always False if force_reload)
      - path is the path to the minitree to load if it is available, otherwise the path where we should
        create the minitree.

    As a side effect, the first directory in hax.config['minitree_paths'] is created if it does not exist.
    """
    run_name = runs.get_run_name(run_id)
    treemaker_name, treemaker = get_treemaker_name_and_class(treemaker)
    preferred_format = hax.config['preferred_minitree_format']

    # If we need to remake the minitree, where would we place it?
    minitree_filename = _minitree_filename(run_name, treemaker_name, preferred_format)
    creation_dir = hax.config['minitree_paths'][0]
    # exist_ok avoids the check-then-create race when another process makes the directory first
    os.makedirs(creation_dir, exist_ok=True)
    path_to_new = os.path.join(creation_dir, minitree_filename)

    # Value to return if the minitree is not available
    sorry_not_available = treemaker, False, path_to_new

    if force_reload:
        return sorry_not_available

    # Find the file
    try:
        minitree_path = find_file_in_folders(minitree_filename, hax.config['minitree_paths'])
    except FileNotFoundError:
        # Maybe it exists, but was made in a non-preferred file format
        log.debug("Minitree %s not found" % minitree_filename)
        for mt_format in hax.config['other_minitree_formats']:
            if mt_format == preferred_format:
                # Already tried this format
                continue
            try:
                minitree_filename = _minitree_filename(run_name, treemaker_name, mt_format)
                minitree_path = find_file_in_folders(minitree_filename, hax.config['minitree_paths'])
            except FileNotFoundError:
                # Not in this format; keep trying the remaining ones
                continue
            log.debug("Minitree found in non-preferred format: %s" % minitree_filename)
            break
        else:
            # Not found in any format. Only say so here: a miss for one format does not
            # mean that the next format will be missing too.
            log.debug("Not found in non-preferred formats either. Minitree will be created.")
            return sorry_not_available

    log.debug("Found minitree at %s" % minitree_path)

    # Load the metadata ONLY, to see if we can load this file
    minitree_metadata = get_format(minitree_path).load_metadata()

    # Check if the minitree has an outdated treemaker version
    if LooseVersion(minitree_metadata['version']) < treemaker.__version__:
        log.debug("Minitreefile %s is outdated (version %s, treemaker is version %s), will be recreated"
                  % (minitree_path, minitree_metadata['version'], treemaker.__version__))
        return sorry_not_available

    # Check for incompatible hax version (e.g. event_number and run_number
    # columns not yet included in each minitree)
    if (LooseVersion(minitree_metadata.get('hax_version', '0.0')) <
            hax.config['minimum_minitree_hax_version']):
        log.debug("Minitreefile %s is from an incompatible hax version and must be recreated" % minitree_path)
        return sorry_not_available

    # Check if pax_version agrees with the version policy.
    version_policy = hax.config['pax_version_policy']

    if treemaker.pax_version_independent:
        return treemaker, True, minitree_path

    elif version_policy == 'latest':
        # What the latest pax version is differs per dataset. We'll open the root file to find out
        # (you may think we can use the runs db info instead, but that won't work on e.g. MC root files)
        try:
            pax_metadata = hax.paxroot.get_metadata(run_name)
        except FileNotFoundError:
            log.warning("Minitree %s was found, but the main data root file was not. "
                        "Your version policy is 'latest', but I can't check whether you really have the latest... "
                        "I'll load the cached minitree and assume you know what you are doing." % minitree_path)
        else:
            if ('pax_version' not in minitree_metadata or
                    LooseVersion(minitree_metadata['pax_version']) <
                    LooseVersion(pax_metadata['file_builder_version'])):
                log.debug("Minitreefile %s is from an outdated pax version (pax %s, %s available), "
                          "will be recreated."
                          % (minitree_path,
                             minitree_metadata.get('pax_version', 'not known'),
                             pax_metadata['file_builder_version']))
                return sorry_not_available

    elif version_policy == 'loose':
        # Anything goes
        pass

    else:
        # The metadata may lack 'pax_version'. Read it once with a fallback, so that the log
        # message below cannot raise a KeyError in exactly the case it is supposed to report.
        minitree_pax_version = minitree_metadata.get('pax_version', 'unknown')
        if not hax.runs.version_is_consistent_with_policy(minitree_pax_version):
            log.debug("Minitree found from pax version %s, but you required pax version %s. "
                      "Will attempt to create it from the main root file."
                      % (minitree_pax_version, version_policy))
            return sorry_not_available

    return treemaker, True, minitree_path
def check(run_id, treemaker, force_reload=False):
    """Return if the minitree exists and where it is found / where to make it.

    :param run_id: run name or number
    :param treemaker: treemaker name or class
    :param force_reload: ignore available minitrees, just tell me where to write the new one.
    :returns: (treemaker, available, path).
      - treemaker: class of the treemaker you named.
      - available is True if there is an up-to-date minitree we can load, False otherwise
        (always False if force_reload)
      - path is the path to the minitree to load if it is available, otherwise the path where we should
        create the minitree.

    As a side effect, the first directory in hax.config['minitree_paths'] is created if it does not exist.
    """
    run_name = runs.get_run_name(run_id)
    treemaker_name, treemaker = get_treemaker_name_and_class(treemaker)
    preferred_format = hax.config['preferred_minitree_format']

    # If we need to remake the minitree, where would we place it?
    minitree_filename = _minitree_filename(run_name, treemaker_name, preferred_format)
    creation_dir = hax.config['minitree_paths'][0]
    # exist_ok avoids the check-then-create race when another process makes the directory first
    os.makedirs(creation_dir, exist_ok=True)
    path_to_new = os.path.join(creation_dir, minitree_filename)

    # Value to return if the minitree is not available
    sorry_not_available = treemaker, False, path_to_new

    if force_reload:
        return sorry_not_available

    # Find the file
    try:
        minitree_path = find_file_in_folders(minitree_filename, hax.config['minitree_paths'])
    except FileNotFoundError:
        # Maybe it exists, but was made in a non-preferred file format
        log.debug("Minitree %s not found" % minitree_filename)
        for mt_format in hax.config['other_minitree_formats']:
            if mt_format == preferred_format:
                # Already tried this format
                continue
            try:
                minitree_filename = _minitree_filename(run_name, treemaker_name, mt_format)
                minitree_path = find_file_in_folders(minitree_filename, hax.config['minitree_paths'])
            except FileNotFoundError:
                # Not in this format; keep trying the remaining ones
                continue
            log.debug("Minitree found in non-preferred format: %s" % minitree_filename)
            break
        else:
            # Not found in any format. Only say so here: a miss for one format does not
            # mean that the next format will be missing too.
            log.debug("Not found in non-preferred formats either. Minitree will be created.")
            return sorry_not_available

    log.debug("Found minitree at %s" % minitree_path)

    # Load the metadata ONLY, to see if we can load this file
    minitree_metadata = get_format(minitree_path).load_metadata()

    # Check if the minitree has an outdated treemaker version
    if LooseVersion(minitree_metadata['version']) < treemaker.__version__:
        log.debug("Minitreefile %s is outdated (version %s, treemaker is version %s), will be recreated"
                  % (minitree_path, minitree_metadata['version'], treemaker.__version__))
        return sorry_not_available

    # Check for incompatible hax version (e.g. event_number and run_number columns not yet included in each minitree)
    if (LooseVersion(minitree_metadata.get('hax_version', '0.0')) <
            hax.config['minimum_minitree_hax_version']):
        log.debug("Minitreefile %s is from an incompatible hax version and must be recreated" % minitree_path)
        return sorry_not_available

    # Check if pax_version agrees with the version policy.
    version_policy = hax.config['pax_version_policy']

    if version_policy == 'latest':
        # What the latest pax version is differs per dataset. For now we'll open the root file to find out
        # TODO: we shouldn't need to; the runs db keeps track of this, and we use it in hax.runs for this purpose!
        try:
            pax_metadata = hax.paxroot.get_metadata(run_name)
        except FileNotFoundError:
            log.warning("Minitree %s was found, but the main data root file was not. "
                        "Your version policy is 'latest', but I can't check whether you really have the latest... "
                        "well, let's load it and see what happens." % minitree_path)
        else:
            if ('pax_version' not in minitree_metadata or
                    LooseVersion(minitree_metadata['pax_version']) <
                    LooseVersion(pax_metadata['file_builder_version'])):
                log.debug("Minitreefile %s is from an outdated pax version (pax %s, %s available), "
                          "will be recreated."
                          % (minitree_path,
                             minitree_metadata.get('pax_version', 'not known'),
                             pax_metadata['file_builder_version']))
                return sorry_not_available

    elif version_policy == 'loose':
        # Anything goes
        pass

    else:
        # The policy names one specific pax version. The metadata may lack 'pax_version'
        # (the 'latest' branch above accounts for that too); treat that as a mismatch
        # instead of crashing with a KeyError.
        minitree_pax_version = minitree_metadata.get('pax_version')
        if minitree_pax_version != version_policy:
            log.debug("Minitree found from pax version %s, but you required pax version %s. "
                      "Will attempt to create it from the main root file."
                      % ('not known' if minitree_pax_version is None else minitree_pax_version,
                         version_policy))
            return sorry_not_available

    return treemaker, True, minitree_path