def _load_file(f, compressor, dtype):
    """Read one strax data file from the open file handle *f*.

    The raw bytes are decompressed with the named ``compressor`` (looked
    up in the module-level ``COMPRESSORS`` registry) and reinterpreted as
    a numpy array of ``dtype``. An empty file yields an empty array.

    :param f: open (binary) file-like object to read from
    :param compressor: key into ``COMPRESSORS`` selecting the decompressor
    :param dtype: numpy dtype of the stored records
    :return: numpy array with the decoded records
    :raises strax.DataCorrupted: on any failure while reading or decoding
    """
    try:
        raw = f.read()
        # Empty payload: return a zero-length array of the right dtype
        if len(raw) == 0:
            return np.zeros(0, dtype=dtype)
        decompressed = COMPRESSORS[compressor]['decompress'](raw)
        return np.frombuffer(decompressed, dtype=dtype)
    except Exception:
        # Wrap any low-level error in the strax-specific exception,
        # attaching the formatted traceback for diagnosis.
        raise strax.DataCorrupted(
            f"Fatal Error while reading file {f}: "
            + strax.utils.formatted_exception())
def get_metadata(self, dirname: str, **kwargs):
    """Return the metadata dict for the data in *dirname*.

    Builds the ``<prefix>-metadata.json`` filename from the directory
    name, resolves it to a local path via ``rucio_path`` (rooted at
    ``self.root_dir``), and parses it as JSON.

    :param dirname: directory name identifying the stored data
    :return: metadata parsed from the JSON file
    :raises strax.DataNotAvailable: if the containing folder is absent
    :raises strax.DataCorrupted: if the folder exists but the metadata
        file itself is missing
    """
    prefix = dirname_to_prefix(dirname)
    metadata_json = f'{prefix}-metadata.json'
    fn = rucio_path(self.root_dir, metadata_json, dirname)
    # Parent folder of the metadata file; rucio_path is assumed to
    # return an absolute '/'-separated path — TODO confirm.
    folder = osp.join('/', *fn.split('/')[:-1])
    if not osp.exists(folder):
        # Fixed typo: was "matadata" (sibling backends spell it correctly)
        raise strax.DataNotAvailable(f"No folder for metadata at {fn}")
    if not osp.exists(fn):
        raise strax.DataCorrupted(f"Folder exists but no metadata at {fn}")
    with open(fn, mode='r') as f:
        return json.loads(f.read())
def get_metadata(self, dirname):
    """Return the metadata dict stored alongside the data in *dirname*.

    Looks for ``<prefix>-metadata.json`` inside the directory; if that
    is absent, falls back to the legacy ``metadata.json`` filename.

    :param dirname: directory holding the data and its metadata file
    :return: metadata parsed from the JSON file
    :raises strax.DataCorrupted: when neither metadata file exists
    """
    prefix = dirname_to_prefix(dirname)
    md_path = osp.join(dirname, f'{prefix}-metadata.json')
    if not osp.exists(md_path):
        # Fall back to the old-format metadata filename
        legacy_path = osp.join(dirname, 'metadata.json')
        if not osp.exists(legacy_path):
            raise strax.DataCorrupted(f"Data in {dirname} has no metadata")
        md_path = legacy_path
    with open(md_path, mode='r') as f:
        return json.loads(f.read())
def get_metadata(self, did: str, **kwargs):
    """Return the metadata for the rucio dataset identified by *did*.

    The DID (``scope:name``) is parsed into run number, datatype and
    hash; from these the metadata DID is built, resolved to a local
    path under ``self.rucio_dir`` and parsed as JSON.

    :param did: rucio dataset identifier in ``scope:name`` form
    :return: metadata parsed from the JSON file
    :raises strax.DataNotAvailable: if the containing folder is absent
    :raises strax.DataCorrupted: if the folder exists but the metadata
        file itself is missing
    """
    scope, name = did.split(':')
    number, dtype, hsh = parse_did(did)
    metadata_did = f'{scope}:{dtype}-{hsh}-metadata.json'
    metadata_path = rucio_path(self.rucio_dir, metadata_did)
    # Parent folder of the metadata file; assumes rucio_path yields an
    # absolute '/'-separated path — TODO confirm.
    folder = os.path.join('/', *metadata_path.split('/')[:-1])
    if not os.path.exists(folder):
        raise strax.DataNotAvailable(
            f"No folder for metadata at {metadata_path}")
    if not os.path.exists(metadata_path):
        raise strax.DataCorrupted(
            f"Folder exists but no metadata at {metadata_path}")
    with open(metadata_path, mode='r') as f:
        return json.loads(f.read())
def get_metadata(self,
                 backend_key: typing.Union[DataKey, str],
                 **kwargs) -> dict:
    """Fetch metadata via the backend-specific ``_get_metadata``.

    Known strax exceptions (and ``NotImplementedError``) propagate
    untouched; any other failure is re-raised as
    ``strax.DataCorrupted`` with the original exception chained.
    All keyword arguments are forwarded to ``_get_metadata``.

    :param backend_key: key the backend should look up
        (string or strax.DataKey)
    :return: metadata dict for the requested backend-key
    :raises strax.DataCorrupted: metadata should exist but could
        not be read
    :raises strax.DataNotAvailable: no data for this backend-key
    """
    # Exceptions that carry meaning to callers and must pass through.
    passthrough = (strax.DataCorrupted,
                   strax.DataNotAvailable,
                   NotImplementedError)
    try:
        return self._get_metadata(backend_key, **kwargs)
    except passthrough:
        raise
    except Exception as e:
        raise strax.DataCorrupted(
            f'Cannot open metadata for {str(backend_key)}') from e
def get_iter(self, run_id: str,
             targets, save=tuple(), max_workers=None,
             time_range=None,
             seconds_range=None,
             time_within=None,
             time_selection='fully_contained',
             selection_str=None,
             keep_columns=None,
             _chunk_number=None,
             **kwargs) -> ty.Iterator[strax.Chunk]:
    """Compute target for run_id and iterate over results.

    Do NOT interrupt the iterator (i.e. break): it will keep
    running stuff in background threads...
    {get_docs}
    """
    # If any new options given, replace the current context
    # with a temporary one
    if len(kwargs):
        # noinspection PyMethodFirstArgAssignment
        self = self.new_context(**kwargs)

    # Convert alternate time arguments (seconds_range / time_within)
    # to a single absolute (start, end) time_range
    time_range = self.to_absolute_time_range(
        run_id=run_id, targets=targets,
        time_range=time_range, seconds_range=seconds_range,
        time_within=time_within)

    # If multiple targets of the same kind, create a MergeOnlyPlugin
    # to merge the results automatically
    if isinstance(targets, (list, tuple)) and len(targets) > 1:
        plugins = self._get_plugins(targets=targets, run_id=run_id)
        if len(set(plugins[d].data_kind_for(d) for d in targets)) == 1:
            # Register a randomly-named temporary merge plugin depending
            # on all requested targets, and compute that instead
            temp_name = (
                '_temp_'
                + ''.join(random.choices(string.ascii_lowercase, k=10)))
            p = type(temp_name,
                     (strax.MergeOnlyPlugin,),
                     dict(depends_on=tuple(targets)))
            self.register(p)
            targets = (temp_name,)
        else:
            raise RuntimeError("Cannot automerge different data kinds!")

    components = self.get_components(
        run_id,
        targets=targets,
        save=save,
        time_range=time_range,
        chunk_number=_chunk_number)

    # Cleanup the temp plugins registered above (get_components has
    # already captured what it needs)
    for k in list(self._plugin_class_registry.keys()):
        if k.startswith('_temp'):
            del self._plugin_class_registry[k]

    seen_a_chunk = False
    generator = strax.ThreadedMailboxProcessor(
        components,
        max_workers=max_workers,
        allow_shm=self.context_config['allow_shm'],
        allow_multiprocess=self.context_config['allow_multiprocess'],
        allow_rechunk=self.context_config['allow_rechunk'],
        allow_lazy=self.context_config['allow_lazy'],
        max_messages=self.context_config['max_messages'],
        timeout=self.context_config['timeout']).iter()

    try:
        for result in strax.continuity_check(generator):
            seen_a_chunk = True
            if not isinstance(result, strax.Chunk):
                raise ValueError(f"Got type {type(result)} rather than "
                                 f"a strax Chunk from the processor!")
            # Apply row/column selection and time filtering to each
            # chunk before handing it to the caller
            result.data = self.apply_selection(
                result.data,
                selection_str=selection_str,
                keep_columns=keep_columns,
                time_range=time_range,
                time_selection=time_selection)
            yield result
    except GeneratorExit:
        # Caller closed/broke out of the iterator: propagate a sentinel
        # exception into the processor so its threads shut down
        generator.throw(
            OutsideException(
                "Terminating due to an exception originating from outside "
                "strax's get_iter (which we cannot retrieve)."))
    except Exception as e:
        # Forward the failure into the processor (to stop its threads),
        # then re-raise for the caller
        generator.throw(e)
        raise

    if not seen_a_chunk:
        if time_range is None:
            raise strax.DataCorrupted("No data returned!")
        raise ValueError(f"Invalid time range: {time_range}, "
                         "returned no chunks!")