Exemplo n.º 1
0
    def get_iter(self,
                 run_id: str,
                 targets,
                 save=tuple(),
                 max_workers=None,
                 **kwargs) -> ty.Iterator[np.ndarray]:
        """Lazily compute ``targets`` for ``run_id`` and yield the results.

        Any extra keyword arguments are handed to ``new_context`` and the
        context it returns is used for the remainder of this call.
        {get_docs}
        Caveat: this is not quite a normal iterator. If you stop consuming
        it early (e.g. with ``break``), results will still accumulate in a
        background thread.
        """
        if kwargs:
            # noinspection PyMethodFirstArgAssignment
            self = self.new_context(**kwargs)

        wants_merge = isinstance(targets, (list, tuple)) and len(targets) > 1
        if wants_merge:
            plugin_map = self._get_plugins(targets=targets)
            kinds = {plugin_map[t].data_kind for t in targets}
            if len(kinds) != 1:
                raise RuntimeError("Cannot automerge different data kinds!")

            # All requested targets share one data kind: register a
            # throwaway MergeOnlyPlugin subclass that depends on all of
            # them, and ask for that single target instead.
            # TODO: auto-unregister? Better to have a temp register
            # override option in get_components
            letters = random.choices(string.ascii_lowercase, k=10)
            temp_name = ''.join(letters)
            merge_cls = type(temp_name,
                             (strax.MergeOnlyPlugin, ),
                             {'depends_on': tuple(targets)})
            self.register(merge_cls)
            targets = temp_name

        components = self.get_components(run_id, targets=targets, save=save)
        processor = strax.ThreadedMailboxProcessor(
            components, max_workers=max_workers)
        yield from processor.iter()
Exemplo n.º 2
0
    def get_iter(self,
                 run_id: str,
                 targets,
                 save=tuple(),
                 max_workers=None,
                 time_range=None,
                 selection=None,
                 **kwargs) -> ty.Iterator[np.ndarray]:
        """Lazily compute ``targets`` for ``run_id`` and yield the results.

        Each array coming out of the processor is first reduced with
        ``selection`` (if given) and then cut to ``time_range`` (if given)
        before being yielded.

        Do NOT abandon the iterator half-way (e.g. with ``break``): the
        work keeps running in background threads...
        {get_docs}
        """
        # New options mean we work on a temporary context instead of
        # the current one
        if kwargs:
            # noinspection PyMethodFirstArgAssignment
            self = self.new_context(**kwargs)

        # Several selection strings are AND-ed into one expression
        if isinstance(selection, (list, tuple)):
            selection = ' & '.join(f'({cut})' for cut in selection)

        if isinstance(targets, (list, tuple)) and len(targets) > 1:
            plugin_map = self._get_plugins(targets=targets, run_id=run_id)
            kinds = {plugin_map[t].data_kind for t in targets}
            if len(kinds) != 1:
                raise RuntimeError("Cannot automerge different data kinds!")

            # All targets share one data kind: register a throwaway
            # MergeOnlyPlugin subclass depending on all of them and
            # request that single target instead.
            # TODO: auto-unregister? Better to have a temp register
            # override option in get_components
            # Or just always create new context, not only if new options
            # are given
            letters = random.choices(string.ascii_lowercase, k=10)
            temp_name = ''.join(letters)
            merge_cls = type(temp_name,
                             (strax.MergeOnlyPlugin, ),
                             {'depends_on': tuple(targets)})
            self.register(merge_cls)
            targets = temp_name

        components = self.get_components(
            run_id, targets=targets, save=save, time_range=time_range)
        processor = strax.ThreadedMailboxProcessor(
            components, max_workers=max_workers)

        for chunk in processor.iter():
            if selection is not None:
                # Expose every field of the array by name to numexpr
                columns = {name: chunk[name] for name in chunk.dtype.names}
                keep = numexpr.evaluate(selection, local_dict=columns)
                chunk = chunk[keep]
            if time_range:
                if 'time' not in chunk.dtype.names:
                    raise NotImplementedError(
                        "Time range selection requires time information, "
                        "but none of the required plugins provides it.")
                # Half-open interval: start inclusive, stop exclusive
                times = chunk['time']
                after_start = time_range[0] <= times
                before_stop = times < time_range[1]
                chunk = chunk[after_start & before_stop]
            yield chunk
Exemplo n.º 3
0
    def get_iter(self,
                 run_id: str,
                 targets,
                 save=tuple(),
                 max_workers=None,
                 time_range=None,
                 seconds_range=None,
                 selection=None,
                 **kwargs) -> ty.Iterator[np.ndarray]:
        """Compute target for run_id and iterate over results.

        Do NOT interrupt the iterator (i.e. break): it will keep running stuff
        in background threads...

        :param run_id: run to process.
        :param targets: a single target name, or a list/tuple of names.
            Multiple targets are merged automatically, which requires all
            of them to have the same data kind.
        :param save: passed on to get_components.
        :param max_workers: passed on to the ThreadedMailboxProcessor.
        :param time_range: (start, stop) pair; only entries with
            start <= time < stop are yielded.
        :param seconds_range: (start, stop) pair in seconds relative to the
            run start. If given, it is converted to nanoseconds and
            overrides time_range.
        :param selection: numexpr expression evaluated on the fields of
            each result array, or a list/tuple of expressions that are
            combined with '&'.
        :param kwargs: if any are given, they are passed to new_context and
            the resulting context is used for this call.
        :raises RuntimeError: if multiple targets have different data kinds.
        :raises NotImplementedError: if a time range is requested but the
            results have no 'time' field.
        {get_docs}
        """
        # If any new options given, replace the current context
        # with a temporary one
        if len(kwargs):
            # noinspection PyMethodFirstArgAssignment
            self = self.new_context(**kwargs)

        if isinstance(selection, (list, tuple)):
            selection = ' & '.join(f'({x})' for x in selection)

        # Convert relative to absolute time range
        if seconds_range is not None:
            ns_per_s = int(1e9)
            try:
                # Use run metadata, if it is available, to get
                # the run start time (floored to seconds)
                # NOTE(review): timestamp() treats a naive datetime as
                # local time -- confirm 'start' is what we expect here.
                t0 = self.run_metadata(run_id, 'start')['start']
                t0 = int(t0.timestamp()) * ns_per_s
            except Exception:
                # Deliberately broad: any failure to obtain the start from
                # the run metadata falls back to the data itself.
                # Get an approx start from the data itself,
                # then floor it to seconds for consistency
                if isinstance(targets, (list, tuple)):
                    t = targets[0]
                else:
                    t = targets
                t0 = self.get_meta(run_id, t)['chunks'][0]['first_time']
                # Use integer floor division. True division goes through a
                # float, which cannot resolve single nanoseconds at this
                # magnitude and can round up to the next whole second.
                t0 = int(t0) // ns_per_s * ns_per_s
            time_range = (t0 + ns_per_s * seconds_range[0],
                          t0 + ns_per_s * seconds_range[1])

        # If multiple targets of the same kind, create a MergeOnlyPlugin
        # automatically
        if isinstance(targets, (list, tuple)) and len(targets) > 1:
            plugins = self._get_plugins(targets=targets, run_id=run_id)
            if len(set(plugins[d].data_kind for d in targets)) == 1:
                temp_name = ''.join(
                    random.choices(string.ascii_lowercase, k=10))
                temp_merge = type(temp_name, (strax.MergeOnlyPlugin, ),
                                  dict(depends_on=tuple(targets)))
                self.register(temp_merge)
                targets = temp_name
                # TODO: auto-unregister? Better to have a temp register
                # override option in get_components
                # Or just always create new context, not only if new options
                # are given
            else:
                raise RuntimeError("Cannot automerge different data kinds!")

        components = self.get_components(run_id,
                                         targets=targets,
                                         save=save,
                                         time_range=time_range)
        for x in strax.ThreadedMailboxProcessor(
                components,
                max_workers=max_workers,
                allow_shm=self.context_config['allow_shm'],
                allow_multiprocess=self.context_config['allow_multiprocess'],
                allow_rechunk=self.context_config['allow_rechunk']).iter():
            if selection is not None:
                # Expose every field of the array by name to numexpr
                mask = numexpr.evaluate(
                    selection, local_dict={fn: x[fn]
                                           for fn in x.dtype.names})
                x = x[mask]
            if time_range:
                if 'time' not in x.dtype.names:
                    raise NotImplementedError(
                        "Time range selection requires time information, "
                        "but none of the required plugins provides it.")
                # Half-open interval: start inclusive, stop exclusive
                x = x[(time_range[0] <= x['time'])
                      & (x['time'] < time_range[1])]
            yield x
Exemplo n.º 4
0
    def get_iter(self,
                 run_id: str,
                 targets,
                 save=tuple(),
                 max_workers=None,
                 time_range=None,
                 seconds_range=None,
                 time_within=None,
                 time_selection='fully_contained',
                 selection_str=None,
                 **kwargs) -> ty.Iterator[np.ndarray]:
        """Compute target for run_id and iterate over results.

        Do NOT interrupt the iterator (i.e. break): it will keep running stuff
        in background threads...

        :param run_id: run to process.
        :param targets: a single target name, or a list/tuple of names.
            Multiple targets are merged automatically, which requires all
            of them to have the same data kind.
        :param save: passed on to get_components.
        :param max_workers: passed on to the ThreadedMailboxProcessor.
        :param time_range: passed to to_absolute_time_range together with
            seconds_range and time_within; the range it returns is used
            for get_components and apply_selection.
        :param seconds_range: see time_range.
        :param time_within: see time_range.
        :param time_selection: passed on to apply_selection.
        :param selection_str: selection passed on to apply_selection. A
            list/tuple of strings is first combined with '&'.
        :param kwargs: if any are given, they are passed to new_context and
            the resulting context is used for this call.
        :raises RuntimeError: if multiple targets have different data kinds.
        :raises ValueError: if the processor yields something that is not
            a numpy array.
        {get_docs}
        """
        # If any new options given, replace the current context
        # with a temporary one
        if len(kwargs):
            # noinspection PyMethodFirstArgAssignment
            self = self.new_context(**kwargs)

        if isinstance(selection_str, (list, tuple)):
            selection_str = ' & '.join(f'({x})' for x in selection_str)

        # Convert alternate time arguments to absolute range
        time_range = self.to_absolute_time_range(run_id=run_id,
                                                 targets=targets,
                                                 time_range=time_range,
                                                 seconds_range=seconds_range,
                                                 time_within=time_within)

        # If multiple targets of the same kind, create a MergeOnlyPlugin
        # to merge the results automatically
        if isinstance(targets, (list, tuple)) and len(targets) > 1:
            plugins = self._get_plugins(targets=targets, run_id=run_id)
            if len(set(plugins[d].data_kind for d in targets)) == 1:
                temp_name = (
                    '_temp_' +
                    ''.join(random.choices(string.ascii_lowercase, k=10)))
                p = type(temp_name, (strax.MergeOnlyPlugin, ),
                         dict(depends_on=tuple(targets)))
                self.register(p)
                targets = (temp_name, )
            else:
                raise RuntimeError("Cannot automerge different data kinds!")

        try:
            components = self.get_components(run_id,
                                             targets=targets,
                                             save=save,
                                             time_range=time_range)
        finally:
            # Cleanup the temp plugins. Done in a finally clause so that a
            # failure in get_components does not leave the temporary merge
            # plugin behind in the registry.
            for k in list(self._plugin_class_registry.keys()):
                if k.startswith('_temp'):
                    del self._plugin_class_registry[k]

        for x in strax.ThreadedMailboxProcessor(
                components,
                max_workers=max_workers,
                allow_shm=self.context_config['allow_shm'],
                allow_multiprocess=self.context_config['allow_multiprocess'],
                allow_rechunk=self.context_config['allow_rechunk']).iter():
            if not isinstance(x, np.ndarray):
                raise ValueError(f"Got type {type(x)} rather than numpy array "
                                 "from the processor!")
            x = self.apply_selection(x, selection_str, time_range,
                                     time_selection)
            yield x
Exemplo n.º 5
0
    def get_iter(self,
                 run_id: str,
                 targets,
                 save=tuple(),
                 max_workers=None,
                 time_range=None,
                 seconds_range=None,
                 time_within=None,
                 time_selection='fully_contained',
                 selection_str=None,
                 keep_columns=None,
                 _chunk_number=None,
                 **kwargs) -> ty.Iterator[strax.Chunk]:
        """Compute target for run_id and iterate over results.

        Do NOT interrupt the iterator (i.e. break): it will keep running stuff
        in background threads...

        :param run_id: run to process.
        :param targets: a single target name, or a list/tuple of names.
            Multiple targets are merged automatically, which requires all
            of them to have the same data kind.
        :param save: passed on to get_components.
        :param max_workers: passed on to the ThreadedMailboxProcessor.
        :param time_range: passed to to_absolute_time_range together with
            seconds_range and time_within; the range it returns is used
            for get_components and apply_selection.
        :param seconds_range: see time_range.
        :param time_within: see time_range.
        :param time_selection: passed on to apply_selection.
        :param selection_str: passed on to apply_selection.
        :param keep_columns: passed on to apply_selection.
        :param _chunk_number: passed to get_components as chunk_number.
        :param kwargs: if any are given, they are passed to new_context and
            the resulting context is used for this call.
        :raises RuntimeError: if multiple targets have different data kinds.
        :raises ValueError: if the processor yields something that is not
            a strax Chunk, or if no chunk was produced for the given
            time range.
        :raises strax.DataCorrupted: if no chunk was produced and no time
            range was given.
        {get_docs}
        """
        # If any new options given, replace the current context
        # with a temporary one
        if len(kwargs):
            # noinspection PyMethodFirstArgAssignment
            self = self.new_context(**kwargs)

        # Convert alternate time arguments to absolute range
        time_range = self.to_absolute_time_range(run_id=run_id,
                                                 targets=targets,
                                                 time_range=time_range,
                                                 seconds_range=seconds_range,
                                                 time_within=time_within)

        # If multiple targets of the same kind, create a MergeOnlyPlugin
        # to merge the results automatically
        if isinstance(targets, (list, tuple)) and len(targets) > 1:
            plugins = self._get_plugins(targets=targets, run_id=run_id)
            if len(set(plugins[d].data_kind_for(d) for d in targets)) == 1:
                temp_name = (
                    '_temp_' +
                    ''.join(random.choices(string.ascii_lowercase, k=10)))
                p = type(temp_name, (strax.MergeOnlyPlugin, ),
                         dict(depends_on=tuple(targets)))
                self.register(p)
                targets = (temp_name, )
            else:
                raise RuntimeError("Cannot automerge different data kinds!")

        try:
            components = self.get_components(run_id,
                                             targets=targets,
                                             save=save,
                                             time_range=time_range,
                                             chunk_number=_chunk_number)
        finally:
            # Cleanup the temp plugins. Done in a finally clause so that a
            # failure in get_components does not leave the temporary merge
            # plugin behind in the registry.
            for k in list(self._plugin_class_registry.keys()):
                if k.startswith('_temp'):
                    del self._plugin_class_registry[k]

        seen_a_chunk = False
        generator = strax.ThreadedMailboxProcessor(
            components,
            max_workers=max_workers,
            allow_shm=self.context_config['allow_shm'],
            allow_multiprocess=self.context_config['allow_multiprocess'],
            allow_rechunk=self.context_config['allow_rechunk'],
            allow_lazy=self.context_config['allow_lazy'],
            max_messages=self.context_config['max_messages'],
            timeout=self.context_config['timeout']).iter()

        try:
            for result in strax.continuity_check(generator):
                seen_a_chunk = True
                if not isinstance(result, strax.Chunk):
                    raise ValueError(f"Got type {type(result)} rather than "
                                     f"a strax Chunk from the processor!")
                # The selection is applied to the chunk's data in place,
                # so the same Chunk object is yielded.
                result.data = self.apply_selection(
                    result.data,
                    selection_str=selection_str,
                    keep_columns=keep_columns,
                    time_range=time_range,
                    time_selection=time_selection)
                yield result

        except GeneratorExit:
            # Raised at the yield above when the consumer closes this
            # iterator; pass a stand-in exception on to the processor's
            # generator.
            generator.throw(
                OutsideException(
                    "Terminating due to an exception originating from outside "
                    "strax's get_iter (which we cannot retrieve)."))

        except Exception as e:
            # Forward the failure to the processor's generator before
            # re-raising it to the caller.
            generator.throw(e)
            raise

        if not seen_a_chunk:
            if time_range is None:
                raise strax.DataCorrupted("No data returned!")
            raise ValueError(f"Invalid time range: {time_range}, "
                             "returned no chunks!")