Example #1
class CoordSet(HasTraits):
    # Hidden attributes containing the collection of objects
    _coords = List(allow_none=True)
    _references = Dict({})
    _updated = Bool(False)

    # Hidden id and name of the object
    _id = Unicode()
    _name = Unicode()

    # Hidden attribute to specify if the collection is for a single dimension
    _is_same_dim = Bool(False)

    # other settings
    _copy = Bool(False)
    _sorted = Bool(True)
    _html_output = Bool(False)

    # default coord index
    _default = Int(0)

    # ------------------------------------------------------------------------------------------------------------------
    # initialization
    # ------------------------------------------------------------------------------------------------------------------
    # ..................................................................................................................
    def __init__(self, *coords, **kwargs):
        """
        A collection of Coord objects for a NDArray object with validation.

        This object is an iterable containing a collection of Coord objects.

        Parameters
        ----------
        *coords : |NDArray|, |NDArray| subclass or |CoordSet| sequence of objects.
            If an instance of CoordSet is found instead of an array, this means
            that all coordinates in this set describe the same axis.
            It is assumed that the coordinates are passed in the order of the
            dimensions of a nD numpy array (
            `row-major <https://docs.scipy.org/doc/numpy-1.14.1/glossary.html#term-row-major>`_
            order), i.e., for a 3d object : 'z', 'y', 'x'.
        **kwargs: dict
            See other parameters.

        Other Parameters
        ----------------
        x : |NDArray|, |NDArray| subclass or |CoordSet|
            A single coordinate associated to the 'x'-dimension.
            If a coord was already passed in the arguments, this will overwrite
            the previous one. It is thus not recommended to use both ways
            simultaneously to initialize the coordinates, to avoid such conflicts.
        y, z, u, ... : |NDArray|, |NDArray| subclass or |CoordSet|
            Same as `x` for the other dimensions.
        dims : list of string, optional
            Names of the dims to use corresponding to the coordinates. If not given, standard names are used: x, y, ...

        See Also
        --------
        Coord : Explicit coordinates object.
        LinearCoord : Implicit coordinates object.
        NDDataset : The main object of SpectroChemPy which makes use of CoordSet.

        Examples
        --------
        >>> from spectrochempy import Coord, CoordSet

        Define 4 coordinates, with two for the same dimension

        >>> coord0 = Coord.linspace(10., 100., 5, units='m', title='distance')
        >>> coord1 = Coord.linspace(20., 25., 4, units='K', title='temperature')
        >>> coord1b = Coord.linspace(1., 10., 4, units='millitesla', title='magnetic field')
        >>> coord2 = Coord.linspace(0., 1000., 6, units='hour', title='elapsed time')

        Now create a coordset

        >>> cs = CoordSet(t=coord0, u=coord2, v=[coord1, coord1b])

        Display some coordinates

        >>> cs.u
        Coord: [float64] hr (size: 6)

        >>> cs.v
        CoordSet: [_1:temperature, _2:magnetic field]

        >>> cs.v_1
        Coord: [float64] K (size: 4)
        """

        self._copy = kwargs.pop('copy', True)
        self._sorted = kwargs.pop('sorted', True)

        keepnames = kwargs.pop('keepnames', False)
        # if keepnames is false and the names of the dimensions are not passed in kwargs, then use dims if not none
        dims = kwargs.pop('dims', None)

        self.name = kwargs.pop('name', None)

        # initialise the coordinate list
        self._coords = []

        # First evaluate passed args
        # --------------------------

        # some cleaning
        if coords:

            if all((isinstance(coord, (np.ndarray, NDArray, list, CoordSet))
                    or coord is None) for coord in coords):
                # Any instance of a NDArray can be accepted as coordinates for a dimension.
                # If an instance of CoordSet is found, this means that all
                # coordinates in this set describe the same axis
                coords = tuple(coords)

            elif is_sequence(coords) and len(coords) == 1:
                # if isinstance(coords[0], list):
                #     coords = (CoordSet(*coords[0], sorted=False),)
                # else:
                coords = coords[0]

                if isinstance(coords, dict):
                    # we have passed a dict, postpone to the kwargs evaluation process
                    kwargs.update(coords)
                    coords = None

            else:
                raise ValueError('Did not understand the inputs')

        # now store the args coordinates in self._coords (validation is fired when this attribute is set)
        if coords:
            for coord in coords[::-1]:  # we fill from the end of the list
                # (in reverse order) because by convention when the
                # names are not specified, the order of the
                # coords follow the order of dims.
                if not isinstance(coord, CoordSet):
                    if isinstance(coord, list):
                        coord = CoordSet(*coord, sorted=False)
                    elif not isinstance(coord, LinearCoord):  # else
                        coord = Coord(coord, copy=True)
                else:
                    coord = cpy.deepcopy(coord)

                if not keepnames:
                    if dims is None:
                        # take the last name from the list of available names
                        coord.name = self.available_names.pop(-1)
                    else:
                        # use the provided list of dims
                        coord.name = dims.pop(-1)

                self._append(coord)  # append the coord (but instead of append,
                # _append uses assignment to fire the validation process)

        # now evaluate keywords argument
        # ------------------------------

        for key, coord in list(kwargs.items()):
            # remove the already used kwargs (Fix: deprecation warning in Traitlets - all args, kwargs must be used)
            del kwargs[key]

            # prepare values to be either Coord, LinearCoord or CoordSet
            if isinstance(coord, (list, tuple)):
                coord = CoordSet(
                    *coord, sorted=False
                )  # make sure in this case it becomes a CoordSet instance

            elif isinstance(coord, np.ndarray) or coord is None:
                coord = Coord(
                    coord, copy=True
                )  # make sure it's a Coord  # (even if it is None -> Coord(None)

            elif isinstance(coord, str) and coord in DEFAULT_DIM_NAME:
                # may be a reference to other coordinates (e.g., the same coordinates for various dimensions)
                self._references[key] = coord  # store this reference
                continue

            # Populate the coords with coord and coord's name.
            if isinstance(coord,
                          (NDArray, Coord, LinearCoord, CoordSet)):  # NDArray,
                if key in self.available_names or (
                        len(key) == 2 and key.startswith('_')
                        and key[1] in list("123456789")):
                    # ok we can find it as a canonical name:
                    # this will overwrite any already defined coord value
                    # which means also that kwargs have priority over args
                    coord.name = key
                    self._append(coord)

                elif not self.is_empty and key in self.names:
                    # a coordinate with this name was already set in the passed args:
                    # replace it
                    idx = self.names.index(key)
                    coord.name = key
                    self._coords[idx] = coord

                else:
                    raise KeyError(
                        f'Probably an invalid key (`{key}`) for coordinates has been passed. '
                        f'Valid keys are among:{DEFAULT_DIM_NAME}')

            else:
                raise ValueError(
                    f'Probably an invalid type of coordinates has been passed: {key}:{coord} '
                )

        # store the item (validation will be performed)
        # self._coords = _coords

        # inform the parent about the update
        self._updated = True

        # set a notifier on the `_name` trait of each coordinate
        for coord in self._coords or []:
            if coord is not None:
                HasTraits.observe(coord, self._coords_update, '_name')

        # initialize the base class with any remaining arguments
        super().__init__(**kwargs)

    # ..................................................................................................................
    def implements(self, name=None):
        """
        Utility to check if the current object implements `CoordSet`.

        Rather than isinstance(obj, CoordSet), use obj.implements('CoordSet').

        This is useful to check the type without importing the module.
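
        A minimal sketch (assuming an empty CoordSet can be created without
        arguments):

        >>> cs = CoordSet()
        >>> cs.implements()
        'CoordSet'
        >>> cs.implements('Coord')
        False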
        """
        if name is None:
            return 'CoordSet'
        else:
            return name == 'CoordSet'

    # ------------------------------------------------------------------------------------------------------------------
    # Validation methods
    # ------------------------------------------------------------------------------------------------------------------
    # ..................................................................................................................
    @validate('_coords')
    def _coords_validate(self, proposal):
        coords = proposal['value']
        if not coords:
            return None

        for coord in coords:
            if coord and not isinstance(coord, (Coord, LinearCoord, CoordSet)):
                raise TypeError(
                    'At this point all passed coordinates should be of type Coord or CoordSet!'
                )

        for coord in coords:
            if isinstance(coord, CoordSet):
                # it must be a single dimension axis
                # in this case we must have same length for all coordinates
                coord._is_same_dim = True

                # check this is valid in terms of size
                # (accessing `sizes` raises ValueError if the sizes differ)
                coord.sizes

                # change the internal names
                n = len(coord)
                coord._set_names([
                    f"_{i + 1}" for i in range(n)
                ])  # we must have _1 for the first coordinate,
                # _2 for the second, etc...
                coord._set_parent_dim(coord.name)

        # last check and sorting
        names = []
        for coord in coords:
            if coord.has_defined_name:
                names.append(coord.name)
            else:
                raise ValueError(
                    'At this point all passed coordinates should have a valid name!'
                )

        if coords:
            if self._sorted:
                _sortedtuples = sorted(
                    (coord.name, coord) for coord in coords)  # Final sort
                coords = list(zip(*_sortedtuples))[1]
            return list(coords)  # be sure its a list not a tuple
        else:
            return None

    # ..................................................................................................................
    @default('_id')
    def _id_default(self):
        # a unique id
        return f"{type(self).__name__}_{str(uuid.uuid1()).split('-')[0]}"

    # ------------------------------------------------------------------------------------------------------------------
    # Readonly Properties
    # ------------------------------------------------------------------------------------------------------------------

    # ..................................................................................................................
    @property
    def available_names(self):
        """
        Chars that can be used for dimension names (those of DEFAULT_DIM_NAME not already in use).
        """
        _available_names = DEFAULT_DIM_NAME.copy()
        for item in self.names:
            if item in _available_names:
                _available_names.remove(item)
        return _available_names

    # ..................................................................................................................
    @property
    def coords(self):
        """
        list - Coordinates in the coordset.
        """
        return self._coords

    # ..................................................................................................................
    @property
    def has_defined_name(self):
        """
        bool - True if the name has been defined.
        """
        return not (self.name == self.id)

    # ..................................................................................................................
    @property
    def id(self):
        """
        str - Object identifier (Readonly property).
        """
        return self._id

    # ..................................................................................................................
    @property
    def is_empty(self):
        """
        bool - True if no coords are defined.
        """
        return not self._coords

    # ..................................................................................................................
    @property
    def is_same_dim(self):
        """
        bool - True if the coords define a single dimension
        """
        return self._is_same_dim

    # ..................................................................................................................
    @property
    def references(self):
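        """
        dict - References to other coordinates (e.g., when several dimensions share the same coordinates).
        """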
        return self._references

    # ..................................................................................................................
    @property
    def sizes(self):
        """int or tuple of int - Sizes of the coord object for each dimension
        (readonly property). If the set is for a single dimension return a
        single size as all coordinates must have the same.
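
        A hedged sketch (reusing the `cs` built in the class docstring example):

        >>> cs.sizes
        [5, 6, 4]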
        """
        _sizes = []
        for item in self._coords:
            _sizes.append(item.size)  # recursion if item is a CoordSet

        if self.is_same_dim:
            _sizes = list(set(_sizes))
            if len(_sizes) > 1:
                raise ValueError(
                    'Coordinates must be of the same size for a dimension with multiple coordinates'
                )
            return _sizes[0]
        return _sizes

    # alias
    size = sizes

    # ..................................................................................................................
    # @property
    # def coords(self):  #TODO: replace with itertiems, items etc ... to simulate a dict
    #     """list - list of the Coord objects in the current coords (readonly
    #     property).
    #     """
    #     return self._coords

    # ..................................................................................................................
    @property
    def names(self):
        """list - Names of the coords in the current coords (read only property)
        """
        _names = []
        if self._coords:
            for item in self._coords:
                if item.has_defined_name:
                    _names.append(item.name)
        return _names

    # ------------------------------------------------------------------------------------------------------------------
    # Mutable Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    def default(self):
        """
        Coord - The default coordinate.
        """
        return self[self._default]

    @property
    def data(self):
        # when data is called on a coordset for a dimension with multiple coordinates,
        # return the data of the default coordinate
        return self.default.data

    # ..................................................................................................................
    @property
    def name(self):
        if self._name:
            return self._name
        else:
            return self._id

    @name.setter
    def name(self, value):
        if value is not None:
            self._name = value

    # ..................................................................................................................
    @property
    def titles(self):
        """list - Titles of the coords in the current coords
        """
        _titles = []
        for item in self._coords:
            if isinstance(item, NDArray):
                _titles.append(
                    item.title if item.title else item.name)  # TODO:name
            elif isinstance(item, CoordSet):
                _titles.append([
                    el.title if el.title else el.name for el in item
                ])  # TODO:name
            else:
                raise ValueError('Something wrong with the titles!')
        return _titles

    # ..................................................................................................................
    @property
    def labels(self):
        """list - Labels of the coordinates in the current coordset
        """
        return [item.labels for item in self]

    # ..................................................................................................................
    @property
    def units(self):
        """
        list - Units of the coords in the current coords
        """
        return [item.units for item in self]

    # ------------------------------------------------------------------------------------------------------------------
    # public methods
    # ------------------------------------------------------------------------------------------------------------------
    # ..................................................................................................................
    def copy(self, keepname=False):
        """
        Make a disconnected copy of the current coords.

        Returns
        -------
        object
            an exact copy of the current object
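
        A minimal sketch (assuming coordinate equality is element-wise, so a
        copy compares equal to the original):

        >>> cs2 = cs.copy()
        >>> cs2 == cs
        True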
        """
        return self.__copy__()

    # ..................................................................................................................
    def keys(self):
        """
        Alias for names

        Returns
        -------
        out : list
            list of all coordinate names (including references to other coordinates)
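
        A hedged sketch (assuming `x` and `y` coordinates were set, as in the
        class docstring example):

        >>> cs = CoordSet(x=Coord.linspace(0., 1., 3), y=Coord.linspace(0., 1., 2))
        >>> cs.keys()
        ['x', 'y']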
        """
        keys = []
        if self.names:
            keys.extend(self.names)
        if self._references:
            keys.extend(list(self.references.keys()))
        return keys

    # ..................................................................................................................
    def select(self, val):
        """
        Select the default coord index
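
        A hedged sketch (assuming a CoordSet holding several coordinates for the
        same dimension, such as `cs.v` in the class docstring example):

        >>> cs.v.select(2)  # the second coordinate (`_2`) becomes the default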
        """
        self._default = min(max(0, int(val) - 1), len(self.names) - 1)  # clamp to a valid index

    # .................................................................................................................
    def set(self, *args, **kwargs):
        """
        Set one or more coordinates in the current CoordSet.

        Parameters
        ----------
        *args : Coord, CoordSet or sequence
            The coordinates to set, given following the dims order.
            When positional coordinates are given, previously defined
            coordinates are reset.
        **kwargs
            Coordinates given by dimension name (e.g., ``x=coord``).
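
        A hedged sketch (assuming `c0` and `c1` are Coord objects):

        >>> cs.set(c0, c1)      # positional: names assigned following the dims order
        >>> cs.set(x=c0, y=c1)  # by dimension name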
        """
        if not args and not kwargs:
            return

        if len(args) == 1 and (is_sequence(args[0])
                               or isinstance(args[0], CoordSet)):
            args = args[0]

        if isinstance(args, CoordSet):
            kwargs.update(args.to_dict())
            args = ()

        if args:
            self._coords = []  # reset

        for i, item in enumerate(args[::-1]):
            item.name = self.available_names.pop()
            self._append(item)

        for k, item in kwargs.items():
            if isinstance(item, CoordSet):
                # try to keep this parameter to True!
                item._is_same_dim = True
            self[k] = item

    # ..................................................................................................................
    def set_titles(self, *args, **kwargs):
        """
        Set one or more coord titles at once.

        Notes
        -----
        If the args are not named, then the attributions are made in alphabetical
        order of the coordinates' names:
        e.g., the first title will be for the `x` coordinates, the second for the `y`, etc.

        Parameters
        ----------
        args : str(s)
            The list of titles to apply to the set of coordinates. They must be given
            in alphabetical order of the coordinates' names.
        kwargs : str
            Keyword attribution of the titles. The keys must be valid names among the
            coordinate's name list. This is the recommended way to set titles, as it
            is less prone to errors.
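
        A hedged sketch (assuming a coordset with `x` and `y` dimensions):

        >>> cs.set_titles('wavelength', 'temperature')  # x first, then y
        >>> cs.set_titles(y='temperature')              # recommended form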
        """
        if len(args) == 1 and (is_sequence(args[0])
                               or isinstance(args[0], CoordSet)):
            args = args[0]

        for i, item in enumerate(args):
            if not isinstance(self[i], CoordSet):
                self[i].title = item
            else:
                if is_sequence(item):
                    for j, v in enumerate(self[i]):
                        v.title = item[j]

        for k, item in kwargs.items():
            self[k].title = item

    # ..................................................................................................................
    def set_units(self, *args, **kwargs):
        """
        Set one or more coord units at once.

        Notes
        -----
        If the args are not named, then the attributions are made in alphabetical
        order of the coordinates' names:
        e.g., the first units will be for the `x` coordinates, the second for the `y`, etc.

        Parameters
        ----------
        args : str(s)
            The list of units to apply to the set of coordinates. They must be given
            in alphabetical order of the coordinates' names.
        kwargs : str
            Keyword attribution of the units. The keys must be valid names among the
            coordinate's name list. This is the recommended way to set units, as it
            is less prone to errors.
        force : bool, optional, default=False
            Whether the new units must be compatible with the current units. See the
            `Coord.to` method.
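
        A hedged sketch (assuming the current `x` units are a length, so 'km'
        is compatible):

        >>> cs.set_units(x='km')
        >>> cs.set_units(y='degC', force=True)  # force bypasses the compatibility check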
        """
        force = kwargs.pop('force', False)

        if len(args) == 1 and is_sequence(args[0]):
            args = args[0]

        for i, item in enumerate(args):
            if not isinstance(self[i], CoordSet):
                self[i].to(item, force=force, inplace=True)
            else:
                if is_sequence(item):
                    for j, v in enumerate(self[i]):
                        v.to(item[j], force=force, inplace=True)

        for k, item in kwargs.items():
            self[k].to(item, force=force, inplace=True)

    # ..................................................................................................................
    def to_dict(self):
        """
        Return a dict of the coordinates from the coordset

        Returns
        -------
        out : dict
            A dictionary where keys are the names of the coordinates, and the values the coordinates themselves
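
        A hedged sketch (assuming `x` and `y` coordinates were set):

        >>> d = cs.to_dict()
        >>> list(d.keys())
        ['x', 'y']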
        """
        return dict(zip(self.names, self._coords))

    # ..................................................................................................................
    def update(self, **kwargs):
        """
        Update specific coordinates in the CoordSet.

        Parameters
        ----------
        kwargs : Only keywords among the CoordSet.names are allowed - they denote the names of the dimensions to update.
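
        A hedged sketch (assuming `cs` has an `x` dimension):

        >>> cs.update(x=Coord.linspace(0., 10., 5, title='new x'))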
        """
        dims = kwargs.keys()
        for dim in list(dims):
            if dim in self.names:
                # we can replace the given coordinates
                idx = self.names.index(dim)
                self[idx] = Coord(kwargs.pop(dim), name=dim)

    # ------------------------------------------------------------------------------------------------------------------
    # private methods
    # ------------------------------------------------------------------------------------------------------------------

    # ..................................................................................................................
    def _append(self, coord):
        # utility function to append coordinate with full validation
        if not isinstance(coord, tuple):
            coord = (coord, )
        if self._coords:
            # some coordinates already present, prepend the new one
            self._coords = (*coord, ) + tuple(
                self._coords)  # instead of append, fire the validation process
        else:
            # no coordinates yet, start a new tuple of coordinate
            self._coords = (*coord, )

    # ..................................................................................................................
    def _loc2index(self, loc):
        # Return the index of a location
        for coord in self.coords:
            try:
                return coord._loc2index(loc)
            except IndexError:
                continue
        # not found!
        raise IndexError

    # ..................................................................................................................
    def _set_names(self, names):
        # utility function to change names of coordinates (in batch)
        # useful when a coordinate is a CoordSet itself
        for coord, name in zip(self._coords, names):
            coord.name = name

    # ..................................................................................................................
    def _set_parent_dim(self, name):
        # utility function to set the parent name for a sub-coordset
        for coord in self._coords:
            coord._parent_dim = name

    # ------------------------------------------------------------------------------------------------------------------
    # special methods
    # ------------------------------------------------------------------------------------------------------------------

    # ..................................................................................................................
    @staticmethod
    def __dir__():
        return ['coords', 'references', 'is_same_dim', 'name']

    # ..................................................................................................................
    def __call__(self, *args, **kwargs):
        # allow the following syntax: coords(), coords(0, 2), or coords(axis=(0, 2))
        coords = []
        axis = kwargs.get('axis', None)
        if args:
            for idx in args:
                coords.append(self[idx])
        elif axis is not None:
            if not is_sequence(axis):
                axis = [axis]
            for i in axis:
                coords.append(self[i])
        else:
            coords = self._coords
        if len(coords) == 1:
            return coords[0]
        else:
            return CoordSet(*coords)

    # ..................................................................................................................
    def __hash__(self):
        # instances of this class with the same coordinates have the same hash, so they can be compared
        return hash(tuple(self._coords))

    # ..................................................................................................................
    def __len__(self):
        return len(self._coords)

    def __delattr__(self, item):
        if 'notify_change' in item:
            pass

        else:
            try:
                return self.__delitem__(item)
            except (IndexError, KeyError):
                raise AttributeError

    # ..................................................................................................................
    def __getattr__(self, item):
        # when the attribute was not found
        if '_validate' in item or '_changed' in item:
            raise AttributeError

        try:
            return self.__getitem__(item)
        except (IndexError, KeyError):
            raise AttributeError

    # ..................................................................................................................
    def __getitem__(self, index):

        if isinstance(index, str):

            # find by name
            if index in self.names:
                idx = self.names.index(index)
                return self._coords.__getitem__(idx)

            # ok we did not find it!
            # let's try in references
            if index in self._references.keys():
                return self._references[index]

            # let's try in the title
            if index in self.titles:
                # selection by coord titles
                if self.titles.count(index) > 1:
                    warnings.warn(
                        f"Getting a coordinate from its title. However `{index}` occurs several time. Only"
                        f" the first occurence is returned!")
                return self._coords.__getitem__(self.titles.index(index))

            # maybe it is a title or a name in a sub-coordset
            for item in self._coords:
                if isinstance(item, CoordSet) and index in item.titles:
                    # selection by subcoord title
                    return item.__getitem__(item.titles.index(index))

            for item in self._coords:
                if isinstance(item, CoordSet) and index in item.names:
                    # selection by subcoord name
                    return item.__getitem__(item.names.index(index))

            try:
                # let's try with the canonical dimension names
                if index[0] in self.names:
                    # ok, we can find it as a canonical name:
                    c = self._coords.__getitem__(self.names.index(index[0]))
                    if len(index) > 1 and index[1] == '_':
                        if isinstance(c, CoordSet):
                            c = c.__getitem__(index[1:])
                        else:
                            c = c.__getitem__(index[2:])  # try on labels
                    return c
            except IndexError:
                pass

            raise KeyError(
                f"Could not find `{index}` in coordinates names or titles")

        res = self._coords.__getitem__(index)
        if isinstance(index, slice):
            if isinstance(res, CoordSet):
                res = (res, )
            return CoordSet(*res, keepnames=True)
        else:
            return res

    # ..................................................................................................................
    def __setattr__(self, key, value):
        keyb = key[1:] if key.startswith('_') else key
        if keyb in [
                'parent', 'copy', 'sorted', 'coords', 'updated', 'name',
                'html_output', 'is_same_dim', 'parent_dim', 'trait_values',
                'trait_notifiers', 'trait_validators', 'cross_validation_lock',
                'notify_change'
        ]:
            super().__setattr__(key, value)
            return

        try:
            self.__setitem__(key, value)
        except Exception:
            super().__setattr__(key, value)

    # ..................................................................................................................
    def __setitem__(self, index, coord):
        try:
            coord = coord.copy(
                keepname=True)  # to avoid modifying the original
        except TypeError as e:
            if isinstance(coord, list):
                coord = [c.copy(keepname=True) for c in coord[:]]
            else:
                raise e

        if isinstance(index, str):
            # find by name
            if index in self.names:
                idx = self.names.index(index)
                coord.name = index
                self._coords.__setitem__(idx, coord)
                return

            # ok we did not find it!
            # let's try in the title
            if index in self.titles:
                # selection by coord titles
                if self.titles.count(index) > 1:
                    warnings.warn(
                        f"Getting a coordinate from its title. However `{index}` occurs several time. Only"
                        f" the first occurence is returned!")
                index = self.titles.index(index)
                coord.name = self.names[index]
                self._coords.__setitem__(index, coord)
                return

            # maybe it is a title or a name in a sub-coordset
            for item in self._coords:
                if isinstance(item, CoordSet) and index in item.titles:
                    # selection by subcoord title
                    index = item.titles.index(index)
                    coord.name = item.names[index]
                    item.__setitem__(index, coord)
                    return
            for item in self._coords:
                if isinstance(item, CoordSet) and index in item.names:
                    # selection by subcoord name
                    index = item.names.index(index)
                    coord.name = item.names[index]
                    item.__setitem__(index, coord)
                    return

            try:
                # let's try with the canonical dimension names
                if index[0] in self.names:
                    # ok, we can find it as a canonical name:
                    c = self._coords.__getitem__(self.names.index(index[0]))
                    if len(index) > 1 and index[1] == '_':
                        c.__setitem__(index[1:], coord)
                    return

            except KeyError:
                pass

            # add the new coordinates
            if index in self.available_names or (
                    len(index) == 2 and index.startswith('_')
                    and index[1] in list("123456789")):
                coord.name = index
                self._coords.append(coord)
                return

            else:
                raise KeyError(
                    f"Could not find `{index}` in coordinates names or titles")

        self._coords[index] = coord

    # ..................................................................................................................
    def __delitem__(self, index):

        if isinstance(index, str):

            # find by name
            if index in self.names:
                idx = self.names.index(index)
                del self._coords[idx]
                return

            # let's try in the title
            if index in self.titles:
                # selection by coord titles
                index = self.titles.index(index)
                self._coords.__delitem__(index)
                return

            # maybe it is a title in a sub-coordset
            for item in self._coords:
                if isinstance(item, CoordSet) and index in item.titles:
                    # selection by subcoord title
                    return item.__delitem__(index)

            # let's try with the canonical dimension names
            if index[0] in self.names:
                # ok, we can find it as a canonical name:
                c = self._coords.__getitem__(self.names.index(index[0]))
                if len(index) > 1 and index[1] == '_':
                    if isinstance(c, CoordSet):
                        return c.__delitem__(index[1:])

            raise KeyError(
                f"Could not find `{index}` in coordinates names or titles")

    # ..................................................................................................................
    # def __iter__(self):
    #    for item in self._coords:
    #        yield item

    # ..................................................................................................................
    def __repr__(self):
        out = "CoordSet: [" + ', '.join(['{}'] * len(self._coords)) + "]"
        s = []
        for item in self._coords:
            if isinstance(item, CoordSet):
                s.append(f"{item.name}:" +
                         repr(item).replace('CoordSet: ', ''))
            else:
                s.append(f"{item.name}:{item.title}")
        out = out.format(*s)
        return out

    # ..................................................................................................................
    def __str__(self):
        return repr(self)

    # ..................................................................................................................
    def _cstr(self, header='  coordinates: ... \n', print_size=True):

        txt = ''
        for idx, dim in enumerate(self.names):
            coord = getattr(self, dim)

            if coord:

                dimension = f'     DIMENSION `{dim}`'
                for k, v in self.references.items():
                    if dim == v:
                        # reference to this dimension
                        dimension += f'=`{k}`'
                txt += dimension + '\n'

                if isinstance(coord, CoordSet):
                    # txt += '        index: {}\n'.format(idx)
                    if not coord.is_empty:
                        if print_size:
                            txt += f'{coord[0]._str_shape().rstrip()}\n'

                        coord._html_output = self._html_output
                        for idx_s, dim_s in enumerate(coord.names):
                            c = getattr(coord, dim_s)
                            txt += f'          ({dim_s}) ...\n'
                            c._html_output = self._html_output
                            sub = c._cstr(header='  coordinates: ... \n',
                                          print_size=False
                                          )  # , indent=4, first_indent=-6)
                            txt += f"{sub}\n"

                elif not coord.is_empty:
                    # coordinates if available
                    # txt += '        index: {}\n'.format(idx)
                    coord._html_output = self._html_output
                    txt += '{}\n'.format(
                        coord._cstr(header=header, print_size=print_size))

        txt = txt.rstrip()  # remove the trailing '\n'

        if not self._html_output:
            return colored_output(txt.rstrip())
        else:
            return txt.rstrip()

    # ..................................................................................................................
    def _repr_html_(self):
        return convert_to_html(self)

    # ..................................................................................................................
    def __deepcopy__(self, memo):
        coords = self.__class__(tuple(
            cpy.deepcopy(ax, memo=memo) for ax in self),
                                keepnames=True)
        coords.name = self.name
        coords._is_same_dim = self._is_same_dim
        coords._default = self._default
        return coords

    # ..................................................................................................................
    def __copy__(self):
        coords = self.__class__(tuple(cpy.copy(ax) for ax in self),
                                keepnames=True)
        # name must be changed
        coords.name = self.name
        # and is_same_dim and default for coordset
        coords._is_same_dim = self._is_same_dim
        coords._default = self._default
        return coords

    # ..................................................................................................................

    def __eq__(self, other):
        if other is None:
            return False
        try:
            return self._coords == other._coords
        except Exception:
            return False

    # ..................................................................................................................
    def __ne__(self, other):
        return not self.__eq__(other)

    # ------------------------------------------------------------------------------------------------------------------
    # Events
    # ------------------------------------------------------------------------------------------------------------------
    # ..................................................................................................................
    def _coords_update(self, change):
        # when notified that a coord name has been updated
        self._updated = True

    # ..................................................................................................................
    @observe(All)
    def _anytrait_changed(self, change):
        # ex: change {
        #   'owner': object, # The HasTraits instance
        #   'new': 6, # The new value
        #   'old': 5, # The old value
        #   'name': "foo", # The name of the changed trait
        #   'type': 'change', # The event type of the notification, usually 'change'
        # }

        if change.name == '_updated' and change.new:
            self._updated = False  # reset
Example #2
class SqlMagic(Magics, Configurable):
    """Runs SQL statement on a database, specified by SQLAlchemy connect string.

    Provides the %%sql magic."""

    autolimit = Int(
        0,
        config=True,
        help="Automatically limit the size of the returned result sets")
    style = Unicode(
        'DEFAULT',
        config=True,
        help=
        "Set the table printing style to any of prettytable's defined styles (currently DEFAULT, MSWORD_FRIENDLY, PLAIN_COLUMNS, RANDOM)"
    )
    short_errors = Bool(
        True,
        config=True,
        help="Don't display the full traceback on SQL Programming Error")
    displaylimit = Int(
        0,
        config=True,
        help=
        "Automatically limit the number of rows displayed (full result set is still stored)"
    )
    autopandas = Bool(
        False,
        config=True,
        help="Return Pandas DataFrames instead of regular result sets")
    column_local_vars = Bool(
        False,
        config=True,
        help="Return data into local variables from column names")
    feedback = Bool(True,
                    config=True,
                    help="Print number of rows affected by DML")
    dsn_filename = Unicode('odbc.ini',
                           config=True,
                           help="Path to DSN file. "
                           "When the first argument is of the form [section], "
                           "a sqlalchemy connection string is formed from the "
                           "matching section in the DSN file.")

    def __init__(self, shell):
        Configurable.__init__(self, config=shell.config)
        Magics.__init__(self, shell=shell)

        # Add ourself to the list of module configurable via %config
        self.shell.configurables.append(self)

    @needs_local_scope
    @line_magic('sql')
    @cell_magic('sql')
    def execute(self, line, cell='', local_ns=None):
        """Runs SQL statement against a database, specified by SQLAlchemy connect string.

        If no database connection has been established, first word
        should be a SQLAlchemy connection string, or the user@db name
        of an established connection.

        Examples::

          %%sql postgresql://me:mypw@localhost/mydb
          SELECT * FROM mytable

          %%sql me@mydb
          DELETE FROM mytable

          %%sql
          DROP TABLE mytable

        SQLAlchemy connect string syntax examples:

          postgresql://me:mypw@localhost/mydb
          sqlite://
          mysql+pymysql://me:mypw@localhost/mydb

        """
        # save globals and locals so they can be referenced in bind vars
        user_ns = self.shell.user_ns.copy()
        user_ns.update(local_ns or {})

        parsed = sql.parse.parse('%s\n%s' % (line, cell), self)
        conn = sql.connection.Connection.get(parsed['connection'])
        first_word = parsed['sql'].split(None, 1)[:1]
        if first_word and first_word[0].lower() == 'persist':
            return self._persist_dataframe(parsed['sql'], conn, user_ns)

        try:
            result = sql.run.run(conn, parsed['sql'], self, user_ns)

            if result and not isinstance(result, str) and self.column_local_vars:
                # Instead of returning values, set variables directly in the
                # user's namespace. Variable names are given by column names.

                if self.autopandas:
                    keys = result.keys()
                else:
                    keys = result.keys
                    result = result.dict()

                if self.feedback:
                    print('Returning data to local variables [{}]'.format(
                        ', '.join(keys)))

                self.shell.user_ns.update(result)

                return None
            else:
                # Return results into the default IPython _ variable
                return result

        except (ProgrammingError, OperationalError) as e:
            # SQLite apparently returns all errors as OperationalError :/
            if self.short_errors:
                print(e)
            else:
                raise

    legal_sql_identifier = re.compile(r'^[A-Za-z0-9#_$]+')

    def _persist_dataframe(self, raw, conn, user_ns):
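        """Implement the ``persist`` command: write a DataFrame (or Series)
        from the user namespace to the database via ``DataFrame.to_sql``."""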
        if not DataFrame:
            raise ImportError("Must `pip install pandas` to use DataFrames")
        pieces = raw.split()
        if len(pieces) != 2:
            raise SyntaxError(
                "Format: %sql [connection] persist <DataFrameName>")
        frame_name = pieces[1].strip(';')
        frame = eval(frame_name, user_ns)
        if not isinstance(frame, DataFrame) and not isinstance(frame, Series):
            raise TypeError('%s is not a Pandas DataFrame or Series' %
                            frame_name)
        table_name = frame_name.lower()
        table_name = self.legal_sql_identifier.search(table_name).group(0)
        frame.to_sql(table_name, conn.session.engine)
        return 'Persisted %s' % table_name
Example #3
class Repo2Docker(Application):
    """An application for converting git repositories to docker images"""

    name = "jupyter-repo2docker"
    version = __version__
    description = __doc__

    @default("log_level")
    def _default_log_level(self):
        """The application's default log level"""
        return logging.INFO

    git_workdir = Unicode(
        None,
        config=True,
        allow_none=True,
        help="""
        Working directory to use for check out of git repositories.

        The default is to use the system's temporary directory. Should be
        somewhere ephemeral, such as /tmp.
        """,
    )

    subdir = Unicode(
        "",
        config=True,
        help="""
        Subdirectory of the git repository to examine.

        Defaults to ''.
        """,
    )

    cache_from = List(
        [],
        config=True,
        help="""
        List of images to try & re-use cached image layers from.

        Docker only tries to re-use image layers from images built locally,
        not pulled from a registry. We can ask it to explicitly re-use layers
        from non-locally built images through the 'cache_from' parameter.
        """,
    )

    buildpacks = List(
        [
            LegacyBinderDockerBuildPack,
            DockerBuildPack,
            JuliaProjectTomlBuildPack,
            JuliaRequireBuildPack,
            NixBuildPack,
            RBuildPack,
            CondaBuildPack,
            PipfileBuildPack,
            PythonBuildPack,
        ],
        config=True,
        help="""
        Ordered list of BuildPacks to try when building a git repository.
        """,
    )

    extra_build_kwargs = Dict(
        {},
        help="""
        extra kwargs to limit CPU quota when building a docker image.
        Dictionary that allows the user to set the desired runtime flag
        to configure the amount of access to CPU resources your container has.
        Reference https://docs.docker.com/config/containers/resource_constraints/#cpu
        """,
        config=True,
    )

    extra_run_kwargs = Dict(
        {},
        help="""
        extra kwargs to limit CPU quota when running a docker image.
        Dictionary that allows the user to set the desired runtime flag
        to configure the amount of access to CPU resources your container has.
        Reference https://docs.docker.com/config/containers/resource_constraints/#cpu
        """,
        config=True,
    )

    default_buildpack = Any(
        PythonBuildPack,
        config=True,
        help="""
        The default build pack to use when no other buildpacks are found.
        """,
    )

    # Git is our content provider of last resort. This is to maintain the
    # old behaviour when git and local directories were the only supported
    # content providers. We can detect local directories from the path, but
    # detecting if something will successfully `git clone` is very hard if all
    # you can do is look at the path/URL to it.
    content_providers = List(
        [
            contentproviders.Local,
            contentproviders.Zenodo,
            contentproviders.Figshare,
            contentproviders.Dataverse,
            contentproviders.Hydroshare,
            contentproviders.Swhid,
            contentproviders.Mercurial,
            contentproviders.Git,
        ],
        config=True,
        help="""
        Ordered list by priority of ContentProviders to try in turn to fetch
        the contents specified by the user.
        """,
    )

    build_memory_limit = ByteSpecification(
        0,
        help="""
        Total memory that can be used by the docker image building process.

        Set to 0 for no limits.
        """,
        config=True,
    )

    volumes = Dict(
        {},
        help="""
        Volumes to mount when running the container.

        Only used when running, not during build process!

        Use a key-value pair, with the key being the volume source &
        value being the destination volume.

        Both source and destination can be relative. Source is resolved
        relative to the current working directory on the host, and
        destination is resolved relative to the working directory of the
        image - ($HOME by default)
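
        A hedged example (the paths are illustrative)::

            c.Repo2Docker.volumes = {"./data": "data"}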
        """,
        config=True,
    )

    user_id = Int(
        help="""
        UID of the user to create inside the built image.

        Should be a uid that is not currently used by anything in the image.
        Defaults to uid of currently running user, since that is the most
        common case when running r2d manually.

        Might not affect Dockerfile builds.
        """,
        config=True,
    )

    @default("user_id")
    def _user_id_default(self):
        """
        Default user_id to current running user.
        """
        return os.geteuid()

    user_name = Unicode(
        "jovyan",
        help="""
        Username of the user to create inside the built image.

        Should be a username that is not currently used by anything in the
        image, and should conform to the restrictions on user names for Linux.

        Defaults to username of currently running user, since that is the most
        common case when running repo2docker manually.
        """,
        config=True,
    )

    @default("user_name")
    def _user_name_default(self):
        """
        Default user_name to current running user.
        """
        return getpass.getuser()

    appendix = Unicode(
        config=True,
        help="""
        Appendix of Dockerfile commands to run at the end of the build.

        Can be used to customize the resulting image after all
        standard build steps finish.
        """,
    )

    json_logs = Bool(
        False,
        help="""
        Log output in structured JSON format.

        Useful when stdout is consumed by other tools
        """,
        config=True,
    )

    repo = Unicode(
        ".",
        help="""
        Specification of repository to build image for.

        Could be local path or git URL.
        """,
        config=True,
    )

    ref = Unicode(
        None,
        help="""
        Git ref that should be built.

        If repo is a git repository, this ref is checked out
        in a local clone before repository is built.
        """,
        config=True,
        allow_none=True,
    )

    swh_token = Unicode(
        None,
        help="""
        Token to use authenticated SWH API access.

        If unset, default to unauthenticated (limited) usage of the Software
        Heritage API.
        """,
        config=True,
        allow_none=True,
    )

    cleanup_checkout = Bool(
        False,
        help="""
        Delete source repository after building is done.

        Useful when repo2docker is doing the git cloning
        """,
        config=True,
    )

    output_image_spec = Unicode(
        "",
        help="""
        Docker Image name:tag to tag the built image with.

        Required parameter.
        """,
        config=True,
    )

    push = Bool(
        False,
        help="""
        Set to true to push docker image after building
        """,
        config=True,
    )

    run = Bool(
        False,
        help="""
        Run docker image after building
        """,
        config=True,
    )

    # FIXME: Refactor class to be able to do --no-build without needing
    #        deep support for it inside other code
    dry_run = Bool(
        False,
        help="""
        Do not actually build the docker image, just simulate it.
        """,
        config=True,
    )

    # FIXME: Refactor classes to separate build & run steps
    run_cmd = List(
        [],
        help="""
        Command to run when running the container

        When left empty, a jupyter notebook is run.
        """,
        config=True,
    )

    all_ports = Bool(
        False,
        help="""
        Publish all declared ports from container while running.

        Equivalent to -P option to docker run
        """,
        config=True,
    )

    ports = Dict(
        {},
        help="""
        Port mappings to establish when running the container.

        Equivalent to -p {key}:{value} options to docker run.
        {key} refers to port inside container, and {value}
        refers to port / host:port in the host
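
        A hedged example (the key/value forms follow the description above;
        the numbers are illustrative)::

            c.Repo2Docker.ports = {"8888/tcp": "8888"}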
        """,
        config=True,
    )

    environment = List(
        [],
        help="""
        Environment variables to set when running the built image.

        Each item must be a string formatted as KEY=VALUE
        """,
        config=True,
    )

    target_repo_dir = Unicode(
        "",
        help="""
        Path inside the image where contents of the repositories are copied to,
        and where all the build operations (such as postBuild) happen.

        Defaults to ${HOME} if not set
        """,
        config=True,
    )

    engine = Unicode(
        "docker",
        config=True,
        help="""
        Name of the container engine.

        Defaults to 'docker'.
        """,
    )

    def get_engine(self):
        """Return an instance of the container engine.

        The engine class is instantiated with ``parent=self`` as its only argument.
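
        A hedged sketch of how an alternative engine could be registered via
        the ``repo2docker.engines`` entry point group used below (the package
        and class names here are hypothetical)::

            # setup.py of a hypothetical engine package
            setup(
                ...,
                entry_points={
                    "repo2docker.engines": [
                        "podman = repo2docker_podman:PodmanEngine",
                    ],
                },
            )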
        """
        engines = entrypoints.get_group_named("repo2docker.engines")
        try:
            entry = engines[self.engine]
        except KeyError:
            raise ContainerEngineException(
                "Container engine '{}' not found. Available engines: {}".
                format(self.engine, ",".join(engines.keys())))
        engine_class = entry.load()
        return engine_class(parent=self)
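
    # A hedged sketch of how engines plug in: get_engine() looks the name up
    # in the "repo2docker.engines" entry point group, so a third-party engine
    # can register itself from its own package metadata. The names below
    # (my_engine, my_package.engine:MyEngine) are hypothetical:
    #
    #   # setup.cfg of the engine package
    #   [options.entry_points]
    #   repo2docker.engines =
    #       my_engine = my_package.engine:MyEngine
    #
    # Once installed, setting the `engine` trait to "my_engine" makes
    # get_engine() load MyEngine and instantiate it with parent=self.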

    def fetch(self, url, ref, checkout_path):
        """Fetch the contents of `url` and place it in `checkout_path`.

        The `ref` parameter specifies what "version" of the contents should be
        fetched. In the case of a git repository `ref` is the SHA-1 of a commit.

        Iterate through possible content providers until a valid provider,
        based on URL, is found.
        """
        picked_content_provider = None
        for ContentProvider in self.content_providers:
            cp = ContentProvider()
            spec = cp.detect(url, ref=ref)
            if spec is not None:
                picked_content_provider = cp
                self.log.info("Picked {cp} content "
                              "provider.\n".format(cp=cp.__class__.__name__))
                break

        if picked_content_provider is None:
            self.log.error("No matching content provider found for "
                           "{url}.".format(url=url))

        swh_token = self.config.get("swh_token", self.swh_token)
        if swh_token and isinstance(picked_content_provider,
                                    contentproviders.Swhid):
            picked_content_provider.set_auth_token(swh_token)

        for log_line in picked_content_provider.fetch(
                spec, checkout_path, yield_output=self.json_logs):
            self.log.info(log_line, extra=dict(phase="fetching"))

        if not self.output_image_spec:
            image_spec = "r2d" + self.repo
            # if we are building from a subdirectory include that in the
            # image name so we can tell builds from different sub-directories
            # apart.
            if self.subdir:
                image_spec += self.subdir
            if picked_content_provider.content_id is not None:
                image_spec += picked_content_provider.content_id
            else:
                image_spec += str(int(time.time()))
            self.output_image_spec = escapism.escape(image_spec,
                                                     escape_char="-").lower()

    def json_excepthook(self, etype, evalue, traceback):
        """Called on an uncaught exception when using json logging

        Avoids non-JSON output on errors when using --json-logs
        """
        self.log.error(
            "Error during build: %s",
            evalue,
            exc_info=(etype, evalue, traceback),
            extra=dict(phase="failed"),
        )

    def initialize(self):
        """Init repo2docker configuration before start"""
        # FIXME: Remove this function, move it to setters / traitlet reactors
        if self.json_logs:
            # register JSON excepthook to avoid non-JSON output on errors
            sys.excepthook = self.json_excepthook
            # Need to reset existing handlers, or we repeat messages
            logHandler = logging.StreamHandler()
            formatter = jsonlogger.JsonFormatter()
            logHandler.setFormatter(formatter)
            self.log = logging.getLogger("repo2docker")
            self.log.handlers = []
            self.log.addHandler(logHandler)
            self.log.setLevel(self.log_level)
        else:
            # due to json logger stuff above,
            # our log messages include carriage returns, newlines, etc.
            # remove the additional newline from the stream handler
            self.log.handlers[0].terminator = ""
            # We don't want a [Repo2Docker] on all messages
            self.log.handlers[0].formatter = logging.Formatter(
                fmt="%(message)s")

        if self.dry_run and (self.run or self.push):
            raise ValueError(
                "Cannot push or run image if we are not building it")

        if self.volumes and not self.run:
            raise ValueError("Cannot mount volumes if container is not run")

    def push_image(self):
        """Push docker image to registry"""
        client = self.get_engine()
        # Build a progress setup for each layer, and only emit per-layer
        # info every 1.5s
        progress_layers = {}
        layers = {}
        last_emit_time = time.time()
        for chunk in client.push(self.output_image_spec):
            if client.string_output:
                self.log.info(chunk, extra=dict(phase="pushing"))
                continue
            # else this is Docker output

            # each chunk can be one or more lines of json events
            # split lines here in case multiple are delivered at once
            for line in chunk.splitlines():
                line = line.decode("utf-8", errors="replace")
                try:
                    progress = json.loads(line)
                except Exception:
                    self.log.warning("Not a JSON progress line: %r", line)
                    continue
                if "error" in progress:
                    self.log.error(progress["error"],
                                   extra=dict(phase="failed"))
                    raise ImageLoadError(progress["error"])
                if "id" not in progress:
                    continue
                # deprecated truncated-progress data
                if "progressDetail" in progress and progress["progressDetail"]:
                    progress_layers[
                        progress["id"]] = progress["progressDetail"]
                else:
                    progress_layers[progress["id"]] = progress["status"]
                # include full progress data for each layer in 'layers' data
                layers[progress["id"]] = progress
                if time.time() - last_emit_time > 1.5:
                    self.log.info(
                        "Pushing image\n",
                        extra=dict(progress=progress_layers,
                                   layers=layers,
                                   phase="pushing"),
                    )
                    last_emit_time = time.time()
        self.log.info(
            "Successfully pushed {}".format(self.output_image_spec),
            extra=dict(phase="pushing"),
        )

    def run_image(self):
        """Run docker container from built image

        and wait for it to finish.
        """
        container = self.start_container()
        self.wait_for_container(container)

    def start_container(self):
        """Start docker container from built image

        Returns running container
        """
        client = self.get_engine()

        docker_host = os.environ.get("DOCKER_HOST")
        if docker_host:
            host_name = urlparse(docker_host).hostname
        else:
            host_name = "127.0.0.1"
        self.hostname = host_name

        if not self.run_cmd:
            port = str(self._get_free_port())
            self.port = port
            # To use the option --NotebookApp.custom_display_url
            # make sure the base-notebook image is updated:
            # docker pull jupyter/base-notebook
            run_cmd = [
                "jupyter",
                "notebook",
                "--ip",
                "0.0.0.0",
                "--port",
                port,
                "--NotebookApp.custom_display_url=http://{}:{}".format(
                    host_name, port),
            ]
            ports = {"%s/tcp" % port: port}
        else:
            # run_cmd given by user, if port is also given then pass it on
            run_cmd = self.run_cmd
            if self.ports:
                ports = self.ports
            else:
                ports = {}
        # store ports on self so they can be retrieved in tests
        self.ports = ports

        container_volumes = {}
        if self.volumes:
            image = client.inspect_image(self.output_image_spec)
            image_workdir = image.config["WorkingDir"]

            for k, v in self.volumes.items():
                container_volumes[os.path.abspath(k)] = {
                    "bind":
                    v if v.startswith("/") else os.path.join(image_workdir, v),
                    "mode":
                    "rw",
                }

        run_kwargs = dict(
            publish_all_ports=self.all_ports,
            ports=ports,
            command=run_cmd,
            volumes=container_volumes,
            environment=self.environment,
        )

        run_kwargs.update(self.extra_run_kwargs)

        container = client.run(self.output_image_spec, **run_kwargs)

        while container.status == "created":
            time.sleep(0.5)
            container.reload()

        return container

    def wait_for_container(self, container):
        """Wait for a container to finish

        Displaying logs while it's running
        """

        last_timestamp = None
        try:
            for line in container.logs(stream=True, timestamps=True):
                line = line.decode("utf-8")
                last_timestamp, line = line.split(" ", maxsplit=1)
                self.log.info(line, extra=dict(phase="running"))

        finally:
            container.reload()
            if container.status == "running":
                self.log.info("Stopping container...\n",
                              extra=dict(phase="running"))
                container.kill()
            exit_code = container.exitcode

            container.wait()

            self.log.info("Container finished running.\n".upper(),
                          extra=dict(phase="running"))
            # are there more logs? Let's send them back too
            late_logs = container.logs(since=last_timestamp).decode("utf-8")
            for line in late_logs.split("\n"):
                self.log.debug(line + "\n", extra=dict(phase="running"))

            container.remove()
            if exit_code:
                sys.exit(exit_code)

    def _get_free_port(self):
        """
        Hacky method to get a free random port on local host
        """
        import socket

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind(("", 0))
        port = s.getsockname()[1]
        s.close()
        return port

    def find_image(self):
        # if this is a dry run it is OK for dockerd to be unreachable, so we
        # always return False for dry runs.
        if self.dry_run:
            return False
        # check if we already have an image for this content
        client = self.get_engine()
        for image in client.images():
            for tag in image.tags:
                if tag == self.output_image_spec + ":latest":
                    return True
        return False

    def build(self):
        """
        Build docker image
        """
        # Check if r2d can connect to docker daemon
        if not self.dry_run:
            try:
                docker_client = self.get_engine()
            except ContainerEngineException as e:
                self.log.error("\nContainer engine initialization error: %s\n",
                               e)
                self.exit(1)

        # If the source to be executed is a directory, continue using the
        # directory. In the case of a local directory, it is used as both the
        # source and target. Reusing a local directory seems better than
        # making a copy of it as it might contain large files that would be
        # expensive to copy.
        if os.path.isdir(self.repo):
            checkout_path = self.repo
        else:
            if self.git_workdir is None:
                checkout_path = tempfile.mkdtemp(prefix="repo2docker")
            else:
                checkout_path = self.git_workdir

        try:
            self.fetch(self.repo, self.ref, checkout_path)

            if self.find_image():
                self.log.info("Reusing existing image ({}), not "
                              "building.".format(self.output_image_spec))
                # no need to build, so skip to the end by `return`ing here
                # this will still execute the finally clause and lets us
                # avoid having to indent the build code by an extra level
                return

            if self.subdir:
                checkout_path = os.path.join(checkout_path, self.subdir)
                if not os.path.isdir(checkout_path):
                    self.log.error(
                        "Subdirectory %s does not exist",
                        self.subdir,
                        extra=dict(phase="failure"),
                    )
                    raise FileNotFoundError(
                        "Could not find {}".format(checkout_path))

            with chdir(checkout_path):
                for BP in self.buildpacks:
                    bp = BP()
                    if bp.detect():
                        picked_buildpack = bp
                        break
                else:
                    picked_buildpack = self.default_buildpack()

                picked_buildpack.appendix = self.appendix
                # Add metadata labels
                picked_buildpack.labels["repo2docker.version"] = self.version
                repo_label = "local" if os.path.isdir(self.repo) else self.repo
                picked_buildpack.labels["repo2docker.repo"] = repo_label
                picked_buildpack.labels["repo2docker.ref"] = self.ref

                if self.dry_run:
                    print(picked_buildpack.render())
                else:
                    self.log.debug(picked_buildpack.render(),
                                   extra=dict(phase="building"))
                    if self.user_id == 0:
                        raise ValueError(
                            "Root as the primary user in the image is not permitted."
                        )

                    build_args = {
                        "NB_USER": self.user_name,
                        "NB_UID": str(self.user_id),
                    }
                    if self.target_repo_dir:
                        build_args["REPO_DIR"] = self.target_repo_dir
                    self.log.info(
                        "Using %s builder\n",
                        picked_buildpack.__class__.__name__,
                        extra=dict(phase="building"),
                    )

                    for l in picked_buildpack.build(
                            docker_client,
                            self.output_image_spec,
                            self.build_memory_limit,
                            build_args,
                            self.cache_from,
                            self.extra_build_kwargs,
                    ):
                        if docker_client.string_output:
                            self.log.info(l, extra=dict(phase="building"))
                        # else this is Docker output
                        elif "stream" in l:
                            self.log.info(l["stream"],
                                          extra=dict(phase="building"))
                        elif "error" in l:
                            self.log.info(l["error"],
                                          extra=dict(phase="failure"))
                            raise BuildError(l["error"])
                        elif "status" in l:
                            self.log.info("Fetching base image...\r",
                                          extra=dict(phase="building"))
                        else:
                            self.log.info(json.dumps(l),
                                          extra=dict(phase="building"))

        finally:
            # Cleanup checkout if necessary
            if self.cleanup_checkout:
                shutil.rmtree(checkout_path, ignore_errors=True)

    def start(self):
        self.build()

        if self.push:
            self.push_image()

        if self.run:
            self.run_image()
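
A minimal usage sketch, assuming the class above is repo2docker's application
class and is importable as repo2docker.app.Repo2Docker (an assumption; the
class header is not shown in this excerpt). Traits are set directly, then
initialize() wires up logging and start() builds and, if configured, pushes
or runs the image:

from repo2docker.app import Repo2Docker

app = Repo2Docker()
app.repo = "https://github.com/binder-examples/requirements"  # example repo
app.output_image_spec = "my-image:v1"  # name:tag for the built image
app.run = True                         # launch a container after the build
app.initialize()                       # logging setup and sanity checks
app.start()                            # build(), then push/run if requested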
Example #4
class Widget(LoggingHasTraits):
    #-------------------------------------------------------------------------
    # Class attributes
    #-------------------------------------------------------------------------
    _widget_construction_callback = None

    # widgets is a dictionary of all active widget objects
    widgets = {}

    # widget_types is a registry of widgets by module, version, and name:
    widget_types = WidgetRegistry()

    @classmethod
    def close_all(cls):
        for widget in list(cls.widgets.values()):
            widget.close()


    @staticmethod
    def on_widget_constructed(callback):
        """Registers a callback to be called when a widget is constructed.

        The callback must have the following signature:
        callback(widget)"""
        Widget._widget_construction_callback = callback

    @staticmethod
    def _call_widget_constructed(widget):
        """Static method, called when a widget is constructed."""
        if Widget._widget_construction_callback is not None and callable(Widget._widget_construction_callback):
            Widget._widget_construction_callback(widget)

    @staticmethod
    def handle_comm_opened(comm, msg):
        """Static method, called when a widget is constructed."""
        version = msg.get('metadata', {}).get('version', '')
        if version.split('.')[0] != PROTOCOL_VERSION_MAJOR:
            raise ValueError("Incompatible widget protocol versions: received version %r, expected version %r"%(version, __protocol_version__))
        data = msg['content']['data']
        state = data['state']

        # Find the widget class to instantiate in the registered widgets
        widget_class = Widget.widget_types.get(state['_model_module'],
                                               state['_model_module_version'],
                                               state['_model_name'],
                                               state['_view_module'],
                                               state['_view_module_version'],
                                               state['_view_name'])
        widget = widget_class(comm=comm)
        if 'buffer_paths' in data:
            _put_buffers(state, data['buffer_paths'], msg['buffers'])
        widget.set_state(state)

    @staticmethod
    def get_manager_state(drop_defaults=False, widgets=None):
        """Returns the full state for a widget manager for embedding

        :param drop_defaults: when True, it will not include default values
        :param widgets: list with widgets to include in the state (or all widgets when None)
        :return: dict with 'version_major', 'version_minor' and per-widget 'state'
        """
        state = {}
        if widgets is None:
            widgets = Widget.widgets.values()
        for widget in widgets:
            state[widget.model_id] = widget._get_embed_state(drop_defaults=drop_defaults)
        return {'version_major': 2, 'version_minor': 0, 'state': state}
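
    # Usage sketch: the returned dict is the payload format used for widget
    # embedding; it can be dumped to JSON as-is, e.g.
    #
    #     state = Widget.get_manager_state(drop_defaults=True)
    #     blob = jsondumps(state)   # using this module's json dumps alias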


    def _get_embed_state(self, drop_defaults=False):
        state = {
            'model_name': self._model_name,
            'model_module': self._model_module,
            'model_module_version': self._model_module_version
        }
        model_state, buffer_paths, buffers = _remove_buffers(self.get_state(drop_defaults=drop_defaults))
        state['state'] = model_state
        if len(buffers) > 0:
            state['buffers'] = [{'encoding': 'base64',
                                 'path': p,
                                 'data': standard_b64encode(d).decode('ascii')}
                                for p, d in zip(buffer_paths, buffers)]
        return state

    def get_view_spec(self):
        return dict(version_major=2, version_minor=0, model_id=self._model_id)

    #-------------------------------------------------------------------------
    # Traits
    #-------------------------------------------------------------------------
    _model_name = Unicode('WidgetModel',
        help="Name of the model.", read_only=True).tag(sync=True)
    _model_module = Unicode('@jupyter-widgets/base',
        help="The namespace for the model.", read_only=True).tag(sync=True)
    _model_module_version = Unicode(__jupyter_widgets_base_version__,
        help="A semver requirement for namespace version containing the model.", read_only=True).tag(sync=True)
    _view_name = Unicode(None, allow_none=True,
        help="Name of the view.").tag(sync=True)
    _view_module = Unicode(None, allow_none=True,
        help="The namespace for the view.").tag(sync=True)
    _view_module_version = Unicode('',
        help="A semver requirement for the namespace version containing the view.").tag(sync=True)

    _view_count = Int(None, allow_none=True,
        help="EXPERIMENTAL: The number of views of the model displayed in the frontend. This attribute is experimental and may change or be removed in the future. None signifies that views will not be tracked. Set this to 0 to start tracking view creation/deletion.").tag(sync=True)
    comm = Instance('ipykernel.comm.Comm', allow_none=True)

    keys = List(help="The traits which are synced.")

    @default('keys')
    def _default_keys(self):
        return [name for name in self.traits(sync=True)]

    _property_lock = Dict()
    _holding_sync = False
    _states_to_send = Set()
    _display_callbacks = Instance(CallbackDispatcher, ())
    _msg_callbacks = Instance(CallbackDispatcher, ())

    #-------------------------------------------------------------------------
    # (Con/de)structor
    #-------------------------------------------------------------------------
    def __init__(self, **kwargs):
        """Public constructor"""
        self._model_id = kwargs.pop('model_id', None)
        super(Widget, self).__init__(**kwargs)

        Widget._call_widget_constructed(self)
        self.open()

    def __del__(self):
        """Object disposal"""
        self.close()

    #-------------------------------------------------------------------------
    # Properties
    #-------------------------------------------------------------------------

    def open(self):
        """Open a comm to the frontend if one isn't already open."""
        if self.comm is None:
            state, buffer_paths, buffers = _remove_buffers(self.get_state())

            args = dict(target_name='jupyter.widget',
                        data={'state': state, 'buffer_paths': buffer_paths},
                        buffers=buffers,
                        metadata={'version': __protocol_version__}
                        )
            if self._model_id is not None:
                args['comm_id'] = self._model_id

            self.comm = Comm(**args)

    @observe('comm')
    def _comm_changed(self, change):
        """Called when the comm is changed."""
        if change['new'] is None:
            return
        self._model_id = self.model_id

        self.comm.on_msg(self._handle_msg)
        Widget.widgets[self.model_id] = self

    @property
    def model_id(self):
        """Gets the model id of this widget.

        If a Comm doesn't exist yet, a Comm will be created automagically."""
        return self.comm.comm_id

    #-------------------------------------------------------------------------
    # Methods
    #-------------------------------------------------------------------------

    def close(self):
        """Close method.

        Closes the underlying comm.
        When the comm is closed, all of the widget views are automatically
        removed from the front-end."""
        if self.comm is not None:
            Widget.widgets.pop(self.model_id, None)
            self.comm.close()
            self.comm = None
            self._ipython_display_ = None

    def send_state(self, key=None):
        """Sends the widget state, or a piece of it, to the front-end, if it exists.

        Parameters
        ----------
        key : unicode, or iterable (optional)
            A single property's name or iterable of property names to sync with the front-end.
        """
        state = self.get_state(key=key)
        if len(state) > 0:
            state, buffer_paths, buffers = _remove_buffers(state)
            msg = {'method': 'update', 'state': state, 'buffer_paths': buffer_paths}
            self._send(msg, buffers=buffers)

    def get_state(self, key=None, drop_defaults=False):
        """Gets the widget state, or a piece of it.

        Parameters
        ----------
        key : unicode or iterable (optional)
            A single property's name or iterable of property names to get.

        Returns
        -------
        state : dict
            Dictionary mapping each synced property name to its JSON state.
        """
        if key is None:
            keys = self.keys
        elif isinstance(key, string_types):
            keys = [key]
        elif isinstance(key, collections.Iterable):
            keys = key
        else:
            raise ValueError("key must be a string, an iterable of keys, or None")
        state = {}
        traits = self.traits()
        for k in keys:
            to_json = self.trait_metadata(k, 'to_json', self._trait_to_json)
            value = to_json(getattr(self, k), self)
            if not PY3 and isinstance(traits[k], Bytes) and isinstance(value, bytes):
                value = memoryview(value)
            if not drop_defaults or not self._compare(value, traits[k].default_value):
                state[k] = value
        return state

    def _is_numpy(self, x):
        return x.__class__.__name__ == 'ndarray' and x.__class__.__module__ == 'numpy'

    def _compare(self, a, b):
        if self._is_numpy(a) or self._is_numpy(b):
            import numpy as np
            return np.array_equal(a, b)
        else:
            return a == b

    def set_state(self, sync_data):
        """Called when a state is received from the front-end."""
        # The order of these context managers is important. Properties must
        # be locked when the hold_trait_notification context manager is
        # released and notifications are fired.
        with self._lock_property(**sync_data), self.hold_trait_notifications():
            for name in sync_data:
                if name in self.keys:
                    from_json = self.trait_metadata(name, 'from_json',
                                                    self._trait_from_json)
                    self.set_trait(name, from_json(sync_data[name], self))

    def send(self, content, buffers=None):
        """Sends a custom msg to the widget model in the front-end.

        Parameters
        ----------
        content : dict
            Content of the message to send.
        buffers : list of binary buffers
            Binary buffers to send with message
        """
        self._send({"method": "custom", "content": content}, buffers=buffers)

    def on_msg(self, callback, remove=False):
        """(Un)Register a custom msg receive callback.

        Parameters
        ----------
        callback: callable
            callback will be passed three arguments when a message arrives::

                callback(widget, content, buffers)

        remove: bool
            True if the callback should be unregistered."""
        self._msg_callbacks.register_callback(callback, remove=remove)

    def on_displayed(self, callback, remove=False):
        """(Un)Register a widget displayed callback.

        Parameters
        ----------
        callback: method handler
            Must have a signature of::

                callback(widget, **kwargs)

            kwargs from display are passed through without modification.
        remove: bool
            True if the callback should be unregistered."""
        self._display_callbacks.register_callback(callback, remove=remove)

    def add_traits(self, **traits):
        """Dynamically add trait attributes to the Widget."""
        super(Widget, self).add_traits(**traits)
        for name, trait in traits.items():
            if trait.get_metadata('sync'):
                self.keys.append(name)
                self.send_state(name)

    def notify_change(self, change):
        """Called when a property has changed."""
        # Send the state to the frontend before the user-registered callbacks
        # are called.
        name = change['name']
        if self.comm is not None and self.comm.kernel is not None:
            # Make sure this isn't information that the front-end just sent us.
            if name in self.keys and self._should_send_property(name, getattr(self, name)):
                # Send new state to front-end
                self.send_state(key=name)
        super(Widget, self).notify_change(change)

    def __repr__(self):
        return self._gen_repr_from_keys(self._repr_keys())

    #-------------------------------------------------------------------------
    # Support methods
    #-------------------------------------------------------------------------
    @contextmanager
    def _lock_property(self, **properties):
        """Lock a property-value pair.

        The value should be the JSON state of the property.

        NOTE: This, in addition to the single lock for all state changes, is
        flawed.  In the future we may want to look into buffering state changes
        back to the front-end."""
        self._property_lock = properties
        try:
            yield
        finally:
            self._property_lock = {}

    @contextmanager
    def hold_sync(self):
        """Hold syncing any state until the outermost context manager exits"""
        if self._holding_sync is True:
            yield
        else:
            try:
                self._holding_sync = True
                yield
            finally:
                self._holding_sync = False
                self.send_state(self._states_to_send)
                self._states_to_send.clear()
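
    # Usage sketch: hold_sync() batches several synced-trait assignments into
    # a single update message (the trait names below are hypothetical):
    #
    #     with w.hold_sync():
    #         w.value = 1
    #         w.step = 5   # both keys are sent together when the block exits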

    def _should_send_property(self, key, value):
        """Check the property lock (property_lock)"""
        to_json = self.trait_metadata(key, 'to_json', self._trait_to_json)
        if key in self._property_lock:
            # model_state, buffer_paths, buffers
            split_value = _remove_buffers({ key: to_json(value, self)})
            split_lock = _remove_buffers({ key: self._property_lock[key]})
            # A roundtrip conversion through json in the comparison takes care of
            # idiosyncrasies of how python data structures map to json, for example
            # tuples get converted to lists.
            if (jsonloads(jsondumps(split_value[0])) == split_lock[0]
                and split_value[1] == split_lock[1]
                and _buffer_list_equal(split_value[2], split_lock[2])):
                return False
        if self._holding_sync:
            self._states_to_send.add(key)
            return False
        else:
            return True

    # Event handlers
    @_show_traceback
    def _handle_msg(self, msg):
        """Called when a msg is received from the front-end"""
        data = msg['content']['data']
        method = data['method']

        if method == 'update':
            if 'state' in data:
                state = data['state']
                if 'buffer_paths' in data:
                    _put_buffers(state, data['buffer_paths'], msg['buffers'])
                self.set_state(state)

        # Handle a state request.
        elif method == 'request_state':
            self.send_state()

        # Handle a custom msg from the front-end.
        elif method == 'custom':
            if 'content' in data:
                self._handle_custom_msg(data['content'], msg['buffers'])

        # Catch remainder.
        else:
            self.log.error('Unknown front-end to back-end widget msg with method "%s"' % method)

    def _handle_custom_msg(self, content, buffers):
        """Called when a custom msg is received."""
        self._msg_callbacks(self, content, buffers)

    def _handle_displayed(self, **kwargs):
        """Called when a view has been displayed for this widget instance"""
        self._display_callbacks(self, **kwargs)

    @staticmethod
    def _trait_to_json(x, self):
        """Convert a trait value to json."""
        return x

    @staticmethod
    def _trait_from_json(x, self):
        """Convert json values to objects."""
        return x

    def _ipython_display_(self, **kwargs):
        """Called when `IPython.display.display` is called on the widget."""
        if self._view_name is not None:

            plaintext = repr(self)
            if len(plaintext) > 110:
                plaintext = plaintext[:110] + '…'
            # The 'application/vnd.jupyter.widget-view+json' mimetype has not been registered yet.
            # See the registration process and naming convention at
            # http://tools.ietf.org/html/rfc6838
            # and the currently registered mimetypes at
            # http://www.iana.org/assignments/media-types/media-types.xhtml.
            data = {
                'text/plain': plaintext,
                'application/vnd.jupyter.widget-view+json': {
                    'version_major': 2,
                    'version_minor': 0,
                    'model_id': self._model_id
                }
            }
            display(data, raw=True)

            self._handle_displayed(**kwargs)

    def _send(self, msg, buffers=None):
        """Sends a message to the model in the front-end."""
        if self.comm is not None and self.comm.kernel is not None:
            self.comm.send(data=msg, buffers=buffers)

    def _repr_keys(self):
        traits = self.traits()
        for key in sorted(self.keys):
            # Exclude traits that start with an underscore
            if key[0] == '_':
                continue
            # Exclude traits who are equal to their default value
            value = getattr(self, key)
            trait = traits[key]
            if self._compare(value, trait.default_value):
                continue
            elif (isinstance(trait, (Container, Dict)) and
                  trait.default_value == Undefined and
                  (value is None or len(value) == 0)):
                # Empty container, and dynamic default will be empty
                continue
            yield key

    def _gen_repr_from_keys(self, keys):
        class_name = self.__class__.__name__
        signature = ', '.join(
            '%s=%r' % (key, getattr(self, key))
            for key in keys
        )
        return '%s(%s)' % (class_name, signature)
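
A short sketch of the pattern this base class enables: any trait tagged with
sync=True lands in the default `keys` list and is synchronized with the
front-end model via send_state()/set_state(). The class and model/view names
below are hypothetical placeholders:

class CounterWidget(Widget):
    # Redeclare the model/view traits for this widget type (placeholders).
    _model_name = Unicode('CounterModel').tag(sync=True)
    _view_name = Unicode('CounterView').tag(sync=True)
    # A synced application trait; assignments trigger notify_change(),
    # which forwards the new value to the front-end.
    value = Int(0, help="Current count.").tag(sync=True)

w = CounterWidget()            # requires a running kernel to open the Comm
w.value = 3                    # synced to the front-end model
w.send({'event': 'reset'})     # custom message, routed to on_msg callbacks
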
class TriggerEffiencyGenerator(Tool):
    name = "TriggerEffiencyGenerator"
    description = "Generate the a pickle file of TriggerEffiency for " \
                  "either MC or data files."

    telescopes = Int(1,
                     help='Telescopes to include from the event file. '
                     'Default = 1').tag(config=True)
    output_name = Unicode('trigger_efficiency',
                          help='Name of the output trigger efficiency hdf5 '
                          'file').tag(config=True)
    input_path = Unicode(help='Path to directory containing data').tag(
        config=True)

    max_events = Int(1000,
                     help='Maximum number of events to use').tag(config=True)

    plot_cam = Bool(False,
                    help='Enable plotting of individual '
                    'cameras').tag(config=True)

    use_true_pe = Bool(False, help='Use true MC p.e.').tag(config=True)

    aliases = Dict(
        dict(input_path='TriggerEffiencyGenerator.input_path',
             output_name='TriggerEffiencyGenerator.output_name',
             max_events='TriggerEffiencyGenerator.max_events',
             clip_amplitude='CameraDL1Calibrator.clip_amplitude',
             radius='CameraDL1Calibrator.radius',
             max_pe='TriggerEffiencyGenerator.max_pe',
             T='TriggerEffiencyGenerator.telescopes',
             plot_cam='TriggerEffiencyGenerator.plot_cam',
             use_true_pe='TriggerEffiencyGenerator.use_true_pe'))
    classes = List([EventSourceFactory, CameraDL1Calibrator, CameraCalibrator])

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.eventsource = None
        self.r1 = None
        self.dl0 = None
        self.dl1 = None
        self.cal = None
        self.trig_eff_array = []
        self.disc_array = []

    def setup(self):
        kwargs = dict(config=self.config, tool=self)
        self.dl0 = CameraDL0Reducer(**kwargs)

        self.dl1 = CameraDL1Calibrator(**kwargs)

        self.cal = CameraCalibrator()

    def start(self):
        run_list = np.loadtxt('%s/../runlist.txt' % self.input_path,
                              unpack=True)
        file_list = listdir('%s' % self.input_path)
        file_list.sort()
        plot_cam = False
        plot_delay = 0.5
        disp = None
        n_events = []
        trig_eff = []

        n_pe = []
        debug = False  # set True to plot the trigger-efficiency curve
        if debug:
            fig = plt.figure(1)
            ax = fig.add_subplot(111)
        for n, run in enumerate(run_list[0]):
            n_events.append(run_list[5][n])
            n_pe.append(run_list[3][n])

            if str(int(run)) not in file_list[n]:
                print(str(int(run)), file_list[n])
                print('check runlist.txt order, needs to be sorted?')
                exit()
            file_name = "%s/%s" % (self.input_path, file_list[n])
            print(file_name)

            n_trig = 0
            try:
                print('trying to open file')
                source = EventSourceFactory.produce(input_url=file_name,
                                                    max_events=self.max_events)
                for event in tqdm(source):
                    n_trig = n_trig + 1

            except FileNotFoundError:
                print('file_not_found')
            print(run_list[7][n], n_trig, run_list[5][n],
                  n_trig / run_list[5][n])
            trig_eff.append(n_trig / run_list[5][n])
            self.trig_eff_array.append(n_trig / run_list[5][n])
            self.disc_array.append(run_list[7][n])
            # exit()

        if debug:
            plt.plot(self.disc_array, self.trig_eff_array)
            plt.show()

    def finish(self):
        with open(self.output_name, 'w') as out_file:
            for n, i in enumerate(self.trig_eff_array):
                out_file.write('%s\t%s\n' % (self.disc_array[n], i))
        print('done')
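
ctapipe Tools like the one above are normally driven through run(), which
parses the command line using `aliases` and then chains setup(), start() and
finish(); a hedged sketch (the exact entry-point API depends on the ctapipe
version):

def main():
    exe = TriggerEffiencyGenerator()
    exe.run()  # setup() -> start() -> finish()

if __name__ == '__main__':
    main()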
Example #6
class UrlAuthenticator(Authenticator):
    """
    Class for authenticating to jupyterhub against a remote URL.
    """

    # config values

    # address of the server hosting the login service
    server_address = Unicode(default_value='http://127.0.0.1',
                             config=True,
                             help='Address of the server with the login route')

    # port the service is exposed on
    server_port = Int(
        default_value=8080,
        config=True,
        help='Port on which to contact login server',
    )

    # route to the service on the server_address:port
    # TODO: make this smarter about leading slashes...
    login_route = Unicode(
        default_value='/login',
        config=True,
        help='Route for the login service (assumes leading slash)')

    @gen.coroutine
    def authenticate(self, handler, data):
        """
        Authenticate against a URL that provides an authentication service.
        Args:
            handler - the RequestHandler from Jupyter
            data - the data from the hub login form.
        """
        resp = self.do_request(data)
        return self.process_response(resp)

    def do_request(self, data):
        """
        Send the request with the user credentials to the login server and
        return the response.
        """
        url = '%s:%s%s' % (self.server_address, self.server_port,
                           self.login_route)

        # get an httprequest with the headers and such using the provided data
        r = UrlAuthenticator.create_request(url, data)
        resp = None

        # hit the url and hopefully get a good response
        try:
            with urllib.request.urlopen(r) as f:
                resp = f.read()
        except urllib.error.HTTPError:
            return None

        return resp

    def process_response(self, resp):
        """
        Do whatever checks are necessary against the response to determine if
        the user should be authenticated.
        """
        # if we had a good response, get the user name out of it (if there)
        # and return that; otherwise, return None (indicating a bad login attempt)
        if resp:
            d = json.loads(resp.decode())
            return d.get('username', None)

        return None

    @staticmethod
    def create_request(url, data):
        """
        Make a Request object to hit the URL. Fills in some boilerplate stuff
        for a Request object.

        url is the full url (constructed from address, port, and route values)
        data is the data from a POST to the login form of the hub
        """

        conttype = 'application/json; charset=UTF-8'
        jdata = json.dumps(data).encode('utf-8')

        headers = {
            'Content-Type': conttype,
            'Content-Length': len(jdata),
        }

        return urllib.request.Request(url, jdata, headers)
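
A hedged sketch of enabling this authenticator from jupyterhub_config.py; the
dotted module path below is hypothetical and depends on how the class is
packaged (`c` is the config object JupyterHub provides to config files):

c.JupyterHub.authenticator_class = 'urlauthenticator.UrlAuthenticator'
c.UrlAuthenticator.server_address = 'http://auth.internal'
c.UrlAuthenticator.server_port = 8080
c.UrlAuthenticator.login_route = '/login'
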
class TF_editor(widgets.DOMWidget):
    _view_name = Unicode('TransferFunctionView').tag(sync=True)
    _model_name = Unicode('TransferFunctionModel').tag(sync=True)
    _view_module = Unicode('k3d').tag(sync=True)
    _model_module = Unicode('k3d').tag(sync=True)

    _view_module_version = Unicode(version).tag(sync=True)
    _model_module_version = Unicode(version).tag(sync=True)

    # readonly (specified at creation)
    height = Int().tag(sync=True)

    # read-write
    color_map = Array(dtype=np.float32).tag(
        sync=True, **array_serialization_wrap('color_map'))
    opacity_function = Array(dtype=np.float32).tag(
        sync=True, **array_serialization_wrap('opacity_function'))

    def __init__(self, height, color_map, opacity_function, *args, **kwargs):
        super(TF_editor, self).__init__()

        self.height = height

        with self.hold_trait_notifications():
            self.color_map = color_map
            self.opacity_function = opacity_function

        self.outputs = []

    def display(self, **kwargs):
        output = widgets.Output()

        with output:
            display(self, **kwargs)

        self.outputs.append(output)

        display(output)

    def close(self):
        for output in self.outputs:
            output.clear_output()

        self.outputs = []

    def __getitem__(self, name):
        return getattr(self, name)

    @validate('color_map')
    def _validate_color_map(self, proposal):
        if proposal['value'].shape == ():
            return proposal['value']

        cm_min, cm_max = np.min(proposal['value'][::4]), np.max(
            proposal['value'][::4])

        if cm_min != 0.0 or cm_max != 1.0:
            proposal['value'][::4] = (proposal['value'][::4] -
                                      cm_min) / (cm_max - cm_min)

        return proposal['value']

    @validate('opacity_function')
    def _validate_opacity_function(self, proposal):
        if proposal['value'].shape == ():
            return proposal['value']

        of_min, of_max = np.min(proposal['value'][::2]), np.max(
            proposal['value'][::2])

        if of_min != 0.0 or of_max != 1.0:
            proposal['value'][::2] = (proposal['value'][::2] -
                                      of_min) / (of_max - of_min)

        return proposal['value']
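
A usage sketch inferred from the validators above: color_map is read as
flattened (x, r, g, b) quadruples and opacity_function as flattened
(x, opacity) pairs, with the x positions rescaled into [0, 1] if needed.
The concrete values are illustrative only:

import numpy as np

color_map = np.array([0.0, 0.0, 0.0, 1.0,   # x=0.0 -> blue
                      1.0, 1.0, 0.0, 0.0],  # x=1.0 -> red
                     dtype=np.float32)
opacity_function = np.array([0.0, 0.0,      # transparent at x=0
                             1.0, 1.0],     # opaque at x=1
                            dtype=np.float32)

editor = TF_editor(height=300, color_map=color_map,
                   opacity_function=opacity_function)
editor.display()  # inside a notebook, renders via an Output widget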
Example #8
class VoilaConfiguration(traitlets.config.Configurable):
    """Common configuration options between the server extension and the application."""
    allow_template_override = Enum(['YES', 'NOTEBOOK', 'NO'],
                                   'YES',
                                   help='''
    Allow overriding the template (YES), or not (NO), or only from the notebook metadata.
    ''',
                                   config=True)
    allow_theme_override = Enum(['YES', 'NOTEBOOK', 'NO'],
                                'YES',
                                help='''
    Allow overriding the theme (YES), or not (NO), or only from the notebook metadata.
    ''',
                                config=True)
    template = Unicode('lab',
                       config=True,
                       allow_none=True,
                       help=('template name to be used by voila.'))
    resources = Dict(allow_none=True,
                     config=True,
                     help="""
        extra resources used by templates;
        example use with --template=reveal
        --VoilaConfiguration.resources="{'reveal': {'transition': 'fade', 'scroll': True}}"
        """)
    theme = Unicode('light', config=True)
    strip_sources = Bool(True,
                         config=True,
                         help='Strip sources from rendered html')
    enable_nbextensions = Bool(
        False,
        config=True,
        help=('Set to True for Voilà to load notebook extensions'))

    file_whitelist = List(
        Unicode(),
        [r'.*\.(png|jpg|gif|svg)'],
        config=True,
        help=r"""
    List of regular expressions for controlling which static files are served.
    All files that are served must match at least one whitelist rule and no blacklist rule.
    Example: --VoilaConfiguration.file_whitelist="['.*\.(png|jpg|gif|svg)', 'public.*']"
    """,
    )

    file_blacklist = List(Unicode(), [r'.*\.(ipynb|py)'],
                          config=True,
                          help=r"""
    List of regular expressions for controlling which static files are forbidden to be served.
    All files that are served must match at least one whitelist rule and no blacklist rule.
    Example:
    --VoilaConfiguration.file_whitelist="['.*']" # all files
    --VoilaConfiguration.file_blacklist="['private.*', '.*\.(ipynb)']" # except files in the private dir and notebook files
    """)

    language_kernel_mapping = Dict(
        {},
        config=True,
        help="""Mapping of language name to kernel name
        Example mapping python to use xeus-python, and C++11 to use xeus-cling:
        --VoilaConfiguration.language_kernel_mapping='{"python": "xpython", "C++11": "xcpp11"}'
        """,
    )

    extension_language_mapping = Dict(
        {},
        config=True,
        help='''Mapping of file extension to kernel language
        Example mapping .py files to a python language kernel, and .cpp to a C++11 language kernel:
        --VoilaConfiguration.extension_language_mapping='{".py": "python", ".cpp": "C++11"}'
        ''',
    )

    http_keep_alive_timeout = Int(10,
                                  config=True,
                                  help="""
    When a cell takes a long time to execute, the http connection can timeout (possibly because of a proxy).
    Voila sends a 'heartbeat' message after the timeout is passed to keep the http connection alive.
    """)

    show_tracebacks = Bool(
        False,
        config=True,
        help=('Whether to send tracebacks to clients on exceptions.'))

    multi_kernel_manager_class = Type(
        config=True,
        default_value=
        'jupyter_server.services.kernels.kernelmanager.AsyncMappingKernelManager',
        # default_value='voila.voila_kernel_manager.VoilaKernelManager',
        klass='jupyter_client.multikernelmanager.MultiKernelManager',
        help=
        """The kernel manager class. This is useful to specify a different kernel manager,
        for example a kernel manager with support for pooling.
        """)

    http_header_envs = List(
        Unicode(),
        [],
        help=r"""
    List of HTTP Headers that should be passed as env vars to the kernel.
    Example: --VoilaConfiguration.http_header_envs="['X-CDSDASHBOARDS-JH-USER']"
    """,
    ).tag(config=True)

    preheat_kernel = Bool(
        False,
        config=True,
        help="""Flag to enable or disable pre-heat kernel option.
        """)
    default_pool_size = Int(
        1,
        config=True,
        help=
        """Size of pre-heated kernel pool for each notebook. Zero or negative number means disabled.
        """)
Example #9
class DisplayIntegrator(Tool):
    name = "DisplayIntegrator"
    description = "Calibrate dl0 data to dl1, and plot the various camera " \
                  "images that characterise the event and calibration. Also " \
                  "plot some examples of waveforms with the " \
                  "integration window."

    event_index = Int(0, help='Event index to view.').tag(config=True)
    use_event_id = Bool(False,
                        help='event_index will obtain an event using '
                        'event_id instead of '
                        'index.').tag(config=True)
    telescope = Int(None,
                    allow_none=True,
                    help='Telescope to view. Set to None to display the first '
                    'telescope with data.').tag(config=True)
    channel = Enum([0, 1], 0, help='Channel to view').tag(config=True)

    aliases = Dict(
        dict(r='EventFileReaderFactory.reader',
             f='EventFileReaderFactory.input_path',
             max_events='EventFileReaderFactory.max_events',
             extractor='ChargeExtractorFactory.extractor',
             window_width='ChargeExtractorFactory.window_width',
             window_shift='ChargeExtractorFactory.window_shift',
             sig_amp_cut_HG='ChargeExtractorFactory.sig_amp_cut_HG',
             sig_amp_cut_LG='ChargeExtractorFactory.sig_amp_cut_LG',
             lwt='ChargeExtractorFactory.lwt',
             clip_amplitude='CameraDL1Calibrator.clip_amplitude',
             radius='CameraDL1Calibrator.radius',
             E='DisplayIntegrator.event_index',
             T='DisplayIntegrator.telescope',
             C='DisplayIntegrator.channel',
             O='IntegratorPlotter.output_dir'))
    flags = Dict(
        dict(id=({
            'DisplayDL1Calib': {
                'use_event_index': True
            }
        }, 'event_index will obtain an event using '
                 'event_id instead of index.')))
    classes = List([
        EventFileReaderFactory, ChargeExtractorFactory, CameraDL1Calibrator,
        IntegratorPlotter
    ])

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.file_reader = None
        self.r1 = None
        self.dl0 = None
        self.extractor = None
        self.dl1 = None
        self.plotter = None

    def setup(self):
        self.log_format = "%(levelname)s: %(message)s [%(name)s.%(funcName)s]"
        kwargs = dict(config=self.config, tool=self)

        reader_factory = EventFileReaderFactory(**kwargs)
        reader_class = reader_factory.get_class()
        self.file_reader = reader_class(**kwargs)

        extractor_factory = ChargeExtractorFactory(**kwargs)
        extractor_class = extractor_factory.get_class()
        self.extractor = extractor_class(**kwargs)

        r1_factory = CameraR1CalibratorFactory(origin=self.file_reader.origin,
                                               **kwargs)
        r1_class = r1_factory.get_class()
        self.r1 = r1_class(**kwargs)

        self.dl0 = CameraDL0Reducer(**kwargs)

        self.dl1 = CameraDL1Calibrator(extractor=self.extractor, **kwargs)

        self.plotter = IntegratorPlotter(**kwargs)

    def start(self):
        event = self.file_reader.get_event(self.event_index, self.use_event_id)

        # Calibrate
        self.r1.calibrate(event)
        self.dl0.reduce(event)
        self.dl1.calibrate(event)

        # Select telescope
        tels = list(event.r0.tels_with_data)
        telid = self.telescope
        if telid is None:
            telid = tels[0]
        if telid not in tels:
            self.log.error("[event] please specify one of the following "
                           "telescopes for this event: {}".format(tels))
            exit()

        extractor_name = self.extractor.name

        self.plotter.plot(self.file_reader, event, telid, self.channel,
                          extractor_name)

    def finish(self):
        pass
Example #10
class Evaluate(App):
    dataset_type = Enum(('dblp', 'pubmed', 'oc'), default_value='pubmed')
    candidate_selector_type = Enum(('bm25', 'ann', 'oracle'),
                                   default_value='bm25')
    metric = Enum(('precision', 'recall', 'f1'), default_value='recall')
    split = Enum(('train', 'test', 'valid'), default_value='valid')

    # ann options
    paper_embedder_dir = Unicode(default_value=None, allow_none=True)

    # Candidate selector options
    num_candidates = Int(default_value=None, allow_none=True)

    ranker_type = Enum(('none', 'neural'), default_value='none')
    n_eval = Int(default_value=None, allow_none=True)

    # ranker options
    citation_ranker_dir = Unicode(default_value=None, allow_none=True)

    _embedder = None
    _ann = None

    def embedder(self, featurizer, embedding_model) -> EmbeddingModel:
        if self._embedder is None:
            self._embedder = EmbeddingModel(featurizer, embedding_model)
        return self._embedder

    def ann(self, embedder, corpus) -> ANN:
        if corpus.corpus_type == 'oc' and os.path.exists(
                DatasetPaths.OC_ANN_FILE + ".pickle"):
            self._ann = ANN.load(DatasetPaths.OC_ANN_FILE)
            return self._ann
        if self._ann is None:
            self._ann = ANN.build(embedder, corpus, ann_trees=100)
            if self.dataset_type == 'oc':
                self._ann.save(DatasetPaths.OC_ANN_FILE)
        return self._ann

    def _make_ann_candidate_selector(self, corpus, featurizer, embedding_model,
                                     num_candidates):
        e = self.embedder(featurizer, embedding_model)
        return ANNCandidateSelector(corpus=corpus,
                                    ann=self.ann(e, corpus),
                                    paper_embedding_model=e,
                                    top_k=num_candidates,
                                    extend_candidate_citations=True)

    def main(self, args):
        dp = DatasetPaths()
        if self.dataset_type == 'oc':
            corpus = Corpus.load_pkl(dp.get_pkl_path(self.dataset_type))
        else:
            corpus = Corpus.load(dp.get_db_path(self.dataset_type))

        if self.ranker_type == 'none':
            citation_ranker = NoneRanker()
        elif self.ranker_type == 'neural':
            assert self.citation_ranker_dir is not None
            ranker_featurizer, ranker_models = model_from_directory(
                self.citation_ranker_dir, on_cpu=True)
            citation_ranker = Ranker(
                corpus=corpus,
                featurizer=ranker_featurizer,
                citation_ranker=ranker_models['citeomatic'],
                num_candidates_to_rank=100)
        else:
            assert False

        candidate_results_map = {}
        if self.num_candidates is None:
            if self.dataset_type == 'oc':
                num_candidates_list = [100]
            else:
                num_candidates_list = [1, 5, 10, 15, 25, 50, 75, 100]
        else:
            num_candidates_list = [self.num_candidates]

        for num_candidates in num_candidates_list:

            if self.candidate_selector_type == 'bm25':
                index_path = dp.get_bm25_index_path(self.dataset_type)
                candidate_selector = BM25CandidateSelector(
                    corpus, index_path, num_candidates, False)
            elif self.candidate_selector_type == 'ann':
                assert self.paper_embedder_dir is not None
                featurizer, models = model_from_directory(
                    self.paper_embedder_dir, on_cpu=True)
                candidate_selector = self._make_ann_candidate_selector(
                    corpus=corpus,
                    featurizer=featurizer,
                    embedding_model=models['embedding'],
                    num_candidates=num_candidates)
            elif self.candidate_selector_type == 'oracle':
                candidate_selector = OracleCandidateSelector(corpus)
            else:
                assert False

            results = eval_text_model(corpus,
                                      candidate_selector,
                                      citation_ranker,
                                      papers_source=self.split,
                                      n_eval=self.n_eval)
            candidate_results_map[num_candidates] = results

        best_k = -1
        best_metric = 0.0
        metric_key = self.metric + "_1"
        eval_key = EVAL_DATASET_KEYS[self.dataset_type]
        for k, v in candidate_results_map.items():
            if v[metric_key][eval_key] > best_metric:
                best_k = k
                best_metric = v[metric_key][eval_key]

        print(json.dumps(candidate_results_map, indent=4, sort_keys=True))
        print(best_k)
        print(best_metric)
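
# A hedged illustration (not from the original source): the sweep above keeps
# one result dict per candidate count, then picks the count whose
# "<metric>_1" entry is largest under the evaluation dataset key. With
# made-up numbers and a hypothetical 'test' key:
toy_results = {10: {'recall_1': {'test': 0.42}}, 50: {'recall_1': {'test': 0.57}}}
toy_best_k = max(toy_results, key=lambda k: toy_results[k]['recall_1']['test'])
assert toy_best_k == 50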
Example #11
class ResourceUseDisplay(Configurable):
    """
    Holds server-side configuration for nbresuse
    """

    process_cpu_metrics = List(
        trait=PSUtilMetric(),
        default_value=[{
            "name": "cpu_percent",
            "kwargs": {
                "interval": 0.05
            }
        }],
    )

    system_cpu_metrics = List(trait=PSUtilMetric(),
                              default_value=[{
                                  "name": "cpu_count"
                              }])

    mem_limit = Union(
        trait_types=[Int(), Callable()],
        help="""
        Memory limit to display to the user, in bytes.
        Can also be a function which calculates the memory limit.

        Note that this does not actually limit the user's memory usage!

        Defaults to reading from the `MEM_LIMIT` environment variable. If
        set to 0, the max memory available is displayed.
        """,
    ).tag(config=True)

    @default("mem_limit")
    def _mem_limit_default(self):
        return int(os.environ.get("MEM_LIMIT", 0))

    track_cpu_percent = Bool(
        default_value=True,
        help="""
        Set to True in order to enable reporting of CPU usage statistics.
        """,
    ).tag(config=True)

    cpu_limit = Union(
        trait_types=[Float(), Callable()],
        default_value=0,
        help="""
        CPU usage limit to display to the user.

        Note that this does not actually limit the user's CPU usage!

        Defaults to reading from the `CPU_LIMIT` environment variable. If
        set to 0, the total CPU count available is displayed.
        """,
    ).tag(config=True)

    @default("cpu_limit")
    def _cpu_limit_default(self):
        return float(os.environ.get("CPU_LIMIT", 0))

    track_disk_usage = Bool(
        default_value=True,
        help="""
        Set to True in order to enable reporting of Disk usage statistics.
        """,
    ).tag(config=True)

    disk_limit = Union(
        trait_types=[Int(), Callable()],
        default_value=0,
        help="""
        Disk usage limit to display to the user.

        Note that this does not actually limit the user's Disk space!

        Defaults to reading from the `DISK_LIMIT` environment variable. If
        set to 0, the total partition space available is displayed.
        """,
    ).tag(config=True)

    @default("disk_limit")
    def _disk_limit_default(self):
        return int(os.environ.get("DISK_LIMIT", 0))

    disk_dir = Union(
        trait_types=[Unicode(), Callable()],
        default_value=os.getcwd(),
        help="""
        A directory on the partition whose disk usage should be reported.

        Defaults to reading from the `DISK_DIR` environment variable. If
        not defined, it falls back to the current working directory.
        """,
    ).tag(config=True)

    @default("disk_dir")
    def _disk_dir_default(self):
        return str(os.environ.get("DISK_DIR", os.getcwd()))
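
# A hedged usage sketch (not part of nbresuse itself): the @default handlers
# above read their initial values from environment variables, so the limits
# can be set before the server process starts. Assumes ResourceUseDisplay is
# the class defined above and its traitlets imports are in scope.
import os

os.environ["MEM_LIMIT"] = str(2 * 1024 ** 3)  # display a 2 GiB memory limit
os.environ["CPU_LIMIT"] = "4"                 # display a 4-CPU limit

display = ResourceUseDisplay()
assert display.mem_limit == 2 * 1024 ** 3
assert display.cpu_limit == 4.0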
Example #12
class KeplerGl(widgets.DOMWidget):
    """An example widget."""
    _view_name = Unicode('KeplerGlView').tag(sync=True)
    _model_name = Unicode('KeplerGlModal').tag(sync=True)
    _view_module = Unicode('keplergl-jupyter').tag(sync=True)
    _model_module = Unicode('keplergl-jupyter').tag(sync=True)
    _view_module_version = Unicode(EXTENSION_SPEC_VERSION).tag(sync=True)
    _model_module_version = Unicode(EXTENSION_SPEC_VERSION).tag(sync=True)
    value = Unicode('Hello World!').tag(sync=True)

    data = Dict({}).tag(sync=True, **data_serialization)
    config = Dict({}).tag(sync=True)
    height = Int(400).tag(sync=True)

    def __init__(self, **kwargs):
        super(KeplerGl, self).__init__(**kwargs)
        print('User Guide: {}'.format(documentation))

    @validate('data')
    def _validate_data(self, proposal):
        '''Validate data input.

        Makes sure data is a dict, and that each value is either a DataFrame,
        a GeoJSON dictionary / string, or a csv string.
        '''

        if type(proposal.value) is not dict:
            raise DataException(
                '[data type error]: Expecting a dictionary mapping from id to value, but got {}'
                .format(type(proposal.value)))

        else:
            for key, value in proposal.value.items():
                if not isinstance(value, pd.DataFrame) and (
                        type(value) is not str) and (type(value) is not dict):
                    raise DataException(
                        '[data type error]: value of {} should be a DataFrame, a Geojson Dictionary or String, a csv String, but got {}'
                        .format(key, type(value)))

        return proposal.value

    def add_data(self, data, name="unnamed"):
        '''Send data to the map.

        Inputs:
        - data: a pandas DataFrame, a GeoJSON dictionary / string, or a csv string
        - name: name of the dataset (string)

        Example of use:
            keplergl.add_data(data_string, name="data_1")
        '''

        normalized = _normalize_data(data)
        copy = self.data.copy()
        copy.update({name: normalized})

        self.data = copy

    def save_to_html(self,
                     data=None,
                     config=None,
                     file_name='keplergl_map.html',
                     read_only=False):
        ''' Save current map to an interactive html

        Inputs:
        - data: a data dictionary {"name": data}, if not provided, will use current map data
        - config: map config dictionary, if not provided, will use current map config
        - file_name: the html file name, default is keplergl_map.html
        - read_only: if read_only is True, hide side panel to disable map customization

        Returns:
        - an html file will be saved to your notebook

        Example of use:
            # this will save map with provided data and config
            keplergl.save_to_html(data={"data_1": df}, config=config, file_name='first_map.html')

            # this will save current map
            keplergl.save_to_html(file_name='first_map.html')

        '''
        keplergl_html = resource_string(__name__,
                                        'static/keplergl.html').decode('utf-8')
        # find the opening <body> tag
        k = keplergl_html.find("<body>")

        data_to_add = (data_to_json(self.data, None)
                       if data is None else data_to_json(data, None))
        config_to_add = self.config if config is None else config


        keplergl_data = json.dumps({
            "config": config_to_add,
            "data": data_to_add,
            "options": {
                "readOnly": read_only
            }
        })

        cmd = """window.__keplerglDataConfig = {};""".format(keplergl_data)
        # inject the data/config right after the <body> tag (len("<body>") == 6)
        frame_txt = (keplergl_html[:k] + "<body><script>" + cmd + "</script>" +
                     keplergl_html[k + 6:])

        with open(file_name, 'wb') as f:
            f.write(frame_txt.encode('utf-8'))

        print("Map saved to {}!".format(file_name))
Example #13
class DataGrid(DOMWidget):
    """A Grid Widget with filter, sort and selection capabilities.

    Attributes
    ----------
    base_row_size : int (default: 20)
        Default row height
    base_column_size : int (default: 64)
        Default column width
    base_row_header_size : int (default: 64)
        Default row header width
    base_column_header_size : int (default: 20)
        Default column header height
    header_visibility : {'all', 'row', 'column', 'none'} (default: 'all')
        Header visibility mode
        'all': both row and column headers visible
        'row': only row headers visible
        'column': only column headers visible
        'none': neither row nor column headers visible
    dataframe : pandas dataframe
        Data to display on Data Grid.
    renderers : dict
        Custom renderers to use for cell rendering. Keys of dictionary specify
        column name, and value specifies the renderer
    default_renderer : CellRenderer (default: TextRenderer)
        Default renderer to use for cell rendering
    header_renderer : CellRenderer (default: TextRenderer)
        Renderer to use for header cell rendering
    corner_renderer : CellRenderer (default: TextRenderer)
        Renderer to use for corner header cell rendering
    selection_mode : {'row', 'column', 'cell', 'none'} (default: 'none')
        Selection mode used when user clicks on grid or makes selections
        programmatically.
        'row': Selecting a cell will select all the cells on the same row
        'column': Selecting a cell will select all the cells on the same column
        'cell': Individual cell selection
        'none': Selection disabled
    selections : list of dict
        List of all selections. Selections are represented as rectangular
        regions. Rectangles are defined as dictionaries with keys:
        'r1': start row, 'c1': start column, 'r2': end row, 'c2': end column.
        Start of rectangle is top-left corner and end is bottom-right corner
    editable : boolean (default: false)
        Boolean indicating whether cell grid can be directly edited
    column_widths : Dict of strings to int (default: {})
        Dict to specify custom column sizes
        The keys (strings) indicate the names of the columns
        The values (integers) indicate the widths
    auto_fit_columns : Bool (default: False)
        Specify whether column width should automatically be
        determined by the grid
    auto_fit_params : Dict. Specify column auto fit parameters.
        Supported parameters:
        1) area: where to resize column widths - 'row-header',
                'body' or 'all' (default)
        2) padding: add padding to resized column widths (30 pixels by default)
        3) numCols: cap the number of columns to be resized (default: None)
    grid_style : Dict of {propertyName: string | VegaExpr | Dict}
        Dict to specify global grid styles.
        The keys (strings) indicate the styling property
        The values (css color properties or Vega Expression) indicate the values
        See below for all supported styling properties
    index_name : str (default: "key")
        String to specify the index column name. **Only set when the grid
        is constructed and is not an observable traitlet**

    Accessors (not observable traitlets)
    ------------------------------------
    selected_cells : list of dict
        List of selected cells. Each cell is represented as a dictionary
        with keys 'r': row and 'c': column
    selected_cell_values : list
        List of values for all selected cells.
    selected_cell_iterator : iterator
        An iterator to traverse selected cells one by one.

    Supported styling properties:
        void_color : color of the area where the grid is not painted
            on the canvas
        background_color : background color for all body cells
        row_background_color : row-wise background color (can take
            a string or Vega Expression)
        column_background_color : column-wise background color (can take a
            string or Vega Expression)
        grid_line_color : color of both vertical and horizontal grid lines
        vertical_grid_line_color : vertical grid line color
        horizontal_grid_line_color : horizontal grid line color
        header_background_color : background color for all non-body cells
            (index and columns)
        header_grid_line_color : grid line color for all non-body
            cells (index and columns)
        header_vertical_grid_line_color : vertical grid line color
            for all non-body cells
        header_horizontal_grid_line_color : horizontal grid line color
            for all non-body cells
        selection_fill_color : fill color of selected area
        selection_border_color : border color of selected area
        header_selection_fill_color : fill color of headers intersecting with
            selected area at column or row
        header_selection_border_color : border color of headers
            intersecting with selected area at column or row
        cursor_fill_color : fill color of cursor
        cursor_border_color : border color of cursor
        scroll_shadow : Dict of color parameters for scroll shadow (vertical and
            horizontal). Takes four parameters:
            size : size of shadow in pixels
            color1 : gradient color 1
            color2 : gradient color 2
            color3 : gradient color 3
    """

    _model_name = Unicode("DataGridModel").tag(sync=True)
    _model_module = Unicode(module_name).tag(sync=True)
    _model_module_version = Unicode(module_version).tag(sync=True)
    _view_name = Unicode("DataGridView").tag(sync=True)
    _view_module = Unicode(module_name).tag(sync=True)
    _view_module_version = Unicode(module_version).tag(sync=True)

    base_row_size = Int(20).tag(sync=True)
    base_column_size = Int(64).tag(sync=True)
    base_row_header_size = Int(64).tag(sync=True)
    base_column_header_size = Int(20).tag(sync=True)

    header_visibility = Enum(default_value="all",
                             values=["all", "row", "column",
                                     "none"]).tag(sync=True)

    _transforms = List(Dict()).tag(sync=True, **widget_serialization)
    _visible_rows = List(Int()).tag(sync=True)
    _data = Dict().tag(sync=True, **_data_serialization)

    renderers = Dict(Instance(CellRenderer)).tag(sync=True,
                                                 **widget_serialization)
    default_renderer = Instance(CellRenderer).tag(sync=True,
                                                  **widget_serialization)
    header_renderer = Instance(CellRenderer,
                               allow_none=True).tag(sync=True,
                                                    **widget_serialization)
    corner_renderer = Instance(CellRenderer,
                               allow_none=True).tag(sync=True,
                                                    **widget_serialization)
    selection_mode = Enum(default_value="none",
                          values=["row", "column", "cell",
                                  "none"]).tag(sync=True)
    selections = List(Dict()).tag(sync=True, **widget_serialization)
    editable = Bool(False).tag(sync=True)
    column_widths = Dict({}).tag(sync=True)
    grid_style = Dict(allow_none=True).tag(sync=True, **widget_serialization)
    auto_fit_columns = Bool(False).tag(sync=True)
    auto_fit_params = Dict(
        {
            "area": "all",
            "padding": 30,
            "numCols": None
        },
        allow_none=False,
    ).tag(sync=True, **widget_serialization)

    def __init__(self, dataframe, **kwargs):
        # Setting default index name if not explicitly
        # set by the user.
        if "index_name" in kwargs:
            self._index_name = kwargs["index_name"]
        else:
            self._index_name = None

        self.data = dataframe
        super().__init__(**kwargs)
        self._cell_click_handlers = CallbackDispatcher()
        self._cell_change_handlers = CallbackDispatcher()
        self.on_msg(self.__handle_custom_msg)

    def __handle_custom_msg(self, _, content, buffers):  # noqa: U101,U100
        if content["event_type"] == "cell-changed":
            row = content["row"]
            column = self._column_index_to_name(self._data,
                                                content["column_index"])
            value = content["value"]
            # update data on kernel
            self._data["data"][row][column] = value
            # notify python listeners
            self._cell_change_handlers({
                "row": row,
                "column": column,
                "column_index": content["column_index"],
                "value": value,
            })
        elif content["event_type"] == "cell-click":
            # notify python listeners
            self._cell_click_handlers({
                "region": content["region"],
                "column": content["column"],
                "column_index": content["column_index"],
                "row": content["row"],
                "primary_key_row": content["primary_key_row"],
                "cell_value": content["cell_value"],
            })

    @property
    def data(self):
        """Returns the grid data as a DataFrame, reconstructed from the
        internal JSON table schema (the ipydguuid helper key and column
        are dropped)."""
        trimmed_primary_key = self._data["schema"]["primaryKey"][:-1]
        df = pd.DataFrame(self._data["data"])
        final_df = df.set_index(trimmed_primary_key)
        final_df = final_df[final_df.columns[:-1]]
        return final_df

    @staticmethod
    def generate_data_object(dataframe,
                             guid_key="ipydguuid",
                             index_name="key"):
        dataframe[guid_key] = pd.RangeIndex(0, dataframe.shape[0])

        # Renaming default index name from 'index' to 'key' on
        # single index DataFrames. This allows users to use
        # 'index' as a column name. If 'key' exists, we add _x
        # suffix to id, where { x | 0 <= x < inf }
        if not isinstance(dataframe.index, pd.MultiIndex):
            if index_name in dataframe.columns:
                index = 0
                new_index_name = f"{index_name}_{index}"
                while new_index_name in dataframe.columns:
                    index += 1
                    new_index_name = f"{index_name}_{index}"
                dataframe = dataframe.rename_axis(new_index_name)
            else:
                dataframe = dataframe.rename_axis(index_name)

        schema = pd.io.json.build_table_schema(dataframe)
        reset_index_dataframe = dataframe.reset_index()
        data = reset_index_dataframe.to_dict(orient="records")

        # Check for multiple primary keys
        key = reset_index_dataframe.columns[:dataframe.index.nlevels].tolist()

        num_index_levels = len(key) if isinstance(key, list) else 1

        # Check for nested columns in schema, if so, we need to update the
        # schema to represent the actual column name values
        if isinstance(schema["fields"][-1]["name"], tuple):
            num_column_levels = len(dataframe.columns.levels)
            primary_key = key.copy()

            for i in range(num_index_levels):
                new_name = [""] * num_column_levels
                new_name[0] = schema["fields"][i]["name"]
                schema["fields"][i]["name"] = tuple(new_name)
                primary_key[i] = tuple(new_name)

            schema["primaryKey"] = primary_key
            uuid_pk = list(key[-1])
            uuid_pk[0] = guid_key
            schema["primaryKey"].append(tuple(uuid_pk))

        else:
            schema["primaryKey"] = key
            schema["primaryKey"].append(guid_key)

        schema["primaryKeyUuid"] = guid_key

        return {
            "data": data,
            "schema": schema,
            "fields": [{
                field["name"]: None
            } for field in schema["fields"]],
        }

    @data.setter
    def data(self, dataframe):
        # Reference for the original frame column and index names.
        # This is used when returning the view data model.
        self.__dataframe_reference_index_names = dataframe.index.names
        self.__dataframe_reference_columns = dataframe.columns
        dataframe = dataframe.copy()

        # Primary key used
        index_key = self.get_dataframe_index(dataframe)

        self._data = self.generate_data_object(dataframe, "ipydguuid",
                                               index_key)

    def get_dataframe_index(self, dataframe):
        """Returns a primary key to be used in ipydatagrid's
        view of the passed DataFrame"""

        # Passed index_name takes highest priority
        if self._index_name is not None:
            return self._index_name

        # A DataFrame with a named index is used by default
        if dataframe.index.name is not None:
            return dataframe.index.name

        # If no index_name param, nor named-index DataFrame
        # have been passed, revert to default "key"
        return "key"

    def get_cell_value(self, column_name, primary_key_value):
        """Gets the value for a single or multiple cells by column name and index name.

        Tuples should be used to index into multi-index columns."""
        row_indices = self._get_row_index_of_primary_key(primary_key_value)
        return [self._data["data"][row][column_name] for row in row_indices]

    def set_cell_value(self, column_name, primary_key_value, new_value):
        """Sets the value for a single cell by column name and primary key.

        Note: This method returns a boolean to indicate if the operation
        was successful.
        """
        row_indices = self._get_row_index_of_primary_key(primary_key_value)
        # Bail early if key could not be found
        if not row_indices:
            return False

        # Iterate over all indices
        outcome = True
        for row_index in row_indices:
            has_column = column_name in self._data["data"][row_index]
            if has_column and row_index is not None:
                self._data["data"][row_index][column_name] = new_value
                self._notify_cell_change(row_index, column_name, new_value)
            else:
                outcome = False
        return outcome

    def get_cell_value_by_index(self, column_name, row_index):
        """Gets the value for a single cell by column name and row index."""
        return self._data["data"][row_index][column_name]

    def set_cell_value_by_index(self, column_name, row_index, new_value):
        """Sets the value for a single cell by column name and row index.

        Note: This method returns a boolean to indicate if the operation
        was successful.
        """
        has_column = column_name in self._data["data"][row_index]
        if has_column and 0 <= row_index < len(self._data["data"]):
            self._data["data"][row_index][column_name] = new_value
            self._notify_cell_change(row_index, column_name, new_value)
            return True
        return False

    def _notify_cell_change(self, row, column, value):
        column_index = self._column_name_to_index(column)
        # notify python listeners
        self._cell_change_handlers({
            "row": row,
            "column": column,
            "column_index": column_index,
            "value": value,
        })
        # notify front-end
        self.comm.send(
            data={
                "method": "custom",
                "content": {
                    "event_type": "cell-changed",
                    "row": row,
                    "column": column,
                    "column_index": column_index,
                    "value": value,
                },
            })

    def get_visible_data(self):
        """Returns a dataframe of the current View."""
        data = deepcopy(self._data)
        if self._visible_rows:
            data["data"] = [data["data"][i] for i in self._visible_rows]

        at = self._data["schema"]["primaryKey"]
        return_df = pd.DataFrame(data["data"]).set_index(at)
        return_df.index = return_df.index.droplevel(return_df.index.nlevels -
                                                    1)
        return_df.index.names = self.__dataframe_reference_index_names
        return_df.columns = self.__dataframe_reference_columns
        return return_df

    def transform(self, transforms):
        """Apply a list of transformation to this DataGrid."""
        # TODO: Validate this input, or let it fail on view side?
        self._transforms = transforms

    def revert(self):
        """Revert all transformations."""
        self._transforms = []

    @default("default_renderer")
    def _default_renderer(self):
        return TextRenderer()

    def clear_selection(self):
        """Clears all selections."""
        self.selections.clear()
        self.send_state("selections")

    def select(self,
               row1,
               column1,
               row2=None,
               column2=None,
               clear_mode="none"):
        """
        Select an individual cell or rectangular cell region.
        Parameters
        ----------
        row1 : int
            Row index for individual cell selection or
            start row index for rectangular region selection.
        column1 : int
            Column index for individual cell selection or
            start column index for rectangular region selection.
        row2 : int or None, optional (default: None)
            End row index for rectangular region selection.
        column2 : int or None, optional (default: None)
            End column index for rectangular region selection.
        clear_mode : string, optional, {'all', 'current', 'none'}
                    (default: 'none')
            Clear mode to use when there are pre-existing selections.
            'all' removes all pre-existing selections
            'current' removes last pre-existing selection
            'none' keeps pre-existing selections
        """
        if row2 is None or column2 is None:
            row2, column2 = row1, column1

        if clear_mode == "all":
            self.selections.clear()
        elif clear_mode == "current" and len(self.selections) > 0:
            self.selections.pop()

        self.selections.append({
            "r1": min(row1, row2),
            "c1": min(column1, column2),
            "r2": max(row1, row2),
            "c2": max(column1, column2),
        })
        self.send_state("selections")

    @property
    def selected_cells(self):
        """
        List of selected cells. Each cell is represented as a dictionary
        with keys 'r': row and 'c': column
        """
        return SelectionHelper(self._data, self.selections,
                               self.selection_mode).all()

    @property
    def selected_cell_values(self):
        """
        List of values for all selected cells.
        """
        # Copy of the front-end data model
        view_data = self.get_visible_data()

        # Get primary key from dataframe
        index_key = self.get_dataframe_index(view_data)

        # Serialize to JSON table schema
        view_data_object = self.generate_data_object(view_data, "ipydguuid",
                                                     index_key)

        return SelectionHelper(view_data_object, self.selections,
                               self.selection_mode).all_values()

    @property
    def selected_cell_iterator(self):
        """
        An iterator to traverse selected cells one by one.
        """
        return SelectionHelper(self._data, self.selections,
                               self.selection_mode)

    @validate("selections")
    def _validate_selections(self, proposal):
        selections = proposal["value"]

        for rectangle in selections:
            r1 = min(rectangle["r1"], rectangle["r2"])
            c1 = min(rectangle["c1"], rectangle["c2"])
            r2 = max(rectangle["r1"], rectangle["r2"])
            c2 = max(rectangle["c1"], rectangle["c2"])
            rectangle["r1"] = r1
            rectangle["c1"] = c1
            rectangle["r2"] = r2
            rectangle["c2"] = c2

        return selections

    @validate("editable")
    def _validate_editable(self, proposal):
        value = proposal["value"]
        if value and self.selection_mode == "none":
            self.selection_mode = "cell"
        return value

    @validate("_transforms")
    def _validate_transforms(self, proposal):
        transforms = proposal["value"]
        field_len = len(self._data["schema"]["fields"])
        for transform in transforms:
            if transform["columnIndex"] > field_len:
                raise ValueError("Column index is out of bounds.")
        return transforms

    @validate("_data")
    def _validate_data(self, proposal):
        table_schema = proposal["value"]
        column_list = [f["name"] for f in table_schema["schema"]["fields"]]
        if len(column_list) != len(set(column_list)):
            msg = "The dataframe must not contain duplicate column names."
            raise ValueError(msg)
        return table_schema

    def on_cell_change(self, callback, remove=False):
        """Register a callback to execute when a cell value changed.

        The callback will be called with one argument, the dictionary
        containing cell information with keys
        "row", "column", "column_index", "value".

        Parameters
        ----------
        remove: bool (optional)
            Set to true to remove the callback from the list of callbacks.
        """
        self._cell_change_handlers.register_callback(callback, remove=remove)

    def on_cell_click(self, callback, remove=False):
        """Register a callback to execute when a cell is clicked.

        The callback will be called with one argument, the dictionary
        containing cell information with following keys:
          "region", "column", "column_index", "row", "primary_key_row",
          "cell_value"

        Parameters
        ----------
        remove: bool (optional)
            Set to true to remove the callback from the list of callbacks.
        """
        self._cell_click_handlers.register_callback(callback, remove=remove)

    @staticmethod
    def _column_index_to_name(data, column_index):
        if "schema" not in data or "fields" not in data["schema"]:
            return None
        col_headers = DataGrid._get_col_headers(data)
        return (None if len(col_headers) <= column_index else
                col_headers[column_index])

    @staticmethod
    def _get_col_headers(data):
        primary_keys = ([] if "primaryKey" not in data["schema"] else
                        data["schema"]["primaryKey"])
        col_headers = [
            field["name"] for field in data["schema"]["fields"]
            if field["name"] not in primary_keys
        ]
        return col_headers

    def _column_name_to_index(self, column_name):
        if "schema" not in self._data or "fields" not in self._data["schema"]:
            return None
        col_headers = self._get_col_headers(self._data)
        try:
            return col_headers.index(column_name)
        except ValueError:
            pass

    def _get_row_index_of_primary_key(self, value):
        value = value if isinstance(value, list) else [value]
        schema = self._data["schema"]
        key = schema["primaryKey"][:-1]  # Omitting ipydguuid
        if len(value) != len(key):
            raise ValueError(
                "The provided primary key value must be the same length "
                "as the primary key.")

        row_indices = [
            at for at, row in enumerate(self._data["data"])
            if all(row[key[j]] == value[j] for j in range(len(key)))
        ]
        return row_indices

    @staticmethod
    def _get_cell_value_by_numerical_index(data, column_index, row_index):
        """Gets the value for a single cell by column index and row index."""
        column = DataGrid._column_index_to_name(data, column_index)
        if column is None:
            return None
        return data["data"][row_index][column]
Example #14
class LDAPAuthenticator(Authenticator):
    """
    LDAP Authenticator for Jupyterhub
    """

    server_hosts = Union([List(), Unicode()],
                         config=True,
                         help="""
        List of names, IPs, or complete URLs in scheme://hostname:port
        format for the LDAP servers (required).
        """)

    server_port = Int(allow_none=True,
                      default_value=None,
                      config=True,
                      help="""
        The port where the LDAP server is listening. Typically 389, for a
        cleartext connection, and 636 for a secured connection (defaults to None).
        """)

    server_use_ssl = Bool(default_value=False,
                          config=True,
                          help="""
        Boolean specifying if the connection is on a secure port (defaults to False).
        """)

    server_connect_timeout = Int(allow_none=True,
                                 default_value=None,
                                 config=True,
                                 help="""
        Timeout in seconds permitted when establishing an ldap connection before
        raising an exception (defaults to None).
        """)

    server_receive_timeout = Int(allow_none=True,
                                 default_value=None,
                                 config=True,
                                 help="""
        Timeout in seconds permitted for responses from established ldap
        connections before raising an exception (defaults to None).
        """)

    server_pool_strategy = Unicode(default_value='FIRST',
                                   config=True,
                                   help="""
        Available Pool HA strategies (defaults to 'FIRST').

        FIRST: Gets the first server in the pool, if 'server_pool_active' is
            set to True gets the first available server.
        ROUND_ROBIN: Each time the connection is opened, the next server in
            the pool is used. If 'server_pool_active' is set to True, unavailable
            servers will be discarded.
        RANDOM: Each time the connection is opened, a random server is chosen
            from the pool. If 'server_pool_active' is set to True, unavailable
            servers will be discarded.
        """)

    server_pool_active = Union([Bool(), Int()],
                               default_value=True,
                               config=True,
                               help="""
        If True the ServerPool strategy will check for server availability. Set
        to Integer for maximum number of cycles to try before giving up
        (defaults to True).
        """)

    server_pool_exhaust = Union([Bool(), Int()],
                                default_value=False,
                                config=True,
                                help="""
        If True, any inactive servers will be removed from the pool. If set to
        an Integer, this will be the number of seconds an unreachable server is
        considered offline. When this timeout expires the server is reinserted
        in the pool and checked again for availability (defaults to False).
        """)

    bind_user_dn = Unicode(allow_none=True,
                           default_value=None,
                           config=True,
                           help="""
        The account of the user to log in for simple bind (defaults to None).
        """)

    bind_user_password = Unicode(allow_none=True,
                                 default_value=None,
                                 config=True,
                                 help="""
        The password of the user for simple bind (defaults to None)
        """)

    user_search_base = Unicode(config=True,
                               help="""
        The location in the Directory Information Tree where the user search
        will start.
        """)

    user_search_filter = Unicode(config=True,
                                 help="""
        LDAP search filter to validate that the authenticating user exists
        within the organization. Search filters containing '{username}' will
        have that value substituted with the username of the authenticating user.
        """)

    filter_by_group = Bool(default_value=True,
                           config=True,
                           help="""
        Boolean specifying if the group membership filtering is enabled or not.
        """)

    user_membership_attribute = Unicode(default_value='memberOf',
                                        config=True,
                                        help="""
        LDAP Attribute used to associate user group membership
        (defaults to 'memberOf').
        """)

    group_search_base = Unicode(config=True,
                                help="""
        The location in the Directory Information Tree where the group search
        will start. Search string containing '{group}' will be substituted
        with entries taken from allow_nested_groups.
        """)

    group_search_filter = Unicode(config=True,
                                  help="""
        LDAP search filter to return members of groups defined in the
        allowed_groups parameter. Search filters containing '{group}' will
        have that value substituted with the group dns provided in the
        allowed_groups parameter.
        """)

    allowed_groups = Union([Unicode(), List()],
                           config=True,
                           help="""
        List of LDAP group DNs that users must be a member of in order to be granted
        login.
        """)

    allow_nested_groups = Bool(default_value=False,
                               config=True,
                               help="""
        Boolean allowing for recursive search of members within nested groups of
        allowed_groups (defaults to False).
        """)

    username_pattern = Unicode(config=True,
                               help="""
        Regular expression pattern that all valid usernames must match. If a
        username does not match the pattern specified here, authentication will
        not be attempted. If not set, allow any username (defaults to None).
        """)

    username_regex = Any(help="""
        Compiled regex kept in sync with `username_pattern`
        """)

    @observe('username_pattern')
    def _username_pattern_changed(self, change):
        if not change['new']:
            self.username_regex = None
            return
        self.username_regex = re.compile(change['new'])

    create_user_home_dir = Bool(default_value=False,
                                config=True,
                                help="""
        If set to True, will attempt to create a user's home directory
        locally if that directory does not exist already.
        """)

    create_user_home_dir_cmd = Command(config=True,
                                       help="""
        Command to create a users home directory.
        """)

    @default('create_user_home_dir_cmd')
    def _default_create_user_home_dir_cmd(self):
        if sys.platform == 'linux':
            home_dir_cmd = ['mkhomedir_helper']
        else:
            self.log.debug(
                "Not sure how to create a home directory on '%s' system",
                sys.platform)
            home_dir_cmd = ['']
        return home_dir_cmd

    @gen.coroutine
    def add_user(self, user):
        username = user.name
        user_exists = yield gen.maybe_future(
            self.user_home_dir_exists(username))
        if not user_exists:
            if self.create_user_home_dir:
                yield gen.maybe_future(self.add_user_home_dir(username))
            else:
                raise KeyError("Domain user '%s' does not exists locally." %
                               username)
        yield gen.maybe_future(super().add_user(user))

    def user_home_dir_exists(self, username):
        """
        Verify that the user's home directory exists
        """
        user = pwd.getpwnam(username)
        return os.path.isdir(user.pw_dir)

    def add_user_home_dir(self, username):
        """
        Creates user home directory
        """
        cmd = [
            arg.replace('USERNAME', username)
            for arg in self.create_user_home_dir_cmd
        ] + [username]
        self.log.info("Creating '%s' user home directory using command '%s'",
                      username, ' '.join(map(pipes.quote, cmd)))
        create_dir = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        create_dir.wait()
        if create_dir.returncode:
            err = create_dir.stdout.read().decode('utf8', 'replace')
            raise RuntimeError("Failed to create system user %s: %s" %
                               (username, err))

    def normalize_username(self, username):
        """
        Normalize username for ldap query

        modifications:
         - format to lowercase
         - escape filter characters (ldap3)
        """
        username = username.lower()
        username = escape_filter_chars(username)
        return username

    def validate_username(self, username):
        """
        Validate a normalized username
        Return True if username is valid, False otherwise.
        """
        if '/' in username:
            # / is not allowed in usernames
            return False
        if not username:
            # empty usernames are not allowed
            return False
        if not self.username_regex:
            return True
        return bool(self.username_regex.match(username))

    def validate_host(self, host):
        """
        Validate hostname
        Return True if host is valid, False otherwise.
        """
        host_ip_regex = re.compile(
            r'^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$'
        )
        host_name_regex = re.compile(
            r'^((?!-)[a-z0-9\-]{1,63}(?<!-)\.){1,}((?!-)[a-z0-9\-]{1,63}(?<!-)){1}$'
        )
        host_url_regex = re.compile(
            r'^(ldaps?://)(((?!-)[a-z0-9\-]{1,63}(?<!-)\.){1,}((?!-)[a-z0-9\-]{1,63}(?<!-)){1}):([0-9]{1,5})$'
        )
        if bool(host_ip_regex.match(host)):
            # using ipv4 address
            valid = True
        elif bool(host_name_regex.match(host)):
            # using a hostname address
            valid = True
        elif bool(host_url_regex.match(host)):
            # using host url address
            valid = True
        else:
            # unsupported host format
            valid = False
        return valid

    def create_ldap_server_pool_obj(self, ldap_servers=None):
        """
        Create ldap3 ServerPool Object
        """
        server_pool = ldap3.ServerPool(
            ldap_servers,
            pool_strategy=self.server_pool_strategy.upper(),
            active=self.server_pool_active,
            exhaust=self.server_pool_exhaust)
        return server_pool

    def create_ldap_server_obj(self, host):
        """
        Create ldap3 Server Object
        """
        server = ldap3.Server(host,
                              port=self.server_port,
                              use_ssl=self.server_use_ssl,
                              connect_timeout=self.server_connect_timeout)
        return server

    def ldap_connection(self, server_pool, username, password):
        """
        Create ldaps Connection Object
        """
        try:
            conn = ldap3.Connection(
                server_pool,
                user=username,
                password=password,
                auto_bind=ldap3.AUTO_BIND_TLS_BEFORE_BIND,
                read_only=True,
                receive_timeout=self.server_receive_timeout)
        except ldap3.core.exceptions.LDAPBindError as exc:
            msg = '\n{exc_type}: {exc_msg}'.format(
                exc_type=exc.__class__.__name__,
                exc_msg=exc.args[0] if exc.args else '')
            self.log.error("Failed to connect to ldap: %s", msg)
            return None
        return conn

    def get_nested_groups(self, conn, group):
        """
        Recursively search group for nested memberships
        """
        nested_groups = list()
        conn.search(search_base=self.group_search_base,
                    search_filter=self.group_search_filter.format(group=group),
                    search_scope=ldap3.SUBTREE)
        if conn.response:
            for nested_group in conn.response:
                nested_groups.append(nested_group['dn'])
                groups = self.get_nested_groups(conn, nested_group['dn'])
                nested_groups.extend(groups)
        nested_groups = list(set(nested_groups))
        return nested_groups

    @gen.coroutine
    def authenticate(self, handler, data):

        # define vars
        username = data['username']
        password = data['password']
        server_pool = self.create_ldap_server_pool_obj()
        conn_servers = list()

        # validate credentials
        username = self.normalize_username(username)
        if not self.validate_username(username):
            self.log.error('Unsupported username supplied')
            return None
        if password is None or password.strip() == '':
            self.log.error('Empty password supplied')
            return None

        # cast server_hosts to list
        if isinstance(self.server_hosts, str):
            self.server_hosts = self.server_hosts.split()

        # validate hosts and populate server_pool object
        for host in self.server_hosts:
            host = host.strip().lower()
            if not self.validate_host(host):
                self.log.warning(
                    "Host '%s' not supplied in approved format. Removing host from Server Pool",
                    host)
                continue
            server = self.create_ldap_server_obj(host)
            server_pool.add(server)
            conn_servers.append(host)

        # verify ldap connection object parameters are defined
        if len(server_pool.servers) < 1:
            self.log.error(
                "No hosts provided. ldap connection requires at least 1 host to connect to."
            )
            return None
        if not self.bind_user_dn or self.bind_user_dn.strip() == '':
            self.log.error(
                "'bind_user_dn' config value undefined. Required for ldap connection"
            )
            return None
        if not self.bind_user_password or self.bind_user_password.strip() == '':
            self.log.error(
                "'bind_user_password' config value undefined. Required for ldap connection"
            )
            return None

        # verify ldap search object parameters are defined
        if not self.user_search_base or self.user_search_base.strip() == '':
            self.log.error(
                "'user_search_base' config value undefined. Required for ldap search"
            )
            return None
        if not self.user_search_filter or self.user_search_filter.strip() == '':
            self.log.error(
                "'user_search_filter' config value undefined. Required for ldap search"
            )
            return None

        # open ldap connection and authenticate
        self.log.debug("Attempting ldap connection to %s with user '%s'",
                       conn_servers, self.bind_user_dn)
        conn = self.ldap_connection(server_pool, self.bind_user_dn,
                                    self.bind_user_password)

        # proceed if connection has been established
        if not conn or not conn.bind():
            self.log.error(
                "Could not establish ldap connection to %s using '%s' and supplied bind_user_password.",
                conn_servers, self.bind_user_dn)
            return None
        else:
            self.log.debug(
                "Successfully established connection to %s with user '%s'",
                conn_servers, self.bind_user_dn)

            # compile list of permitted groups
            permitted_groups = copy.deepcopy(self.allowed_groups)
            if self.allow_nested_groups:
                for group in self.allowed_groups:
                    nested_groups = self.get_nested_groups(conn, group)
                    permitted_groups.extend(nested_groups)

            # format user search filter
            auth_user_search_filter = self.user_search_filter.format(
                username=username)

            # search for authenticating user in ldap
            self.log.debug("Attempting LDAP search using search_filter '%s'.",
                           auth_user_search_filter)
            conn.search(search_base=self.user_search_base,
                        search_filter=auth_user_search_filter,
                        search_scope=ldap3.SUBTREE,
                        attributes=self.user_membership_attribute,
                        paged_size=2)

            # handle abnormal search results
            if not conn.response or 'attributes' not in conn.response[0]:
                self.log.error("LDAP search '%s' found %i result(s).",
                               auth_user_search_filter, len(conn.response))
                return None
            elif len(conn.response) > 1:
                self.log.error(
                    "LDAP search '%s' found %i result(s). Please narrow search to 1 result.",
                    auth_user_search_filter, len(conn.response))
                return None
            else:
                self.log.debug("LDAP search '%s' found %i result(s).",
                               auth_user_search_filter, len(conn.response))

                # copy response to var
                search_response = copy.deepcopy(conn.response[0])

                # get authenticating user's ldap attributes
                if not search_response['dn'] or search_response[
                        'dn'].strip() == '':
                    self.log.error(
                        "Search results for user '%s' returned 'dn' attribute with undefined or null value.",
                        username)
                    conn.unbind()
                    return None
                else:
                    self.log.debug(
                        "Search results for user '%s' returned 'dn' attribute as '%s'",
                        username, search_response['dn'])
                    auth_user_dn = search_response['dn']
                if not search_response['attributes'][
                        self.user_membership_attribute]:
                    self.log.error(
                        "Search results for user '%s' returned '%s' attribute with undefned or null value.",
                        username, self.user_membership_attribute)
                    conn.unbind()
                    return None
                else:
                    self.log.debug(
                        "Search results for user '%s' returned '%s' attribute as %s",
                        username, self.user_membership_attribute,
                        search_response['attributes'][
                            self.user_membership_attribute])
                    auth_user_memberships = search_response['attributes'][
                        self.user_membership_attribute]

                # is authenticating user a member of permitted_groups
                allowed_memberships = list(
                    set(auth_user_memberships).intersection(permitted_groups))
                if bool(allowed_memberships) or not self.filter_by_group:
                    self.log.debug(
                        "User '%s' found in the following allowed ldap groups %s. Proceeding with authentication.",
                        username, allowed_memberships)

                    # rebind ldap connection with authenticating user, gather results, and close connection
                    conn.rebind(user=auth_user_dn, password=password)
                    auth_bound = copy.deepcopy(conn.bind())
                    conn.unbind()
                    if not auth_bound:
                        self.log.error(
                            "Could not establish ldap connection to %s using '%s' and supplied bind_user_password.",
                            conn_servers, self.bind_user_dn)
                        auth_response = None
                    else:
                        self.log.info(
                            "User '%s' sucessfully authenticated against ldap server %r.",
                            username, conn_servers)
                        auth_response = username
                else:
                    self.log.error(
                        "User '%s' is not a member of any permitted groups %s",
                        username, permitted_groups)
                    auth_response = None

                permitted_groups = None
                return auth_response
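
# A hedged configuration sketch for jupyterhub_config.py, using only the
# traits defined above; every host, DN, and password value is illustrative.
c.JupyterHub.authenticator_class = LDAPAuthenticator
c.LDAPAuthenticator.server_hosts = ['ldaps://ldap1.example.com:636']
c.LDAPAuthenticator.bind_user_dn = 'cn=readonly,dc=example,dc=com'
c.LDAPAuthenticator.bind_user_password = 'secret'
c.LDAPAuthenticator.user_search_base = 'ou=people,dc=example,dc=com'
c.LDAPAuthenticator.user_search_filter = '(uid={username})'
c.LDAPAuthenticator.allowed_groups = ['cn=jupyter,ou=groups,dc=example,dc=com']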
Example #15
class _Selection(DescriptionWidget, ValueWidget, CoreWidget):
    """Base class for Selection widgets

    ``options`` can be specified as a list of values, list of (label, value)
    tuples, or a dict of {label: value}. The labels are the strings that will be
    displayed in the UI, representing the actual Python choices, and should be
    unique. If labels are not specified, they are generated from the values.

    When programmatically setting the value, a reverse lookup is performed
    among the options to check that the value is valid. The reverse lookup uses
    the equality operator by default, but another predicate may be provided via
    the ``equals`` keyword argument. For example, when dealing with numpy arrays,
    one may set equals=np.array_equal.
    """

    value = Any(None, help="Selected value", allow_none=True)
    label = Unicode(None, help="Selected label", allow_none=True)
    index = Int(None, help="Selected index", allow_none=True).tag(sync=True)

    options = Any(
        (),
        help=
        """Iterable of values, (label, value) pairs, or a mapping of {label: value} pairs that the user can select.

    Any assigned value is converted to a tuple of ('label', value) pairs.

    The labels are the strings that will be displayed in the UI, representing the
    actual Python choices, and should be unique.
    """)
    # This being read-only means that it cannot be changed from the frontend!
    _options_labels = Tuple(read_only=True,
                            help="The labels for the options.").tag(sync=True)

    disabled = Bool(help="Enable or disable user changes").tag(sync=True)

    def __init__(self, *args, **kwargs):
        self.equals = kwargs.pop('equals', lambda x, y: x == y)

        # We have to make the basic options bookkeeping consistent
        # so we don't have errors the first time validators run
        self._initializing_traits_ = True
        options = _make_options(kwargs.get('options', ()))
        self.set_trait('_options_labels', tuple(i[0] for i in options))
        self._options_values = tuple(i[1] for i in options)

        # Select the first item by default, if we can
        if 'index' not in kwargs and 'value' not in kwargs and 'label' not in kwargs:
            kwargs['index'] = 0 if options else None
            kwargs['label'], kwargs['value'] = options[0] if options else (None, None)

        super(_Selection, self).__init__(*args, **kwargs)
        self._initializing_traits_ = False

    @validate('options')
    def _validate_options(self, proposal):
        return _make_options(proposal.value)

    @observe('options')
    def _propagate_options(self, change):
        "Unselect any option if we aren't initializing"
        self.set_trait('_options_labels', tuple(i[0] for i in change.new))
        self._options_values = tuple(i[1] for i in change.new)
        if self._initializing_traits_ is not True:
            self.index = 0 if len(change.new) > 0 else None

    @validate('index')
    def _validate_index(self, proposal):
        if proposal.value is None or 0 <= proposal.value < len(
                self._options_labels):
            return proposal.value
        else:
            raise TraitError('Invalid selection: index out of bounds')

    @observe('index')
    def _propagate_index(self, change):
        "Propagate changes in index to the value and label properties"
        label = self._options_labels[
            change.new] if change.new is not None else None
        value = self._options_values[
            change.new] if change.new is not None else None
        if self.label is not label:
            self.label = label
        if self.value is not value:
            self.value = value

    @validate('value')
    def _validate_value(self, proposal):
        value = proposal.value
        try:
            return findvalue(self._options_values, value,
                             self.equals) if value is not None else None
        except ValueError:
            raise TraitError('Invalid selection: value not found')

    @observe('value')
    def _propagate_value(self, change):
        index = self._options_values.index(
            change.new) if change.new is not None else None
        if self.index != index:
            self.index = index

    @validate('label')
    def _validate_label(self, proposal):
        if (proposal.value is not None) and (proposal.value
                                             not in self._options_labels):
            raise TraitError('Invalid selection: label not found')
        return proposal.value

    @observe('label')
    def _propagate_label(self, change):
        index = self._options_labels.index(
            change.new) if change.new is not None else None
        if self.index != index:
            self.index = index

    def _repr_keys(self):
        keys = super(_Selection, self)._repr_keys()
        # Include options manually, as it isn't marked as synced:
        for key in sorted(chain(keys, ('options', ))):
            if key == 'index' and self.index == 0:
                # Index 0 is default when there are options
                continue
            yield key
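The index/value/label machinery above is an instance of a general traitlets pattern: a validator gates each trait, an observer propagates a change to its siblings, and re-assigning an unchanged value does not re-fire observers, which is what breaks the cycle. A minimal self-contained sketch of that pattern, with hypothetical names (not part of the widget above):

from traitlets import HasTraits, Int, Unicode, observe, validate, TraitError

class TinySelect(HasTraits):
    # Distilled index <-> label linkage, for illustration only.
    _labels = ('a', 'b', 'c')
    index = Int(0)
    label = Unicode('a')

    @validate('index')
    def _validate_index(self, proposal):
        if 0 <= proposal.value < len(self._labels):
            return proposal.value
        raise TraitError('Invalid selection: index out of bounds')

    @observe('index')
    def _propagate_index(self, change):
        self.label = self._labels[change.new]

    @observe('label')
    def _propagate_label(self, change):
        self.index = self._labels.index(change.new)

s = TinySelect()
s.label = 'c'
assert s.index == 2  # the observers kept both traits in sync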
Example #16
0
class NamespacedResourceReflector(LoggingConfigurable):
    """
    Base class for keeping a local up-to-date copy of a set of kubernetes resources.

    Must be subclassed once per kind of resource that needs watching.
    """
    labels = Dict({},
                  config=True,
                  help="""
        Labels to reflect onto local cache
        """)

    fields = Dict({},
                  config=True,
                  help="""
        Fields to restrict the reflected objects
        """)

    namespace = Unicode(None,
                        allow_none=True,
                        help="""
        Namespace to watch for resources in
        """)

    resources = Dict({},
                     help="""
        Dictionary of resource names to the appropriate resource objects.

        This can be accessed across threads safely.
        """)

    kind = Unicode('resource',
                   help="""
        Human readable name for kind of object we're watching for.

        Used for diagnostic messages.
        """)

    list_method_name = Unicode("",
                               help="""
        Name of function (on apigroup represented by `api_group_name`) that is to be called to list resources.

        This will be passed a namespace & a label selector. You most likely want something
        of the form list_namespaced_<resource> - for example, `list_namespaced_pod` will
        give you a PodReflector.

        This must be set by a subclass.
        """)

    api_group_name = Unicode('CoreV1Api',
                             help="""
        Name of class that represents the apigroup on which `list_method_name` is to be found.

        Defaults to CoreV1Api, which has everything in the 'core' API group. If you want to watch Ingresses,
        for example, you would have to use ExtensionsV1beta1Api
        """)

    request_timeout = Int(0,
                          config=True,
                          help="""
        Network timeout for kubernetes watch.

        Trigger watch reconnect when no traffic has been received for this time.
        This can be used to restart the watch periodically.
        """)

    timeout_seconds = Int(10,
                          config=True,
                          help="""
        Timeout for kubernetes watch.

        Trigger watch reconnect when no watch event has been received.
        This will cause a full reload of the currently existing resources
        from the API server.
        """)

    on_failure = Any(
        help="""Function to be called when the reflector gives up.""")

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Load kubernetes config here, since this is a Singleton and
        # so this __init__ will be run way before anything else gets run.
        try:
            config.load_incluster_config()
        except config.ConfigException:
            config.load_kube_config()
        self.api = shared_client(self.api_group_name)

        # FIXME: Protect against malicious labels?
        self.label_selector = ','.join(
            ['{}={}'.format(k, v) for k, v in self.labels.items()])
        self.field_selector = ','.join(
            ['{}={}'.format(k, v) for k, v in self.fields.items()])

        self.first_load_future = Future()
        self._stop_event = threading.Event()

        self.start()

    def __del__(self):
        self.stop()

    def _list_and_update(self):
        """
        Update current list of resources by doing a full fetch.

        Overwrites all current resource info.
        """
        initial_resources = getattr(self.api, self.list_method_name)(
            self.namespace,
            label_selector=self.label_selector,
            field_selector=self.field_selector,
            _request_timeout=self.request_timeout,
        )
        # This is an atomic operation on the dictionary!
        self.resources = {p.metadata.name: p for p in initial_resources.items}
        # return the resource version so we can hook up a watch
        return initial_resources.metadata.resource_version

    def _watch_and_update(self):
        """
        Keeps the current list of resources up-to-date

        This method is meant to run in a background thread, not the main thread!

        We first fetch the list of current resources, and store that. Then we
        register to be notified of changes to those resources, and keep our
        local store up-to-date based on these notifications.

        We also perform exponential backoff, giving up once the retry delay
        exceeds 30 seconds. This should protect against network connections dropping
        and intermittent unavailability of the api-server. Every time we
        recover from an exception we also do a full fetch, to pick up
        changes that might've been missed in the time we were not doing
        a watch.

        Note that we're playing a bit with fire here, by updating a dictionary
        in this thread while it is probably being read in another thread
        without using locks! However, dictionary access itself is atomic,
        and as long as we don't try to mutate them (do a 'fetch / modify /
        update' cycle on them), we should be ok!
        """
        cur_delay = 0.1
        self.log.info(
            "CHANGED CODE watching for %s with label selector %s / field selector %s in namespace %s",
            self.kind, self.label_selector, self.field_selector,
            self.namespace)
        while True:
            w = watch.Watch()
            try:
                resource_version = self._list_and_update()
                if not self.first_load_future.done():
                    # signal that we've loaded our initial data
                    self.first_load_future.set_result(None)
                watch_args = {
                    'namespace': self.namespace,
                    'label_selector': self.label_selector,
                    'field_selector': self.field_selector,
                    'resource_version': resource_version,
                }
                if self.request_timeout:
                    # set network receive timeout
                    watch_args['_request_timeout'] = self.request_timeout
                if self.timeout_seconds:
                    # set watch timeout
                    watch_args['timeout_seconds'] = self.timeout_seconds
                # in case of timeout_seconds, the w.stream just exits (no exception thrown)
                # -> we stop the watcher and start a new one
                for ev in w.stream(getattr(self.api, self.list_method_name),
                                   **watch_args):
                    cur_delay = 0.1
                    resource = ev['object']
                    if ev['type'] == 'DELETED':
                        # This is an atomic delete operation on the dictionary!
                        self.resources.pop(resource.metadata.name, None)
                    else:
                        # This is an atomic operation on the dictionary!
                        self.resources[resource.metadata.name] = resource
                    if self._stop_event.is_set():
                        break
            except ReadTimeoutError:
                # network read time out, just continue and restart the watch
                continue
            except Exception:
                cur_delay = cur_delay * 2
                if cur_delay > 30:
                    self.log.exception(
                        "Watching resources never recovered, giving up")
                    if self.on_failure:
                        self.on_failure()
                    return
                self.log.exception(
                    "Error when watching resources, retrying in %ss",
                    cur_delay)
                time.sleep(cur_delay)
                continue
            finally:
                w.stop()
                if self._stop_event.is_set():
                    self.log.info("%s watcher stopped", self.kind)
                    break

    def start(self):
        """
        Start the reflection process!

        We'll do a blocking read of all resources first, so that we don't
        race with any operations that are checking the state of the pod
        store - such as polls. This should be called only once at the
        start of program initialization (when the singleton is being created),
        and not afterwards!
        """
        if hasattr(self, 'watch_thread'):
            raise ValueError(
                'Thread watching for resources is already running')

        self._list_and_update()
        self.watch_thread = threading.Thread(target=self._watch_and_update)
        # If the watch_thread is only thread left alive, exit app
        self.watch_thread.daemon = True
        self.watch_thread.start()

    def stop(self):
        self._stop_event.set()

    def stopped(self):
        return self._stop_event.is_set()
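A minimal subclass sketch, following the docstring's own `list_namespaced_pod` example (the class name and trait values here are illustrative, not part of the source):

class PodReflector(NamespacedResourceReflector):
    kind = Unicode('pods')
    list_method_name = Unicode('list_namespaced_pod')

# Instantiation blocks on the first full fetch (start() runs inside __init__):
# pods = PodReflector(namespace='default', labels={'component': 'singleuser-server'})
# pods.resources  # {pod_name: pod_object}, safe to read from other threads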
Example #17
0
class DataGenerator(Configurable):
    title_in = Unicode(
        '/bos/data1/sogou16/data/training/1m_title.pad_t50',
        help='titles term id csv, must be padded').tag(config=True)
    max_q_len = Int(10, help='max q len').tag(config=True)
    max_d_len = Int(50, help='max document len').tag(config=True)
    q_name = Unicode('q')
    d_name = Unicode('d')
    aux_d_name = Unicode('d_aux')
    idf_name = Unicode('idf')
    neg_sample = Int(1, help='negative sample').tag(config=True)
    load_litle_pool = Bool(False,
                           help='load little pool at beginning').tag(config=True)
    min_score_diff = Float(
        0, help='min score difference for click data generated pairs').tag(
            config=True)
    vocabulary_size = Int(2000000).tag(config=True)

    def __init__(self, **kwargs):
        super(DataGenerator, self).__init__(**kwargs)
        #TODO check this
        self.m_title_pool = np.array(None)
        if self.load_litle_pool and self.neg_sample:
            self._load_title_pool()
        print("min_score_diff: ", self.min_score_diff)
        print("generator's vocabulary size: ", self.vocabulary_size)

    def _load_title_pool(self):
        if self.title_in:
            logging.info('start loading title pool [%s]', self.title_in)
            self.m_title_pool = genfromtxt(
                self.title_in,
                delimiter=',',
                dtype=int,
            )
            logging.info('loaded [%d] title pool', self.m_title_pool.shape[0])

    def pointwise_generate(self,
                           pair_stream,
                           batch_size,
                           with_label=True,
                           with_idf=False):
        """
        to use: initialize the generator = DataGenerator(config=conf)
            and then for X, Y in generator.pointwise_generate(pair_stream, batch_size)
        :param pair_stream: the (probably infinite) stream of query \t clicked url
            e.g. itertools.cycle(open(file))
        :param batch_size: int, a batch size
        :param with_label: if True, then there is a third column in pair_stream: \t label (int)
        :param with_idf: if True, the third col in pair_stream is the query term idf
        :return: yield a batched X and Y
        """
        l_q = []
        l_d = []
        l_idf = []
        l_y = []
        for line in pair_stream:
            cols = line.split('\t')
            q = np.array(
                [int(t) for t in cols[0].split(',')]
            )  #np.loadtxt(StringIO(unicode(cols[0])), delimiter=',',  dtype=int,)
            doc = np.array(
                [int(t) for t in cols[1].split(',')]
            )  #np.loadtxt(StringIO(unicode(cols[1])), delimiter=',',  dtype=int,)

            if with_idf:
                idf = np.array([float(t) for t in cols[2].split(',')])
            else:
                idf = np.ones(len(q))
            y = 0
            if with_label:
                y = int(cols[-1])
            v_q = np.ones(self.max_q_len) * -1
            v_d = np.ones(self.max_d_len) * -1
            v_q[:min(q.shape[0], self.max_q_len
                     )] = q[0:min(q.shape[0], self.max_q_len)]
            v_d[:min(doc.shape[0], self.max_d_len
                     )] = doc[0:min(doc.shape[0], self.max_d_len)]

            l_q.append(v_q)
            l_d.append(v_d)
            l_y.append(y)

            if with_idf:
                v_idf = np.zeros(self.max_q_len)
                v_idf[:min(idf.shape[0], self.max_q_len
                           )] = idf[:min(idf.shape[0], self.max_q_len)]
                l_idf.append(v_idf)

            if len(l_q) >= batch_size:
                Q = np.array(
                    l_q,
                    dtype=int,
                )
                D = np.array(
                    l_d,
                    dtype=int,
                )
                IDF = None
                if with_idf:
                    IDF = np.array(l_idf, dtype=float)
                Y = np.array(
                    l_y,
                    dtype=int,
                )
                X = {self.q_name: Q, self.d_name: D, self.idf_name: IDF}
                yield X, Y
                l_q, l_d, l_y, l_idf = [], [], [], []
        if l_q:
            Q = np.array(
                l_q,
                dtype=int,
            )
            D = np.array(
                l_d,
                dtype=int,
            )
            IDF = None
            if with_idf:
                IDF = np.array(l_idf, dtype=float)
            Y = np.array(
                l_y,
                dtype=int,
            )
            X = {self.q_name: Q, self.d_name: D, self.idf_name: IDF}
            yield X, Y
        logging.info('pointwise generator reached the end')

    def pairwise_generate(self, pair_stream, batch_size, with_idf=False):
        """
        to use: initialize the generator = DataGenerator(config=conf)
            and then for X, Y in generator.pairwise_generate(pair_stream, batch_size)
        :param pair_stream: the (probably infinite) stream of query \t clicked url
            e.g. itertools.cycle(open(file))
        :param batch_size: must be neg_sample * k size
        :param with_idf: if True, the third col in pair_stream is the query term idf
        :return: yield a batched X and Y
            NOTE: the Y is always 1; the order of pos and neg docs is not shuffled yet.
        """

        assert batch_size % self.neg_sample == 0
        pos_batch_size = batch_size // self.neg_sample

        for pos_X, pos_Y in self.pointwise_generate(pair_stream,
                                                    pos_batch_size,
                                                    with_label=False,
                                                    with_idf=with_idf):
            idx = np.random.randint(self.m_title_pool.shape[0],
                                    size=batch_size)
            aux_D = self.m_title_pool[idx, :]
            new_Q = np.repeat(pos_X[self.q_name], self.neg_sample, axis=0)
            new_D = np.repeat(pos_X[self.d_name], self.neg_sample, axis=0)

            new_IDF = None
            if with_idf:
                new_IDF = np.repeat(pos_X[self.idf_name],
                                    self.neg_sample,
                                    axis=0)

            X = {
                self.q_name: new_Q,
                self.d_name: new_D,
                self.aux_d_name: aux_D,
                self.idf_name: new_IDF
            }
            Y = np.ones(batch_size)
            yield X, Y

    def pairwise_reader(self, pair_stream, batch_size, with_idf=False):
        l_q = []
        l_d = []
        l_d_aux = []
        l_idf = []
        l_y = []
        for line in pair_stream:
            cols = line.strip().split('\t')
            if len(cols) < 4: continue
            flag = True
            for col in cols:
                if not col.strip():
                    flag = False
                    break
            if not flag:
                print(line)
                continue
            y = float(cols[3])
            if abs(y) < self.min_score_diff:
                continue
            q = np.array([
                int(t) for t in cols[0].split(',')
                if int(t) < self.vocabulary_size
            ])
            t1 = np.array([
                int(t) for t in cols[1].split(',')
                if int(t) < self.vocabulary_size
            ])
            t2 = np.array([
                int(t) for t in cols[2].split(',')
                if int(t) < self.vocabulary_size
            ])
            if y > 0:
                y = 1
            else:
                t1, t2 = t2, t1  # make the first always positive
                y = 1
            if with_idf:
                if len(cols) < 5:
                    idf = np.ones(len(q))
                else:
                    idf = np.array([float(t) for t in cols[4].split(',')])
            v_q = np.ones(self.max_q_len) * -1
            v_d = np.ones(self.max_d_len) * -1
            v_d_aux = np.ones(self.max_d_len) * -1
            v_q[:min(q.shape[0], self.max_q_len
                     )] = q[:min(q.shape[0], self.max_q_len)]
            v_d[:min(t1.shape[0], self.max_d_len
                     )] = t1[:min(t1.shape[0], self.max_d_len)]
            v_d_aux[:min(t2.shape[0], self.max_d_len
                         )] = t2[:min(t2.shape[0], self.max_d_len)]

            l_q.append(v_q)
            l_d.append(v_d)
            l_d_aux.append(v_d_aux)
            l_y.append(y)

            if with_idf:
                v_idf = np.zeros(self.max_q_len)
                v_idf[:min(idf.shape[0], self.max_q_len
                           )] = idf[:min(idf.shape[0], self.max_q_len)]
                l_idf.append(v_idf)

            if len(l_q) >= batch_size:
                Q = np.array(
                    l_q,
                    dtype=int,
                )
                D = np.array(
                    l_d,
                    dtype=int,
                )
                D_aux = np.array(l_d_aux, dtype=int)
                IDF = None
                if with_idf:
                    IDF = np.array(l_idf, dtype=float)
                Y = np.array(
                    l_y,
                    dtype=int,
                )
                X = {
                    self.q_name: Q,
                    self.d_name: D,
                    self.idf_name: IDF,
                    self.aux_d_name: D_aux
                }
                yield X, Y
                l_q, l_d, l_d_aux, l_y, l_idf = [], [], [], [], []
        if l_q:
            Q = np.array(
                l_q,
                dtype=int,
            )
            D = np.array(
                l_d,
                dtype=int,
            )
            D_aux = np.array(
                l_d_aux,
                dtype=int,
            )
            IDF = None
            if with_idf:
                IDF = np.array(l_idf, dtype=float)
            Y = np.array(
                l_y,
                dtype=int,
            )
            X = {
                self.q_name: Q,
                self.d_name: D,
                self.idf_name: IDF,
                self.aux_d_name: D_aux
            }
            yield X, Y
        logging.info('pairwise reader reached the end')
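A hedged usage sketch for pointwise_generate, assuming lines of comma-separated term ids in the tab-separated "query \t document \t label" layout the parser above expects:

lines = [
    '1,2,3\t4,5,6,7\t1',
    '8,9\t10,11,12\t0',
]
gen = DataGenerator()  # default config; pass config=<traitlets Config> to override
for X, Y in gen.pointwise_generate(iter(lines), batch_size=2):
    print(X[gen.q_name].shape, X[gen.d_name].shape, Y)
    # -> (2, 10) (2, 50) [1 0], with the default max_q_len=10 / max_d_len=50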
Example #18
0
class Map(DOMWidget, InteractMixin):
    _view_name = Unicode('LeafletMapView').tag(sync=True)
    _model_name = Unicode('LeafletMapModel').tag(sync=True)
    _view_module = Unicode('jupyter-leaflet').tag(sync=True)
    _model_module = Unicode('jupyter-leaflet').tag(sync=True)

    _view_module_version = Unicode(EXTENSION_VERSION).tag(sync=True)
    _model_module_version = Unicode(EXTENSION_VERSION).tag(sync=True)

    # Map options
    center = List(def_loc).tag(sync=True, o=True)
    zoom_start = Int(12).tag(sync=True, o=True)
    zoom = Int(12).tag(sync=True, o=True)
    max_zoom = Int(18).tag(sync=True, o=True)
    min_zoom = Int(1).tag(sync=True, o=True)
    interpolation = Unicode('bilinear').tag(sync=True, o=True)

    # Specification of the basemap
    basemap = Dict(default_value=dict(
        url='https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png',
        max_zoom=19,
        attribution=
        'Map data (c) <a href="https://openstreetmap.org">OpenStreetMap</a> contributors'
    )).tag(sync=True, o=True)
    modisdate = Unicode('yesterday').tag(sync=True)

    # Interaction options
    dragging = Bool(True).tag(sync=True, o=True)
    touch_zoom = Bool(True).tag(sync=True, o=True)
    scroll_wheel_zoom = Bool(False).tag(sync=True, o=True)
    double_click_zoom = Bool(True).tag(sync=True, o=True)
    box_zoom = Bool(True).tag(sync=True, o=True)
    tap = Bool(True).tag(sync=True, o=True)
    tap_tolerance = Int(15).tag(sync=True, o=True)
    world_copy_jump = Bool(False).tag(sync=True, o=True)
    close_popup_on_click = Bool(True).tag(sync=True, o=True)
    bounce_at_zoom_limits = Bool(True).tag(sync=True, o=True)
    keyboard = Bool(True).tag(sync=True, o=True)
    keyboard_pan_offset = Int(80).tag(sync=True, o=True)
    keyboard_zoom_offset = Int(1).tag(sync=True, o=True)
    inertia = Bool(True).tag(sync=True, o=True)
    inertia_deceleration = Int(3000).tag(sync=True, o=True)
    inertia_max_speed = Int(1500).tag(sync=True, o=True)
    # inertia_threshold = Int(?, o=True).tag(sync=True)
    zoom_control = Bool(True).tag(sync=True, o=True)
    attribution_control = Bool(True).tag(sync=True, o=True)
    # fade_animation = Bool(?).tag(sync=True, o=True)
    # zoom_animation = Bool(?).tag(sync=True, o=True)
    zoom_animation_threshold = Int(4).tag(sync=True, o=True)
    # marker_zoom_animation = Bool(?).tag(sync=True, o=True)

    options = List(trait=Unicode).tag(sync=True)

    @default('options')
    def _default_options(self):
        return [name for name in self.traits(o=True)]

    south = Float(def_loc[0], read_only=True).tag(sync=True)
    north = Float(def_loc[0], read_only=True).tag(sync=True)
    east = Float(def_loc[1], read_only=True).tag(sync=True)
    west = Float(def_loc[1], read_only=True).tag(sync=True)

    layers = Tuple(trait=Instance(Layer)).tag(sync=True,
                                              **widget_serialization)

    @default('layers')
    def _default_layers(self):
        return (basemap_to_tiles(self.basemap, self.modisdate, base=True), )

    bounds = Tuple(read_only=True)
    bounds_polygon = Tuple(read_only=True)

    @observe('south', 'north', 'east', 'west')
    def _observe_bounds(self, change):
        self.set_trait('bounds',
                       ((self.south, self.west), (self.north, self.east)))
        self.set_trait('bounds_polygon',
                       ((self.north, self.west), (self.north, self.east),
                        (self.south, self.east), (self.south, self.west)))

    def __init__(self, **kwargs):
        super(Map, self).__init__(**kwargs)
        self.on_displayed(self._fire_children_displayed)
        self.on_msg(self._handle_leaflet_event)

    def _fire_children_displayed(self, widget, **kwargs):
        for layer in self.layers:
            layer._handle_displayed(**kwargs)
        for control in self.controls:
            control._handle_displayed(**kwargs)

    _layer_ids = List()

    @validate('layers')
    def _validate_layers(self, proposal):
        '''Validate layers list.

        Makes sure only one instance of any given layer can exist in the
        layers list.
        '''
        self._layer_ids = [l.model_id for l in proposal.value]
        if len(set(self._layer_ids)) != len(self._layer_ids):
            raise LayerException(
                'duplicate layer detected, only use each layer once')
        return proposal.value

    def add_layer(self, layer):
        if isinstance(layer, dict):
            layer = basemap_to_tiles(layer)
        if layer.model_id in self._layer_ids:
            raise LayerException('layer already on map: %r' % layer)
        self.layers = tuple([l for l in self.layers] + [layer])

    def remove_layer(self, layer):
        if layer.model_id not in self._layer_ids:
            raise LayerException('layer not on map: %r' % layer)
        self.layers = tuple(
            [l for l in self.layers if l.model_id != layer.model_id])

    def substitute_layer(self, old, new):
        if isinstance(new, dict):
            new = basemap_to_tiles(new)
        if old.model_id not in self._layer_ids:
            raise LayerException(
                'Could not substitute layer: layer not on map.')
        self.layers = tuple(
            [new if l.model_id == old.model_id else l for l in self.layers])

    def clear_layers(self):
        self.layers = ()

    controls = Tuple(trait=Instance(Control)).tag(sync=True,
                                                  **widget_serialization)
    _control_ids = List()

    @validate('controls')
    def _validate_controls(self, proposal):
        '''Validate controls list.

        Makes sure only one instance of any given control can exist in the
        controls list.
        '''
        self._control_ids = [c.model_id for c in proposal.value]
        if len(set(self._control_ids)) != len(self._control_ids):
            raise ControlException(
                'duplicate control detected, only use each control once')
        return proposal.value

    def add_control(self, control):
        if control.model_id in self._control_ids:
            raise ControlException('control already on map: %r' % control)
        self.controls = tuple([c for c in self.controls] + [control])

    def remove_control(self, control):
        if control.model_id not in self._control_ids:
            raise ControlException('control not on map: %r' % control)
        self.controls = tuple(
            [c for c in self.controls if c.model_id != control.model_id])

    def clear_controls(self):
        self.controls = ()

    def __iadd__(self, item):
        if isinstance(item, Layer):
            self.add_layer(item)
        elif isinstance(item, Control):
            self.add_control(item)
        return self

    def __isub__(self, item):
        if isinstance(item, Layer):
            self.remove_layer(item)
        elif isinstance(item, Control):
            self.remove_control(item)
        return self

    def __add__(self, item):
        if isinstance(item, Layer):
            self.add_layer(item)
        elif isinstance(item, Control):
            self.add_control(item)
        return self

    # Event handling
    _interaction_callbacks = Instance(CallbackDispatcher, ())

    def _handle_leaflet_event(self, _, content, buffers):
        if content.get('event', '') == 'interaction':
            self._interaction_callbacks(**content)

    def on_interaction(self, callback, remove=False):
        self._interaction_callbacks.register_callback(callback, remove=remove)
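A short notebook-style usage sketch; the 'type' and 'coordinates' event keys are assumed to match what the jupyter-leaflet frontend sends:

m = Map(center=[48.85, 2.35], zoom=12, scroll_wheel_zoom=True)

def handle_interaction(**kwargs):
    # called via the CallbackDispatcher wired up in _handle_leaflet_event
    if kwargs.get('type') == 'mousemove':
        print(kwargs.get('coordinates'))

m.on_interaction(handle_interaction)
m  # display the widget in a notebook cell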
Example #19
0
class DisplayDL1Calib(Tool):
    name = "DisplayDL1Calib"
    description = "Calibrate dl0 data to dl1, and plot the photoelectron " \
                  "images."

    telescope = Int(None,
                    allow_none=True,
                    help='Telescope to view. Set to None to display all '
                    'telescopes.').tag(config=True)

    aliases = Dict(
        dict(f='EventFileReaderFactory.input_path',
             r='EventFileReaderFactory.reader',
             max_events='EventFileReaderFactory.max_events',
             extractor='ChargeExtractorFactory.extractor',
             window_width='ChargeExtractorFactory.window_width',
             t0='ChargeExtractorFactory.t0',
             window_shift='ChargeExtractorFactory.window_shift',
             sig_amp_cut_HG='ChargeExtractorFactory.sig_amp_cut_HG',
             sig_amp_cut_LG='ChargeExtractorFactory.sig_amp_cut_LG',
             lwt='ChargeExtractorFactory.lwt',
             clip_amplitude='CameraDL1Calibrator.clip_amplitude',
             T='DisplayDL1Calib.telescope',
             O='ImagePlotter.output_path'))
    flags = Dict(
        dict(D=({
            'ImagePlotter': {
                'display': True
            }
        }, "Display the photoelectron images on-screen as they "
                "are produced.")))
    classes = List([
        EventFileReaderFactory, ChargeExtractorFactory, CameraDL1Calibrator,
        ImagePlotter
    ])

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.reader = None
        self.calibrator = None
        self.plotter = None

    def setup(self):
        self.log_format = "%(levelname)s: %(message)s [%(name)s.%(funcName)s]"
        kwargs = dict(config=self.config, tool=self)

        reader_factory = EventFileReaderFactory(**kwargs)
        reader_class = reader_factory.get_class()
        self.reader = reader_class(**kwargs)

        self.calibrator = CameraCalibrator(origin=self.reader.origin, **kwargs)

        self.plotter = ImagePlotter(**kwargs)

    def start(self):
        source = self.reader.read()
        for event in source:
            self.calibrator.calibrate(event)

            tel_list = event.r0.tels_with_data

            if self.telescope:
                if self.telescope not in tel_list:
                    continue
                tel_list = [self.telescope]
            for telid in tel_list:
                self.plotter.plot(event, telid)

    def finish(self):
        self.plotter.finish()
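Tools like this are normally launched from the command line; a hedged programmatic equivalent, using the aliases defined above (assuming ctapipe's Tool.run accepts an argv list; 'events.simtel.gz' is a placeholder path):

tool = DisplayDL1Calib()
tool.run(argv=['-f', 'events.simtel.gz', '--max_events=10', '-T', '1'])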
Example #20
0
class ChargeResolutionGenerator(Tool):
    name = "ChargeResolutionGenerator"
    description = "Generate the a pickle file of ChargeResolutionFile for " \
                  "either MC or data files."

    telescopes = Int(1,
                     help='Telescopes to include from the event file. '
                     'Default = 1').tag(config=True)
    output_name = Unicode('charge_resolution',
                          help='Name of the output charge resolution hdf5 '
                          'file').tag(config=True)
    input_path = Unicode(help='Path to directory containing data').tag(
        config=True)

    max_events = Int(1,
                     help='Maximum number of events to use').tag(config=True)

    plot_cam = Bool(False,
                    help="enable plotting of individual camera").tag(config=True)

    use_true_pe = Bool(False, help="Use true mc p.e.").tag(config=True)

    calibrator = Unicode(
        'HESSIOR1Calibrator',
        help='which calibrator to use, default = HESSIOR1Calibrator').tag(
            config=True)

    aliases = Dict(
        dict(input_path='ChargeResolutionGenerator.input_path',
             calibrator='ChargeResolutionGenerator.calibrator',
             max_events='ChargeResolutionGenerator.max_events',
             extractor='ChargeExtractorFactory.product',
             window_width='ChargeExtractorFactory.window_width',
             t0='ChargeExtractorFactory.t0',
             window_shift='ChargeExtractorFactory.window_shift',
             sig_amp_cut_HG='ChargeExtractorFactory.sig_amp_cut_HG',
             sig_amp_cut_LG='ChargeExtractorFactory.sig_amp_cut_LG',
             lwt='ChargeExtractorFactory.lwt',
             clip_amplitude='CameraDL1Calibrator.clip_amplitude',
             radius='CameraDL1Calibrator.radius',
             max_pe='ChargeResolutionCalculator.max_pe',
             T='ChargeResolutionGenerator.telescopes',
             o='ChargeResolutionGenerator.output_name',
             plot_cam='ChargeResolutionGenerator.plot_cam',
             use_true_pe='ChargeResolutionGenerator.use_true_pe'))
    classes = List([
        EventSourceFactory, HESSIOEventSource, TargetIOEventSource,
        ChargeExtractorFactory, CameraDL1Calibrator,
        ChargeResolutionCalculator, CameraCalibrator
    ])

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.eventsource = None
        self.r1 = None
        self.dl0 = None
        self.dl1 = None
        self.calculator = None
        self.cal = None

    def setup(self):
        kwargs = dict(config=self.config, tool=self)
        self.dl0 = CameraDL0Reducer(**kwargs)

        self.dl1 = CameraDL1Calibrator(**kwargs)

        self.cal = CameraCalibrator(r1_product=self.calibrator)

        self.calculator = ChargeResolutionCalculator(**kwargs)

    def start(self):
        run_list = np.loadtxt('%s/runlist.txt' % self.input_path, unpack=True)
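        # runlist.txt format (assumed): whitespace-separated columns, read
        # with unpack=True, so run_list[i] is column i. As used below:
        # col 0 = run number, col 2 = input p.e., col 3 = number of events,
        # col 5 (optional) = NSB.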
        plot_cam = False
        plot_delay = 0.5
        disp = None
        n_events = []
        trig_eff = []

        n_pe = []
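        # 'debug' is assumed to be a module-level flag defined elsewhere.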
        if debug:
            fig = plt.figure(1)
            ax = fig.add_subplot(111)
        for n, run in enumerate(run_list[0]):
            n_events.append(run_list[3][n])
            n_pe.append(run_list[2][n])
            # TODO remove need for hardcoded file name
            if self.calibrator == "TargetIOR1Calibrator":
                file_name = "%s/Run%05d_r1.tio" % (self.input_path, int(run))
                print(file_name)
            elif self.calibrator == "HESSIOR1Calibrator":
                file_name = "%s/Run%05d_mc.simtel.gz" % (self.input_path,
                                                         int(run))
                print(file_name)

            try:
                source = EventSourceFactory.produce(input_url=file_name,
                                                    max_events=self.max_events)
                true_pe = []
                # lab_pe = []
                peds_all = []
                n_trig = 0
                for event in tqdm(source):
                    n_trig += 1
                    # true_pe.append()
                #     self.cal.calibrate(event)
                #     self.dl0.reduce(event)
                #     self.dl1.calibrate(event)
                #     input_pe = run_list[2][n]
                #     try:
                #         input_nsb = run_list[5][n]
                #     except IndexError:
                #         print('File has no column for NSB, setting to 0')
                #         input_nsb = 0
                #     if self.plot_cam == True:
                #         if disp is None:
                #             geom = event.inst.subarray.tel[self.telescopes].camera
                #             disp = CameraDisplay(geom)
                #             disp.add_colorbar()
                #             plt.show(block=False)
                #         im = event.dl1.tel[self.telescopes].image[0]
                #         disp.image = im
                #         plt.pause(plot_delay)
                #
                #     teldata = event.r0.tel[self.telescopes].waveform[0]
                #     peds = teldata[:, 0:10].mean(axis=1)
                #     peds2 = teldata[:, 0:10].std(axis=1)
                #     peds_all.append(teldata[:, 0:90])
                #     # plt.hist(peds,bins=50, alpha=0.4)
                #     # plt.show()
                #     # print(teldata)
                #     # plt.plot(range(len(teldata[100])), teldata[100])
                #     # plt.show()
                #     # exit()
                # # print(np.mean(peds_all), np.std(peds_all))
                # # exit()
                #     # true_charge_mc = event.mc.tel[self.telescopes].photo_electron_image
                #     # measured_charge = event.dl1.tel[self.telescopes].image[0]
                #     # true_charge_lab = np.asarray([input_pe]*len(measured_charge))
                #     # true_pe.append(true_charge_mc)
                #     # if self.use_true_pe:
                #     #     true_charge=true_charge_mc
                #     # else:
                #     #     true_charge=true_charge_lab.astype(int)
                #     #
                #     # self.calculator.add_charges(true_charge, measured_charge)
                #
                # if debug:
                #     # plt.errorbar(input_nsb, np.mean(peds_all), np.std(peds_all),color='k')
                #     plt.scatter(input_nsb, np.std(peds_all), marker ='x',color='k')
            except FileNotFoundError:
                print('file not found: %s' % file_name)
            trig_eff.append(n_trig / run_list[3][n])

        plt.plot(n_pe, trig_eff)
        plt.show()
        if debug:
            plt.xscale('log')
            plt.yscale('log')
            plt.plot([0, 1000], [0, 1000], 'k:')
            plt.xlabel('Input p.e.')
            plt.ylabel('True mc p.e.')
            plt.show()

    def finish(self):
        self.calculator.save(self.output_name)
Example #21
0
class Paperboy(Application):
    """Base class for paperboy applications"""
    name = 'paperboy'
    description = 'paperboy'

    ############
    # Gunicorn #
    ############
    workers = Int(default_value=1,
                  help="Number of gunicorn workers").tag(config=True)
    port = Unicode(default_value='8080',
                   help="Port to run on").tag(config=True)
    ############

    ##########
    # Falcon #
    ##########
    api = Instance(falcon.API, help="A Falcon API instance").tag(config=True)
    ##########

    ########
    # URLs #
    ########
    baseurl = Unicode(default_value='/',
                      help="Base URL (for reverse proxies)").tag(config=True)
    apiurl = Unicode(
        default_value='/api/v1/',
        help="API base URL (for reverse proxies)").tag(config=True)
    loginurl = Unicode(default_value='login',
                       help="login url").tag(config=True)
    logouturl = Unicode(default_value='logout',
                        help="logout url").tag(config=True)
    registerurl = Unicode(default_value='register',
                          help="register url").tag(config=True)
    ########

    ########
    # Auth #
    ########
    http = Bool(
        default_value=True,
        help="Running on HTTP (as opposed to https, so token is insecure)"
    ).tag(config=True)
    include_password = Bool(default_value=False).tag(config=True)
    include_register = Bool(default_value=True).tag(config=True)
    token_timeout = Int(default_value=600).tag(config=True)
    #############

    ##########
    # Config #
    ##########
    # FIXME doesn't allow default_value yet
    user_config = UserConfig
    notebook_config = NotebookConfig
    job_config = JobConfig
    report_config = ReportConfig
    ##########

    ##############
    # Middleware #
    ##############
    essential_middleware = [
        CORSMiddleware(allow_all_origins=True).middleware,
        MultipartMiddleware()
    ]
    extra_middleware = List(
        default_value=[])  # List of extra middlewares to install
    auth_required_middleware = Instance(object)
    load_user_middleware = Instance(object)
    ##############

    ##################
    # Custom handler #
    ##################
    extra_handlers = List(
        trait=Tuple(), default_value=[]
    )  # List of tuples (route, handler) of handlers to install
    ##################

    ##########################################
    #        Predefined Configurations       #
    #
    ##########################################
    backend = Unicode(
        default_value='dummy',
        help="Backend set to use, options are {custom, dummy, git, sqla}").tag(
            config=True)
    scheduler = Unicode(
        default_value='dummy',
        help="Scheduler type to use, options are {dummy, airflow, luigi}").tag(
            config=True)
    auth = Unicode(
        default_value='dummy',
        help=
        "Authentication backend set to use, options are {none, sqla, custom}"
    ).tag(config=True)
    secret = Unicode()

    @validate('backend')
    def _validate_backend(self, proposed):
        if proposed['value'] not in (
                'custom',
                'dummy',
                'git',
                'sqla',
        ):
            raise TraitError('backend not recognized: {}'.format(
                proposed['value']))
        return proposed['value']

    @validate('auth')
    def _validate_auth(self, proposed):
        if proposed['value'] not in (
                'custom',
                'none',
                'sqla',
        ):
            raise TraitError('auth backend not recognized: {}'.format(
                proposed['value']))
        return proposed['value']

    ##########################################

    ###########
    # Storage #
    ###########
    # FIXME doesn't allow default_value yet
    storage = SQLAStorageConfig()
    dev = Bool(default_value=False)
    ###########

    #############
    # Scheduler #
    #############
    # FIXME doesn't allow default_value yet
    scheduler_config = Instance(klass=SchedulerConfig, args=(), kwargs={})
    #############

    ##################
    # Output         #
    ##################
    output = Instance(klass=LocalOutputConfig, args=(), kwargs={})

    ##################

    def start(self):
        """Start the whole thing"""
        self.port = os.environ.get('PORT', self.port)

        options = {
            'bind': '0.0.0.0:{}'.format(self.port),
            'workers': self.workers
        }
        self.secret = str(uuid4())

        if self.dev:
            self.storage.sql_url = 'sqlite:///:memory:'
            logging.critical('Using SQL in memory backend')

            self.storage.engine = create_engine(self.storage.sql_url,
                                                echo=False)
            Base.metadata.create_all(self.storage.engine)

            self.storage.sessionmaker = sessionmaker(bind=self.storage.engine)
            self.backend = 'sqla'
            self.auth = 'sqla'
            self.extra_middleware = self.extra_middleware + [
                SQLAlchemySessionMiddleware(self.storage.sessionmaker)
            ]
            self.storage.notebook_storage = NotebookSQLStorage
            self.storage.job_storage = JobSQLStorage
            self.storage.report_storage = ReportSQLStorage
            self.storage.user_storage = UserSQLStorage
            self.storage.sql_user = True

            logging.critical('Using SQL auth')
            self.auth_required_middleware = SQLAuthRequiredMiddleware
            self.load_user_middleware = SQLUserMiddleware

            logging.critical('Using Dummy scheduler')
            self.scheduler = 'dummy'
            self.scheduler_config = DummySchedulerConfig()

        else:

            # Preconfigured storage backends
            if self.backend == 'git':
                logging.critical('Using Git backend')
                raise NotImplementedError()

            # default to sqla
            # elif self.backend == 'sqla':
            else:
                logging.critical('Using SQL backend')

                self.storage.engine = create_engine(
                    os.environ.get('PAPERBOY_SQL_URL') or self.storage.sql_url,
                    echo=False)
                Base.metadata.create_all(self.storage.engine)

                self.storage.sessionmaker = sessionmaker(
                    bind=self.storage.engine)
                self.extra_middleware = self.extra_middleware + [
                    SQLAlchemySessionMiddleware(self.storage.sessionmaker)
                ]
                self.storage.notebook_storage = NotebookSQLStorage
                self.storage.job_storage = JobSQLStorage
                self.storage.report_storage = ReportSQLStorage
                self.storage.user_storage = UserSQLStorage
                self.storage.sql_user = True
                self.auth = 'sqla'

            # Preconfigured auth backends
            if self.auth == 'none':
                logging.critical('Using No auth')
                self.auth_required_middleware = NoAuthRequiredMiddleware
                self.load_user_middleware = NoUserMiddleware

            elif self.auth == 'sqla':
                logging.critical('Using SQL auth')
                self.auth_required_middleware = SQLAuthRequiredMiddleware
                self.load_user_middleware = SQLUserMiddleware

            if self.scheduler == 'dummy':
                logging.critical('Using dummy scheduler')
                self.scheduler_config = DummySchedulerConfig()

            elif self.scheduler == 'airflow':
                logging.critical('Using Airflow scheduler')
                self.scheduler_config = AirflowSchedulerConfig()

            elif self.scheduler == 'luigi':
                logging.critical('Using Luigi scheduler')
                self.scheduler_config = LuigiSchedulerConfig()

            elif self.scheduler == 'local':
                logging.critical('Using Local scheduler')
                self.scheduler_config = LocalSchedulerConfig()

            else:
                raise Exception('Must specify a valid scheduler!')

        FalconDeploy(FalconAPI(self), options).run()

    @classmethod
    def launch_instance(cls, argv=None, **kwargs):
        """Launch an instance of a Paperboy Application"""
        return super(Paperboy, cls).launch_instance(argv=argv, **kwargs)

    def to_dict(self):
        return {
            'name': self.name,
            'description': self.description,
            'workers': self.workers,
            'port': self.port
        }

    aliases = {
        'workers': 'Paperboy.workers',
        'port': 'Paperboy.port',
        'baseurl': 'Paperboy.baseurl',
        'backend': 'Paperboy.backend',
        'scheduler': 'Paperboy.scheduler',
        'auth': 'Paperboy.auth',
        'sql_url': 'Paperboy.storage.sql_url',
    }

    def _login_redirect(config, *args, **kwargs):
        raise falcon.HTTPFound(urljoin(config.baseurl, config.loginurl))
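A hedged launch sketch, using the aliases defined above (values illustrative):

if __name__ == '__main__':
    Paperboy.launch_instance(
        argv=['--port=8081', '--backend=sqla', '--auth=sqla'])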
Example #22
0
class Envelope(AudioNode, ScheduleObserveMixin):
    """ADSR envelope generator.

    Envelope outputs a signal which can be connected to a :class:`Signal`.

    """

    _model_name = Unicode("EnvelopeModel").tag(sync=True)

    attack = Float(0.01, help="Envelope attack").tag(sync=True)
    decay = Float(0.1, help="Envelope decay").tag(sync=True)
    sustain = Float(1.0, help="Envelope sustain").tag(sync=True)
    release = Float(0.5, help="Envelope release").tag(sync=True)

    attack_curve = Union([Enum(CURVES), List(Float())],
                         default_value="linear").tag(sync=True)
    decay_curve = Enum(BASIC_CURVES,
                       default_value="exponential").tag(sync=True)
    release_curve = Union([Enum(CURVES), List(Float())],
                          default_value="exponential").tag(sync=True)

    array = Array(allow_none=True,
                  default_value=None,
                  read_only=True,
                  help="Envelope data").tag(sync=True,
                                            **data_array_serialization)
    array_length = Int(
        1024, help="Envelope data resolution (array length)").tag(sync=True)
    sync_array = Bool(False,
                      help="If True, synchronize envelope data").tag(sync=True)

    _observable_traits = List(["value"])

    def __init__(self, **kwargs):
        if "_output" not in kwargs:
            out_node = Signal(units="normalRange", _create_node=False)
            kwargs.update({"_output": out_node})

        super().__init__(**kwargs)

    def trigger_attack(self, time=None, velocity=1):
        add_or_send_event("triggerAttack", self, {
            "time": time,
            "velocity": velocity
        })
        return self

    def trigger_release(self, time=None):
        add_or_send_event("triggerRelease", self, {"time": time})
        return self

    def trigger_attack_release(self, duration, time=None, velocity=1):
        args = {"duration": duration, "time": time, "velocity": velocity}
        add_or_send_event("triggerAttackRelease", self, args)
        return self

    def _repr_keys(self):
        for key in super()._repr_keys():
            yield key
        for key in ["attack", "decay", "sustain", "release"]:
            yield key
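A brief usage sketch (times are in seconds; scheduling follows the add_or_send_event calls above):

env = Envelope(attack=0.05, decay=0.2, sustain=0.6, release=1.5)
env.trigger_attack_release(duration=0.5)  # attack now, release 0.5s later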
Example #23
0
class BokehFileViewer(Tool):
    name = "BokehFileViewer"
    description = ("Interactively explore an event file using the bokeh "
                   "visualisation package")

    port = Int(5006, help="Port to open bokeh server onto").tag(config=True)
    disable_server = Bool(False,
                          help="Do not start the bokeh server "
                          "(useful for testing)").tag(config=True)

    default_url = get_dataset_path("gamma_test_large.simtel.gz")
    EventSource.input_url.default_value = default_url

    cleaner_product = tool_utils.enum_trait(WaveformCleaner,
                                            default='NullWaveformCleaner')
    extractor_product = tool_utils.enum_trait(
        ChargeExtractor, default='NeighbourPeakIntegrator')

    aliases = Dict(
        dict(
            port='BokehFileViewer.port',
            disable_server='BokehFileViewer.disable_server',
            f='EventSource.input_url',
            max_events='EventSource.max_events',
            extractor='BokehFileViewer.extractor_product',
            cleaner='BokehFileViewer.cleaner_product',
            simpleintegrator_t0='SimpleIntegrator.t0',
            window_width='WindowIntegrator.window_width',
            window_shift='WindowIntegrator.window_shift',
            sig_amp_cut_HG='PeakFindingIntegrator.sig_amp_cut_HG',
            sig_amp_cut_LG='PeakFindingIntegrator.sig_amp_cut_LG',
            lwt='NeighbourPeakIntegrator.lwt',
        ))

    classes = List([
        EventSource,
        CameraDL1Calibrator,
    ] + tool_utils.classes_with_traits(WaveformCleaner) +
                   tool_utils.classes_with_traits(ChargeExtractor) +
                   tool_utils.classes_with_traits(CameraR1Calibrator))

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._event = None
        self._event_index = None
        self._event_id = None
        self._telid = None
        self._channel = None

        self.w_next_event = None
        self.w_previous_event = None
        self.w_event_index = None
        self.w_event_id = None
        self.w_goto_event_index = None
        self.w_goto_event_id = None
        self.w_telid = None
        self.w_channel = None
        self.w_dl1_dict = None
        self.wb_extractor = None
        self.layout = None

        self.reader = None
        self.seeker = None
        self.extractor = None
        self.cleaner = None
        self.r1 = None
        self.dl0 = None
        self.dl1 = None
        self.viewer = None

        self._updating_dl1 = False

    def setup(self):
        self.log_format = "%(levelname)s: %(message)s [%(name)s.%(funcName)s]"

        self.reader = EventSource.from_config(parent=self)
        self.seeker = EventSeeker(self.reader, parent=self)

        self.extractor = ChargeExtractor.from_name(self.extractor_product,
                                                   parent=self)
        self.cleaner = WaveformCleaner.from_name(self.cleaner_product,
                                                 parent=self)
        self.r1 = CameraR1Calibrator.from_eventsource(eventsource=self.reader,
                                                      parent=self)
        self.dl0 = CameraDL0Reducer(parent=self)
        self.dl1 = CameraDL1Calibrator(extractor=self.extractor,
                                       cleaner=self.cleaner,
                                       parent=self)

        self.viewer = BokehEventViewer(parent=self)

        # Setup widgets
        self.viewer.create()
        self.viewer.enable_automatic_index_increment()
        self.create_previous_event_widget()
        self.create_next_event_widget()
        self.create_event_index_widget()
        self.create_goto_event_index_widget()
        self.create_event_id_widget()
        self.create_goto_event_id_widget()
        self.create_telid_widget()
        self.create_channel_widget()
        self.create_dl1_widgets()
        self.update_dl1_widget_values()

        # Setup layout
        self.layout = layout([[self.viewer.layout],
                              [
                                  self.w_previous_event, self.w_next_event,
                                  self.w_goto_event_index, self.w_goto_event_id
                              ], [self.w_event_index, self.w_event_id],
                              [self.w_telid, self.w_channel],
                              [self.wb_extractor]])

    def start(self):
        self.event_index = 0

    def finish(self):
        if not self.disable_server:

            def modify_doc(doc):
                doc.add_root(self.layout)
                doc.title = self.name

                directory = os.path.abspath(os.path.dirname(__file__))
                theme_path = os.path.join(directory, "theme.yaml")
                template_path = os.path.join(directory, "templates")
                doc.theme = Theme(filename=theme_path)
                env = jinja2.Environment(
                    loader=jinja2.FileSystemLoader(template_path))
                doc.template = env.get_template('index.html')

            self.log.info('Opening Bokeh application on '
                          'http://localhost:{}/'.format(self.port))
            server = Server({'/': modify_doc}, num_procs=1, port=self.port)
            server.start()
            server.io_loop.add_callback(server.show, "/")
            server.io_loop.start()

    @property
    def event_index(self):
        return self._event_index

    @event_index.setter
    def event_index(self, val):
        try:
            self.event = self.seeker[val]
        except IndexError:
            self.log.warning(f"Event Index {val} does not exist")

    @property
    def event_id(self):
        return self._event_id

    @event_id.setter
    def event_id(self, val):
        try:
            self.event = self.seeker[str(val)]
        except IndexError:
            self.log.warning(f"Event ID {val} does not exist")

    @property
    def telid(self):
        return self._telid

    @telid.setter
    def telid(self, val):
        self.channel = 0
        tels = list(self.event.r0.tels_with_data)
        if val not in tels:
            val = tels[0]
        self._telid = val
        self.viewer.telid = val
        self.update_telid_widget()

    @property
    def channel(self):
        return self._channel

    @channel.setter
    def channel(self, val):
        self._channel = val
        self.viewer.channel = val
        self.update_channel_widget()

    @property
    def event(self):
        return self._event

    @event.setter
    def event(self, val):

        # Calibrate
        self.r1.calibrate(val)
        self.dl0.reduce(val)
        self.dl1.calibrate(val)

        self._event = val

        self.viewer.event = val

        self._event_index = val.count
        self._event_id = val.r0.event_id
        self.update_event_index_widget()
        self.update_event_id_widget()

        self._telid = self.viewer.telid
        self.update_telid_widget()

        self._channel = self.viewer.channel
        self.update_channel_widget()

    def update_dl1_calibrator(self, extractor=None, cleaner=None):
        """
        Recreate the dl1 calibrator with the specified extractor and cleaner

        Parameters
        ----------
        extractor : ctapipe.image.charge_extractors.ChargeExtractor
        cleaner : ctapipe.image.waveform_cleaning.WaveformCleaner
        """
        if extractor is None:
            extractor = self.dl1.extractor
        if cleaner is None:
            cleaner = self.dl1.cleaner

        self.extractor = extractor
        self.cleaner = cleaner

        self.dl1 = CameraDL1Calibrator(extractor=self.extractor,
                                       cleaner=self.cleaner,
                                       parent=self)
        self.dl1.calibrate(self.event)
        self.viewer.refresh()

    def create_next_event_widget(self):
        self.w_next_event = Button(label=">", button_type="default", width=50)
        self.w_next_event.on_click(self.on_next_event_widget_click)

    def on_next_event_widget_click(self):
        self.event_index += 1

    def create_previous_event_widget(self):
        self.w_previous_event = Button(label="<",
                                       button_type="default",
                                       width=50)
        self.w_previous_event.on_click(self.on_previous_event_widget_click)

    def on_previous_event_widget_click(self):
        self.event_index -= 1

    def create_event_index_widget(self):
        self.w_event_index = TextInput(title="Event Index:", value='')

    def update_event_index_widget(self):
        if self.w_event_index:
            self.w_event_index.value = str(self.event_index)

    def create_event_id_widget(self):
        self.w_event_id = TextInput(title="Event ID:", value='')

    def update_event_id_widget(self):
        if self.w_event_id:
            self.w_event_id.value = str(self.event_id)

    def create_goto_event_index_widget(self):
        self.w_goto_event_index = Button(label="GOTO Index",
                                         button_type="default",
                                         width=100)
        self.w_goto_event_index.on_click(self.on_goto_event_index_widget_click)

    def on_goto_event_index_widget_click(self):
        self.event_index = int(self.w_event_index.value)

    def create_goto_event_id_widget(self):
        self.w_goto_event_id = Button(label="GOTO ID",
                                      button_type="default",
                                      width=70)
        self.w_goto_event_id.on_click(self.on_goto_event_id_widget_click)

    def on_goto_event_id_widget_click(self):
        self.event_id = int(self.w_event_id.value)

    def create_telid_widget(self):
        self.w_telid = Select(title="Telescope:", value="", options=[])
        self.w_telid.on_change('value', self.on_telid_widget_change)

    def update_telid_widget(self):
        if self.w_telid:
            tels = [str(t) for t in self.event.r0.tels_with_data]
            self.w_telid.options = tels
            self.w_telid.value = str(self.telid)

    def on_telid_widget_change(self, _, __, ___):
        if self.telid != int(self.w_telid.value):
            self.telid = int(self.w_telid.value)

    def create_channel_widget(self):
        self.w_channel = Select(title="Channel:", value="", options=[])
        self.w_channel.on_change('value', self.on_channel_widget_change)

    def update_channel_widget(self):
        if self.w_channel:
            try:
                n_chan = self.event.r0.tel[self.telid].waveform.shape[0]
            except AttributeError:
                n_chan = 1
            channels = [str(c) for c in range(n_chan)]
            self.w_channel.options = channels
            self.w_channel.value = str(self.channel)

    def on_channel_widget_change(self, _, __, ___):
        if self.channel != int(self.w_channel.value):
            self.channel = int(self.w_channel.value)

    def create_dl1_widgets(self):
        self.w_dl1_dict = dict(
            cleaner=Select(title="Cleaner:",
                           value='',
                           width=5,
                           options=BokehFileViewer.cleaner_product.values),
            extractor=Select(title="Extractor:",
                             value='',
                             width=5,
                             options=BokehFileViewer.extractor_product.values),
            extractor_t0=TextInput(title="T0:", value=''),
            extractor_window_width=TextInput(title="Window Width:", value=''),
            extractor_window_shift=TextInput(title="Window Shift:", value=''),
            extractor_sig_amp_cut_HG=TextInput(title="Significant Amplitude "
                                               "Cut (HG):",
                                               value=''),
            extractor_sig_amp_cut_LG=TextInput(title="Significant Amplitude "
                                               "Cut (LG):",
                                               value=''),
            extractor_lwt=TextInput(title="Local Pixel Weight:", value=''))

        for val in self.w_dl1_dict.values():
            val.on_change('value', self.on_dl1_widget_change)

        self.wb_extractor = widgetbox(
            PreText(text="Charge Extractor Configuration"),
            self.w_dl1_dict['cleaner'], self.w_dl1_dict['extractor'],
            self.w_dl1_dict['extractor_t0'],
            self.w_dl1_dict['extractor_window_width'],
            self.w_dl1_dict['extractor_window_shift'],
            self.w_dl1_dict['extractor_sig_amp_cut_HG'],
            self.w_dl1_dict['extractor_sig_amp_cut_LG'],
            self.w_dl1_dict['extractor_lwt'])

    def update_dl1_widget_values(self):
        if self.w_dl1_dict:
            for key, val in self.w_dl1_dict.items():
                if 'extractor' in key:
                    if key == 'extractor':
                        val.value = self.extractor.__class__.__name__
                    else:
                        key = key.replace("extractor_", "")
                        try:
                            val.value = str(getattr(self.extractor, key))
                        except AttributeError:
                            val.value = ''
                elif 'cleaner' in key:
                    if key == 'cleaner':
                        val.value = self.cleaner.__class__.__name__
                    else:
                        key = key.replace("cleaner_", "")
                        try:
                            val.value = str(getattr(self.cleaner, key))
                        except AttributeError:
                            val.value = ''

    def on_dl1_widget_change(self, _, __, ___):
        if self.event:
            if not self._updating_dl1:
                self._updating_dl1 = True
                cmdline = []
                for key, val in self.w_dl1_dict.items():
                    if val.value:
                        cmdline.append(f'--{key}')
                        cmdline.append(val.value)
                self.parse_command_line(cmdline)
                extractor = ChargeExtractor.from_name(self.extractor_product,
                                                      parent=self)
                cleaner = WaveformCleaner.from_name(self.cleaner_product,
                                                    parent=self)
                self.update_dl1_calibrator(extractor, cleaner)
                self.update_dl1_widget_values()
                self._updating_dl1 = False
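
For reference, the server-startup block at the top of this example follows Bokeh's standard application pattern: modify_doc is called once per browser session to populate that session's Document. A minimal, self-contained sketch of the same pattern (the Div content is a placeholder; 5006 is Bokeh's usual default port):

from bokeh.models import Div
from bokeh.server.server import Server

def modify_doc(doc):
    # Called once per browser session; attach this session's layout here.
    doc.add_root(Div(text="Hello from a Bokeh application"))
    doc.title = "Minimal Bokeh app"

server = Server({'/': modify_doc}, num_procs=1, port=5006)
server.start()
server.io_loop.add_callback(server.show, "/")  # open a browser tab once the loop runs
server.io_loop.start()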
Example #24
class DockerSpawner(Spawner):
    """A Spawner for JupyterHub that runs each user's server in a separate docker container"""

    _executor = None

    @property
    def executor(self):
        """single global executor"""
        cls = self.__class__
        if cls._executor is None:
            cls._executor = ThreadPoolExecutor(1)
        return cls._executor

    _client = None

    @property
    def client(self):
        """single global client instance"""
        cls = self.__class__
        if cls._client is None:
            kwargs = {"version": "auto"}
            if self.tls_config:
                kwargs["tls"] = docker.tls.TLSConfig(**self.tls_config)
            kwargs.update(kwargs_from_env())
            kwargs.update(self.client_kwargs)
            client = docker.APIClient(**kwargs)
            cls._client = client
        return cls._client

    # notice when user has set the command
    # default command is that of the container,
    # but user can override it via config
    _user_set_cmd = False

    @observe("cmd")
    def _cmd_changed(self, change):
        self._user_set_cmd = True

    object_id = Unicode()
    # the type of object we create
    object_type = "container"
    # the field containing the object id
    object_id_key = "Id"

    @property
    def container_id(self):
        """alias for object_id"""
        return self.object_id

    @property
    def container_name(self):
        """alias for object_name"""
        return self.object_name

    # deprecate misleading container_ip, since
    # it is not the ip in the container,
    # but the host ip of the port forwarded to the container
    # when use_internal_ip is False
    container_ip = Unicode("127.0.0.1", config=True)

    @observe("container_ip")
    def _container_ip_deprecated(self, change):
        self.log.warning(
            "DockerSpawner.container_ip is deprecated in dockerspawner-0.9."
            "  Use DockerSpawner.host_ip to specify the host ip that is forwarded to the container"
        )
        self.host_ip = change.new

    host_ip = Unicode(
        "127.0.0.1",
        help=
        """The ip address on the host on which to expose the container's port

        Typically 127.0.0.1, but can be public interfaces as well
        in cases where the Hub and/or proxy are on different machines
        from the user containers.

        Only used when use_internal_ip = False.
        """,
        config=True,
    )

    @default('host_ip')
    def _default_host_ip(self):
        docker_host = os.getenv('DOCKER_HOST')
        if docker_host:
            urlinfo = urlparse(docker_host)
            if urlinfo.scheme == 'tcp':
                return urlinfo.hostname
        return '127.0.0.1'

    # unlike container_ip, container_port is the internal port
    # on which the server is bound.
    container_port = Int(8888, min=1, max=65535, config=True)

    @observe("container_port")
    def _container_port_changed(self, change):
        self.log.warning(
            "DockerSpawner.container_port is deprecated in dockerspawner 0.9."
            "  Use DockerSpawner.port")
        self.port = change.new

    # fix default port to 8888, used in the container

    @default("port")
    def _port_default(self):
        return 8888

    # default to listening on all-interfaces in the container

    @default("ip")
    def _ip_default(self):
        return "0.0.0.0"

    container_image = Unicode("jupyterhub/singleuser:%s" % _jupyterhub_xy,
                              config=True)

    @observe("container_image")
    def _container_image_changed(self, change):
        self.log.warning(
            "DockerSpawner.container_image is deprecated in dockerspawner 0.9."
            "  Use DockerSpawner.image")
        self.image = change.new

    image = Unicode(
        "jupyterhub/singleuser:%s" % _jupyterhub_xy,
        config=True,
        help="""The image to use for single-user servers.

        This image should have the same version of jupyterhub as
        the Hub itself installed.

        If the default command of the image does not launch
        jupyterhub-singleuser, set `c.Spawner.cmd` to
        launch jupyterhub-singleuser.

        Any of the jupyter docker-stacks should work without additional config,
        as long as the version of jupyterhub in the image is compatible.
        """,
    )

    image_whitelist = Union(
        [Any(), Dict(), List()],
        default_value={},
        config=True,
        help="""
        List or dict of images that users can run.

        If specified, users will be presented with a form
        from which they can select an image to run.

        If a dictionary, the keys will be the options presented to users
        and the values the actual images that will be launched.

        If a list, will be cast to a dictionary where keys and values are the same
        (i.e. a shortcut for presenting the actual images directly to users).

        If a callable, will be called with the Spawner instance as its only argument.
        The user is accessible as spawner.user.
        The callable should return a dict or list as above.
        """,
    )

    @validate('image_whitelist')
    def _image_whitelist_dict(self, proposal):
        """cast image_whitelist to a dict

        If passing a list, cast it to a {item:item}
        dict where the keys and values are the same.
        """
        whitelist = proposal.value
        if isinstance(whitelist, list):
            whitelist = {item: item for item in whitelist}
        return whitelist

    def _get_image_whitelist(self):
        """Evaluate image_whitelist callable

        Or return the whitelist as-is if it's already a dict
        """
        if callable(self.image_whitelist):
            whitelist = self.image_whitelist(self)
            if not isinstance(whitelist, dict):
                # always return a dict
                whitelist = {item: item for item in whitelist}
            return whitelist
        return self.image_whitelist

    @default('options_form')
    def _default_options_form(self):
        image_whitelist = self._get_image_whitelist()
        if len(image_whitelist) <= 1:
            # no custom form unless there is more than one image to choose from
            return ''
        # form derived from wrapspawner.ProfileSpawner
        option_t = '<option value="{image}" {selected}>{image}</option>'
        options = [
            option_t.format(image=image,
                            selected='selected' if image == self.image else '')
            for image in image_whitelist
        ]
        return """
        <label for="image">Select an image:</label>
        <select class="form-control" name="image" required autofocus>
        {options}
        </select>
        """.format(options=options)

    def options_from_form(self, formdata):
        """Turn options formdata into user_options"""
        options = {}
        if 'image' in formdata:
            options['image'] = formdata['image'][0]
        return options

    pull_policy = CaselessStrEnum(
        ["always", "ifnotpresent", "never"],
        default_value="ifnotpresent",
        config=True,
        help="""The policy for pulling the user docker image.

        Choices:

        - ifnotpresent: pull if the image is not already present (default)
        - always: always pull the image to check for updates, even if it is present
        - never: never perform a pull
        """)

    container_prefix = Unicode(config=True,
                               help="DEPRECATED in 0.10. Use prefix")

    container_name_template = Unicode(
        config=True, help="DEPRECATED in 0.10. Use name_template")

    @observe("container_name_template", "container_prefix")
    def _deprecate_container_alias(self, change):
        new_name = change.name[len("container_"):]
        setattr(self, new_name, change.new)

    prefix = Unicode(
        "jupyter",
        config=True,
        help=dedent("""
            Prefix for container names. See name_template for full container name for a particular
            user's server.
            """),
    )

    name_template = Unicode(
        "{prefix}-{username}",
        config=True,
        help=dedent("""
            Name of the container or service: with {username}, {imagename}, {prefix} replacements.
            {raw_username} can be used for the original, not escaped username
            (may contain uppercase, special characters).
            The default name_template is <prefix>-<username> for backward compatibility.
            """),
    )

    client_kwargs = Dict(
        config=True,
        help=
        "Extra keyword arguments to pass to the docker.Client constructor.",
    )

    volumes = Dict(
        config=True,
        help=dedent("""
            Map from host file/directory to container (guest) file/directory
            mount point and (optionally) a mode. When specifying the
            guest mount point (bind) for the volume, you may use a
            dict or str. If a str, then the volume will default to a
            read-write (mode="rw"). With a dict, the bind is
            identified by "bind" and the "mode" may be one of "rw"
            (default), "ro" (read-only), "z" (public/shared SELinux
            volume label), and "Z" (private/unshared SELinux volume
            label).

            If format_volume_name is not set,
            default_format_volume_name is used for naming volumes.
            In this case, if you use {username} in either the host or guest
            file/directory path, it will be replaced with the current
            user's name.
            """),
    )

    move_certs_image = Unicode(
        "busybox:1.30.1",
        config=True,
        help="""The image used to stage internal SSL certificates.

        Busybox is used because we just need an empty container
        that waits while we stage files into the volume via .put_archive.
        """)

    @gen.coroutine
    def move_certs(self, paths):
        self.log.info("Staging internal ssl certs for %s", self._log_name)
        yield self.pull_image(self.move_certs_image)
        # create the volume
        volume_name = self.format_volume_name(self.certs_volume_name, self)
        # create volume passes even if it already exists
        self.log.info("Creating ssl volume %s for %s", volume_name,
                      self._log_name)
        yield self.docker('create_volume', volume_name)

        # create a tar archive of the internal cert files
        # docker.put_archive takes a tarfile and a running container
        # and unpacks the archive into the container
        nb_paths = {}
        tar_buf = BytesIO()
        archive = TarFile(fileobj=tar_buf, mode='w')
        for key, hub_path in paths.items():
            fname = os.path.basename(hub_path)
            nb_paths[key] = '/certs/' + fname
            with open(hub_path, 'rb') as f:
                content = f.read()
            tarinfo = TarInfo(name=fname)
            tarinfo.size = len(content)
            tarinfo.mtime = os.stat(hub_path).st_mtime
            tarinfo.mode = 0o644
            archive.addfile(tarinfo, BytesIO(content))
        archive.close()
        tar_buf.seek(0)

        # run a container to stage the certs,
        # mounting the volume at /certs/
        host_config = self.client.create_host_config(binds={
            volume_name: {
                "bind": "/certs",
                "mode": "rw"
            },
        }, )
        container = yield self.docker(
            'create_container',
            self.move_certs_image,
            volumes=["/certs"],
            host_config=host_config,
        )

        container_id = container['Id']
        self.log.debug(
            "Container %s is creating ssl certs for %s",
            container_id[:12],
            self._log_name,
        )
        # start the container
        yield self.docker('start', container_id)
        # stage the archive to the container
        try:
            yield self.docker(
                'put_archive',
                container=container_id,
                path='/certs',
                data=tar_buf,
            )
        finally:
            yield self.docker('remove_container', container_id)
        return nb_paths

    certs_volume_name = Unicode("{prefix}ssl-{username}",
                                config=True,
                                help="""Volume name

        The same string-templating applies to this
        as other volume names.
        """)

    read_only_volumes = Dict(
        config=True,
        help=dedent("""
            Map from host file/directory to container file/directory.
            Volumes specified here will be read-only in the container.

            If format_volume_name is not set,
            default_format_volume_name is used for naming volumes.
            In this case, if you use {username} in either the host or guest
            file/directory path, it will be replaced with the current
            user's name.
            """),
    )

    format_volume_name = Any(
        help=
        """Any callable that accepts a string template and a DockerSpawner instance as parameters in that order and returns a string.

        Reusable implementations should go in dockerspawner.VolumeNamingStrategy, tests should go in ...
        """).tag(config=True)

    @default("format_volume_name")
    def _get_default_format_volume_name(self):
        return default_format_volume_name

    use_docker_client_env = Bool(
        True,
        config=True,
        help="DEPRECATED. Docker env variables are always used if present.",
    )

    @observe("use_docker_client_env")
    def _client_env_changed(self, change):
        self.log.warning(
            "DockerSpawner.use_docker_client_env is deprecated and ignored."
            "  Docker environment variables are always used if defined.")

    tls_config = Dict(
        config=True,
        help="""Arguments to pass to docker TLS configuration.

        See docker.client.TLSConfig constructor for options.
        """,
    )
    tls = tls_verify = tls_ca = tls_cert = tls_key = tls_assert_hostname = Any(
        config=True,
        help=
        """DEPRECATED. Use DockerSpawner.tls_config dict to set any TLS options.""",
    )

    @observe("tls", "tls_verify", "tls_ca", "tls_cert", "tls_key",
             "tls_assert_hostname")
    def _tls_changed(self, change):
        self.log.warning(
            "%s config ignored, use %s.tls_config dict to set full TLS configuration.",
            change.name,
            self.__class__.__name__,
        )

    remove_containers = Bool(
        False,
        config=True,
        help="DEPRECATED in DockerSpawner 0.10. Use .remove")

    @observe("remove_containers")
    def _deprecate_remove_containers(self, change):
        # preserve remove_containers alias to .remove
        self.remove = change.new

    remove = Bool(
        False,
        config=True,
        help="""
        If True, delete containers when servers are stopped.

        This will destroy any data in the container not stored in mounted volumes.
        """,
    )

    @property
    def will_resume(self):
        # indicate that we will resume,
        # so JupyterHub >= 0.7.1 won't cleanup our API token
        return not self.remove

    extra_create_kwargs = Dict(
        config=True, help="Additional args to pass for container create")
    extra_host_config = Dict(
        config=True,
        help="Additional args to create_host_config for container create")

    _docker_safe_chars = set(string.ascii_letters + string.digits + "-")
    _docker_escape_char = "_"

    hub_ip_connect = Unicode(
        config=True,
        help=dedent("""
            If set, DockerSpawner will configure the containers to use
            the specified IP to connect the hub api.  This is useful
            when the hub_api is bound to listen on all ports or is
            running inside of a container.
            """),
    )

    @observe("hub_ip_connect")
    def _ip_connect_changed(self, change):
        if jupyterhub.version_info >= (0, 8):
            warnings.warn(
                "DockerSpawner.hub_ip_connect is no longer needed with JupyterHub 0.8."
                "  Use JupyterHub.hub_connect_ip instead.",
                DeprecationWarning,
            )

    use_internal_ip = Bool(
        False,
        config=True,
        help=dedent("""
            Enable the usage of the internal docker ip. This is useful if you are running
            jupyterhub (as a container) and the user containers within the same docker network.
            E.g. by mounting the docker socket of the host into the jupyterhub container.
            Default is True if using a docker network, False if bridge or host networking is used.
            """),
    )

    @default("use_internal_ip")
    def _default_use_ip(self):
        # setting network_name to something other than bridge or host implies use_internal_ip
        return self.network_name not in {"bridge", "host"}

    use_internal_hostname = Bool(
        False,
        config=True,
        help=dedent("""
            Use the docker hostname for connecting,
            instead of an IP address.
            This should work in general when using docker networks,
            and must be used when internal_ssl is enabled.
            It is enabled by default if internal_ssl is enabled.
            """),
    )

    @default("use_internal_hostname")
    def _default_use_hostname(self):
        # FIXME: replace getattr with self.internal_ssl
        # when minimum jupyterhub is 1.0
        return getattr(self, 'internal_ssl', False)

    links = Dict(
        config=True,
        help=dedent("""
            Specify docker link mapping to add to the container, e.g.

                links = {'jupyterhub': 'jupyterhub'}

            If the Hub is running in a Docker container,
            this can simplify routing because all traffic will be using docker hostnames.
            """),
    )

    network_name = Unicode(
        "bridge",
        config=True,
        help=dedent("""
            Run the containers on this docker network.
            If it is an internal docker network, the Hub should be on the same network,
            as internal docker IP addresses will be used.
            For bridge networking, external ports will be bound.
            """),
    )

    @property
    def tls_client(self):
        """A tuple consisting of the TLS client certificate and key if they
        have been provided, otherwise None.

        """
        if self.tls_cert and self.tls_key:
            return (self.tls_cert, self.tls_key)

        return None

    @property
    def volume_mount_points(self):
        """
        Volumes are declared in docker-py in two stages.  First, you declare
        all the locations where you're going to mount volumes when you call
        create_container.
        Returns a sorted list of all the values in self.volumes or
        self.read_only_volumes.
        """
        return sorted([value["bind"] for value in self.volume_binds.values()])

    @property
    def volume_binds(self):
        """
        The second half of declaring a volume with docker-py happens when you
        actually call start().  The required format is a dict of dicts that
        looks like:

        {
            host_location: {'bind': container_location, 'mode': 'rw'}
        }
        mode may be 'ro', 'rw', 'z', or 'Z'.

        """
        binds = self._volumes_to_binds(self.volumes, {})
        read_only_volumes = {}
        # FIXME: replace getattr with self.internal_ssl
        # when minimum jupyterhub is 1.0
        if getattr(self, 'internal_ssl', False):
            # add SSL volume as read-only
            read_only_volumes[self.certs_volume_name] = '/certs'
        read_only_volumes.update(self.read_only_volumes)
        return self._volumes_to_binds(read_only_volumes, binds, mode="ro")

    _escaped_name = None

    @property
    def escaped_name(self):
        """Escape the username so it's safe for docker objects"""
        if self._escaped_name is None:
            self._escaped_name = self._escape(self.user.name)
        return self._escaped_name

    def _escape(self, s):
        """Escape a string to docker-safe characters"""
        return escape(
            s,
            safe=self._docker_safe_chars,
            escape_char=self._docker_escape_char,
        )

    object_id = Unicode(allow_none=True)

    def template_namespace(self):
        escaped_image = self.image.replace("/", "_")
        server_name = getattr(self, "name", "")
        return {
            "username": self.escaped_name,
            "safe_username": self.user.name,
            "raw_username": self.user.name,
            "imagename": escaped_image,
            "servername": server_name,
            "prefix": self.prefix,
        }

    @property
    def object_name(self):
        """Render the name of our container/service using name_template"""
        return self.name_template.format(**self.template_namespace())

    def load_state(self, state):
        super(DockerSpawner, self).load_state(state)
        if "container_id" in state:
            # backward-compatibility for dockerspawner < 0.10
            self.object_id = state.get("container_id")
        else:
            self.object_id = state.get("object_id", "")

    def get_state(self):
        state = super(DockerSpawner, self).get_state()
        if self.object_id:
            state["object_id"] = self.object_id
        return state

    def _public_hub_api_url(self):
        proto, path = self.hub.api_url.split("://", 1)
        ip, rest = path.split(":", 1)
        return "{proto}://{ip}:{rest}".format(proto=proto,
                                              ip=self.hub_ip_connect,
                                              rest=rest)

    def _env_keep_default(self):
        """Don't inherit any env from the parent process"""
        return []

    def get_args(self):
        args = super().get_args()
        if self.hub_ip_connect:
            # JupyterHub 0.7 specifies --hub-api-url
            # on the command-line, which is hard to update
            for idx, arg in enumerate(list(args)):
                if arg.startswith("--hub-api-url="):
                    args.pop(idx)
                    break

            args.append("--hub-api-url=%s" % self._public_hub_api_url())
        return args

    def _docker(self, method, *args, **kwargs):
        """wrapper for calling docker methods

        to be passed to ThreadPoolExecutor
        """
        m = getattr(self.client, method)
        return m(*args, **kwargs)

    def docker(self, method, *args, **kwargs):
        """Call a docker method in a background thread

        returns a Future
        """
        return self.executor.submit(self._docker, method, *args, **kwargs)

    @gen.coroutine
    def poll(self):
        """Check for my id in `docker ps`"""
        container = yield self.get_object()
        if not container:
            self.log.warning("Container not found: %s", self.container_name)
            return 0

        container_state = container["State"]
        self.log.debug("Container %s status: %s", self.container_id[:7],
                       pformat(container_state))

        if container_state["Running"]:
            return None

        else:
            return ("ExitCode={ExitCode}, "
                    "Error='{Error}', "
                    "FinishedAt={FinishedAt}".format(**container_state))

    @gen.coroutine
    def get_object(self):
        self.log.debug("Getting container '%s'", self.object_name)
        try:
            obj = yield self.docker("inspect_%s" % self.object_type,
                                    self.object_name)
            self.object_id = obj[self.object_id_key]
        except APIError as e:
            if e.response.status_code == 404:
                self.log.info("%s '%s' is gone", self.object_type.title(),
                              self.object_name)
                obj = None
                # my container is gone, forget my id
                self.object_id = ""
            elif e.response.status_code == 500:
                self.log.info(
                    "%s '%s' is on unhealthy node",
                    self.object_type.title(),
                    self.object_name,
                )
                obj = None
                # my container is unhealthy, forget my id
                self.object_id = ""
            else:
                raise

        return obj

    @gen.coroutine
    def get_command(self):
        """Get the command to run (full command + args)"""
        if self._user_set_cmd:
            cmd = self.cmd
        else:
            image_info = yield self.docker("inspect_image", self.image)
            cmd = image_info["Config"]["Cmd"]
        return cmd + self.get_args()

    @gen.coroutine
    def remove_object(self):
        self.log.info("Removing %s %s", self.object_type, self.object_id)
        # remove the container, as well as any associated volumes
        try:
            yield self.docker("remove_" + self.object_type,
                              self.object_id,
                              v=True)
        except docker.errors.APIError as e:
            if e.status_code == 409:
                self.log.debug("Already removing %s: %s", self.object_type,
                               self.object_id)
            else:
                raise

    @gen.coroutine
    def check_image_whitelist(self, image):
        image_whitelist = self._get_image_whitelist()
        if not image_whitelist:
            return image
        if image not in image_whitelist:
            raise web.HTTPError(
                400,
                "Image %s not in whitelist: %s" %
                (image, ', '.join(image_whitelist)),
            )
        # resolve image alias to actual image name
        return image_whitelist[image]

    @default('ssl_alt_names')
    def _get_ssl_alt_names(self):
        return ['DNS:' + self.internal_hostname]

    @gen.coroutine
    def create_object(self):
        """Create the container/service object"""

        create_kwargs = dict(
            image=self.image,
            environment=self.get_env(),
            volumes=self.volume_mount_points,
            name=self.container_name,
            command=(yield self.get_command()),
        )

        # ensure internal port is exposed
        create_kwargs["ports"] = {"%i/tcp" % self.port: None}

        create_kwargs.update(self.extra_create_kwargs)

        # build the dictionary of keyword arguments for host_config
        host_config = dict(binds=self.volume_binds, links=self.links)

        if getattr(self, "mem_limit", None) is not None:
            # If jupyterhub version > 0.7, mem_limit is a traitlet that can
            # be directly configured. If so, use it to set mem_limit.
            # this will still be overridden by extra_host_config
            host_config["mem_limit"] = self.mem_limit

        if not self.use_internal_ip:
            host_config["port_bindings"] = {self.port: (self.host_ip, )}
        host_config.update(self.extra_host_config)
        host_config.setdefault("network_mode", self.network_name)

        self.log.debug("Starting host with config: %s", host_config)

        host_config = self.client.create_host_config(**host_config)
        create_kwargs.setdefault("host_config", {}).update(host_config)

        # create the container
        obj = yield self.docker("create_container", **create_kwargs)
        return obj

    @gen.coroutine
    def start_object(self):
        """Actually start the container/service

        e.g. calling `docker start`
        """
        return self.docker("start", self.container_id)

    @gen.coroutine
    def stop_object(self):
        """Stop the container/service

        e.g. calling `docker stop`. Does not remove the container.
        """
        return self.docker("stop", self.container_id)

    @gen.coroutine
    def pull_image(self, image):
        """Pull the image, if needed

        - pulls it unconditionally if pull_policy == 'always'
        - otherwise, checks if it exists, and
          - raises if pull_policy == 'never'
          - pulls if pull_policy == 'ifnotpresent'
        """
        # docker wants to split repo:tag
        if ':' in image:
            repo, tag = image.split(':', 1)
        else:
            repo = image
            tag = 'latest'

        if self.pull_policy.lower() == 'always':
            # always pull
            self.log.info("pulling %s", image)
            yield self.docker('pull', repo, tag)
            # done
            return
        try:
            # check if the image is present
            yield self.docker('inspect_image', image)
        except docker.errors.NotFound:
            if self.pull_policy == "never":
                # never pull, raise because there is no such image
                raise
            elif self.pull_policy == "ifnotpresent":
                # not present, pull it for the first time
                self.log.info("pulling image %s", image)
                yield self.docker('pull', repo, tag)

    @gen.coroutine
    def start(self,
              image=None,
              extra_create_kwargs=None,
              extra_host_config=None):
        """Start the single-user server in a docker container.

        Additional arguments to create/host config/etc. can be specified
        via .extra_create_kwargs and .extra_host_config attributes.

        If the container exists and `c.DockerSpawner.remove` is true, then
        the container is removed first. Otherwise, the existing containers
        will be restarted.
        """

        if image:
            self.log.warning("Specifying image via .start args is deprecated")
            self.image = image
        if extra_create_kwargs:
            self.log.warning(
                "Specifying extra_create_kwargs via .start args is deprecated")
            self.extra_create_kwargs.update(extra_create_kwargs)
        if extra_host_config:
            self.log.warning(
                "Specifying extra_host_config via .start args is deprecated")
            self.extra_host_config.update(extra_host_config)

        # image priority:
        # 1. user options (from spawn options form)
        # 2. self.image from config
        image_option = self.user_options.get('image')
        if image_option:
            # save choice in self.image
            self.image = yield self.check_image_whitelist(image_option)

        image = self.image
        yield self.pull_image(image)

        obj = yield self.get_object()
        if obj and self.remove:
            self.log.warning(
                "Removing %s that should have been cleaned up: %s (id: %s)",
                self.object_type,
                self.object_name,
                self.object_id[:7],
            )
            yield self.remove_object()

            obj = None

        if obj is None:
            obj = yield self.create_object()
            self.object_id = obj[self.object_id_key]
            self.log.info(
                "Created %s %s (id: %s) from image %s",
                self.object_type,
                self.object_name,
                self.object_id[:7],
                self.image,
            )

        else:
            self.log.info(
                "Found existing %s %s (id: %s)",
                self.object_type,
                self.object_name,
                self.object_id[:7],
            )
            # Handle re-using API token.
            # Get the API token from the environment variables
            # of the running container:
            for line in obj["Config"]["Env"]:
                if line.startswith(
                    ("JPY_API_TOKEN=", "JUPYTERHUB_API_TOKEN=")):
                    self.api_token = line.split("=", 1)[1]
                    break

        # TODO: handle unpause
        self.log.info(
            "Starting %s %s (id: %s)",
            self.object_type,
            self.object_name,
            self.container_id[:7],
        )

        # start the container
        yield self.start_object()

        ip, port = yield self.get_ip_and_port()
        if jupyterhub.version_info < (0, 7):
            # store on user for pre-jupyterhub-0.7:
            self.user.server.ip = ip
            self.user.server.port = port
        # jupyterhub 0.7 prefers returning ip, port:
        return (ip, port)

    @property
    def internal_hostname(self):
        """Return our hostname

        used with internal SSL
        """
        return self.container_name

    @gen.coroutine
    def get_ip_and_port(self):
        """Queries Docker daemon for container's IP and port.

        If you are using network_mode=host, you will need to override
        this method as follows::

            @gen.coroutine
            def get_ip_and_port(self):
                return self.host_ip, self.port

        You will need to make sure host_ip and port
        are correct, which depends on the route to the container
        and the port it opens.
        """
        if self.use_internal_hostname:
            # internal ssl uses hostnames,
            # required for domain-name matching with internal SSL
            # TODO: should we always do this?
            # are there any cases where internal_ip works
            # and internal_hostname doesn't?
            ip = self.internal_hostname
            port = self.port
        elif self.use_internal_ip:
            resp = yield self.docker("inspect_container", self.container_id)
            network_settings = resp["NetworkSettings"]
            if "Networks" in network_settings:
                ip = self.get_network_ip(network_settings)
            else:  # Fallback for old versions of docker (<1.9) without network management
                ip = network_settings["IPAddress"]
            port = self.port
        else:
            resp = yield self.docker("port", self.container_id, self.port)
            if resp is None:
                raise RuntimeError("Failed to get port info for %s" %
                                   self.container_id)

            ip = resp[0]["HostIp"]
            port = int(resp[0]["HostPort"])

        if ip == "0.0.0.0":
            ip = urlparse(self.client.base_url).hostname
            if ip == "localnpipe":
                ip = "localhost"

        return ip, port

    def get_network_ip(self, network_settings):
        networks = network_settings["Networks"]
        if self.network_name not in networks:
            raise Exception(
                "Unknown docker network '{network}'."
                " Did you create it with `docker network create <name>`?".
                format(network=self.network_name))

        network = networks[self.network_name]
        ip = network["IPAddress"]
        return ip

    @gen.coroutine
    def stop(self, now=False):
        """Stop the container

        Will remove the container if `c.DockerSpawner.remove` is `True`.

        Consider using pause/unpause when docker-py adds support.
        """
        self.log.info(
            "Stopping %s %s (id: %s)",
            self.object_type,
            self.object_name,
            self.object_id[:7],
        )
        yield self.stop_object()

        if self.remove:
            yield self.remove_object()

        self.clear_state()

    def _volumes_to_binds(self, volumes, binds, mode="rw"):
        """Extract the volume mount points from volumes property.

        Returns a dict of dict entries of the form::

            {'/host/dir': {'bind': '/guest/dir', 'mode': 'rw'}}
        """
        def _fmt(v):
            return self.format_volume_name(v, self)

        for k, v in volumes.items():
            m = mode
            if isinstance(v, dict):
                if "mode" in v:
                    m = v["mode"]
                v = v["bind"]
            binds[_fmt(k)] = {"bind": _fmt(v), "mode": m}
        return binds
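
Most of the traits above are intended to be set from jupyterhub_config.py. A minimal configuration sketch using only traits defined in this class (the image, network name, and host path are placeholders):

# jupyterhub_config.py
c.JupyterHub.spawner_class = 'dockerspawner.DockerSpawner'
c.DockerSpawner.image = 'jupyter/base-notebook:latest'   # placeholder image
c.DockerSpawner.network_name = 'jupyterhub-net'          # non-bridge network implies use_internal_ip
c.DockerSpawner.remove = True                            # delete containers when servers stop
c.DockerSpawner.pull_policy = 'ifnotpresent'
c.DockerSpawner.name_template = '{prefix}-{username}'
c.DockerSpawner.volumes = {
    '/srv/jupyterhub/{username}': {'bind': '/home/jovyan/work', 'mode': 'rw'},
}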
Example #25
File: axes.py Project: hohn/bqplot
class Axis(BaseAxis):
    """A line axis.

    A line axis is the visual representation of a numerical or date scale.

    Attributes
    ----------

    icon: string (class-level attribute)
        The font-awesome icon name for this object.
    axis_types: dict (class-level attribute)
        A registry of existing axis types.
    orientation: {'horizontal', 'vertical'}
        The orientation of the axis, either vertical or horizontal
    side: {'bottom', 'top', 'left', 'right'} or None (default: None)
        The side of the axis, either bottom, top, left or right.
    label: string (default: '')
        The axis label
    tick_format: string or None (default: '')
        The tick format for the axis.
    scale: Scale
        The scale represented by the axis
    num_ticks: int or None (default: None)
        If tick_values is None, number of ticks
    tick_values: numpy.ndarray or None (default: [])
        Tick values for the axis
    offset: dict (default: {})
        Contains a scale and a value {'scale': scale or None, 'value': value of the offset}
        If offset['scale'] is None, the corresponding figure scale is used
        instead.
    label_location: {'middle', 'start', 'end'}
        The location of the label along the axis, one of 'start', 'end' or
        'middle'
    label_color: Color or None (default: None)
        The color of the axis label
    grid_lines: {'none', 'solid', 'dashed'}
        The display of the grid lines
    grid_color: Color or None (default: None)
        The color of the grid lines
    color: Color or None (default: None)
        The color of the line
    label_offset: string or None (default: None)
        Label displacement from the axis line. Units allowed are 'em', 'px'
        and 'ex'. Positive values are away from the figure and negative
        values are towards the figure with respect to the axis line.
    visible: bool (default: True)
        A visibility toggle for the axis
    """
    icon = 'fa-arrows'
    orientation = Enum(['horizontal', 'vertical'],
                       default_value='horizontal',
                       sync=True)
    side = Enum(['bottom', 'top', 'left', 'right'],
                allow_none=True,
                default_value=None,
                sync=True)
    label = Unicode(sync=True)
    grid_lines = Enum(['none', 'solid', 'dashed'],
                      default_value='none',
                      sync=True)
    tick_format = Unicode(None, allow_none=True, sync=True)
    scale = Instance(Scale, sync=True, **widget_serialization)
    num_ticks = Int(default_value=None, sync=True, allow_none=True)
    tick_values = NdArray(sync=True, allow_none=True)
    offset = Dict(sync=True, **widget_serialization)
    label_location = Enum(['middle', 'start', 'end'],
                          default_value='middle',
                          sync=True)
    label_color = Color(None, sync=True, allow_none=True)
    grid_color = Color(None, sync=True, allow_none=True)
    color = Color(None, sync=True, allow_none=True)
    label_offset = Unicode(default_value=None, sync=True, allow_none=True)

    visible = Bool(True, sync=True)

    _view_name = Unicode('Axis', sync=True)
    _view_module = Unicode('nbextensions/bqplot/Axis', sync=True)
    _model_name = Unicode('AxisModel', sync=True)
    _model_module = Unicode('nbextensions/bqplot/AxisModel', sync=True)
    _ipython_display_ = None  # We cannot display an axis outside of a figure.
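
For context, an Axis is typically bound to the scale of a mark and handed to a Figure together with that mark. A rough usage sketch against bqplot's public API (the data is arbitrary):

import numpy as np
from bqplot import Axis, Figure, LinearScale, Lines

x_sc, y_sc = LinearScale(), LinearScale()
line = Lines(x=np.arange(10), y=np.arange(10) ** 2,
             scales={'x': x_sc, 'y': y_sc})
ax_x = Axis(scale=x_sc, label='x', grid_lines='dashed')
ax_y = Axis(scale=y_sc, orientation='vertical', label='x squared',
            label_location='end')
Figure(marks=[line], axes=[ax_x, ax_y])  # display in a notebook cell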
Example #26
class Process(HasTraits):
    """ Process (top-level workflow) information """

    type_ = Enum(["Observation", "Simulation", "Other"], "Other")
    subtype = Unicode("")
    id_ = Int()
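
Even a tiny HasTraits class like this gets runtime validation for free: assigning a value outside the Enum raises a TraitError. A quick sketch (the attribute values are made up):

from traitlets import TraitError

p = Process(type_="Simulation", subtype="air shower", id_=42)
p.type_ = "Observation"        # fine: one of the allowed Enum values
try:
    p.type_ = "Guess"          # not in the Enum
except TraitError as err:
    print("rejected:", err)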
Example #27
class EventSource(Component):
    """
    Parent class for EventFileReaders of different sources.

    A new EventFileReader should be created for each type of event file read
    into ctapipe, e.g. sim_telarray files are read by the `HESSIOEventSource`.

    EventFileReader provides a common high-level interface for accessing event
    information from different data sources (simulation or different camera
    file formats). Creating an EventFileReader for a new
    file format ensures that data can be accessed in a common way,
    regardless of the file format.

    EventFileReader itself is an abstract class. To use an EventFileReader you
    must use a subclass that is relevant for the file format you
    are reading (for example you must use
    `ctapipe.io.hessiofilereader.HESSIOEventSource` to read a hessio format
    file). Alternatively you can use
    `ctapipe.io.eventfilereader.EventSourceFactory` to automatically
    select the correct EventFileReader subclass for the file format you wish
    to read.

    To create an instance of an EventFileReader you must pass the traitlet
    configuration (containing the input_url) and the
    `ctapipe.core.tool.Tool`. Therefore from inside a Tool you would do:

    >>> event_source = EventSource(self.config, self)

    An example of how to use `ctapipe.core.tool.Tool` and
    `ctapipe.io.eventfilereader.EventSourceFactory` can be found in
    ctapipe/examples/calibration_pipeline.py.

    However if you are not inside a Tool, you can still create an instance and
    supply an input_url via:

    >>> event_source = EventSource(input_url="/path/to/file")

    To loop through the events in a file:

    >>> event_source = EventSource(input_url="/path/to/file")
    >>> for event in event_source:
    ...     print(event.count)

    **NOTE**: Every time a new loop is started through the event_source, it restarts
    from the first event.

    Alternatively one can use EventFileReader in a `with` statement to ensure
    the correct cleanups are performed when you are finished with the event_source:

    >>> with EventSource(input_url="/path/to/file") as event_source:
    ...     for event in event_source:
    ...         print(event.count)

    **NOTE**: The "event" that is returned from the generator is a pointer.
    Any operation that progresses that instance of the generator further will
    change the data pointed to by "event". If you wish to keep a particular
    event, make a copy: `event_copy = copy.deepcopy(event)`.


    Attributes
    ----------
    input_url : str
        Path to the input event file.
    max_events : int
        Maximum number of events to loop through in generator
    metadata : dict
        A dictionary containing the metadata of the file. This could include:
        * is_simulation (bool indicating if the file contains simulated events)
        * Telescope:Camera names (list if file contains multiple)
        * Information in the file header
        * Observation ID
    """

    input_url = Unicode(
        '', help='Path to the input file containing events.').tag(config=True)
    max_events = Int(
        None,
        allow_none=True,
        help='Maximum number of events that will be read from the file').tag(
            config=True)

    allowed_tels = Set(
        help=('list of allowed tel_ids, others will be ignored. '
              'If left empty, all telescopes in the input stream '
              'will be included')).tag(config=True)

    def __init__(self, config=None, tool=None, **kwargs):
        """
        Class to handle generic input files. Enables obtaining the "source"
        generator, regardless of the type of file (either hessio or camera
        file).

        Parameters
        ----------
        config : traitlets.loader.Config
            Configuration specified by config file or cmdline arguments.
            Used to set traitlet values.
            Set to None if no configuration to pass.
        tool : ctapipe.core.Tool
            Tool executable that is calling this component.
            Passes the correct logger to the component.
            Set to None if no Tool to pass.
        kwargs
        """
        super().__init__(config=config, parent=tool, **kwargs)

        self.metadata = dict(is_simulation=False)

        if not exists(self.input_url):
            raise FileNotFoundError("file path does not exist: '{}'".format(
                self.input_url))
        self.log.info("INPUT PATH = {}".format(self.input_url))

        if self.max_events:
            self.log.info("Max events being read = {}".format(self.max_events))

        Provenance().add_input_file(self.input_url, role='dl0.sub.evt')

    @staticmethod
    @abstractmethod
    def is_compatible(file_path):
        """
        Abstract method to be defined in child class.

        Perform a set of checks to see if the input file is compatible
        with this file event_source.

        Parameters
        ----------
        file_path : str
            File path to the event file.

        Returns
        -------
        compatible : bool
            True if file is compatible, False if it is incompatible
        """

    @property
    def is_stream(self):
        """
        Bool indicating if input is a stream. If it is then it is incompatible
        with `ctapipe.io.eventseeker.EventSeeker`.

        TODO: Define a method to detect if it is a stream

        Returns
        -------
        bool
            If True, then input is a stream.
        """
        return False

    @abstractmethod
    def _generator(self):
        """
        Abstract method to be defined in child class.

        Generator where the filling of the `ctapipe.io.containers` occurs.

        Returns
        -------
        generator
        """

    def __iter__(self):
        """
        Generator that iterates through `_generator`, but keeps track of
        `self.max_events`.

        Returns
        -------
        generator
        """
        for event in self._generator():
            if self.max_events and event.count >= self.max_events:
                break
            yield event

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass
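
A concrete source only needs to implement is_compatible and _generator. The sketch below invents a trivial line-oriented '.toy' format to show the shape of a subclass; a real source would fill ctapipe.io.containers objects rather than a plain namespace:

from types import SimpleNamespace

class ToyEventSource(EventSource):
    @staticmethod
    def is_compatible(file_path):
        # cheap check; real sources usually inspect the file header
        return file_path.endswith('.toy')

    def _generator(self):
        with open(self.input_url) as f:
            for count, line in enumerate(f):
                # one "event" per line; count drives max_events in __iter__
                yield SimpleNamespace(count=count, payload=line.strip())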
Example #28
class Select(_Selection):
    """Listbox that only allows one item to be selected at any given time."""
    _view_name = Unicode('SelectView').tag(sync=True)
    _model_name = Unicode('SelectModel').tag(sync=True)
    rows = Int(5, help="The number of rows to display.").tag(sync=True)
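
Typical usage pairs the widget with an observer on its value trait. A short sketch (the options are arbitrary):

import ipywidgets as widgets

scale = widgets.Select(options=['linear', 'log'], value='linear', rows=2)

def on_value_change(change):
    print('selected:', change['new'])

scale.observe(on_value_change, names='value')
scale  # display in a notebook cell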
Example #29
class Circle(Path):
    _view_name = Unicode('LeafletCircleView').tag(sync=True)
    _model_name = Unicode('LeafletCircleModel').tag(sync=True)

    location = List(def_loc).tag(sync=True)
    radius = Int(1000, help="radius of circle in meters").tag(sync=True)
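
As with other ipyleaflet layers, a Circle is constructed with a location and added to a Map. A brief sketch (the coordinates are arbitrary):

from ipyleaflet import Circle, Map

m = Map(center=(52.2, 0.12), zoom=11)
m.add_layer(Circle(location=(52.2, 0.12), radius=2000))  # radius in meters
m  # display in a notebook cell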
Example #30
class Viewer(widgets.DOMWidget):
    """
    Generic object for viewing and labeling Candidate objects in their rendered Contexts.
    """
    _view_name         = Unicode('ViewerView').tag(sync=True)
    _view_module       = Unicode('viewer').tag(sync=True)
    cids               = List().tag(sync=True)
    html               = Unicode('<h3>Error!</h3>').tag(sync=True)
    _labels_serialized = Unicode().tag(sync=True)
    _selected_cid      = Int().tag(sync=True)

    def __init__(self, candidates, session, gold=[], n_per_page=3, height=225, annotator_name=None):
        """
        Initializes a Viewer.

        The Viewer uses the keyword argument annotator_name to define an AnnotatorLabelKey with that name.

        :param candidates: A Python container of Candidates (e.g., not a CandidateSet, but candidate_set.candidates)
        :param session: The SnorkelSession for the database backend
        :param gold: Optional, Python container of Candidates that are known to have positive labels
        :param n_per_page: Optional, number of Contexts to display per page
        :param height: Optional, the height in pixels of the Viewer
        :param annotator_name: Name of the human using the Viewer, for saving their work. Defaults to system username.
        """
        super(Viewer, self).__init__()
        self.session = session

        # By default, use the username as annotator name
        name = annotator_name if annotator_name is not None else getpass.getuser()

        # Sets up the AnnotationKey to use
        self.annotator = self.session.query(GoldLabelKey).filter(GoldLabelKey.name == name).first()
        if self.annotator is None:
            self.annotator = GoldLabelKey(name=name)
            session.add(self.annotator)
            session.commit()

        # Viewer display configs
        self.n_per_page = n_per_page
        self.height     = height

        # Note that the candidates are not necessarily committed to the DB, so they *may not have* non-null ids
        # Hence, we index by their position in this list
        # We get the sorted candidates and all contexts required, either from unary or binary candidates
        self.gold       = list(gold)
        self.candidates = sorted(list(candidates), key=lambda c : c[0].char_start)
        self.contexts   = list(set(c[0].get_parent() for c in self.candidates + self.gold))

        # If committed, sort contexts by id
        try:
            self.contexts = sorted(self.contexts, key=lambda c : c.id)
        except Exception:
            pass

        # Loads existing annotations
        self.annotations        = [None] * len(self.candidates)
        self.annotations_stable = [None] * len(self.candidates)
        init_labels_serialized  = []
        for i, candidate in enumerate(self.candidates):

            # First look for the annotation in the primary annotations table
            existing_annotation = self.session.query(GoldLabel) \
                .filter(GoldLabel.key == self.annotator) \
                .filter(GoldLabel.candidate == candidate) \
                .first()
            if existing_annotation is not None:
                self.annotations[i] = existing_annotation
                if existing_annotation.value == 1:
                    value_string = 'true'
                elif existing_annotation.value == -1:
                    value_string = 'false'
                else:
                    raise ValueError(str(existing_annotation) +
                                     ' has value not in {1, -1}, which Viewer does not support.')
                init_labels_serialized.append(str(i) + '~~' + value_string)

                # If the annotator label is in the main table, also get its stable version
                context_stable_ids = '~~'.join([c.stable_id for c in candidate.get_contexts()])
                existing_annotation_stable = self.session.query(StableLabel) \
                                                 .filter(StableLabel.context_stable_ids == context_stable_ids)\
                                                 .filter(StableLabel.annotator_name == name).one_or_none()

                # If the stable version is not yet available, create it here
                # NOTE: this backfill exists for versioning reasons and may eventually be removed
                if existing_annotation_stable is None:
                    existing_annotation_stable = StableLabel(context_stable_ids=context_stable_ids,
                                                             annotator_name=self.annotator.name,
                                                             split=candidate.split,
                                                             value=existing_annotation.value)
                    self.session.add(existing_annotation_stable)
                    self.session.commit()

                self.annotations_stable[i] = existing_annotation_stable

        self._labels_serialized = ','.join(init_labels_serialized)

        # Configures message handler
        self.on_msg(self.handle_label_event)

        # display js, construct html and pass on to widget model
        self.render()

    def _tag_span(self, html, cids, gold=False):
        """
        Create the span around a segment of the context associated with one or more candidates / gold annotations
        """
        classes  = ['candidate'] if len(cids) > 0 else []
        classes += ['gold-annotation'] if gold else []
        classes += list(map(str, cids))

        return u'<span class="{classes}">{html}</span>'.format(classes=' '.join(classes), html=html)

    def _tag_context(self, context, candidates, gold):
        """Given the raw context, tag the spans using the generic _tag_span method"""
        raise NotImplementedError()

    def render(self):
        """Renders viewer pane"""
        cids = []

        # Iterate over pages of contexts
        pid   = 0
        pages = []
        N     = len(self.contexts)
        for i in range(0, N, self.n_per_page):
            page_cids = []
            lis       = []
            for j in range(i, min(N, i + self.n_per_page)):
                context = self.contexts[j]

                # Get the candidates in this context
                candidates = [c for c in self.candidates if c[0].get_parent() == context]
                gold = [g for g in self.gold if g.get_parent() == context]

                # Construct the <li> and page view elements
                li_data = self._tag_context(context, candidates, gold)
                lis.append(LI_HTML.format(data=li_data, context_id=context.id))
                page_cids.append([self.candidates.index(c) for c in candidates])

            # Assemble the page...
            pages.append(PAGE_HTML.format(
                pid=pid,
                data=''.join(lis),
                etc=' style="display: block;"' if i == 0 else ''
            ))
            cids.append(page_cids)
            pid += 1

        # Render in primary Viewer template
        self.cids = cids
        # NOTE: `directory` (the location of the widget assets) is assumed to be defined at module level
        with open(os.path.join(directory, 'viewer.html')) as f:
            self.html = f.read() % (self.height, ''.join(pages))
        with open(os.path.join(directory, 'viewer.js')) as f:
            display(Javascript(f.read()))

    def _get_labels(self):
        """
        De-serialize labels from Javascript widget, map to internal candidate id, and return as list of tuples
        """
        LABEL_MAP = {'true': 1, 'false': -1}
        labels    = [x.split('~~') for x in self._labels_serialized.split(',') if len(x) > 0]
        vals      = [(int(cid), LABEL_MAP.get(l, 0)) for cid, l in labels]
        return vals

    def handle_label_event(self, _, content, buffers):
        """
        Handles label event by persisting new label
        """
        if content.get('event', '') == 'set_label':
            cid = content.get('cid', None)
            value = content.get('value', None)
            if value is True:
                value = 1
            elif value is False:
                value = -1
            else:
                raise ValueError('Unexpected label returned from widget: ' + str(value) +
                                 '. Expected values are True and False.')

            # If a label already exists, just update its value (in both the GoldLabel and the StableLabel)
            if self.annotations[cid] is not None:
                if self.annotations[cid].value != value:
                    self.annotations[cid].value        = value
                    self.annotations_stable[cid].value = value
                    self.session.commit()

            # Otherwise, create a GoldLabel *and a StableLabel*
            else:
                candidate = self.candidates[cid]

                # Create the GoldLabel
                self.annotations[cid] = GoldLabel(key=self.annotator, candidate=candidate, value=value)
                self.session.add(self.annotations[cid])

                # Create StableLabel
                context_stable_ids           = '~~'.join([c.stable_id for c in candidate.get_contexts()])
                self.annotations_stable[cid] = StableLabel(context_stable_ids=context_stable_ids,
                                                           annotator_name=self.annotator.name,
                                                           value=value,
                                                           split=candidate.split)
                self.session.add(self.annotations_stable[cid])
                self.session.commit()

        elif content.get('event', '') == 'delete_label':
            cid = content.get('cid', None)
            self.session.delete(self.annotations[cid])
            self.annotations[cid] = None
            self.session.delete(self.annotations_stable[cid])
            self.annotations_stable[cid] = None
            self.session.commit()

    def get_selected(self):
        """Returns the candidate currently selected in the Viewer"""
        return self.candidates[self._selected_cid]
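

# Viewer is abstract (_tag_context raises NotImplementedError), so a concrete
# subclass must supply the tagging logic. The sketch below is illustrative and
# not part of the original module: `ExampleViewer` is a hypothetical name, and
# it assumes each context object exposes a `.text` attribute.
class ExampleViewer(Viewer):
    def _tag_context(self, context, candidates, gold):
        """Naively tag the whole context text as a single span (a deliberate simplification)"""
        cids = [self.candidates.index(c) for c in candidates]
        return self._tag_span(context.text, cids, gold=len(gold) > 0)

# Typical notebook usage (names such as `candidate_set` are assumptions):
#   session = SnorkelSession()
#   viewer  = ExampleViewer(candidate_set.candidates, session, n_per_page=5)
#   viewer  # displaying the widget as the last expression in a cell renders it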