class PipelineProcessor(LoggingConfigurable):  # ABC
    """Abstract base class for runtime-specific pipeline processors.

    Subclasses implement submission (``process``) and export (``export``) of
    pipelines for a particular runtime and identify that runtime via the
    abstract ``type`` property.
    """

    # Runtime type identifier; supplied by concrete subclasses.
    _type = None

    # Root directory used to resolve pipeline-relative resources.
    root_dir = Unicode(allow_none=True)

    enable_pipeline_info = Bool(config=True,
                                default_value=(os.getenv('ELYRA_ENABLE_PIPELINE_INFO', 'true').lower() == 'true'),
                                help="""Produces formatted logging of informational messages with durations (default=True). (ELYRA_ENABLE_PIPELINE_INFO env var)""")

    def __init__(self, root_dir, **kwargs):
        super(PipelineProcessor, self).__init__(**kwargs)
        self.root_dir = root_dir

    @property
    @abstractmethod
    def type(self):
        """The runtime type this processor handles (implemented by subclasses)."""
        raise NotImplementedError()

    @abstractmethod
    def process(self, pipeline) -> PipelineProcessorResponse:
        """Submit the given pipeline for execution on the target runtime."""
        raise NotImplementedError()

    @abstractmethod
    def export(self, pipeline, pipeline_export_format, pipeline_export_path, overwrite):
        """Export the pipeline in ``pipeline_export_format`` to ``pipeline_export_path``."""
        raise NotImplementedError()

    def log_pipeline_info(self, pipeline_name: str, action_clause: str, **kwargs):
        """Produces a formatted log INFO message used entirely for support purposes.

        This method is intended to be called for any entries that should be captured
        across aggregated log files to identify steps within a given pipeline and each
        of its operations.  As a result, calls to this method should produce single-line
        entries in the log (no embedded newlines).  Each entry is prefixed with the
        pipeline name.

        This functionality can be disabled by setting
        PipelineProcessor.enable_pipeline_info = False (or via env
        ELYRA_ENABLE_PIPELINE_INFO).

        General logging should NOT use this method but use logger.<level>()
        statements directly.

        :param pipeline_name: str representing the name of the pipeline that is being executed
        :param action_clause: str representing the action that is being logged
        :param **kwargs: dict representing the keyword arguments.  Recognized keywords include:
               operation_name: str representing the name of the operation applicable for this entry
               duration: float value representing the duration of the action being logged
        """
        if self.enable_pipeline_info:
            duration = kwargs.get('duration')
            # Compare against None (not truthiness) so a zero-second duration
            # still logs "(0.000 secs)" instead of being silently dropped.
            duration_clause = f"({duration:.3f} secs)" if duration is not None else ""
            operation_name = kwargs.get('operation_name')
            op_clause = f":'{operation_name}'" if operation_name else ""
            self.log.info(f"{self._type} '{pipeline_name}'{op_clause} - {action_clause} {duration_clause}")
class SSLContext(Configurable):
    """Class used to create an SSL cert to authenticate the service with Jupyter"""

    # Paths default from the JupyterHub-provided environment variables.
    keyfile = Unicode(
        os.getenv("JUPYTERHUB_SSL_KEYFILE", ""),
        help="SSL key, use with certfile",
    ).tag(config=True)

    certfile = Unicode(
        os.getenv("JUPYTERHUB_SSL_CERTFILE", ""),
        help="SSL cert, use with keyfile",
    ).tag(config=True)

    cafile = Unicode(
        os.getenv("JUPYTERHUB_SSL_CLIENT_CA", ""),
        help="SSL CA, use with keyfile and certfile",
    ).tag(config=True)

    def ssl_context(self):
        """Build an SSL context from the configured key/cert/CA, or None if any is unset."""
        # All three pieces are required; without them, run without SSL.
        if not (self.keyfile and self.certfile and self.cafile):
            return None
        return make_ssl_context(
            self.keyfile,
            self.certfile,
            cafile=self.cafile,
            check_hostname=False,
        )
class MetatabExporter(Exporter):
    """Base nbconvert exporter for Metatab notebooks.

    Holds the common configurables (output/notebook/package directories) and
    defaults the notebook working directory from the notebook's own location.
    """

    template_path = List(['.']).tag(config=True, affects_environment=True)

    output_dir = Unicode(help='Output directory').tag(config=True)
    notebook_dir = Unicode(help='CWD in which notebook will be executed').tag(config=True)
    package_dir = Unicode(help='Directory in which to store generated package').tag(config=True)
    package_name = Unicode(help='Name of package to generate. Defaults to the Metatab Root.Name').tag(config=True)

    def __init__(self, config=None, **kw):
        # NOTE: leftover debugger breakpoint removed here.
        super().__init__(config, **kw)

        # Fall back to a logger named after the concrete class when the
        # caller does not supply one.
        self.log = kw.get('log', logging.getLogger(self.__class__.__name__))

    def from_file(self, file_stream, resources=None, **kw):
        """Convert a notebook from an open file stream (delegates to Exporter)."""
        return super().from_file(file_stream, resources, **kw)

    def from_filename(self, filename, resources=None, **kw):
        """Convert a notebook from a path, defaulting notebook_dir to its directory."""
        if not self.notebook_dir:
            self.notebook_dir = dirname(abspath(filename))

        return super().from_filename(filename, resources, **kw)
class PipelineProcessor(LoggingConfigurable):  # ABC
    """Abstract base class for runtime-specific pipeline processors.

    Subclasses implement submission (``process``) and export (``export``) of
    pipelines, identify their runtime via the abstract ``type`` property, and
    share helpers for operation topological sorting and I/O propagation.
    """

    # Runtime type identifier; supplied by concrete subclasses.
    _type = None

    # Root directory used to resolve pipeline-relative resources.
    root_dir = Unicode(allow_none=True)

    # Shared registry of components available to all processor instances.
    component_registry: ComponentRegistry = ComponentRegistry()

    enable_pipeline_info = Bool(
        config=True,
        default_value=(os.getenv('ELYRA_ENABLE_PIPELINE_INFO', 'true').lower() == 'true'),
        help="""Produces formatted logging of informational messages with durations (default=True). (ELYRA_ENABLE_PIPELINE_INFO env var)"""
    )

    def __init__(self, root_dir, **kwargs):
        super(PipelineProcessor, self).__init__(**kwargs)
        self.root_dir = root_dir

    @property
    @abstractmethod
    def type(self):
        """The runtime type this processor handles (implemented by subclasses)."""
        raise NotImplementedError()

    def get_components(self):
        """Return all registered components applicable to this processor's type."""
        components = self.component_registry.get_all_components(
            processor_type=self.type)
        return components

    @abstractmethod
    def process(self, pipeline) -> PipelineProcessorResponse:
        """Submit the given pipeline for execution on the target runtime."""
        raise NotImplementedError()

    @abstractmethod
    def export(self, pipeline, pipeline_export_format, pipeline_export_path, overwrite):
        """Export the pipeline in ``pipeline_export_format`` to ``pipeline_export_path``."""
        raise NotImplementedError()

    def log_pipeline_info(self, pipeline_name: str, action_clause: str, **kwargs):
        """Produces a formatted log INFO message used entirely for support purposes.

        This method is intended to be called for any entries that should be captured
        across aggregated log files to identify steps within a given pipeline and each
        of its operations.  As a result, calls to this method should produce single-line
        entries in the log (no embedded newlines).  Each entry is prefixed with the
        pipeline name.

        This functionality can be disabled by setting
        PipelineProcessor.enable_pipeline_info = False (or via env
        ELYRA_ENABLE_PIPELINE_INFO).

        General logging should NOT use this method but use logger.<level>()
        statements directly.

        :param pipeline_name: str representing the name of the pipeline that is being executed
        :param action_clause: str representing the action that is being logged
        :param **kwargs: dict representing the keyword arguments.  Recognized keywords include:
               operation_name: str representing the name of the operation applicable for this entry
               duration: float value representing the duration of the action being logged
        """
        if self.enable_pipeline_info:
            duration = kwargs.get('duration')
            # Compare against None (not truthiness) so a zero-second duration
            # still logs "(0.000 secs)" instead of being silently dropped.
            duration_clause = f"({duration:.3f} secs)" if duration is not None else ""
            operation_name = kwargs.get('operation_name')
            op_clause = f":'{operation_name}'" if operation_name else ""
            self.log.info(
                f"{self._type} '{pipeline_name}'{op_clause} - {action_clause} {duration_clause}"
            )

    @staticmethod
    def _propagate_operation_inputs_outputs(
            pipeline: Pipeline, sorted_operations: List[Operation]) -> None:
        """
        All previous operation outputs should be propagated throughout the pipeline.
        In order to process this recursively, the current operation's inputs should be
        combined from its parent's inputs (which, themselves are derived from the outputs
        of their parent) and its parent's outputs.

        ``sorted_operations`` must already be topologically ordered (parents first)
        so each parent's inputs are fully resolved before its children are visited.
        """
        for operation in sorted_operations:
            parent_io = set()  # gathers inputs & outputs relative to parent
            for parent_operation_id in operation.parent_operations:
                parent_operation = pipeline.operations[parent_operation_id]
                if parent_operation.inputs:
                    parent_io.update(parent_operation.inputs)
                if parent_operation.outputs:
                    parent_io.update(parent_operation.outputs)

            if parent_io:
                # Guard against a None/empty inputs attribute, mirroring the
                # guards applied to the parent operations above.
                if operation.inputs:
                    parent_io.update(operation.inputs)
                operation.inputs = list(parent_io)

    @staticmethod
    def _sort_operations(operations_by_id: dict) -> List[Operation]:
        """
        Sort the list of operations based on its dependency graph.

        Returns a topological ordering in which every operation appears after
        all of its parent operations.
        """
        ordered_operations = []

        for operation in operations_by_id.values():
            PipelineProcessor._sort_operation_dependencies(
                operations_by_id, ordered_operations, operation)

        return ordered_operations

    @staticmethod
    def _sort_operation_dependencies(operations_by_id: dict,
                                     ordered_operations: list,
                                     operation: Operation) -> None:
        """
        Helper method to the main sort operation function.

        Depth-first insertion: recursively appends all of ``operation``'s
        ancestors to ``ordered_operations`` before ``operation`` itself.
        """
        # Optimization: check if already processed
        if operation not in ordered_operations:
            # process each of the dependencies that needs to be executed first
            for parent_operation_id in operation.parent_operations:
                parent_operation = operations_by_id[parent_operation_id]
                if parent_operation not in ordered_operations:
                    PipelineProcessor._sort_operation_dependencies(
                        operations_by_id, ordered_operations, parent_operation)
            ordered_operations.append(operation)
class LTI11Authenticator(Authenticator):
    """
    JupyterHub LTI 1.1 Authenticator which extends the ltiauthenticator.LTIAuthenticator class.
    Messages sent to this authenticator are sent from a tool consumer (TC), such as
    an LMS. JupyterHub, as the authenticator, works as the tool provider (TP), also
    known as the external tool.

    The LTIAuthenticator base class defines the consumers, defined as 1 or (n) consumer key
    and shared secret k/v's to verify requests from their tool consumer.
    """

    auto_login = True
    login_service = "LTI 1.1"

    consumers = Dict(
        {},
        config=True,
        help="""
        A dict of consumer keys mapped to consumer secrets for those keys.
        Allows multiple consumers to securely send users to this JupyterHub
        instance.
        """,
    )

    username_key = Unicode(
        "custom_canvas_user_id",
        allow_none=True,
        config=True,
        help="""
        Key present in LTI 1.1 launch request used to set the user's JupyterHub's username.
        Some common examples include:

          - User's email address: lis_person_contact_email_primary
          - Canvas LMS custom user id: custom_canvas_user_id

        Your LMS (Canvas / Open EdX / Moodle / others) may provide additional keys in the
        LTI 1.1 launch request that you can use to set the username. In most cases these
        are prefixed with `custom_`. You may also have the option of using variable substitutions
        to fetch values that aren't provided with your vendor's standard LTI 1.1 launch request.
        Reference the IMS LTI specification on variable substitutions:
        https://www.imsglobal.org/specs/ltiv1p1p1/implementation-guide#toc-9.

        Current default behavior:

        To preserve legacy behavior, if custom_canvas_user_id is present in the LTI
        request, it is used as the username. If not, user_id is used. In the future,
        the default will be just user_id - if you want to use custom_canvas_user_id,
        you must explicitly set username_key to custom_canvas_user_id.
        """,
    )

    def get_handlers(self, app: JupyterHub) -> BaseHandler:
        """Register the LTI 1.1 launch handler with JupyterHub."""
        return [("/lti/launch", LTI11AuthenticateHandler)]

    def login_url(self, base_url):
        """Return the login URL (the LTI launch endpoint) under base_url."""
        return url_path_join(base_url, "/lti/launch")

    async def authenticate(  # noqa: C901
            self, handler: BaseHandler, data: dict = None) -> dict:  # noqa: C901
        """
        LTI 1.1 Authenticator. One or more consumer keys/values must be set in the jupyterhub
        config with the LTI11Authenticator.consumers dict.

        Args:
            handler: JupyterHub's Authenticator handler object. For LTI 1.1 requests, the handler is
              an instance of LTIAuthenticateHandler.
            data: optional data object

        Returns:
            Authentication dictionary

        Raises:
            HTTPError if the required values are not in the request
        """
        # log deprecation warning when using the default custom_canvas_user_id setting
        if self.username_key == "custom_canvas_user_id":
            self.log.warning(
                dedent(
                    """The default username_key 'custom_canvas_user_id' will be replaced by 'user_id' in a future release.
                Set c.LTIAuthenticator.username_key to `custom_canvas_user_id` to preserve current behavior.
                """))
        validator = LTI11LaunchValidator(self.consumers)

        self.log.debug("Original arguments received in request: %s" %
                       handler.request.arguments)

        # extract the request arguments to a dict
        args = convert_request_to_dict(handler.request.arguments)
        self.log.debug("Decoded args from request: %s" % args)

        # get the origin protocol
        protocol = get_client_protocol(handler)
        self.log.debug("Origin protocol is: %s" % protocol)

        # build the full launch url value required for oauth1 signatures
        launch_url = f"{protocol}://{handler.request.host}{handler.request.uri}"
        self.log.debug("Launch url is: %s" % launch_url)

        if validator.validate_launch_request(launch_url,
                                             handler.request.headers, args):
            # warn if the username_key is not in the request's arguments.
            # BUG FIX: the original call had a '%s' placeholder with no
            # argument, so the literal "%s" was logged; pass username_key lazily.
            if self.username_key not in args.keys():
                self.log.warning(
                    "%s the specified username_key did not match any of the launch request arguments.",
                    self.username_key,
                )

            # get the username_key. if empty, fetch the username from the request's user_id value.
            username = args.get(self.username_key)
            if not username:
                username = args.get("user_id")

            # if username is still empty or none, raise an http error.
            if not username:
                raise HTTPError(
                    400,
                    "The %s value in the launch request is empty or None." %
                    self.username_key,
                )

            # return standard authentication where all launch request arguments are added to the auth_state key
            # except for the oauth_* arguments.
            return {
                "name": username,
                "auth_state":
                {k: v
                 for k, v in args.items() if not k.startswith("oauth_")},
            }
class KernelProvisionerFactory(SingletonConfigurable):
    """
    :class:`KernelProvisionerFactory` is responsible for creating provisioner instances.

    A singleton instance, `KernelProvisionerFactory` is also used by the :class:`KernelSpecManager`
    to validate `kernel_provisioner` references found in kernel specifications to confirm their
    availability (in cases where the kernel specification references a kernel provisioner that has
    not been installed into the current Python environment).

    It's `default_provisioner_name` attribute can be used to specify the default provisioner to use
    when a kernel_spec is found to not reference a provisioner.  It's value defaults to
    `"local-provisioner"` which identifies the local provisioner implemented by
    :class:`LocalProvisioner`.
    """

    # Entry-point group under which kernel provisioners register themselves.
    GROUP_NAME = 'jupyter_client.kernel_provisioners'

    # Cache of discovered provisioner entry points, keyed by provisioner name.
    provisioners: Dict[str, EntryPoint] = {}

    # Environment variable that can override the default provisioner name.
    default_provisioner_name_env = "JUPYTER_DEFAULT_PROVISIONER_NAME"
    default_provisioner_name = Unicode(
        config=True,
        help=
        """Indicates the name of the provisioner to use when no kernel_provisioner entry is present in the kernelspec.""",
    )

    @default('default_provisioner_name')
    def default_provisioner_name_default(self):
        # Allow the default to come from the environment; fall back to the
        # local provisioner shipped with jupyter_client.
        return getenv(self.default_provisioner_name_env, "local-provisioner")

    def __init__(self, **kwargs: Any) -> None:
        super().__init__(**kwargs)

        # Prime the cache with every provisioner registered via entry points.
        for ep in KernelProvisionerFactory._get_all_provisioners():
            self.provisioners[ep.name] = ep

    def is_provisioner_available(self, kernel_spec: Any) -> bool:
        """
        Reads the associated ``kernel_spec`` to determine the provisioner and returns whether it
        exists as an entry_point (True) or not (False).  If the referenced provisioner is not
        in the current cache or cannot be loaded via entry_points, a warning message is issued
        indicating it is not available.
        """
        is_available: bool = True
        provisioner_cfg = self._get_provisioner_config(kernel_spec)
        provisioner_name = str(provisioner_cfg.get('provisioner_name'))
        if not self._check_availability(provisioner_name):
            is_available = False
            self.log.warning(
                f"Kernel '{kernel_spec.display_name}' is referencing a kernel "
                f"provisioner ('{provisioner_name}') that is not available. "
                f"Ensure the appropriate package has been installed and retry."
            )
        return is_available

    def create_provisioner_instance(self, kernel_id: str, kernel_spec: Any,
                                    parent: Any) -> KernelProvisionerBase:
        """
        Reads the associated ``kernel_spec`` to see if it has a `kernel_provisioner` stanza.
        If one exists, it instantiates an instance.  If a kernel provisioner is not
        specified in the kernel specification, a default provisioner stanza is fabricated
        and instantiated corresponding to the current value of `default_provisioner_name` trait.
        The instantiated instance is returned.

        If the provisioner is found to not exist (not registered via entry_points),
        `ModuleNotFoundError` is raised.
        """
        provisioner_cfg = self._get_provisioner_config(kernel_spec)
        provisioner_name = str(provisioner_cfg.get('provisioner_name'))
        if not self._check_availability(provisioner_name):
            raise ModuleNotFoundError(
                f"Kernel provisioner '{provisioner_name}' has not been registered."
            )

        self.log.debug(
            f"Instantiating kernel '{kernel_spec.display_name}' with "
            f"kernel provisioner: {provisioner_name}")
        provisioner_class = self.provisioners[provisioner_name].load()
        provisioner_config = provisioner_cfg.get('config')
        provisioner: KernelProvisionerBase = provisioner_class(
            kernel_id=kernel_id,
            kernel_spec=kernel_spec,
            parent=parent,
            **provisioner_config)
        return provisioner

    def _check_availability(self, provisioner_name: str) -> bool:
        """
        Checks that the given provisioner is available.

        If the given provisioner is not in the current set of loaded provisioners an attempt
        is made to fetch the named entry point and, if successful, loads it into the cache.

        :param provisioner_name: name of the provisioner to look up
        :return: True if the provisioner is (now) cached, False otherwise
        """
        is_available = True
        if provisioner_name not in self.provisioners:
            try:
                ep = self._get_provisioner(provisioner_name)
                self.provisioners[provisioner_name] = ep  # Update cache
            except NoSuchEntryPoint:
                is_available = False
        return is_available

    def _get_provisioner_config(self, kernel_spec: Any) -> Dict[str, Any]:
        """
        Return the kernel_provisioner stanza from the kernel_spec.

        Checks the kernel_spec's metadata dictionary for a kernel_provisioner entry.
        If found, it is returned, else one is created relative to the DEFAULT_PROVISIONER
        and returned.

        Parameters
        ----------
        kernel_spec : Any - this is a KernelSpec type but listed as Any to avoid circular import
            The kernel specification object from which the provisioner dictionary is derived.

        Returns
        -------
        dict
            The provisioner portion of the kernel_spec.  If one does not exist, it will contain
            the default information.  If no `config` sub-dictionary exists, an empty `config`
            dictionary will be added.
        """
        env_provisioner = kernel_spec.metadata.get('kernel_provisioner', {})
        if 'provisioner_name' in env_provisioner:  # kernelspec has a provisioner stanza
            if ('config' not in env_provisioner
                ):  # ensure a config stanza is always present
                env_provisioner.update({"config": {}})
            return env_provisioner  # Return what we found (plus config stanza if necessary)
        # No provisioner_name present - fabricate a default stanza.
        return {
            "provisioner_name": self.default_provisioner_name,
            "config": {}
        }

    def get_provisioner_entries(self) -> Dict[str, str]:
        """
        Returns a dictionary of provisioner entries.

        The key is the provisioner name for its entry point.  The value is the colon-separated
        string of the entry point's module name and object name.
        """
        entries = {}
        for name, ep in self.provisioners.items():
            entries[name] = f"{ep.module_name}:{ep.object_name}"
        return entries

    @staticmethod
    def _get_all_provisioners() -> List[EntryPoint]:
        """Wrapper around entrypoints.get_group_all() - primarily to facilitate testing."""
        return get_group_all(KernelProvisionerFactory.GROUP_NAME)

    def _get_provisioner(self, name: str) -> EntryPoint:
        """Wrapper around entrypoints.get_single() - primarily to facilitate testing."""
        try:
            ep = get_single(KernelProvisionerFactory.GROUP_NAME, name)
        except NoSuchEntryPoint:
            # Check if the entrypoint name is 'local-provisioner'.  Although this should never
            # happen, we have seen cases where the previous distribution of jupyter_client has
            # remained which doesn't include kernel-provisioner entrypoints (so 'local-provisioner'
            # is deemed not found even though its definition is in THIS package).  In such cases,
            # the entrypoints package uses what it first finds - which is the older distribution
            # resulting in a violation of a supposed invariant condition.  To address this scenario,
            # we will log a warning message indicating this situation, then build the entrypoint
            # instance ourselves - since we have that information.
            if name == 'local-provisioner':
                distros = glob.glob(
                    f"{path.dirname(path.dirname(__file__))}-*")
                self.log.warning(
                    f"Kernel Provisioning: The 'local-provisioner' is not found. This is likely "
                    f"due to the presence of multiple jupyter_client distributions and a previous "
                    f"distribution is being used as the source for entrypoints - which does not "
                    f"include 'local-provisioner'. That distribution should be removed such that "
                    f"only the version-appropriate distribution remains (version >= 7). Until "
                    f"then, a 'local-provisioner' entrypoint will be automatically constructed "
                    f"and used.\nThe candidate distribution locations are: {distros}"
                )
                ep = EntryPoint('local-provisioner',
                                'jupyter_client.provisioning',
                                'LocalProvisioner')
            else:
                raise
        return ep
class KernelProvisionerBase(ABC,
                            LoggingConfigurable,
                            metaclass=KernelProvisionerMeta):
    """
    Abstract base class defining methods for KernelProvisioner classes.

    A majority of methods are abstract (requiring implementations via a subclass) while
    some are optional and others provide implementations common to all instances.
    Subclasses should be aware of which methods require a call to the superclass.

    Many of these methods model those of :class:`subprocess.Popen` for parity with
    previous versions where the kernel process was managed directly.
    """

    # The kernel specification associated with this provisioner
    kernel_spec: Any = Instance('jupyter_client.kernelspec.KernelSpec',
                                allow_none=True)
    # Identifier of the kernel this provisioner manages (None until assigned).
    kernel_id: str = Unicode(None, allow_none=True)
    # Connection information (ports, key, transport, ...) for the launched kernel.
    connection_info: KernelConnectionInfo = {}

    @property
    @abstractmethod
    def has_process(self) -> bool:
        """
        Returns true if this provisioner is currently managing a process.

        This property is asserted to be True immediately following a call to
        the provisioner's :meth:`launch_kernel` method.
        """
        pass

    @abstractmethod
    async def poll(self) -> Optional[int]:
        """
        Checks if kernel process is still running.

        If running, None is returned, otherwise the process's integer-valued exit code is returned.
        This method is called from :meth:`KernelManager.is_alive`.
        """
        pass

    @abstractmethod
    async def wait(self) -> Optional[int]:
        """
        Waits for kernel process to terminate.

        This method is called from `KernelManager.finish_shutdown()` and
        `KernelManager.kill_kernel()` when terminating a kernel gracefully or
        immediately, respectively.
        """
        pass

    @abstractmethod
    async def send_signal(self, signum: int) -> None:
        """
        Sends signal identified by signum to the kernel process.

        This method is called from `KernelManager.signal_kernel()` to send the
        kernel process a signal.
        """
        pass

    @abstractmethod
    async def kill(self, restart: bool = False) -> None:
        """
        Kill the kernel process.

        This is typically accomplished via a SIGKILL signal, which cannot be caught.
        This method is called from `KernelManager.kill_kernel()` when terminating
        a kernel immediately.

        restart is True if this operation will precede a subsequent launch_kernel request.
        """
        pass

    @abstractmethod
    async def terminate(self, restart: bool = False) -> None:
        """
        Terminates the kernel process.

        This is typically accomplished via a SIGTERM signal, which can be caught, allowing
        the kernel provisioner to perform possible cleanup of resources.  This method is
        called indirectly from `KernelManager.finish_shutdown()` during a kernel's
        graceful termination.

        restart is True if this operation precedes a start launch_kernel request.
        """
        pass

    @abstractmethod
    async def launch_kernel(self, cmd: List[str],
                            **kwargs: Any) -> KernelConnectionInfo:
        """
        Launch the kernel process and return its connection information.

        This method is called from `KernelManager.launch_kernel()` during the
        kernel manager's start kernel sequence.
        """
        pass

    @abstractmethod
    async def cleanup(self, restart: bool = False) -> None:
        """
        Cleanup any resources allocated on behalf of the kernel provisioner.

        This method is called from `KernelManager.cleanup_resources()` as part of
        its shutdown kernel sequence.

        restart is True if this operation precedes a start launch_kernel request.
        """
        pass

    async def shutdown_requested(self, restart: bool = False) -> None:
        """
        Allows the provisioner to determine if the kernel's shutdown has been requested.

        This method is called from `KernelManager.request_shutdown()` as part of
        its shutdown sequence.

        This method is optional and is primarily used in scenarios where the provisioner
        may need to perform other operations in preparation for a kernel's shutdown.
        """
        pass

    async def pre_launch(self, **kwargs: Any) -> Dict[str, Any]:
        """
        Perform any steps in preparation for kernel process launch.

        This includes applying additional substitutions to the kernel launch command
        and environment.  It also includes preparation of launch parameters.

        NOTE: Subclass implementations are advised to call this method as it applies
        environment variable substitutions from the local environment and calls the
        provisioner's :meth:`_finalize_env()` method to allow each provisioner the
        ability to cleanup the environment variables that will be used by the kernel.

        This method is called from `KernelManager.pre_start_kernel()` as part of its
        start kernel sequence.

        Returns the (potentially updated) keyword arguments that are passed to
        :meth:`launch_kernel()`.
        """
        # Copy so the caller's env (often os.environ) is never mutated.
        env = kwargs.pop('env', os.environ).copy()
        env.update(self.__apply_env_substitutions(env))
        self._finalize_env(env)
        kwargs['env'] = env

        return kwargs

    async def post_launch(self, **kwargs: Any) -> None:
        """
        Perform any steps following the kernel process launch.

        This method is called from `KernelManager.post_start_kernel()` as part of its
        start kernel sequence.
        """
        pass

    async def get_provisioner_info(self) -> Dict[str, Any]:
        """
        Captures the base information necessary for persistence relative to this instance.

        This enables applications that subclass `KernelManager` to persist a kernel provisioner's
        relevant information to accomplish functionality like disaster recovery or high
        availability by calling this method via the kernel manager's `provisioner` attribute.

        NOTE: The superclass method must always be called first to ensure proper serialization.
        """
        provisioner_info: Dict[str, Any] = {}
        provisioner_info['kernel_id'] = self.kernel_id
        provisioner_info['connection_info'] = self.connection_info
        return provisioner_info

    async def load_provisioner_info(self, provisioner_info: Dict) -> None:
        """
        Loads the base information necessary for persistence relative to this instance.

        The inverse of `get_provisioner_info()`, this enables applications that subclass
        `KernelManager` to re-establish communication with a provisioner that is managing
        a (presumably) remote kernel from an entirely different process that the original
        provisioner.

        NOTE: The superclass method must always be called first to ensure proper deserialization.
        """
        self.kernel_id = provisioner_info['kernel_id']
        self.connection_info = provisioner_info['connection_info']

    def get_shutdown_wait_time(self, recommended: float = 5.0) -> float:
        """
        Returns the time allowed for a complete shutdown.  This may vary by provisioner.

        This method is called from `KernelManager.finish_shutdown()` during the graceful
        phase of its kernel shutdown sequence.

        The recommended value will typically be what is configured in the kernel manager.
        """
        return recommended

    def get_stable_start_time(self, recommended: float = 10.0) -> float:
        """
        Returns the expected upper bound for a kernel (re-)start to complete.
        This may vary by provisioner.

        The recommended value will typically be what is configured in the kernel restarter.
        """
        return recommended

    def _finalize_env(self, env: Dict[str, str]) -> None:
        """
        Ensures env is appropriate prior to launch.

        This method is called from `KernelProvisionerBase.pre_launch()` during the kernel's
        start sequence.

        NOTE: Subclasses should be sure to call super()._finalize_env(env)
        """
        if self.kernel_spec.language and self.kernel_spec.language.lower(
        ).startswith("python"):
            # Don't allow PYTHONEXECUTABLE to be passed to kernel process.
            # If set, it can bork all the things.
            env.pop('PYTHONEXECUTABLE', None)

    def __apply_env_substitutions(self, substitution_values: Dict[str, str]):
        """
        Walks entries in the kernelspec's env stanza and applies substitutions from current env.

        This method is called from `KernelProvisionerBase.pre_launch()` during the kernel's
        start sequence.

        Returns the substituted list of env entries.

        NOTE: This method is private and is not intended to be overridden by provisioners.
        """
        substituted_env = {}
        if self.kernel_spec:
            from string import Template

            # For each templated env entry, fill any templated references
            # matching names of env variables with those values and build
            # new dict with substitutions.
            templated_env = self.kernel_spec.env
            for k, v in templated_env.items():
                substituted_env.update(
                    {k: Template(v).safe_substitute(substitution_values)})
        return substituted_env
class LTI13Authenticator(OAuthenticator):
    """
    JupyterHub LTI 1.3 Authenticator which extends the `OAuthenticator` class. (LTI 1.3
    is basically an extension of OIDC/OAuth2). Messages sent to this authenticator are sent
    from a LTI 1.3 Platform, such as an LMS. JupyterHub, as the authenticator, works as
    the LTI 1.3 External Tool. The basic login flow uses the authorization code grant type.
    As such, the client id is only required if the JupyterHub is configured to send information
    back to the LTI 1.3 Platform, in which case it would require the client credentials grant
    type.

    This class utilizes the following configurables defined in the `OAuthenticator` base class
    (all are required unless stated otherwise):

        - authorize_url
        - oauth_callback_url
        - token_url
        - (Optional) client_id

    Ref:
      - https://github.com/jupyterhub/oauthenticator/blob/master/oauthenticator/oauth2.py
      - http://www.imsglobal.org/spec/lti/v1p3/
    """

    login_service = "LTI 1.3"

    # handlers used for login, callback, and jwks endpoints
    login_handler = LTI13LoginHandler
    callback_handler = LTI13CallbackHandler

    endpoint = Unicode(
        os.getenv("LTI13_ENDPOINT", ""),
        allow_none=False,
        config=True,
        help="""
        The platform's base endpoint used when redirecting requests to the platform
        after receiving the initial login request.
        """,
    )

    username_key = Unicode(
        "email",
        allow_none=False,
        config=True,
        help="""
        JWT claim present in LTI 1.3 login initiation flow used to set the user's JupyterHub's username.
        Some common examples include:

          - User's email address: email
          - Given name: given_name

        Your LMS (Canvas / Open EdX / Moodle / others) may provide additional keys in the
        LTI 1.3 login initiatino flow that you can use to set the username. In most cases these
        are located in the `https://purl.imsglobal.org/spec/lti/claim/custom` claim. You may also
        have the option of using variable substitutions to fetch values that aren't provided with
        your vendor's standard LTI 1.3 login initiation flow request. If your platform's LTI 1.3
        settings are defined with privacy enabled, then by default the `sub` claim is used to set the
        username.

        Reference the IMS LTI specification on variable substitutions:
        http://www.imsglobal.org/spec/lti/v1p3/#customproperty.
        """,
    )

    tool_name = Unicode(
        "JupyterHub",
        config=True,
        help="""
        Name of tool provided to the LMS when installed via the config URL.

        This is primarily used for display purposes.
        """,
    )

    tool_description = Unicode(
        "Launch interactive Jupyter Notebooks with JupyterHub",
        config=True,
        help="""
        Description of tool provided to the LMS when installed via the config URL.

        This is primarily used for display purposes.
        """,
    )

    def get_handlers(self, app: JupyterHub) -> List[BaseHandler]:
        """Expose the LTI 1.3 config endpoint."""
        return [
            ("/lti13/config", LTI13ConfigHandler),
        ]

    async def authenticate(  # noqa: C901
            self,
            handler: LTI13LoginHandler,
            data: Dict[str, str] = None) -> Dict[str, str]:
        """
        Overrides authenticate from base class to handle LTI 1.3 authentication requests.

        Args:
          handler: handler object
          data: authentication dictionary

        Returns:
          Authentication dictionary
        """
        validator = LTI13LaunchValidator()

        # The platform's jwks endpoint is needed to verify the id_token's signature.
        self.log.debug(f"JWKS platform endpoint is {self.endpoint}")
        id_token = handler.get_argument("id_token")

        # Decode the JWT (id_token) the platform sent us, verifying its
        # signature against the platform's public key set.
        jwt_decoded = await validator.jwt_verify_and_decode(
            id_token, self.endpoint, False, audience=self.client_id)
        self.log.debug(f"Decoded JWT: {jwt_decoded}")

        if validator.validate_launch_request(jwt_decoded):
            # Primary source: the configured claim.
            username = jwt_decoded.get(self.username_key)
            self.log.debug(
                f"Username_key is {self.username_key} and value fetched from JWT is {username}"
            )
            if not username:
                # Fallback: the `sub` claim (used when privacy is enabled).
                username = jwt_decoded.get("sub")
                if not username:
                    raise HTTPError(400, "Unable to set the username")

            self.log.debug(f"username is {username}")

            return {
                "name": username,
                "auth_state": dict(jwt_decoded),  # noqa: E231
            }
class MyClass(Configurable):
    """Minimal Configurable example exposing a single configurable trait."""

    # Configurable display name; settable via c.MyClass.name in a config file.
    name = Unicode(u'defaultname',
                   help="the name of the object").tag(config=True)
    # Plain class attribute (not a trait): request that errors encountered
    # while loading config files are raised rather than logged and ignored.
    raise_config_file_errors = True
class HugoExporter(MarkdownExporter):
    """ Export a python notebook to markdown, with frontmatter for Hugo.

    Not much of this is particular to Metapack. The Frontmatter is contained
    in a cell of type RawNBConvert, tagged with the tag 'frontmatter', and
    formatted in YAML. For instance

        https://github.com/sandiegodata/notebooks/blob/master/crime/Crime%20Monthly%20Rhythm%20Maps.ipynb

    Has this frontmatter:

        draft: false
        weight: 3
        description: Rhythm maps for San Diego Crime incidents, from 2007 to 2014
        toc: false
        show_input: hide
        section: notebooks
        authors:
        - name: Eric Busboom
        github: https://github.com/sandiegodata/notebooks/blob/master/crime/Crime%20Monthly%20Rhythm%20Maps.ipynb

    """

    # Root of the Hugo site; output paths are built relative to it.
    hugo_dir = Unicode(help="Root of the Hugo directory").tag(config=True)

    # Hugo content section; defaults to 'notebooks' (see default below),
    # and can be overridden per-notebook via frontmatter 'section'.
    section = Unicode(
        help="Hugo section in which to write the converted notebook").tag(
        config=True)

    @default('section')
    def _section_file_default(self):
        # Default Hugo section when none is configured.
        return 'notebooks'

    @property
    def default_config(self):
        """Build the nbconvert Config: Hugo template, Metapack
        preprocessors, and output extraction disabled (images are handled
        by HugoOutputExtractor instead)."""
        import metapack.jupyter.templates

        c = Config({})

        c.TemplateExporter.template_path = [
            dirname(metapack.jupyter.templates.__file__)
        ]

        c.TemplateExporter.template_file = 'markdown_hugo.tpl'

        c.MarkdownExporter.preprocessors = [
            'metapack.jupyter.preprocessors.OrganizeMetadata',
            HugoOutputExtractor
        ]

        c.merge(super(HugoExporter, self).default_config)

        # merge() happens first so this override is not clobbered by the
        # parent's config.
        c.ExtractOutputPreprocessor.enabled = False

        return c

    def get_creators(self, meta):
        """Yield author dicts from the metatab 'wrangler' and 'creator'
        entries, each tagged with its 'type'. Handles both a list of
        authors and a single-author mapping."""
        for typ in ('wrangler', 'creator'):

            try:
                # Multiple authors
                for e in meta[typ]:
                    d = dict(e.items())
                    d['type'] = typ
                    yield d
            except AttributeError:  # only one
                d = meta[typ]
                d['type'] = typ
                yield d
            except KeyError:
                pass

    def from_notebook_node(self, nb, resources=None, **kw):
        """Convert *nb* to Hugo markdown, rewriting image output paths into
        the Hugo static dir and setting the output key from hugo_dir /
        section / slug.

        Returns (output, resources) as with other nbconvert exporters.
        """
        nb_copy = copy.deepcopy(nb)
        resources = self._init_resources(resources)

        if 'language' in nb['metadata']:
            resources['language'] = nb['metadata']['language'].lower()

        # Preprocess
        nb_copy, resources = self._preprocess(nb_copy, resources)

        # move over some more metadata
        if 'authors' not in nb_copy.metadata.frontmatter:
            nb_copy.metadata.frontmatter['authors'] = list(
                self.get_creators(nb_copy.metadata.metatab))

        # Other useful metadata
        if not 'date' in nb_copy.metadata.frontmatter:
            nb_copy.metadata.frontmatter['date'] = datetime.now().isoformat()

        resources.setdefault('raw_mimetypes', self.raw_mimetypes)
        resources['global_content_filter'] = {
            'include_code': not self.exclude_code_cell,
            'include_markdown': not self.exclude_markdown,
            'include_raw': not self.exclude_raw,
            'include_unknown': not self.exclude_unknown,
            'include_input': not self.exclude_input,
            'include_output': not self.exclude_output,
            'include_input_prompt': not self.exclude_input_prompt,
            'include_output_prompt': not self.exclude_output_prompt,
            'no_prompt': self.exclude_input_prompt and self.exclude_output_prompt,
        }

        # NOTE(review): assumes the preprocessors have set frontmatter.slug —
        # confirm OrganizeMetadata guarantees it.
        slug = nb_copy.metadata.frontmatter.slug

        # Rebuild all of the image names
        for cell_index, cell in enumerate(nb_copy.cells):
            for output_index, out in enumerate(cell.get('outputs', [])):

                if 'metadata' in out:
                    for type_name, fn in list(
                            out.metadata.get('filenames', {}).items()):
                        if fn in resources['outputs']:
                            # Move the image bytes from the original key to a
                            # path under <hugo_dir>/static/img/<slug>/.
                            html_path = join('img', slug, basename(fn))
                            file_path = join(self.hugo_dir, 'static', html_path)

                            resources['outputs'][file_path] = resources[
                                'outputs'][fn]
                            del resources['outputs'][fn]

                            # Can't put the '/' in the join() or it will be absolute
                            out.metadata.filenames[type_name] = '/' + html_path

        output = self.template.render(nb=nb_copy, resources=resources)

        # Per-notebook frontmatter wins over the configured section.
        section = nb_copy.metadata.frontmatter.get('section') or self.section

        # Don't know why this isn't being set from the config
        # resources['output_file_dir'] = self.config.NbConvertApp.output_base

        # Setting full path to subvert the join() in the file writer. I can't
        # figure out how to set the output directories from this function
        resources['unique_key'] = join(self.hugo_dir, 'content', section, slug)

        # Probably should be done with a postprocessor.
        output = re.sub(r'__IMGDIR__', join('/img', slug), output)

        return output, resources
class DocumentationExporter(MetatabExporter):
    """Exports multiple forms of documentation.

    Produces HTML (full and basic body), Markdown, PDF and LaTeX renditions
    of a notebook in one pass and records them in resources['outputs'].
    """

    # Extra metadata injected into the notebook under the 'metatab' key
    # before conversion.
    metadata = Dict(help='Extra metadata, added to the \'metatab\' key',
                    default_value={}).tag(config=True)

    # Stem used for every generated file (e.g. documentation.html, .md, .pdf).
    base_name = Unicode(u'documentation',
                        help="Base name for the documentation file").tag(config=True)

    @property
    def default_config(self):
        """Configure templates and per-exporter preprocessor chains for the
        HTML, Markdown and PDF renditions."""
        import metapack.jupyter.templates

        c = Config()

        c.TemplateExporter.template_path = [dirname(metapack.jupyter.templates.__file__)]

        c.HTMLExporter.preprocessors = [
            'metapack.jupyter.preprocessors.NoShowInput',
            'metapack.jupyter.preprocessors.RemoveMetatab',
            'metapack.jupyter.preprocessors.HtmlBib'
        ]

        c.HTMLExporter.exclude_input_prompt = True
        c.HTMLExporter.exclude_output_prompt = True

        c.MarkdownExporter.preprocessors = ['metapack.jupyter.preprocessors.RemoveMagics']

        c.PDFExporter.preprocessors = [
            # 'metapack.jupyter.preprocessors.NoShowInput',
            'metapack.jupyter.preprocessors.RemoveMetatab',
            'metapack.jupyter.preprocessors.LatexBib',
            'metapack.jupyter.preprocessors.MoveTitleDescription'
        ]

        c.PDFExporter.exclude_input_prompt = True
        # Excluding the output prompt also excludes the output tables.
        # .PDFExporter.exclude_output_prompt = True

        c.merge(super(DocumentationExporter, self).default_config)

        return c

    def from_notebook_node(self, nb, resources=None, **kw):
        """Convert *nb* to HTML and attach every other rendition (PDF,
        Markdown, full/basic HTML) to resources['outputs'].

        Returns (output, resources); output is the primary HTML body.
        """
        nb_copy = copy.deepcopy(nb)

        nb_copy['metadata']['metatab'] = self.metadata

        # get the Normal HTML output:
        output, resources = HTMLExporter(config=self.config).from_notebook_node(nb_copy)

        resources['unique_key'] = 'notebook'

        # Get all of the image resources
        nb_copy, resources = self.extract_resources(nb_copy, resources)

        # Add resources for the html and markdown version of the notebook
        self.add_pdf(nb_copy, resources)
        self.add_markdown_doc(nb_copy, resources)
        self.add_html_doc(nb_copy, resources)
        self.add_basic_html_doc(nb_copy, resources)

        return output, resources

    def extract_resources(self, nb, resources):
        # Pull cell output images out into resources['outputs'] with
        # predictable per-cell filenames.
        output_filename_template = "image_{cell_index}_{index}{extension}"

        return ExtractOutputPreprocessor(output_filename_template=output_filename_template) \
            .preprocess(nb, resources)

    def add_pdf(self, nb, resources):
        """Render PDF and LaTeX renditions; silently skipped when the LaTeX
        toolchain is not installed."""
        from ipython_genutils.py3compat import which

        template_file = 'notebook.tplx'

        exp = PDFExporter(config=self.config, template_file=template_file)

        # No latex binary -> no PDF/LaTeX output (best-effort).
        if not which(exp.latex_command[0]):
            return

        (body, _) = exp.from_notebook_node(nb)

        resources['outputs'][self.base_name+'.pdf'] = body

        exp = LatexExporter(config=self.config, template_file=template_file)

        (body, _) = exp.from_notebook_node(nb)

        resources['outputs'][self.base_name+'.latex'] = body.encode('utf-8')

    def add_basic_html_doc(self, nb, resources):
        # HTML body only (no page chrome), with inputs hidden.
        html_exp = HTMLExporter(config=self.config, template_file='hide_input_html_basic.tpl')
        (html_basic_body, _) = html_exp.from_notebook_node(nb)

        resources['outputs'][self.base_name+'_html_body.html'] = html_basic_body.encode('utf-8')

    def add_html_doc(self, nb, resources):
        # Full standalone HTML page, with inputs hidden.
        html_exp = HTMLExporter(config=self.config, template_file='hide_input_html.tpl')
        (html_full_body, _) = html_exp.from_notebook_node(nb)

        resources['outputs'][self.base_name+'.html'] = html_full_body.encode('utf-8')

    def add_markdown_doc(self, nb, resources):
        # Markdown rendition of the notebook.
        exp = MarkdownExporter(config=self.config)
        (md_body, _) = exp.from_notebook_node(nb)

        resources['outputs'][self.base_name+'.md'] = md_body.encode('utf-8')

    def update_metatab(self, doc, resources):
        """Add documentation entries for resources"""
        if not 'Documentation' in doc:
            doc.new_section("Documentation")

        ds = doc['Documentation']

        if not 'Name' in ds.args:
            ds.add_arg('Name', prepend=True)

        # This is the main output from the HTML exporter, not a resource.
        ds.new_term('Root.Documentation', 'docs/notebook.html', name="notebook.html",
                    title='Jupyter Notebook (HTML)')

        # Classify each generated file by extension / well-known name.
        for name, data in resources.get('outputs', {}).items():
            if name == 'documentation.html':
                ds.new_term('Root.Documentation', 'docs/' + name, name=name,
                            title='Primary Documentation (HTML)')
            elif name == 'html_basic_body.html':
                # Embedded body fragment; not listed as documentation.
                pass
            elif name.endswith('.html'):
                ds.new_term('Root.Documentation', 'docs/' + name, name=name,
                            title='Documentation (HTML)')
            elif name.endswith('.md'):
                ds.new_term('Root.Documentation', 'docs/' + name, name=name,
                            title='Documentation (Markdown)')
            elif name.endswith('.pdf'):
                ds.new_term('Root.Documentation', 'docs/' + name, name=name,
                            title='Documentation (PDF)')
            elif name.endswith('.png'):
                ds.new_term('Root.Image', 'docs/' + name, name=name,
                            title='Image for HTML Documentation')
            else:
                pass
class DependencyExtractor(Application):
    """Extract notebook dependency information as JSON.

    Scans the directories given on the command line for ``*.ipynb`` files,
    executes each notebook's first code cell to read its ``__depends__`` and
    ``__dest__`` declarations, and prints a JSON mapping of
    notebook path -> {'sources': [...], 'targets': [...]}.
    """

    name = Unicode(u'nbflow')
    description = Unicode(
        u'Extract the hierarchy of dependencies from notebooks in the specified folder.'
    )
    version = __version__

    def extract_parameters(self, nb):
        """Execute the first code cell of *nb* and return its local names.

        Returns an empty dict when the notebook has no code cells.

        SECURITY NOTE: this exec()s arbitrary notebook code — only run on
        trusted notebooks.
        """
        # find the first code cell
        defs_cell = None
        for cell in nb.cells:
            if cell.cell_type == 'code':
                defs_cell = cell
                break
        if defs_cell is None:
            return {}

        defs_code = defs_cell.source
        globals_dict = {}
        locals_dict = {}
        exec(defs_code, globals_dict, locals_dict)
        return locals_dict

    def resolve_path(self, source, path):
        """Resolve *path* relative to the directory containing *source*."""
        dirname = os.path.dirname(source)
        return os.path.abspath(os.path.join(dirname, path))

    def get_dependencies(self, dirnames):
        """Return a JSON string mapping each notebook to its sources/targets.

        :raises ValueError: when a notebook declares ``__depends__`` but
            not ``__dest__``.
        """
        dependencies = {}
        for dirname in dirnames:
            files = glob.glob("{}/*.ipynb".format(dirname))
            for filename in files:
                modname = os.path.splitext(os.path.basename(filename))[0]
                with open(filename, "r") as fh:
                    nb = reads(fh.read())

                params = self.extract_parameters(nb)
                if '__depends__' not in params:
                    continue
                if '__dest__' not in params:
                    raise ValueError(
                        "__dest__ is not defined in {}".format(filename))

                # get sources that are specified in the file
                sources = [
                    self.resolve_path(filename, x)
                    for x in params['__depends__']
                ]

                # Normalize __dest__ to a list of paths.
                # BUGFIX: under Python 3, str has __iter__, so the old
                # `if not hasattr(targets, '__iter__')` test let a single
                # string destination fall through and be iterated
                # character-by-character; treat strings as one target.
                targets = params['__dest__']
                if targets is None:
                    targets = []
                elif isinstance(targets, str) or not hasattr(targets, '__iter__'):
                    targets = [targets]
                targets = [self.resolve_path(filename, x) for x in targets]

                dependencies[os.path.join(dirname, '{}.ipynb'.format(modname))] = {
                    'targets': targets,
                    'sources': sources
                }

        return json.dumps(dependencies, indent=2)

    def start(self):
        """CLI entry point: print dependencies for the directories named in
        extra_args, or exit(1) when none were given."""
        if len(self.extra_args) == 0:
            self.log.error("No directory names specified.")
            sys.exit(1)
        print(self.get_dependencies(self.extra_args))
class Builder(Application):
    """Fetch a git repository, detect a matching buildpack, build a docker
    image from it, and push the result to a registry."""

    # Config file loaded in initialize(); overridable with -f.
    config_file = Unicode('builder_config.py', config=True)

    # Name of this build; used as the checkout directory under git_workdir.
    build_name = Unicode(None, allow_none=True, config=True)

    # Git repository URL to build from.
    source_url = Unicode(None, allow_none=True, config=True)

    # Git ref to check out (branch, tag, or SHA).
    source_ref = Unicode('master', allow_none=True, config=True)

    # Full image spec to produce and push, e.g. "repo/name:tag".
    output_image_spec = Unicode(None, allow_none=True, config=True)

    # Scratch directory for git clones.
    git_workdir = Unicode("/tmp/git", config=True)

    # Buildpacks tried in order; first whose detect() matches wins.
    buildpacks = List(None, [DockerBuildPack, PythonBuildPack], config=True)

    # Command-line aliases mapping flags to traits.
    aliases = Dict({
        'source': 'Builder.source_url',
        'ref': 'Builder.source_ref',
        'output': 'Builder.output_image_spec',
        'f': 'Builder.config_file',
        'n': 'Builder.build_name'
    })

    def fetch(self, url, ref, output_path):
        """Clone *url* into *output_path* and hard-reset to *ref*; exits the
        process with status 1 on git failure."""
        try:
            for line in execute_cmd(['git', 'clone', url, output_path]):
                self.log.info(line, extra=dict(phase='fetching'))
        except subprocess.CalledProcessError:
            self.log.error('Failed to clone repository!', extra=dict(phase='failed'))
            sys.exit(1)

        try:
            for line in execute_cmd([
                    'git',
                    '--git-dir', os.path.join(output_path, '.git'),
                    'reset', '--hard', ref
            ]):
                self.log.info(line, extra=dict(phase='fetching'))
        except subprocess.CalledProcessError:
            self.log.error('Failed to check out ref %s', ref, extra=dict(phase='failed'))
            sys.exit(1)

    def initialize(self, *args, **kwargs):
        """Set up JSON-formatted logging and load the config file."""
        super().initialize(*args, **kwargs)
        logHandler = logging.StreamHandler()
        formatter = jsonlogger.JsonFormatter()
        logHandler.setFormatter(formatter)
        # Need to reset existing handlers, or we repeat messages
        self.log.handlers = []
        self.log.addHandler(logHandler)
        self.log.setLevel(logging.INFO)
        self.load_config_file(self.config_file)

    def run(self):
        """Main flow: skip if the image already exists in the registry,
        otherwise fetch, build with the first matching buildpack, and push."""
        # HACK: Try to just pull this and see if that works.
        # if it does, then just bail.
        # WHAT WE REALLY WANT IS TO NOT DO ANY WORK IF THE IMAGE EXISTS
        client = docker.APIClient(base_url='unix://var/run/docker.sock', version='auto')

        # NOTE(review): assumes output_image_spec contains exactly one ':'
        # (no registry port, tag always present) — confirm with callers.
        repo, tag = self.output_image_spec.split(':')

        for line in client.pull(
                repository=repo,
                tag=tag,
                stream=True,
        ):
            progress = json.loads(line.decode('utf-8'))
            if 'error' in progress:
                # Pull failed -> image doesn't exist; fall through and build.
                break
        else:
            # Pull succeeded without error: image already exists, nothing to do.
            return

        output_path = os.path.join(self.git_workdir, self.build_name)

        self.fetch(
            self.source_url,
            self.source_ref,
            output_path
        )

        # First buildpack whose detect() matches builds the image.
        for bp_class in self.buildpacks:
            bp = bp_class()
            if bp.detect(output_path):
                self.log.info('Using %s builder', bp.name, extra=dict(phase='building'))
                bp.build(output_path, self.source_ref, self.output_image_spec)
                break
        else:
            self.log.error('Could not figure out how to build this repository! Tell us?',
                           extra=dict(phase='failed'))
            sys.exit(1)

        # Build a progress setup for each layer, and only emit per-layer info every 1.5s
        layers = {}
        last_emit_time = time.time()
        for line in client.push(self.output_image_spec, stream=True):
            progress = json.loads(line.decode('utf-8'))
            if 'id' not in progress:
                continue
            if 'progressDetail' in progress and progress['progressDetail']:
                layers[progress['id']] = progress['progressDetail']
            else:
                layers[progress['id']] = progress['status']
            if time.time() - last_emit_time > 1.5:
                self.log.info('Pushing image', extra=dict(progress=layers, phase='pushing'))
                last_emit_time = time.time()